Commit 113fa625 authored by Mindiell

Minor corrections to URL title handling, plus storing the title in the 'presse' table

parent 9699bf46
......@@ -34,19 +34,15 @@ def get_cursor():
return db.cursor()
return None
def get_url(message, command=""):
def get_url(message):
"""
Retrieve the url behind the command.
Retrieve the url in the message.
"""
# Let's get what is behind the command
result = re.search("!%s ([^ ]*)" % command, message)
if not result:
return ""
url = result.group(1)
# Verify the presence of http
result = re.search("^(https?://)(.+)$", url)
# Let's get the url
result = re.search("(https?[^ ]+)", message)
if not result:
return "http"
url = result.group(1)
# Removing anchor if needed
result = re.search("^([^#]*)", url)
if result:
......@@ -55,6 +51,25 @@ def get_url(message, command=""):
url = re.sub("[?&](utm_medium|utm_source|utm_campaign|xtor)=[^&]*", "", url)
return url
def get_title(message):
    """
    Retrieve the title of the page behind the url found in the message.

    The url is extracted with get_url(), the page is downloaded and the text
    of its first "<title>...</title>" element is read. This is a best-effort
    lookup: any failure (no usable url, network error, page without title)
    is swallowed and the values found so far are returned.

    Returns a (title, website) tuple:
    - title: page title with the basic HTML entities unescaped, or "" if it
      could not be retrieved;
    - website: host part of the url (what follows "//" up to the next "/"),
      or "" if the url could not be parsed. It may be set even when title
      is empty, because it is computed before the page is downloaded.
    """
    title = ""
    website = ""
    try:
        url = get_url(message)
        website = re.search("//([^/]*)", url).group(1)
        # NOTE(review): urllib.URLopener is the Python 2 location of this
        # class - confirm the interpreter version before porting.
        page = urllib.URLopener().open(url)
        try:
            content = page.read()
        finally:
            # Always release the connection, even if reading fails
            page.close()
        title = re.search("<title>([^<]+)</title>", content).group(1)
    except Exception:
        # Deliberate best effort: a missing title must never break message
        # handling. Catching Exception (instead of a bare except) still lets
        # KeyboardInterrupt and SystemExit stop the bot.
        pass
    # Unescaping HTML entities ("&amp;" goes last so that an escaped
    # ampersand such as "&amp;lt;" is not unescaped twice)
    if title:
        for entity, char in (
            ("&gt;", ">"),
            ("&lt;", "<"),
            ("&quot;", '"'),
            ("&amp;", "&"),
        ):
            title = title.replace(entity, char)
    return (title, website)
class Wantzel(object):
"""
Wantzel bot.
......@@ -98,6 +113,10 @@ class Wantzel(object):
# Cleaning user name
user = re.search("([^!]*)!", user).group(1)
print("Message received: %s %s %s" % (user, channel, msg))
# Whatever is done, get the title of an existing url in a message
title = ""
if "http" in msg:
title, website = get_title(msg)
# Never answer to botself
if user!=config.nickname:
# If it's a query, bot should answer to the user as the channel
......@@ -112,28 +131,15 @@ class Wantzel(object):
command = command.group(1)
print("Command: %s" % command)
if command.startswith("rp"):
self.rp(command, user, channel, msg)
self.rp(command, user, channel, msg, title)
elif command=="help":
self.help(user, channel, msg)
elif command=="kill":
self.kill(user, channel, msg)
elif command=="stats":
self.stats(user, channel, msg)
# Whatever is done, get the title of an existing url in a message
if "http" in msg:
content = ""
title = ""
website = ""
try:
url = re.search("(http[^ ]*)", msg).group(1)
website = re.search("//([^/]*)", url).group(1)
f = urllib.URLopener().open(url)
content = f.read()
title = re.search("<title>([^<]+)</title>", content).group(1)
except:
pass
if title and website:
self.send_message(channel, messages["title"] % (title, website))
if title and website:
self.send_message(channel, messages["title"] % (title, website))
def on_joined(self, channel):
"""
......@@ -161,14 +167,14 @@ class Wantzel(object):
else:
self.send_message(channel, messages["help"])
def rp(self, command, user, channel, msg):
def rp(self, command, user, channel, msg, title=""):
"""
Adding the article in rp database.
"""
print("rp command %s" % command)
cite = 0
note = 0
url = get_url(msg, command)
url = get_url(msg)
print("url: %s" % url)
if url=="":
return
......@@ -194,8 +200,11 @@ class Wantzel(object):
# lang, published, nid, screenshot, title, fetched, seemscite
print("Adding an article by %s: %s" % (user, url))
result = cursor.execute(
"INSERT INTO presse SET url=%s, provenance=%s, cite=%s, note=%s, datec=NOW()",
(url, user, cite, note)
"""INSERT INTO presse SET
url=%s, provenance=%s, cite=%s, note=%s, datec=NOW(), title=%s,
lang='', published=0, nid=0, screenshot=0, fetched=0, seemscite=0
""",
(url, user, cite, note, title)
)
self.send_message(channel, messages["rp_new_article"] % user)
else:
......@@ -216,7 +225,7 @@ class Wantzel(object):
"""
#TODO: Gérer les droits de cette commande
print("kill command")
url = get_url(msg, "kill")
url = get_url(msg)
print("url: %s" % url)
if url=="":
return
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment