Commit 113fa625 authored by Mindiell

Minor corrections to URL title handling; also store the title in the 'presse' table

parent 9699bf46
...@@ -34,19 +34,15 @@ def get_cursor(): ...@@ -34,19 +34,15 @@ def get_cursor():
return db.cursor() return db.cursor()
return None return None
def get_url(message, command=""): def get_url(message):
""" """
Retrieve the url behind the command. Retrieve the url in the message.
""" """
# Let's get what is behind the command # Let's get the url
result = re.search("!%s ([^ ]*)" % command, message) result = re.search("(https?[^ ]+)", message)
if not result:
return ""
url = result.group(1)
# Verify the presence of http
result = re.search("^(https?://)(.+)$", url)
if not result: if not result:
return "http" return "http"
url = result.group(1)
# Removing anchor if needed # Removing anchor if needed
result = re.search("^([^#]*)", url) result = re.search("^([^#]*)", url)
if result: if result:
...@@ -55,6 +51,25 @@ def get_url(message, command=""): ...@@ -55,6 +51,25 @@ def get_url(message, command=""):
url = re.sub("[?&](utm_medium|utm_source|utm_campaign|xtor)=[^&]*", "", url) url = re.sub("[?&](utm_medium|utm_source|utm_campaign|xtor)=[^&]*", "", url)
return url return url
def get_title(message):
    """
    Retrieve the title of the page behind the url found in the message.

    The url is extracted with get_url(), the page is downloaded and the
    content of its <title> tag is read.

    Returns a (title, website) tuple where website is the host part of the
    url. Each element is an empty string when it could not be determined
    (no usable url, download failure, no <title> tag in the page).
    """
    title = ""
    website = ""
    try:
        url = get_url(message)
        website = re.search("//([^/]*)", url).group(1)
        f = urllib.URLopener().open(url)
        try:
            content = f.read()
        finally:
            # Always release the connection, even if reading fails
            f.close()
        title = re.search("<title>([^<]+)</title>", content).group(1)
    except Exception:
        # Best effort only: a missing url, an unreachable site or a page
        # without title must never break message handling. Unlike a bare
        # except, this lets KeyboardInterrupt and SystemExit go through.
        pass
    if title:
        # Unescaping HTML entities; "&amp;" is handled last so that a text
        # like "&amp;lt;" is not unescaped twice
        title = title.replace("&gt;", ">")
        title = title.replace("&lt;", "<")
        title = title.replace("&quot;", '"')
        title = title.replace("&amp;", "&")
        # The title comes from an untrusted page: collapse line breaks and
        # whitespace runs so that it always fits on a single line
        title = " ".join(title.split())
    return (title, website)
class Wantzel(object): class Wantzel(object):
""" """
Wantzel bot. Wantzel bot.
...@@ -98,6 +113,10 @@ class Wantzel(object): ...@@ -98,6 +113,10 @@ class Wantzel(object):
# Cleaning user name # Cleaning user name
user = re.search("([^!]*)!", user).group(1) user = re.search("([^!]*)!", user).group(1)
print("Message received: %s %s %s" % (user, channel, msg)) print("Message received: %s %s %s" % (user, channel, msg))
# Whatever is done, get the title of an existing url in a message
title = ""
if "http" in msg:
title, website = get_title(msg)
# Never answer to botself # Never answer to botself
if user!=config.nickname: if user!=config.nickname:
# If it's a query, bot should answer to the user as the channel # If it's a query, bot should answer to the user as the channel
...@@ -112,28 +131,15 @@ class Wantzel(object): ...@@ -112,28 +131,15 @@ class Wantzel(object):
command = command.group(1) command = command.group(1)
print("Command: %s" % command) print("Command: %s" % command)
if command.startswith("rp"): if command.startswith("rp"):
self.rp(command, user, channel, msg) self.rp(command, user, channel, msg, title)
elif command=="help": elif command=="help":
self.help(user, channel, msg) self.help(user, channel, msg)
elif command=="kill": elif command=="kill":
self.kill(user, channel, msg) self.kill(user, channel, msg)
elif command=="stats": elif command=="stats":
self.stats(user, channel, msg) self.stats(user, channel, msg)
# Whatever is done, get the title of an existing url in a message if title and website:
if "http" in msg: self.send_message(channel, messages["title"] % (title, website))
content = ""
title = ""
website = ""
try:
url = re.search("(http[^ ]*)", msg).group(1)
website = re.search("//([^/]*)", url).group(1)
f = urllib.URLopener().open(url)
content = f.read()
title = re.search("<title>([^<]+)</title>", content).group(1)
except:
pass
if title and website:
self.send_message(channel, messages["title"] % (title, website))
def on_joined(self, channel): def on_joined(self, channel):
""" """
...@@ -161,14 +167,14 @@ class Wantzel(object): ...@@ -161,14 +167,14 @@ class Wantzel(object):
else: else:
self.send_message(channel, messages["help"]) self.send_message(channel, messages["help"])
def rp(self, command, user, channel, msg): def rp(self, command, user, channel, msg, title=""):
""" """
Adding the article in rp database. Adding the article in rp database.
""" """
print("rp command %s" % command) print("rp command %s" % command)
cite = 0 cite = 0
note = 0 note = 0
url = get_url(msg, command) url = get_url(msg)
print("url: %s" % url) print("url: %s" % url)
if url=="": if url=="":
return return
...@@ -194,8 +200,11 @@ class Wantzel(object): ...@@ -194,8 +200,11 @@ class Wantzel(object):
# lang, published, nid, screenshot, title, fetched, seemscite # lang, published, nid, screenshot, title, fetched, seemscite
print("Adding an article by %s: %s" % (user, url)) print("Adding an article by %s: %s" % (user, url))
result = cursor.execute( result = cursor.execute(
"INSERT INTO presse SET url=%s, provenance=%s, cite=%s, note=%s, datec=NOW()", """INSERT INTO presse SET
(url, user, cite, note) url=%s, provenance=%s, cite=%s, note=%s, datec=NOW(), title=%s,
lang='', published=0, nid=0, screenshot=0, fetched=0, seemscite=0
""",
(url, user, cite, note, title)
) )
self.send_message(channel, messages["rp_new_article"] % user) self.send_message(channel, messages["rp_new_article"] % user)
else: else:
...@@ -216,7 +225,7 @@ class Wantzel(object): ...@@ -216,7 +225,7 @@ class Wantzel(object):
""" """
#TODO: Gérer les droits de cette commande #TODO: Gérer les droits de cette commande
print("kill command") print("kill command")
url = get_url(msg, "kill") url = get_url(msg)
print("url: %s" % url) print("url: %s" % url)
if url=="": if url=="":
return return
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment