diff --git a/wantzel.py b/wantzel.py
index 73f0c54be26c205925daf41a43a263437be3cbca..3e840df2fe13172268aa8af449bb6c6552ec262f 100644
--- a/wantzel.py
+++ b/wantzel.py
@@ -34,19 +34,15 @@ def get_cursor():
return db.cursor()
return None
-def get_url(message, command=""):
+def get_url(message):
"""
- Retrieve the url behind the command.
+ Retrieve the url in the message.
"""
- # Let's get what is behind the command
- result = re.search("!%s ([^ ]*)" % command, message)
- if not result:
- return ""
- url = result.group(1)
- # Verify the presence of http
- result = re.search("^(https?://)(.+)$", url)
+ # Let's get the url
+ result = re.search("(https?[^ ]+)", message)
if not result:
return "http"
+ url = result.group(1)
# Removing anchor if needed
result = re.search("^([^#]*)", url)
if result:
@@ -55,6 +51,25 @@ def get_url(message, command=""):
url = re.sub("[?&](utm_medium|utm_source|utm_campaign|xtor)=[^&]*", "", url)
return url
+def get_title(message):
+ title = ""
+ website = ""
+ try:
+ url = get_url(message)#re.search("(http[^ ]*)", msg).group(1)
+ website = re.search("//([^/]*)", url).group(1)
+ f = urllib.URLopener().open(url)
+ content = f.read()
+ title = re.search("<title>([^<]+)</title>", content).group(1)
+ except:
+ pass
+ # Unescaping HTML entities
+ if title:
+ title = re.sub("&gt;", ">", title)
+ title = re.sub("&lt;", "<", title)
+ title = re.sub("&quot;", '"', title)
+ title = re.sub("&amp;", "&", title)
+ return (title, website)
+
class Wantzel(object):
"""
Wantzel bot.
@@ -98,6 +113,10 @@ class Wantzel(object):
# Cleaning user name
user = re.search("([^!]*)!", user).group(1)
print("Message received: %s %s %s" % (user, channel, msg))
+ # Whatever is done, get the title of an existing url in a message
+ title = ""
+ if "http" in msg:
+ title, website = get_title(msg)
# Never answer to botself
if user!=config.nickname:
# If it's a query, bot should answer to the user as the channel
@@ -112,28 +131,15 @@ class Wantzel(object):
command = command.group(1)
print("Command: %s" % command)
if command.startswith("rp"):
- self.rp(command, user, channel, msg)
+ self.rp(command, user, channel, msg, title)
elif command=="help":
self.help(user, channel, msg)
elif command=="kill":
self.kill(user, channel, msg)
elif command=="stats":
self.stats(user, channel, msg)
- # Whatever is done, get the title of an existing url in a message
- if "http" in msg:
- content = ""
- title = ""
- website = ""
- try:
- url = re.search("(http[^ ]*)", msg).group(1)
- website = re.search("//([^/]*)", url).group(1)
- f = urllib.URLopener().open(url)
- content = f.read()
- title = re.search("<title>([^<]+)</title>", content).group(1)
- except:
- pass
- if title and website:
- self.send_message(channel, messages["title"] % (title, website))
+ if title and website:
+ self.send_message(channel, messages["title"] % (title, website))
def on_joined(self, channel):
"""
@@ -161,14 +167,14 @@ class Wantzel(object):
else:
self.send_message(channel, messages["help"])
- def rp(self, command, user, channel, msg):
+ def rp(self, command, user, channel, msg, title=""):
"""
Adding the article in rp database.
"""
print("rp command %s" % command)
cite = 0
note = 0
- url = get_url(msg, command)
+ url = get_url(msg)
print("url: %s" % url)
if url=="":
return
@@ -194,8 +200,11 @@ class Wantzel(object):
# lang, published, nid, screenshot, title, fetched, seemscite
print("Adding an article by %s: %s" % (user, url))
result = cursor.execute(
- "INSERT INTO presse SET url=%s, provenance=%s, cite=%s, note=%s, datec=NOW()",
- (url, user, cite, note)
+ """INSERT INTO presse SET
+ url=%s, provenance=%s, cite=%s, note=%s, datec=NOW(), title=%s,
+ lang='', published=0, nid=0, screenshot=0, fetched=0, seemscite=0
+ """,
+ (url, user, cite, note, title)
)
self.send_message(channel, messages["rp_new_article"] % user)
else:
@@ -216,7 +225,7 @@ class Wantzel(object):
"""
#TODO: Gérer les droits de cette commande
print("kill command")
- url = get_url(msg, "kill")
+ url = get_url(msg)
print("url: %s" % url)
if url=="":
return