Commit ca112f58 authored by Mindiell

Modify the unescaping of the webpage's title

parent 8b89bec5
...@@ -14,6 +14,7 @@ TODO: ...@@ -14,6 +14,7 @@ TODO:
""" """
import feedparser import feedparser
import HTMLParser
import importlib import importlib
from irc import IrcClientFactory from irc import IrcClientFactory
import MySQLdb import MySQLdb
...@@ -76,16 +77,8 @@ def get_title(message): ...@@ -76,16 +77,8 @@ def get_title(message):
title = re.search("<title>([^<]+)</title>", content).group(1) title = re.search("<title>([^<]+)</title>", content).group(1)
except: except:
pass pass
# Unescaping HTML entities # Unescaping HTML entities and removing multiple lines
if title: title = HTMLParser.HTMLParser().unescape(re.sub("\n|\r", "", title))
title = re.sub("&gt;|&#062;", ">", title)
title = re.sub("&lt;|&#060;", "<", title)
title = re.sub("&quot;|&#034;", '"', title)
title = re.sub("&apos;|&#039;", "'", title)
title = re.sub("&amp;|&#038;", "&", title)
title = re.sub("&ndash;|&#8211;", "–", title)
# Multiple lines titles are compressed
title = re.sub("\n|\r", "", title)
return (title, website) return (title, website)
def is_moderator(name): def is_moderator(name):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment