Commit ca112f58 authored by Mindiell

Modifying the unescape part of webpage's title

parent 8b89bec5
......@@ -14,6 +14,7 @@ TODO:
"""
import feedparser
import HTMLParser
import importlib
from irc import IrcClientFactory
import MySQLdb
......@@ -76,16 +77,8 @@ def get_title(message):
title = re.search("<title>([^<]+)</title>", content).group(1)
except:
pass
# Unescaping HTML entities
if title:
title = re.sub("&gt;|&#062;", ">", title)
title = re.sub("&lt;|&#060;", "<", title)
title = re.sub("&quot;|&#034;", '"', title)
title = re.sub("&apos;|&#039;", "'", title)
title = re.sub("&amp;|&#038;", "&", title)
title = re.sub("&ndash;|&#8211;", "–", title)
# Multiple lines titles are compressed
title = re.sub("\n|\r", "", title)
# Unescaping HTML entities and removing multiple lines
title = HTMLParser.HTMLParser().unescape(re.sub("\n|\r", "", title))
return (title, website)
def is_moderator(name):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment