def html_unentities(s):
    """Replace HTML character references in *s* with the characters they name.

    Handles named references (``&eacute;``, ``&amp;``, ...) as well as decimal
    (``&#233;``) and hexadecimal (``&#xE9;``) numeric references.

    The substitution is done in a single left-to-right pass, so text produced
    by one replacement is never decoded a second time: ``&amp;lt;`` becomes
    ``&lt;``, not ``<``.

    Args:
        s: The markup fragment. A byte string is decoded as UTF-8 first;
            text that is already decoded is used as-is.

    Returns:
        The decoded text. References that are not recognised are left in
        the output unchanged instead of raising ``KeyError``.
    """
    # Imported locally so this function carries its own dependency; the
    # module was renamed between Python 2 and Python 3.
    try:
        from html.entities import name2codepoint
    except ImportError:  # Python 2
        from htmlentitydefs import name2codepoint
    try:
        to_char = unichr  # Python 2: chr() only covers byte values there
    except NameError:
        to_char = chr

    if isinstance(s, bytes):
        s = s.decode('utf-8')

    # Substitutions that intentionally differ from the literal code point.
    # The soft hyphen is dropped, as in the previous lookup table.
    # NOTE(review): the previous table appeared to map the non-breaking
    # space to a plain space -- confirm that a regular space is wanted.
    overrides = {u'nbsp': u' ', u'shy': u'', u'apos': u"'"}

    def _decode(match):
        """Return the replacement text for one ``&...;`` reference."""
        body = match.group(1)
        if body in overrides:
            return overrides[body]
        try:
            if body[0] == u'#':
                if body[1] in u'xX':
                    return to_char(int(body[2:], 16))
                return to_char(int(body[1:]))
            return to_char(name2codepoint[body])
        except (KeyError, ValueError, OverflowError):
            # Unknown name or code point out of range: keep the source text.
            return match.group(0)

    return re.sub(
        r'&(#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);', _decode, s)
-
class ScheduleHammer(object):
    """Pull the entry titles out of a schedule page's HTML.

    An entry is whatever sits between a ``<li>...<font ...>`` opener and the
    matching ``</font>...</li>`` closer. The raw matches are stored in
    ``items``; iterating or indexing the object yields each match cut at its
    first ``<br`` and passed through ``html_unentities``.
    """

    def __init__(self, HTML):
        """Scan *HTML* and store every entry found in ``self.items``.

        Args:
            HTML: The page markup as a ``str``.

        Raises:
            TypeError: If *HTML* is not a ``str``.
        """
        if not isinstance(HTML, str):
            # Adjacent literals instead of a backslash continuation inside
            # the string, which leaked source indentation into the message.
            raise TypeError("ScheduleHammer must take only a string as "
                            "parameter")
        self.S_tag = r'[<]li[>].*?[<]font.*?[>]'
        self.E_tag = r'[<][/]font[>].*?[<][/]li[>]'
        # With a single group in the pattern, findall returns just the
        # captured content of each entry.
        self.items = re.findall(
            r'%s(?P<content>.*?)%s' % (self.S_tag, self.E_tag), HTML)

    @staticmethod
    def _title(item):
        """Return the text of *item* before its first ``<br``, decoded."""
        # split() returns the whole string as element 0 when '<br' is
        # absent, so no separate membership test is needed.
        return html_unentities(item.split('<br')[0])

    def __iter__(self):
        """Yield the decoded title of every entry, in document order."""
        for item in self.items:
            yield self._title(item)

    def __getitem__(self, pos):
        """Return the decoded title at *pos*, or a list of them for a slice."""
        if isinstance(pos, slice):
            return [self._title(item) for item in self.items[pos]]
        return self._title(self.items[pos])
-