python的html.parser问题
#代码是很初级的,用于得到python官网网页源代码里事件的时间及地点,但出现的错误很奇怪,网上也找不到
from html.parser import HTMLParser
from html.entities import name2codepoint
class myhtmlparser(HTMLParser):
def __init__(self):
self.summary={'date':[],'location':['myend']}
def handle_starttag(self,tag,attrs):
if tag=='time':
self.summary['date'].apeend(str(attrs))
def handle_data(self,data):
self.summary['location'].remove('myend')
self.summary['location'].append(str(data))
self.summary['location'].append('myend')
def get_print(self):
for x in range(100):
if self.summary['location'][x]=='myend':
break
else:
print('event%s:\n date: %s location:%s'%(str(x+1),self.summary['date'][x],self.summary['location'][x]))
htm="""
\\此处为html源代码
"""
parser=myhtmlparser()
parser.feed(htm)
parser.get_print()
以下是输出结果
c:\Pythonfile>python test1.py
Traceback (most recent call last):
File "test1.py", line 939, in <module>
parser.feed(htm)
File "C:\My_install\python\lib\html\parser.py", line 110, in feed
self.rawdata = self.rawdata + data
AttributeError: 'myhtmlparser' object has no attribute 'rawdata'