import requests
def get_html(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The raw response body decoded with ``bytes.decode()`` defaults
        (UTF-8, strict).

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            4xx/5xx response status.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on an error response instead of handing an error page to
    # the parser as if it were chapter text.
    r.raise_for_status()
    return r.content.decode()
def parse_data(data):
    """Extract paragraph text and the next-chapter link from page HTML.

    The input is scanned line by line. Scanning stops at the first line
    containing the marker text '下一章'; the first double-quoted value on
    that line is returned as the link. Before that point, every line that
    starts with ``<p>`` and contains no ``=`` is collected with its ``<p>``
    prefix (and ``</p>`` suffix, when present) removed.

    Args:
        data: Page source as a single string.

    Returns:
        A ``(lines, url)`` tuple. ``lines`` is a list of paragraph strings,
        each terminated by a newline. ``url`` is the extracted link, or ``''``
        when no marker line was found or the marker line held no quoted value.
    """
    lines = []
    url = ''
    for line in data.split('\n'):
        if '下一章' in line:
            parts = line.split('"')
            # A marker line without any double quote has no link to extract;
            # leave url empty rather than raising IndexError.
            if len(parts) > 1:
                url = parts[1].strip()
            break
        if line.startswith('<p>') and '=' not in line:
            # The line starts with '<p>', so strip() only trims the tail
            # (e.g. a '\r' left over from '\r\n' line endings).
            text = line.strip()[3:]
            # Only drop the closing tag when it is really there, so an
            # unclosed paragraph does not lose its last four characters.
            if text.endswith('</p>'):
                text = text[:-4]
            lines.append(text + '\n')
    return lines, url
def write_file(content):
    """Append *content* to the output text file '平凡的世界.txt'.

    Args:
        content: Iterable of strings to write. Nothing is added between
            items, so each item must carry its own line terminator.

    The file is opened in append mode in the current working directory and
    is created if it does not exist.
    """
    # Pin the encoding: the platform default may be unable to encode the
    # Chinese text, and would make the output differ between machines.
    with open('平凡的世界.txt', 'a', encoding='utf-8') as f:
        f.writelines(content)
def main(url, stop_marker='06.html'):
    """Crawl chapter pages starting at *url* and append their text to disk.

    Each iteration downloads the current page, extracts its paragraphs and
    next-chapter link, and appends the paragraphs to the output file. The
    crawl ends when a page yields no next link, or when the next link
    contains *stop_marker* (that page itself is not downloaded).

    Args:
        url: Address of the first page to download.
        stop_marker: Substring of the next-page URL that ends the crawl.
            Pass an empty string or ``None`` to follow links until none
            is found.
    """
    from urllib.parse import urljoin

    while url:
        html = get_html(url)
        content, next_url = parse_data(html)
        write_file(content)
        # Resolve the link against the current page so a relative href still
        # yields a fetchable address; absolute links pass through unchanged.
        url = urljoin(url, next_url) if next_url else ''
        if stop_marker and stop_marker in url:
            break
if __name__ == '__main__':
    # Script entry point: seed the crawl with the '...-01.html' page.
    data_url = 'http://www.pingfandeshijie.net/di-yi-bu-01.html'
    main(url=data_url)
# "No comments yet" -- apparently stray web-page text, not part of the script.