From 0a3fc596a86e8c56d345ee2e9aa2d917dcb6d65f Mon Sep 17 00:00:00 2001 From: MortySmith <1612945690@qq.com> Date: Sun, 31 Dec 2023 12:40:32 +0800 Subject: [PATCH] =?UTF-8?q?=E7=BB=99=E7=88=AC=E5=8F=96=E5=8F=A4=E8=AF=97?= =?UTF-8?q?=E6=96=87=E7=9A=84=E7=A8=8B=E5=BA=8F=E5=A2=9E=E5=8A=A0=E4=BA=86?= =?UTF-8?q?=E4=BF=9D=E5=AD=98=E8=AF=97=E6=96=87=E5=88=B0=E6=9C=AC=E5=9C=B0?= =?UTF-8?q?word=E6=96=87=E4=BB=B6=E9=87=8C=E7=9A=84=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- spiders/spider_gushiwen.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/spiders/spider_gushiwen.py b/spiders/spider_gushiwen.py index 351ba93..96db5c6 100644 --- a/spiders/spider_gushiwen.py +++ b/spiders/spider_gushiwen.py @@ -17,6 +17,7 @@ import requests import re import time +from docx import Document HEADERS = { 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36' @@ -94,11 +95,23 @@ def spider(): time.sleep(1) - # 2.显示数据 + # 2.显示数据,并把爬取好的诗词保存到本地 + keys_to_print = ['title', 'content'] + doc = Document() for poem in poems: print(poem) print("==" * 40) - + for i in poem: + for key in keys_to_print: + value = i.get(key) + if value: + paragraph = doc.add_paragraph() + if key == 'title': + paragraph.add_run(f'《{value}》') + elif key == 'content': + paragraph.add_run(f'{value}') + + doc.save('D:/output.docx') # 指定保存的位置 print('恭喜!爬取数据完成!')