使用scrapy采集社交媒体的公开信息
yellowDog
2024-09-20 26dd760e060c9f89c37dc8ecea36007d2d37ad4f
feat:新增小红书根据时间停止采集条件
1 file modified
8 ■■■■■ changed files
aijuke_spider/spiders/xhs/xhs_comment_spider.py 8 ●●●●● patch | view | raw | blame | history
aijuke_spider/spiders/xhs/xhs_comment_spider.py
@@ -1,3 +1,4 @@
import datetime
import json
import traceback
from typing import List, Tuple
@@ -69,6 +70,13 @@
            for comment in comments.get('data', {}).get('comments'):
                self.logger.debug(
                    f'{aweme.video_id},{aweme.tenant_id},{aweme.task_id},{aweme.task_name},{aweme.platform},{comment.get("content")}')
                if publish_type == 0:
                    self.change_status(aweme=aweme)
                    return
                if (datetime.datetime.now() - datetime.datetime.fromtimestamp(
                        comment.get('create_time')/1000)).days >= int(publish_type):
                    self.change_status(aweme=aweme)
                    return
                yield {
                    'aweme': aweme,
                    'comment': comment,