aijuke_spider/__pycache__/__init__.cpython-311.pycBinary files differ
aijuke_spider/__pycache__/extensions.cpython-311.pycBinary files differ
aijuke_spider/__pycache__/items.cpython-311.pycBinary files differ
aijuke_spider/__pycache__/middlewares.cpython-311.pycBinary files differ
aijuke_spider/__pycache__/pipelines.cpython-311.pycBinary files differ
aijuke_spider/__pycache__/settings.cpython-311.pycBinary files differ
aijuke_spider/config/__pycache__/__init__.cpython-311.pycBinary files differ
aijuke_spider/config/__pycache__/db_config.cpython-311.pycBinary files differ
aijuke_spider/pipelines.py
@@ -564,7 +564,8 @@ video_info = item.get('data', {}) demand: Demand = item.get('demand', {}) task: Task = item.get('task', {}) if video_info.get('aweme_info', {}).get('statistics', {}).get('comment_count', 0) < 5: # 判断作品是否出评 if video_info.get('aweme_info', {}).get('statistics', {}).get('comment_count', 0) < 3: return item if not video_info: return @@ -746,10 +747,15 @@ if spider.name != 'douyin_v_user_spider': return item video = item.get("video") # 判断作品是否出评 if video.get('statistics', {}).get('comment_count', 0) < 1: return item task_id = item.get("task_id") task_name = item.get("task_name") job_unique_no = item.get("job_unique_no") v = item.get("v") grab_post = Aweme( task_id=task_id, task_name=task_name, aijuke_spider/spiders/__pycache__/__init__.cpython-311.pycBinary files differ
aijuke_spider/spiders/base/__pycache__/__init__.cpython-311.pycBinary files differ
aijuke_spider/spiders/base/__pycache__/base_spider.cpython-311.pycBinary files differ
aijuke_spider/spiders/douyin/__pycache__/__init__.cpython-311.pycBinary files differ
aijuke_spider/spiders/douyin/__pycache__/a_bogus.cpython-311.pycBinary files differ
aijuke_spider/spiders/douyin/__pycache__/douyin_comment_spider.cpython-311.pycBinary files differ
aijuke_spider/spiders/douyin/__pycache__/douyin_hot_post_comment_spider.cpython-311.pycBinary files differ
aijuke_spider/spiders/douyin/__pycache__/douyin_search_spider.cpython-311.pycBinary files differ
aijuke_spider/spiders/douyin/__pycache__/douyin_user_profile_spider.cpython-311.pycBinary files differ
aijuke_spider/spiders/douyin/__pycache__/douyin_v_user.cpython-311.pycBinary files differ
aijuke_spider/spiders/douyin/__pycache__/douyin_v_user_profile_spider.cpython-311.pycBinary files differ
aijuke_spider/spiders/douyin/__pycache__/utils.cpython-311.pycBinary files differ
aijuke_spider/spiders/douyin/douyin_v_user_profile_spider.py
@@ -13,6 +13,7 @@ class DouyinVUserProfileSpider(BaseSpider): platform = "DY" tenant_id="1" # 默认租户id name = "douyin_v_user_profile_spider" task_type=3 allowed_domains = ["douyin.com"] @@ -60,24 +61,29 @@ v_user_list = self.get_v_user() for v_user in v_user_list: params: dict = self.params.copy() params.update({'sec_user_id': v_user.unique_no}) params.update({'sec_user_id': v_user.sec_id}) headers = self.headers.copy() a_bogus = gen_abogus(params=params, user_agent=headers['user-agent']) params.update({'a_bogus': a_bogus}) yield scrapy.Request(url=f'{self.base_url}/aweme/v1/web/user/profile/other/?{urllib.parse.urlencode(params)}', meta={'v_user': v_user, 'params': params}, headers=headers, url = f'{self.base_url}/aweme/v1/web/user/profile/other/?{urllib.parse.urlencode(params)}' self.logger.info(f'大V用户id:{v_user.id}, 平台用户主页:{url},平台用户标识:{v_user.unique_no}') yield scrapy.Request(url= url, meta={'v_user': v_user, 'params': params}, headers=headers, callback=self.parse) def parse(self, response: Response, *args, **kwargs): if response.status == 200 and len(response.text) > 0: user_info = response.json().get('user') yield { 'user_info': user_info, 'v_user': response.meta.get('v_user'), } self.logger.debug(f'抓取用户,{user_info.get("nickname", None)},{user_info.get("sec_uid")}') try: # self.logger.info(f'响应response.text: {response.text} \n') if response.status == 200 and len(response.text) > 0: user_info = response.json().get('user') yield { 'user_info': user_info, 'v_user': response.meta.get('v_user'), } self.logger.info(f'parse 抓取用户,{user_info.get("nickname", None)},{user_info.get("sec_uid")}') else: self.logger.info(f'parse response status: {response.status }') except Exception as e: self.logger.error(f"处理响应时出错: {e}") def get_v_user(self) -> List[V]: @@ -85,15 +91,15 @@ try: query = session.query(V).filter( V.platform == self.platform, V.url.isnot(None), V.nickname.is_(None) V.url.isnot(None) # V.nickname.is_(None) ).order_by(V.id.desc()) if self.tenant_id is not None: query = query.filter( V.tenant_id == self.tenant_id, ) result = query.all() self.logger.info(f'查询到{len(result)}条数据') self.logger.info(f'查询到aijuke_v_user表的{len(result)}条数据') return result except Exception as e: self.logger.error(e) aijuke_spider/spiders/kuaishou/__pycache__/__init__.cpython-311.pycBinary files differ
aijuke_spider/spiders/kuaishou/__pycache__/kuaishou_comment_spider.cpython-311.pycBinary files differ
aijuke_spider/spiders/kuaishou/__pycache__/kuaishou_hot_post_comment_spider.cpython-311.pycBinary files differ
aijuke_spider/spiders/kuaishou/__pycache__/kuaishou_search_spider.cpython-311.pycBinary files differ
aijuke_spider/spiders/kuaishou/__pycache__/kuaishou_user_profile_spider.cpython-311.pycBinary files differ
aijuke_spider/spiders/kuaishou/__pycache__/kuaishou_v_user.cpython-311.pycBinary files differ
aijuke_spider/spiders/kuaishou/__pycache__/kuaishou_v_user_profile_spider.cpython-311.pycBinary files differ
aijuke_spider/spiders/kuaishou/__pycache__/utils.cpython-311.pycBinary files differ
aijuke_spider/spiders/xhs/__pycache__/__init__.cpython-311.pycBinary files differ
aijuke_spider/spiders/xhs/__pycache__/utils.cpython-311.pycBinary files differ
aijuke_spider/spiders/xhs/__pycache__/xhs_comment_spider.cpython-311.pycBinary files differ
aijuke_spider/spiders/xhs/__pycache__/xhs_search_spider.cpython-311.pycBinary files differ
aijuke_spider/spiders/xhs/__pycache__/xhs_user_profile_spider.cpython-311.pycBinary files differ