使用scrapy采集社交媒体的公开信息
yellowDog
2024-09-18 de69e4c3180bf515a8546b5f5389fa131cc8e0b2
refactor:修改小红书xpath
1 file modified
2 ■■■ changed files
aijuke_spider/pipelines.py 2 ●●● patch | view | raw | blame | history
aijuke_spider/pipelines.py
@@ -494,7 +494,7 @@
            tenant_id=comment_got.tenant_id,
            platform=comment_got.platform,
            grab_datetime=comment_got.grab_datetime,
            age = tree.xpath('//div[@class="gender"]/span[@class="gender-text"]/text()')[0].replace('岁','') if tree.xpath('//div[@class="gender"]/span[@class="gender-text"]/text()') else None,
            # age = tree.xpath('//div[@class="gender"]/span[@class="gender-text"]/text()')[0].replace('岁','') if tree.xpath('//div[@class="gender"]/span[@class="gender-text"]/text()') else None,
            follow=tree.xpath('//div[@class="user-interactions"]//span[@class="count"]/text()')[0],  # 关注数
            sex = "男" if re.search(r'<use xlink:href="#male"', user_info) else "女" if re.search(r'<use xlink:href="#female"', user_info) else "未知",
            fans=tree.xpath('//div[@class="user-interactions"]//span[@class="count"]/text()')[1],  # 粉丝数