| | |
| | | # 基础配置 |
| | | PLATFORM = "xhs" |
| | | PLATFORM = "dy" |
| | | KEYWORDS = "python,golang" |
| | | LOGIN_TYPE = "qrcode" # qrcode or phone or cookie |
| | | COOKIES = "" |
| | |
| | | ENABLE_GET_IMAGES = False |
| | | |
| | | # 是否开启爬评论模式, 默认不开启爬评论 |
| | | ENABLE_GET_COMMENTS = False |
| | | ENABLE_GET_COMMENTS = True |
| | | |
| | | # 是否开启爬二级评论模式, 默认不开启爬二级评论, 目前仅支持 xhs, bilibili |
| | | # 老版本项目使用了 db, 则需参考 schema/tables.sql line 287 增加表字段 |
| | |
| | | |
| | | # 指定抖音需要爬取的ID列表 |
| | | DY_SPECIFIED_ID_LIST = [ |
| | | "7280854932641664319", |
| | | "7202432992642387233", |
| | | "7383967087674281228", |
| | | '7334007011404729612', |
| | | '7378373529244306725', |
| | | '7280489551796145466', |
| | | '7253441575030738234', |
| | | '7389577992764280114' |
| | | # ........................ |
| | | ] |
| | | |
| | |
| | | |
| | | # 指定Dy创作者ID列表(sec_id) |
| | | DY_CREATOR_ID_LIST = [ |
| | | "MS4wLjABAAAATJPY7LAlaa5X-c8uNdWkvz0jUGgpw4eeXIwu_8BhvqE", |
| | | "MS4wLjABAAAA_AjUSCZATiI47vnge919AQ7GTdrxTZTtQV0FGbTaroc", |
| | | # ........................ |
| | | ] |
| | | |
| | |
| | | raise ValueError("Invalid Media Platform Currently only supported xhs or dy or ks or bili ...") |
| | | return crawler_class() |
| | | |
| | | |
| | | async def main(): |
| | | # parse cmd |
| | | await cmd_arg.parse_cmd() |
| | |
| | | |
| | | crawler = CrawlerFactory.create_crawler(platform=config.PLATFORM) |
| | | await crawler.start() |
| | | |
| | | |
| | | if config.SAVE_DATA_OPTION == "db": |
| | | await db.close() |
| | | |
| | |
| | | python-dotenv==1.0.1 |
| | | jieba==0.42.1 |
| | | wordcloud==1.9.3 |
| | | matplotlib==3.9.0 |
| | | matplotlib |