16 files modified
2 files added
| | |
| | | Base = declarative_base() |
| | | |
| | | |
class BaseSpider(scrapy.Spider):
    """Shared spider base that copies task-related spider arguments into crawler settings."""

    def __init__(self, *args, **kwargs):
        # Nothing extra is initialised here; construction is handed to the parent class.
        super().__init__(*args, **kwargs)

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Create the spider and publish selected keyword arguments.

        Each keyword argument listed below is stored as an attribute on the
        spider (``None`` when it was not supplied) and then written to
        ``crawler.settings`` under the paired upper-case key.

        NOTE(review): assumes ``crawler.settings`` is still mutable when this
        runs -- confirm against the Scrapy version in use.

        :param crawler: the crawler the spider is being bound to.
        :return: the spider instance built by the parent ``from_crawler``.
        """
        # (spider attribute / keyword name, settings key)
        arg_to_setting = (
            ('task_id', 'TASK_ID'),
            ('job_unique_no', 'JOB_UNIQUE_NO'),
            ('tenant_id', 'TENANT_ID'),
            ('task_name', 'TASK_NAME'),
            ('sec_id', 'SEC_ID'),
            ('uuid', 'UUID'),
            ('demand_id', 'DEMAND_ID'),
        )

        instance = super().from_crawler(crawler, *args, **kwargs)
        for attr_name, _ in arg_to_setting:
            setattr(instance, attr_name, kwargs.get(attr_name, None))

        # Save the arguments into settings.
        for attr_name, setting_key in arg_to_setting:
            crawler.settings.set(setting_key, getattr(instance, attr_name))

        return instance
| | | |
| | | |
| | | class Aweme(Base): |
New file |
| | |
| | | import scrapy |
| | | |
| | | |
class BaseSpider(scrapy.Spider):
    """Base class for project spiders; mirrors task arguments into the crawler's settings."""

    def __init__(self, *args, **kwargs):
        # Plain pass-through to the parent constructor.
        super().__init__(*args, **kwargs)

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Build the spider via the parent hook, then expose its task arguments.

        For every (keyword, settings key) pair below, the keyword's value
        (or ``None`` if missing) becomes a spider attribute, and that
        attribute value is then stored in ``crawler.settings``.

        NOTE(review): writing to ``crawler.settings`` here presumes the
        settings object has not been frozen yet -- verify for the deployed
        Scrapy release.

        :param crawler: crawler instance that owns the settings being updated.
        :return: the newly created spider.
        """
        # Keyword/attribute name paired with the settings key it is saved under.
        published = (
            ('task_id', 'TASK_ID'),
            ('job_unique_no', 'JOB_UNIQUE_NO'),
            ('tenant_id', 'TENANT_ID'),
            ('task_name', 'TASK_NAME'),
            ('sec_id', 'SEC_ID'),
            ('uuid', 'UUID'),
            ('demand_id', 'DEMAND_ID'),
        )

        new_spider = super().from_crawler(crawler, *args, **kwargs)

        # First attach every argument to the spider ...
        for field, _ in published:
            setattr(new_spider, field, kwargs.get(field, None))

        # ... then save the arguments into settings.
        for field, key in published:
            crawler.settings.set(key, getattr(new_spider, field))

        return new_spider
| | |
| | | from scrapy.http import Response |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import Aweme, Demand, Task, BaseSpider |
| | | from aijuke_spider.items import Aweme, Demand, Task |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | from aijuke_spider.spiders.douyin.utils import get_web_id, gen_abogus |
| | | |
| | | |
| | |
| | | from scrapy.http import Response |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import Aweme, Demand, Task, BaseSpider |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | from aijuke_spider.items import Aweme, Demand, Task |
| | | from aijuke_spider.spiders.douyin.utils import get_web_id, gen_abogus |
| | | |
| | | |
| | |
| | | from scrapy.http import Response |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import Demand, Task, BaseSpider |
| | | from aijuke_spider.items import Demand, Task |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | from aijuke_spider.spiders.douyin.utils import gen_abogus |
| | | |
| | | |
| | |
| | | from scrapy.http import Response |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import CommentGot, BaseSpider |
| | | from aijuke_spider.items import CommentGot |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | |
| | | from aijuke_spider.spiders.douyin.utils import gen_abogus |
| | | |
| | | |
| | |
| | | import scrapy |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import V, BaseSpider |
| | | from aijuke_spider.items import V |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | from aijuke_spider.spiders.douyin.utils import gen_abogus |
| | | |
| | | |
| | |
| | | from scrapy.http import Response |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import V, BaseSpider |
| | | from aijuke_spider.items import V |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | from aijuke_spider.spiders.douyin.utils import gen_abogus |
| | | |
| | | |
| | |
| | | from scrapy.http import Response |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import Aweme, Demand, Task, BaseSpider |
| | | from aijuke_spider.items import Aweme, Demand, Task |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | |
| | | |
| | | class KuaiShouCommentSpider(BaseSpider): |
| | |
| | | from scrapy.http import Response |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import Aweme, Demand, BaseSpider |
| | | from aijuke_spider.items import Aweme, Demand |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | |
| | | |
| | | class KuaiShouHotPostCommentSpider(BaseSpider): |
| | |
| | | from scrapy.http import Response |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import Demand, Task, BaseSpider |
| | | from aijuke_spider.items import Demand, Task |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | |
| | | |
| | | class KuaiShouSearchSpider(BaseSpider): |
| | |
| | | from scrapy.http import Response |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import CommentGot, BaseSpider |
| | | from aijuke_spider.items import CommentGot |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | |
| | | |
| | | class KuaiShouUserProfileSpider(BaseSpider): |
| | |
| | | from scrapy.http import Response |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import V, BaseSpider |
| | | from aijuke_spider.items import V |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | |
| | | |
| | | class KuaiShouVUserSpider(BaseSpider): |
| | |
| | | from scrapy.http import Response |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import V, BaseSpider |
| | | from aijuke_spider.items import V |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | |
| | | |
| | | class KuaiShouVUserProfileSpider(BaseSpider): |
| | |
| | | from sqlalchemy.dialects import mysql |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import Aweme, Demand, Task, BaseSpider |
| | | |
| | | from aijuke_spider.items import Aweme, Demand, Task |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | |
| | | class XHSCommentSpider(BaseSpider): |
| | | platform = "XHS" |
| | |
| | | from scrapy.http import Response |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import Demand, Task, BaseSpider |
| | | from aijuke_spider.items import Demand, Task |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | from aijuke_spider.spiders.xhs.utils import get_search_id |
| | | |
| | | |
| | |
| | | from scrapy.http import Response |
| | | |
| | | from aijuke_spider.config.db_config import matrix_session |
| | | from aijuke_spider.items import CommentGot, BaseSpider |
| | | from aijuke_spider.items import CommentGot |
| | | from aijuke_spider.spiders.base.base_spider import BaseSpider |
| | | |
| | | |
| | | class XHSUserProfileSpider(BaseSpider): |