使用scrapy采集社交媒体的公开信息
yellowDog
2024-09-09 df124700b3259e6e5bb2291e1aa763b093bd1635
refactor:记录执行日志时,添加执行状态
1 file modified
11 ■■■■■ changed files
aijuke_spider/pipelines.py 11 ●●●●● patch | view | raw | blame | history
aijuke_spider/pipelines.py
@@ -95,6 +95,7 @@
            log.pages = spider.crawler.stats.get_value('response_received_count', 0)
            log.items = spider.crawler.stats.get_value('item_scraped_count', 0)
            log.finish_time=datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S')
            log.status=3
            self.session.commit()
        except SQLAlchemyError as e:
            self.session.rollback()
@@ -186,6 +187,7 @@
            log.pages = spider.crawler.stats.get_value('response_received_count', 0)
            log.items = spider.crawler.stats.get_value('item_scraped_count', 0)
            log.finish_time=datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S')
            log.status=3
            self.session.commit()
        except SQLAlchemyError as e:
            self.session.rollback()
@@ -291,6 +293,7 @@
            log.pages = spider.crawler.stats.get_value('response_received_count', 0)
            log.items = spider.crawler.stats.get_value('item_scraped_count', 0)
            log.finish_time=datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S')
            log.status=3
            self.session.commit()
        except SQLAlchemyError as e:
            self.session.rollback()
@@ -385,6 +388,7 @@
            log.pages = spider.crawler.stats.get_value('response_received_count', 0)
            log.items = spider.crawler.stats.get_value('item_scraped_count', 0)
            log.finish_time=datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S')
            log.status=3
            self.session.commit()
        except SQLAlchemyError as e:
            self.session.rollback()
@@ -461,6 +465,7 @@
            log.pages = spider.crawler.stats.get_value('response_received_count', 0)
            log.items = spider.crawler.stats.get_value('item_scraped_count', 0)
            log.finish_time=datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S')
            log.status=3
            self.session.commit()
        except SQLAlchemyError as e:
            self.session.rollback()
@@ -538,6 +543,7 @@
            log.pages = spider.crawler.stats.get_value('response_received_count', 0)
            log.items = spider.crawler.stats.get_value('item_scraped_count', 0)
            log.finish_time=datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S')
            log.status=3
            self.session.commit()
        except SQLAlchemyError as e:
            self.session.rollback()
@@ -597,6 +603,7 @@
            log.pages = spider.crawler.stats.get_value('response_received_count', 0)
            log.items = spider.crawler.stats.get_value('item_scraped_count', 0)
            log.finish_time=datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S')
            log.status=3
            self.session.commit()
        except SQLAlchemyError as e:
            self.session.rollback()
@@ -656,6 +663,7 @@
            log.pages = spider.crawler.stats.get_value('response_received_count', 0)
            log.items = spider.crawler.stats.get_value('item_scraped_count', 0)
            log.finish_time=datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S')
            log.status=3
            self.session.commit()
        except SQLAlchemyError as e:
            self.session.rollback()
@@ -720,6 +728,7 @@
            log.pages = spider.crawler.stats.get_value('response_received_count', 0)
            log.items = spider.crawler.stats.get_value('item_scraped_count', 0)
            log.finish_time=datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S')
            log.status=3
            self.session.commit()
        except SQLAlchemyError as e:
            self.session.rollback()
@@ -778,6 +787,7 @@
            log.pages = spider.crawler.stats.get_value('response_received_count', 0)
            log.items = spider.crawler.stats.get_value('item_scraped_count', 0)
            log.finish_time=datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S')
            log.status=3
            self.session.commit()
        except SQLAlchemyError as e:
            self.session.rollback()
@@ -839,6 +849,7 @@
            log.pages = spider.crawler.stats.get_value('response_received_count', 0)
            log.items = spider.crawler.stats.get_value('item_scraped_count', 0)
            log.finish_time=datetime.datetime.fromtimestamp(int(time.time())).strftime('%Y-%m-%d %H:%M:%S')
            log.status=3
            self.session.commit()
        except SQLAlchemyError as e:
            self.session.rollback()