New file |
| | |
| | | import asyncio |
| | | import re |
| | | from typing import Optional |
| | | |
| | | from playwright.async_api import async_playwright, BrowserContext |
| | | |
# Number of worker tasks transform() starts; each worker resolves one URL at
# a time, so this also bounds how many pages are open concurrently.
max_count = 8
| | | |
| | | |
async def get_video_id(
    short_url: str,
    context: "BrowserContext",
    *,
    timeout: float = 30.0,
    poll_interval: float = 0.1,
) -> Optional[str]:
    """Resolve a short link to the numeric ID found in the final video URL.

    Opens ``short_url`` in a new page of ``context`` and polls ``page.url``
    until it matches ``https://www.douyin.com/video/<digits>``.

    Args:
        short_url: The URL handed to ``page.goto``.
        context: Browser context used to create the temporary page.
        timeout: Maximum number of seconds to keep polling after ``goto``
            returns before giving up.
        poll_interval: Seconds to sleep between two looks at ``page.url``.

    Returns:
        The digit string captured from the video URL, or ``None`` if the page
        URL never matched within ``timeout`` seconds.

    Raises:
        Whatever ``context.new_page()`` or ``page.goto()`` raises; the page is
        still closed in that case.
    """
    video_url_pattern = re.compile(r"https://www\.douyin\.com/video/(\d+)")
    page = await context.new_page()
    try:
        await page.goto(short_url)
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        while True:
            match = video_url_pattern.search(page.url)
            if match:
                return match.group(1)
            if loop.time() >= deadline:
                # The page never landed on a video URL; report "not found"
                # instead of spinning forever.
                return None
            # Yield to the event loop: without this await the loop would
            # starve every other task, and page.url could never change.
            await asyncio.sleep(poll_interval)
    finally:
        # Always release the page, including when goto() fails.
        await page.close()
| | | |
| | | |
async def worker(context, task_queue, results):
    """Consume short URLs from ``task_queue`` until a ``None`` sentinel arrives.

    For every URL taken from the queue, ``get_video_id`` is awaited and its
    return value is appended to ``results``. Values are appended as each
    lookup finishes, so with several workers the order of ``results`` need
    not match the order in which URLs were queued.

    Args:
        context: Passed through to ``get_video_id`` as the browser context.
        task_queue: ``asyncio.Queue`` of short URLs; ``None`` tells this
            worker to stop.
        results: Shared list that receives one entry per processed URL
            (``None`` when the lookup failed or found nothing).
    """
    import logging

    while True:
        short_url = await task_queue.get()
        try:
            if short_url is None:
                break
            try:
                video_id = await get_video_id(short_url, context)
            except Exception:
                # One bad URL must not kill the worker: remaining queue items
                # would never be consumed and the producer's join() would
                # wait forever.
                logging.getLogger(__name__).exception(
                    "Failed to resolve %r", short_url
                )
                video_id = None
            results.append(video_id)
        finally:
            # Every get() is paired with exactly one task_done(), on success,
            # failure and sentinel alike, so Queue.join() can always return.
            task_queue.task_done()
| | | |
async def transform(short_urls: list, *, headless: bool = False):
    """Resolve a batch of short URLs concurrently and return the results.

    Launches Chromium through Playwright, queues every entry of
    ``short_urls``, and lets up to ``max_count`` ``worker`` tasks drain the
    queue. Each collected result is printed on its own line before the list
    is returned.

    Args:
        short_urls: URLs to resolve; each is queued once.
        headless: Forwarded to ``chromium.launch``. Defaults to ``False``,
            i.e. a visible browser window.

    Returns:
        The list the workers appended to, one entry per processed URL, in
        the order the workers appended them.
    """
    async with async_playwright() as playwright:
        browser = await playwright.chromium.launch(headless=headless)
        try:
            context = await browser.new_context()

            task_queue = asyncio.Queue()
            for short_url in short_urls:
                task_queue.put_nowait(short_url)

            results = []
            # No point in starting more workers than there are queued URLs.
            worker_count = min(max_count, task_queue.qsize())
            workers = [
                asyncio.create_task(worker(context, task_queue, results))
                for _ in range(worker_count)
            ]
            try:
                await task_queue.join()

                # One stop sentinel per worker so each of them exits its loop.
                for _ in range(worker_count):
                    task_queue.put_nowait(None)

                await asyncio.gather(*workers)
            finally:
                # On error or cancellation some workers may still be running;
                # cancel() is a no-op for tasks that already finished.
                for task in workers:
                    task.cancel()
                await asyncio.gather(*workers, return_exceptions=True)
        finally:
            await browser.close()

    for video_id in results:
        print(video_id)
    return results
| | | |
| | | |
if __name__ == '__main__':
    # Sample input: one batch of ten share links. The batch is repeated four
    # times below, giving the 40-entry list that is passed to transform().
    sample_batch = [
        "https://v.douyin.com/i6Eah69H/ c@N.Wm 06/13 BgB:/ ",
        "https://v.douyin.com/i6Eah69H/ c@N.Wm 06/13 BgB:/ ",
        "https://v.douyin.com/i6Eah69H/ c@N.Wm 06/13 BgB:/ ",
        "https://v.douyin.com/i6E51g7r/ LWM:/ A@T.yT 07/01 ",
        "https://v.douyin.com/i6E51g7r/ LWM:/ A@T.yT 07/01 ",
        "https://v.douyin.com/i6E5kPtv/ HIi:/ 11/27 q@R.KJ ",
        "https://v.douyin.com/i6EPYWnt/ 09/05 CHv:/ T@Y.zT ",
        "https://v.douyin.com/i6EPYWnt/ 09/05 CHv:/ T@Y.zT ",
        "https://v.douyin.com/i6EPYWnt/ 09/05 CHv:/ T@Y.zT ",
        "https://v.douyin.com/i6EPfUEb/ LWM:/ A@T.yT 07/01 ",
    ]
    url_list = sample_batch * 4
    asyncio.run(transform(url_list))