browser.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. import os
  2. from concurrent.futures import ThreadPoolExecutor
  3. from playwright.sync_api import BrowserContext, sync_playwright
  4. class Browser(object):
  5. def __init__(self,
  6. channel: str = 'msedge',
  7. need_login: bool = True,
  8. headless: bool = True,
  9. ua: str = 'pc',
  10. image: bool = False):
  11. """
  12. 可用对象包括:
  13. self.context
  14. self.browser
  15. self.playwright
  16. [注意]
  17. playwright非线程安全
  18. 不能在同一线程内多次创建playwright实例,不能在不同线程调用同一个全局playwright对象
  19. 若需要在线程内调用,则需要在每个线程内创建playwright实例,可参考do_login写法
  20. """
  21. self.start(channel, need_login, headless, ua, image)
  22. def anti_js(self):
  23. """
  24. 注入js反检测,没用
  25. """
  26. # js ="./js/anti.js"
  27. js = "./js/stealth.min.js"
  28. self.context.add_init_script(path=js)
  29. def do_login(self):
  30. """
  31. 登录
  32. """
  33. from login import Login
  34. storage_state = "./auth.json" if os.path.exists("./auth.json") else None
  35. self.context = self.browser.new_context(
  36. **self._ua,
  37. storage_state=storage_state,
  38. permissions=['notifications'],
  39. ignore_https_errors=True,
  40. )
  41. _login = Login(self.context)
  42. if not _login.check_login():
  43. with ThreadPoolExecutor(max_workers=1) as executor:
  44. future = executor.submit(_login.new_login)
  45. cookies = future.result()
  46. self.context.clear_cookies()
  47. self.context.add_cookies(cookies)
  48. def start(self, channel, need_login, headless, ua, image) -> BrowserContext:
  49. """
  50. 启动浏览器
  51. """
  52. _args = [
  53. '--disable-blink-features=AutomationControlled',
  54. ]
  55. if not image: # 不显示图片
  56. _args.append("--blink-settings=imagesEnabled=false")
  57. self.playwright = sync_playwright().start()
  58. self.browser = self.playwright.chromium.launch(
  59. channel=channel,
  60. headless=headless,
  61. ignore_default_args=['--enable-automation'],
  62. args=_args,
  63. )
  64. if ua == 'pc':
  65. self._ua: dict = self.playwright.devices['Desktop Edge']
  66. self._ua['user_agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.50'
  67. else:
  68. self._ua = self.playwright.devices['iPhone 12']
  69. if need_login: # 重用登录状态
  70. self.do_login()
  71. else:
  72. self.context = self.browser.new_context(
  73. **self._ua,
  74. permissions=['notifications'],
  75. ignore_https_errors=True,
  76. )
  77. # self.anti_js()
  78. def stop(self):
  79. """
  80. 关闭浏览器
  81. """
  82. self.context.close()
  83. self.browser.close()
  84. self.playwright.stop()
  85. if __name__ == "__main__":
  86. edge = Browser()
  87. # edge = Browser(headless=False)
  88. p = edge.context.new_page()
  89. # p.goto('https://antispider1.scrape.center/')
  90. # p.goto('https://antoinevastel.com/bots/')
  91. # p.keyboard.press('End')
  92. p.goto('https://antoinevastel.com/bots/datadome') # 过不去
  93. # p.goto('https://www.douyin.com/search/xinhuashe?&type=user')
  94. # p.screenshot(path="end.png")
  95. edge.stop()