api.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. # -*- encoding: utf-8 -*-
  2. '''
  3. @File : api.py
  4. @Time : 2023年06月19日 20:20:58 星期一
  5. @Author : erma0
  6. @Version : 0.1
  7. @Link : https://erma0.cn
  8. @Desc : 抖音爬虫封装API
  9. '''
  10. import os
  11. import shutil
  12. import stat
  13. import threading
  14. import time
  15. from copy import deepcopy
  16. from enum import Enum
  17. from typing import List, Union
  18. import ujson as json
  19. import uvicorn
  20. from fastapi import BackgroundTasks, FastAPI, HTTPException, Request
  21. from fastapi.middleware.cors import CORSMiddleware
  22. from fastapi.responses import FileResponse, UJSONResponse
  23. from pydantic import BaseModel
  24. from uvicorn.config import LOGGING_CONFIG
  25. from browser import Browser
  26. from spider import Douyin
  27. date_fmt = "%Y-%m-%d %H:%M:%S"
  28. LOGGING_CONFIG["formatters"]["access"]["fmt"] = '%(asctime)s %(levelprefix)s %(client_addr)s - "%(request_line)s" %(status_code)s'
  29. LOGGING_CONFIG["formatters"]["default"]["fmt"] = "%(asctime)s %(levelprefix)s %(message)s"
  30. LOGGING_CONFIG["formatters"]["default"]["datefmt"] = date_fmt
  31. LOGGING_CONFIG["formatters"]["access"]["datefmt"] = date_fmt
  32. title: str = '抖音爬虫API'
  33. version: str = "0.1.0"
  34. update = "20230619"
  35. author = "erma0"
  36. web = "https://douyin.erma0.cn/"
  37. github = "https://github.com/erma0/douyin"
  38. doc = "https://douyin.erma0.cn/docs"
  39. description: str = """
  40. ### ❤️开源不易,欢迎star⭐
  41. - 支持采集账号主页作品、喜欢作品、音乐原声作品、搜索作品、关注列表、粉丝列表、合集作品、单个作品
  42. ### 📢声明
  43. > 本仓库为学习`playwright`爬虫、命令行调用`Aria2`及`FastAPI/AMIS/Eel`实现`WEBUI`的案例,仅用于测试和学习研究,禁止用于商业用途或任何非法用途。
  44. > 任何用户直接或间接使用、传播本仓库内容时责任自负,本仓库的贡献者不对该等行为产生的任何后果负责。
  45. > 如果相关方认为该项目的代码可能涉嫌侵犯其权利,请及时通知删除相关代码。
  46. """
  47. contact = {"author": author, "url": github}
  48. tags_metadata = [
  49. {
  50. "name": "同步",
  51. "description": "耗时短的接口,直接返回结果",
  52. },
  53. {
  54. "name": "异步",
  55. "description": "耗时长的接口,无法直接返回结果,返回一个轮询接口",
  56. },
  57. ]
  58. edge = None
  59. running_ls = []
  60. download_path = './下载_API'
  61. app = FastAPI(
  62. title=title,
  63. description=description,
  64. version=version,
  65. openapi_tags=tags_metadata,
  66. contact=contact,
  67. )
  68. # 允许跨域
  69. app.add_middleware(
  70. CORSMiddleware,
  71. allow_origins=["*"],
  72. allow_methods=["*"],
  73. allow_headers=["*"],
  74. )
  75. class DouyinAPI(Douyin):
  76. def _append_awemes(self, aweme_list: List[dict]):
  77. super()._append_awemes(aweme_list)
  78. with open(f"{download_path}/{self.type}_{self.id}.json", 'w', encoding='utf-8') as f:
  79. json.dump({'code': 1, 'num': len(self.results)}, f, ensure_ascii=False)
  80. def _append_users(self, user_list: List[dict]):
  81. super()._append_users(user_list)
  82. with open(f"{download_path}/{self.type}_{self.id}.json", 'w', encoding='utf-8') as f:
  83. json.dump({'code': 1, 'num': len(self.results)}, f, ensure_ascii=False)
  84. def running(a: DouyinAPI, target: str):
  85. # a.aria2_conf = f"{download_path}/{a.type}_{a.id}.txt"
  86. a.run()
  87. with open(f"{download_path}/{a.type}_{a.id}.json", 'w', encoding='utf-8') as f:
  88. json.dump({
  89. 'code': 0,
  90. 'num': len(a.results),
  91. 'data': a.results,
  92. }, f, ensure_ascii=False)
  93. running_ls.remove(target)
  94. class API(BaseModel):
  95. Version: str = version
  96. Update: str = update
  97. Web: str = web
  98. GitHub: str = github
  99. Doc: str = doc
  100. Time: str = time.ctime()
  101. class DataVideo(BaseModel):
  102. id: str
  103. desc: str
  104. download_addr: Union[str, List[str]]
  105. time: int = None
  106. digg_count: int = None
  107. share_count: int = None
  108. collect_count: int = None
  109. comment_count: int = None
  110. diggCount: int = None
  111. shareCount: int = None
  112. collectCount: int = None
  113. commentCount: int = None
  114. liveWatchCount: int = None
  115. music_title: str = None
  116. music_url: str = None
  117. cover: str = None
  118. tags: List[dict] = None
  119. class DataUser(BaseModel):
  120. nickname: str
  121. sec_uid: str
  122. uid: str = None
  123. signature: str = None
  124. avatar: str = None
  125. short_id: str = None
  126. unique_id: str = None
  127. unique_id_modify_time: int = None
  128. aweme_count: int = None
  129. favoriting_count: int = None
  130. follower_count: int = None
  131. following_count: int = None
  132. constellation: int = None
  133. create_time: int = None
  134. enterprise_verify_reason: str = None
  135. is_gov_media_vip: bool = None
  136. total_favorited: int = None
  137. share_qrcode_uri: str = None
  138. class DataSync(BaseModel):
  139. code: int = 0 # 0 已完成;1 正在运行
  140. num: int = 0
  141. data: List[Union[DataVideo, DataUser]] = None
  142. class DataAsync(BaseModel):
  143. code: int = 0 # 0 成功投递;1 正在运行;
  144. url: str
  145. class TypeAsync(str, Enum):
  146. post = 'post'
  147. like = 'like'
  148. favorite = 'favorite'
  149. music = 'music'
  150. search = 'search'
  151. collection = 'collection'
  152. follow = 'follow'
  153. fans = 'fans'
  154. @app.get(
  155. "/",
  156. response_class=UJSONResponse,
  157. response_model=API,
  158. )
  159. def api(req: Request):
  160. return {'Web': req.base_url._url, 'Doc': f'{req.base_url._url}docs'}
  161. @app.get(
  162. "/api/video",
  163. response_class=UJSONResponse,
  164. response_model=DataVideo,
  165. tags=['同步'],
  166. response_model_exclude_unset=True,
  167. response_model_exclude_defaults=True,
  168. )
  169. def get_video(url: str):
  170. start_browser()
  171. a = DouyinAPI(context=edge.context, url=url, type='video', down_path=download_path)
  172. a.run()
  173. return deepcopy(a.results[0])
  174. @app.get(
  175. "/api/user",
  176. response_class=UJSONResponse,
  177. response_model=DataUser,
  178. tags=['同步'],
  179. response_model_exclude_unset=True,
  180. response_model_exclude_defaults=True,
  181. )
  182. def get_user(id: str):
  183. start_browser()
  184. a = DouyinAPI(context=edge.context, url=id, type='id', down_path=download_path)
  185. a.run()
  186. return deepcopy(a.results[0])
  187. @app.get("/api/info", response_class=UJSONResponse, tags=['同步'])
  188. def get_info(url: str):
  189. start_browser()
  190. a = DouyinAPI(context=edge.context, url=url, down_path=download_path)
  191. a.has_more = False
  192. a.run()
  193. return deepcopy(a.info)
  194. @app.get(
  195. "/api/{type_async}",
  196. response_class=UJSONResponse,
  197. response_model=DataAsync,
  198. tags=['异步'],
  199. status_code=201,
  200. )
  201. def start_async(type_async: TypeAsync, url: str, background_tasks: BackgroundTasks, req: Request):
  202. start_browser()
  203. target = f"{type_async.value}_{url.strip()}"
  204. a = DouyinAPI(context=edge.context, url=url, type=type_async.value, down_path=download_path)
  205. if target in running_ls:
  206. code = 1
  207. # return RedirectResponse(url=f"{req.base_url._url}/api/{type_async.value}/{a.id}", status_code=303)
  208. else:
  209. code = 0
  210. running_ls.append(target)
  211. background_tasks.add_task(running, a, target)
  212. return {'code': code, 'url': f"{req.base_url._url}api/{type_async.value}/{a.id}"}
  213. @app.get(
  214. "/api/{type_async}/{id}",
  215. response_class=UJSONResponse,
  216. response_model=DataSync,
  217. tags=['同步'],
  218. response_model_exclude_unset=True,
  219. response_model_exclude_defaults=True,
  220. )
  221. def async_result(type_async: TypeAsync, id: str, down: bool = False):
  222. suffix = 'txt' if down else 'json'
  223. file = f"{download_path}/{type_async.value}_{id}.{suffix}"
  224. if not os.path.exists(file):
  225. raise HTTPException(status_code=404, detail="目标不存在")
  226. return FileResponse(file)
  227. @app.get("/t", response_class=UJSONResponse)
  228. def tt():
  229. print('start')
  230. time.sleep(3)
  231. print('end')
  232. return {"url": "type"}
  233. @app.get("/init")
  234. def start_browser():
  235. global edge
  236. if not isinstance(edge, Browser):
  237. edge = Browser(headless=False)
  238. # 删除只读文件夹
  239. def remove_readonly(func, path, _):
  240. os.chmod(path, stat.S_IWRITE)
  241. func(path)
  242. # 每10分钟清理一次下载文件夹,删除1小时以前的记录
  243. def clean_download_path():
  244. root = download_path
  245. if not os.path.exists(root): os.makedirs(root)
  246. while True:
  247. now = time.time()
  248. for file in os.listdir(root):
  249. file_path = os.path.join(root, file)
  250. if now - os.path.getmtime(file_path) > 60 * 60 * 1:
  251. if os.path.isfile(file_path):
  252. os.remove(file_path)
  253. else:
  254. shutil.rmtree(file_path, onerror=remove_readonly)
  255. time.sleep(60 * 10)
  256. # 启动后执行
  257. @app.on_event("startup")
  258. def startup():
  259. threading.Thread(target=clean_download_path, daemon=True).start()
  260. if __name__ == '__main__':
  261. # uvicorn api:app --host '0.0.0.0' --port 567 --reload
  262. # uvicorn.run("api:app", host='0.0.0.0', port=567, reload=True, limit_concurrency=5, use_colors=True)
  263. uvicorn.run("api:app", host='0.0.0.0', port=567, limit_concurrency=5, use_colors=True)