Skip to content
This repository has been archived by the owner on May 16, 2023. It is now read-only.

支持从豆瓣用户动态RSS中获取想看/在看/看过 #4711

Merged
merged 1 commit into from
May 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions app/media/douban.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,29 @@ def get_douban_info(self, metainfo):
douban_info["actors"] = celebrities.get("actors")
return douban_info

def get_latest_douban_interests(self, dtype, userid, wait=False):
    """
    Fetch the wish / watching / watched entries from a user's latest activity feed.

    :param dtype: "do", "collect" or "wish" to request only that kind of entry;
                  any other value requests every entry via ``interests``
    :param userid: user id passed through to the ``self.doubanweb`` feed methods
    :param wait: when True, sleep for a random 1-5 seconds before requesting
    :return: the list of entry dicts returned by ``self.doubanweb``, each given an
             extra "id" key derived from its "url"; an empty list when nothing
             was returned
    """
    if wait:
        # Named "delay" so the local does not shadow the stdlib "time" module name.
        delay = round(random.uniform(1, 5), 1)
        log.info("【Douban】随机休眠:%s 秒" % delay)
        sleep(delay)

    # Map the requested type to the doubanweb method that serves it;
    # unknown types fall back to the unfiltered feed.
    fetcher_names = {
        "do": "do_in_interests",
        "collect": "collect_in_interests",
        "wish": "wish_in_interests",
    }
    fetch = getattr(self.doubanweb, fetcher_names.get(dtype, "interests"))
    web_infos = fetch(userid=userid)
    if not web_infos:
        return []

    for web_info in web_infos:
        # The id is the last path segment of the URL. Stripping the trailing
        # slash first makes ".../subject/123/" and ".../subject/123" both
        # yield "123"; a missing URL yields an empty id instead of raising.
        url = web_info.get("url") or ""
        web_info["id"] = url.rstrip("/").rsplit("/", 1)[-1]
    return web_infos


def get_douban_wish(self, dtype, userid, start, wait=False):
"""
获取豆瓣想看列表数据
Expand Down
80 changes: 80 additions & 0 deletions app/media/doubanapi/webapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import requests
from lxml import etree
import datetime

from app.utils import RequestUtils, ExceptionUtils
from app.utils.commons import singleton
Expand All @@ -13,6 +14,7 @@ class DoubanWeb(object):
_session = requests.Session()

_movie_base = "https://movie.douban.com"
_rss_base = "https://www.douban.com"
_search_base = "https://search.douban.com"
_page_limit = 50
_timout = 5
Expand All @@ -36,6 +38,8 @@ class DoubanWeb(object):
"top250": f"{_movie_base}/top250",
# 用户名称
"user": f"{_movie_base}/people/%s/",
# 用户动态 RSS
"rss": f"{_rss_base}/feed/people/%s/interests"
}

_webparsers = {
Expand Down Expand Up @@ -155,12 +159,64 @@ def __invoke_json(cls, url, *kwargs):
timeout=cls._timout).get_res(url=req_url % kwargs)
return req.json() if req else None

@classmethod
def __invoke_rss(cls, url, *kwargs):
    """
    Request the RSS endpoint registered under the given key.

    :param url: key looked up in ``cls._weburls``
    :param kwargs: positional values substituted into the URL template with ``%``
    :return: whatever ``RequestUtils.get`` returns for the formatted URL,
             or None when the key is not registered
    """
    template = cls._weburls.get(url)
    if template:
        return RequestUtils(timeout=cls._timout).get(url=template % kwargs)
    return None

@staticmethod
def __get_json(json):
    """
    Pull the "subjects" entry out of a decoded JSON response.

    :param json: decoded response mapping, or a falsy value when the request failed
    :return: the value stored under "subjects" (None if absent), or None for falsy input
    """
    return json.get("subjects") if json else None

@classmethod
def __get_rss_list(cls, xml):
    """
    Parse a user-activity RSS document into a list of movie entries.

    Each ``<item>`` title starts with a two-character action (想看 / 看过 / 在看)
    followed by the subject title. Only items whose link contains "movie" are
    kept. Items missing a title, link or parseable publication date are
    skipped rather than aborting the whole feed; a missing cover image is
    reported as None.

    :param xml: RSS document as text
    :return: None when ``xml`` is empty or contains no ``<item>``; otherwise a
             list of dicts with keys "title", "url", "cover", "date"
             (formatted YYYY-MM-DD) and "type" ("wish", "collect" or "do")
    """
    # Function-scope import: RFC 822 date parsing that does not depend on the
    # process locale, unlike strptime with %a / %b.
    from email.utils import parsedate_to_datetime

    if not xml:
        return None

    tree = etree.XML(xml.encode("utf-8"))
    items = tree.xpath("//item")
    if not items:
        return None

    # Unrecognised action prefixes are treated as "collect".
    type_by_prefix = {'想看': 'wish', '看过': 'collect', '在看': 'do'}

    def first(node, path):
        """Return the first XPath match under node, or None when there is none."""
        matches = node.xpath(path)
        return matches[0] if matches else None

    result = []
    for item in items:
        raw_title = first(item, "./title/text()")
        link = first(item, "./link/text()")
        pub_date = first(item, ".//pubDate/text()")  # e.g. Tue, 09 May 2023 15:01:14 GMT
        if not raw_title or not link or not pub_date:
            continue
        # Filter before touching the description so that non-movie items
        # cannot break the feed and are not parsed needlessly.
        if 'movie' not in link:
            continue
        try:
            new_date = parsedate_to_datetime(pub_date).strftime("%Y-%m-%d")
        except (TypeError, ValueError):
            # Unparseable date (exception type depends on the Python version).
            continue

        # The description is an HTML fragment; the cover is its first <img>.
        cover = None
        desc = first(item, "./description/text()")
        if desc and desc.strip():
            desc_tree = etree.HTML(desc)
            if desc_tree is not None:
                cover = first(desc_tree, "//img/@src")

        result.append({
            "title": raw_title[2:],
            "url": link,
            "cover": cover,
            "date": new_date,
            "type": type_by_prefix.get(raw_title[:2], 'collect'),
        })
    return result

@classmethod
def __get_list(cls, url, html):
if not url or not html:
Expand Down Expand Up @@ -245,6 +301,30 @@ def do(self, cookie, userid, start=0):
"""
return self.__get_list("do", self.__invoke_web("do", cookie, userid, start))

def interests(self, userid):
    """
    Activity feed: fetch the user's RSS feed and parse it into entries.

    :param userid: user id substituted into the "rss" URL template
    :return: the parsed result of ``__get_rss_list`` for the fetched feed
    """
    feed = self.__invoke_rss("rss", userid)
    return self.__get_rss_list(feed)

def wish_in_interests(self, userid):
    """
    Wish list: entries from the user's activity feed whose "type" is "wish".

    :param userid: user id passed to ``interests``
    :return: list of matching entry dicts; empty when the feed yields nothing
    """
    # interests() may return None (empty or failed feed); treat that as no entries
    # instead of letting the iteration raise TypeError.
    entries = self.interests(userid) or []
    return [entry for entry in entries if entry.get("type") == "wish"]

def do_in_interests(self, userid):
    """
    Watching: entries from the user's activity feed whose "type" is "do".

    :param userid: user id passed to ``interests``
    :return: list of matching entry dicts; empty when the feed yields nothing
    """
    # interests() may return None (empty or failed feed); treat that as no entries
    # instead of letting the iteration raise TypeError.
    entries = self.interests(userid) or []
    return [entry for entry in entries if entry.get("type") == "do"]

def collect_in_interests(self, userid):
    """
    Watched: entries from the user's activity feed whose "type" is "collect".

    :param userid: user id passed to ``interests``
    :return: list of matching entry dicts; empty when the feed yields nothing
    """
    # interests() may return None (empty or failed feed); treat that as no entries
    # instead of letting the iteration raise TypeError.
    entries = self.interests(userid) or []
    return [entry for entry in entries if entry.get("type") == "collect"]

def search(self, cookie, keyword):
"""
搜索
Expand Down
Loading