diff options
| author | A Farzat <a@farzat.xyz> | 2025-10-08 09:34:42 +0300 |
|---|---|---|
| committer | A Farzat <a@farzat.xyz> | 2025-10-08 09:48:32 +0300 |
| commit | 7595389a5fce42ed7d663a246c9597fa3545d742 (patch) | |
| tree | 2abe95f522134b27982f345d5c1bb8c756b67e7e | |
| parent | 6ca8b8991a72ec5600aa1732acd170ba5ffeb853 (diff) | |
| download | csca5028-7595389a5fce42ed7d663a246c9597fa3545d742.tar.gz csca5028-7595389a5fce42ed7d663a246c9597fa3545d742.zip | |
Add a module to obtain Subscription info from URLs
| -rw-r--r-- | components/extractor/extract_sub_info.py | 37 | ||||
| -rw-r--r-- | requirements.txt | 2 |
2 files changed, 39 insertions, 0 deletions
```diff
diff --git a/components/extractor/extract_sub_info.py b/components/extractor/extract_sub_info.py
new file mode 100644
index 0000000..c128fdf
--- /dev/null
+++ b/components/extractor/extract_sub_info.py
@@ -0,0 +1,37 @@
+from typing import Any, Dict, cast
+from urllib.parse import urlparse, parse_qs
+from urllib.request import urlopen
+
+from bs4 import BeautifulSoup
+from feedparser import parse # type: ignore
+
+from .check_url import is_youtube, is_playlist, is_channel
+
+def get_sub_info_from_yt_url(url: str) -> Dict[str, Any]:
+    if not is_youtube(url):
+        raise Exception(url+" is not a youtube URL.")
+    if is_playlist(url):
+        return get_feed_details(get_playlist_feed(url))
+    return get_feed_details(get_channel_feed(url))
+
+def get_playlist_feed(url: str) -> str:
+    parsed_url = urlparse(url)
+    # Extract playlist ID from query parameters
+    query_params = parse_qs(parsed_url.query)
+    playlist_id = query_params['list'][0]
+    return "https://www.youtube.com/feeds/videos.xml?playlist_id="+playlist_id
+
+def get_channel_feed(url: str, html: str = '') -> str:
+    html = html or urlopen(url).read().decode('utf-8')
+    soup = BeautifulSoup(html, 'html.parser')
+    link_obj = soup.find('link', {'title': "RSS"})
+    assert link_obj
+    return cast(str, link_obj["href"])
+
+def get_feed_details(url: str) -> Dict[str, Any]:
+    feed = parse(url).feed
+    return {
+        'id': feed["id"],
+        'link': feed["links"][0]["href"],
+        'title': feed["title"],
+    }
diff --git a/requirements.txt b/requirements.txt
index 3c3ef86..56de09d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+beautifulsoup4==4.14.2
 blinker==1.9.0
 certifi==2025.8.3
 charset-normalizer==3.4.3
@@ -23,6 +24,7 @@ pytz==2025.2
 schedule==1.2.2
 sentinels==1.1.1
 sgmllib3k==1.0.0
+soupsieve==2.8
 types-Flask-Cors==6.0.0.20250809
 typing_extensions==4.14.1
 Werkzeug==3.1.3
```
