summary | refs | log | tree | commit | diff
path: root/components/extractor/obtain_vid_info.py
diff options
context:
space:
mode:
author: A Farzat <a@farzat.xyz> 2025-10-09 18:10:00 +0300
committer: A Farzat <a@farzat.xyz> 2025-10-09 19:51:11 +0300
commit: 2a2af44eacf0e962853dba0b6ecae19fe18f9ea3 (patch)
tree: 093f8bf465af54711dce6eb5f8eb97f75cb3718f /components/extractor/obtain_vid_info.py
parent: 9fb20553d9f9e21c012f730a728c33d368e09bf2 (diff)
download: csca5028-2a2af44eacf0e962853dba0b6ecae19fe18f9ea3.tar.gz
download: csca5028-2a2af44eacf0e962853dba0b6ecae19fe18f9ea3.zip
Add the ability to use YouTube API
This is to circumvent YouTube blocking.
Diffstat (limited to 'components/extractor/obtain_vid_info.py')
-rw-r--r-- components/extractor/obtain_vid_info.py 18
1 files changed, 17 insertions, 1 deletions
diff --git a/components/extractor/obtain_vid_info.py b/components/extractor/obtain_vid_info.py
index bfbe861..45e2bdc 100644
--- a/components/extractor/obtain_vid_info.py
+++ b/components/extractor/obtain_vid_info.py
@@ -1,9 +1,25 @@
+from sys import stderr
+from traceback import print_exc
from urllib.request import urlopen
from bs4 import BeautifulSoup
from isodate import parse_duration # type: ignore
+from requests import get
-def obtain_vid_duration(url: str, html: str = '') -> int:
+def obtain_vid_duration(url: str, vid_id: str, html: str='', api_key: str='') -> int:
+ if api_key:
+ try:
+ data = get("https://www.googleapis.com/youtube/v3/videos", params={
+ 'part': "contentDetails",
+ 'id': vid_id[9:],
+ 'key': api_key,
+ }).json()
+ duration_str = data['items'][0]['contentDetails']['duration']
+ print(vid_id[9:], duration_str)
+ return int(parse_duration(duration_str).total_seconds())
+ except:
+ print("Web scraping will be used due to an error with the following id:", vid_id, file=stderr)
+ print_exc()
html = html or urlopen(url).read().decode('utf-8')
soup = BeautifulSoup(html, 'html.parser')