From c641e6f1948f683bf7b1fff08bce125cb64563df Mon Sep 17 00:00:00 2001 From: A Farzat Date: Wed, 8 Oct 2025 10:44:07 +0300 Subject: Add unit tests for obtain_vid_info --- components/extractor/check_url.py | 5 ++- tests/data/video@WI4U1SVIO3I.html | 85 +++++++++++++++++++++++++++++++++++++++ tests/data/video@iD1Z7ccGyhk.html | 85 +++++++++++++++++++++++++++++++++++++++ tests/data/video@k7RM-ot2NWY.html | 85 +++++++++++++++++++++++++++++++++++++++ tests/obtain_vid_info.py | 17 ++++++++ tests/utils/vid_url_to_html.py | 18 +++++++++ 6 files changed, 294 insertions(+), 1 deletion(-) create mode 100644 tests/data/video@WI4U1SVIO3I.html create mode 100644 tests/data/video@iD1Z7ccGyhk.html create mode 100644 tests/data/video@k7RM-ot2NWY.html create mode 100644 tests/obtain_vid_info.py create mode 100644 tests/utils/vid_url_to_html.py diff --git a/components/extractor/check_url.py b/components/extractor/check_url.py index b574b22..13d7259 100644 --- a/components/extractor/check_url.py +++ b/components/extractor/check_url.py @@ -11,7 +11,10 @@ def is_video(url: str) -> bool: if not is_youtube(url): return False parsed_url = urlparse(url) - if parsed_url.path in ('/watch', '/shorts/', '/embed/'): + for path in ('/shorts/', '/embed/'): + if parsed_url.path.startswith(path): + return True + if parsed_url.path == '/watch' and 'v' in parse_qs(parsed_url.query): return True return parsed_url.netloc == 'youtu.be' diff --git a/tests/data/video@WI4U1SVIO3I.html b/tests/data/video@WI4U1SVIO3I.html new file mode 100644 index 0000000..94def22 --- /dev/null +++ b/tests/data/video@WI4U1SVIO3I.html @@ -0,0 +1,85 @@ +Discord forces new terms on users after data breach - YouTube

\ No newline at end of file diff --git a/tests/data/video@iD1Z7ccGyhk.html b/tests/data/video@iD1Z7ccGyhk.html new file mode 100644 index 0000000..096f385 --- /dev/null +++ b/tests/data/video@iD1Z7ccGyhk.html @@ -0,0 +1,85 @@ +Cancelling the gym - YouTube

\ No newline at end of file diff --git a/tests/data/video@k7RM-ot2NWY.html b/tests/data/video@k7RM-ot2NWY.html new file mode 100644 index 0000000..d643dfd --- /dev/null +++ b/tests/data/video@k7RM-ot2NWY.html @@ -0,0 +1,85 @@ +Linear combinations, span, and basis vectors | Chapter 2, Essence of linear algebra - YouTube

\ No newline at end of file
diff --git a/tests/obtain_vid_info.py b/tests/obtain_vid_info.py
new file mode 100644
index 0000000..701104d
--- /dev/null
+++ b/tests/obtain_vid_info.py
@@ -0,0 +1,28 @@
+from unittest import TestCase
+
+from components.extractor.obtain_vid_info import obtain_vid_duration
+from .utils.vid_url_to_html import get_vid_html_from_url
+
+
+class TestObtainVidInfo(TestCase):
+    """Check obtain_vid_duration against the pages saved in tests/data.
+
+    Each test hands the saved HTML to obtain_vid_duration through its
+    ``html`` argument. Expected values are presumably seconds (written as
+    minutes * 60 + seconds) -- confirm against obtain_vid_duration.
+    """
+
+    def test_obtain_vid_duration_from_shorts(self) -> None:
+        # URL of the /shorts/<id> form.
+        url = "https://www.youtube.com/shorts/iD1Z7ccGyhk"
+        self.assertEqual(60, obtain_vid_duration(url, html=get_vid_html_from_url(url)))
+
+    def test_obtain_vid_duration_from_videos(self) -> None:
+        # URL of the plain /watch?v=<id> form.
+        url = "https://www.youtube.com/watch?v=WI4U1SVIO3I"
+        self.assertEqual(8*60+11, obtain_vid_duration(url, html=get_vid_html_from_url(url)))
+
+    def test_obtain_vid_duration_from_videos_with_params(self) -> None:
+        # /watch URL that carries list, index and pp parameters besides v.
+        url = "https://www.youtube.com/watch?v=k7RM-ot2NWY&list=PLZHQObOWTQDPD3MizzM2xVFitgF8hE_ab&index=2&pp=iAQB"
+        self.assertEqual(9*60+59, obtain_vid_duration(url, html=get_vid_html_from_url(url)))
diff --git a/tests/utils/vid_url_to_html.py b/tests/utils/vid_url_to_html.py
new file mode 100644
index 0000000..355e830
--- /dev/null
+++ b/tests/utils/vid_url_to_html.py
@@ -0,0 +1,44 @@
+from pathlib import Path
+from urllib.parse import urlparse, parse_qs
+
+from components.extractor.check_url import is_video
+
+# tests/data, resolved from this file so lookups ignore the working directory.
+_DATA_DIR = Path(__file__).resolve().parent.parent / 'data'
+
+
+def extract_vid_id(url: str) -> str:
+    """Return the video ID contained in a YouTube video URL.
+
+    Understands youtu.be/<id>, /watch?v=<id> and path-based URLs whose last
+    path segment is the ID (e.g. /shorts/<id>, /embed/<id>).
+
+    Raises:
+        ValueError: if is_video() rejects the URL or the URL has no ID.
+    """
+    if not is_video(url):
+        raise ValueError(url + " is not a YouTube video URL")
+    parsed_url = urlparse(url)
+    # Drop empty segments so a trailing slash cannot yield an empty ID.
+    segments = [part for part in parsed_url.path.split('/') if part]
+    if parsed_url.netloc == 'youtu.be':
+        vid_id = segments[0] if segments else ''
+    elif parsed_url.path == '/watch':
+        vid_id = parse_qs(parsed_url.query)['v'][0]
+    else:
+        # The first segment is the prefix (shorts, embed); the ID follows it.
+        vid_id = segments[-1] if len(segments) > 1 else ''
+    if not vid_id:
+        raise ValueError(url + " does not contain a video ID")
+    return vid_id
+
+
+def get_vid_html_from_url(url: str) -> str:
+    """Return the saved HTML of the video that ``url`` points to.
+
+    The page is read from tests/data/video@<id>.html.
+    """
+    fixture = _DATA_DIR / f'video@{extract_vid_id(url)}.html'
+    # Decode explicitly; open()'s default encoding depends on the locale.
+    # NOTE(review): assumes the fixtures were saved as UTF-8 -- confirm.
+    return fixture.read_text(encoding='utf-8')
-- 
cgit v1.3.1