summaryrefslogtreecommitdiff
path: root/components/extractor/check_url.py
blob: 13d725935f694d96a1fd7b9074815f6cfb39b363 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from re import search
from urllib.parse import urlparse, parse_qs, ParseResult

def is_youtube(url: str) -> bool:
    """
    Affirm the YouTube domain and that there is something after the domain.

    The URL must start with ``http://``, ``https://`` or a scheme-relative
    ``//``, and its host must be ``youtube.com`` or ``youtu.be`` (optionally
    behind subdomains such as ``www.`` or ``m.``, optionally followed by a
    port).  At least one character has to follow the ``/`` after the host.

    The host is anchored to the authority part of the URL so that look-alike
    hosts (``notyoutube.com``) and foreign URLs that merely mention YouTube
    in their path or query (``https://evil.example/youtube.com/x``) are
    rejected.  Matching is case-insensitive because schemes and host names
    are.
    """
    pattern = (
        r'(?i)^(?:https?:)?//'           # scheme, or scheme-relative URL
        r'(?:[\w-]+\.)*'                 # optional subdomains
        r'(?:youtube\.com|youtu\.be)'    # the accepted YouTube domains
        r'(?::\d+)?'                     # optional port
        r'/.+'                           # something after the domain
    )
    return bool(search(pattern, url))

def is_video(url: str) -> bool:
    """
    Tell whether a URL points at a single YouTube video.

    Recognised forms are ``/shorts/<id>``, ``/embed/<id>``,
    ``/watch?v=<id>`` and ``youtu.be`` short links.  Every form must
    actually carry a video ID; anything that is not a YouTube URL at all
    (per ``is_youtube``) is rejected up front.
    """
    if not is_youtube(url):
        return False
    parsed_url = urlparse(url)
    path = parsed_url.path
    for prefix in ('/shorts/', '/embed/'):
        # A bare "/shorts/" or "/embed/" names no video, so require an ID
        # after the prefix, just as the /watch branch requires a "v" value.
        if path.startswith(prefix) and len(path) > len(prefix):
            return True
    # parse_qs drops blank values by default, so "watch?v=" does not count.
    if path == '/watch' and 'v' in parse_qs(parsed_url.query):
        return True
    # Short links keep the video ID in the path; a short link made only of
    # a query or fragment (e.g. "youtu.be/?si=...") has no ID.
    return parsed_url.netloc == 'youtu.be' and bool(path.strip('/'))

def is_playlist(url: str) -> bool:
    """
    Tell whether a YouTube URL references a playlist.

    True when the URL passes ``is_youtube`` and its query string has a
    ``list`` parameter with a non-blank value (``parse_qs`` discards blank
    values by default).
    """
    if is_youtube(url):
        query_string = urlparse(url).query
        return 'list' in parse_qs(query_string)
    return False

def is_channel(url: str) -> bool:
    """
    Tell whether a YouTube URL points at a channel page.

    True when the URL passes ``is_youtube`` and its path begins with one of
    the channel prefixes: ``/c/``, ``/user/``, ``/channel/`` or ``/@``.
    """
    channel_prefixes = ('/c/', '/user/', '/channel/', '/@')
    return is_youtube(url) and urlparse(url).path.startswith(channel_prefixes)