aboutsummaryrefslogtreecommitdiff
path: root/components/extractor/obtain_vid_info.py
blob: bfbe86192b0399d2c19da701c730916f3f8ffc7d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
from urllib.request import urlopen

from bs4 import BeautifulSoup
from isodate import parse_duration # type: ignore

def obtain_vid_duration(url: str, html: str = '') -> int:
    """Return the duration of a video page in whole seconds.

    The page markup is searched for a ``<meta itemprop="duration">`` tag
    whose ``content`` attribute is handed to ``isodate.parse_duration``.

    Args:
        url: Address of the page; only fetched when *html* is empty.
        html: Markup of the page, if the caller already has it. When
            non-empty, no network request is made.

    Returns:
        The parsed duration truncated to an integer number of seconds.

    Raises:
        AssertionError: If the markup has no ``<meta itemprop="duration">``
            tag.
        KeyError: If that tag has no ``content`` attribute.
    """
    if not html:
        # Close the HTTP response deterministically instead of leaking it
        # until garbage collection.
        with urlopen(url) as response:
            html = response.read().decode('utf-8')
    soup = BeautifulSoup(html, 'html.parser')

    duration_meta = soup.find('meta', itemprop='duration')
    if not duration_meta:
        # Raised explicitly (rather than via ``assert``) so the check still
        # runs under ``python -O``; the exception type is kept for callers
        # that already handle AssertionError.
        raise AssertionError(
            f'no <meta itemprop="duration"> tag found for {url!r}'
        )
    duration = parse_duration(duration_meta['content'])
    return int(duration.total_seconds())