"""Obtain video information by scraping the video's web page."""

from urllib.request import urlopen

from bs4 import BeautifulSoup
from isodate import parse_duration  # type: ignore


def obtain_vid_duration(url: str, html: str = '') -> int:
    """Return the duration of a video in whole seconds.

    The duration is read from the ``content`` attribute of the page's
    ``<meta itemprop="duration">`` tag and handed to
    ``isodate.parse_duration``.

    Args:
        url: Address of the video page. It is only fetched when *html* is
            empty.
        html: Already-downloaded page markup. When non-empty it is parsed
            directly and no request is made.

    Returns:
        The parsed duration, truncated to an integer number of seconds.

    Raises:
        ValueError: If the page has no ``<meta itemprop="duration">`` tag,
            or that tag has no (or an empty) ``content`` attribute.

    Errors raised by ``urlopen``, by decoding the response as UTF-8, or by
    ``parse_duration`` are not caught here and propagate to the caller.
    """
    if not html:
        # Use a context manager so the HTTP response is closed
        # deterministically instead of being left to garbage collection.
        with urlopen(url) as response:
            html = response.read().decode('utf-8')

    soup = BeautifulSoup(html, 'html.parser')
    duration_meta = soup.find('meta', itemprop='duration')

    # Raise explicitly rather than `assert`: assertions are stripped under
    # `python -O`, which would turn a missing tag into an opaque TypeError.
    if duration_meta is None:
        raise ValueError('no <meta itemprop="duration"> tag found in the page')

    content = duration_meta.get('content')
    if not content:
        raise ValueError('duration <meta> tag has no "content" attribute')

    duration = parse_duration(content)
    return int(duration.total_seconds())