aboutsummaryrefslogtreecommitdiff
path: root/components/extractor/check_url.py
blob: b574b22c6073c925020862b9f75b1bfcd59c6a8c (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from re import search
from urllib.parse import urlparse, parse_qs, ParseResult

def is_youtube(url: str) -> bool:
    """
    Affirm the YouTube domain and that there is something after the domain.

    The URL must start with ``http://``, ``https://`` or a protocol-relative
    ``//``; its host must be ``youtube.com`` or ``youtu.be`` (optionally with
    subdomains such as ``www.`` or ``m.``, and an optional port); and at least
    one character must follow the ``/`` after the host.

    The pattern is anchored to the host so that a foreign URL which merely
    mentions ``youtube.com/`` in its path or query string, or whose host only
    ends with that text (``notyoutube.com``), is rejected. Scheme and host are
    matched case-insensitively.
    """
    return bool(search(
        r'(?i)^(?:https?:)?//'                # scheme or protocol-relative
        r'(?:[a-z0-9-]+\.)*'                  # optional subdomain labels
        r'(?:youtube\.com|youtu\.be)'         # the accepted domains
        r'(?::\d+)?'                          # optional explicit port
        r'/.+',                               # something after the domain
        url,
    ))

def is_video(url: str) -> bool:
    """
    Tell whether the URL points at a single YouTube video.

    A URL qualifies when it passes ``is_youtube`` and either its host is
    exactly ``youtu.be`` or its path is ``/watch`` or begins with
    ``/shorts/`` or ``/embed/``.
    """
    if not is_youtube(url):
        return False
    parsed_url = urlparse(url)
    if parsed_url.netloc == 'youtu.be':
        return True
    path = parsed_url.path
    # Shorts and embeds carry the video id inside the path, so they have to be
    # matched by prefix; an exact comparison would never accept a real link.
    return path == '/watch' or path.startswith(('/shorts/', '/embed/'))

def is_playlist(url: str) -> bool:
    """
    Tell whether the URL is a YouTube link that carries a ``list`` query
    parameter.

    The query string is decoded with ``parse_qs`` using its defaults, so a
    ``list`` parameter with a blank value does not count.
    """
    if is_youtube(url):
        query_string = urlparse(url).query
        return 'list' in parse_qs(query_string)
    return False

def is_channel(url: str) -> bool:
    """
    Tell whether the URL is a YouTube link whose path starts with one of the
    channel prefixes: ``/c/``, ``/user/``, ``/channel/`` or ``/@``.
    """
    if is_youtube(url):
        channel_prefixes = ('/c/', '/user/', '/channel/', '/@')
        url_path = urlparse(url).path
        return url_path.startswith(channel_prefixes)
    return False