From 69bcc9822c82ee6d726c98deafbdd0a20527a55a Mon Sep 17 00:00:00 2001 From: A Farzat Date: Thu, 21 Aug 2025 09:41:56 +0300 Subject: Move components to root directory Some components might be shared with other applications such as the data analyser later on. --- components/database.py | 16 ++++++ components/subscriptions/main.py | 64 ++++++++++++++++++++++ components/subscriptions/typing.py | 13 +++++ components/users/typing.py | 13 +++++ components/videos.py | 27 +++++++++ data-collection/components/database.py | 16 ------ data-collection/components/subscriptions/main.py | 64 ---------------------- data-collection/components/subscriptions/typing.py | 13 ----- data-collection/components/users/typing.py | 13 ----- data-collection/components/videos.py | 27 --------- 10 files changed, 133 insertions(+), 133 deletions(-) create mode 100644 components/database.py create mode 100644 components/subscriptions/main.py create mode 100644 components/subscriptions/typing.py create mode 100644 components/users/typing.py create mode 100644 components/videos.py delete mode 100644 data-collection/components/database.py delete mode 100644 data-collection/components/subscriptions/main.py delete mode 100644 data-collection/components/subscriptions/typing.py delete mode 100644 data-collection/components/users/typing.py delete mode 100644 data-collection/components/videos.py diff --git a/components/database.py b/components/database.py new file mode 100644 index 0000000..72f63e6 --- /dev/null +++ b/components/database.py @@ -0,0 +1,16 @@ +import atexit +from typing import Any, Dict +from pymongo import MongoClient +from pymongo.database import Database +from pymongo.collection import Collection +from components.subscriptions.typing import SubsDict +from components.users.typing import UserDict + +client: MongoClient[Any] = MongoClient("mongodb://localhost", tz_aware=True) +database: Database[Any] = client.get_database("youtube") +subscriptions: Collection[SubsDict] = database.get_collection("subscriptions") +users: Collection[UserDict] = database.get_collection("users") + +@atexit.register +def _cleanup() -> None: + client.close() diff --git a/components/subscriptions/main.py b/components/subscriptions/main.py new file mode 100644 index 0000000..26b0c64 --- /dev/null +++ b/components/subscriptions/main.py @@ -0,0 +1,64 @@ +from dataclasses import dataclass, field, asdict +from datetime import datetime, UTC +from sys import stderr +from typing import TypedDict, List, cast +from bson.objectid import ObjectId +from feedparser import parse # type: ignore +from pymongo.collection import Collection +from pymongo.results import InsertOneResult, UpdateResult +import schedule +from components.database import subscriptions +from components.subscriptions.typing import SubsDict +from components.videos import VideoTuple + +@dataclass +class Subscription: + _id: str + link: str + time_between_fetches: int + last_fetch: datetime = datetime.min.replace(tzinfo=UTC) + last_video_update: datetime = datetime.min.replace(tzinfo=UTC) + videos: List[VideoTuple] = field(default_factory=list) + subscribers: List[ObjectId] = field(default_factory=list) + + def __post_init__(self) -> None: + self._job: schedule.Job = schedule.every(self.time_between_fetches).minutes.do(self.fetch) + self._collection: Collection[SubsDict] = subscriptions + self._in_db: bool = False + + def fetch(self) -> None: + try: + rss = parse(self.link) + except Exception as e: + print("Ran into an exception while fetching", self._id + ":", e, file=stderr) + return + for vid in map(VideoTuple.from_rss_entry, rss.entries): + if vid.published > self.last_video_update: + self.videos.append(vid) + elif vid.updated > self.last_video_update: + for i, old_vid in enumerate(self.videos): + if vid.id == old_vid.id: + self.videos[i] = vid + break + last_video_update = max((vid.updated for vid in self.videos)) + if last_video_update > self.last_video_update: + print("Updating", self._id) + self.last_video_update = last_video_update + self.update_videos() + self.last_fetch = datetime.now(tz=UTC) + + def asdict(self) -> SubsDict: + return cast(SubsDict, asdict(self)) + + def insert(self) -> InsertOneResult: + return self._collection.insert_one(self.asdict()) + + def update_videos(self) -> UpdateResult: + return self._collection.update_one( + {"_id": self._id}, + {"$set": { + "videos": self.videos, + "last_video_update": self.last_video_update, + "last_fetch": self.last_fetch, + }}, + ) diff --git a/components/subscriptions/typing.py b/components/subscriptions/typing.py new file mode 100644 index 0000000..8f2a298 --- /dev/null +++ b/components/subscriptions/typing.py @@ -0,0 +1,13 @@ +from datetime import datetime +from typing import TypedDict, List +from bson.objectid import ObjectId +from components.videos import VideoTuple + +class SubsDict(TypedDict): + _id: str + link: str + time_between_fetches: int # In minutes. + last_fetch: datetime + last_video_update: datetime + videos: List[VideoTuple] + subscribers: List[ObjectId] diff --git a/components/users/typing.py b/components/users/typing.py new file mode 100644 index 0000000..f03eecb --- /dev/null +++ b/components/users/typing.py @@ -0,0 +1,13 @@ +from datetime import datetime +from typing import TypedDict, NamedTuple, List, Tuple +from bson.objectid import ObjectId + +class SubscriptionItem(NamedTuple): + id: str + time_between_fetches: int + last_viewed: datetime + +class UserDict(TypedDict): + id: ObjectId + name: str + subscriptions: List[SubscriptionItem] diff --git a/components/videos.py b/components/videos.py new file mode 100644 index 0000000..5b2d644 --- /dev/null +++ b/components/videos.py @@ -0,0 +1,27 @@ +from typing import NamedTuple, Any, Self +from datetime import datetime + +class VideoTuple(NamedTuple): + id: str + link: str + title: str + author: str + author_channel: str + published: datetime + updated: datetime + thumbnail: str + summary: str + + @classmethod + def from_rss_entry(cls, entry: Any) -> Self: + return cls( + id = entry.id, + link = entry.link, + title = entry.title, + author = entry.author_detail.name, + author_channel = entry.author_detail.href, + published = datetime.fromisoformat(entry.published), + updated = datetime.fromisoformat(entry.updated), + thumbnail = entry.media_thumbnail[0]["url"], + summary = entry.summary, + ) diff --git a/data-collection/components/database.py b/data-collection/components/database.py deleted file mode 100644 index 72f63e6..0000000 --- a/data-collection/components/database.py +++ /dev/null @@ -1,16 +0,0 @@ -import atexit -from typing import Any, Dict -from pymongo import MongoClient -from pymongo.database import Database -from pymongo.collection import Collection -from components.subscriptions.typing import SubsDict -from components.users.typing import UserDict - -client: MongoClient[Any] = MongoClient("mongodb://localhost", tz_aware=True) -database: Database[Any] = client.get_database("youtube") -subscriptions: Collection[SubsDict] = database.get_collection("subscriptions") -users: Collection[UserDict] = database.get_collection("users") - -@atexit.register -def _cleanup() -> None: - client.close() diff --git a/data-collection/components/subscriptions/main.py b/data-collection/components/subscriptions/main.py deleted file mode 100644 index 26b0c64..0000000 --- a/data-collection/components/subscriptions/main.py +++ /dev/null @@ -1,64 +0,0 @@ -from dataclasses import dataclass, field, asdict -from datetime import datetime, UTC -from sys import stderr -from typing import TypedDict, List, cast -from bson.objectid import ObjectId -from feedparser import parse # type: ignore -from pymongo.collection import Collection -from pymongo.results import InsertOneResult, UpdateResult -import schedule -from components.database import subscriptions -from components.subscriptions.typing import SubsDict -from components.videos import VideoTuple - -@dataclass -class Subscription: - _id: str - link: str - time_between_fetches: int - last_fetch: datetime = datetime.min.replace(tzinfo=UTC) - last_video_update: datetime = datetime.min.replace(tzinfo=UTC) - videos: List[VideoTuple] = field(default_factory=list) - subscribers: List[ObjectId] = field(default_factory=list) - - def __post_init__(self) -> None: - self._job: schedule.Job = schedule.every(self.time_between_fetches).minutes.do(self.fetch) - self._collection: Collection[SubsDict] = subscriptions - self._in_db: bool = False - - def fetch(self) -> None: - try: - rss = parse(self.link) - except Exception as e: - print("Ran into an exception while fetching", self._id + ":", e, file=stderr) - return - for vid in map(VideoTuple.from_rss_entry, rss.entries): - if vid.published > self.last_video_update: - self.videos.append(vid) - elif vid.updated > self.last_video_update: - for i, old_vid in enumerate(self.videos): - if vid.id == old_vid.id: - self.videos[i] = vid - break - last_video_update = max((vid.updated for vid in self.videos)) - if last_video_update > self.last_video_update: - print("Updating", self._id) - self.last_video_update = last_video_update - self.update_videos() - self.last_fetch = datetime.now(tz=UTC) - - def asdict(self) -> SubsDict: - return cast(SubsDict, asdict(self)) - - def insert(self) -> InsertOneResult: - return self._collection.insert_one(self.asdict()) - - def update_videos(self) -> UpdateResult: - return self._collection.update_one( - {"_id": self._id}, - {"$set": { - "videos": self.videos, - "last_video_update": self.last_video_update, - "last_fetch": self.last_fetch, - }}, - ) diff --git a/data-collection/components/subscriptions/typing.py b/data-collection/components/subscriptions/typing.py deleted file mode 100644 index 8f2a298..0000000 --- a/data-collection/components/subscriptions/typing.py +++ /dev/null @@ -1,13 +0,0 @@ -from datetime import datetime -from typing import TypedDict, List -from bson.objectid import ObjectId -from components.videos import VideoTuple - -class SubsDict(TypedDict): - _id: str - link: str - time_between_fetches: int # In minutes. - last_fetch: datetime - last_video_update: datetime - videos: List[VideoTuple] - subscribers: List[ObjectId] diff --git a/data-collection/components/users/typing.py b/data-collection/components/users/typing.py deleted file mode 100644 index f03eecb..0000000 --- a/data-collection/components/users/typing.py +++ /dev/null @@ -1,13 +0,0 @@ -from datetime import datetime -from typing import TypedDict, NamedTuple, List, Tuple -from bson.objectid import ObjectId - -class SubscriptionItem(NamedTuple): - id: str - time_between_fetches: int - last_viewed: datetime - -class UserDict(TypedDict): - id: ObjectId - name: str - subscriptions: List[SubscriptionItem] diff --git a/data-collection/components/videos.py b/data-collection/components/videos.py deleted file mode 100644 index 5b2d644..0000000 --- a/data-collection/components/videos.py +++ /dev/null @@ -1,27 +0,0 @@ -from typing import NamedTuple, Any, Self -from datetime import datetime - -class VideoTuple(NamedTuple): - id: str - link: str - title: str - author: str - author_channel: str - published: datetime - updated: datetime - thumbnail: str - summary: str - - @classmethod - def from_rss_entry(cls, entry: Any) -> Self: - return cls( - id = entry.id, - link = entry.link, - title = entry.title, - author = entry.author_detail.name, - author_channel = entry.author_detail.href, - published = datetime.fromisoformat(entry.published), - updated = datetime.fromisoformat(entry.updated), - thumbnail = entry.media_thumbnail[0]["url"], - summary = entry.summary, - ) -- cgit v1.2.3-70-g09d2