summary | refs | log | tree | commit | diff
path: root/data-collection/components
diff options
context:
space:
mode:
Diffstat (limited to 'data-collection/components')
-rw-r--r-- data-collection/components/database.py | 16
-rw-r--r-- data-collection/components/subscriptions/main.py | 64
-rw-r--r-- data-collection/components/subscriptions/typing.py | 13
-rw-r--r-- data-collection/components/users/typing.py | 13
-rw-r--r-- data-collection/components/videos.py | 27
5 files changed, 0 insertions, 133 deletions
diff --git a/data-collection/components/database.py b/data-collection/components/database.py
deleted file mode 100644
index 72f63e6..0000000
--- a/data-collection/components/database.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import atexit
-from typing import Any, Dict
-from pymongo import MongoClient
-from pymongo.database import Database
-from pymongo.collection import Collection
-from components.subscriptions.typing import SubsDict
-from components.users.typing import UserDict
-
-client: MongoClient[Any] = MongoClient("mongodb://localhost", tz_aware=True)
-database: Database[Any] = client.get_database("youtube")
-subscriptions: Collection[SubsDict] = database.get_collection("subscriptions")
-users: Collection[UserDict] = database.get_collection("users")
-
-@atexit.register
-def _cleanup() -> None:
- client.close()
diff --git a/data-collection/components/subscriptions/main.py b/data-collection/components/subscriptions/main.py
deleted file mode 100644
index 26b0c64..0000000
--- a/data-collection/components/subscriptions/main.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from dataclasses import dataclass, field, asdict
-from datetime import datetime, UTC
-from sys import stderr
-from typing import TypedDict, List, cast
-from bson.objectid import ObjectId
-from feedparser import parse # type: ignore
-from pymongo.collection import Collection
-from pymongo.results import InsertOneResult, UpdateResult
-import schedule
-from components.database import subscriptions
-from components.subscriptions.typing import SubsDict
-from components.videos import VideoTuple
-
-@dataclass
-class Subscription:
- _id: str
- link: str
- time_between_fetches: int
- last_fetch: datetime = datetime.min.replace(tzinfo=UTC)
- last_video_update: datetime = datetime.min.replace(tzinfo=UTC)
- videos: List[VideoTuple] = field(default_factory=list)
- subscribers: List[ObjectId] = field(default_factory=list)
-
- def __post_init__(self) -> None:
- self._job: schedule.Job = schedule.every(self.time_between_fetches).minutes.do(self.fetch)
- self._collection: Collection[SubsDict] = subscriptions
- self._in_db: bool = False
-
- def fetch(self) -> None:
- try:
- rss = parse(self.link)
- except Exception as e:
- print("Ran into an exception while fetching", self._id + ":", e, file=stderr)
- return
- for vid in map(VideoTuple.from_rss_entry, rss.entries):
- if vid.published > self.last_video_update:
- self.videos.append(vid)
- elif vid.updated > self.last_video_update:
- for i, old_vid in enumerate(self.videos):
- if vid.id == old_vid.id:
- self.videos[i] = vid
- break
- last_video_update = max((vid.updated for vid in self.videos))
- if last_video_update > self.last_video_update:
- print("Updating", self._id)
- self.last_video_update = last_video_update
- self.update_videos()
- self.last_fetch = datetime.now(tz=UTC)
-
- def asdict(self) -> SubsDict:
- return cast(SubsDict, asdict(self))
-
- def insert(self) -> InsertOneResult:
- return self._collection.insert_one(self.asdict())
-
- def update_videos(self) -> UpdateResult:
- return self._collection.update_one(
- {"_id": self._id},
- {"$set": {
- "videos": self.videos,
- "last_video_update": self.last_video_update,
- "last_fetch": self.last_fetch,
- }},
- )
diff --git a/data-collection/components/subscriptions/typing.py b/data-collection/components/subscriptions/typing.py
deleted file mode 100644
index 8f2a298..0000000
--- a/data-collection/components/subscriptions/typing.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from datetime import datetime
-from typing import TypedDict, List
-from bson.objectid import ObjectId
-from components.videos import VideoTuple
-
-class SubsDict(TypedDict):
- _id: str
- link: str
- time_between_fetches: int # In minutes.
- last_fetch: datetime
- last_video_update: datetime
- videos: List[VideoTuple]
- subscribers: List[ObjectId]
diff --git a/data-collection/components/users/typing.py b/data-collection/components/users/typing.py
deleted file mode 100644
index f03eecb..0000000
--- a/data-collection/components/users/typing.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from datetime import datetime
-from typing import TypedDict, NamedTuple, List, Tuple
-from bson.objectid import ObjectId
-
-class SubscriptionItem(NamedTuple):
- id: str
- time_between_fetches: int
- last_viewed: datetime
-
-class UserDict(TypedDict):
- id: ObjectId
- name: str
- subscriptions: List[SubscriptionItem]
diff --git a/data-collection/components/videos.py b/data-collection/components/videos.py
deleted file mode 100644
index 5b2d644..0000000
--- a/data-collection/components/videos.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from typing import NamedTuple, Any, Self
-from datetime import datetime
-
-class VideoTuple(NamedTuple):
- id: str
- link: str
- title: str
- author: str
- author_channel: str
- published: datetime
- updated: datetime
- thumbnail: str
- summary: str
-
- @classmethod
- def from_rss_entry(cls, entry: Any) -> Self:
- return cls(
- id = entry.id,
- link = entry.link,
- title = entry.title,
- author = entry.author_detail.name,
- author_channel = entry.author_detail.href,
- published = datetime.fromisoformat(entry.published),
- updated = datetime.fromisoformat(entry.updated),
- thumbnail = entry.media_thumbnail[0]["url"],
- summary = entry.summary,
- )