summary | refs | log | tree | commit | diff
path: root/data-collection/components
diff options
context:
space:
mode:
Diffstat (limited to 'data-collection/components')
-rw-r--r-- data-collection/components/subscriptions/main.py | 58
-rw-r--r-- data-collection/components/subscriptions/typing.py | 7
-rw-r--r-- data-collection/components/videos.py | 14
3 files changed, 57 insertions, 22 deletions
diff --git a/data-collection/components/subscriptions/main.py b/data-collection/components/subscriptions/main.py
index 352cf1b..6a3f5cd 100644
--- a/data-collection/components/subscriptions/main.py
+++ b/data-collection/components/subscriptions/main.py
@@ -1,36 +1,58 @@
-from dataclasses import dataclass, field
-from datetime import datetime
+from dataclasses import dataclass, field, asdict
+from datetime import datetime, UTC
from sys import stderr
from typing import TypedDict, List
from bson.objectid import ObjectId
from feedparser import parse # type: ignore
-import requests
+from pymongo.collection import Collection
+from pymongo.results import UpdateResult
import schedule
+from components.database import subscriptions
+from components.subscriptions.typing import SubsDict
from components.videos import VideoTuple
@dataclass
class Subscription:
- id: str
+ _id: str
link: str
time_between_fetches: int
- last_update: datetime = datetime.min
+ last_fetch: datetime = datetime.min.replace(tzinfo=UTC)
+ last_video_update: datetime = datetime.min.replace(tzinfo=UTC)
videos: List[VideoTuple] = field(default_factory=list)
subscribers: List[ObjectId] = field(default_factory=list)
def __post_init__(self) -> None:
- self._job: schedule.Job = schedule.every(self.time_between_fetches).second.do(self.update)
+ self._job: schedule.Job = schedule.every(self.time_between_fetches).minutes.do(self.fetch)
+ self._collection: Collection[SubsDict] = subscriptions
+ self._in_db: bool = False
- def update(self) -> None:
+ def fetch(self) -> None:
try:
- for entry in parse(self.link)["entries"]:
- self.videos.append(VideoTuple(
- id = entry.id,
- link = entry.link,
- title = entry.title,
- published = datetime.fromisoformat(entry.published),
- updated = datetime.fromisoformat(entry.updated),
- thumbnail = entry.media_thumbnail[0]["url"],
- summary = entry.summary,
- ))
+ rss = parse(self.link)
except Exception as e:
- print("Ran into exception", e, file=stderr)
+ print("Ran into an exception while fetching", self._id + ":", e, file=stderr)
+ return
+ for vid in map(VideoTuple.from_rss_entry, rss.entries):
+ if vid.published > self.last_video_update:
+ self.videos.append(vid)
+ elif vid.updated > self.last_video_update:
+ for i, old_vid in enumerate(self.videos):
+ if vid.id == old_vid.id:
+ self.videos[i] = vid
+ break
+ last_video_update = max((vid.updated for vid in self.videos))
+ if last_video_update > self.last_video_update:
+ print("Updating", self._id)
+ self.last_video_update = last_video_update
+ self.update_fields(["videos", "last_video_update"])
+ self.last_fetch = datetime.now(tz=UTC)
+
+ def update_fields(self, fields: List[str]) -> UpdateResult:
+ sub = asdict(self)
+ if self._in_db:
+ return self._collection.update_one(
+ {"_id": self._id},
+ {"$set": {key: sub[key] for key in fields}},
+ )
+ self._in_db = True
+ return self._collection.replace_one({"_id": self._id}, sub, upsert=True)
diff --git a/data-collection/components/subscriptions/typing.py b/data-collection/components/subscriptions/typing.py
index eebcece..8f2a298 100644
--- a/data-collection/components/subscriptions/typing.py
+++ b/data-collection/components/subscriptions/typing.py
@@ -4,9 +4,10 @@ from bson.objectid import ObjectId
from components.videos import VideoTuple
class SubsDict(TypedDict):
- id: str
+ _id: str
link: str
- time_between_fetches: int # In hours.
- last_update: datetime
+ time_between_fetches: int # In minutes.
+ last_fetch: datetime
+ last_video_update: datetime
videos: List[VideoTuple]
subscribers: List[ObjectId]
diff --git a/data-collection/components/videos.py b/data-collection/components/videos.py
index 71937b2..32dc1da 100644
--- a/data-collection/components/videos.py
+++ b/data-collection/components/videos.py
@@ -1,4 +1,4 @@
-from typing import NamedTuple
+from typing import NamedTuple, Any, Self
from datetime import datetime
class VideoTuple(NamedTuple):
@@ -9,3 +9,15 @@ class VideoTuple(NamedTuple):
updated: datetime
thumbnail: str
summary: str
+
+ @classmethod
+ def from_rss_entry(cls, entry: Any) -> Self:
+ return cls(
+ id = entry.id,
+ link = entry.link,
+ title = entry.title,
+ published = datetime.fromisoformat(entry.published),
+ updated = datetime.fromisoformat(entry.updated),
+ thumbnail = entry.media_thumbnail[0]["url"],
+ summary = entry.summary,
+ )