summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author A Farzat <a@farzat.xyz> 2025-10-05 07:24:22 +0300
committer A Farzat <a@farzat.xyz> 2025-10-05 07:24:22 +0300
commit 3b418daf684f01a126a56b58c83120af5914f576 (patch)
tree 20b6ea9487cce5a90cff8696085b8c3f7783db75
parent 66194a36af3d1f6897e8fd0e3b61388cb18f6e1f (diff)
download csca5028-3b418daf684f01a126a56b58c83120af5914f576.tar.gz
csca5028-3b418daf684f01a126a56b58c83120af5914f576.zip
Remove the redundant scheduler class
This allows the subscriptions to be re-read from the database on every pass, ensuring that any updates made to the database between fetches are captured.
-rw-r--r-- components/subscriptions/main.py 9
-rw-r--r-- components/subscriptions/typing.py 2
-rwxr-xr-x data_collector.py 23
-rw-r--r-- tests/feed.py 13
4 files changed, 10 insertions, 37 deletions
diff --git a/components/subscriptions/main.py b/components/subscriptions/main.py
index 36e6d15..afd1d54 100644
--- a/components/subscriptions/main.py
+++ b/components/subscriptions/main.py
@@ -6,13 +6,10 @@ from bson.objectid import ObjectId
from feedparser import parse # type: ignore
from pymongo.collection import Collection
from pymongo.results import InsertOneResult, UpdateResult
-from schedule import Job, Scheduler
from components.database import subscriptions
from components.subscriptions.typing import SubsDict
from components.videos import VideoTuple
-default_scheduler = Scheduler()
-
@dataclass
class Subscription:
_id: str
@@ -26,15 +23,9 @@ class Subscription:
def __post_init__(self) -> None:
self._collection: Collection[SubsDict] = subscriptions
- self._scheduler: Scheduler = default_scheduler
if len(self.videos) and type(self.videos[0]) != VideoTuple:
self.videos = [VideoTuple._make(vid) for vid in self.videos]
- def initialise_job(self) -> None:
- self._job: Job = self._scheduler.every(self.time_between_fetches).minutes.do(self.fetch)
- if self.last_fetch > datetime.min.replace(tzinfo=UTC):
- self._job.next_run += self.last_fetch - datetime.now(tz=UTC)
-
def fetch(self) -> None:
try:
rss = parse(self.link)
diff --git a/components/subscriptions/typing.py b/components/subscriptions/typing.py
index bac3620..c16d636 100644
--- a/components/subscriptions/typing.py
+++ b/components/subscriptions/typing.py
@@ -6,7 +6,7 @@ from components.videos import VideoTuple
class SubsDict(TypedDict):
_id: str
link: str
- time_between_fetches: int # In minutes.
+ time_between_fetches: int # In seconds.
last_fetch: datetime
last_video_update: datetime
last_viewed: datetime
diff --git a/data_collector.py b/data_collector.py
index 9da9404..485d8af 100755
--- a/data_collector.py
+++ b/data_collector.py
@@ -1,24 +1,13 @@
#!/usr/bin/env python
from time import sleep
+from datetime import datetime, timedelta, UTC
from components.database import subscriptions
-from components.subscriptions.main import Subscription, default_scheduler
-
-subs_to_fetch = ["fiwzLy-8yKzIbsmZTzxDgw", "Ef0-WZoqYFzLZtx43KPvag",
- "PF-oYb2-xN5FbCXy0167Gg", "hlgI3UHCOnwUGzWzbJ3H5w", ]
-for id in subs_to_fetch:
- sub_dict = subscriptions.find_one({"_id": "yt:channel:"+id})
- if sub_dict:
- sub = Subscription(**sub_dict)
- else:
- sub = Subscription(
- _id="yt:channel:"+id,
- link="http://www.youtube.com/feeds/videos.xml?channel_id=UC"+id,
- time_between_fetches=5,
- )
- sub.insert()
- sub.initialise_job()
+from components.subscriptions.main import Subscription
while True:
- default_scheduler.run_pending()
+ for sub_dict in subscriptions.find():
+ sub = Subscription(**sub_dict)
+ if datetime.now(tz=UTC) - sub.last_fetch > timedelta(seconds=sub.time_between_fetches):
+ sub.fetch()
sleep(60)
diff --git a/tests/feed.py b/tests/feed.py
index aaeb96a..97a60ef 100644
--- a/tests/feed.py
+++ b/tests/feed.py
@@ -1,7 +1,6 @@
from typing import Any
from mongomock import MongoClient
from pymongo.collection import Collection
-from schedule import Scheduler
from unittest import TestCase
from components.subscriptions.main import Subscription
from components.subscriptions.typing import SubsDict
@@ -12,7 +11,6 @@ class TestFeeds(TestCase):
def setUp(self) -> None:
self.client: MongoClient[Any] = MongoClient(tz_aware=True)
self.collection: Collection[SubsDict] = self.client.db.collection
- self.scheduler: Scheduler = Scheduler()
def test_insert(self) -> None:
sub = Subscription(
@@ -21,7 +19,6 @@ class TestFeeds(TestCase):
time_between_fetches=5,
)
sub._collection = self.collection
- sub._scheduler = self.scheduler
sub.insert()
sub_dict = self.collection.find_one({"_id": "yt:channel:bla"})
self.assertIsNotNone(sub_dict)
@@ -35,10 +32,8 @@ class TestFeeds(TestCase):
time_between_fetches=1,
)
sub._collection = self.collection
- sub._scheduler = self.scheduler
sub.insert()
- sub.initialise_job()
- self.scheduler.run_all()
+ sub.fetch()
self.assertEqual(15, len(sub.videos))
sub_dict = self.collection.find_one({"_id": "yt:channel:hlgI3UHCOnwUGzWzbJ3H5w"})
self.assertIsNotNone(sub_dict)
@@ -52,12 +47,10 @@ class TestFeeds(TestCase):
time_between_fetches=1,
)
sub._collection = self.collection
- sub._scheduler = self.scheduler
sub.insert()
- sub.initialise_job()
- self.scheduler.run_all()
+ sub.fetch()
sub.link=r"tests/data/feed@ytnnews24@002.xml"
- self.scheduler.run_all()
+ sub.fetch()
self.assertEqual(16, len(sub.videos))
sub_dict = self.collection.find_one({"_id": "yt:channel:hlgI3UHCOnwUGzWzbJ3H5w"})
self.assertIsNotNone(sub_dict)