Spaces:

Iker
/

ClickbaitFighter

Running on Zero

App Files Files Community

ClickbaitFighter / cache_system.py

Iker

Take into account redirects for cache

24e2197 verified 10 months ago

raw

history blame

2.39 kB

	from collections import OrderedDict
	from datetime import datetime
	from typing import Optional


	class CacheHandler:
	    """Bounded least-recently-used cache of article summaries keyed by URL.

	    Every entry is a dict holding the article ``title`` and ``text``, the
	    ``date`` of its last write, and one summary per summary type stored
	    under the key ``summary_<summary_type>``.  Hit and miss counters are
	    maintained and exposed through :meth:`get_cache_stats`.
	    """

	    def __init__(self, max_cache_size: int = 1000):
	        """Create an empty cache.

	        Args:
	            max_cache_size: Maximum number of URLs kept.  Adding a new URL
	                beyond this limit evicts the least recently used entry.
	        """
	        # OrderedDict keeps entries ordered from least to most recently
	        # used, so the eviction candidate is always the first item.
	        self.cache = OrderedDict()
	        self.max_cache_size = max_cache_size
	        self.misses = 0
	        self.hits = 0

	    def add_to_cache(
	        self, url: str, title: str, text: str, summary_type: int, summary: str
	    ):
	        """Store ``summary`` for ``url`` under the given ``summary_type``.

	        If the URL is already cached, only the summary slot and the date
	        are updated (the stored title and text are kept) and the entry is
	        marked as most recently used.  Otherwise a new entry is created
	        and, if the cache grew past ``max_cache_size``, the least recently
	        used entry is evicted.

	        Args:
	            url: Cache key.
	            title: Article title (used only when creating a new entry).
	            text: Article text (used only when creating a new entry).
	            summary_type: Summary variant; selects the ``summary_<type>``
	                slot.  Values other than 0, 50 and 100 are stored too.
	            summary: Summary text to cache.
	        """
	        summary_key = f"summary_{summary_type}"

	        if url in self.cache:
	            # Refresh recency, then update the requested slot in place.
	            self.cache.move_to_end(url)
	            entry = self.cache[url]
	            entry[summary_key] = summary
	            entry["date"] = datetime.now()
	            return

	        entry = {
	            "title": title,
	            "text": text,
	            "date": datetime.now(),
	            "summary_0": None,
	            "summary_50": None,
	            "summary_100": None,
	        }
	        # Written after the defaults so that a non-standard summary type is
	        # stored as well instead of being silently dropped.
	        entry[summary_key] = summary
	        self.cache[url] = entry

	        # Only a new entry can grow the cache; evict the oldest if needed.
	        if len(self.cache) > self.max_cache_size:
	            self.cache.popitem(last=False)

	    def get_from_cache(
	        self, url: str, summary_type: int, second_try: bool = False
	    ) -> tuple:
	        """Look up the cached summary of ``url`` for ``summary_type``.

	        Args:
	            url: Cache key.
	            summary_type: Summary variant to fetch.
	            second_try: Set when this is a repeated lookup for a request
	                whose first lookup already missed (probably because the
	                first URL was a shortened one).  A hit then cancels the
	                miss recorded earlier; a miss is not counted again.

	        Returns:
	            ``(title, text, summary)`` on a hit, ``(None, None, None)`` on
	            a miss.  The result is always a 3-tuple, never ``None``.
	        """
	        entry = self.cache.get(url)
	        # .get() makes an unknown summary type a miss instead of a KeyError.
	        summary = None if entry is None else entry.get(f"summary_{summary_type}")

	        if summary is None:
	            if not second_try:
	                self.misses += 1
	            return None, None, None

	        # Mark the accessed entry as most recently used.
	        self.cache.move_to_end(url)
	        self.hits += 1
	        if second_try:
	            # The first try did not hit, probably because it used a
	            # shortened URL, so we decrease the number of misses: the
	            # request hit the cache in the end.  Clamped so the counter
	            # can never become negative.
	            self.misses = max(0, self.misses - 1)
	        return entry["title"], entry["text"], summary

	    def get_cache_stats(self):
	        """Return ``(hits, misses, number_of_cached_urls)``."""
	        return self.hits, self.misses, len(self.cache)