ClickbaitFighter / cache_system.py
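"""LRU cache for article summaries, keyed by URL.

Each entry stores the article title, text, and up to three summaries
(summary_0, summary_50, summary_100). Entries are moved to the end of the
OrderedDict on access or update, and the least recently used entry is evicted
once the cache grows past max_cache_size. Hit and miss counts are tracked and
exposed via get_cache_stats().
"""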
from collections import OrderedDict
from datetime import datetime
from typing import Optional, Tuple


class CacheHandler:
def __init__(self, max_cache_size: int = 1000):
        # OrderedDict keeps entries in order of use, so the least recently used item can be evicted efficiently
self.cache = OrderedDict()
self.max_cache_size = max_cache_size
self.misses = 0
        self.hits = 0

    def add_to_cache(
self, url: str, title: str, text: str, summary_type: int, summary: str
):
# If URL already exists, update it and move it to the end to mark it as the most recently used
if url in self.cache:
self.cache.move_to_end(url)
self.cache[url][f"summary_{summary_type}"] = summary
self.cache[url]["date"] = datetime.now()
else:
# Add new entry to the cache
self.cache[url] = {
"title": title,
"text": text,
"date": datetime.now(),
"summary_0": summary if summary_type == 0 else None,
"summary_50": summary if summary_type == 50 else None,
"summary_100": summary if summary_type == 100 else None,
}
# Remove the oldest item if cache exceeds max size
if len(self.cache) > self.max_cache_size:
            self.cache.popitem(last=False)  # pop the oldest (least recently used) item

    def get_from_cache(
        self, url: str, summary_type: int, second_try: bool = False
    ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        if url in self.cache and self.cache[url].get(f"summary_{summary_type}") is not None:
# Move the accessed item to the end to mark it as recently used
self.cache.move_to_end(url)
self.hits += 1
if second_try:
                # On the first try we did not get a cache hit, probably because the URL was a shortened one.
                # Decrement the miss count, since we got a cache hit in the end.
self.misses -= 1
return (
self.cache[url]["title"],
self.cache[url]["text"],
self.cache[url][f"summary_{summary_type}"],
)
else:
if not second_try:
self.misses += 1
            return None, None, None

    def get_cache_stats(self):
        # Returns (hits, misses, number of cached entries).
        return self.hits, self.misses, len(self.cache)
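

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original file). It shows how a
# caller might combine get_from_cache and add_to_cache; the URL, title, text,
# and summary strings below are made-up placeholder values.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    cache = CacheHandler(max_cache_size=2)

    url = "https://example.com/article"  # placeholder URL

    # First lookup misses; the caller would then fetch and summarize the
    # article and store the result in the cache.
    title, text, summary = cache.get_from_cache(url, summary_type=0)
    if summary is None:
        cache.add_to_cache(
            url,
            title="Example title",
            text="Example article text",
            summary_type=0,
            summary="Example summary",
        )

    # Second lookup hits and returns the cached (title, text, summary) tuple.
    title, text, summary = cache.get_from_cache(url, summary_type=0)

    print(cache.get_cache_stats())  # -> (1, 1, 1): one hit, one miss, one entry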