from collections import OrderedDict
from datetime import datetime
from typing import Optional


class CacheHandler:
    def __init__(self, max_cache_size: int = 1000):
        # Using OrderedDict to maintain insertion order for efficient removal of the oldest items
        self.cache = OrderedDict()
        self.cache["https://ikergarcia1996.github.io/Iker-Garcia-Ferrero/"] = {
            "title": "Iker García-Ferrero | Personal Webpage",
            "text": None,  # The default entry has no cached page text
            "date": datetime.now(),
            "summary_0": "Iker García-Ferrero es un candidato a PhD en Natural Language Processing (NLP) "
            "en la Universidad del País Vasco UPV/EHU, IXA Group y HiTZ Centro Vasco de Tecnología de la "
            "Lengua, financiado por una beca del Gobierno Vasco. "
            "En el pasado, ha realizado prácticas en Amazon y ha realizado una estancia "
            "de investigación en la Universidad de Pensilvania (EEUU). "
            "Sus investigaciones se centran en la creación de modelos y recursos para NLP en "
            "lenguas con pocos o ningún recurso disponible, utilizando técnicas de transferencia de "
            "datos y modelos. Recientemente también se ha especializado en el entrenamiento de LLMs.",
            "summary_50": "Iker García-Ferrero es un candidato a PhD en NLP en la Universidad del País Vasco, "
            "con experiencia en Amazon, la Universidad de Pensilvania e HiTZ.",
            "summary_100": "Iker García-Ferrero es un candidato a PhD en NLP.",
        }
        self.max_cache_size = max_cache_size
        self.misses = 0
        self.hits = 0

    def add_to_cache(
        self, url: str, title: str, text: str, summary_type: int, summary: str
    ):
        # If the URL already exists, update it and move it to the end to mark it as the most recently used
        if url in self.cache:
            self.cache.move_to_end(url)
            self.cache[url][f"summary_{summary_type}"] = summary
            self.cache[url]["date"] = datetime.now()
        else:
            # Add a new entry to the cache
            self.cache[url] = {
                "title": title,
                "text": text,
                "date": datetime.now(),
                "summary_0": summary if summary_type == 0 else None,
                "summary_50": summary if summary_type == 50 else None,
                "summary_100": summary if summary_type == 100 else None,
            }

            # Remove the oldest item if the cache exceeds its max size
            if len(self.cache) > self.max_cache_size:
                self.cache.move_to_end(
                    "https://ikergarcia1996.github.io/Iker-Garcia-Ferrero/"
                )  # This is the default entry in the demo, so we don't want to evict it
                self.cache.popitem(last=False)  # Pop the oldest item

    def get_from_cache(
        self, url: str, summary_type: int, second_try: bool = False
    ) -> tuple[Optional[str], Optional[str], Optional[str]]:
        if url in self.cache and self.cache[url][f"summary_{summary_type}"] is not None:
            # Move the accessed item to the end to mark it as recently used
            self.cache.move_to_end(url)
            self.hits += 1
            if second_try:
                # On the first try we didn't get a cache hit, probably because it was a shortened URL.
                # So we decrease the number of misses, because we got the cache hit in the end.
                self.misses -= 1
            return (
                self.cache[url]["title"],
                self.cache[url]["text"],
                self.cache[url][f"summary_{summary_type}"],
            )
        else:
            if not second_try:
                self.misses += 1
            return None, None, None

    def get_cache_stats(self):
        return self.hits, self.misses, len(self.cache)
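

# Minimal usage sketch (illustrative only; the example.com URLs and summary
# strings below are made-up placeholders, not values from the actual demo).
# It shows the LRU behaviour, eviction of the least recently used entry, and
# the hit/miss bookkeeping.
if __name__ == "__main__":
    cache = CacheHandler(max_cache_size=2)

    # First lookup misses, so the caller would fetch and summarize the page,
    # then store the result.
    title, text, summary = cache.get_from_cache("https://example.com/a", 0)
    assert summary is None
    cache.add_to_cache("https://example.com/a", "Page A", "Full text A", 0, "Summary A")

    # Second lookup hits and returns the cached (title, text, summary) triple.
    title, text, summary = cache.get_from_cache("https://example.com/a", 0)
    assert summary == "Summary A"

    # Adding another entry pushes the cache past max_cache_size, evicting the
    # least recently used entry (but never the pinned default URL).
    cache.add_to_cache("https://example.com/b", "Page B", "Full text B", 0, "Summary B")

    hits, misses, size = cache.get_cache_stats()
    print(f"hits={hits} misses={misses} size={size}")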