Upload folder using huggingface_hub

Files changed:
- app.py +15 -1
- config.py +1 -1
- rag_service.py +7 -20
app.py
CHANGED
@@ -1,4 +1,7 @@
 from huggingface_hub import login
+from sentence_transformers import SentenceTransformer
+from transformers import pipeline
+
 from config import HF_TOKEN, GAME_KNOWLEDGE_DATA, EMBEDDING_MODEL_ID, LLM_MODEL_ID
 from rag_service import RAGService
 from ui import build_gradio_ui
@@ -8,8 +11,19 @@ def main():
     print("Logging into Hugging Face Hub...")
     login(token=HF_TOKEN)
 
+    print("Initializing embedding model...")
+    embedding_model = SentenceTransformer(EMBEDDING_MODEL_ID)
+
+    print("Initializing language model...")
+    llm_pipeline = pipeline(
+        "text-generation",
+        model=LLM_MODEL_ID,
+        device_map="auto",
+        dtype="auto",
+    )
+
     # 1. Create the single service instance. This loads all models and data.
-    rag_service = RAGService(GAME_KNOWLEDGE_DATA, EMBEDDING_MODEL_ID, LLM_MODEL_ID)
+    rag_service = RAGService(GAME_KNOWLEDGE_DATA, embedding_model, llm_pipeline)
 
     # 2. Build the UI, passing the service instance to it.
     demo = build_gradio_ui(rag_service)
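Note: the net effect of this change is dependency injection. The SentenceTransformer and the text-generation pipeline are now built once in main() and passed into RAGService, instead of being loaded inside the service. A minimal smoke test of the injected pipeline on its own, assuming a recent transformers version that accepts chat-format input; the prompt and max_new_tokens value are illustrative, not taken from the repo:

from transformers import pipeline
from config import LLM_MODEL_ID

# Same construction as in app.py above; device_map="auto" and dtype="auto"
# let accelerate pick the device and precision.
llm_pipeline = pipeline(
    "text-generation",
    model=LLM_MODEL_ID,
    device_map="auto",
    dtype="auto",
)

# A text-generation pipeline returns the extended conversation; the last
# message is the assistant's reply.
messages = [{"role": "user", "content": "Who is the Pale King?"}]
result = llm_pipeline(messages, max_new_tokens=64)
print(result[0]["generated_text"][-1]["content"])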
config.py
CHANGED
@@ -9,7 +9,7 @@ from web_helper import get_html, find_wiki_links, get_markdown_from_html, get_ma
 # --- Hugging Face & Model Configuration ---
 HF_TOKEN = os.getenv('HF_TOKEN')
 EMBEDDING_MODEL_ID = "google/embeddinggemma-300M"
-LLM_MODEL_ID = "google/gemma-3-
+LLM_MODEL_ID = "google/gemma-3-12B-it"
 
 # --- Data Source Configuration ---
 BASE_URL = "https://hollowknight.wiki"
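Note: HF_TOKEN is read with os.getenv, so it is None when the secret is missing, and the gated Gemma checkpoints then fail later with a less obvious download error. A defensive check such as the following (not part of this repo) would surface the problem at startup:

import os
import sys

# On a Space the token comes from a repository secret named HF_TOKEN;
# locally, export HF_TOKEN=... before launching.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    sys.exit("HF_TOKEN is not set; gated Gemma checkpoints cannot be downloaded.")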
rag_service.py
CHANGED
@@ -1,33 +1,20 @@
 import spaces
 import torch
-from sentence_transformers import SentenceTransformer, util
-from transformers import pipeline, TextIteratorStreamer
+from sentence_transformers import util
+from transformers import TextIteratorStreamer
 from threading import Thread
 
 # Import project-specific modules
 from config import BASE_URL, DEFAULT_MESSAGE_NO_MATCH, get_all_game_data
 from chat_context import ChatContext
 
-embedding_model = None
-
 class RAGService:
     """Manages model loading, data processing, and chat generation logic."""
-    def __init__(self, data_config: list[dict], embedding_model_id: str, llm_model_id: str):
+    def __init__(self, data_config: list[dict], embedding_model, llm_pipeline):
         print("Initializing RAG Service...")
         self.data_config = data_config
-
-
-        global embedding_model
-        embedding_model = SentenceTransformer(embedding_model_id)
-
-        print("Initializing language model...")
-        self.llm_pipeline = pipeline(
-            "text-generation",
-            model=llm_model_id,
-            device_map="auto",
-            dtype="auto",
-        )
-
+        self.embedding_model = embedding_model
+        self.llm_pipeline = llm_pipeline
         self.knowledge_base: dict[str, list[dict]] = get_all_game_data(embedding_model)
 
     def _select_content(self, title: str) -> list[dict]:
@@ -40,11 +27,11 @@ class RAGService:
         if not query or not contents:
             return -1
 
-        query_embedding = embedding_model.encode(query, prompt_name="query", convert_to_tensor=True).to(embedding_model.device)
+        query_embedding = self.embedding_model.encode(query, prompt_name="query", convert_to_tensor=True).to(self.embedding_model.device)
 
         try:
             # Stack pre-computed tensors from our knowledge base
-            contents_embeddings = torch.stack([item["embedding"] for item in contents]).to(embedding_model.device)
+            contents_embeddings = torch.stack([item["embedding"] for item in contents]).to(self.embedding_model.device)
         except (RuntimeError, IndexError, TypeError) as e:
             print(f"Warning: Could not stack content embeddings. Error: {e}")
             return -1
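Note: the second hunk ends right after the query and content embeddings are prepared; the retained "from sentence_transformers import util" import suggests cosine scoring follows, but that code is outside this diff. A minimal sketch of that step under those assumptions (the function name is hypothetical):

import torch
from sentence_transformers import util

def best_match_index(query_embedding: torch.Tensor,
                     contents_embeddings: torch.Tensor) -> int:
    # util.cos_sim broadcasts the query vector against the (N, D) stack of
    # content embeddings and returns a (1, N) matrix of cosine similarities.
    scores = util.cos_sim(query_embedding, contents_embeddings)
    return int(torch.argmax(scores))

Likewise, the kept TextIteratorStreamer and Thread imports point at the usual streaming pattern: generation runs in a background thread while the streamer is consumed as an iterator. A sketch of that pattern, again hypothetical with respect to this repo's actual generation method:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(llm_pipeline, prompt: str):
    # The streamer decodes tokens as they are generated; skip_prompt drops
    # the echoed input from the output stream.
    streamer = TextIteratorStreamer(
        llm_pipeline.tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Run generation in a worker thread so this generator can yield
    # incremental text to the UI as it arrives.
    thread = Thread(
        target=llm_pipeline,
        args=(prompt,),
        kwargs={"streamer": streamer, "max_new_tokens": 256},
    )
    thread.start()
    for chunk in streamer:
        yield chunk
    thread.join()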