Goated121 committed
Commit 1e93d04 · verified · 1 Parent(s): 093f515

Update app.py

Files changed (1): app.py +26 -54
app.py CHANGED
@@ -1,46 +1,33 @@
-from llama_cpp import Llama
+# app.py
 import gradio as gr
 import faiss
 import pickle
 import numpy as np
 from sentence_transformers import SentenceTransformer
-import os
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
+import os
 print("Files in current directory:", os.listdir())
 
 # -----------------------------
-# Globals (lazy-loaded)
+# Load RAG components
 # -----------------------------
-model = None
-embed_model = None
-index = None
-chunks = None
-metadata = None
+embed_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
+
+index = faiss.read_index("faiss_index.bin")
+chunks = pickle.load(open("chunks.pkl", "rb"))
+metadata = pickle.load(open("metadata.pkl", "rb"))
 
 # -----------------------------
-# Lazy-loading functions
+# Load Hugging Face LLM (CPU-friendly)
 # -----------------------------
-def load_llm():
-    global model
-    if model is None:
-        print("Loading LLM...")
-        model = Llama(
-            model_path="qwen2.5-1.5B-q4.gguf",
-            n_ctx=4096,
-            n_gpu_layers=0,
-            chat_format="qwen",
-        )
-        print("LLM loaded.")
-
-def load_rag():
-    global embed_model, index, chunks, metadata
-    if embed_model is None or index is None or chunks is None or metadata is None:
-        print("Loading embedding model and FAISS index...")
-        embed_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
-        index = faiss.read_index("faiss_index.bin")
-        chunks = pickle.load(open("chunks.pkl", "rb"))
-        metadata = pickle.load(open("metadata.pkl", "rb"))
-        print("RAG components loaded.")
+# Small model for HF Spaces CPU limits
+model_name = "TheBloke/vicuna-7B-1.1-HF" # You can replace with a smaller model if needed
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto") # Hugging Face will manage CPU/GPU
+generator = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=200)
+
+print("LLM loaded successfully!")
 
 # -----------------------------
 # Detect query intent
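Note on this hunk: the added comments call the checkpoint "CPU-friendly" and "small", but vicuna-7B loaded at default fp32 precision needs on the order of 28 GB of RAM, well beyond free Spaces CPU hardware, and device_map="auto" additionally requires the accelerate package. A minimal sketch of the same loading step with a genuinely small instruct model; the checkpoint name here is an illustrative choice, not the author's:

# Sketch, not the committed code: a checkpoint sized for free-tier CPU.
# "Qwen/Qwen2.5-0.5B-Instruct" is an example choice, not the author's.
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,  # caps the completion; max_length would also count prompt tokens
)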
@@ -67,8 +54,6 @@ def detect_query(query):
 # Retrieve context (RAG)
 # -----------------------------
 def retrieve_context(query):
-    load_rag() # ensure RAG is loaded
-
     animal, topic = detect_query(query)
 
     filtered_indices = []
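The removed load_rag() call means retrieval now relies on the module-level loading added in the first hunk, so the Space downloads the embedding model at import time before Gradio even starts. If startup latency matters, a cached loader keeps one-time initialization without globals; a minimal sketch, assuming the commit's file names:

# Sketch, assuming the commit's file names: one-time lazy init via lru_cache.
import pickle
from functools import lru_cache

import faiss
from sentence_transformers import SentenceTransformer

@lru_cache(maxsize=1)
def get_rag():
    embed_model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')
    index = faiss.read_index("faiss_index.bin")
    with open("chunks.pkl", "rb") as f:
        chunks = pickle.load(f)
    with open("metadata.pkl", "rb") as f:
        metadata = pickle.load(f)
    return embed_model, index, chunks, metadata

The first call pays the full load; every later call returns the cached tuple.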
@@ -83,7 +68,9 @@ def retrieve_context(query):
         filtered_indices = list(range(len(chunks)))
 
     query_embedding = embed_model.encode([query])
-    filtered_embeddings = np.array([index.reconstruct(i) for i in filtered_indices])
+
+    filtered_embeddings = [index.reconstruct(i) for i in filtered_indices]
+    filtered_embeddings = np.array(filtered_embeddings)
 
     distances = np.linalg.norm(filtered_embeddings - query_embedding, axis=1)
     top_indices = distances.argsort()[:2]
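Both the old and new versions rank candidates by reconstructing every vector with index.reconstruct(i) and computing L2 distances in NumPy. That only works on index types that support reconstruction (a flat index read via faiss.read_index does) and is O(N) per query. When no metadata filter applies, FAISS can run the same exact search itself; a sketch under the flat-L2-index assumption:

# Sketch, assuming a flat L2 index: let FAISS rank vectors directly
# instead of reconstructing all of them per query.
import numpy as np

def search_unfiltered(index, embed_model, query, k=2):
    query_embedding = embed_model.encode([query]).astype(np.float32)
    distances, indices = index.search(query_embedding, k)  # exact on IndexFlatL2
    return indices[0]  # positions of the k nearest chunks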
@@ -99,8 +86,6 @@ def retrieve_context(query):
 # Chat function
 # -----------------------------
 def chat(user_input):
-    load_llm() # ensure LLM is loaded
-
     context = retrieve_context(user_input)
 
     prompt = f"""
@@ -118,29 +103,16 @@ Question:
 Answer in short and clear sentences.
 """
 
-    messages = [
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": prompt}
-    ]
-
-    response = model.create_chat_completion(
-        messages=messages,
-        max_tokens=200,
-        temperature=0.5,
-    )
-
-    return response["choices"][0]["message"]["content"]
+    # Generate response
+    response = generator(prompt, max_length=200, do_sample=True, temperature=0.5)
+    return response[0]["generated_text"]
 
 # -----------------------------
 # Gradio UI
 # -----------------------------
-demo = gr.Interface(
+gr.Interface(
     fn=chat,
     inputs="text",
     outputs="text",
-    title="Livestock Chatbot",
-    description="Ask questions about goats and cows. The assistant answers using only the provided knowledge base."
-)
-
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)
+    title="Livestock Chatbot"
+).launch()
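One behavior change worth flagging in this hunk: create_chat_completion returned only the assistant message, but a transformers text-generation pipeline includes the prompt in generated_text by default, so the UI will echo the whole RAG prompt back to the user; max_length=200 also counts prompt tokens, so a long retrieved context can leave no room for the answer. A minimal sketch of the call with both knobs adjusted (return_full_text and max_new_tokens are standard pipeline parameters; the wrapper function is illustrative):

# Sketch, not the committed code: return only the completion and budget
# tokens for the answer rather than for prompt + answer combined.
def generate_answer(generator, prompt):
    response = generator(
        prompt,
        max_new_tokens=200,      # counts newly generated tokens only
        do_sample=True,
        temperature=0.5,
        return_full_text=False,  # strip the echoed prompt from the output
    )
    return response[0]["generated_text"]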
 
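The rewritten UI also drops the demo handle, the description text, and the __main__ guard. On Spaces a bare .launch() works because the platform supplies the host and port, but keeping the guard lets the file run locally with python app.py as well; an optional sketch, with a stand-in for the app's chat():

# Optional sketch: keep a handle and a main guard; Spaces launches it either way.
import gradio as gr

def chat(user_input):  # stand-in for the app's real chat()
    return user_input

demo = gr.Interface(fn=chat, inputs="text", outputs="text", title="Livestock Chatbot")

if __name__ == "__main__":
    demo.launch()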