Goated121 committed on
Commit
214f779
·
verified ·
1 Parent(s): 8591cf5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -15
app.py CHANGED
@@ -1,10 +1,9 @@
1
- # app.py
2
  import gradio as gr
3
  import faiss
4
  import pickle
5
  import numpy as np
6
  from sentence_transformers import SentenceTransformer
7
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
8
 
9
  import os
10
  print("Files in current directory:", os.listdir())
@@ -19,22 +18,34 @@ chunks = pickle.load(open("chunks.pkl", "rb"))
19
  metadata = pickle.load(open("metadata.pkl", "rb"))
20
 
21
  # -----------------------------
22
- # Load Hugging Face LLM (CPU-friendly)
23
  # -----------------------------
24
- # Small model for HF Spaces CPU limits
25
- model_name = "TheBloke/vicuna-3B-1.1-HF" # You can replace with a smaller model if needed
26
  tokenizer = AutoTokenizer.from_pretrained(model_name)
27
- model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto") # Hugging Face will manage CPU/GPU
28
- generator = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=200)
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  print("LLM loaded successfully!")
31
 
32
  # -----------------------------
33
- # Detect query intent
34
  # -----------------------------
35
  def detect_query(query):
36
  query = query.lower()
37
-
38
  animal = None
39
  topic = None
40
 
@@ -69,9 +80,7 @@ def retrieve_context(query):
69
 
70
  query_embedding = embed_model.encode([query])
71
 
72
- filtered_embeddings = [index.reconstruct(i) for i in filtered_indices]
73
- filtered_embeddings = np.array(filtered_embeddings)
74
-
75
  distances = np.linalg.norm(filtered_embeddings - query_embedding, axis=1)
76
  top_indices = distances.argsort()[:2]
77
 
@@ -103,9 +112,12 @@ Question:
103
  Answer in short and clear sentences.
104
  """
105
 
106
- # Generate response
107
- response = generator(prompt, max_length=200, do_sample=True, temperature=0.5)
108
- return response[0]["generated_text"]
 
 
 
109
 
110
  # -----------------------------
111
  # Gradio UI
 
 
1
  import gradio as gr
2
  import faiss
3
  import pickle
4
  import numpy as np
5
  from sentence_transformers import SentenceTransformer
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
7
 
8
  import os
9
  print("Files in current directory:", os.listdir())
 
18
  metadata = pickle.load(open("metadata.pkl", "rb"))
19
 
20
  # -----------------------------
21
+ # Load HF‑hosted small LLM
22
  # -----------------------------
23
+ model_name = "NousResearch/Nous-Hermes-1.0-GPTQ"
24
+
25
  tokenizer = AutoTokenizer.from_pretrained(model_name)
26
+ model = AutoModelForCausalLM.from_pretrained(
27
+ model_name,
28
+ device_map="auto", # Works on CPU or GPU
29
+ torch_dtype="auto"
30
+ )
31
+
32
+ generator = pipeline(
33
+ "text-generation",
34
+ model=model,
35
+ tokenizer=tokenizer,
36
+ max_new_tokens=150,
37
+ do_sample=True,
38
+ temperature=0.6
39
+ )
40
 
41
  print("LLM loaded successfully!")
42
 
43
  # -----------------------------
44
+ # Intent detection
45
  # -----------------------------
46
  def detect_query(query):
47
  query = query.lower()
48
+
49
  animal = None
50
  topic = None
51
 
 
80
 
81
  query_embedding = embed_model.encode([query])
82
 
83
+ filtered_embeddings = np.array([index.reconstruct(i) for i in filtered_indices])
 
 
84
  distances = np.linalg.norm(filtered_embeddings - query_embedding, axis=1)
85
  top_indices = distances.argsort()[:2]
86
 
 
112
  Answer in short and clear sentences.
113
  """
114
 
115
+ response = generator(prompt, max_new_tokens=150, do_sample=True, temperature=0.6)
116
+ text = response[0]["generated_text"]
117
+ # Remove prompt if repeated
118
+ if prompt.strip() in text:
119
+ text = text.split(prompt.strip())[-1].strip()
120
+ return text
121
 
122
  # -----------------------------
123
  # Gradio UI