Goated121 committed (verified)
Commit 1a86260 · Parent: 214f779

Update app.py

Files changed (1): app.py (+6 -5)
app.py CHANGED

@@ -18,15 +18,14 @@ chunks = pickle.load(open("chunks.pkl", "rb"))
 metadata = pickle.load(open("metadata.pkl", "rb"))
 
 # -----------------------------
-# Load HF-hosted small LLM
+# Load free HF small LLM
 # -----------------------------
-model_name = "NousResearch/Nous-Hermes-1.0-GPTQ"
+# Using distilgpt2 as it doesn't need a token
+model_name = "distilgpt2"
 
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    device_map="auto", # Works on CPU or GPU
-    torch_dtype="auto"
 )
 
 generator = pipeline(

@@ -114,9 +113,11 @@ Answer in short and clear sentences.
 
 response = generator(prompt, max_new_tokens=150, do_sample=True, temperature=0.6)
 text = response[0]["generated_text"]
-# Remove prompt if repeated
+
+# Remove prompt repetition
 if prompt.strip() in text:
     text = text.split(prompt.strip())[-1].strip()
+
 return text
 
 # -----------------------------
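
For reviewers who want to sanity-check the new code path, here is a minimal, self-contained sketch of what app.py does after this commit: load distilgpt2, build a generation pipeline, and trim the echoed prompt. The model name, generation arguments, and prompt-trimming logic come straight from the diff; the "text-generation" task string and the example prompt are assumptions, since the pipeline(...) arguments and the full prompt template lie outside the visible hunks.

# Sketch of the post-commit flow (assumes transformers and torch are
# installed; the retrieval parts using chunks.pkl/metadata.pkl are omitted).
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# distilgpt2 is small and ungated, so no HF access token is needed.
model_name = "distilgpt2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# "text-generation" is an assumption: the task argument is not shown in the diff.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Illustrative prompt; the real app builds one from retrieved chunks.
prompt = "Answer in short and clear sentences.\nWhat does this app do?"

response = generator(prompt, max_new_tokens=150, do_sample=True, temperature=0.6)
text = response[0]["generated_text"]

# GPT-2-style models echo the prompt at the start of generated_text,
# so strip it off, exactly as the updated app.py does.
if prompt.strip() in text:
    text = text.split(prompt.strip())[-1].strip()

print(text)

One note on the dropped kwargs: device_map="auto" pulls in the accelerate package, so removing it (along with torch_dtype="auto") keeps a CPU-only Space free of that extra dependency, and distilgpt2 is small enough to load with plain defaults.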