Ankitajadhav committed on
Commit
9ccd468
1 Parent(s): af4db7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -24
app.py CHANGED
@@ -1,20 +1,19 @@
 
1
  import gradio as gr
2
  import copy
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
  import chromadb
6
  from sentence_transformers import SentenceTransformer
7
- import logging
8
-
9
- # Initialize logging
10
- logging.basicConfig(level=logging.INFO)
11
 
12
  # Initialize the Llama model
13
  llm = Llama(
14
- model_path=hf_hub_download(
15
- repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
16
- filename="Phi-3-mini-4k-instruct-q4.gguf",
17
- ),
 
 
18
  n_ctx=2048,
19
  n_gpu_layers=50, # Adjust based on your VRAM
20
  )
@@ -39,6 +38,9 @@ class VectorStore:
39
  # Example initialization (assuming you've already populated the vector store)
40
  vector_store = VectorStore("embedding_vector")
41
 
 
 
 
42
  def generate_text(
43
  message,
44
  history: list[tuple[str, str]],
@@ -56,8 +58,6 @@ def generate_text(
56
  input_prompt += f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] "
57
  input_prompt += f"{message} [/INST] "
58
 
59
- logging.info("Input prompt:\n%s", input_prompt) # Debugging output
60
-
61
  temp = ""
62
  output = llm(
63
  input_prompt,
@@ -71,28 +71,27 @@ def generate_text(
71
  )
72
  for out in output:
73
  temp += out["choices"][0]["text"]
74
- logging.info("Model output:\n%s", temp) # Log model output
75
  yield temp
76
 
77
  # Define the Gradio interface
78
- demo = gr.Interface(
79
- fn=generate_text,
80
- title="LLM Chatbot with ChromaDB Integration",
81
- description="Generate responses based on context and user queries.",
82
  examples=[
83
  ["I have leftover rice, what can I make out of it?"],
84
  ["Can I make lunch for two people with this?"],
85
  ],
86
- inputs=[
87
- gr.Textbox(label="Message"),
88
- gr.Textbox(label="System message", default="You are a friendly Chatbot."),
89
- gr.Textbox(label="History", default="[('USER', 'Hi there!')]"),
90
- gr.Slider(minimum=1, maximum=2048, step=1, default=512, label="Max new tokens"),
91
- gr.Slider(minimum=0.1, maximum=4.0, step=0.1, default=0.7, label="Temperature"),
92
- gr.Slider(minimum=0.1, maximum=1.0, step=0.05, default=0.95, label="Top-p (nucleus sampling)"),
 
 
93
  ],
94
- outputs=gr.Textbox(label="Response"),
95
- live=True,
96
  )
97
 
98
  if __name__ == "__main__":
 
1
+ import os
2
  import gradio as gr
3
  import copy
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
  import chromadb
7
  from sentence_transformers import SentenceTransformer
 
 
 
 
8
 
9
  # Initialize the Llama model
10
  llm = Llama(
11
+ # model_path=hf_hub_download(
12
+ # repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
13
+ # filename="Phi-3-mini-4k-instruct-q4.gguf",
14
+ # ),
15
+ model_path = "./models/Phi-3-mini-4k-instruct-gguf",
16
+ # model_path = "NicholasJohn/OpenBioLLM-Llama3-8B-Q5_K_M.gguf",
17
  n_ctx=2048,
18
  n_gpu_layers=50, # Adjust based on your VRAM
19
  )
 
38
  # Example initialization (assuming you've already populated the vector store)
39
  vector_store = VectorStore("embedding_vector")
40
 
41
+ # Populate with your data if not already done
42
+ # vector_store.populate_vectors(your_texts, your_ids)
43
+
44
  def generate_text(
45
  message,
46
  history: list[tuple[str, str]],
 
58
  input_prompt += f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] "
59
  input_prompt += f"{message} [/INST] "
60
 
 
 
61
  temp = ""
62
  output = llm(
63
  input_prompt,
 
71
  )
72
  for out in output:
73
  temp += out["choices"][0]["text"]
 
74
  yield temp
75
 
76
  # Define the Gradio interface
77
+ demo = gr.ChatInterface(
78
+ generate_text,
79
+ title="llama-cpp-python on GPU with ChromaDB",
80
+ description="Running LLM with context retrieval from ChromaDB",
81
  examples=[
82
  ["I have leftover rice, what can I make out of it?"],
83
  ["Can I make lunch for two people with this?"],
84
  ],
85
+ cache_examples=False,
86
+ retry_btn=None,
87
+ undo_btn="Delete Previous",
88
+ clear_btn="Clear",
89
+ additional_inputs=[
90
+ gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
91
+ gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
92
+ gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
93
+ gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
94
  ],
 
 
95
  )
96
 
97
  if __name__ == "__main__":