Spaces:

Ankitajadhav
/

Whats_Cooking

Runtime error

App Files Community

Ankitajadhav committed on Jul 8

Commit

9ccd468

•

1 Parent(s): af4db7d

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -24

app.py CHANGED Viewed

@@ -1,20 +1,19 @@
 import gradio as gr
 import copy
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import chromadb
 from sentence_transformers import SentenceTransformer
-import logging
-# Initialize logging
-logging.basicConfig(level=logging.INFO)
 # Initialize the Llama model
 llm = Llama(
-    model_path=hf_hub_download(
-        repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
-        filename="Phi-3-mini-4k-instruct-q4.gguf",
-    ),
     n_ctx=2048,
     n_gpu_layers=50,  # Adjust based on your VRAM
 )
@@ -39,6 +38,9 @@ class VectorStore:
 # Example initialization (assuming you've already populated the vector store)
 vector_store = VectorStore("embedding_vector")
 def generate_text(
     message,
     history: list[tuple[str, str]],
@@ -56,8 +58,6 @@ def generate_text(
         input_prompt += f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] "
     input_prompt += f"{message} [/INST] "
-    logging.info("Input prompt:\n%s", input_prompt)  # Debugging output
     temp = ""
     output = llm(
         input_prompt,
@@ -71,28 +71,27 @@ def generate_text(
     )
     for out in output:
         temp += out["choices"][0]["text"]
-        logging.info("Model output:\n%s", temp)  # Log model output
         yield temp
 # Define the Gradio interface
-demo = gr.Interface(
-    fn=generate_text,
-    title="LLM Chatbot with ChromaDB Integration",
-    description="Generate responses based on context and user queries.",
     examples=[
         ["I have leftover rice, what can I make out of it?"],
         ["Can I make lunch for two people with this?"],
     ],
-    inputs=[
-        gr.Textbox(label="Message"),
-        gr.Textbox(label="System message", default="You are a friendly Chatbot."),
-        gr.Textbox(label="History", default="[('USER', 'Hi there!')]"),
-        gr.Slider(minimum=1, maximum=2048, step=1, default=512, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, step=0.1, default=0.7, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, step=0.05, default=0.95, label="Top-p (nucleus sampling)"),
     ],
-    outputs=gr.Textbox(label="Response"),
-    live=True,
 )
 if __name__ == "__main__":

+import os
 import gradio as gr
 import copy
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import chromadb
 from sentence_transformers import SentenceTransformer
 # Initialize the Llama model
 llm = Llama(
+    # model_path=hf_hub_download(
+    #     repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
+    #     filename="Phi-3-mini-4k-instruct-q4.gguf",
+    # ),
+    model_path = "./models/Phi-3-mini-4k-instruct-gguf",
+    # model_path = "NicholasJohn/OpenBioLLM-Llama3-8B-Q5_K_M.gguf",
     n_ctx=2048,
     n_gpu_layers=50,  # Adjust based on your VRAM
 )
 # Example initialization (assuming you've already populated the vector store)
 vector_store = VectorStore("embedding_vector")
+# Populate with your data if not already done
+# vector_store.populate_vectors(your_texts, your_ids)
 def generate_text(
     message,
     history: list[tuple[str, str]],
         input_prompt += f"{interaction[0]} [/INST] {interaction[1]} </s><s> [INST] "
     input_prompt += f"{message} [/INST] "
     temp = ""
     output = llm(
         input_prompt,
     )
     for out in output:
         temp += out["choices"][0]["text"]
         yield temp
 # Define the Gradio interface
+demo = gr.ChatInterface(
+    generate_text,
+    title="llama-cpp-python on GPU with ChromaDB",
+    description="Running LLM with context retrieval from ChromaDB",
     examples=[
         ["I have leftover rice, what can I make out of it?"],
         ["Can I make lunch for two people with this?"],
     ],
+    cache_examples=False,
+    retry_btn=None,
+    undo_btn="Delete Previous",
+    clear_btn="Clear",
+    additional_inputs=[
+        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
 )
 if __name__ == "__main__":