mvasim committed on
Commit c32b415
1 Parent(s): 381c14d

gradio_app added

Files changed (3)
  1. Dockerfile +1 -1
  2. app.py +11 -22
  3. gradio_app.py +69 -0
Dockerfile CHANGED
@@ -21,4 +21,4 @@ RUN huggingface-cli download \
     --local-dir . \
     --local-dir-use-symlinks False
 
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["uvicorn", "gradio_app:app", "--host", "0.0.0.0", "--port", "7860"]
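Note: as committed, gradio_app.py (added below) starts its own server via .queue().launch(...) and defines no module-level ASGI object named app, so the new CMD "uvicorn gradio_app:app" would fail to find the attribute. A minimal sketch of one way to satisfy it, assuming the answer_query function defined in gradio_app.py and Gradio's mount_gradio_app FastAPI helper; this is an illustration, not part of the commit:

# Hypothetical sketch, not part of this commit: uvicorn needs a module-level
# ASGI object named "app", so mount the Gradio UI on FastAPI instead of
# calling launch().
from fastapi import FastAPI
import gradio as gr

demo = gr.ChatInterface(fn=answer_query)  # answer_query as defined in gradio_app.py
app = FastAPI()
app = gr.mount_gradio_app(app, demo, path="/")  # Gradio's helper for serving under FastAPI/uvicorn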
app.py CHANGED
@@ -13,7 +13,7 @@ MODEL_NAME = "llama-2-7b-chat.Q5_K_M.gguf"
 
 DOWNLOAD_MODEL = f"huggingface-cli download {REPO} {MODEL_NAME} --local-dir . --local-dir-use-symlinks False"
 
-MODEL_PATH = "llama-2-7b-chat.Q5_K_M.gguf"
+MODEL_PATH = "models/llama-2-7b-chat.Q5_K_M.gguf"
 
 if not os.path.exists(MODEL_PATH):
     os.system(DOWNLOAD_MODEL)
@@ -46,24 +46,13 @@ llm_chain = LLMChain(prompt=prompt, llm=llm)
 # question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
 # llm_chain.run(question)
 
-title = "Welcome to Open Source LLM"
-
-description = "This is a Llama-2-GGUF"
-
-def answer_query(message, history):
-    print(message)
-    message = llm_chain.invoke(message)
-    print(message, history)
-    return message
-
-# Gradio chat interface
-gr.ChatInterface(
-    fn=answer_query,
-    title=title,
-    description=description,
-    examples=[
-        ["What is a Large Language Model?"],
-        ["What's 9+2-1?"],
-        ["Write Python code to print the Fibonacci sequence"]
-    ]
-).queue().launch(server_name="0.0.0.0")
+if __name__ == "__main__":
+    print("Hello, Friend")
+    chat = True
+    while chat:
+        print("Enter question or q to quit.")
+        question = input("Question: ")
+        if question == "q":
+            chat = False
+        response = llm_chain.invoke(question)
+        print(response['text'])
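Two caveats in the new CLI loop, as committed: entering "q" sets chat to False but execution still falls through to llm_chain.invoke(question), so the model answers "q" once before the program exits; and DOWNLOAD_MODEL still saves to --local-dir ., so the new models/ path may never exist and the download would re-run on every start. A minimal sketch of a loop that exits immediately (hypothetical, not part of the commit):

if __name__ == "__main__":
    while True:
        question = input("Question (q to quit): ")
        if question == "q":
            break  # leave the loop before invoking the chain, so "q" is never sent to the model
        response = llm_chain.invoke(question)
        print(response["text"])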
gradio_app.py ADDED
@@ -0,0 +1,69 @@
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+from langchain_community.llms import LlamaCpp
+import gradio as gr
+import os
+
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+
+REPO = "TheBloke/Llama-2-7B-Chat-GGUF"
+MODEL_NAME = "llama-2-7b-chat.Q5_K_M.gguf"
+
+DOWNLOAD_MODEL = f"huggingface-cli download {REPO} {MODEL_NAME} --local-dir . --local-dir-use-symlinks False"
+
+MODEL_PATH = "llama-2-7b-chat.Q5_K_M.gguf"
+
+if not os.path.exists(MODEL_PATH):
+    os.system(DOWNLOAD_MODEL)
+
+TEMPLATE = """
+
+You are a helpful AI Assistant created by Mohammed Vasim. Mohammed Vasim is an AI Engineer.
+
+Question: {question}
+
+Answer: helpful answer"""
+
+prompt = PromptTemplate.from_template(TEMPLATE)
+
+# Callbacks support token-wise streaming
+callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+
+# Make sure the model path is correct for your system!
+llm = LlamaCpp(
+    model_path=MODEL_PATH,
+    temperature=0.75,
+    max_tokens=2000,
+    top_p=1,
+    callback_manager=callback_manager,
+    verbose=True,  # Verbose is required to pass to the callback manager
+)
+
+llm_chain = LLMChain(prompt=prompt, llm=llm)
+
+# question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
+# llm_chain.run(question)
+
+title = "Welcome to Open Source LLM"
+
+description = "This is a Llama-2-GGUF"
+
+def answer_query(message, history):
+    print(message)
+    message = llm_chain.invoke(message)
+    print(message, history)
+    return message
+
+# Gradio chat interface
+gr.ChatInterface(
+    fn=answer_query,
+    title=title,
+    description=description,
+    examples=[
+        ["What is a Large Language Model?"],
+        ["What's 9+2-1?"],
+        ["Write Python code to print the Fibonacci sequence"]
+    ]
+).queue().launch(server_name="0.0.0.0")
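One caveat in answer_query as committed: LLMChain.invoke returns a dict (roughly {"question": ..., "text": ...}), while gr.ChatInterface expects its fn to return a string, so the chat window would display the dict's repr rather than just the answer. A minimal variant returning only the generated text (hypothetical, not part of the commit):

def answer_query(message, history):
    result = llm_chain.invoke({"question": message})  # invoke returns a dict
    return result["text"]  # hand only the generated text to the chat UI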