mvasim committed on
Commit c32b415
1 Parent(s): 381c14d

gradio_app added

Files changed (3)
  1. Dockerfile +1 -1
  2. app.py +11 -22
  3. gradio_app.py +69 -0
Dockerfile CHANGED
@@ -21,4 +21,4 @@ RUN huggingface-cli download \
     --local-dir . \
     --local-dir-use-symlinks False
 
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["uvicorn", "gradio_app:app", "--host", "0.0.0.0", "--port", "7860"]
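Note: as committed, gradio_app.py (added below) starts its own server via .queue().launch(...) and defines no module-level ASGI object named app, so the new CMD "uvicorn gradio_app:app" would fail to find the attribute. A minimal sketch of one way to satisfy it, assuming the answer_query function defined in gradio_app.py and Gradio's mount_gradio_app FastAPI helper; this is an illustration, not part of the commit:

# Hypothetical sketch, not part of this commit: uvicorn needs a module-level
# ASGI object named "app", so mount the Gradio UI on FastAPI instead of
# calling launch().
from fastapi import FastAPI
import gradio as gr

demo = gr.ChatInterface(fn=answer_query)  # answer_query as defined in gradio_app.py
app = FastAPI()
app = gr.mount_gradio_app(app, demo, path="/")  # Gradio's helper for serving under FastAPI/uvicorn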
app.py CHANGED
@@ -13,7 +13,7 @@ MODEL_NAME = "llama-2-7b-chat.Q5_K_M.gguf"
 
 DOWNLOAD_MODEL = f"huggingface-cli download {REPO} {MODEL_NAME} --local-dir . --local-dir-use-symlinks False"
 
-MODEL_PATH = "llama-2-7b-chat.Q5_K_M.gguf"
+MODEL_PATH = "models/llama-2-7b-chat.Q5_K_M.gguf"
 
 if not os.path.exists(MODEL_PATH):
     os.system(DOWNLOAD_MODEL)
@@ -46,24 +46,13 @@ llm_chain = LLMChain(prompt=prompt, llm=llm)
 # question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
 # llm_chain.run(question)
 
-title = "Welcome to Open Source LLM"
-
-description = "This is a Llama-2-GGUF"
-
-def answer_query(message, history):
-    print(message)
-    message = llm_chain.invoke(message)
-    print(message, history)
-    return message
-
-# Gradio chat interface
-gr.ChatInterface(
-    fn=answer_query,
-    title=title,
-    description=description,
-    examples=[
-        ["What is a Large Language Model?"],
-        ["What's 9+2-1?"],
-        ["Write Python code to print the Fibonacci sequence"]
-    ]
-).queue().launch(server_name="0.0.0.0")
+if __name__ == "__main__":
+    print("Hello, Friend")
+    chat = True
+    while chat:
+        print("Enter question or q to quit.")
+        question = input("Question: ")
+        if question == "q":
+            chat = False
+        response = llm_chain.invoke(question)
+        print(response['text'])
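Two caveats in the new CLI loop, as committed: entering "q" sets chat to False but execution still falls through to llm_chain.invoke(question), so the model answers "q" once before the program exits; and DOWNLOAD_MODEL still saves to --local-dir ., so the new models/ path may never exist and the download would re-run on every start. A minimal sketch of a loop that exits immediately (hypothetical, not part of the commit):

if __name__ == "__main__":
    while True:
        question = input("Question (q to quit): ")
        if question == "q":
            break  # leave the loop before invoking the chain, so "q" is never sent to the model
        response = llm_chain.invoke(question)
        print(response["text"])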
gradio_app.py ADDED
@@ -0,0 +1,69 @@
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+from langchain_community.llms import LlamaCpp
+import gradio as gr
+import os
+
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+
+REPO = "TheBloke/Llama-2-7B-Chat-GGUF"
+MODEL_NAME = "llama-2-7b-chat.Q5_K_M.gguf"
+
+DOWNLOAD_MODEL = f"huggingface-cli download {REPO} {MODEL_NAME} --local-dir . --local-dir-use-symlinks False"
+
+MODEL_PATH = "llama-2-7b-chat.Q5_K_M.gguf"
+
+if not os.path.exists(MODEL_PATH):
+    os.system(DOWNLOAD_MODEL)
+
+TEMPLATE = """
+
+You are a helpful AI Assistant created by Mohammed Vasim. Mohammed Vasim is an AI Engineer.
+
+Question: {question}
+
+Answer: helpful answer"""
+
+prompt = PromptTemplate.from_template(TEMPLATE)
+
+# Callbacks support token-wise streaming
+callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+
+# Make sure the model path is correct for your system!
+llm = LlamaCpp(
+    model_path=MODEL_PATH,
+    temperature=0.75,
+    max_tokens=2000,
+    top_p=1,
+    callback_manager=callback_manager,
+    verbose=True,  # Verbose is required to pass to the callback manager
+)
+
+llm_chain = LLMChain(prompt=prompt, llm=llm)
+
+# question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
+# llm_chain.run(question)
+
+title = "Welcome to Open Source LLM"
+
+description = "This is a Llama-2-GGUF"
+
+def answer_query(message, history):
+    print(message)
+    message = llm_chain.invoke(message)
+    print(message, history)
+    return message
+
+# Gradio chat interface
+gr.ChatInterface(
+    fn=answer_query,
+    title=title,
+    description=description,
+    examples=[
+        ["What is a Large Language Model?"],
+        ["What's 9+2-1?"],
+        ["Write Python code to print the Fibonacci sequence"]
+    ]
+).queue().launch(server_name="0.0.0.0")
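One caveat in answer_query as committed: LLMChain.invoke returns a dict (roughly {"question": ..., "text": ...}), while gr.ChatInterface expects its fn to return a string, so the chat window would display the dict's repr rather than just the answer. A minimal variant returning only the generated text (hypothetical, not part of the commit):

def answer_query(message, history):
    result = llm_chain.invoke({"question": message})  # invoke returns a dict
    return result["text"]  # hand only the generated text to the chat UI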