LennardZuendorf committed • Commit 0f77c21 • Parent(s): 69a80e6

feat: setting up working fastapi application

Files changed:
- README.md +1 -0
- __init__.py +0 -0
- app.py +5 -8
- main.py +7 -0
- model/__init__.py +0 -0
- model/llama2.py +0 -66
- requirements.txt +3 -1
README.md
CHANGED
@@ -11,3 +11,4 @@ license: mit
 ---
 
 
+
__init__.py
ADDED
File without changes
app.py
CHANGED
@@ -4,13 +4,10 @@
 import gradio as gr
 import markdown
 
-
 # internal imports
 from model import mistral
-from explanation import interpret as shap
-
 
-# function to load md files in
+# function to load md files in python as a string
 def load_md(path):
 
 # credit: official python-markdown documentation (https://python-markdown.github.io/reference/)
@@ -35,7 +32,7 @@ with gr.Blocks() as ui:
 gr.Markdown(
 """
 ### ChatBot Demo
-Mitral AI 7B
+Mitral AI 7B notebooks fine-tuned for instruction and fully open source (see at [HGF](https://huggingface.co/mistralai/Mistral-7B-v0.1))
 """)
 # row with chatbot ui displaying "conversation" with the model (see documentation: https://www.gradio.app/docs/chatbot)
 with gr.Row():
@@ -89,7 +86,7 @@ with gr.Blocks() as ui:
 """)
 
 # model overview tab for transparency
-with gr.Tab("
+with gr.Tab("notebooks Overview"):
 with gr.Tab("Mistral 7B Instruct"):
 gr.Markdown(value=load_md("./model/mistral.md"))
 with gr.Tab("LlaMa 2 7B Chat"):
@@ -98,8 +95,8 @@ with gr.Blocks() as ui:
 # final row to show legal information - credits, data protection and link to the LICENSE on GitHub
 with gr.Row():
 with gr.Accordion("Credits, Data Protection and License", open=False):
-gr.Markdown(value=load_md("
+gr.Markdown(value=load_md("public/credits_dataprotection_license.md"))
 
 # launch function for Gradio Interface
 if __name__ == "__main__":
-ui.launch(
+ui.launch(share=False)
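The hunk above only shows the signature and comment of load_md; its body is not part of this commit. A minimal sketch of what such a helper typically looks like follows, based on the comment "function to load md files in python as a string" and the python-markdown reference credited in the file. The body is an assumption, not the repository's actual implementation.

```python
# Sketch only - the body of load_md is not shown in this commit.
# Assumption: it reads a markdown file and returns its contents as a string,
# optionally converted to HTML with python-markdown (which app.py imports).
import markdown

def load_md(path):
    # read the markdown file into a string
    with open(path, "r", encoding="utf-8") as file:
        text = file.read()
    # convert to HTML as described in the python-markdown reference
    # (https://python-markdown.github.io/reference/)
    return markdown.markdown(text)
```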
main.py
ADDED
@@ -0,0 +1,7 @@
+from fastapi import FastAPI
+import gradio as gr
+import app as gradio_app
+
+app = FastAPI()
+
+app = gr.mount_gradio_app(app, gradio_app.ui, path="/")
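With the Gradio Blocks mounted onto the FastAPI app, the Space is presumably served by an ASGI server rather than by ui.launch(). A minimal sketch of how this could be started with uvicorn (added to requirements.txt in this commit); the host and port values are assumptions, 7860 being the port Hugging Face Spaces normally expects.

```python
# Sketch, not part of the commit: serve the combined FastAPI/Gradio app with uvicorn.
# Host and port are assumptions (7860 is the usual Hugging Face Spaces port).
import uvicorn

if __name__ == "__main__":
    uvicorn.run("main:app", host="0.0.0.0", port=7860)
```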
model/__init__.py
ADDED
File without changes
model/llama2.py
CHANGED
@@ -1,66 +0,0 @@
-from huggingface_hub import InferenceClient
-import os
-
-# huggingface token used to load closed off models
-token = os.environ.get("HGFTOKEN")
-
-# interference client created from mistral 7b instruction fine tuned model
-# credit: copied 1:1 from Hugging Face, Inc/ Omar Sanseviero (see https://huggingface.co/spaces/osanseviero/mistral-super-fast/)
-interference = InferenceClient(
-    "mistralai/Mistral-7B-Instruct-v0.1"
-)
-
-# default model settings
-model_temperature = 0.7
-model_max_new_tokens = 320
-model_top_p = 0.95
-model_repetition_penalty = 1.1
-
-# chat function - basically the main function calling other functions and returning a response to showcase in chatbot ui
-def chat (prompt, history,):
-
-    # creating formatted prompt and calling for an answer from the model
-    formatted_prompt = format_prompt(prompt, history)
-    answer=respond(formatted_prompt)
-
-    # updating the chat history with the new answer
-    history.append((prompt, answer))
-
-    # returning the chat history to be displayed in the chatbot ui
-    return "",history
-
-# function to format prompt in a way that is understandable for the text generation model
-# credit: copied 1:1 from Hugging Face, Inc/ Omar Sanseviero (see https://huggingface.co/spaces/osanseviero/mistral-super-fast/)
-def format_prompt(message, history):
-    prompt = "<s>"
-
-    # labeling each message in the history as bot or user
-    for user_prompt, bot_response in history:
-        prompt += f"[INST] {user_prompt} [/INST]"
-        prompt += f" {bot_response}</s> "
-    prompt += f"[INST] {message} [/INST]"
-    return prompt
-
-# function to get the response
-# credit: minimally changed from Hugging Face, Inc/ Omar Sanseviero (see https://huggingface.co/spaces/osanseviero/mistral-super-fast/)
-def respond(formatted_prompt):
-
-    # setting model temperature and
-    temperature = float(model_temperature)
-    if temperature < 1e-2:
-        temperature = 1e-2
-    top_p = float(model_top_p)
-
-    # creating model arguments/settings
-    generate_kwargs = dict(
-        temperature=temperature,
-        max_new_tokens=model_max_new_tokens,
-        top_p=top_p,
-        repetition_penalty=model_repetition_penalty,
-        do_sample=True,
-        seed=42,
-    )
-
-    # calling for model output and returning it
-    output = interference.text_generation(formatted_prompt, **generate_kwargs, stream=False, details=True, return_full_text=False).generated_text
-    return output
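Although this file is deleted, app.py still imports a model module (model/mistral.py, not shown in this diff), so the [INST] instruction template used above is still relevant. As a worked example of what format_prompt from the removed file produced, with invented prompt/response strings:

```python
# Worked example (not in the commit): what format_prompt from the removed file
# would return for a one-turn history. The strings are made up for illustration.
history = [("What is SHAP?", "SHAP is a model explanation method.")]
message = "Explain it in one sentence."

# format_prompt(message, history) would produce:
# "<s>[INST] What is SHAP? [/INST] SHAP is a model explanation method.</s> [INST] Explain it in one sentence. [/INST]"
```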
requirements.txt
CHANGED
@@ -4,4 +4,6 @@ torch~=2.1.1
 shap~=0.43.0
 accelerate~=0.24.1
 markdown~=3.5.1
-huggingface_hub~=0.19.4
+huggingface_hub~=0.19.4
+fastapi~=0.104.1
+uvicorn~=0.24.0