LennardZuendorf committed
Commit 0f77c21
1 Parent(s): 69a80e6

feat: setting up working fastapi application

Files changed (7):
  1. README.md +1 -0
  2. __init__.py +0 -0
  3. app.py +5 -8
  4. main.py +7 -0
  5. model/__init__.py +0 -0
  6. model/llama2.py +0 -66
  7. requirements.txt +3 -1
README.md CHANGED
@@ -11,3 +11,4 @@ license: mit
 ---
 
 
+
__init__.py ADDED
File without changes
app.py CHANGED
@@ -4,13 +4,10 @@
 import gradio as gr
 import markdown
 
-
 # internal imports
 from model import mistral
-from explanation import interpret as shap
-
 
-# function to load md files in pthon as a string
+# function to load md files in python as a string
 def load_md(path):
 
     # credit: official python-markdown documentation (https://python-markdown.github.io/reference/)
@@ -35,7 +32,7 @@ with gr.Blocks() as ui:
     gr.Markdown(
         """
         ### ChatBot Demo
-        Mitral AI 7B Model fine-tuned for instruction and fully open source (see at [HGF](https://huggingface.co/mistralai/Mistral-7B-v0.1))
+        Mitral AI 7B notebooks fine-tuned for instruction and fully open source (see at [HGF](https://huggingface.co/mistralai/Mistral-7B-v0.1))
         """)
     # row with chatbot ui displaying "conversation" with the model (see documentation: https://www.gradio.app/docs/chatbot)
     with gr.Row():
@@ -89,7 +86,7 @@ with gr.Blocks() as ui:
         """)
 
     # model overview tab for transparency
-    with gr.Tab("Model Overview"):
+    with gr.Tab("notebooks Overview"):
        with gr.Tab("Mistral 7B Instruct"):
            gr.Markdown(value=load_md("./model/mistral.md"))
        with gr.Tab("LlaMa 2 7B Chat"):
@@ -98,8 +95,8 @@ with gr.Blocks() as ui:
     # final row to show legal information - credits, data protection and link to the LICENSE on GitHub
     with gr.Row():
        with gr.Accordion("Credits, Data Protection and License", open=False):
-           gr.Markdown(value=load_md("./public/credits_dataprotection_license.md"))
+           gr.Markdown(value=load_md("public/credits_dataprotection_license.md"))
 
 # launch function for Gradio Interface
 if __name__ == "__main__":
-    ui.launch(debug=True)
+    ui.launch(share=False)
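The hunk above shows only load_md's signature and the credited reference. As a hedged sketch, its body presumably reads the file and converts it with python-markdown, roughly like this (the body below is an illustration, not part of this diff):

import markdown

def load_md(path):
    # read a markdown file and convert it to an HTML string for gr.Markdown
    with open(path, "r", encoding="utf-8") as file:
        return markdown.markdown(file.read())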
main.py ADDED
@@ -0,0 +1,7 @@
+from fastapi import FastAPI
+import gradio as gr
+import app as gradio_app
+
+app = FastAPI()
+
+app = gr.mount_gradio_app(app, gradio_app.ui, path="/")
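main.py replaces ui.launch() as the entry point: gr.mount_gradio_app attaches the Gradio Blocks from app.py to a FastAPI application at the root path, so an ASGI server serves the UI. A minimal sketch of running it with the uvicorn pinned in requirements.txt below; host and port are assumptions (7860 is the customary Hugging Face Spaces port), not part of the commit:

import uvicorn

# serve the FastAPI app defined in main.py; "main:app" points at the module above
uvicorn.run("main:app", host="0.0.0.0", port=7860)

Equivalently, from a shell: uvicorn main:app --host 0.0.0.0 --port 7860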
model/__init__.py ADDED
File without changes
model/llama2.py CHANGED
@@ -1,66 +0,0 @@
-from huggingface_hub import InferenceClient
-import os
-
-# huggingface token used to load closed off models
-token = os.environ.get("HGFTOKEN")
-
-# interference client created from mistral 7b instruction fine tuned model
-# credit: copied 1:1 from Hugging Face, Inc/ Omar Sanseviero (see https://huggingface.co/spaces/osanseviero/mistral-super-fast/)
-interference = InferenceClient(
-    "mistralai/Mistral-7B-Instruct-v0.1"
-)
-
-# default model settings
-model_temperature = 0.7
-model_max_new_tokens = 320
-model_top_p = 0.95
-model_repetition_penalty = 1.1
-
-# chat function - basically the main function calling other functions and returning a response to showcase in chatbot ui
-def chat(prompt, history):
-
-    # creating formatted prompt and calling for an answer from the model
-    formatted_prompt = format_prompt(prompt, history)
-    answer = respond(formatted_prompt)
-
-    # updating the chat history with the new answer
-    history.append((prompt, answer))
-
-    # returning the chat history to be displayed in the chatbot ui
-    return "", history
-
-# function to format prompt in a way that is understandable for the text generation model
-# credit: copied 1:1 from Hugging Face, Inc/ Omar Sanseviero (see https://huggingface.co/spaces/osanseviero/mistral-super-fast/)
-def format_prompt(message, history):
-    prompt = "<s>"
-
-    # labeling each message in the history as bot or user
-    for user_prompt, bot_response in history:
-        prompt += f"[INST] {user_prompt} [/INST]"
-        prompt += f" {bot_response}</s> "
-    prompt += f"[INST] {message} [/INST]"
-    return prompt
-
-# function to get the response
-# credit: minimally changed from Hugging Face, Inc/ Omar Sanseviero (see https://huggingface.co/spaces/osanseviero/mistral-super-fast/)
-def respond(formatted_prompt):
-
-    # setting model temperature and
-    temperature = float(model_temperature)
-    if temperature < 1e-2:
-        temperature = 1e-2
-    top_p = float(model_top_p)
-
-    # creating model arguments/settings
-    generate_kwargs = dict(
-        temperature=temperature,
-        max_new_tokens=model_max_new_tokens,
-        top_p=top_p,
-        repetition_penalty=model_repetition_penalty,
-        do_sample=True,
-        seed=42,
-    )
-
-    # calling for model output and returning it
-    output = interference.text_generation(formatted_prompt, **generate_kwargs, stream=False, details=True, return_full_text=False).generated_text
-    return output
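For reference, the removed format_prompt wrapped the conversation in Mistral's [INST] instruction format. A small illustration with hypothetical inputs (not from the commit):

# hypothetical inputs, purely to show the string the removed format_prompt built
history = [("What is Mistral 7B?", "An open 7B-parameter language model.")]
message = "Is it open source?"
# format_prompt(message, history) would return:
# "<s>[INST] What is Mistral 7B? [/INST] An open 7B-parameter language model.</s> [INST] Is it open source? [/INST]"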
requirements.txt CHANGED
@@ -4,4 +4,6 @@ torch~=2.1.1
 shap~=0.43.0
 accelerate~=0.24.1
 markdown~=3.5.1
-huggingface_hub~=0.19.4
+huggingface_hub~=0.19.4
+fastapi~=0.104.1
+uvicorn~=0.24.0