gokaygokay committed
Commit 336c407
Parent: 25dd0d5

Create app.py

Files changed (1):
  app.py +146 -0
app.py ADDED
@@ -0,0 +1,146 @@
+ import spaces
+ from llama_cpp import Llama
+ from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
+ from llama_cpp_agent.providers import LlamaCppPythonProvider
+ from llama_cpp_agent.chat_history import BasicChatHistory
+ from llama_cpp_agent.chat_history.messages import Roles
+ import gradio as gr
+ from huggingface_hub import hf_hub_download
+
+ # Fetch the quantized GGUF weights into ./models once, at startup.
+ hf_hub_download(
+     repo_id="MaziyarPanahi/Mistral-Nemo-Instruct-2407-GGUF",
+     filename="Mistral-Nemo-Instruct-2407.Q5_K_M.gguf",
+     local_dir="./models"
+ )
+
+ # Module-level cache so the loaded model survives across calls and is
+ # only reloaded when a different model is selected.
+ llm = None
+ llm_model = None
+
+ @spaces.GPU(duration=120)
+ def respond(
+     message,
+     history: list[tuple[str, str]],
+     model,
+     system_message,
+     max_tokens,
+     temperature,
+     top_p,
+     top_k,
+     repeat_penalty,
+ ):
+     chat_template = MessagesFormatterType.MISTRAL
+
+     global llm
+     global llm_model
+
+     # (Re)load the model only on first use or when the selection changes.
+     if llm is None or llm_model != model:
+         llm = Llama(
+             model_path=f"models/{model}",
+             flash_attn=True,
+             n_gpu_layers=81,
+             n_batch=1024,
+             n_ctx=32768,
+         )
+         llm_model = model
+
+     provider = LlamaCppPythonProvider(llm)
+
+     agent = LlamaCppAgent(
+         provider,
+         system_prompt=system_message,
+         predefined_messages_formatter_type=chat_template,
+         debug_output=True
+     )
+
+     # Copy the provider defaults, then apply the UI-selected sampling values.
+     settings = provider.get_provider_default_settings()
+     settings.temperature = temperature
+     settings.top_k = top_k
+     settings.top_p = top_p
+     settings.max_tokens = max_tokens
+     settings.repeat_penalty = repeat_penalty
+     settings.stream = True
+
+     # Rebuild the chat history llama-cpp-agent expects from the
+     # (user, assistant) tuples Gradio passes in.
+     messages = BasicChatHistory()
+
+     for msg in history:
+         user = {
+             'role': Roles.user,
+             'content': msg[0]
+         }
+         assistant = {
+             'role': Roles.assistant,
+             'content': msg[1]
+         }
+         messages.add_message(user)
+         messages.add_message(assistant)
+
+     stream = agent.get_chat_response(
+         message,
+         llm_sampling_settings=settings,
+         chat_history=messages,
+         returns_streaming_generator=True,
+         print_output=False
+     )
+
+     # Stream tokens to the UI by yielding the accumulated text so far.
+     outputs = ""
+     for output in stream:
+         outputs += output
+         yield outputs
+
+ description = """<p><center>
+ <a href="https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407" target="_blank">[Instruct Model]</a>
+ <a href="https://huggingface.co/mistralai/Mistral-Nemo-Base-2407" target="_blank">[Base Model]</a>
+ <a href="https://huggingface.co/second-state/Mistral-Nemo-Instruct-2407-GGUF" target="_blank">[GGUF Version]</a>
+ </center></p>
+ """
+
+ demo = gr.ChatInterface(
+     respond,
+     additional_inputs=[
+         gr.Dropdown(
+             ['Mistral-Nemo-Instruct-2407.Q5_K_M.gguf'],
+             value="Mistral-Nemo-Instruct-2407.Q5_K_M.gguf",
+             label="Model"
+         ),
+         gr.Textbox(value="You are a helpful assistant.", label="System message"),
+         gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(
+             minimum=0.1,
+             maximum=1.0,
+             value=0.95,
+             step=0.05,
+             label="Top-p",
+         ),
+         gr.Slider(
+             minimum=0,
+             maximum=100,
+             value=40,
+             step=1,
+             label="Top-k",
+         ),
+         gr.Slider(
+             minimum=0.0,
+             maximum=2.0,
+             value=1.1,
+             step=0.1,
+             label="Repetition penalty",
+         ),
+     ],
+     retry_btn="Retry",
+     undo_btn="Undo",
+     clear_btn="Clear",
+     submit_btn="Send",
+     title="Chat with Mistral-Nemo using llama.cpp",
+     description=description,
+     chatbot=gr.Chatbot(
+         scale=1,
+         likeable=False,
+         show_copy_button=True
+     )
+ )
+
+ demo.launch(debug=True)
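
For reference, the imports above imply roughly the following Space dependencies. A minimal requirements.txt sketch (package names only; pinned versions and the CUDA build of llama-cpp-python are deployment-dependent assumptions, and the `spaces` package is provided by ZeroGPU Spaces):

  llama-cpp-python
  llama-cpp-agent
  gradio
  huggingface_hub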