shauninkripped committed · verified
Commit 959ec93 · 1 Parent(s): db5e34b

Update app.py

Files changed (1):
    app.py  +141  -1
app.py CHANGED
@@ -6,6 +6,9 @@ For more information on `huggingface_hub` Inference API support, please check th
 """
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
+"""
+test web research
+"""
 
 def respond(
     message,
@@ -39,6 +42,142 @@ def respond(
         response += token
         yield response
 
+hf_hub_download(
+    repo_id="bartowski/Mistral-7B-Instruct-v0.3-GGUF",
+    filename="Mistral-7B-Instruct-v0.3-Q6_K.gguf",
+    local_dir="./models"
+)
+
+
+def get_context_by_model(model_name):
+    model_context_limits = {
+        "Mistral-7B-Instruct-v0.3-Q6_K.gguf": 32768,
+        "Meta-Llama-3-8B-Instruct-Q6_K.gguf": 8192
+    }
+    return model_context_limits.get(model_name, None)
+
+
+def get_messages_formatter_type(model_name):
+    from llama_cpp_agent import MessagesFormatterType
+    if "Meta" in model_name or "aya" in model_name:
+        return MessagesFormatterType.LLAMA_3
+    elif "Mistral" in model_name:
+        return MessagesFormatterType.MISTRAL
+    elif "Einstein-v6-7B" in model_name or "dolphin" in model_name:
+        return MessagesFormatterType.CHATML
+    elif "Phi" in model_name:
+        return MessagesFormatterType.PHI_3
+    else:
+        return MessagesFormatterType.CHATML
+
+
+@spaces.GPU(duration=120)
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    temperature,
+    top_p,
+    top_k,
+    repetition_penalty,
+):
+    chat_template = get_messages_formatter_type("Mistral-7B-Instruct-v0.3-Q6_K.gguf")
+    llm = Llama(
+        model_path=f"models/Mistral-7B-Instruct-v0.3-Q6_K.gguf",
+        flash_attn=True,
+        n_gpu_layers=33,
+        n_batch=1024,
+        n_ctx=get_context_by_model("Mistral-7B-Instruct-v0.3-Q6_K.gguf"),
+    )
+    provider = LlamaCppPythonProvider(llm)
+    search_tool = WebSearchTool(
+        llm_provider=provider,
+        message_formatter_type=chat_template,
+        model_max_context_tokens=get_context_by_model("Mistral-7B-Instruct-v0.3-Q6_K.gguf"),
+        max_tokens_search_results=12000,
+        max_tokens_per_summary=2048,
+    )
+
+    web_search_agent = LlamaCppAgent(
+        provider,
+        system_prompt=web_search_system_prompt,
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True,
+    )
+
+    answer_agent = LlamaCppAgent(
+        provider,
+        system_prompt=system_message,
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True,
+    )
+
+    settings = provider.get_provider_default_settings()
+    settings.stream = False
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
+
+    settings.max_tokens = 2048
+    settings.repeat_penalty = repetition_penalty
+
+    output_settings = LlmStructuredOutputSettings.from_functions(
+        [search_tool.get_tool()], add_thoughts_and_reasoning_field=True
+    )
+
+    messages = BasicChatHistory()
+
+    for msn in history:
+        user = {"role": Roles.user, "content": msn[0]}
+        assistant = {"role": Roles.assistant, "content": msn[1]}
+        messages.add_message(user)
+        messages.add_message(assistant)
+
+    result = web_search_agent.get_chat_response(
+        f"Current Date and Time(d/m/y, h:m:s): {datetime.datetime.now().strftime('%d/%m/%Y, %H:%M:%S')}\n\nUser Query: " + message,
+        llm_sampling_settings=settings,
+        structured_output_settings=output_settings,
+        add_message_to_chat_history=False,
+        add_response_to_chat_history=False,
+        print_output=False,
+    )
+
+    outputs = ""
+
+    settings.stream = True
+    response_text = answer_agent.get_chat_response(
+        f"Write a detailed and complete research document that fulfills the following user request: '{message}', based on the information below.\n\n"
+        + result[0]["return_value"],
+        role=Roles.tool,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False,
+    )
+
+    for text in response_text:
+        outputs += text
+        yield outputs
+
+    output_settings = LlmStructuredOutputSettings.from_pydantic_models(
+        [CitingSources], LlmStructuredOutputType.object_instance
+    )
+
+    citing_sources = answer_agent.get_chat_response(
+        "Cite the sources you used in your response.",
+        role=Roles.tool,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=False,
+        structured_output_settings=output_settings,
+        print_output=False,
+    )
+    outputs += "\n\nSources:\n"
+    outputs += "\n".join(citing_sources.sources)
+    yield outputs
+
+
+
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
@@ -60,4 +199,5 @@ demo = gr.ChatInterface(
 
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
+
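Note on the added code: none of the hunks above touch the import section of app.py, yet the new respond() references hf_hub_download, Llama, LlamaCppAgent, LlamaCppPythonProvider, WebSearchTool, BasicChatHistory, Roles, LlmStructuredOutputSettings, LlmStructuredOutputType, web_search_system_prompt, spaces, datetime, and a CitingSources model. The sketch below shows what that supporting code would plausibly look like, assuming the llama-cpp-python and llama-cpp-agent packages; the exact module paths and the CitingSources field are assumptions, not part of this commit.

# Hypothetical supporting code for the added respond(); not part of this diff.
# Module paths follow llama-cpp-python / llama-cpp-agent conventions and are
# assumptions about what the rest of app.py contains.
import datetime

import gradio as gr
import spaces  # Hugging Face Spaces ZeroGPU decorator used by @spaces.GPU
from huggingface_hub import InferenceClient, hf_hub_download
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
from llama_cpp_agent.llm_output_settings import (
    LlmStructuredOutputSettings,
    LlmStructuredOutputType,
)
from llama_cpp_agent.tools import WebSearchTool
from llama_cpp_agent.prompt_templates import web_search_system_prompt
from pydantic import BaseModel, Field


class CitingSources(BaseModel):
    # Structured-output model parsed by the final "cite your sources" call;
    # a single `sources` field matches the `citing_sources.sources` access above.
    sources: list[str] = Field(
        ...,
        description="List of the source URLs used to answer the user query.",
    )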
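The commit also leaves the existing demo = gr.ChatInterface(...) block unchanged (it appears only as hunk context), so the top_k and repetition_penalty parameters of the new respond() reach the UI only if that block passes matching additional_inputs. A minimal sketch of such wiring, using Gradio's standard ChatInterface API with illustrative slider ranges:

# Hypothetical wiring of the new respond() into the ChatInterface.
# Slider ranges and defaults are illustrative; the actual demo block is not shown in this diff.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful research assistant.", label="System message"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
        gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repetition penalty"),
    ],
)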