shauninkripped committed · verified
Commit 959ec93 · 1 Parent(s): db5e34b

Update app.py

Files changed (1):
    app.py  +141  -1
app.py CHANGED
@@ -6,6 +6,9 @@ For more information on `huggingface_hub` Inference API support, please check th
 """
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
+"""
+test web research
+"""
 
 def respond(
     message,
@@ -39,6 +42,142 @@ def respond(
         response += token
         yield response
 
+hf_hub_download(
+    repo_id="bartowski/Mistral-7B-Instruct-v0.3-GGUF",
+    filename="Mistral-7B-Instruct-v0.3-Q6_K.gguf",
+    local_dir="./models"
+)
+
+
+def get_context_by_model(model_name):
+    model_context_limits = {
+        "Mistral-7B-Instruct-v0.3-Q6_K.gguf": 32768,
+        "Meta-Llama-3-8B-Instruct-Q6_K.gguf": 8192
+    }
+    return model_context_limits.get(model_name, None)
+
+
+def get_messages_formatter_type(model_name):
+    from llama_cpp_agent import MessagesFormatterType
+    if "Meta" in model_name or "aya" in model_name:
+        return MessagesFormatterType.LLAMA_3
+    elif "Mistral" in model_name:
+        return MessagesFormatterType.MISTRAL
+    elif "Einstein-v6-7B" in model_name or "dolphin" in model_name:
+        return MessagesFormatterType.CHATML
+    elif "Phi" in model_name:
+        return MessagesFormatterType.PHI_3
+    else:
+        return MessagesFormatterType.CHATML
+
+
+@spaces.GPU(duration=120)
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    system_message,
+    temperature,
+    top_p,
+    top_k,
+    repetition_penalty,
+):
+    chat_template = get_messages_formatter_type("Mistral-7B-Instruct-v0.3-Q6_K.gguf")
+    llm = Llama(
+        model_path=f"models/Mistral-7B-Instruct-v0.3-Q6_K.gguf",
+        flash_attn=True,
+        n_gpu_layers=33,
+        n_batch=1024,
+        n_ctx=get_context_by_model("Mistral-7B-Instruct-v0.3-Q6_K.gguf"),
+    )
+    provider = LlamaCppPythonProvider(llm)
+    search_tool = WebSearchTool(
+        llm_provider=provider,
+        message_formatter_type=chat_template,
+        model_max_context_tokens=get_context_by_model("Mistral-7B-Instruct-v0.3-Q6_K.gguf"),
+        max_tokens_search_results=12000,
+        max_tokens_per_summary=2048,
+    )
+
+    web_search_agent = LlamaCppAgent(
+        provider,
+        system_prompt=web_search_system_prompt,
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True,
+    )
+
+    answer_agent = LlamaCppAgent(
+        provider,
+        system_prompt=system_message,
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True,
+    )
+
+    settings = provider.get_provider_default_settings()
+    settings.stream = False
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
+
+    settings.max_tokens = 2048
+    settings.repeat_penalty = repetition_penalty
+
+    output_settings = LlmStructuredOutputSettings.from_functions(
+        [search_tool.get_tool()], add_thoughts_and_reasoning_field=True
+    )
+
+    messages = BasicChatHistory()
+
+    for msn in history:
+        user = {"role": Roles.user, "content": msn[0]}
+        assistant = {"role": Roles.assistant, "content": msn[1]}
+        messages.add_message(user)
+        messages.add_message(assistant)
+
+    result = web_search_agent.get_chat_response(
+        f"Current Date and Time(d/m/y, h:m:s): {datetime.datetime.now().strftime('%d/%m/%Y, %H:%M:%S')}\n\nUser Query: " + message,
+        llm_sampling_settings=settings,
+        structured_output_settings=output_settings,
+        add_message_to_chat_history=False,
+        add_response_to_chat_history=False,
+        print_output=False,
+    )
+
+    outputs = ""
+
+    settings.stream = True
+    response_text = answer_agent.get_chat_response(
+        f"Write a detailed and complete research document that fulfills the following user request: '{message}', based on the information below.\n\n"
+        + result[0]["return_value"],
+        role=Roles.tool,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False,
+    )
+
+    for text in response_text:
+        outputs += text
+        yield outputs
+
+    output_settings = LlmStructuredOutputSettings.from_pydantic_models(
+        [CitingSources], LlmStructuredOutputType.object_instance
+    )
+
+    citing_sources = answer_agent.get_chat_response(
+        "Cite the sources you used in your response.",
+        role=Roles.tool,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=False,
+        structured_output_settings=output_settings,
+        print_output=False,
+    )
+    outputs += "\n\nSources:\n"
+    outputs += "\n".join(citing_sources.sources)
+    yield outputs
+
+
+
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
@@ -60,4 +199,5 @@ demo = gr.ChatInterface(
 
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
+
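Note on the added code: none of the hunks above touch the import section of app.py, yet the new respond() references hf_hub_download, Llama, LlamaCppAgent, LlamaCppPythonProvider, WebSearchTool, BasicChatHistory, Roles, LlmStructuredOutputSettings, LlmStructuredOutputType, web_search_system_prompt, spaces, datetime, and a CitingSources model. The sketch below shows what that supporting code would plausibly look like, assuming the llama-cpp-python and llama-cpp-agent packages; the exact module paths and the CitingSources field are assumptions, not part of this commit.

# Hypothetical supporting code for the added respond(); not part of this diff.
# Module paths follow llama-cpp-python / llama-cpp-agent conventions and are
# assumptions about what the rest of app.py contains.
import datetime

import gradio as gr
import spaces  # Hugging Face Spaces ZeroGPU decorator used by @spaces.GPU
from huggingface_hub import InferenceClient, hf_hub_download
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles
from llama_cpp_agent.llm_output_settings import (
    LlmStructuredOutputSettings,
    LlmStructuredOutputType,
)
from llama_cpp_agent.tools import WebSearchTool
from llama_cpp_agent.prompt_templates import web_search_system_prompt
from pydantic import BaseModel, Field


class CitingSources(BaseModel):
    # Structured-output model parsed by the final "cite your sources" call;
    # a single `sources` field matches the `citing_sources.sources` access above.
    sources: list[str] = Field(
        ...,
        description="List of the source URLs used to answer the user query.",
    )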
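The commit also leaves the existing demo = gr.ChatInterface(...) block unchanged (it appears only as hunk context), so the top_k and repetition_penalty parameters of the new respond() reach the UI only if that block passes matching additional_inputs. A minimal sketch of such wiring, using Gradio's standard ChatInterface API with illustrative slider ranges:

# Hypothetical wiring of the new respond() into the ChatInterface.
# Slider ranges and defaults are illustrative; the actual demo block is not shown in this diff.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful research assistant.", label="System message"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.0, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
        gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
        gr.Slider(minimum=1.0, maximum=2.0, value=1.1, step=0.05, label="Repetition penalty"),
    ],
)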