Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 9,234 Bytes
d519be4 cb3dcae d519be4 cb3dcae a1a68e8 cb3dcae d519be4 a1a68e8 d519be4 4f325ab a78c581 4f325ab 6d8b03b 4f325ab a78c581 cb3dcae 6d8b03b cb3dcae d519be4 cb3dcae d519be4 fd9121f 9175102 fd9121f 3ff6af1 fd9121f f621fe9 fd9121f cb3dcae 3ff6af1 cb3dcae 16f6826 cb3dcae da5a214 cb3dcae fd9121f a48300b fd9121f da5a214 cb3dcae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
from dotenv import load_dotenv
import gradio as gr
from gradio import ChatMessage
import json
from openai import OpenAI
from datetime import datetime
import os
import re
import logging
# Root logger: timestamped, level-tagged lines at INFO and above.
logging.basicConfig(
    format='[%(asctime)s][%(levelname)s] - %(message)s',
    level=logging.INFO,
)

# Read settings from the local ".env" file before the lookups below.
load_dotenv(dotenv_path=".env", override=True)

# Each value is None when the corresponding environment variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
BASE_URL = os.getenv("BASE_URL")
EMBEDDINGS = os.getenv("EMBEDDINGS_MODEL")

# NOTE: the bare string literal below is not assigned to any name, so it has no
# effect at runtime. It reads like a longer draft of the system prompt
# (language rules, tool-use guidelines, a {date} placeholder) and is
# presumably kept only for reference.
"""
---
### 🌍 Language Handling Rules:
- Detect the user’s language automatically and respond fully in that language by default.
- If the user explicitly requests a different language, switch and respond entirely in the **requested language**.
- Never mix languages in a single reply.
- Never ask or suggest that the user switch languages — always follow their lead.
⚠️ Tool input requirement:
All queries sent to the `retrieve_wiki_data` tool must be in **Catalan**.
If the user’s input is in another language, you must first translate the query into Catalan **before calling the tool**.
However, your **response to the user must remain in their original language**.
---
You are an AI assistant. Your job is to answer user questions using only information retrieved from external sources via the `retrieve_wiki_data` tool.
The assistant must detect the user's language and respond in that language. However, all retrieved content is available **only in Catalan**.
### 🛠 Tool Use Guidelines:
- **query**: You may rephrase the user’s query to improve clarity, but never alter or remove key names or terms.
- **missing_info**: If the required information is not already available in the conversation or tool output, you **must call** `retrieve_wiki_data`.
- **redundant_search**: Do not call the tool again if the relevant information has already been retrieved.
- **wikipedia_entities**: If the query is about a known person, place, or concept likely found in Wikipedia, and no previous tool call has been made, you **must** use `retrieve_wiki_data`.
- **external_info_only**: You must base all answers only on content retrieved via the tool. Do not rely on internal knowledge.
- **no_info_found**: If no relevant information is found, clearly inform the user that nothing was available.
---
Today’s date is **{date}** (for reference only — do not include it in responses unless the user explicitly asks).
"""
from tools import tools, oitools
# Default system prompt. "{date}" is a str.format placeholder that is filled
# in with the current date when a request is built.
SYSTEM_PROMPT_TEMPLATE = "\n".join((
    "You are an AI assistant designed to answer user questions using externally retrieved information. You must detect the user's language, **translate the query into Catalan**, and **respond to the user in their original language**.",
    "However, all retrieved content is available **only in Catalan**.",
    "Today’s date is **{date}**.",
))
# Shared OpenAI-compatible client: requests go to "<BASE_URL>/v1" and the
# HF token is supplied as the API key.
client = OpenAI(api_key=HF_TOKEN, base_url=f"{BASE_URL}/v1")
logging.info(f"Client initialized: {client}")
def today_date():
    """Return the current local date and time as display text.

    The layout is weekday, month name, day, year, then a 12-hour clock time,
    e.g. ``Monday, January 01, 2024, 09:30 AM``.
    """
    now = datetime.today()
    return format(now, '%A, %B %d, %Y, %I:%M %p')
def clean_json_string(json_str):
    """Repair the tail of a JSON object string assembled from streamed chunks.

    Streamed tool-call arguments can end with surplus ``}``/``,``/whitespace
    or be missing closing braces. Already-valid JSON is returned with only
    trailing whitespace removed, so nested objects are never damaged.
    Otherwise the trailing run of spaces, commas and closing braces is
    stripped and the smallest number of ``}`` that yields parseable JSON is
    appended.

    Args:
        json_str: Raw (possibly malformed) JSON text.

    Returns:
        The repaired string. If no brace count produces valid JSON, the
        stripped text plus a single ``}`` is returned, which may still be
        invalid.
    """
    try:
        json.loads(json_str)
    except ValueError:
        pass
    else:
        # Valid as-is: blindly stripping braces would break nested objects.
        return json_str.rstrip()

    trimmed = re.sub(r'[ ,}\s]+$', '', json_str)

    # At most one closing brace per opening brace can be missing.
    for depth in range(1, trimmed.count('{') + 1):
        candidate = trimmed + '}' * depth
        try:
            json.loads(candidate)
        except ValueError:
            continue
        return candidate

    # Nothing parsed: fall back to the single-brace heuristic.
    return trimmed + '}'
def get_summary(model, text):
    """Ask ``model`` for a detailed, faithful summary of ``text``.

    Sends a fixed summarisation system prompt followed by the text as a user
    message, using a non-streaming request (max 1000 tokens, temperature
    0.1), and returns the response object from
    ``client.chat.completions.create`` unchanged.
    """
    system_turn = {"role": "system", "content": """You are an AI assistant that generates **detailed and complete summaries** of user-provided text. Your task is to produce a **faithful resumen** that preserves **all key information**, facts, and relevant points from the original content.
### Summary Guidelines:
- **No Detail Skipping**: Do **not** omit or simplify important content. Every critical fact, event, name, number, and nuance must be included.
- **Structured Clarity**: Organize the summary clearly and logically. If the original has sections or topics, reflect that structure.
- **No Personal Input**: Do **not** add opinions, interpretations, or external knowledge. Stay 100% faithful to the source text.
- **Conciseness with Completeness**: Be as concise as possible **without losing any important detail**.
Only produce the summary after fully reading and understanding the input text.
"""}
    user_turn = {"role": "user", "content": f"**TEXT**:\n\n{text}"}

    return client.chat.completions.create(
        model=model,
        messages=[system_turn, user_turn],
        stream=False,
        max_tokens=1000,
        temperature=0.1,
    )
def completion(history, model, system_prompt: str, tools=None):
    """Build the chat request from ``history`` and start a streaming completion.

    Args:
        history: Conversation so far, as ``ChatMessage`` objects or dicts
            accepted by ``ChatMessage(**msg)``. An assistant message with
            non-empty ``metadata`` is treated as a stored tool result: its
            ``metadata["title"]`` holds the JSON-encoded tool calls and its
            ``content`` holds the tool output.
        model: Model identifier passed to the API.
        system_prompt: System prompt text; ``{date}`` is replaced with the
            current date string.
        tools: Optional tool definitions. When truthy they are sent with
            ``tool_choice="auto"``.

    Returns:
        The streaming response from ``client.chat.completions.create``.
    """
    today = today_date()
    try:
        rendered_prompt = system_prompt.format(date=today)
    except (KeyError, IndexError, ValueError, AttributeError):
        # The prompt is user-editable; stray braces (e.g. a pasted JSON
        # example) make str.format raise. Substitute the placeholder
        # literally instead of failing the whole request.
        rendered_prompt = system_prompt.replace("{date}", today)

    messages = [{"role": "system", "content": rendered_prompt}]
    for msg in history:
        if isinstance(msg, dict):
            msg = ChatMessage(**msg)

        metadata = getattr(msg, "metadata", None)
        if msg.role == "assistant" and metadata:
            # Replay a stored tool result as the assistant tool-call message
            # followed by the matching tool message.
            tool_calls = json.loads(metadata.get("title", "[]"))
            messages.append({"role": "assistant", "tool_calls": tool_calls, "content": ""})

            tool_message = {"role": "tool", "content": msg.content}
            # Link the tool output to the call it answers, as the chat
            # completions message format expects for role "tool".
            if isinstance(tool_calls, list) and tool_calls and isinstance(tool_calls[0], dict):
                call_id = tool_calls[0].get("id")
                if call_id:
                    tool_message["tool_call_id"] = call_id
            messages.append(tool_message)
        else:
            messages.append({"role": msg.role, "content": msg.content})

    request_params = {
        "model": model,
        "messages": messages,
        "stream": True,
        "max_tokens": 1000,
        "temperature": 0.1,
        "extra_body": {},
    }
    if tools:
        request_params.update({"tool_choice": "auto", "tools": tools})

    return client.chat.completions.create(**request_params)
def llm_in_loop(history, system_prompt, recursive):
    """Stream one model turn; if it requested a tool, run it and recurse.

    Generator that mutates ``history`` in place and yields
    ``history[recursive:]`` after each update, i.e. the messages added since
    the caller's user message.

    Args:
        history: Conversation list; assistant and tool-result messages are
            appended to it.
        system_prompt: System prompt forwarded to ``completion``.
        recursive: Negative slice start used when yielding. It is decreased
            by one for every message this call appends so the yielded slice
            keeps covering all new messages.

    Yields:
        The tail of ``history`` starting at ``recursive``.

    Raises:
        Exception: Whatever listing the models raised, re-raised after a
            Gradio warning is shown.
        json.JSONDecodeError: If the accumulated tool arguments are still not
            valid JSON after ``clean_json_string``.
    """
    try:
        model = client.models.list().data[0].id
    except Exception:
        gr.Warning("The model is initializing. Please wait; this may take 5 to 10 minutes ⏳.", duration=20)
        raise

    arguments = ""
    name = ""
    appended = False
    chat_completion = completion(history=history, tools=oitools, model=model, system_prompt=system_prompt)

    for chunk in chat_completion:
        # Some chunks (e.g. usage-only ones) carry no choices; indexing
        # choices[0] on them would raise IndexError.
        if not chunk.choices:
            continue

        delta = chunk.choices[0].delta
        if delta.tool_calls:
            # Only the first tool call of a chunk is considered; its name and
            # argument fragments are accumulated across chunks.
            call = delta.tool_calls[0]
            if hasattr(call.function, "name") and call.function.name:
                name = call.function.name
            if hasattr(call.function, "arguments") and call.function.arguments:
                arguments += call.function.arguments
        elif delta.content:
            if not appended:
                history.append(ChatMessage(role="assistant", content=""))
                appended = True
            history[-1].content += delta.content
            yield history[recursive:]

    arguments = clean_json_string(arguments) if arguments else "{}"
    logging.info("Tool call requested: name=%r raw_arguments=%s", name, arguments)
    arguments = json.loads(arguments)
    logging.info("Tool call parsed: name=%r arguments=%s", name, arguments)

    if appended:
        # A text message was added above; widen the yielded window to keep it.
        recursive -= 1

    if name:
        try:
            result = str(tools[name].invoke(input=arguments))
        except Exception as err:
            # Surface tool failures in the chat instead of aborting the turn.
            result = f"💥 Error: {err}"

        # Store the tool output as an assistant message whose metadata title
        # holds the JSON-encoded tool call; completion() reads it back from
        # there on the next request.
        history.append(ChatMessage(role="assistant", content=result, metadata={"title": json.dumps([{"id": "call_id", "function": {"arguments": json.dumps(arguments, ensure_ascii=False), "name": name}, "type": "function"}], ensure_ascii=False)}))
        yield history[recursive:]
        yield from llm_in_loop(history, system_prompt, recursive - 1)
def respond(message, history, additional_inputs):
    """Chat callback: record the user's message and stream the model's reply.

    ``additional_inputs`` carries the system prompt text; the generator
    re-yields everything produced by ``llm_in_loop``.
    """
    user_turn = ChatMessage(role="user", content=message)
    history.append(user_turn)
    yield from llm_in_loop(history=history, system_prompt=additional_inputs, recursive=-1)
if __name__ == "__main__":
    # Editable system prompt box; its value reaches respond() as the
    # additional input.
    system_prompt = gr.Textbox(value=SYSTEM_PROMPT_TEMPLATE, label="System prompt", lines=3)
    demo = gr.ChatInterface(fn=respond, additional_inputs=[system_prompt], type="messages")
    demo.launch()
|