import json
import logging
import os
import re
from datetime import datetime

import gradio as gr
from dotenv import load_dotenv
from gradio import ChatMessage
from openai import OpenAI

logging.basicConfig(level=logging.INFO, format='[%(asctime)s][%(levelname)s] - %(message)s')


load_dotenv(".env", override=True) 
HF_TOKEN = os.environ.get("HF_TOKEN")  
BASE_URL = os.environ.get("BASE_URL")
EMBEDDINGS = os.environ.get("EMBEDDINGS_MODEL")

"""
---

### 🌍 Language Handling Rules:

- Detect the user’s language automatically and respond fully in that language by default.
- If the user explicitly requests a different language, switch and respond entirely in the **requested language**.
- Never mix languages in a single reply.
- Never ask or suggest that the user switch languages — always follow their lead.

⚠️ Tool input requirement:
All queries sent to the `retrieve_wiki_data` tool must be in **Catalan**.
If the user’s input is in another language, you must first translate the query into Catalan **before calling the tool**.  
However, your **response to the user must remain in their original language**.

---

You are an AI assistant. Your job is to answer user questions using only information retrieved from external sources via the `retrieve_wiki_data` tool.  
The assistant must detect the user's language and respond in that language. However, all retrieved content is available **only in Catalan**.

### 🛠 Tool Use Guidelines:

- **query**: You may rephrase the user’s query to improve clarity, but never alter or remove key names or terms.
- **missing_info**: If the required information is not already available in the conversation or tool output, you **must call** `retrieve_wiki_data`.
- **redundant_search**: Do not call the tool again if the relevant information has already been retrieved.
- **wikipedia_entities**: If the query is about a known person, place, or concept likely found in Wikipedia, and no previous tool call has been made, you **must** use `retrieve_wiki_data`.
- **external_info_only**: You must base all answers only on content retrieved via the tool. Do not rely on internal knowledge.
- **no_info_found**: If no relevant information is found, clearly inform the user that nothing was available.

---

Today’s date is **{date}** (for reference only — do not include it in responses unless the user explicitly asks).
"""

from tools import tools, oitools  # local module: `tools` maps tool name -> invocable tool, `oitools` holds the OpenAI-format tool specs

SYSTEM_PROMPT_TEMPLATE = """You are an AI assistant designed to answer user questions using externally retrieved information. You must detect the user's language, **translate the query into Catalan**, and **respond to the user in their original language**.
However, all retrieved content is available **only in Catalan**.

Today’s date is **{date}**."""


client = OpenAI(
    base_url=f"{BASE_URL}/v1",  
    api_key=HF_TOKEN
)
logging.info(f"Client initialized: {client}")

def today_date():
    """Return the current date/time as a human-readable string for the prompt."""
    return datetime.today().strftime('%A, %B %d, %Y, %I:%M %p')


def clean_json_string(json_str):
    """Repair a JSON argument string accumulated from streamed tool-call deltas:
    strip trailing spaces, commas, and braces, then close with a single '}'."""
    return re.sub(r'[ ,}\s]+$', '', json_str) + '}'
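# Example (illustrative): clean_json_string('{"query": "Gaudí", ') -> '{"query": "Gaudí"}'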


def get_summary(model, text):
    messages = [{"role": "system", "content": """You are an AI assistant that generates **detailed and complete summaries** of user-provided text. Your task is to produce a **faithful resumen** that preserves **all key information**, facts, and relevant points from the original content.

### Summary Guidelines:

- **No Detail Skipping**: Do **not** omit or simplify important content. Every critical fact, event, name, number, and nuance must be included.
- **Structured Clarity**: Organize the summary clearly and logically. If the original has sections or topics, reflect that structure.
- **No Personal Input**: Do **not** add opinions, interpretations, or external knowledge. Stay 100% faithful to the source text.
- **Conciseness with Completeness**: Be as concise as possible **without losing any important detail**.

Only produce the summary after fully reading and understanding the input text.
"""}]
    messages.append({"role": "user", "content": f"**TEXT**:\n\n{text}"})
            
    
    request_params = {
        "model": model,
        "messages": messages,
        "stream": False,
        "max_tokens": 1000,
        "temperature": 0.1,
        #"presence_penalty": 0.3,
        #"frequency_penalty": 0.3,
        #"extra_body": {"repetition_penalty": 0.5},
    }
    
    return client.chat.completions.create(**request_params)
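# Usage sketch (names illustrative):
#   summary = get_summary(model, tool_output).choices[0].message.content
# llm_in_loop below keeps this call disabled behind a comment.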

def completion(history, model, system_prompt: str, tools=None):
    """Replay the chat history into OpenAI-format messages and request a streamed completion."""
    messages = [{"role": "system", "content": system_prompt.format(date=today_date())}]
    for msg in history:
        if isinstance(msg, dict):
            msg = ChatMessage(**msg)
        if msg.role == "assistant" and hasattr(msg, "metadata") and msg.metadata:
            # Tool-call turns carry their serialized call in metadata["title"]
            # (see llm_in_loop); replay them as an assistant tool_calls message
            # followed by the tool result.
            tools_calls = json.loads(msg.metadata.get("title", "[]"))
            messages.append({"role": "assistant", "tool_calls": tools_calls, "content": ""})
            messages.append({"role": "tool", "content": msg.content})
        else:
            messages.append({"role": msg.role, "content": msg.content})
    
    request_params = {
        "model": model,
        "messages": messages,
        "stream": True,
        "max_tokens": 1000,
        "temperature": 0.1,
        #"frequency_penalty": 0.1,
        "extra_body": {}, #"repetition_penalty": 0.9
    }
    if tools:
        request_params.update({"tool_choice": "auto", "tools": tools})
    
    return client.chat.completions.create(**request_params)  
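# completion() returns a stream of chunks: tool-call fragments arrive in
# chunk.choices[0].delta.tool_calls and plain text in delta.content (see below).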

def llm_in_loop(history, system_prompt, recursive):
    """Stream one assistant turn; if the model emits a tool call, invoke the tool
    and recurse. `recursive` is a negative offset marking where this turn's new
    messages start in `history`, so only those are yielded back to Gradio."""
    try:
        models = client.models.list()
        model = models.data[0].id
    except Exception as err:
        gr.Warning("The model is initializing. Please wait; this may take 5 to 10 minutes ⏳.", duration=20)
        raise err
    
    arguments = ""
    name = ""
    chat_completion = completion(history=history, tools=oitools, model=model, system_prompt=system_prompt)  
    appended = False
    for chunk in chat_completion:
        if chunk.choices and chunk.choices[0].delta.tool_calls:
            call = chunk.choices[0].delta.tool_calls[0]
            if hasattr(call.function, "name") and call.function.name:
                name = call.function.name
            if hasattr(call.function, "arguments") and call.function.arguments:
                arguments += call.function.arguments
        elif chunk.choices[0].delta.content:
            if not appended:
                history.append(ChatMessage(role="assistant", content=""))
                appended = True
            history[-1].content += chunk.choices[0].delta.content
            yield history[recursive:]
    
    arguments = clean_json_string(arguments) if arguments else "{}"
    print(name, arguments)
    arguments = json.loads(arguments)
    print(name, arguments)
    print("====================")
    if appended:
        # A streamed text message was appended above; widen the slice so it stays visible.
        recursive -= 1
    if name:
        try:
            result = str(tools[name].invoke(input=arguments))
            # Optionally compress long tool output before replaying it:
            # result = get_summary(model=model, text=result).choices[0].message.content
        except Exception as err:
            result = f"💥 Error: {err}"
        # Stash the serialized tool call in metadata["title"] so completion() can
        # replay it as a proper assistant tool_calls message on the next pass.
        tool_call_payload = json.dumps(
            [{"id": "call_id", "function": {"arguments": json.dumps(arguments, ensure_ascii=False), "name": name}, "type": "function"}],
            ensure_ascii=False,
        )
        history.append(ChatMessage(role="assistant", content=result, metadata={"title": tool_call_payload}))
        yield history[recursive:]
        yield from llm_in_loop(history, system_prompt, recursive - 1)
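# Gradio callback: append the user turn and stream assistant updates;
# recursive=-1 starts the yielded slice at the newest message and grows it as
# tool calls and answers are appended.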

def respond(message, history, additional_inputs):
    # `additional_inputs` is the system-prompt textbox value wired up in ChatInterface below.
    history.append(ChatMessage(role="user", content=message))
    yield from llm_in_loop(history, additional_inputs, -1)

if __name__ == "__main__":
    system_prompt = gr.Textbox(label="System prompt", value=SYSTEM_PROMPT_TEMPLATE, lines=3)  
    demo = gr.ChatInterface(respond, type="messages", additional_inputs=[system_prompt])
    demo.launch()