sasan committed on
Commit
fea02f6
1 Parent(s): b4ec609

chore: Add set_vehicle_speed and set_vehicle_destination functions

kitt/core/model.py CHANGED

@@ -12,6 +12,7 @@ from loguru import logger
 
 
 from kitt.skills import vehicle_status
+from kitt.skills.common import config
 
 
 class FunctionCall(BaseModel):
@@ -29,7 +30,7 @@ class FunctionCall(BaseModel):
 
 schema_json = json.loads(FunctionCall.schema_json())
 HRMS_SYSTEM_PROMPT = """<|im_start|>system
-You are a function calling AI agent with self-recursion.
+You are a function calling AI agent. Your name is KITT. You are embodied in a Car. You know where you are, where you are going, and the current date and time. You can call functions to help with user queries.
 You can call only one function at a time and analyse data you get from function response.
 You are provided with function signatures within <tools></tools> XML tags.
 
@@ -53,7 +54,7 @@ Make sure that the json object above with code markdown block is parseable with
 When using tools, ensure to only use the tools provided and not make up any data and do not provide any explanation as to which tool you are using and why.
 
 Example 1:
-User: How is the weather today?
+User: How is the weather?
 Assistant:
 <tool_call>
 {{"arguments": {{"location": ""}}, "name": "get_weather"}}
@@ -206,14 +207,7 @@ def process_response(user_query, res, history, tools, depth):
     return True, tool_calls, errors
 
 
-def run_inference_step(depth, history, tools, schema_json, dry_run=False):
-    # If we decide to call a function, we need to generate the prompt for the model
-    # based on the history of the conversation so far.
-    # not break the loop
-    openai_tools = [convert_to_openai_function(tool) for tool in tools]
-    prompt = get_prompt(HRMS_SYSTEM_PROMPT, history, openai_tools, schema_json)
-    print(f"Prompt is:{prompt + AI_PREAMBLE}\n------------------\n")
-
+def run_inference_ollama(prompt):
     data = {
         "prompt": prompt
         + "\nThis is the first turn and you don't have <tool_results> to analyze yet"
@@ -230,37 +224,86 @@ def run_inference_step(depth, history, tools, schema_json, dry_run=False):
             "temperature": 0.8,
             # "max_tokens": 1500,
             "num_predict": 1500,
-            "mirostat": 1,
+            # "mirostat": 1,
             # "mirostat_tau": 2,
-            "repeat_penalty": 1.5,
+            "repeat_penalty": 1.1,
             "top_k": 25,
             "top_p": 0.5,
+            "num_ctx": 8000,
             # "num_predict": 1500,
             # "max_tokens": 1500,
         },
    }
 
-    if dry_run:
-        print(prompt + AI_PREAMBLE)
-        return "Didn't really run it."
-
-    client = Client(host='http://localhost:11444')
+    client = Client(host="http://localhost:11434")
     # out = ollama.generate(**data)
     out = client.generate(**data)
-    logger.debug(f"Response from model: {out}")
-    res = out["response"]
-
+    res = out.pop("response")
+    # Report prompt and eval tokens
+    logger.warning(
+        f"Prompt tokens: {out.get('prompt_eval_count')}, Response tokens: {out.get('eval_count')}"
+    )
+    logger.debug(f"Response from Ollama: {res}\nOut:{out}")
     return res
 
 
-def process_query(user_query: str, history: ChatMessageHistory, tools):
-    # Add vehicle status to the history
-    user_query_status = (
-        f"Given that:\n{vehicle_status()[0]}\nAnswer the following:\n{user_query}"
-    )
+def run_inference_step(
+    depth, history, tools, schema_json, dry_run=False, backend="ollama"
+):
+    # If we decide to call a function, we need to generate the prompt for the model
+    # based on the history of the conversation so far.
+    # not break the loop
+    openai_tools = [convert_to_openai_function(tool) for tool in tools]
+    prompt = get_prompt(HRMS_SYSTEM_PROMPT, history, openai_tools, schema_json)
+    print(f"Prompt is:{prompt + AI_PREAMBLE}\n------------------\n")
+
+    if backend == "ollama":
+        output = run_inference_ollama(prompt)
+    else:
+        output = run_inference_replicate(prompt)
+
+    logger.debug(f"Response from model: {output}")
+    return output
+
+
+def run_inference_replicate(prompt):
+    from replicate import Client
+
+    replicate = Client(api_token=config.REPLICATE_API_KEY)
+
+    input = {
+        "prompt": prompt
+        + "\nThis is the first turn and you don't have <tool_results> to analyze yet"
+        + AI_PREAMBLE,
+        "temperature": 0.5,
+        "system_prompt": "",
+        "max_new_tokens": 1024,
+        "repeat_penalty": 1.1,
+        "prompt_template": "{prompt}",
+    }
+
+    output = replicate.run(
+        "mikeei/dolphin-2.9-llama3-8b-gguf:0f79fb14c45ae2b92e1f07d872dceed3afafcacd903258df487d3bec9e393cb2",
+        input=input,
+    )
+    out = "".join(output)
+
+    return out
+
+
+def process_query(
+    user_query: str,
+    history: ChatMessageHistory,
+    user_preferences,
+    tools,
+    backend="ollama",
+):
+    # Add vehicle status to the history
+    user_query_status = f"Given that:\n{vehicle_status()[0]}\nUser preferences:\n{user_preferences}\nAnswer the following:\n{user_query}"
     history.add_message(HumanMessage(content=user_query_status))
     for depth in range(10):
-        out = run_inference_step(depth, history, tools, schema_json)
+        # out = run_inference_step(depth, history, tools, schema_json)
+        out = run_inference_step(depth, history, tools, schema_json, backend=backend)
         print(f"Inference step result:\n{out}\n------------------\n")
         history.add_message(AIMessage(content=out))
         to_continue, tool_calls, errors = process_response(
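
For orientation, here is a minimal sketch of driving the reworked pipeline end to end. The langchain import paths, the StructuredTool wrappers, and a running Ollama server on localhost:11434 are assumptions, not part of the commit:

```python
# Sketch only: exercising the new process_query signature and backend switch.
# Assumes langchain-style imports and a local Ollama server (see
# run_inference_ollama above); with backend="replicate",
# config.REPLICATE_API_KEY must be set instead.
from langchain.memory import ChatMessageHistory
from langchain.tools import StructuredTool

from kitt.core.model import process_query
from kitt.skills import set_vehicle_speed, set_vehicle_destination

history = ChatMessageHistory()
tools = [
    StructuredTool.from_function(set_vehicle_speed),
    StructuredTool.from_function(set_vehicle_destination),
]

# backend="ollama" routes through run_inference_ollama; any other value
# falls through to run_inference_replicate.
output_text = process_query(
    "Take me to the airport, and drive fast.",
    history=history,
    user_preferences="I prefer highways",
    tools=tools,
    backend="ollama",
)
print(output_text)
```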
kitt/skills/__init__.py CHANGED

@@ -5,7 +5,7 @@ from .common import execute_function_call, extract_func_args, vehicle as vehicle
 from .weather import get_weather_current_location, get_weather, get_forecast
 from .routing import find_route
 from .poi import search_points_of_interests, search_along_route_w_coordinates
-from .vehicle import vehicle_status
+from .vehicle import vehicle_status, set_vehicle_speed, set_vehicle_destination
 from .interpreter import code_interpreter
 
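With the widened re-export, the new skills resolve at package level, which is what main.py relies on below:

```python
# After this change, both new skills are importable from the package root:
from kitt.skills import vehicle_status, set_vehicle_speed, set_vehicle_destination
```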
 
kitt/skills/common.py CHANGED

@@ -1,25 +1,34 @@
 import re
-from typing import Union
+from typing import Union, Optional
 
 
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from pydantic import BaseModel
 
 from .. import skills
+from enum import Enum
 
 class Settings(BaseSettings):
     WEATHER_API_KEY: str
     TOMTOM_API_KEY: str
+    REPLICATE_API_KEY: Optional[str]
 
     model_config = SettingsConfigDict(env_file=".env")
 
 
+class Speed(Enum):
+    SLOW = "slow"
+    FAST = "fast"
+
+
 class VehicleStatus(BaseModel):
     location: str
     location_coordinates: tuple[float, float]  # (latitude, longitude)
     date: str
     time: str
     destination: str
+    speed: Speed = Speed.SLOW
+
 
 
 def execute_function_call(text: str, dry_run=False) -> str:
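
A short sketch of the extended models (field values are illustrative). One caveat worth hedging: depending on the pydantic version, `Optional[str]` with no default may still be treated as a required field, so `REPLICATE_API_KEY` might need an explicit `= None` default to be truly optional:

```python
from kitt.skills.common import Speed, VehicleStatus

# speed now defaults to Speed.SLOW via the new field
status = VehicleStatus(
    location="Luxembourg",                   # illustrative values
    location_coordinates=(49.6116, 6.1319),
    date="2024-05-06",
    time="10:30",
    destination="Esch-sur-Alzette",
)
assert status.speed is Speed.SLOW
assert status.speed.value == "slow"
```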
kitt/skills/vehicle.py CHANGED

@@ -1,7 +1,8 @@
-from .common import vehicle
+from .common import vehicle, Speed
 
 
-STATUS_TEMPLATE = """The current location is: {location} ({lat}, {lon})
+STATUS_TEMPLATE = """The current location is: {location}
+Current coordinates: {lat}, {lon}
 The current date and time: {date} {time}
 The current destination is: {destination}"""
 
@@ -32,3 +33,21 @@ def vehicle_status() -> tuple[str, dict[str, str]]:
     vs["lat"] = vs["location_coordinates"][0]
     vs["lon"] = vs["location_coordinates"][1]
     return STATUS_TEMPLATE.format(**vs), vs
+
+
+def set_vehicle_speed(speed: Speed):
+    """Set the speed of the vehicle.
+
+    Args:
+        speed (Speed): The speed of the vehicle. ("slow", "fast")
+    """
+    vehicle.speed = speed
+    return f"The vehicle speed is set to {speed.value}."
+
+
+def set_vehicle_destination(destination: str):
+    """Set the destination of the vehicle.
+
+    Args:
+        destination (str): The destination of the vehicle.
+    """
+    vehicle.destination = destination
+    return f"The vehicle destination is set to {destination}."
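
A direct usage sketch of the two new skills, assuming the shared module-level `vehicle` instance from kitt.skills.common. When the model invokes set_vehicle_speed through a StructuredTool, the argument arrives as the string "slow" or "fast"; pydantic's enum coercion is assumed to map it to `Speed`:

```python
from kitt.skills import set_vehicle_speed, set_vehicle_destination
from kitt.skills.common import Speed

print(set_vehicle_speed(Speed.FAST))
# The vehicle speed is set to fast.

print(set_vehicle_destination("Kirchberg"))
# The vehicle destination is set to Kirchberg.
```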
kitt/skills/weather.py CHANGED

@@ -1,4 +1,5 @@
 import requests
+from loguru import logger
 
 from .common import config, vehicle
 
@@ -19,27 +20,26 @@ def get_weather_current_location():
 
 
 # current weather API
-def get_weather(location: str = ""):
+def get_weather(location: str = "here"):
     """
     Get the current weather in a specified location.
     When responding to user, only mention the weather condition, temperature, and the temperature that it feels like, unless the user asks for more information.
 
     Args:
-        location (string) : Optional. The name of the location, if empty, the vehicle location is used.
+        location (string) : Optional. The name of the location, if empty or here, the vehicle location is used.
 
     Returns:
         dict: The weather data in the specified location.
     """
 
-    if location == "":
-        print(
+    if location == "" or location == "here":
+        logger.warning(
            f"get_weather: location is empty, using the vehicle location. ({vehicle.location})"
        )
        location = vehicle.location
 
     # The endpoint URL provided by WeatherAPI
     url = f"http://api.weatherapi.com/v1/current.json?key={config.WEATHER_API_KEY}&q={location}&aqi=no"
-    print(url)
 
     # Make the API request
     response = requests.get(url)
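
The `"here"` default makes the fallback explicit: an empty string and "here" both resolve to the vehicle's location, and the message now goes through loguru rather than print. Illustrative calls, assuming a valid WEATHER_API_KEY in .env:

```python
from kitt.skills.weather import get_weather

get_weather("Berlin")  # explicit location
get_weather("here")    # falls back to vehicle.location, logged as a warning
get_weather()          # same as above: the default is now "here"
```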
main.py CHANGED

@@ -21,8 +21,10 @@ from kitt.skills import (
     find_route,
     get_forecast,
     vehicle_status as vehicle_status_fn,
+    set_vehicle_speed,
     search_points_of_interests,
     search_along_route_w_coordinates,
+    set_vehicle_destination,
     do_anything_else,
     date_time_info,
     get_weather_current_location,
@@ -120,11 +122,12 @@ def get_vehicle_status(state):
 tools = [
     StructuredTool.from_function(get_weather),
     StructuredTool.from_function(find_route),
-    StructuredTool.from_function(vehicle_status_fn),
+    # StructuredTool.from_function(vehicle_status_fn),
+    StructuredTool.from_function(set_vehicle_speed),
     StructuredTool.from_function(search_points_of_interests),
     StructuredTool.from_function(search_along_route),
-    StructuredTool.from_function(date_time_info),
-    StructuredTool.from_function(get_weather_current_location),
+    # StructuredTool.from_function(date_time_info),
+    # StructuredTool.from_function(get_weather_current_location),
     StructuredTool.from_function(code_interpreter),
     # StructuredTool.from_function(do_anything_else),
 ]
@@ -148,7 +151,7 @@ def clear_history():
     history.clear()
 
 
-def run_nexusraven_model(query, voice_character):
+def run_nexusraven_model(query, voice_character, state):
     global_context["prompt"] = get_prompt(RAVEN_PROMPT_FUNC, query, "", tools)
     print("Prompt: ", global_context["prompt"])
     data = {
@@ -182,11 +185,18 @@ def run_nexusraven_model(query, voice_character):
     )
 
 
-def run_llama3_model(query, voice_character):
-    output_text = process_query(query, history, tools)
+def run_llama3_model(query, voice_character, state):
+    output_text = process_query(
+        query,
+        history=history,
+        user_preferences=state["user_preferences"],
+        tools=tools,
+        backend=state["llm_backend"],
+    )
     gr.Info(f"Output text: {output_text}, generating voice output...")
-    # voice_out = tts_gradio(output_text, voice_character, speaker_embedding_cache)[0]
     voice_out = None
+    if state["tts_enabled"]:
+        voice_out = tts_gradio(output_text, voice_character, speaker_embedding_cache)[0]
     return (
         output_text,
         voice_out,
@@ -196,15 +206,17 @@ def run_model(query, voice_character, state):
 def run_model(query, voice_character, state):
     model = state.get("model", "nexusraven")
     query = query.strip().replace("'", "")
-    print("Query: ", query)
-    print("Model: ", model)
+    logger.info(
+        f"Running model: {model} with query: {query}, voice_character: {voice_character} and llm_backend: {state['llm_backend']}, tts_enabled: {state['tts_enabled']}"
+    )
     global_context["query"] = query
     if model == "nexusraven":
-        return run_nexusraven_model(query, voice_character)
+        text, voice = run_nexusraven_model(query, voice_character, state)
     elif model == "llama3":
-        return run_llama3_model(query, voice_character)
-    return "Error running model", None
-
+        text, voice = run_llama3_model(query, voice_character, state)
+    else:
+        text, voice = "Error running model", None
+    return text, voice, vehicle.model_dump_json()
 
 
 def calculate_route_gradio(origin, destination):
@@ -276,6 +288,32 @@ def save_and_transcribe_run_model(audio, voice_character, state):
     out_text, out_voice = run_model(text, voice_character, state)
     return text, out_text, out_voice
 
+
+def set_tts_enabled(tts_enabled, state):
+    new_tts_enabled = tts_enabled == "Yes"
+    logger.info(
+        f"TTS enabled was {state['tts_enabled']} and changed to {new_tts_enabled}"
+    )
+    state["tts_enabled"] = new_tts_enabled
+    return state
+
+
+def set_llm_backend(llm_backend, state):
+    new_llm_backend = "ollama" if llm_backend == "Ollama" else "replicate"
+    logger.info(
+        f"LLM backend was {state['llm_backend']} and changed to {new_llm_backend}"
+    )
+    state["llm_backend"] = new_llm_backend
+    return state
+
+
+def set_user_preferences(preferences, state):
+    new_preferences = preferences
+    logger.info(f"User preferences changed to: {new_preferences}")
+    state["user_preferences"] = new_preferences
+    return state
+
+
 # to be able to use the microphone on chrome, you will have to go to chrome://flags/#unsafely-treat-insecure-origin-as-secure and enter http://10.186.115.21:7860/
 # in "Insecure origins treated as secure", enable it and relaunch chrome
 
@@ -284,7 +322,7 @@ def save_and_transcribe_run_model(audio, voice_character, state):
 # What's the closest restaurant from here?
 
 
-def create_demo(tts_server: bool = False, model="llama3", tts=True):
+def create_demo(tts_server: bool = False, model="llama3", tts_enabled: bool = True):
     print(f"Running the demo with model: {model} and TTSServer: {tts_server}")
     with gr.Blocks(theme=gr.themes.Default()) as demo:
         state = gr.State(
@@ -293,7 +331,9 @@ def create_demo(tts_server: bool = False, model="llama3", tts=True):
                 "query": "",
                 "route_points": [],
                 "model": model,
-                "tts": tts,
+                "tts_enabled": tts_enabled,
+                "llm_backend": "Ollama",
+                "user_preferences": "",
             }
         )
         trip_points = gr.State(value=[])
@@ -328,6 +368,12 @@ def create_demo(tts_server: bool = False, model="llama3", tts=True):
                 label="Destination",
                 interactive=True,
             )
+            preferences = gr.Textbox(
+                value="I love italian food\nI like doing sports",
+                label="User preferences",
+                lines=3,
+                interactive=True,
+            )
 
         with gr.Column(scale=2, min_width=600):
             map_plot = gr.Plot()
@@ -363,6 +409,19 @@ def create_demo(tts_server: bool = False, model="llama3", tts=True):
             vehicle_status = gr.JSON(
                 value=vehicle.model_dump_json(), label="Vehicle status"
             )
+            with gr.Accordion("Config"):
+                tts_enabled = gr.Radio(
+                    choices=["Yes", "No"],
+                    label="Enable TTS",
+                    value="No",
+                    interactive=True,
+                )
+                llm_backend = gr.Radio(
+                    choices=["Ollama", "Replicate"],
+                    label="LLM Backend",
+                    value="Ollama",
+                    interactive=True,
+                )
             # Push button
             clear_history_btn = gr.Button(value="Clear History")
         with gr.Column():
@@ -383,6 +442,9 @@ def create_demo(tts_server: bool = False, model="llama3", tts=True):
             inputs=[origin, destination],
             outputs=[map_plot, vehicle_status, trip_progress],
         )
+        preferences.submit(
+            fn=set_user_preferences, inputs=[preferences, state], outputs=[state]
+        )
 
         # Update time based on the time picker
         time_picker.select(fn=set_time, inputs=[time_picker], outputs=[vehicle_status])
@@ -391,12 +453,12 @@ def create_demo(tts_server: bool = False, model="llama3", tts=True):
         input_text.submit(
             fn=run_model,
             inputs=[input_text, voice_character, state],
-            outputs=[output_text, output_audio],
+            outputs=[output_text, output_audio, vehicle_status],
         )
         input_text_debug.submit(
             fn=run_model,
-            inputs=[input_text, voice_character, state],
-            outputs=[output_text, output_audio],
+            inputs=[input_text_debug, voice_character, state],
+            outputs=[output_text, output_audio, vehicle_status],
         )
 
         # Set the vehicle status based on the trip progress
@@ -408,15 +470,26 @@ def create_demo(tts_server: bool = False, model="llama3", tts=True):
 
         # Save and transcribe the audio
         input_audio.stop_recording(
-            fn=save_and_transcribe_run_model, inputs=[input_audio, voice_character, state], outputs=[input_text, output_text, output_audio]
+            fn=save_and_transcribe_run_model,
+            inputs=[input_audio, voice_character, state],
+            outputs=[input_text, output_text, output_audio],
        )
         input_audio_debug.stop_recording(
-            fn=save_and_transcribe_audio, inputs=[input_audio_debug], outputs=[input_text_debug]
+            fn=save_and_transcribe_audio,
+            inputs=[input_audio_debug],
+            outputs=[input_text_debug],
        )
 
         # Clear the history
         clear_history_btn.click(fn=clear_history, inputs=[], outputs=[])
 
+        # Config
+        tts_enabled.change(
+            fn=set_tts_enabled, inputs=[tts_enabled, state], outputs=[state]
+        )
+        llm_backend.change(
+            fn=set_llm_backend, inputs=[llm_backend, state], outputs=[state]
+        )
     return demo
 
 
@@ -424,7 +497,7 @@ def create_demo(tts_server: bool = False, model="llama3", tts=True):
 gr.close_all()
 
 
-demo = create_demo(False, "llama3", tts=False)
+demo = create_demo(False, "llama3", tts_enabled=False)
 demo.launch(
     debug=True,
     server_name="0.0.0.0",
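
The new Config accordion wires the two gr.Radio widgets to plain state-update callbacks, so the state logic can be sanity-checked without launching the UI. A sketch, assuming it runs in main.py's namespace with the same keys create_demo initializes:

```python
state = {"tts_enabled": False, "llm_backend": "Ollama", "user_preferences": ""}

state = set_tts_enabled("Yes", state)
assert state["tts_enabled"] is True

state = set_llm_backend("Replicate", state)
assert state["llm_backend"] == "replicate"

state = set_user_preferences("I love italian food", state)
assert state["user_preferences"] == "I love italian food"
```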