sasan committed on
Commit
e0f71e1
•
1 Parent(s): 3e25dc8

chore: Update dependencies and add pyproject.toml file

Browse files
.DS_Store DELETED
Binary file (6.15 kB)
 
{skills → kitt/skills}/poi.py RENAMED
File without changes
{skills → kitt/skills}/routing.py RENAMED
File without changes
{skills → kitt/skills}/vehicle.py RENAMED
File without changes
{skills → kitt/skills}/weather.py RENAMED
@@ -2,8 +2,9 @@ import requests
2
 
3
  from .common import config, vehicle
4
 
5
- #current weather API
6
- def get_weather(location:str= ""):
 
7
  """
8
  Returns the CURRENT weather in a specified location.
9
  Args:
@@ -11,7 +12,9 @@ def get_weather(location:str= ""):
11
  """
12
 
13
  if location == "":
14
- print(f"get_weather: location is empty, using the vehicle location. ({vehicle.location})")
 
 
15
  location = vehicle.location
16
 
17
  # The endpoint URL provided by WeatherAPI
@@ -29,16 +32,16 @@ def get_weather(location:str= ""):
29
  weather_data = response.json()
30
 
31
  # Extracting the necessary pieces of data
32
- location = weather_data['location']['name']
33
- region = weather_data['location']['region']
34
- country = weather_data['location']['country']
35
- time = weather_data['location']['localtime']
36
- temperature_c = weather_data['current']['temp_c']
37
- condition_text = weather_data['current']['condition']['text']
38
- if 'wind_kph' in weather_data['current']:
39
- wind_kph = weather_data['current']['wind_kph']
40
- humidity = weather_data['current']['humidity']
41
- feelslike_c = weather_data['current']['feelslike_c']
42
 
43
  # Formulate the sentences - {region}, {country}
44
  weather_sentences = (
@@ -49,65 +52,68 @@ def get_weather(location:str= ""):
49
  )
50
  return weather_sentences, weather_data
51
 
52
- #weather forecast API
53
- def get_forecast(city_name:str= "", when = 0, **kwargs):
 
54
  """
55
  Returns the weather forecast in a specified number of days for a specified city .
56
  Args:
57
  city_name (string) : Required. The name of the city.
58
  when (int) : Required. in number of days (until the day for which we want to know the forecast) (example: tomorrow is 1, in two days is 2, etc.)
59
  """
60
- #print(when)
61
- when +=1
62
  # The endpoint URL provided by WeatherAPI
63
  url = f"http://api.weatherapi.com/v1/forecast.json?key={WEATHER_API_KEY}&q={city_name}&days={str(when)}&aqi=no"
64
 
65
-
66
  # Make the API request
67
  response = requests.get(url)
68
 
69
  if response.status_code == 200:
70
  # Parse the JSON response
71
  data = response.json()
72
-
73
  # Initialize an empty string to hold our result
74
  forecast_sentences = ""
75
 
76
  # Extract city information
77
- location = data.get('location', {})
78
- city_name = location.get('name', 'the specified location')
79
-
80
- #print(data)
81
-
82
 
83
  # Extract the forecast days
84
- forecast_days = data.get('forecast', {}).get('forecastday', [])[when-1:]
85
- #number = 0
86
-
87
- #print (forecast_days)
88
 
89
  for day in forecast_days:
90
- date = day.get('date', 'a specific day')
91
- conditions = day.get('day', {}).get('condition', {}).get('text', 'weather conditions')
92
- max_temp_c = day.get('day', {}).get('maxtemp_c', 'N/A')
93
- min_temp_c = day.get('day', {}).get('mintemp_c', 'N/A')
94
- chance_of_rain = day.get('day', {}).get('daily_chance_of_rain', 'N/A')
95
-
 
 
 
 
96
  if when == 1:
97
- number_str = 'today'
98
  elif when == 2:
99
- number_str = 'tomorrow'
100
  else:
101
- number_str = f'in {when-1} days'
102
 
103
  # Generate a sentence for the day's forecast
104
  forecast_sentence = f"On {date} ({number_str}) in {city_name}, the weather will be {conditions} with a high of {max_temp_c}°C and a low of {min_temp_c}°C. There's a {chance_of_rain}% chance of rain. "
105
-
106
- #number = number + 1
107
  # Add the sentence to the result
108
  forecast_sentences += forecast_sentence
109
  return forecast_sentences
110
  else:
111
  # Handle errors
112
- print( f"Failed to get weather data: {response.status_code}, {response.text}")
113
- return f'error {response.status_code}'
 
2
 
3
  from .common import config, vehicle
4
 
5
+
6
+ # current weather API
7
+ def get_weather(location: str = ""):
8
  """
9
  Returns the CURRENT weather in a specified location.
10
  Args:
 
12
  """
13
 
14
  if location == "":
15
+ print(
16
+ f"get_weather: location is empty, using the vehicle location. ({vehicle.location})"
17
+ )
18
  location = vehicle.location
19
 
20
  # The endpoint URL provided by WeatherAPI
 
32
  weather_data = response.json()
33
 
34
  # Extracting the necessary pieces of data
35
+ location = weather_data["location"]["name"]
36
+ region = weather_data["location"]["region"]
37
+ country = weather_data["location"]["country"]
38
+ time = weather_data["location"]["localtime"]
39
+ temperature_c = weather_data["current"]["temp_c"]
40
+ condition_text = weather_data["current"]["condition"]["text"]
41
+ if "wind_kph" in weather_data["current"]:
42
+ wind_kph = weather_data["current"]["wind_kph"]
43
+ humidity = weather_data["current"]["humidity"]
44
+ feelslike_c = weather_data["current"]["feelslike_c"]
45
 
46
  # Formulate the sentences - {region}, {country}
47
  weather_sentences = (
 
52
  )
53
  return weather_sentences, weather_data
54
 
55
+
56
+ # weather forecast API
57
+ def get_forecast(city_name: str = "", when=0, **kwargs):
58
  """
59
  Returns the weather forecast in a specified number of days for a specified city .
60
  Args:
61
  city_name (string) : Required. The name of the city.
62
  when (int) : Required. in number of days (until the day for which we want to know the forecast) (example: tomorrow is 1, in two days is 2, etc.)
63
  """
64
+
65
+ when += 1
66
  # The endpoint URL provided by WeatherAPI
67
  url = f"http://api.weatherapi.com/v1/forecast.json?key={WEATHER_API_KEY}&q={city_name}&days={str(when)}&aqi=no"
68
 
 
69
  # Make the API request
70
  response = requests.get(url)
71
 
72
  if response.status_code == 200:
73
  # Parse the JSON response
74
  data = response.json()
75
+
76
  # Initialize an empty string to hold our result
77
  forecast_sentences = ""
78
 
79
  # Extract city information
80
+ location = data.get("location", {})
81
+ city_name = location.get("name", "the specified location")
82
+
83
+ # print(data)
 
84
 
85
  # Extract the forecast days
86
+ forecast_days = data.get("forecast", {}).get("forecastday", [])[when - 1 :]
87
+ # number = 0
88
+
89
+ # print (forecast_days)
90
 
91
  for day in forecast_days:
92
+ date = day.get("date", "a specific day")
93
+ conditions = (
94
+ day.get("day", {})
95
+ .get("condition", {})
96
+ .get("text", "weather conditions")
97
+ )
98
+ max_temp_c = day.get("day", {}).get("maxtemp_c", "N/A")
99
+ min_temp_c = day.get("day", {}).get("mintemp_c", "N/A")
100
+ chance_of_rain = day.get("day", {}).get("daily_chance_of_rain", "N/A")
101
+
102
  if when == 1:
103
+ number_str = "today"
104
  elif when == 2:
105
+ number_str = "tomorrow"
106
  else:
107
+ number_str = f"in {when-1} days"
108
 
109
  # Generate a sentence for the day's forecast
110
  forecast_sentence = f"On {date} ({number_str}) in {city_name}, the weather will be {conditions} with a high of {max_temp_c}°C and a low of {min_temp_c}°C. There's a {chance_of_rain}% chance of rain. "
111
+
112
+ # number = number + 1
113
  # Add the sentence to the result
114
  forecast_sentences += forecast_sentence
115
  return forecast_sentences
116
  else:
117
  # Handle errors
118
+ print(f"Failed to get weather data: {response.status_code}, {response.text}")
119
+ return f"error {response.status_code}"
kitt.py → main.py RENAMED
@@ -1,23 +1,18 @@
1
  import time
2
  import gradio as gr
3
  import numpy as np
4
- import requests
5
  import torch
6
  import torchaudio
7
  from transformers import pipeline
 
8
 
9
-
10
-
11
- import skills
12
- from skills.common import config, vehicle
13
- from skills.routing import calculate_route
14
  import ollama
15
 
16
- ### LLM Stuff ###
17
- from langchain_community.llms import Ollama
18
  from langchain.tools.base import StructuredTool
19
 
20
- from skills import (
21
  get_weather,
22
  find_route,
23
  get_forecast,
@@ -25,10 +20,10 @@ from skills import (
25
  search_points_of_interests,
26
  search_along_route_w_coordinates,
27
  do_anything_else,
28
- date_time_info
29
  )
30
- from skills import extract_func_args
31
- from core import voice_options, load_tts_pipeline, tts_gradio
32
 
33
 
34
  global_context = {
@@ -52,6 +47,17 @@ Answer questions concisely and do not mention what you base your reply on."
52
  User Query: Question: {input}<human_end>
53
  """
54
 
 
 
 
 
 
 
 
 
 
 
 
55
  def get_prompt(template, input, history, tools):
56
  # "vehicle_status": vehicle_status_fn()[0]
57
  kwargs = {"history": history, "input": input}
@@ -66,12 +72,14 @@ def get_prompt(template, input, history, tools):
66
 
67
  return template.format(**kwargs).replace("{{", "{").replace("}}", "}")
68
 
 
69
  def use_tool(func_name, kwargs, tools):
70
  for tool in tools:
71
  if tool.name == func_name:
72
  return tool.invoke(input=kwargs)
73
  return None
74
 
 
75
  # llm = Ollama(model="nexusraven", stop=["\nReflection:", "\nThought:"], keep_alive=60*10)
76
 
77
 
@@ -84,7 +92,7 @@ def search_along_route(query=""):
84
 
85
  Args:
86
  query (str, optional): The type of point of interest to search for. Defaults to "restaurant".
87
-
88
  """
89
  points = global_context["route_points"]
90
  # maybe reshape
@@ -119,16 +127,14 @@ def run_generic_model(query):
119
  "options": {
120
  # "temperature": 0.1,
121
  # "stop":["\nReflection:", "\nThought:"]
122
- }
123
  }
124
  out = ollama.generate(**data)
125
  return out["response"]
126
 
127
 
128
- def run_model(query, voice_character):
129
- query = query.strip().replace("'", "")
130
- print("Query: ", query)
131
- global_context["query"] = query
132
  global_context["prompt"] = get_prompt(RAVEN_PROMPT_FUNC, query, "", tools)
133
  print("Prompt: ", global_context["prompt"])
134
  data = {
@@ -137,16 +143,13 @@ def run_model(query, voice_character):
137
  "model": "nexusraven",
138
  # "model": "smangrul/llama-3-8b-instruct-function-calling",
139
  "raw": True,
140
- "options": {
141
- "temperature": 0.5,
142
- "stop":["\nReflection:", "\nThought:"]
143
- }
144
  }
145
  out = ollama.generate(**data)
146
  llm_response = out["response"]
147
  if "Call: " in llm_response:
148
  print(f"llm_response: {llm_response}")
149
- llm_response = llm_response.replace("<bot_end>"," ")
150
  func_name, kwargs = extract_func_args(llm_response)
151
  print(f"Function: {func_name}, Args: {kwargs}")
152
  if func_name == "do_anything_else":
@@ -159,7 +162,57 @@ def run_model(query, voice_character):
159
  if type(output_text) == tuple:
160
  output_text = output_text[0]
161
  gr.Info(f"Output text: {output_text}, generating voice output...")
162
- return output_text, tts_gradio(tts_pipeline, output_text, voice_character, speaker_embedding_cache)[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
 
165
  def calculate_route_gradio(origin, destination):
@@ -171,7 +224,9 @@ def calculate_route_gradio(origin, destination):
171
 
172
  def update_vehicle_status(trip_progress):
173
  n_points = len(global_context["route_points"])
174
- new_coords = global_context["route_points"][min(int(trip_progress / 100 * n_points), n_points - 1)]
 
 
175
  new_coords = new_coords["latitude"], new_coords["longitude"]
176
  print(f"Trip progress: {trip_progress}, len: {n_points}, new_coords: {new_coords}")
177
  vehicle.location_coordinates = new_coords
@@ -180,13 +235,17 @@ def update_vehicle_status(trip_progress):
180
 
181
 
182
  device = "cuda" if torch.cuda.is_available() else "cpu"
183
- transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en", device=device)
 
 
184
 
185
 
186
  def save_audio_as_wav(data, sample_rate, file_path):
187
  # make a tensor from the numpy array
188
  data = torch.tensor(data).reshape(1, -1)
189
- torchaudio.save(file_path, data, sample_rate=sample_rate, bits_per_sample=16, encoding="PCM_S")
 
 
190
 
191
 
192
  def save_and_transcribe_audio(audio):
@@ -200,16 +259,17 @@ def save_and_transcribe_audio(audio):
200
  # add timestamp to file name
201
  filename = f"recordings/audio{time.time()}.wav"
202
  save_audio_as_wav(y, sr, filename)
203
-
204
  sr, y = audio
205
  y = y.astype(np.float32)
206
  y /= np.max(np.abs(y))
207
- text = transcriber({"sampling_rate": sr, "raw":y})["text"]
208
  except Exception as e:
209
  print(f"Error: {e}")
210
  return "Error transcribing audio"
211
  return text
212
 
 
213
  # to be able to use the microphone on chrome, you will have to go to chrome://flags/#unsafely-treat-insecure-origin-as-secure and enter http://10.186.115.21:7860/
214
  # in "Insecure origins treated as secure", enable it and relaunch chrome
215
 
@@ -218,114 +278,148 @@ def save_and_transcribe_audio(audio):
218
  # What's the closest restaurant from here?
219
 
220
 
221
- tts_pipeline = load_tts_pipeline()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
 
224
- with gr.Blocks(theme=gr.themes.Default()) as demo:
225
- state = gr.State(
226
- value={
227
- # "context": initial_context,
228
- "query": "",
229
- "route_points": [],
230
- }
231
- )
232
- trip_points = gr.State(value=[])
233
-
234
- with gr.Row():
235
- with gr.Column(scale=1, min_width=300):
236
- time_picker = gr.Dropdown(
237
- choices=hour_options,
238
- label="What time is it? (HH:MM)",
239
- value="08:00:00",
240
- interactive=True,
241
- )
242
- history = gr.Radio(
243
- ["Yes", "No"],
244
- label="Maintain the conversation history?",
245
- value="No",
246
- interactive=True,
247
- )
248
- voice_character = gr.Radio(choices=voice_options, label='Choose a voice', value=voice_options[0], show_label=True)
249
- origin = gr.Textbox(
250
- value="Mondorf-les-Bains, Luxembourg", label="Origin", interactive=True
251
- )
252
- destination = gr.Textbox(
253
- value="Rue Alphonse Weicker, Luxembourg",
254
- label="Destination",
255
- interactive=True,
256
- )
257
-
258
- with gr.Column(scale=2, min_width=600):
259
- map_plot = gr.Plot()
260
- trip_progress = gr.Slider(0, 100, step=5, label="Trip progress", interactive=True)
261
-
262
- # map_if = gr.Interface(fn=plot_map, inputs=year_input, outputs=map_plot)
263
-
264
- with gr.Row():
265
- with gr.Column():
266
- input_audio = gr.Audio(
267
- type="numpy",sources=["microphone"], label="Input audio", elem_id="input_audio"
268
- )
269
- input_text = gr.Textbox(
270
- value="How is the weather?", label="Input text", interactive=True
271
- )
272
- vehicle_status = gr.JSON(
273
- value=vehicle.model_dump_json(), label="Vehicle status"
274
- )
275
- with gr.Column():
276
- output_audio = gr.Audio(label="output audio", autoplay=True)
277
- output_text = gr.TextArea(value="", label="Output text", interactive=False)
278
- # iface = gr.Interface(
279
- # fn=transcript,
280
- # inputs=[
281
- # gr.Textbox(value=initial_context, visible=False),
282
- # gr.Audio(type="filepath", label="input audio", elem_id="recorder"),
283
- # voice_character,
284
- # emotion,
285
- # place,
286
- # time_picker,
287
- # history,
288
- # gr.State(), # This will keep track of the context state across interactions.
289
- # ],
290
- # outputs=[gr.Audio(label="output audio"), gr.Textbox(visible=False), gr.State()],
291
- # head=shortcut_js,
292
- # )
293
-
294
- # Update plot based on the origin and destination
295
- # Sets the current location and destination
296
- origin.submit(
297
- fn=calculate_route_gradio,
298
- inputs=[origin, destination],
299
- outputs=[map_plot, vehicle_status],
300
- )
301
- destination.submit(
302
- fn=calculate_route_gradio,
303
- inputs=[origin, destination],
304
- outputs=[map_plot, vehicle_status],
305
- )
306
 
307
- # Update time based on the time picker
308
- time_picker.select(fn=set_time, inputs=[time_picker], outputs=[vehicle_status])
 
 
 
 
 
 
 
309
 
310
- # Run the model if the input text is changed
311
- input_text.submit(fn=run_model, inputs=[input_text, voice_character], outputs=[output_text, output_audio])
312
 
313
- # Set the vehicle status based on the trip progress
314
- trip_progress.release(
315
- fn=update_vehicle_status, inputs=[trip_progress], outputs=[vehicle_status]
 
 
 
316
  )
317
 
318
- # Save and transcribe the audio
319
- input_audio.stop_recording(
320
- fn=save_and_transcribe_audio, inputs=[input_audio], outputs=[input_text]
 
 
 
 
 
 
 
321
  )
322
 
323
- # close all interfaces open to make the port available
324
- gr.close_all()
325
- # Launch the interface.
326
 
327
  if __name__ == "__main__":
328
- # demo.launch(debug=True, server_name="0.0.0.0", server_port=7860, ssl_verify=False)
329
- demo.launch(debug=True, server_name="0.0.0.0", server_port=7860, ssl_verify=True, share=True)
330
-
331
- # iface.launch(debug=True, share=False, server_name="0.0.0.0", server_port=7860, ssl_verify=False)
 
1
  import time
2
  import gradio as gr
3
  import numpy as np
 
4
  import torch
5
  import torchaudio
6
  from transformers import pipeline
7
+ import typer
8
 
9
+ from kitt.skills.common import config, vehicle
10
+ from kitt.skills.routing import calculate_route
 
 
 
11
  import ollama
12
 
 
 
13
  from langchain.tools.base import StructuredTool
14
 
15
+ from kitt.skills import (
16
  get_weather,
17
  find_route,
18
  get_forecast,
 
20
  search_points_of_interests,
21
  search_along_route_w_coordinates,
22
  do_anything_else,
23
+ date_time_info,
24
  )
25
+ from kitt.skills import extract_func_args
26
+ from kitt.core import voice_options, tts_gradio
27
 
28
 
29
  global_context = {
 
47
  User Query: Question: {input}<human_end>
48
  """
49
 
50
+
51
+ HERMES_PROMPT_FUNC = """
52
+ <|im_start|>system
53
+ You are a helpful AI assistant in a car (vehicle), that follows instructions extremely well. \
54
+ Answer questions concisely and do not mention what you base your reply on.<|im_end|>
55
+ <|im_start|>user
56
+ {{ .Prompt }}<|im_end|>
57
+ <|im_start|>assistant
58
+ """
59
+
60
+
61
  def get_prompt(template, input, history, tools):
62
  # "vehicle_status": vehicle_status_fn()[0]
63
  kwargs = {"history": history, "input": input}
 
72
 
73
  return template.format(**kwargs).replace("{{", "{").replace("}}", "}")
74
 
75
+
76
  def use_tool(func_name, kwargs, tools):
77
  for tool in tools:
78
  if tool.name == func_name:
79
  return tool.invoke(input=kwargs)
80
  return None
81
 
82
+
83
  # llm = Ollama(model="nexusraven", stop=["\nReflection:", "\nThought:"], keep_alive=60*10)
84
 
85
 
 
92
 
93
  Args:
94
  query (str, optional): The type of point of interest to search for. Defaults to "restaurant".
95
+
96
  """
97
  points = global_context["route_points"]
98
  # maybe reshape
 
127
  "options": {
128
  # "temperature": 0.1,
129
  # "stop":["\nReflection:", "\nThought:"]
130
+ },
131
  }
132
  out = ollama.generate(**data)
133
  return out["response"]
134
 
135
 
136
+
137
+ def run_nexusraven_model(query, voice_character):
 
 
138
  global_context["prompt"] = get_prompt(RAVEN_PROMPT_FUNC, query, "", tools)
139
  print("Prompt: ", global_context["prompt"])
140
  data = {
 
143
  "model": "nexusraven",
144
  # "model": "smangrul/llama-3-8b-instruct-function-calling",
145
  "raw": True,
146
+ "options": {"temperature": 0.5, "stop": ["\nReflection:", "\nThought:"]},
 
 
 
147
  }
148
  out = ollama.generate(**data)
149
  llm_response = out["response"]
150
  if "Call: " in llm_response:
151
  print(f"llm_response: {llm_response}")
152
+ llm_response = llm_response.replace("<bot_end>", " ")
153
  func_name, kwargs = extract_func_args(llm_response)
154
  print(f"Function: {func_name}, Args: {kwargs}")
155
  if func_name == "do_anything_else":
 
162
  if type(output_text) == tuple:
163
  output_text = output_text[0]
164
  gr.Info(f"Output text: {output_text}, generating voice output...")
165
+ return (
166
+ output_text,
167
+ tts_gradio(output_text, voice_character, speaker_embedding_cache)[0],
168
+ )
169
+
170
+
171
+ def run_llama3_model(query, voice_character):
172
+ global_context["prompt"] = get_prompt(RAVEN_PROMPT_FUNC, query, "", tools)
173
+ print("Prompt: ", global_context["prompt"])
174
+ data = {
175
+ "prompt": global_context["prompt"],
176
+ # "streaming": False,
177
+ # "model": "smangrul/llama-3-8b-instruct-function-calling",
178
+ "model": "elvee/hermes-2-pro-llama-3:8b-Q5_K_M",
179
+ "raw": True,
180
+ "options": {"temperature": 0.5, "stop": ["\nReflection:", "\nThought:"]},
181
+ }
182
+ out = ollama.generate(**data)
183
+ llm_response = out["response"]
184
+ if "Call: " in llm_response:
185
+ print(f"llm_response: {llm_response}")
186
+ llm_response = llm_response.replace("<bot_end>", " ")
187
+ func_name, kwargs = extract_func_args(llm_response)
188
+ print(f"Function: {func_name}, Args: {kwargs}")
189
+ if func_name == "do_anything_else":
190
+ output_text = run_generic_model(query)
191
+ else:
192
+ output_text = use_tool(func_name, kwargs, tools)
193
+ else:
194
+ output_text = out["response"]
195
+
196
+ if type(output_text) == tuple:
197
+ output_text = output_text[0]
198
+ gr.Info(f"Output text: {output_text}, generating voice output...")
199
+ return (
200
+ output_text,
201
+ tts_gradio(output_text, voice_character, speaker_embedding_cache)[0],
202
+ )
203
+
204
+
205
+ def run_model(query, voice_character, state):
206
+
207
+ model = state.get("model", "nexusraven")
208
+ query = query.strip().replace("'", "")
209
+ print("Query: ", query)
210
+ print("Model: ", model)
211
+ global_context["query"] = query
212
+ if model == "nexusraven":
213
+ return run_nexusraven_model(query, voice_character)
214
+ elif model == "llama3":
215
+ return run_llama3_model(query, voice_character)
216
 
217
 
218
  def calculate_route_gradio(origin, destination):
 
224
 
225
  def update_vehicle_status(trip_progress):
226
  n_points = len(global_context["route_points"])
227
+ new_coords = global_context["route_points"][
228
+ min(int(trip_progress / 100 * n_points), n_points - 1)
229
+ ]
230
  new_coords = new_coords["latitude"], new_coords["longitude"]
231
  print(f"Trip progress: {trip_progress}, len: {n_points}, new_coords: {new_coords}")
232
  vehicle.location_coordinates = new_coords
 
235
 
236
 
237
  device = "cuda" if torch.cuda.is_available() else "cpu"
238
+ transcriber = pipeline(
239
+ "automatic-speech-recognition", model="openai/whisper-base.en", device=device
240
+ )
241
 
242
 
243
  def save_audio_as_wav(data, sample_rate, file_path):
244
  # make a tensor from the numpy array
245
  data = torch.tensor(data).reshape(1, -1)
246
+ torchaudio.save(
247
+ file_path, data, sample_rate=sample_rate, bits_per_sample=16, encoding="PCM_S"
248
+ )
249
 
250
 
251
  def save_and_transcribe_audio(audio):
 
259
  # add timestamp to file name
260
  filename = f"recordings/audio{time.time()}.wav"
261
  save_audio_as_wav(y, sr, filename)
262
+
263
  sr, y = audio
264
  y = y.astype(np.float32)
265
  y /= np.max(np.abs(y))
266
+ text = transcriber({"sampling_rate": sr, "raw": y})["text"]
267
  except Exception as e:
268
  print(f"Error: {e}")
269
  return "Error transcribing audio"
270
  return text
271
 
272
+
273
  # to be able to use the microphone on chrome, you will have to go to chrome://flags/#unsafely-treat-insecure-origin-as-secure and enter http://10.186.115.21:7860/
274
  # in "Insecure origins treated as secure", enable it and relaunch chrome
275
 
 
278
  # What's the closest restaurant from here?
279
 
280
 
281
+ def create_demo(tts_server: bool = False, model="llama3"):
282
+ print(f"Running the demo with model: {model} and TTSServer: {tts_server}")
283
+ with gr.Blocks(theme=gr.themes.Default()) as demo:
284
+ state = gr.State(
285
+ value={
286
+ # "context": initial_context,
287
+ "query": "",
288
+ "route_points": [],
289
+ "model": model,
290
+ }
291
+ )
292
+ trip_points = gr.State(value=[])
293
+
294
+ with gr.Row():
295
+ with gr.Column(scale=1, min_width=300):
296
+ time_picker = gr.Dropdown(
297
+ choices=hour_options,
298
+ label="What time is it? (HH:MM)",
299
+ value="08:00:00",
300
+ interactive=True,
301
+ )
302
+ history = gr.Radio(
303
+ ["Yes", "No"],
304
+ label="Maintain the conversation history?",
305
+ value="No",
306
+ interactive=True,
307
+ )
308
+ voice_character = gr.Radio(
309
+ choices=voice_options,
310
+ label="Choose a voice",
311
+ value=voice_options[0],
312
+ show_label=True,
313
+ )
314
+ origin = gr.Textbox(
315
+ value="Mondorf-les-Bains, Luxembourg",
316
+ label="Origin",
317
+ interactive=True,
318
+ )
319
+ destination = gr.Textbox(
320
+ value="Rue Alphonse Weicker, Luxembourg",
321
+ label="Destination",
322
+ interactive=True,
323
+ )
324
+
325
+ with gr.Column(scale=2, min_width=600):
326
+ map_plot = gr.Plot()
327
+ trip_progress = gr.Slider(
328
+ 0, 100, step=5, label="Trip progress", interactive=True
329
+ )
330
+
331
+ # map_if = gr.Interface(fn=plot_map, inputs=year_input, outputs=map_plot)
332
+
333
+ with gr.Row():
334
+ with gr.Column():
335
+ input_audio = gr.Audio(
336
+ type="numpy",
337
+ sources=["microphone"],
338
+ label="Input audio",
339
+ elem_id="input_audio",
340
+ )
341
+ input_text = gr.Textbox(
342
+ value="How is the weather?", label="Input text", interactive=True
343
+ )
344
+ vehicle_status = gr.JSON(
345
+ value=vehicle.model_dump_json(), label="Vehicle status"
346
+ )
347
+ with gr.Column():
348
+ output_audio = gr.Audio(label="output audio", autoplay=True)
349
+ output_text = gr.TextArea(
350
+ value="", label="Output text", interactive=False
351
+ )
352
+
353
+ # Update plot based on the origin and destination
354
+ # Sets the current location and destination
355
+ origin.submit(
356
+ fn=calculate_route_gradio,
357
+ inputs=[origin, destination],
358
+ outputs=[map_plot, vehicle_status],
359
+ )
360
+ destination.submit(
361
+ fn=calculate_route_gradio,
362
+ inputs=[origin, destination],
363
+ outputs=[map_plot, vehicle_status],
364
+ )
365
+
366
+ # Update time based on the time picker
367
+ time_picker.select(fn=set_time, inputs=[time_picker], outputs=[vehicle_status])
368
+
369
+ # Run the model if the input text is changed
370
+ input_text.submit(
371
+ fn=run_model,
372
+ inputs=[input_text, voice_character, state],
373
+ outputs=[output_text, output_audio],
374
+ )
375
+
376
+ # Set the vehicle status based on the trip progress
377
+ trip_progress.release(
378
+ fn=update_vehicle_status, inputs=[trip_progress], outputs=[vehicle_status]
379
+ )
380
+
381
+ # Save and transcribe the audio
382
+ input_audio.stop_recording(
383
+ fn=save_and_transcribe_audio, inputs=[input_audio], outputs=[input_text]
384
+ )
385
+ return demo
386
 
387
 
388
+ # close all interfaces open to make the port available
389
+ gr.close_all()
390
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
 
392
+ demo = create_demo(False, "llama3")
393
+ demo.launch(
394
+ debug=True,
395
+ server_name="0.0.0.0",
396
+ server_port=7860,
397
+ ssl_verify=False,
398
+ share=False,
399
+ )
400
+ app = typer.Typer()
401
 
 
 
402
 
403
+ @app.command()
404
+ def run(tts_server: bool = False):
405
+ global demo
406
+ demo = create_demo(tts_server)
407
+ demo.launch(
408
+ debug=True, server_name="0.0.0.0", server_port=7860, ssl_verify=True, share=True
409
  )
410
 
411
+
412
+ @app.command()
413
+ def dev(tts_server: bool = False, model: str = "llama3"):
414
+ demo = create_demo(tts_server, model)
415
+ demo.launch(
416
+ debug=True,
417
+ server_name="0.0.0.0",
418
+ server_port=7860,
419
+ ssl_verify=False,
420
+ share=False,
421
  )
422
 
 
 
 
423
 
424
  if __name__ == "__main__":
425
+ app()
 
 
 
pyproject.toml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "kitt"
3
+ version = "0.1.0"
4
+ description = "LLM-based Voice Assistant for Cars"
5
+ authors = ["Sasan <s.jafarnejad@gmail.com>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "^3.10"
11
+
12
+
13
+ [tool.poetry.scripts]
14
+ kitt = 'main:app'
15
+
16
+ [build-system]
17
+ requires = ["poetry-core"]
18
+ build-backend = "poetry.core.masonry.api"
skills/__init__.py DELETED
@@ -1,43 +0,0 @@
1
- from datetime import datetime
2
- import inspect
3
-
4
- from .common import execute_function_call, extract_func_args, vehicle as vehicle_obj
5
- from .weather import get_weather, get_forecast
6
- from .routing import find_route
7
- from .poi import search_points_of_interests, search_along_route_w_coordinates
8
- from .vehicle import vehicle_status
9
-
10
-
11
-
12
- def date_time_info():
13
- """Get the current date and time."""
14
- time = getattr(vehicle_obj, "time")
15
- date = getattr(vehicle_obj, "date")
16
- datetime_obj = datetime.fromisoformat(f"{date}T{time}")
17
- human_readable_datetime = datetime_obj.strftime("%I:%M %p %A, %B %d, %Y")
18
- return f"It is {human_readable_datetime}."
19
-
20
-
21
- def do_anything_else():
22
- """If the user wants to do anything else call this function. If the question doesn't match any of the functions use this one."""
23
- return True
24
-
25
-
26
-
27
- def format_functions_for_prompt_raven(*functions):
28
- """Format functions for use in Prompt Raven.
29
-
30
- Args:
31
- *functions (function): One or more functions to format.
32
- """
33
- formatted_functions = []
34
- for func in functions:
35
- signature = f"{func.__name__}{inspect.signature(func)}"
36
- docstring = inspect.getdoc(func)
37
- formatted_functions.append(
38
- f"Function:\n<func_start>{signature}<func_end>\n<docstring_start>\n{docstring}\n<docstring_end>"
39
- )
40
- return "\n".join(formatted_functions)
41
-
42
-
43
- SKILLS_PROMPT = format_functions_for_prompt_raven(get_weather, get_forecast, find_route, search_points_of_interests)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
skills/common.py DELETED
@@ -1,62 +0,0 @@
1
- import re
2
- from typing import Union
3
-
4
-
5
- from pydantic_settings import BaseSettings, SettingsConfigDict
6
- from pydantic import BaseModel
7
-
8
- import skills
9
-
10
- class Settings(BaseSettings):
11
- WEATHER_API_KEY: str
12
- TOMTOM_API_KEY: str
13
-
14
- model_config = SettingsConfigDict(env_file=".env")
15
-
16
-
17
- class VehicleStatus(BaseModel):
18
- location: str
19
- location_coordinates: tuple[float, float] # (latitude, longitude)
20
- date: str
21
- time: str
22
- destination: str
23
-
24
-
25
- def execute_function_call(text: str, dry_run=False) -> str:
26
- function_name_match = re.search(r"Call: (\w+)", text)
27
- function_name = function_name_match.group(1) if function_name_match else None
28
- arguments = eval(f"dict{text.split(function_name)[1].strip()}")
29
- function = getattr(skills, function_name) if function_name else None
30
-
31
- if dry_run:
32
- print(f"{function_name}(**{arguments})")
33
- return "Dry run successful"
34
-
35
- if function:
36
- out = function(**arguments)
37
- try:
38
- if function:
39
- out = function(**arguments)
40
- except Exception as e:
41
- out = str(e)
42
- return out
43
-
44
-
45
- def extract_func_args(text: str) -> tuple[str, dict]:
46
- function_name_match = re.search(r"Call: (\w+)", text)
47
- function_name = function_name_match.group(1) if function_name_match else None
48
- if not function_name:
49
- raise ValueError("No function name found in text")
50
- arguments = eval(f"dict{text.split(function_name)[1].strip()}")
51
- return function_name, arguments
52
-
53
-
54
- config = Settings() # type: ignore
55
-
56
- vehicle = VehicleStatus(
57
- location="Rue Alphonse Weicker, Luxembourg",
58
- location_coordinates=(49.505, 6.28111),
59
- date="2025-05-06",
60
- time="08:00:00",
61
- destination="Rue Alphonse Weicker, Luxembourg"
62
- )