Spaces:
Runtime error
Runtime error
File size: 14,152 Bytes
6a095da 55d6a73 072309c 55d6a73 79f3755 5723e5f 55d6a73 0fdfd49 55d6a73 3953589 0fdfd49 3953589 0fdfd49 271d8f5 3953589 0fdfd49 271d8f5 6a095da 3953589 0fdfd49 3953589 6a095da 271d8f5 6a095da 3953589 271d8f5 0fdfd49 271d8f5 0fdfd49 271d8f5 0fdfd49 271d8f5 499367f 271d8f5 0fdfd49 55d6a73 1917fa4 55d6a73 1917fa4 55d6a73 1917fa4 55d6a73 0fdfd49 271d8f5 79f3755 55d6a73 0fdfd49 79f3755 0fdfd49 6a095da 0fdfd49 1917fa4 0fdfd49 55d6a73 3953589 0fdfd49 55d6a73 271d8f5 448581a 271d8f5 3953589 499367f a9a2fc6 499367f 55d6a73 0fdfd49 1917fa4 55d6a73 79f3755 55d6a73 79f3755 0fdfd49 3953589 0fdfd49 55d6a73 6a095da 1917fa4 6a095da 55d6a73 0fdfd49 2e5d63a 55d6a73 3953589 55d6a73 5723e5f 55d6a73 6a095da 55d6a73 3953589 55d6a73 3953589 79f3755 2e5d63a 79f3755 3953589 0fdfd49 6a095da 0fdfd49 1917fa4 6a095da 0fdfd49 1917fa4 499367f 0fdfd49 6a095da 271d8f5 6a095da 79f3755 0fdfd49 6a095da 271d8f5 0fdfd49 1917fa4 6a095da 271d8f5 0fdfd49 3953589 499367f 3953589 0fdfd49 499367f 072309c 55d6a73 5723e5f 55d6a73 76510b9 55d6a73 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 |
from typing import List
import openai
import gradio as gr
import pandas as pd
import tiktoken
import re
from openai.error import AuthenticationError
# Tokenizer matching gpt-3.5-turbo; used by Bot._count_tokens to budget context.
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
# Print full dataframes (query results are rendered as text into the chat).
pd.set_option('display.max_columns', None)  # Display all columns
pd.set_option('display.width', None)  # Automatically adjust the width to fit the content
class Bot():
    """Backend for an AI car-salesman chatbot (gradio front end below).

    The model is instructed to answer inventory questions by emitting a
    pandas ``df.query(...)`` expression; :meth:`query_db` executes that
    expression against the uploaded CSV inventory and feeds the result
    back to the model to produce the final recommendation.
    """

    def __init__(self):
        self.ger_system_prompt = None
        self.eng_system_prompt = None
        self.api_key = None
        self.lang = "English"
        self.system_prompt = ""
        self.df = None
        self.available_car_models = None
        # CSV column names (German) shown in answers, plus the English
        # names used after renaming for the English UI.
        self.columns_in_response = ["marke", "modell", "kategorie", "kilometer", "preis", "grundfarbe"]
        self.columns_in_response_eng = ["brand", "model", "category", "kilometer", "price", "color"]
        # BUGFIX: build the prompts BEFORE selecting one. The original
        # called change_language() first, which copied the still-None
        # eng_system_prompt into self.system_prompt.
        self.set_prompts()
        self.change_language(self.lang, [])

    def set_prompts(self):
        """(Re)build the English and German system prompts.

        Called again after a CSV upload so the prompts embed the current
        list of available car brands (``self.available_car_models``).
        """
        self.eng_system_prompt = f"""You are a virtual car salesman salesman assistant for an online German car dealership. You only recommend cars that are currently in your inventory.
To access your inventory return dataframe query method like this:
```
df.query("brand.str.contains('ford')")
```
The dataframe has the following columns: ["brand", "model", "category", "kilometer", "price"], with all values in lowercase.
The available car categories are: limousine, offroad, estatecar, smallcar, othercar, and carbo.
The available car brands are: {self.available_car_models}.
To search for specific car information, use the contains() method instead of equals().
Do not say that any models in your inventory or that any models are available, show the query instead.
Do not ask the user if you need to check the inventory, show them how to query the dataframe.
Users should not know that you are using a dataframe. They should think that you are using a database.
If you need to check the inventory to answer the user's question, only show the query and nothing else."""
        # BUGFIX: the German prompt said "Datenbank ... Datenbank" where the
        # English original says "dataframe ... database"; first occurrence
        # corrected to "Dataframe" so the two prompts agree.
        self.ger_system_prompt = f"""Du bist ein virtueller Autoverkäufer für einen deutschen Online-Autohändler. Du empfiehlst nur Autos, die derzeit in deinem Inventar sind.
Um auf dein Inventar zuzugreifen, gibst du die Datenbankabfrage-Methode wie folgt ein:
```
df.query("marke.str.contains('ford')")
```
Die Datenbank hat die folgenden Spalten: ["marke", "modell", "kategorie", "kilometer", "preis"], mit allen Werten in Kleinbuchstaben.
Die verfügbaren Autokategorien sind: limousine, offroad, estatecar, smallcar, othercar und carbo.
Die verfügbaren Automarken sind: {self.available_car_models}.
Um nach bestimmten Fahrzeuginformationen zu suchen, verwende die contains() -Methode anstelle der equals() -Methode.
Sage nicht, dass irgendeine Modelle in deinem Inventar sind oder dass irgendeine Modelle verfügbar sind, zeige die Abfrage an.
Frag den Benutzer nicht, ob du das Inventar überprüfen musst, zeige ihm, wie du die Datenbankabfrage durchführst.
Benutzer sollten nicht wissen, dass du einen Dataframe verwendest. Sie sollten denken, dass du eine Datenbank verwendest.
Wenn du das Inventar überprüfen musst, um die Frage des Benutzers zu beantworten, zeige nur die Abfrage und nichts anderes an."""

    def change_language(self, lang, history):
        """Switch UI/system-prompt language.

        Args:
            lang: "English" or "German"; anything else falls back to English.
            history: current chat history (list of (user, bot) tuples).

        Returns:
            A history containing a setup hint if the API key or CSV is
            missing, otherwise an empty history (switching clears the chat).
        """
        self.lang = lang if lang in ["English", "German"] else "English"
        if self.lang == "English":
            self.system_prompt = self.eng_system_prompt
            if not self.api_key:
                return [(None, "Please enter your OpenAI API KEY")]
            if self.df is None:
                return [(None, "Please upload a csv file")]
        else:
            self.system_prompt = self.ger_system_prompt
            if not self.api_key:
                return [(None, "Bitte geben Sie Ihren OpenAI API-Schlüssel ein")]
            if self.df is None:
                return [(None, "Bitte laden Sie eine csv-Datei hoch")]
        return []

    def process_file(self, file, history):
        """Load the inventory CSV and prepare ``self.df``.

        Args:
            file: either a path string or a gradio UploadButton file object
                (which carries the path in ``.name``).
            history: current chat history; a hint is appended on bad input.

        Returns:
            The (possibly updated) chat history.
        """
        filename = file
        if not isinstance(file, str):
            # Reject anything that is not a .csv upload.
            if file.name.split(".")[-1] != "csv":
                # BUGFIX: the original referenced an undefined local `lang`
                # here (NameError on every non-CSV upload); use self.lang.
                if self.lang == "English":
                    history.append((None, "Please upload a csv file"))
                else:
                    history.append((None, "Bitte laden Sie eine csv-Datei hoch"))
                return history
            filename = file.name
        df = pd.read_csv(filename)
        # Keep only the columns the bot talks about, plus the fields needed
        # to build the detail-page URLs.
        df = df[self.columns_in_response + ["interne_nummer", "Modellbasis"]]
        df = self._construct_urls(df)
        df = df.rename(columns={"modell2": "modell"})
        # Drop rows with any NaN, then coerce numeric columns to int.
        df = df.dropna()
        df["preis"] = df["preis"].astype(int)
        df["kilometer"] = df["kilometer"].astype(int)
        # Normalise text columns to lowercase so the model's lowercase
        # df.query() expressions match the data.
        df["marke"] = df["marke"].str.lower().str.strip()
        df["modell"] = df["modell"].str.lower().str.strip()
        df["grundfarbe"] = df["grundfarbe"].str.lower().str.strip()
        df["kategorie"] = df["kategorie"].str.lower().str.replace("car.", "").str.strip()
        df = df.drop_duplicates()
        self.available_car_models = str(df["marke"].unique()).replace('\n', "")
        print(self.available_car_models)
        if self.lang == "English":
            df = df.rename(
                columns={"marke": "brand", "modell": "model", "kategorie": "category", "kilometer": "kilometer",
                         "preis": "price", "grundfarbe": "color"})
        df = df.loc[:, ~df.columns.duplicated()]
        self.df = df
        # Rebuild the prompts so they embed the brands from this file.
        self.set_prompts()
        self.change_language(self.lang, history)
        if self.api_key:
            history = []
        return history

    def _construct_urls(self, df):
        """Add a ``url`` column with the dealer detail-page link per row."""
        base_url = "https://www.hugopfohe.de/gebrauchtwagen/details/"
        for index, row in df.iterrows():
            # Skip incomplete rows; dropna() removes them later anyway.
            if row.isna().any():
                continue
            marke = row['marke']
            modellbasis = row['Modellbasis']
            interne_nummer = row['interne_nummer']
            df.at[index, 'url'] = f"{base_url}{marke}_{modellbasis}_{interne_nummer}/?&etcc_cmp=consultant"
        return df

    def check_requirements(self, query, history):
        """Handle API-key entry and missing prerequisites before chatting.

        Returns:
            (history, ready): ``ready`` is True only when an API key and an
            inventory dataframe are available and ``query`` is a normal
            chat message (not a pasted key).
        """
        if query.startswith("sk-"):
            # The user pasted an OpenAI API key instead of a question.
            key = query.strip("\n").strip()
            self.api_key = key
            openai.api_key = key
            return self.process_file("./AI Car-Test - complete data.csv", []), False
        if not self.api_key:
            if self.lang == "English":
                history.append((None, "Please enter your OpenAI API KEY"))
            else:
                history.append((None, "Bitte geben Sie Ihren OpenAI API-Schlüssel ein"))
            return history, False
        if self.df is None:
            # Auto-load the bundled demo inventory. NOTE: the original had
            # an unreachable "please upload a csv" branch after this
            # return; that dead code has been removed.
            return self.process_file("./AI Car-Test - complete data.csv", []), False
        return history, True

    def run_text(self, history, query):
        """Main chat handler.

        Rebuilds the message list from history, appends the system prompt
        just before the new user turn, trims old turns to a token budget,
        calls the model, and executes any inventory query it emits.
        """
        history, passed = self.check_requirements(query, history)
        if not passed:
            return history
        messages = []
        for q, response in history:
            if q:
                messages.append({"role": "user", "content": q})
            if response:
                messages.append({"role": "assistant", "content": response})
        # System prompt goes second-to-last (right before the user turn).
        messages.append({"role": "system", "content": self.system_prompt})
        # Trim the oldest turns (keeping message 0) while over budget.
        # BUGFIX: guard on len(messages) — the original could IndexError on
        # pop(1) when only the system message remained.
        total_tokens = self._count_tokens(messages)
        while total_tokens > 1200 and len(messages) > 1:
            messages.pop(1)
            total_tokens = self._count_tokens(messages)
        messages.append({"role": "user", "content": query})
        print(messages)
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.0,
        )['choices'][0]['message']['content']
        if "df.query" in response:
            response = self.query_db(response, messages)
        # Only the user query and final response go into history; the
        # intermediate query context is not needed.
        history.append((query, response))
        return history

    def query_db(self, response, messages):
        """Execute the ``df.query(...)`` the model produced and re-prompt.

        The query result (or a "no results" instruction) is inserted as a
        system message before the last user turn, and the model is called
        again to phrase the final recommendation.
        """
        # Drop the previous system prompt (second-to-last message).
        messages.pop(-2)
        # Extract the line that contains the query.
        query_line = None
        for line in response.split("\n"):
            if "df.query" in line:
                query_line = line.strip()
                break
        # BUGFIX: the original raised NameError if no line matched.
        if query_line is None:
            return response
        # SECURITY: eval() executes model-generated text. Tolerated here
        # because the model is steered by our own prompt, but this should
        # be replaced with a restricted query parser.
        result = eval("self." + query_line)
        print(query_line)
        if len(result) == 0:
            if self.lang == "English":
                messages.insert(-1, {"role": "system",
                                     "content": "You are a virtual car salesman assistant in an online car dealership. There are no results this query. Help user adjust their preferences or recommend other brands."})
            else:
                messages.insert(-1, {"role": "system",
                                     "content": "Sie sind ein virtueller Autohändler-Assistent in einem Online-Autohaus. Keine Ergebnisse für diese Abfrage. Benutzer bei der Anpassung ihrer Präferenzen oder Empfehlung anderer Marken unterstützen"})
        else:
            # Render the result as plain text for the follow-up prompt.
            if isinstance(result, pd.DataFrame):
                if len(result) > 5:
                    # Show at most 5 cars, sampled for variety.
                    result = result.sample(n=5)
                if self.lang == "English":
                    result = result[self.columns_in_response_eng + ['url']]
                else:
                    result = result[self.columns_in_response + ['url']]
                result = result.to_string(index=False).replace("<br>", "")
            elif isinstance(result, pd.Series):
                result = result.to_string(index=False).replace("<br>", "")
            elif isinstance(result, List):
                result = ", ".join(result)
            else:
                result = str(result)
            print(result)
            if self.lang == "English":
                messages.insert(-1, {"role": "system",
                                     "content": f"You are a virtual car salesman salesman assistant for an online car dealership. You only recommend cars that are currently in stock. Advice cars from this list that are currently in stock:\n\n{result}\n\n" + "Include links to cars in your recommendation from the url column."})
            else:
                messages.insert(-1,
                                {"role": "system",
                                 "content": f"Sie sind ein virtueller Autohändler-Assistent in einem Online-Autohaus. Sie empfehlen nur Autos, die derzeit auf Lager sind. Beraten Sie Autos aus dieser Liste, die derzeit auf Lager sind:\n\n{result}\n\n" + "Fügen Sie Links zu Autos in Ihrer Empfehlung aus der Spalte url hinzu."})
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            temperature=0.1,
        )['choices'][0]['message']['content']
        print(response)
        response = response.replace("<br>", "")
        # Collapse runs of 3+ blank lines down to a single blank line.
        response = re.sub(r'\n{3,}', '\n\n', response)
        return response

    def _count_tokens(self, messages):
        """Return the total tiktoken count over all message contents."""
        total_tokens = 0
        for message in messages:
            total_tokens += len(encoding.encode(message['content']))
        return total_tokens
if __name__ == '__main__':
    bot = Bot()
    # Gradio UI: chat window on top; language dropdown, Clear button and
    # CSV upload button in a row underneath.
    # NOTE(review): this uses the legacy gradio API (.style(), gr.inputs.Dropdown,
    # fractional Column scale) — presumably requires gradio < 4; confirm pin.
    with gr.Blocks(css="#chatbot .overflow-y-auto{height:500px}") as demo:
        # Chat starts with a prompt for the OpenAI key (see Bot.check_requirements).
        chatbot = gr.Chatbot([(None, "Please enter your OpenAI API KEY")], elem_id="chatbot",
                             label="HUGO PFOHE - AI Car Salesman").style(height=500)
        with gr.Row():
            with gr.Column(scale=1):
                txt = gr.Textbox(show_label=False, placeholder="What car would you like to buy?").style(
                    container=False)
        # with gr.Column(scale=0.15):
        #     submit = gr.Button("Submit")
        with gr.Row():
            with gr.Column(scale=0.34, min_width=0):
                lang = gr.inputs.Dropdown(choices=['English', 'German'], label="Language", default=bot.lang)
            # with gr.Column(scale=0.33, min_width=0):
            #     clear = gr.Button("Clear")
            # with gr.Column(scale=0.33, min_width=0):
            #     btn = gr.UploadButton("Upload CSV", file_types=["csv"])
            with gr.Column(scale=0.66, min_width=0):
                with gr.Row():
                    clear = gr.Button("Clear")
                with gr.Row():
                    btn = gr.UploadButton("Upload CSV", file_types=["csv"])
        # Submitting text runs the bot, then a second handler clears the textbox.
        txt.submit(bot.run_text, [chatbot, txt], chatbot)
        txt.submit(lambda: "", None, txt)
        # Changing the language rewrites the chat via Bot.change_language.
        lang.change(bot.change_language, [lang, chatbot], chatbot)
        # submit.click(txt.submit, [chatbot, txt], chatbot)
        # submit.click(lambda: "", None, txt)
        clear.click(lambda: [], None, chatbot)
        btn.upload(bot.process_file, [btn, chatbot], chatbot)
    demo.launch()
|