IliaLarchenko committed on
Commit
c62e737
1 Parent(s): fb73ff7

Added support of the HuggingFace models

Browse files
Files changed (3) hide show
  1. config.py +21 -11
  2. llm.py +36 -26
  3. prompts.py +0 -4
config.py CHANGED
@@ -1,18 +1,28 @@
1
- LLM_URL = f"https://api.openai.com/v1"
2
- LLM_KEY_TYPE = "OPENAI_API_KEY" # there should be an environment variable with this name
 
 
 
 
 
3
  LLM_NAME = "gpt-3.5-turbo"
4
  # "gpt-3.5-turbo" - ~3 seconds delay with decent quality
5
  # "gpt-4-turbo","gpt-4", etc. 10+ seconds delay but higher quality
 
 
 
6
 
7
- STT_URL = f"https://api.openai.com/v1"
8
- STT_KEY_TYPE = "OPENAI_API_KEY" # there should be an environment variable with this name
9
  STT_NAME = "whisper-1"
10
- # "whisper-1" - the only OpenAI STT model available
11
-
 
 
12
 
13
- TTS_URL = f"https://api.openai.com/v1"
14
- TTS_KEY_TYPE = "OPENAI_API_KEY" # there should be an environment variable with this name
15
  TTS_NAME = "tts-1"
16
- # Recommended options
17
- # "tts-1" - good quality and close to real-time response. Just use this one
18
- # "tts-1-hd" - slightly better quality with slightly longer response time
 
1
+ # X_URL - the URL for the model endpoint, can be None if using OpenAI API
2
+ # X_TYPE - the type of the model, can be "OPENAI_API" or "HF_API"
3
+ # there should be an environment variable named f"{X_TYPE}_KEY" holding the API key used to authenticate
4
+ # X_NAME - the name of the model, used only for OpenAI API
5
+
6
+ LLM_URL = None
7
+ LLM_TYPE = "OPENAI_API"
8
  LLM_NAME = "gpt-3.5-turbo"
9
  # "gpt-3.5-turbo" - ~3 seconds delay with decent quality
10
  # "gpt-4-turbo","gpt-4", etc. 10+ seconds delay but higher quality
11
+ # For HuggingFace models, the Messages API is used; it is compatible with the OpenAI API
12
+ # Don't forget to add "/v1" to the end of the URL for HuggingFace LLM models
13
+ # https://huggingface.co/docs/text-generation-inference/en/messages_api
14
 
15
+ STT_URL = "https://api-inference.huggingface.co/models/openai/whisper-tiny.en"
16
+ STT_TYPE = "HF_API"
17
  STT_NAME = "whisper-1"
18
+ # "whisper-1" is the only OpenAI STT model available for OpenAI API
19
+ # The whisper family with more models is available on HuggingFace:
20
+ # https://huggingface.co/collections/openai/whisper-release-6501bba2cf999715fd953013
21
+ # you can also use any other compatible model from HuggingFace
22
 
23
+ TTS_URL = None
24
+ TTS_TYPE = "OPENAI_API"
25
  TTS_NAME = "tts-1"
26
+ # OpenAI "tts-1" - very good quality and close to real-time response
27
+ # OpenAI "tts-1-hd" - slightly better quality with slightly longer response time (no obvious benefits in this case)
28
+ # Open-source TTS models on HuggingFace tend to sound more artificial, but you can try them out
llm.py CHANGED
@@ -1,19 +1,18 @@
1
  import json
2
  import os
3
 
 
 
4
  from dotenv import load_dotenv
5
  from openai import OpenAI
6
 
7
  from audio import numpy_audio_to_bytes
8
- from config import LLM_KEY_TYPE, LLM_NAME, LLM_URL, STT_KEY_TYPE, STT_NAME, STT_URL, TTS_KEY_TYPE, TTS_NAME, TTS_URL
9
  from prompts import coding_interviewer_prompt, grading_feedback_prompt, problem_generation_prompt
10
 
11
  load_dotenv()
12
 
13
- client_LLM = OpenAI(base_url=LLM_URL, api_key=os.getenv(LLM_KEY_TYPE))
14
- print(client_LLM.base_url)
15
- client_STT = OpenAI(base_url=STT_URL, api_key=os.getenv(STT_KEY_TYPE))
16
- client_TTS = OpenAI(base_url=TTS_URL, api_key=os.getenv(TTS_KEY_TYPE))
17
 
18
 
19
  def init_bot(problem=""):
@@ -73,37 +72,48 @@ def send_request(code, previous_code, message, chat_history, chat_display, clien
73
  chat_history.append({"role": "user", "content": f"My latest code:\n{code}"})
74
  chat_history.append({"role": "user", "content": message})
75
 
76
- response = client.chat.completions.create(model=LLM_NAME, response_format={"type": "json_object"}, messages=chat_history)
77
-
78
- json_reply = response.choices[0].message.content.strip()
79
 
80
- try:
81
- data = json.loads(json_reply)
82
- reply = data["reply_to_candidate"]
83
- except json.JSONDecodeError as e:
84
- print("Failed to decode JSON:", str(e))
85
- reply = "There was an error processing your request."
86
 
87
- chat_history.append({"role": "assistant", "content": json_reply})
88
- chat_display.append([message, str(reply)])
89
 
90
  return chat_history, chat_display, "", code
91
 
92
 
93
- def speech_to_text(audio, client=client_STT):
94
- transcription = client.audio.transcriptions.create(
95
- model=STT_NAME, file=("temp.wav", numpy_audio_to_bytes(audio[1]), "audio/wav"), response_format="text"
96
- )
 
 
 
 
 
 
 
 
97
  return transcription
98
 
99
 
100
- def text_to_speech(text, client=client_TTS):
101
- response = client.audio.speech.create(model=TTS_NAME, voice="alloy", input=text)
102
- return response.content
 
 
 
 
 
 
 
 
 
103
 
104
 
105
  def read_last_message(chat_display):
106
  last_message = chat_display[-1][1]
107
-
108
- audio = text_to_speech(last_message)
109
- return audio
 
 
1
  import json
2
  import os
3
 
4
+ import requests
5
+
6
  from dotenv import load_dotenv
7
  from openai import OpenAI
8
 
9
  from audio import numpy_audio_to_bytes
10
+ from config import LLM_NAME, LLM_TYPE, LLM_URL, STT_NAME, STT_TYPE, STT_URL, TTS_NAME, TTS_TYPE, TTS_URL
11
  from prompts import coding_interviewer_prompt, grading_feedback_prompt, problem_generation_prompt
12
 
13
  load_dotenv()
14
 
15
+ client_LLM = OpenAI(base_url=LLM_URL, api_key=os.getenv(f"{LLM_TYPE}_KEY"))
 
 
 
16
 
17
 
18
  def init_bot(problem=""):
 
72
  chat_history.append({"role": "user", "content": f"My latest code:\n{code}"})
73
  chat_history.append({"role": "user", "content": message})
74
 
75
+ response = client.chat.completions.create(model=LLM_NAME, messages=chat_history)
 
 
76
 
77
+ reply = response.choices[0].message.content.strip()
 
 
 
 
 
78
 
79
+ chat_history.append({"role": "assistant", "content": reply})
80
+ chat_display.append([message, reply])
81
 
82
  return chat_history, chat_display, "", code
83
 
84
 
85
def speech_to_text(audio):
    """Transcribe recorded audio into text using the configured STT backend.

    `audio` is a (sample_rate, numpy_array) tuple (gradio audio component format);
    only the raw samples (audio[1]) are sent to the API.
    Returns the transcription as a string.
    """
    assert STT_TYPE in ["OPENAI_API", "HF_API"]

    wav_bytes = numpy_audio_to_bytes(audio[1])
    if STT_TYPE == "OPENAI_API":
        client = OpenAI(base_url=STT_URL, api_key=os.getenv(f"{STT_TYPE}_KEY"))
        transcription = client.audio.transcriptions.create(
            model=STT_NAME, file=("temp.wav", wav_bytes, "audio/wav"), response_format="text"
        )
    elif STT_TYPE == "HF_API":
        # HF Inference API: POST raw audio bytes, response is JSON with a "text" field.
        headers = {"Authorization": "Bearer " + os.getenv(f"{STT_TYPE}_KEY")}
        transcription = requests.post(STT_URL, headers=headers, data=wav_bytes).json()["text"]

    return transcription
98
 
99
 
100
def text_to_speech(text):
    """Synthesize speech for `text` using the configured TTS backend.

    Returns the raw audio bytes of the response.
    """
    assert TTS_TYPE in ["OPENAI_API", "HF_API"]

    if TTS_TYPE == "OPENAI_API":
        client = OpenAI(base_url=TTS_URL, api_key=os.getenv(f"{TTS_TYPE}_KEY"))
        response = client.audio.speech.create(model=TTS_NAME, voice="alloy", input=text)
    elif TTS_TYPE == "HF_API":
        # Fix: was reading the key via STT_TYPE (wrong env var when STT/TTS backends differ).
        headers = {"Authorization": "Bearer " + os.getenv(f"{TTS_TYPE}_KEY")}
        # Fix: the request never included the text to synthesize; the HF Inference API
        # expects a JSON payload of the form {"inputs": <text>}.
        response = requests.post(TTS_URL, headers=headers, json={"inputs": text})

    audio = response.content
    return audio
112
 
113
 
114
def read_last_message(chat_display):
    """Turn the latest assistant reply in `chat_display` into speech.

    Returns audio bytes from text_to_speech, or None when there is no reply
    (the last message slot is None).
    """
    last_message = chat_display[-1][1]
    if last_message is None:
        return None
    return text_to_speech(last_message)
prompts.py CHANGED
@@ -14,10 +14,6 @@ coding_interviewer_prompt = (
14
  "If the candidate deviates from the problem, gently guide them back to focus on the task at hand. "
15
  "After multiple unsuccessful attempts by the candidate to identify or fix an error, provide more direct hints or rephrase the problem slightly to aid understanding. "
16
  "Encourage the candidate to think about real-world applications and scalability of their solutions, asking how changes to the problem parameters might affect their approach. "
17
- "Responses should be structured in JSON format with two fields: "
18
- "1. 'reply_to_candidate': contains visible feedback and guidance for the candidate, structured to facilitate learning and insight without giving away answers. "
19
- "2. 'hidden_note': internal notes for the grading AI, including observations on the candidate’s performance across various criteria such as problem-solving skills, debugging effectiveness, and adaptability. These notes may include specific code snippets the candidate struggled with, key mistakes made, and any notable strengths or weaknesses observed. "
20
- "The 'hidden_note' should also reflect a self-critical perspective if the interviewer's expectations do not align with a valid candidate solution, acknowledging and adjusting for any potential bias or error. "
21
  )
22
 
23
 
 
14
  "If the candidate deviates from the problem, gently guide them back to focus on the task at hand. "
15
  "After multiple unsuccessful attempts by the candidate to identify or fix an error, provide more direct hints or rephrase the problem slightly to aid understanding. "
16
  "Encourage the candidate to think about real-world applications and scalability of their solutions, asking how changes to the problem parameters might affect their approach. "
 
 
 
 
17
  )
18
 
19