|
import openai |
|
import whisper |
|
from io import BytesIO |
|
import os |
|
import sys |
|
import pytube |
|
from moviepy.editor import VideoFileClip |
|
import moviepy.editor as movpy |
|
from moviepy.editor import AudioFileClip |
|
from pydub import AudioSegment |
|
from pytube import YouTube |
|
import gradio as gr |
|
import collections |
|
from gtts import gTTS |
|
|
|
|
|
# The API key is a secret and must never be committed to source control;
# read it from the environment instead. If OPENAI_API_KEY is unset this is
# None and the first API call will fail with an authentication error.
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
|
|
class NamedBytesIO(BytesIO):
    """In-memory byte buffer that also exposes a read-only ``name``.

    Plain ``BytesIO`` objects have no ``name`` attribute; this subclass adds
    one so the buffer can be handed to code that expects a named file object.
    """

    def __init__(self, *args, name='unnamed.mp3', **kwargs):
        """Create the buffer.

        Args:
            *args: Positional arguments forwarded to ``BytesIO`` (for example
                the initial bytes).
            name: File name reported by the ``name`` property.
            **kwargs: Remaining keyword arguments forwarded to ``BytesIO``.
        """
        self._name = name
        super().__init__(*args, **kwargs)

    @property
    def name(self):
        """Return the file name given at construction time."""
        return self._name
|
|
|
|
|
# Default length of each slice sent for transcription: 20 minutes, expressed
# in milliseconds because pydub indexes and slices audio in milliseconds.
_DEFAULT_CHUNK_MS = 20 * 60 * 1000

# Text passed as the ``prompt`` argument of every transcription request.
_TRANSCRIPTION_PROMPT = """
Dont remove any filler words in transcribe. eg: um, umm, uh, ah, er, mhm, hmm, mm, mmm, oh, ohh, let me think like, Okay, ok, here's what I'm, like, thinking, you know, well, so, actually, basically, literally, right, i mean,anyway...
This is human voices with various tone and accents.
1) Transcibe every words and all signals, dont skip any
2) Dont correct the grammar
3) Dont correct the spelling
4) Dont remove any redundant words or punctuations
5) if there is a pause in the audio, please add a comma(,) in the transcribe
6) if there is a long pause in the audio, please add a period(.) in the transcribe
"""


def transcribe_audio(audio_file_path, chunk_duration_ms=_DEFAULT_CHUNK_MS):
    """Transcribe an MP3 file with the ``whisper-1`` model, chunk by chunk.

    The audio is loaded with pydub, cut into consecutive slices of at most
    ``chunk_duration_ms`` milliseconds, and each slice is re-encoded to MP3
    in memory and sent to ``openai.Audio.transcribe``. The per-chunk texts
    are joined with single spaces.

    The audio duration and the final transcription are also printed to
    stdout.

    Args:
        audio_file_path: Path of the MP3 file to transcribe.
        chunk_duration_ms: Maximum length of one slice, in milliseconds.
            Defaults to 20 minutes.

    Returns:
        The full transcription as one string; empty if the audio has zero
        length.
    """
    audio = AudioSegment.from_file(audio_file_path, format="mp3")

    audio_duration = int(audio.duration_seconds)
    print(f"Duration of the audio is {audio_duration} seconds")

    transcriptions = []

    # len(audio) is the duration in milliseconds -- the same unit used by the
    # slice indices below. Iterating over the duration in *seconds* here
    # would stop after the first chunk and silently drop the rest.
    for start_ms in range(0, len(audio), chunk_duration_ms):
        chunk = audio[start_ms:start_ms + chunk_duration_ms]

        # The buffer is given a ".mp3" name; presumably the API client uses
        # the file name to determine the upload's format.
        chunk_buffer = NamedBytesIO(name='chunk.mp3')
        chunk.export(chunk_buffer, format="mp3")
        chunk_buffer.seek(0)  # rewind so the upload starts at the first byte

        response = openai.Audio.transcribe(
            api_key=openai.api_key,
            model='whisper-1',
            file=chunk_buffer,
            prompt=_TRANSCRIPTION_PROMPT,
        )
        transcriptions.append(response['text'])

    final_transcription = ' '.join(transcriptions)

    # Print one sentence per line for readability on the console.
    print(final_transcription.replace('.', '.\n'))

    return final_transcription
|
|
|
|
|
# Prompt asking the model to grade a transcribed interview answer.
# ``{transcribed_text}`` is filled in with str.format().
_EVALUATION_PROMPT_TEMPLATE = """
Pretend that you are an interview coach with 25 years of experience.
Evaluate the following response based on the evaluation criteria:
# please provide feedback on the following response based on the evaluation criteria:
# language, tone & personality, and a 5-point system for clarity, vocal variety, comfort level, interest, and well-supported content.

Evaluation criteria:
A) Language: Score 1 - 10. the more score the better.
The higher usage of filler words, redundant words, jargons will decrease the score
1. use of Filler words: count the filler words and list them: eg: um, umm, uh, ah, er, mhm, hmm, mm, mmm, oh, ohh, let me think like, Okay, ok, here's what I'm, like, thinking, you know, well, so, actually, basically, literally, right, i mean,anyway...
example: um: 3, yeah: 2
2. use of redundant words: list them: example: sum total, joint collaboration, unexpected surprise, future plans, new record...etc
example: sum total: 2, joint collaboration: 1
3. use of jargons: list them

B) Tone & personality: Score 1 - 15: the more score the better.
The better of the tone and personality, the higher score, Relevance, Clarity, Specificity, Confidence, Fit
will increase the score

1. Tone: What is the Tone? Choose top one from the following: and just report the headline
A. Positive/optimistic: Expressing a hopeful or upbeat outlook, such as excitement, joy, or satisfaction. Example: "I'm really looking forward to this vacation!"
B. Negative/pessimistic: Expressing a negative or gloomy outlook, such as sadness, frustration, or disappointment. Example: "I don't think I can handle any more bad news."
C. Neutral/objective: Expressing an unbiased or factual outlook, without any emotion or bias. Example: "The temperature is 75 degrees and the sky is clear."
D. Sarcastic: Expressing a tone of mockery or irony, often with the opposite meaning of what is being said. Example: "Oh great, another Monday morning. Just what I needed."
E. Formal/polite: Using formal language and expressions to show respect and politeness. Example: "I would be most grateful if you could provide me with further information."
F. Informal/casual: Using casual language and expressions to show familiarity and informality. Example: "Hey, what's up? Wanna hang out later?"
G. Authoritative: Expressing a tone of authority or control, such as in instructions or commands. Example: "You need to follow these procedures precisely to ensure safety."
H. Condescending: Expressing a tone of superiority or patronization towards others.
2. Personality traits: Insights into the personality traits? Choose top one from the following: and just report the headline
A. Extraversion: Extraverted individuals tend to be outgoing, sociable, and talkative.
B. Introversion: Introverted individuals tend to be more reserved, reflective, and introspective.
C. Conscientiousness: Conscientious individuals tend to be organized, responsible, and diligent.
D. Agreeableness: Agreeable individuals tend to be friendly, cooperative, and empathetic.
E. Anxious / Sensitive : Individuals tend to be anxious, sensitive, and easily stressed.
3. Relevance: Does the response address the question asked and provide relevant information?
4. Clarity: Is the response clear and concise?
5. Specificity / examples or details: Does the response provide specific examples or details that demonstrate the interviewee's skills, experiences, or qualifications?
6. Confidence: Does the interviewee present themselves confidently and effectively communicate their ideas?
7. Fit: Does the response demonstrate how the interviewee's skills, experiences, or qualifications align with the job requirement?

C) Communication metrics: Score 1 - 25: The user to be evaluated on the following criteria with a 3 point system:
Higher the points better the score
1. Clarity: Spoken language is clear and is easily understood Comment

3 Is an exemplary Interviewee who is always understood.
2 Spoken language is clear and is easily understood
1 Spoken language is unclear or not easily understood

2. Comfort Level

3 Appears completely self-assured with the Interviewer
2 Appears comfortable with the Interviewer
1 Appears highly uncomfortable with the Interviewer

3. Interesting content: Engages Interviewer with interesting, well-constructed content Comment

3 Fully engages Interviewer with exemplary, well constructed content
2 Engages Interviewer with interesting, well constructed content
1 Content is neither interesting nor well-constructed

4. Well Supported: Speech content is well-supported and sources are available if requested

3 Delivers exemplary speech with a topic that is well-supported by content of the speech
2 Speech topic is well-supported by content of speech
1 Speech content is unrelated to the topic of the speech

Assess the response and give your honest feedback.
A) Language
B) Tone & personality
C) 3 point evaluation
D) Overall Score = sum of points scored in A + B + C

Put in top 3 bullet points for feedback + 3 bullet points on how to improve it.

example output:
" Feedback: Overall Score = 5/50

A) Language: Score: 5/10
1. Filler words: um: 3, yeah: 2
2. Redundant words: sum total: 2, joint collaboration: 1
3. Jargons: list them

B) Tone & personality: Score: 2/15
1. Tone: Positive/optimistic
2. Personality traits: Extraversion
3. Relevance: Yes
4. Clarity: Yes
5. Specificity / examples or details: Yes
6. Confidence: Yes
7. Fit: Yes

C) 3 points evaluation: Score: 3/25
1. Clarity: 2
2. Comfort Level: 3
3. Interesting content: 2
4. Well Supported: 3

Suggestions to improve:
1. You used a lot of filler words. Try to avoid them.
2. You used a lot of redundant words. Try to avoid them.
3. You used a lot of jargons. Try to avoid them.
4. Improve your tone. Try to be more positive.
5. Improve your personality. Try to be more extraverted.
6. Improve your clarity. Try to be more clear.
..etc

"

Response:
{transcribed_text}
"""

# Prompt asking the model to condense the generated feedback.
# ``{feedback}`` is filled in with str.format().
_SUMMARY_PROMPT_TEMPLATE = """
Summarize the interviewee's response to the question in 3 - 5 bullet points.
start the summary with a positive note.
make it more professional and friendly.
you should give feedback so that the interviewee can improve their response.


Response:
{feedback}
"""

# File the spoken summary is written to; its path is returned to the caller.
_SPEECH_OUTPUT_PATH = "Temp.mp3"


def _complete(prompt):
    """Run one ``text-davinci-003`` completion and return its stripped text."""
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        temperature=0,  # same settings for both calls in get_feedback
        max_tokens=500,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
    )
    return response.choices[0].text.strip()


def get_feedback(audio_file_path):
    """Produce written and spoken interview feedback for an audio answer.

    Steps:
      1. Transcribe the audio with ``transcribe_audio``.
      2. Ask the completion model to grade the transcription against the
         evaluation rubric.
      3. Ask the model to summarize that feedback.
      4. Convert the summary to English speech with gTTS and save it to
         ``Temp.mp3`` in the current working directory (overwritten on
         every call).

    Args:
        audio_file_path: Path of the interviewee's MP3 recording.

    Returns:
        A four-element list:
        ``[feedback, summary, transcription, "Temp.mp3"]``.
    """
    transcribed_text = transcribe_audio(audio_file_path)

    feedback = _complete(
        _EVALUATION_PROMPT_TEMPLATE.format(transcribed_text=transcribed_text)
    )

    # The second request is fed the feedback text, not the raw transcription.
    summary_response_out = _complete(
        _SUMMARY_PROMPT_TEMPLATE.format(feedback=feedback)
    )

    speech = gTTS(text=summary_response_out, lang="en", slow=False)
    speech.save(_SPEECH_OUTPUT_PATH)

    return [feedback, summary_response_out, transcribed_text, _SPEECH_OUTPUT_PATH]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Gradio front end: one text box taking the path of an audio file, wired to
# get_feedback, whose four return values fill the four output components in
# order (feedback, summary, transcription, path of the spoken summary).
iface = gr.Interface(
    fn=get_feedback,
    inputs=gr.Textbox(lines=1, label="Interviewee audio file path"),
    examples=[
        "Tell me about yourself - bad graduate job interview answer tell me about yourselfout.mp3",
        "Tell me about yourself - bad entrepreneur answer to tell me about yourselfout.mp3",
    ],
    outputs=[
        gr.Textbox(label="Feedback"),
        gr.Textbox(label="Summary response"),
        gr.Textbox(label="Transcription"),
        # No preset value: "Temp.mp3" only exists after get_feedback has run
        # at least once, so it cannot be loaded when the UI is built.
        gr.Audio(label="Speech Output"),
    ],
    title="Interview Feedback",
    description="Get feedback on your interview response from an AI interview coach.",
    layout="vertical",
)

iface.launch()
|
|
|
|