# test_v1 / app.py
# AI-Naga's picture
# Update app.py
# 0eeece7
import openai
import whisper
from io import BytesIO
import os
import sys
import pytube
from moviepy.editor import VideoFileClip
import moviepy.editor as movpy
from moviepy.editor import AudioFileClip
from pydub import AudioSegment
from pytube import YouTube
import gradio as gr
import collections
from gtts import gTTS
##############################################################################
# The API key is read from the environment so that no secret is stored in
# the source file. Set OPENAI_API_KEY before starting the app.
# NOTE: the key that used to be hard-coded on this line has been published
# with the source and should be treated as compromised and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
#create gradio app
class NamedBytesIO(BytesIO):
    """In-memory byte buffer that also exposes a read-only ``name``.

    Accepts the same positional and keyword arguments as ``BytesIO`` plus an
    optional ``name`` keyword (default ``'unnamed.mp3'``).
    """

    def __init__(self, *args, **kwargs):
        # ``name`` is removed from kwargs before delegating, so that only the
        # remaining arguments reach BytesIO.__init__.
        self._name = kwargs.pop('name', 'unnamed.mp3')
        super().__init__(*args, **kwargs)

    @property
    def name(self):
        """Return the name given at construction time."""
        return self._name
##############################################################################
# Maximum length of audio sent to the transcription API in one request.
_CHUNK_DURATION_MS = 20 * 60 * 1000  # 20 minutes, in milliseconds

# Guidance text sent to the transcription model with every chunk.
_TRANSCRIPTION_PROMPT = """
Dont remove any filler words in transcribe. eg: um, umm, uh, ah, er, mhm, hmm, mm, mmm, oh, ohh, let me think like, Okay, ok, here's what I'm, like, thinking, you know, well, so, actually, basically, literally, right, i mean,anyway...
This is human voices with various tone and accents.
1) Transcibe every words and all signals, dont skip any
2) Dont correct the grammar
3) Dont correct the spelling
4) Dont remove any redundant words or punctuations
5) if there is a pause in the audio, please add a comma(,) in the transcribe
6) if there is a long pause in the audio, please add a period(.) in the transcribe
"""


def transcribe_audio(audio_file_path):
    """Transcribe an MP3 file with the OpenAI ``whisper-1`` model.

    The audio is split into consecutive chunks of at most
    ``_CHUNK_DURATION_MS`` milliseconds; each chunk is re-encoded to MP3 in
    memory, sent to ``openai.Audio.transcribe`` and the returned texts are
    joined with single spaces.

    Args:
        audio_file_path: Path of the MP3 file to transcribe.

    Returns:
        The transcription of the whole file as one string (empty when the
        file contains no audio).
    """
    model_id = 'whisper-1'
    audio = AudioSegment.from_file(audio_file_path, format="mp3")
    audio_duration = int(audio.duration_seconds)
    print(f"Duration of the audio is {audio_duration} seconds")

    # The slice bounds below are in milliseconds, so the loop must also run
    # over the length in milliseconds. Iterating over the duration in seconds
    # with a millisecond step only ever produced the first chunk and skipped
    # clips shorter than one second entirely.
    total_ms = len(audio)  # pydub reports the segment length in milliseconds
    transcriptions = []
    for start_ms in range(0, total_ms, _CHUNK_DURATION_MS):
        chunk = audio[start_ms:start_ms + _CHUNK_DURATION_MS]
        chunk_buffer = NamedBytesIO(name='chunk.mp3')
        chunk.export(chunk_buffer, format="mp3")
        # Rewind so the upload starts at the first byte of the encoded chunk.
        chunk_buffer.seek(0)
        response = openai.Audio.transcribe(
            api_key=openai.api_key,
            model=model_id,
            file=chunk_buffer,
            prompt=_TRANSCRIPTION_PROMPT,
        )
        transcriptions.append(response['text'])

    final_transcription = ' '.join(transcriptions)
    # Console output only: one sentence per line for readability.
    print(final_transcription.replace('.', '.\n'))
    return final_transcription
#########################################################################################
# Completion engine used for both the evaluation and the summary.
# NOTE(review): this is a legacy completions model; confirm it is still
# served by the API before relying on it.
_COMPLETION_ENGINE = "text-davinci-003"

# Evaluation rubric; ``{transcribed_text}`` is replaced by the transcription.
_FEEDBACK_PROMPT_TEMPLATE = """
Pretend that you are an interview coach with 25 years of experience.
Evaluate the following response based on the evaluation criteria:
# please provide feedback on the following response based on the evaluation criteria:
# language, tone & personality, and a 5-point system for clarity, vocal variety, comfort level, interest, and well-supported content.
Evaluation criteria:
A) Language: Score 1 - 10. the more score the better.
The higher usage of filler words, redundant words, jargons will decrease the score
1. use of Filler words: count the filler words and list them: eg: um, umm, uh, ah, er, mhm, hmm, mm, mmm, oh, ohh, let me think like, Okay, ok, here's what I'm, like, thinking, you know, well, so, actually, basically, literally, right, i mean,anyway...
example: um: 3, yeah: 2
2. use of redundant words: list them: example: sum total, joint collaboration, unexpected surprise, future plans, new record...etc
example: sum total: 2, joint collaboration: 1
3. use of jargons: list them
B) Tone & personality: Score 1 - 15: the more score the better.
The better of the tone and personality, the higher score, Relevance, Clarity, Specificity, Confidence, Fit
will increase the score
1. Tone: What is the Tone? Choose top one from the following: and just report the headline
A. Positive/optimistic: Expressing a hopeful or upbeat outlook, such as excitement, joy, or satisfaction. Example: "I'm really looking forward to this vacation!"
B. Negative/pessimistic: Expressing a negative or gloomy outlook, such as sadness, frustration, or disappointment. Example: "I don't think I can handle any more bad news."
C. Neutral/objective: Expressing an unbiased or factual outlook, without any emotion or bias. Example: "The temperature is 75 degrees and the sky is clear."
D. Sarcastic: Expressing a tone of mockery or irony, often with the opposite meaning of what is being said. Example: "Oh great, another Monday morning. Just what I needed."
E. Formal/polite: Using formal language and expressions to show respect and politeness. Example: "I would be most grateful if you could provide me with further information."
F. Informal/casual: Using casual language and expressions to show familiarity and informality. Example: "Hey, what's up? Wanna hang out later?"
G. Authoritative: Expressing a tone of authority or control, such as in instructions or commands. Example: "You need to follow these procedures precisely to ensure safety."
H. Condescending: Expressing a tone of superiority or patronization towards others.
2. Personality traits: Insights into the personality traits? Choose top one from the following: and just report the headline
A. Extraversion: Extraverted individuals tend to be outgoing, sociable, and talkative.
B. Introversion: Introverted individuals tend to be more reserved, reflective, and introspective.
C. Conscientiousness: Conscientious individuals tend to be organized, responsible, and diligent.
D. Agreeableness: Agreeable individuals tend to be friendly, cooperative, and empathetic.
E. Anxious / Sensitive : Individuals tend to be anxious, sensitive, and easily stressed.
3. Relevance: Does the response address the question asked and provide relevant information?
4. Clarity: Is the response clear and concise?
5. Specificity / examples or details: Does the response provide specific examples or details that demonstrate the interviewee's skills, experiences, or qualifications?
6. Confidence: Does the interviewee present themselves confidently and effectively communicate their ideas?
7. Fit: Does the response demonstrate how the interviewee's skills, experiences, or qualifications align with the job requirement?
C) Communication metrics: Score 1 - 25: The user to be evaluated on the following criteria with a 3 point system:
Higher the points better the score
1. Clarity: Spoken language is clear and is easily understood Comment
3 Is an exemplary Interviewee who is always understood.
2 Spoken language is clear and is easily understood
1 Spoken language is unclear or not easily understood
2. Comfort Level
3 Appears completely self-assured with the Interviewer
2 Appears comfortable with the Interviewer
1 Appears highly uncomfortable with the Interviewer
3. Interesting content: Engages Interviewer with interesting, well-constructed content Comment
3 Fully engages Interviewer with exemplary, well constructed content
2 Engages Interviewer with interesting, well constructed content
1 Content is neither interesting nor well-constructed
4. Well Supported: Speech content is well-supported and sources are available if requested
3 Delivers exemplary speech with a topic that is well-supported by content of the speech
2 Speech topic is well-supported by content of speech
1 Speech content is unrelated to the topic of the speech
Assess the response and give your honest feedback.
A) Language
B) Tone & personality
C) 3 point evaluation
D) Overall Score = sum of points scored in A + B + C
Put in top 3 bullet points for feedback + 3 bullet points on how to improve it.
example output:
" Feedback: Overall Score = 5/50
A) Language: Score: 5/10
1. Filler words: um: 3, yeah: 2
2. Redundant words: sum total: 2, joint collaboration: 1
3. Jargons: list them
B) Tone & personality: Score: 2/15
1. Tone: Positive/optimistic
2. Personality traits: Extraversion
3. Relevance: Yes
4. Clarity: Yes
5. Specificity / examples or details: Yes
6. Confidence: Yes
7. Fit: Yes
C) 3 points evaluation: Score: 3/25
1. Clarity: 2
2. Comfort Level: 3
3. Interesting content: 2
4. Well Supported: 3
Suggestions to improve:
1. You used a lot of filler words. Try to avoid them.
2. You used a lot of redundant words. Try to avoid them.
3. You used a lot of jargons. Try to avoid them.
4. Improve your tone. Try to be more positive.
5. Improve your personality. Try to be more extraverted.
6. Improve your clarity. Try to be more clear.
..etc
"
Response:
{transcribed_text}
"""

# Summary request; ``{feedback}`` is replaced by the evaluation text.
_SUMMARY_PROMPT_TEMPLATE = """
Summarize the interviewee's response to the question in 3 - 5 bullet points.
start the summary with a positive note.
make it more professional and friendly.
you should give feedback so that the interviewee can improve their response.
Response:
{feedback}
"""


def _complete(prompt):
    """Send *prompt* to the completion engine and return the stripped text."""
    api_response = openai.Completion.create(
        engine=_COMPLETION_ENGINE,
        prompt=prompt,
        temperature=0,
        max_tokens=500,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
    )
    return api_response.choices[0].text.strip()


def get_feedback(audio_file_path):
    """Transcribe an interview answer, evaluate it and speak a summary.

    Steps: transcribe the audio file, ask the completion engine to score the
    transcription against the rubric, ask it again to summarize that
    evaluation, then synthesize the summary to ``Temp.mp3`` with gTTS.

    Args:
        audio_file_path: Path of the MP3 file holding the interviewee's answer.

    Returns:
        A four-item list: the evaluation text, the summary text, the
        transcription, and the path of the spoken summary (``"Temp.mp3"``),
        or ``None`` in the last position when the summary is empty and no
        audio was produced.
    """
    transcribed_text = transcribe_audio(audio_file_path)

    feedback = _complete(
        _FEEDBACK_PROMPT_TEMPLATE.format(transcribed_text=transcribed_text)
    )
    # The summary is built from the evaluation text, not from the transcript.
    summary_response_out = _complete(
        _SUMMARY_PROMPT_TEMPLATE.format(feedback=feedback)
    )

    # gTTS refuses to synthesize empty text, so skip speech output rather than
    # failing the whole request when the model returned nothing.
    if not summary_response_out:
        return [feedback, summary_response_out, transcribed_text, None]

    audioobj = gTTS(text=summary_response_out, lang="en", slow=False)
    audioobj.save("Temp.mp3")
    return [feedback, summary_response_out, transcribed_text, "Temp.mp3"]
# iface = gr.Interface(
# fn=get_feedback,
# inputs=gr.inputs.Textbox(lines=10, label="Interviewee audio file path"),
# outputs=[
# gr.outputs.Textbox(label="Feedback"),
# gr.outputs.Textbox(label="Transcription")
# ],
# title="Interview Feedback",
# examples=[
# "Tell me about yourself - bad graduate job interview answer tell me about yourselfout.mp3",
# "Tell me about yourself - bad entrepreneur answer to tell me about yourselfout.mp3"
# ],
# description="Get feedback on your interview response from an AI interview coach.",
# layout="vertical"
# )
# iface.launch()
# Create the Gradio interface.
# Layout order: input, examples, then the outputs.
# The input is a textbox that takes the path of the audio file.
# The examples are a list of audio file names.
# The outputs are textboxes for the feedback, the summary and the transcription,
# followed by an audio player for the spoken summary.
# Single text input: the path of the MP3 file to evaluate.
_audio_path_input = gr.inputs.Textbox(
    lines=1,
    label="Interviewee audio file path",
)

# Sample file names offered as one-click examples in the UI.
_example_paths = [
    "Tell me about yourself - bad graduate job interview answer tell me about yourselfout.mp3",
    "Tell me about yourself - bad entrepreneur answer to tell me about yourselfout.mp3",
]

# One output component per item returned by get_feedback, in the same order.
# NOTE(review): the audio component is created with "Temp.mp3" as its initial
# value; confirm the installed Gradio version tolerates that file not existing
# yet at start-up.
_output_components = [
    gr.outputs.Textbox(label="Feedback"),
    gr.outputs.Textbox(label="Summary response"),
    gr.outputs.Textbox(label="Transcription"),
    gr.Audio("Temp.mp3", label="Speech Output"),
]

iface = gr.Interface(
    fn=get_feedback,
    inputs=_audio_path_input,
    examples=_example_paths,
    outputs=_output_components,
    title="Interview Feedback",
    description="Get feedback on your interview response from an AI interview coach.",
    layout="vertical",
)

# Start the web server for the interface.
iface.launch()
#########################################################################################