# Spaces: Sleeping
import streamlit as st | |
import whisper | |
from moviepy.editor import VideoFileClip | |
from tempfile import NamedTemporaryFile | |
import numpy as np | |
from transformers import BertTokenizer, BertModel | |
import torch | |
import os | |
# Streamlit re-executes this script from the top on every widget interaction.
# Loading the models through st.cache_resource keeps a single copy alive for
# the whole server process instead of reloading them on each rerun.
@st.cache_resource
def _load_models():
    """Load the BERT tokenizer/encoder and the Whisper transcription model.

    Returns:
        A ``(tokenizer, model, whisper_model)`` tuple: the 'bert-base-uncased'
        tokenizer and model, and the Whisper "base" model.
    """
    bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    bert_model = BertModel.from_pretrained('bert-base-uncased')
    speech_model = whisper.load_model("base")
    return bert_tokenizer, bert_model, speech_model


# Module-level names used by encode_response() and transcribe_video().
tokenizer, model, whisper_model = _load_models()
# Scoring rubric: each criterion maps to the keywords looked for in a
# candidate's transcript. A criterion's score is the fraction of its
# keywords that appear in the response.
criteria = dict(
    technical=[
        "machine learning",
        "data",
        "preprocess",
        "decision tree",
        "SVM",
        "neural network",
        "hyperparameter",
    ],
    problem_solving=[
        "cross-validation",
        "grid search",
        "evaluate",
        "optimize",
        "performance",
    ],
    communication=[
        "I would",
        "then",
        "and",
        "also",
    ],
)
def encode_response(response):
    """Encode a response into a fixed-size vector using BERT.

    The response is tokenized (truncated to the tokenizer's limit), passed
    through the module-level BERT ``model``, and the final hidden states are
    averaged over ``dim=1`` (the token axis of BERT's ``last_hidden_state``).

    Args:
        response: The text to encode.

    Returns:
        A NumPy array holding the mean-pooled embedding, with size-1
        dimensions squeezed out.
    """
    inputs = tokenizer(response, return_tensors='pt', padding=True, truncation=True)
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).squeeze().detach().numpy()
def score_response(response, criteria):
    """Score a response against keyword-based criteria.

    Matching is a case-insensitive substring test, so a keyword such as
    "preprocess" also matches "preprocessing" (and, likewise, "and" matches
    inside longer words).

    Args:
        response: The candidate's response text.
        criteria: Mapping of criterion name to a list of keywords.

    Returns:
        A dict mapping each criterion name to the fraction (0.0-1.0) of its
        keywords found in the response. A criterion with no keywords scores
        0.0.
    """
    # Lowercase once instead of once per keyword.
    text = response.lower()
    scores = {}
    for criterion, keywords in criteria.items():
        if not keywords:
            # Avoid dividing by zero for an empty keyword list.
            scores[criterion] = 0.0
            continue
        # Lowercase the keyword too; otherwise keywords containing capitals
        # ("SVM", "I would") could never match the lowercased response.
        hits = sum(1 for keyword in keywords if keyword.lower() in text)
        scores[criterion] = hits / len(keywords)
    return scores
def rank_candidates(candidates):
    """Attach an ``avg_score`` to each candidate and return them best-first.

    Each candidate dict is updated in place with the mean of the values in
    its ``'scores'`` mapping, stored under ``'avg_score'``.

    Args:
        candidates: List of candidate dicts, each with a ``'scores'`` dict.

    Returns:
        A new list of the same dicts, sorted by ``'avg_score'`` descending.
        Candidates with equal averages keep their original relative order.
    """
    for entry in candidates:
        per_criterion = list(entry['scores'].values())
        entry['avg_score'] = np.mean(per_criterion)

    ordered = list(candidates)
    ordered.sort(key=lambda entry: entry['avg_score'], reverse=True)
    return ordered
def transcribe_video(video_file):
    """Extract the audio track from a video and transcribe it with Whisper.

    The video bytes are copied to a temporary file, the audio is written to
    a second temporary WAV file, and the module-level ``whisper_model``
    transcribes it. Both temporary files are removed afterwards, even when a
    step fails.

    Args:
        video_file: A binary file-like object positioned at the start of the
            video data (an uploaded file or a file opened with ``"rb"``).

    Returns:
        The transcribed text.

    Raises:
        ValueError: If the video has no audio track.
    """
    # Keep the source extension when one is available; fall back to ".mp4".
    source_name = str(getattr(video_file, "name", "") or "")
    suffix = os.path.splitext(source_name)[1] or ".mp4"

    temp_video_path = None
    audio_path = None
    video = None
    try:
        # Save the incoming bytes to a temporary location on disk.
        with NamedTemporaryFile(delete=False, suffix=suffix) as temp_video_file:
            temp_video_path = temp_video_file.name
            temp_video_file.write(video_file.read())

        video = VideoFileClip(temp_video_path)
        if video.audio is None:
            raise ValueError("The video has no audio track to transcribe.")

        # A unique path per call, so concurrent sessions cannot overwrite
        # each other's audio the way a fixed "audio.wav" would.
        with NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
            audio_path = temp_audio_file.name
        video.audio.write_audiofile(audio_path)

        # Reuse the module-level model rather than reloading it on each call.
        transcription = whisper_model.transcribe(audio_path)
        return transcription['text']
    finally:
        # Close the clip before deleting the files it reads from.
        if video is not None:
            video.close()
        for path in (audio_path, temp_video_path):
            if path is not None and os.path.exists(path):
                os.remove(path)
# --- Page header and per-candidate video input ---------------------------
st.title("AI Role Candidate Screening via Video Interview")

# Bundled sample interviews, used when a candidate's "use default" box is ticked.
default_videos = ["Unlocking AI_ Insights from a Machine Learning Engineer.mp4", "Navigating Ethical Challenges in AI.mp4"]

num_candidates = st.number_input("Enter the number of candidates:", min_value=1, max_value=10, value=1)

# One {"name", "response"} entry per candidate whose video was transcribed.
mock_interviews = []

for i in range(int(num_candidates)):
    st.write(f"### Candidate {i+1}")

    # Provide options to either upload a video or use a default video
    use_default = st.checkbox(f"Use default video for Candidate {i+1}?", key=f"default_{i}")

    video_file = None
    if use_default:
        video_file_path = default_videos[i % len(default_videos)]  # Cycle through default videos
        st.write(f"Using default video: {video_file_path}")
        if os.path.isfile(video_file_path):
            video_file = open(video_file_path, "rb")  # Closed below once transcribed
        else:
            # Report the problem instead of crashing the whole app.
            st.error(f"Default video not found: {video_file_path}")
    else:
        video_file = st.file_uploader(f"Upload interview video for Candidate {i+1}:", type=["mp4", "mov", "avi"], key=f"video_{i}")

    if video_file is not None:
        st.write(f"Processing video for Candidate {i+1}...")
        try:
            # Uploaded and default videos go through the same transcription path.
            transcription = transcribe_video(video_file)
        finally:
            # Only the handle opened here is ours to close; Streamlit owns uploads.
            if use_default:
                video_file.close()
        st.write(f"Transcript for Candidate {i+1}: {transcription}")
        mock_interviews.append({"name": f"Candidate {i+1}", "response": transcription})
# Score, embed and rank the transcribed candidates once the user asks for it.
if st.button('Analyze Responses'):
    if not mock_interviews:
        # Nothing was transcribed, so there is nothing to rank.
        st.write("Please upload videos for all candidates.")
    else:
        # Attach keyword scores and a BERT embedding to every candidate.
        scored_candidates = []
        for candidate in mock_interviews:
            answer = candidate['response']
            candidate['scores'] = score_response(answer, criteria)
            candidate['encoded'] = encode_response(answer)
            scored_candidates.append(candidate)

        # Best average score first.
        ranked_candidates = rank_candidates(scored_candidates)

        # Show the leaderboard.
        st.write("### Candidate Rankings")
        for rank, candidate in enumerate(ranked_candidates, start=1):
            st.write(f"**Rank {rank}: {candidate['name']}**")
            st.write(f"Average Score: {candidate['avg_score']:.2f}")
            st.write(f"Scores: {candidate['scores']}")