Spaces:

BlakeMartin
/

VideoInterviewAutomation

Runtime error

Blake

Added initial project

5af9455 almost 2 years ago

8.28 kB

	from youtube_transcript_api import YouTubeTranscriptApi
	import openai
	from urllib.parse import urlparse, parse_qs
	import requests
	from requests.auth import HTTPBasicAuth
	import os
	import logging

	# Route every record (DEBUG and above) from the root logger into 'app.log',
	# appending to the file rather than truncating it on start-up.
	logging.basicConfig(
	    level=logging.DEBUG,
	    format='%(name)s - %(levelname)s - %(message)s',
	    filename='app.log',
	    filemode='a',
	)


	def get_video_id_from_url(url):
	    """
	    Extract the YouTube video ID from a URL.

	    Recognised forms:
	      * youtube.com, www.youtube.com, m.youtube.com and music.youtube.com
	        with a 'v' query parameter (e.g. https://www.youtube.com/watch?v=ID)
	      * the same hosts with a path of /embed/ID, /shorts/ID, /live/ID or /v/ID
	      * youtu.be/ID short links (a trailing slash or extra path segments
	        after the ID are ignored)

	    Parameters:
	        url (str): The full URL of the YouTube video.

	    Returns:
	        str: The extracted video ID if found, otherwise None.

	    Note:
	        Non-string input and URLs that cannot be parsed yield None instead
	        of raising.
	    """
	    youtube_hosts = ('youtube.com', 'www.youtube.com',
	                     'm.youtube.com', 'music.youtube.com')
	    # First path segment of URL shapes that carry the ID as the second segment.
	    id_path_prefixes = ('embed', 'shorts', 'live', 'v')

	    if not isinstance(url, str):
	        return None

	    try:
	        url_data = urlparse(url)
	        host = url_data.hostname  # already lower-cased by urlparse
	        segments = [part for part in url_data.path.split('/') if part]

	        if host in youtube_hosts:
	            # The 'v' query parameter wins when present (watch URLs).
	            video_ids = parse_qs(url_data.query).get('v')
	            if video_ids:
	                return video_ids[0]
	            if len(segments) >= 2 and segments[0] in id_path_prefixes:
	                return segments[1]
	        elif host == 'youtu.be':
	            # Short links carry the ID as the first path segment.
	            if segments:
	                return segments[0]

	        return None
	    except ValueError:
	        # urlparse / parse_qs reject malformed input (e.g. a broken IPv6 host).
	        return None

	def get_first_youtube_video_url(urls):
	    """
	    Return the first entry of `urls` that looks like a YouTube link.

	    An entry matches when it contains the substring 'youtube' or 'youtu.be'.
	    Entries are examined in order and the scan stops at the first match.

	    Parameters:
	        urls (list of str): Candidate URLs to scan.

	    Returns:
	        str: The first matching URL, or None when nothing matches.
	    """
	    matches = (
	        candidate for candidate in urls
	        if 'youtube' in candidate or 'youtu.be' in candidate
	    )
	    return next(matches, None)

	def get_youtube_url(opportunity_id):
	    """
	    Retrieve the YouTube video URL attached to a Lever opportunity.

	    Issues a GET request to the Lever API for the given opportunity and
	    returns the first entry of the response's data['links'] list that
	    get_first_youtube_video_url() recognises as a YouTube URL.

	    Parameters:
	        opportunity_id (str): The unique identifier for the opportunity in the Lever system.

	    Returns:
	        str: The YouTube video URL associated with the opportunity, or None if
	        no YouTube URL is found, the API key is missing, the request fails, or
	        the response does not have the expected shape. Failures are logged.

	    Note:
	        Requires the 'LeverKey' environment variable to be set; it is sent as
	        the HTTP basic-auth user name with an empty password.
	    """
	    api_key = os.getenv('LeverKey')
	    if not api_key:
	        logging.error("The 'LeverKey' environment variable is not set.")
	        return None

	    url = 'https://api.lever.co/v1/opportunities/{}'.format(opportunity_id)
	    try:
	        # A timeout keeps a stalled connection from hanging the caller forever.
	        response = requests.get(url, auth=HTTPBasicAuth(api_key, ''), timeout=30)
	        # Surface 4xx/5xx responses here instead of as a KeyError further down.
	        response.raise_for_status()
	        links = response.json()['data']['links']
	    except requests.RequestException as e:
	        logging.error(f"Lever API request failed for opportunity {opportunity_id}: {e}")
	        return None
	    except (ValueError, KeyError, TypeError) as e:
	        # Body was not JSON, or lacked the data -> links structure.
	        logging.error(f"Unexpected Lever API response for opportunity {opportunity_id}: {e}")
	        return None

	    # 'links' may be present but null/empty; treat that as "no links".
	    return get_first_youtube_video_url(links or [])

	def parse_decision_to_binary(decision_text):
	    """
	    Convert a decision text to a boolean based on the presence of the word 'yes'.

	    The match is case-insensitive and restricted to 'yes' as a whole word, so
	    text such as "eyes" or "yesterday" does not count as a positive decision.

	    Parameters:
	        decision_text (str): The decision text to be analyzed. None or an
	        empty string is treated as a negative decision.

	    Returns:
	        bool: True if the word 'yes' is present in the decision text, False otherwise.
	    """
	    import re  # local import: 're' is not among this module's top-level imports

	    if not decision_text:
	        return False
	    # \b anchors keep the match to a standalone word rather than any substring.
	    return re.search(r"\byes\b", decision_text.lower()) is not None

	def get_transcript_data_and_pause_count(video_id, min_pause_seconds=0.0):
	    """
	    Fetch a video's English transcript and derive its length and pause count.

	    The total duration is taken from the end of the last transcript segment
	    (its 'start' plus its 'duration'). A pause is counted whenever a segment
	    starts more than `min_pause_seconds` after the previous segment ended.

	    Parameters:
	        video_id (str): The unique identifier of the YouTube video.
	        min_pause_seconds (float): Smallest gap between two segments that is
	        counted as a pause. Defaults to 0.0, i.e. any gap at all counts.

	    Returns:
	        tuple: (full transcript text (str), total duration in whole minutes (int),
	        estimated number of pauses (int)), or (None, None, None) if the
	        transcript is empty or an error occurs.
	    """
	    try:
	        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
	        if not transcript:
	            # Always hand back a 3-tuple so callers can unpack the result.
	            logging.error(f"Transcript for video ID {video_id} is empty.")
	            return None, None, None

	        last_segment = transcript[-1]
	        total_duration = last_segment['start'] + last_segment['duration']

	        # Compare each segment's start with the end of the one before it.
	        pauses = sum(
	            1
	            for previous, current in zip(transcript, transcript[1:])
	            if current['start'] > previous['start'] + previous['duration'] + min_pause_seconds
	        )

	        full_transcript = " ".join(segment['text'] for segment in transcript)
	        logging.info(f"Transcript retrieved successfully for video ID {video_id}.")
	        # Segment times are floats; int() keeps the documented integer minutes.
	        return full_transcript, int(total_duration // 60), pauses
	    except Exception as e:
	        logging.error(f"Failed to retrieve transcript for video ID {video_id}. Error: {e}")
	        return None, None, None

	def analyze_transcript(url):
	    """
	    Evaluate a YouTube video's transcript with GPT and report the outcome.

	    Reads the prompt template from 'prompt.txt', extracts the video ID from
	    `url`, fetches the transcript together with its duration and pause count,
	    fills the template positionally with (transcript, pauses, duration) and
	    sends it to the 'gpt-4' chat model. The reply is reduced to a yes/no
	    decision by parse_decision_to_binary().

	    Parameters:
	        url (str): The URL of the YouTube video to be analyzed.

	    Returns:
	        str: A message stating whether the candidate qualifies for an interview,
	        or an error message if the prompt file cannot be read, the URL is not
	        a usable YouTube URL, the transcript cannot be retrieved, or any other
	        error occurs during processing.

	    Note:
	        The OpenAI API key is read from the 'OpenAIKey' environment variable.
	    """
	    try:
	        with open('prompt.txt', 'r', encoding='utf-8') as file:
	            prompt_template = file.read()
	    except (OSError, UnicodeError) as e:
	        logging.error(f"Error opening or reading from 'prompt.txt': {e}")
	        return "Error processing the prompt file."

	    try:
	        video_id = get_video_id_from_url(url)
	        if not video_id:
	            logging.error("Invalid URL provided.")
	            return "Unable to process the video URL. Currently only YouTube URLs are accepted."

	        full_transcript, total_duration, pauses = get_transcript_data_and_pause_count(
	            video_id)

	        if full_transcript is None:
	            # Report the failure as a message, like every other exit of this function.
	            logging.error("Error retrieving the transcript.")
	            return "Unable to retrieve the transcript for the video."

	        # Placeholders in the template are positional: transcript, pauses, duration.
	        prompt = prompt_template.format(full_transcript, pauses, total_duration)

	        client = openai.OpenAI(api_key=os.getenv('OpenAIKey'))
	        response = client.chat.completions.create(
	            model="gpt-4",
	            messages=[
	                {"role": "system", "content": "You are a helpful assistant."},
	                {"role": "user", "content": prompt},
	            ],
	        )

	        # The message content can be None; treat that as an empty reply.
	        reply = response.choices[0].message.content or ""
	        if parse_decision_to_binary(reply.strip()):
	            return "The candidate qualifies for an interview."
	        return "The candidate does not qualify for an interview."
	    except Exception as e:
	        logging.error(f"An error occurred during the analysis: {e}")
	        return f"An error occurred during the processing. {e}"