Spaces:
Runtime error
Runtime error
| from youtube_transcript_api import YouTubeTranscriptApi | |
| import openai | |
| from urllib.parse import urlparse, parse_qs | |
| import requests | |
| from requests.auth import HTTPBasicAuth | |
| import os | |
| import logging | |
# Route every log record (DEBUG and above) to 'app.log', appending to the
# file rather than truncating it on each start.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(name)s - %(levelname)s - %(message)s',
    filename='app.log',
    filemode='a',
)
def get_video_id_from_url(url):
    """
    Extracts the YouTube video ID from a given URL.

    Recognised URL shapes:
      * youtube.com hosts (bare, 'www.', 'm.' or 'music.'): the 'v' query
        parameter, or failing that the path segment that follows 'embed',
        'shorts', 'live' or 'v' (e.g. '/shorts/<id>').
      * youtu.be: the first path segment (anything after it is ignored).

    Parameters:
        url (str): The full URL of the YouTube video.

    Returns:
        str: The extracted video ID if found, otherwise None.

    Note:
        This function never raises: non-string input and URLs that cannot be
        parsed yield None.
    """
    if not isinstance(url, str):
        return None

    try:
        url_data = urlparse(url)
        host = url_data.hostname or ""
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. unbalanced IPv6 brackets).
        return None

    if host.startswith("www."):
        host = host[len("www."):]

    # Non-empty path components, so leading/trailing/double slashes are harmless.
    segments = [part for part in url_data.path.split("/") if part]

    if host == "youtu.be":
        return segments[0] if segments else None

    if host in ("youtube.com", "m.youtube.com", "music.youtube.com"):
        ids = parse_qs(url_data.query).get("v")
        if ids:
            return ids[0]
        # Path-style URLs carry the ID right after a well-known prefix.
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

    return None
def get_first_youtube_video_url(urls):
    """
    Returns the first entry of `urls` that looks like a YouTube link.

    An entry qualifies when it contains the substring 'youtube' or 'youtu.be';
    entries are examined in order and the first match wins.

    Parameters:
        urls (list of str): Candidate URLs to scan.

    Returns:
        str: The first matching URL, or None if nothing matches.
    """
    youtube_markers = ('youtube', 'youtu.be')
    matches = (
        candidate
        for candidate in urls
        if any(marker in candidate for marker in youtube_markers)
    )
    return next(matches, None)
def get_youtube_url(opportunity_id, timeout=10):
    """
    Retrieves the YouTube video URL associated with a given opportunity ID from the Lever API.

    Issues a GET request for the opportunity, then returns the first entry of
    the opportunity's 'links' list that looks like a YouTube URL.

    Parameters:
        opportunity_id (str): The unique identifier for the opportunity in the Lever system.
        timeout (float): Seconds to wait for the Lever API before giving up.
            Without a timeout `requests` may block indefinitely.

    Returns:
        str: The YouTube video URL associated with the opportunity, or None if
        the opportunity has no links or none of them is a YouTube URL.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx response from the Lever API.

    Note:
        Requires the 'LeverKey' environment variable to be set for authentication with the Lever API.
    """
    url = 'https://api.lever.co/v1/opportunities/{}'.format(opportunity_id)
    response = requests.get(
        url,
        auth=HTTPBasicAuth(os.getenv('LeverKey'), ''),
        timeout=timeout,
    )
    # Surface HTTP errors (bad key, unknown opportunity) explicitly instead of
    # failing later with an opaque KeyError on the error payload.
    response.raise_for_status()

    # Tolerate an absent or null 'links' field.
    links = response.json()['data'].get('links') or []
    return get_first_youtube_video_url(links)
def parse_decision_to_binary(decision_text):
    """
    Converts a decision text to a binary outcome based on the presence of the word 'yes'.

    The text is lower-cased and split into alphanumeric words, so 'Yes',
    'YES.' and 'Decision: yes!' all count, while words that merely contain
    the letters (e.g. 'yesterday', 'eyes') do not.

    Parameters:
        decision_text (str): The decision text to be analyzed. None or an
            empty string is treated as a negative decision.

    Returns:
        bool: True if 'yes' appears as a whole word in the decision text, False otherwise.
    """
    if not decision_text:
        return False

    # Replace every non-alphanumeric character with a space so punctuation
    # attached to a word ("yes.", "yes,") does not hide it.
    normalized = "".join(
        ch if ch.isalnum() else " " for ch in decision_text.lower()
    )
    return "yes" in normalized.split()
def get_transcript_data_and_pause_count(video_id):
    """
    Fetches a video's transcript, calculates its total duration in minutes, and counts pauses between segments.

    Uses YouTubeTranscriptApi to retrieve the English transcript for the given
    video ID. The total duration is taken as the end time of the last segment;
    a pause is counted whenever a segment starts after the previous one ended.

    Parameters:
        video_id (str): The unique identifier of the YouTube video.

    Returns:
        tuple: (full transcript text (str), total duration in whole minutes (int),
        estimated number of pauses (int)), or (None, None, None) if the
        transcript is empty or any error occurs.
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
        if not transcript:
            # Always honour the 3-tuple contract; callers unpack the result.
            logging.error(f"Failed to retrieve transcript for video ID {video_id}. Error: transcript is empty")
            return None, None, None

        last_segment = transcript[-1]
        total_seconds = last_segment['start'] + last_segment['duration']

        # Estimate the number of pauses: gaps between consecutive segments.
        pauses = sum(
            1
            for previous, current in zip(transcript, transcript[1:])
            if current['start'] > previous['start'] + previous['duration']
        )

        full_transcript = " ".join(segment['text'] for segment in transcript)
        logging.info(f"Transcript retrieved successfully for video ID {video_id}.")
        # Segment times are floats; convert so minutes are a real int as documented.
        return full_transcript, int(total_seconds // 60), pauses
    except Exception as e:
        logging.error(f"Failed to retrieve transcript for video ID {video_id}. Error: {e}")
        return None, None, None
def analyze_transcript(url):
    """
    Analyzes a YouTube video's transcript for content quality, using a predefined prompt for GPT evaluation.

    Reads the prompt template from 'prompt.txt', extracts the video ID from the
    URL, fetches the transcript with its duration and pause count, fills the
    template with (transcript, pauses, duration) and asks the GPT model for a
    decision, which is then reduced to a yes/no outcome.

    Parameters:
        url (str): The URL of the YouTube video to be analyzed.

    Returns:
        str: A message indicating whether the candidate qualifies for an interview, an error message if the
        prompt file cannot be read, the video URL is invalid or the transcript could not be retrieved, or a
        detailed error message if any other error occurs during processing.

    Note:
        Requires the 'OpenAIKey' environment variable for the OpenAI client.
    """
    try:
        with open('prompt.txt', 'r', encoding='utf-8') as file:
            prompt = file.read()
    except Exception as e:
        logging.error(f"Error opening or reading from 'prompt.txt': {e}")
        return "Error processing the prompt file."

    try:
        video_id = get_video_id_from_url(url)
        if not video_id:
            logging.error("Invalid URL provided.")
            return "Unable to process the video URL. Currently only YouTube URLs are accepted."

        full_transcript, total_duration, pauses = get_transcript_data_and_pause_count(
            video_id)
        if full_transcript is None:  # If there was an error retrieving the transcript
            logging.error("Error retrieving the transcript.")
            # Return a message, not the (None) pause count, so callers always get a str.
            return "Unable to retrieve the transcript for the video."

        # Define the prompt for GPT evaluation based on the rubric
        prompt = prompt.format(full_transcript, pauses, total_duration)

        # Using the new OpenAI client structure
        client = openai.OpenAI(api_key=os.getenv('OpenAIKey'))
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
        )

        decision = parse_decision_to_binary(response.choices[0].message.content.strip())
        if decision:
            return "The candidate qualifies for an interview."
        return "The candidate does not qualify for an interview."
    except Exception as e:
        logging.error(f"An error occurred during the analysis: {e}")
        return f"An error occurred during the processing. {e}"