Blake committed on
Commit
5af9455
1 Parent(s): ab41f96

Added initial project

Browse files
Files changed (6) hide show
  1. __pycache__/helpers.cpython-311.pyc +0 -0
  2. app.log +0 -0
  3. app.py +109 -95
  4. helpers.py +203 -0
  5. prompt.txt +8 -2
  6. requirements.txt +1 -0
__pycache__/helpers.cpython-311.pyc ADDED
Binary file (10.1 kB). View file
 
app.log ADDED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -1,110 +1,124 @@
1
- import gradio as gr
2
- from youtube_transcript_api import YouTubeTranscriptApi
3
- import openai
4
- from urllib.parse import urlparse, parse_qs
5
- import re
6
  import os
7
- import logging
8
 
9
- logging.basicConfig(filename='app.log', filemode='a',
10
- format='%(name)s - %(levelname)s - %(message)s', level=logging.DEBUG)
11
 
12
-
13
- def get_video_id_from_url(url):
14
- """
15
- Extracts the video ID from the YouTube URL.
16
  """
17
- try:
18
- url_data = urlparse(url)
19
- query = parse_qs(url_data.query)
20
- video_id = query.get("v")
21
- if video_id:
22
- logging.info(f"Video ID {video_id[0]} extracted from URL.")
23
- return video_id[0]
24
- else:
25
- logging.warning(f"No video ID found in URL: {url}")
26
- return None
27
- except Exception as e:
28
- logging.error(f"Error extracting video ID from URL {url}: {e}")
29
- return None
30
 
 
 
 
 
 
31
 
32
- def get_transcript_data_and_pause_count(video_id):
33
- """
34
- Retrieves the transcript for the given video ID, calculates the total duration, and estimates the number of pauses.
 
 
 
 
 
 
 
 
 
 
 
35
  """
 
 
 
 
 
36
  try:
37
- transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
38
- if transcript:
39
- last_segment = transcript[-1]
40
- total_duration = last_segment['start'] + last_segment['duration']
41
-
42
- # Estimate the number of pauses
43
- pauses = 0
44
- for i in range(1, len(transcript)):
45
- current_start = transcript[i]['start']
46
- previous_end = transcript[i-1]['start'] + transcript[i-1]['duration']
47
- if current_start > previous_end:
48
- pauses += 1
49
-
50
- full_transcript = " ".join(segment['text'] for segment in transcript)
51
- logging.info(f"Transcript retrieved successfully for video ID {video_id}.")
52
- return full_transcript, total_duration // 60, pauses
 
 
 
 
 
 
 
 
53
  except Exception as e:
54
- logging.error(f"Failed to retrieve transcript for video ID {video_id}. Error: {e}")
55
- return None, None, None
56
 
57
- def analyze_transcript(url):
58
- """
59
- Analyzes the YouTube video transcript for total length and estimates the number of pauses.
 
 
60
  """
61
- try:
62
- with open('prompt.txt', 'r') as file:
63
- prompt = file.read()
64
- except Exception as e:
65
- logging.error(f"Error opening or reading from 'prompt.txt': {e}")
66
- return "Error processing the prompt file."
67
 
 
 
 
 
 
 
 
 
68
  try:
69
- video_id = get_video_id_from_url(url)
70
- if not video_id:
71
- logging.error("Invalid URL provided.")
72
- return "Invalid URL. Please enter a valid YouTube video URL."
73
-
74
- full_transcript, total_duration, pauses = get_transcript_data_and_pause_count(
75
- video_id)
76
-
77
- if full_transcript is None: # If there was an error retrieving the transcript
78
- logging.error("Error retrieving the transcript.")
79
- return pauses
80
-
81
- # Define the prompt for GPT evaluation based on the rubric
82
- prompt = prompt.format(full_transcript, pauses, total_duration)
83
-
84
- # Using the new OpenAI client structure
85
- client = openai.OpenAI(api_key=os.getenv('OpenAIKey'))
86
- response = client.chat.completions.create(
87
- model="gpt-4",
88
- messages=[
89
- {"role": "system", "content": "You are a helpful assistant."},
90
- {"role": "user", "content": prompt}
91
- ],
92
- )
93
-
94
- decision = response.choices[0].message.content.strip()
95
- return decision
 
 
 
 
 
 
96
  except Exception as e:
97
- logging.error(f"An error occurred during the analysis: {e}")
98
- return f"An error occurred during the processing. {e}"
99
-
100
-
101
- # Gradio interface
102
- iface = gr.Interface(
103
- fn=analyze_transcript,
104
- inputs=gr.Textbox(label="Enter YouTube Video URL"), # Updated
105
- outputs=gr.Textbox(label="Interview Recommendation"), # Updated
106
- description="This app evaluates a YouTube video interview transcript against a specific rubric to recommend if the person should receive an interview."
107
- )
108
 
109
- # Launch the app
110
- iface.launch()
 
1
+ from flask import Flask, request, jsonify, make_response
2
+ from requests.auth import HTTPBasicAuth
3
+ from helpers import *
4
+ import requests
 
5
  import os
 
6
 
7
+ app = Flask(__name__)
8
+ app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
9
 
10
def post_candidate_analysis_to_lever(analysis_result, candidate_id):
    """
    Posts the analysis result of a candidate's video interview to Lever as a note.

    Builds the notes endpoint URL for the opportunity identified by candidate_id and
    sends a JSON body whose "value" is the analysis result prefixed with
    "Video Interview ML Decision: ". Authentication is HTTP basic auth with the
    'LeverKey' environment variable as the username and an empty password.

    Parameters:
    - analysis_result (str): The result of the video interview analysis to be sent to Lever.
    - candidate_id (str): The unique identifier for the candidate/opportunity in Lever.

    Returns:
    - dict: The JSON response from the Lever API if the request is successful.
    - None: If the API key is missing or the request fails for any reason. Failures are
      logged, never raised, so callers only need to check for None.
    """
    lever_api_url = 'https://api.lever.co/v1/opportunities/{}/notes'.format(candidate_id)
    data = {
        "value": "Video Interview ML Decision: {}".format(analysis_result)
    }

    lever_key = os.getenv('LeverKey')
    if not lever_key:
        # Without a key the request could only come back as an authentication error.
        logging.error("Environment variable 'LeverKey' is not set; cannot send data to Lever")
        return None

    try:
        # Log the attempt to send data
        logging.info(f"Sending analysis result to Lever for candidate ID {candidate_id}")

        # An explicit timeout keeps the webhook worker from blocking forever on a
        # stalled connection; without it the Timeout handler below can never fire.
        response = requests.post(
            lever_api_url,
            auth=HTTPBasicAuth(lever_key, ''),
            json=data,
            timeout=30,
        )

        # Turn 4xx/5xx responses into HTTPError so they are logged below
        response.raise_for_status()

        # Log successful data sending
        logging.info(f"Successfully sent analysis result to Lever for candidate ID {candidate_id}")

        return response.json()
    except requests.exceptions.HTTPError as http_err:
        # Log HTTP errors (e.g., 404, 401, etc.)
        logging.error(f'HTTP error occurred: {http_err}')
    except requests.exceptions.ConnectionError as conn_err:
        # Log connection errors (e.g., DNS failure, refused connection, etc.)
        logging.error(f'Connection error occurred: {conn_err}')
    except requests.exceptions.Timeout as timeout_err:
        # Log timeout errors
        logging.error(f'Timeout error occurred: {timeout_err}')
    except requests.exceptions.RequestException as req_err:
        # Log any other requests-related errors
        logging.error(f'Error sending data to Lever: {req_err}')
    except Exception as e:
        # Catch-all for any other exceptions not related to requests
        logging.error(f'An unexpected error occurred: {e}')

    # Every failure path falls through to here
    return None
71
+
72
@app.route('/webhook', methods=['POST'])
def handle_webhook():
    """
    Processes incoming webhook POST requests, analyzes video transcripts, and posts results to Lever.

    Validates the presence of required data ('opportunityId') in the request, retrieves the candidate's
    video URL, analyzes the video transcript, and sends the analysis result to Lever. It handles errors
    at each step by logging the error and returning an appropriate HTTP response.

    Returns:
    - The analysis result as JSON with a 200 status code if all operations succeed.
    - A 200 response carrying an explanatory message if no YouTube URL is found for the
      opportunity (nothing is posted to Lever in that case).
    - A 400 response if the body is missing, is not valid JSON, or lacks 'opportunityId'.
    - A 500 response if the analysis returns None, posting to Lever fails, or any
      unexpected exception is raised.
    """
    try:
        # silent=True returns None for a missing/invalid JSON body or a wrong content
        # type instead of raising, so the client gets a 400 rather than a generic 500.
        data = request.get_json(silent=True)
        if not data:
            # If no data is received
            logging.error("No data received in request")
            return make_response(jsonify({"error": "No data received"}), 400)

        # A JSON array or scalar has no .get(); treat it the same as a missing id.
        opportunity_id = data.get('opportunityId') if isinstance(data, dict) else None
        if not opportunity_id:
            # If opportunityId is not provided in the data
            logging.error("No opportunityId provided")
            return make_response(jsonify({"error": "No opportunityId provided"}), 400)

        candidate_video_url = get_youtube_url(opportunity_id)
        if not candidate_video_url:
            # If no URL is returned for the given opportunity_id
            logging.error(f"Unable to process video URL for opportunityId {opportunity_id}")
            analysis_result = "Unable to process the video URL. Currently only YouTube URLs are accepted."

            return jsonify(analysis_result), 200

        analysis_result = analyze_transcript(candidate_video_url)
        if analysis_result is None:
            # Handle case where analysis_result is None or an error occurred during analysis
            logging.error(f"Error analyzing transcript for opportunityId {opportunity_id}")
            return make_response(jsonify({"error": "Failed to analyze transcript"}), 500)

        send_result = post_candidate_analysis_to_lever(analysis_result, opportunity_id)
        if send_result is None:
            # post_candidate_analysis_to_lever returns None on failure
            logging.error(f"Failed to send results to Lever for opportunityId {opportunity_id}")
            return make_response(jsonify({"error": "Failed to send results to Lever"}), 500)

        return jsonify(analysis_result), 200
    except Exception as e:
        # logging.exception records the traceback alongside the message
        logging.exception(f"An unexpected error occurred: {e}")
        return make_response(jsonify({"error": "An unexpected error occurred"}), 500)
 
 
 
 
 
 
 
 
 
122
 
123
if __name__ == '__main__':
    # Flask's debug mode enables the interactive Werkzeug debugger, which lets anyone
    # who can reach the server execute code. Keep it off unless explicitly requested
    # through the FLASK_DEBUG environment variable.
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, port=5002)
helpers.py ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from youtube_transcript_api import YouTubeTranscriptApi
2
+ import openai
3
+ from urllib.parse import urlparse, parse_qs
4
+ import requests
5
+ from requests.auth import HTTPBasicAuth
6
+ import os
7
+ import logging
8
+
9
+ logging.basicConfig(filename='app.log', filemode='a',
10
+ format='%(name)s - %(levelname)s - %(message)s', level=logging.DEBUG)
11
+
12
+
13
def get_video_id_from_url(url):
    """
    Extracts the YouTube video ID from a given URL.

    Supported forms:
    - 'youtube.com' and any of its subdomains ('www.', 'm.', 'music.', ...):
      the 'v' query parameter, or the path segment following 'embed', 'shorts',
      'live' or 'v'.
    - 'youtu.be': the first path segment.
    URLs written without a scheme (e.g. 'youtube.com/watch?v=ID') are accepted too.

    Parameters:
    url (str): The full URL of the YouTube video.

    Returns:
    str: The extracted video ID if found, otherwise None.

    Note:
    This function never raises; non-string, empty, or unparsable input yields None.
    """
    if not isinstance(url, str):
        return None
    candidate = url.strip()
    if not candidate:
        return None

    try:
        url_data = urlparse(candidate)
        if not url_data.netloc:
            # Without a scheme urlparse puts the host into the path; retry with one
            # so 'youtube.com/watch?v=ID' is understood.
            url_data = urlparse('https://' + candidate.lstrip('/'))
        hostname = (url_data.hostname or '').lower()
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. a broken IPv6 literal)
        return None

    # Non-empty path segments, so leading/trailing/double slashes do not matter
    path_parts = [part for part in url_data.path.split('/') if part]

    if hostname == 'youtu.be':
        # Short links carry the ID as the first path segment
        return path_parts[0] if path_parts else None

    if hostname == 'youtube.com' or hostname.endswith('.youtube.com'):
        video_id = parse_qs(url_data.query).get("v")
        if video_id:
            return video_id[0]
        # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
        if len(path_parts) >= 2 and path_parts[0] in ('embed', 'shorts', 'live', 'v'):
            return path_parts[1]

    return None
49
+
50
def get_first_youtube_video_url(urls):
    """
    Finds and returns the first YouTube video URL from a list of URLs.

    Each string entry is checked, case-insensitively, for the substring 'youtube' or
    'youtu.be'; the first entry that matches is returned unchanged. Entries that are
    not strings are skipped.

    Parameters:
    urls (list of str): A list containing URLs to be checked. None or an empty list
        yields None; a single string is treated as a one-element list.

    Returns:
    str: The first YouTube video URL found in the list, or None if no YouTube URL is found.
    """
    if not urls:
        return None
    if isinstance(urls, str):
        # Iterating a bare string would test single characters, never matching
        urls = [urls]

    return next(
        (
            url for url in urls
            if isinstance(url, str)
            and ('youtube' in url.lower() or 'youtu.be' in url.lower())
        ),
        None,
    )
67
+
68
def get_youtube_url(opportunity_id):
    """
    Retrieves the YouTube video URL associated with a given opportunity ID from the Lever API.

    Makes a GET request to the Lever opportunities endpoint for the provided ID, reads
    the 'links' entry under 'data' in the JSON response, and returns the first link
    accepted by get_first_youtube_video_url.

    Parameters:
    opportunity_id (str): The unique identifier for the opportunity in the Lever system.

    Returns:
    str: The YouTube video URL associated with the opportunity, or None if the response
        has no links or none of them is a YouTube URL.

    Raises:
    requests.exceptions.RequestException: If the request fails, times out, or Lever
        answers with an error status.
    ValueError: If the response body is not valid JSON.

    Note:
    Requires the 'LeverKey' environment variable to be set for authentication with the Lever API.
    """
    url = 'https://api.lever.co/v1/opportunities/{}'.format(opportunity_id)
    # The timeout prevents a stalled connection from hanging the caller indefinitely
    response = requests.get(url, auth=HTTPBasicAuth(os.getenv('LeverKey'), ''), timeout=30)
    # Surface 4xx/5xx as an HTTPError instead of failing later on a missing key
    response.raise_for_status()

    payload = response.json()
    data = payload.get('data') if isinstance(payload, dict) else None
    links = data.get('links') if isinstance(data, dict) else None
    if not links:
        logging.warning(f"No links found for opportunity ID {opportunity_id}")
        return None

    return get_first_youtube_video_url(links)
92
+
93
def parse_decision_to_binary(decision_text):
    """
    Converts a decision text to a binary outcome based on the presence of the word 'yes'.

    The match is case-insensitive and requires 'yes' to appear as a whole word, so
    words that merely contain those letters (such as 'yesterday' or 'eyes') do not
    count as a positive decision.

    Parameters:
    decision_text (str): The decision text to be analyzed. None or an empty string is
        treated as a negative decision.

    Returns:
    bool: True if the word 'yes' is present in the decision text, False otherwise.
    """
    # Imported locally so the function does not depend on a module-level import of re
    import re

    if not decision_text:
        return False
    return re.search(r"\byes\b", str(decision_text), re.IGNORECASE) is not None
110
+
111
def get_transcript_data_and_pause_count(video_id):
    """
    Fetches a video's transcript, calculates its total duration in minutes, and counts pauses between segments.

    Retrieves the English transcript through YouTubeTranscriptApi and reads the
    'start', 'duration' and 'text' entries of each segment. The total duration is the
    end time of the last segment; a pause is counted whenever a segment starts after
    the previous segment has ended.

    Parameters:
    video_id (str): The unique identifier of the YouTube video.

    Returns:
    tuple: A tuple containing the full transcript text (str), total duration in whole minutes (int),
           and the estimated number of pauses (int), or (None, None, None) if the transcript
           is empty or an error occurs.
    """
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
        if not transcript:
            # Always hand back a 3-tuple so callers can unpack the result safely
            logging.error(f"Transcript for video ID {video_id} is empty.")
            return None, None, None

        last_segment = transcript[-1]
        total_duration = last_segment['start'] + last_segment['duration']

        # Estimate the number of pauses: gaps between consecutive segments
        pauses = sum(
            1
            for previous, current in zip(transcript, transcript[1:])
            if current['start'] > previous['start'] + previous['duration']
        )

        full_transcript = " ".join(segment['text'] for segment in transcript)
        logging.info(f"Transcript retrieved successfully for video ID {video_id}.")
        # Floor division on float seconds yields a float; convert to the documented int
        return full_transcript, int(total_duration // 60), pauses
    except Exception as e:
        logging.error(f"Failed to retrieve transcript for video ID {video_id}. Error: {e}")
        return None, None, None
146
+
147
def analyze_transcript(url):
    """
    Analyzes a YouTube video's transcript using a predefined prompt for GPT evaluation.

    Reads the prompt template from 'prompt.txt', extracts the video ID from the provided
    URL, retrieves the transcript together with its duration and pause count, fills the
    template with (transcript, pauses, duration) in that order, and asks the 'gpt-4'
    chat model for a decision. The model's reply is reduced to yes/no by
    parse_decision_to_binary.

    Parameters:
    url (str): The URL of the YouTube video to be analyzed.

    Returns:
    str: A message indicating whether the candidate qualifies for an interview, or an
         error message if the prompt file cannot be read, the video URL is invalid, or
         any other error occurs during processing.
    None: If the transcript could not be retrieved.

    Note:
    Requires the 'OpenAIKey' environment variable for the OpenAI client.
    """
    try:
        # Explicit encoding so the prompt reads the same regardless of platform locale
        with open('prompt.txt', 'r', encoding='utf-8') as file:
            prompt = file.read()
    except Exception as e:
        logging.error(f"Error opening or reading from 'prompt.txt': {e}")
        return "Error processing the prompt file."

    try:
        video_id = get_video_id_from_url(url)
        if not video_id:
            logging.error("Invalid URL provided.")
            return "Unable to process the video URL. Currently only YouTube URLs are accepted."

        full_transcript, total_duration, pauses = get_transcript_data_and_pause_count(
            video_id)

        if full_transcript is None:  # If there was an error retrieving the transcript
            logging.error("Error retrieving the transcript.")
            # Explicit None sentinel; callers check `is None` to detect this failure
            return None

        # Define the prompt for GPT evaluation based on the rubric
        prompt = prompt.format(full_transcript, pauses, total_duration)

        # Using the new OpenAI client structure
        client = openai.OpenAI(api_key=os.getenv('OpenAIKey'))
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
        )

        decision = parse_decision_to_binary(response.choices[0].message.content.strip())

        if decision:
            return "The candidate qualifies for an interview."
        return "The candidate does not qualify for an interview."
    except Exception as e:
        logging.error(f"An error occurred during the analysis: {e}")
        return f"An error occurred during the processing. {e}"
prompt.txt CHANGED
@@ -77,5 +77,11 @@ Sum the scores for a total out of 50. Please be precise with your total. Based o
77
  0-39 Points: Does not qualify for an interview.
78
  40-50 Points: Qualifies for an interview, demonstrating strong potential.
79
 
80
- Assessment Instructions:
81
- Provide a blunt, straightforward assessment for each criterion based on the candidate's explicit responses. This includes evaluating the efficiency of the presentation's duration and the effectiveness of pacing, recognizing that optimal time management and minimal unnecessary pauses are indicators of preparedness and communication skills. Your feedback should not only highlight areas where the candidate excels but, more critically, pinpoint where improvements are necessary. The conclusion should clearly state the candidate's suitability for further interviews, based on the total score and observed strengths and weaknesses.
 
 
 
 
 
 
 
77
  0-39 Points: Does not qualify for an interview.
78
  40-50 Points: Qualifies for an interview, demonstrating strong potential.
79
 
80
+ Modified Concluding Instructions for You (the AI):
81
+
82
+ Evaluate the candidate's performance based on the total score and the assessment of their strengths and weaknesses in time management, presentation efficiency, and communication effectiveness. You should then decide the candidate's suitability for further interviews. Your decision should be:
83
+
84
+ "Yes" if the candidate meets or exceeds the required criteria for an interview.
85
+ "No" if the candidate does not meet the necessary criteria for an interview.
86
+
87
+ Your output must be strictly one of these two options (Yes or No), ensuring a clear and direct conclusion based on the evaluation criteria provided.
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  gradio
2
  youtube_transcript_api
3
  openai
 
 
1
  gradio
2
  youtube_transcript_api
3
  openai
4
+ flask