Update process_interview.py
Browse files- process_interview.py +36 -29
process_interview.py
CHANGED
|
@@ -363,7 +363,7 @@ def analyze_interviewee_voice(audio_path: str, utterances: List[Dict]) -> Dict:
|
|
| 363 |
intensities = []
|
| 364 |
for segment in segments:
|
| 365 |
rms = np.mean(librosa.feature.rms(y=segment)[0]) if len(segment) > 0 else 0.0
|
| 366 |
-
intensities.append(float(rms))
|
| 367 |
intensity_mean = np.mean(intensities) if intensities else 0
|
| 368 |
intensity_std = np.std(intensities) if intensities else 0
|
| 369 |
shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(intensities) > 1 and intensity_mean > 0 else 0
|
|
@@ -420,14 +420,15 @@ def generate_anxiety_confidence_chart(composite_scores: Dict, chart_buffer):
|
|
| 420 |
ha='center', color='black', fontweight='bold', fontsize=10)
|
| 421 |
ax.grid(True, axis='y', linestyle='--', alpha=0.7)
|
| 422 |
plt.tight_layout()
|
| 423 |
-
plt.savefig(chart_buffer, format='png', bbox_inches='tight', dpi=
|
| 424 |
plt.close(fig)
|
| 425 |
except Exception as e:
|
| 426 |
logger.error(f"Error generating chart: {str(e)}")
|
| 427 |
|
| 428 |
def calculate_acceptance_probability(analysis_data: Dict) -> float:
|
| 429 |
voice = analysis_data.get('voice_analysis', {})
|
| 430 |
-
if 'error' in voice:
|
|
|
|
| 431 |
w_confidence, w_anxiety, w_fluency, w_speaking_rate, w_filler_repetition, w_content_strengths = 0.35, -0.25, 0.2, 0.15, -0.15, 0.25
|
| 432 |
confidence_score = voice.get('composite_scores', {}).get('confidence', 0.0)
|
| 433 |
anxiety_score = voice.get('composite_scores', {}).get('anxiety', 0.0)
|
|
@@ -523,10 +524,10 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 523 |
story.append(Paragraph("Candidate Interview Analysis", h1))
|
| 524 |
story.append(Paragraph(f"Generated: {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=8, textColor=colors.HexColor('#666666'), fontName='Helvetica')))
|
| 525 |
story.append(Spacer(1, 0.3*inch))
|
| 526 |
-
acceptance_prob =
|
| 527 |
story.append(Paragraph("Hiring Suitability Snapshot", h2))
|
| 528 |
prob_color = colors.HexColor('#2E7D32') if acceptance_prob >= 80 else (colors.HexColor('#F57C00') if acceptance_prob >= 60 else colors.HexColor('#D32F2F'))
|
| 529 |
-
story.append(Paragraph(f"Suitability Score: <font size=14 color='{prob_color.hexval()}'>{acceptance_prob:.2f}%</font>",
|
| 530 |
ParagraphStyle(name='Prob', fontSize=10, spaceAfter=8, alignment=1, fontName='Helvetica-Bold')))
|
| 531 |
if acceptance_prob >= 80:
|
| 532 |
story.append(Paragraph("<b>HR Verdict:</b> Outstanding candidate, recommended for immediate advancement.", body_text))
|
|
@@ -540,9 +541,9 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 540 |
participants = sorted([p for p in set(u['speaker'] for u in analysis_data['transcript']) if p != 'Unknown'])
|
| 541 |
table_data = [
|
| 542 |
['Metric', 'Value'],
|
| 543 |
-
['Interview Duration', f"{analysis_data['text_analysis']['total_duration']:.
|
| 544 |
['Speaker Turns', f"{analysis_data['text_analysis']['speaker_turns']}"],
|
| 545 |
-
['Participants',
|
| 546 |
]
|
| 547 |
table = Table(table_data, colWidths=[2.2*inch, 3.8*inch])
|
| 548 |
table.setStyle(TableStyle([
|
|
@@ -554,7 +555,7 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 554 |
('FONTSIZE', (0,0), (-1,-1), 8),
|
| 555 |
('BOTTOMPADDING', (0,0), (-1,0), 6),
|
| 556 |
('TOPPADDING', (0,0), (-1,0), 6),
|
| 557 |
-
('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')
|
| 558 |
('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB')),
|
| 559 |
]))
|
| 560 |
story.append(table)
|
|
@@ -565,41 +566,41 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 565 |
# Detailed Analysis
|
| 566 |
story.append(Paragraph("Detailed Candidate Evaluation", h1))
|
| 567 |
|
| 568 |
-
# Communication and
|
| 569 |
story.append(Paragraph("1. Communication & Vocal Dynamics", h2))
|
| 570 |
voice_analysis = analysis_data.get('voice_analysis', {})
|
| 571 |
-
if voice_analysis
|
| 572 |
table_data = [
|
| 573 |
['Metric', 'Value', 'HR Insight'],
|
| 574 |
['Speaking Rate', f"{voice_analysis.get('speaking_rate', 0):.2f} words/sec", 'Benchmark: 2.0-3.0 wps; impacts clarity'],
|
| 575 |
-
['Filler Words', f"{voice_analysis.get('filler_ratio', 0) * 100:.1f}%
|
| 576 |
['Anxiety', voice_analysis.get('interpretation', {}).get('anxiety_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('anxiety', 0):.3f}"],
|
| 577 |
['Confidence', voice_analysis.get('interpretation', {}).get('confidence_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('confidence', 0):.3f}"],
|
| 578 |
['Fluency', voice_analysis.get('interpretation', {}).get('fluency_level', 'N/A'), 'Drives engagement'],
|
| 579 |
]
|
| 580 |
table = Table(table_data, colWidths=[1.5*inch, 1.3*inch, 3.2*inch])
|
| 581 |
table.setStyle(TableStyle([
|
| 582 |
-
('BACKGROUND', (0,0), (-1,0)
|
| 583 |
-
('TEXTCOLOR', (0,0), (-1
|
| 584 |
('ALIGN', (0,0), (-1,-1), 'LEFT'),
|
| 585 |
('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
|
| 586 |
-
('FONTNAME', (0,0), (-1
|
| 587 |
('FONTSIZE', (0,0), (-1,-1), 8),
|
| 588 |
-
('BOTTOMPADDING', (0,0), (-1
|
| 589 |
-
('TOPPADDING', (0,0), (0
|
| 590 |
-
('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA'))
|
| 591 |
-
('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB'))
|
| 592 |
]))
|
| 593 |
story.append(table)
|
| 594 |
story.append(Spacer(1, 0.15*inch))
|
| 595 |
chart_buffer = io.BytesIO()
|
| 596 |
-
|
| 597 |
chart_buffer.seek(0)
|
| 598 |
img = Image(chart_buffer, width=4.2*inch, height=2.8*inch)
|
| 599 |
img.hAlign = 'CENTER'
|
| 600 |
story.append(img)
|
| 601 |
else:
|
| 602 |
-
story.append(Paragraph("Voice analysis unavailable.", body_text))
|
| 603 |
story.append(Spacer(1, 0.15*inch))
|
| 604 |
|
| 605 |
# Parse Gemini Report
|
|
@@ -615,7 +616,8 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 615 |
lines = gemini_report_text.split('\n')
|
| 616 |
for line in lines:
|
| 617 |
line = line.strip()
|
| 618 |
-
if not line:
|
|
|
|
| 619 |
if line.startswith('**') and line.endswith('**'):
|
| 620 |
section_title = line.strip('**').strip()
|
| 621 |
if section_title.startswith(('1.', '2.', '3.', '4.', '5.')):
|
|
@@ -637,10 +639,11 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 637 |
current_subsection = None
|
| 638 |
elif line.startswith('-') and current_section:
|
| 639 |
clean_line = line.lstrip('-').strip()
|
| 640 |
-
if not clean_line:
|
| 641 |
-
|
|
|
|
| 642 |
if current_section == 'Competency':
|
| 643 |
-
if any(k in clean_line.lower() for k in ['leader', '
|
| 644 |
current_subsection = 'Strengths'
|
| 645 |
elif any(k in clean_line.lower() for k in ['improv', 'grow', 'depth']):
|
| 646 |
current_subsection = 'Growth Areas'
|
|
@@ -717,10 +720,14 @@ def create_pdf_report(analysis_data: Dict, output_path: str, gemini_report_text:
|
|
| 717 |
return False
|
| 718 |
|
| 719 |
def convert_to_serializable(obj):
|
| 720 |
-
if isinstance(obj, np.generic):
|
| 721 |
-
|
| 722 |
-
if isinstance(obj,
|
| 723 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 724 |
return obj
|
| 725 |
|
| 726 |
def process_interview(audio_url: str) -> Dict:
|
|
@@ -800,6 +807,6 @@ def process_interview(audio_url: str) -> Dict:
|
|
| 800 |
if is_downloaded and local_audio_path and os.path.exists(local_audio_path):
|
| 801 |
try:
|
| 802 |
os.remove(local_audio_path)
|
| 803 |
-
logger.info(f"Cleaned up temporary
|
| 804 |
except Exception as e:
|
| 805 |
logger.error(f"Failed to clean up local audio file {local_audio_path}: {str(e)}")
|
|
|
|
| 363 |
intensities = []
|
| 364 |
for segment in segments:
|
| 365 |
rms = np.mean(librosa.feature.rms(y=segment)[0]) if len(segment) > 0 else 0.0
|
| 366 |
+
intensities.append(float(rms))
|
| 367 |
intensity_mean = np.mean(intensities) if intensities else 0
|
| 368 |
intensity_std = np.std(intensities) if intensities else 0
|
| 369 |
shimmer = np.mean(np.abs(np.diff(intensities))) / intensity_mean if len(intensities) > 1 and intensity_mean > 0 else 0
|
|
|
|
| 420 |
ha='center', color='black', fontweight='bold', fontsize=10)
|
| 421 |
ax.grid(True, axis='y', linestyle='--', alpha=0.7)
|
| 422 |
plt.tight_layout()
|
| 423 |
+
plt.savefig(chart_buffer, format='png', bbox_inches='tight', dpi=300)
|
| 424 |
plt.close(fig)
|
| 425 |
except Exception as e:
|
| 426 |
logger.error(f"Error generating chart: {str(e)}")
|
| 427 |
|
| 428 |
def calculate_acceptance_probability(analysis_data: Dict) -> float:
|
| 429 |
voice = analysis_data.get('voice_analysis', {})
|
| 430 |
+
if 'error' in voice:
|
| 431 |
+
return 50.0
|
| 432 |
w_confidence, w_anxiety, w_fluency, w_speaking_rate, w_filler_repetition, w_content_strengths = 0.35, -0.25, 0.2, 0.15, -0.15, 0.25
|
| 433 |
confidence_score = voice.get('composite_scores', {}).get('confidence', 0.0)
|
| 434 |
anxiety_score = voice.get('composite_scores', {}).get('anxiety', 0.0)
|
|
|
|
| 524 |
story.append(Paragraph("Candidate Interview Analysis", h1))
|
| 525 |
story.append(Paragraph(f"Generated: {time.strftime('%B %d, %Y')}", ParagraphStyle(name='Date', alignment=1, fontSize=8, textColor=colors.HexColor('#666666'), fontName='Helvetica')))
|
| 526 |
story.append(Spacer(1, 0.3*inch))
|
| 527 |
+
acceptance_prob = analysis_data.get('acceptance_probability', 50.0)
|
| 528 |
story.append(Paragraph("Hiring Suitability Snapshot", h2))
|
| 529 |
prob_color = colors.HexColor('#2E7D32') if acceptance_prob >= 80 else (colors.HexColor('#F57C00') if acceptance_prob >= 60 else colors.HexColor('#D32F2F'))
|
| 530 |
+
story.append(Paragraph(f"Suitability Score: <font size=14 color='{prob_color.hexval()}'><b>{acceptance_prob:.2f}%</b></font>",
|
| 531 |
ParagraphStyle(name='Prob', fontSize=10, spaceAfter=8, alignment=1, fontName='Helvetica-Bold')))
|
| 532 |
if acceptance_prob >= 80:
|
| 533 |
story.append(Paragraph("<b>HR Verdict:</b> Outstanding candidate, recommended for immediate advancement.", body_text))
|
|
|
|
| 541 |
participants = sorted([p for p in set(u['speaker'] for u in analysis_data['transcript']) if p != 'Unknown'])
|
| 542 |
table_data = [
|
| 543 |
['Metric', 'Value'],
|
| 544 |
+
['Interview Duration', f"{analysis_data['text_analysis']['total_duration']:.2f} seconds"],
|
| 545 |
['Speaker Turns', f"{analysis_data['text_analysis']['speaker_turns']}"],
|
| 546 |
+
['Participants', ', '.join(participants)],
|
| 547 |
]
|
| 548 |
table = Table(table_data, colWidths=[2.2*inch, 3.8*inch])
|
| 549 |
table.setStyle(TableStyle([
|
|
|
|
| 555 |
('FONTSIZE', (0,0), (-1,-1), 8),
|
| 556 |
('BOTTOMPADDING', (0,0), (-1,0), 6),
|
| 557 |
('TOPPADDING', (0,0), (-1,0), 6),
|
| 558 |
+
('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
|
| 559 |
('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB')),
|
| 560 |
]))
|
| 561 |
story.append(table)
|
|
|
|
| 566 |
# Detailed Analysis
|
| 567 |
story.append(Paragraph("Detailed Candidate Evaluation", h1))
|
| 568 |
|
| 569 |
+
# Communication and Vocal Dynamics
|
| 570 |
story.append(Paragraph("1. Communication & Vocal Dynamics", h2))
|
| 571 |
voice_analysis = analysis_data.get('voice_analysis', {})
|
| 572 |
+
if voice_analysis and 'error' not in voice_analysis:
|
| 573 |
table_data = [
|
| 574 |
['Metric', 'Value', 'HR Insight'],
|
| 575 |
['Speaking Rate', f"{voice_analysis.get('speaking_rate', 0):.2f} words/sec", 'Benchmark: 2.0-3.0 wps; impacts clarity'],
|
| 576 |
+
['Filler Words', f"{voice_analysis.get('filler_ratio', 0) * 100:.1f}%", 'High usage reduces credibility'],
|
| 577 |
['Anxiety', voice_analysis.get('interpretation', {}).get('anxiety_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('anxiety', 0):.3f}"],
|
| 578 |
['Confidence', voice_analysis.get('interpretation', {}).get('confidence_level', 'N/A'), f"Score: {voice_analysis.get('composite_scores', {}).get('confidence', 0):.3f}"],
|
| 579 |
['Fluency', voice_analysis.get('interpretation', {}).get('fluency_level', 'N/A'), 'Drives engagement'],
|
| 580 |
]
|
| 581 |
table = Table(table_data, colWidths=[1.5*inch, 1.3*inch, 3.2*inch])
|
| 582 |
table.setStyle(TableStyle([
|
| 583 |
+
('BACKGROUND', (0,0), (-1,0), colors.HexColor('#0050BC')),
|
| 584 |
+
('TEXTCOLOR', (0,0), (-1,0), colors.white),
|
| 585 |
('ALIGN', (0,0), (-1,-1), 'LEFT'),
|
| 586 |
('VALIGN', (0,0), (-1,-1), 'MIDDLE'),
|
| 587 |
+
('FONTNAME', (0,0), (-1,0), 'Helvetica-Bold'),
|
| 588 |
('FONTSIZE', (0,0), (-1,-1), 8),
|
| 589 |
+
('BOTTOMPADDING', (0,0), (-1,0), 6),
|
| 590 |
+
('TOPPADDING', (0,0), (-1,0), 6),
|
| 591 |
+
('BACKGROUND', (0,1), (-1,-1), colors.HexColor('#F5F6FA')),
|
| 592 |
+
('GRID', (0,0), (-1,-1), 0.4, colors.HexColor('#DDE4EB')),
|
| 593 |
]))
|
| 594 |
story.append(table)
|
| 595 |
story.append(Spacer(1, 0.15*inch))
|
| 596 |
chart_buffer = io.BytesIO()
|
| 597 |
+
generate_anxiety_confidence_chart(voice_analysis.get('composite_scores', {}), chart_buffer)
|
| 598 |
chart_buffer.seek(0)
|
| 599 |
img = Image(chart_buffer, width=4.2*inch, height=2.8*inch)
|
| 600 |
img.hAlign = 'CENTER'
|
| 601 |
story.append(img)
|
| 602 |
else:
|
| 603 |
+
story.append(Paragraph(f"Voice analysis unavailable: {voice_analysis.get('error', 'Unknown error')}", body_text))
|
| 604 |
story.append(Spacer(1, 0.15*inch))
|
| 605 |
|
| 606 |
# Parse Gemini Report
|
|
|
|
| 616 |
lines = gemini_report_text.split('\n')
|
| 617 |
for line in lines:
|
| 618 |
line = line.strip()
|
| 619 |
+
if not line:
|
| 620 |
+
continue
|
| 621 |
if line.startswith('**') and line.endswith('**'):
|
| 622 |
section_title = line.strip('**').strip()
|
| 623 |
if section_title.startswith(('1.', '2.', '3.', '4.', '5.')):
|
|
|
|
| 639 |
current_subsection = None
|
| 640 |
elif line.startswith('-') and current_section:
|
| 641 |
clean_line = line.lstrip('-').strip()
|
| 642 |
+
if not clean_line:
|
| 643 |
+
continue
|
| 644 |
+
clean_line = re.sub(r'[()]+', '', clean_line)
|
| 645 |
if current_section == 'Competency':
|
| 646 |
+
if any(k in clean_line.lower() for k in ['leader', 'problem', 'commun', 'adapt', 'strength']):
|
| 647 |
current_subsection = 'Strengths'
|
| 648 |
elif any(k in clean_line.lower() for k in ['improv', 'grow', 'depth']):
|
| 649 |
current_subsection = 'Growth Areas'
|
|
|
|
| 720 |
return False
|
| 721 |
|
| 722 |
def convert_to_serializable(obj):
    """Recursively replace NumPy values in *obj* with plain Python equivalents.

    NumPy scalars are unwrapped with ``.item()``, NumPy arrays are turned into
    nested lists with ``.tolist()``, and dicts, lists and plain tuples are
    walked recursively. Any other object is returned unchanged.

    Args:
        obj: Arbitrary value, possibly nesting NumPy scalars or arrays.

    Returns:
        A structure with the same layout as *obj* in which NumPy scalars and
        arrays have been replaced by built-in Python values.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        # NumPy scalar keys (e.g. np.int64) are rejected by json.dumps just
        # like NumPy scalar values, so unwrap them as well.
        return {
            (key.item() if isinstance(key, np.generic) else key): convert_to_serializable(value)
            for key, value in obj.items()
        }
    if isinstance(obj, list):
        return [convert_to_serializable(item) for item in obj]
    # Exact-type check on purpose: rebuilding a tuple subclass (e.g. a
    # namedtuple) as a plain tuple would change what callers get back.
    if type(obj) is tuple:
        return tuple(convert_to_serializable(item) for item in obj)
    return obj
|
| 732 |
|
| 733 |
def process_interview(audio_url: str) -> Dict:
|
|
|
|
| 807 |
if is_downloaded and local_audio_path and os.path.exists(local_audio_path):
|
| 808 |
try:
|
| 809 |
os.remove(local_audio_path)
|
| 810 |
+
logger.info(f"Cleaned up temporary file: {local_audio_path}")
|
| 811 |
except Exception as e:
|
| 812 |
logger.error(f"Failed to clean up local audio file {local_audio_path}: {str(e)}")
|