Update pipeline.py
Browse files- pipeline.py +52 -83
pipeline.py
CHANGED
|
@@ -628,109 +628,78 @@ def draw_question_overlay(frame, text, width, height):
|
|
| 628 |
|
| 629 |
|
| 630 |
##Main pipeline
|
| 631 |
-
|
| 632 |
-
"""
|
| 633 |
-
Run the full Intervision analysis pipeline.
|
| 634 |
-
|
| 635 |
-
Steps:
|
| 636 |
-
1. Extract baseline audio
|
| 637 |
-
2. Run video annotation
|
| 638 |
-
3. Merge annotated video with original audio
|
| 639 |
-
4. Generate report
|
| 640 |
-
"""
|
| 641 |
|
|
|
|
| 642 |
os.makedirs(output_dir, exist_ok=True)
|
| 643 |
-
|
| 644 |
print("[PIPELINE] Starting pipeline")
|
| 645 |
-
print("[PIPELINE] Video path:", video_path)
|
| 646 |
-
|
| 647 |
-
# ---------------------------------------------------
|
| 648 |
-
#Extract baseline audio (first 10 seconds)
|
| 649 |
-
# ---------------------------------------------------
|
| 650 |
|
|
|
|
| 651 |
baseline_wav = os.path.join(output_dir, "baseline.wav")
|
| 652 |
-
|
| 653 |
-
print("[PIPELINE] Extracting baseline audio")
|
| 654 |
-
|
| 655 |
subprocess.run([
|
| 656 |
-
"ffmpeg",
|
| 657 |
-
"-
|
| 658 |
-
"-i", video_path,
|
| 659 |
-
"-t", "10",
|
| 660 |
-
"-vn",
|
| 661 |
-
"-acodec", "pcm_s16le",
|
| 662 |
-
"-ar", "16000",
|
| 663 |
-
baseline_wav
|
| 664 |
], check=True)
|
| 665 |
-
|
| 666 |
-
if not os.path.exists(baseline_wav):
|
| 667 |
-
raise Exception("Baseline audio extraction failed")
|
| 668 |
-
|
| 669 |
y_b, sr_b = librosa.load(baseline_wav, sr=16000)
|
| 670 |
-
|
| 671 |
baseline_features = extract_audio_features(y_b, sr_b)
|
| 672 |
|
| 673 |
-
#
|
| 674 |
-
|
| 675 |
-
# ---------------------------------------------------
|
| 676 |
-
|
| 677 |
-
print("[PIPELINE] Running video annotation")
|
| 678 |
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
| 682 |
-
questions_config
|
| 683 |
-
)
|
| 684 |
-
|
| 685 |
-
if not os.path.exists(annotated_video_raw):
|
| 686 |
-
raise Exception("Annotated video was not generated")
|
| 687 |
-
|
| 688 |
-
# ---------------------------------------------------
|
| 689 |
-
#Merge annotated video with original audio
|
| 690 |
-
# ---------------------------------------------------
|
| 691 |
-
|
| 692 |
-
final_output = os.path.join(
|
| 693 |
-
output_dir,
|
| 694 |
-
"Intervision_Final_Report.mp4"
|
| 695 |
-
)
|
| 696 |
-
|
| 697 |
-
print("[PIPELINE] Merging audio and annotated video")
|
| 698 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 699 |
subprocess.run([
|
| 700 |
-
'ffmpeg', '-y',
|
| 701 |
-
'-
|
| 702 |
-
'-
|
| 703 |
-
'-map', '0:v:0',
|
| 704 |
-
'-map', '1:a:0',
|
| 705 |
-
'-c:v', 'libx264',
|
| 706 |
-
'-preset', 'veryfast',
|
| 707 |
-
'-crf', '23',
|
| 708 |
-
'-c:a', 'aac',
|
| 709 |
-
'-b:a', '160k',
|
| 710 |
-
'-shortest',
|
| 711 |
-
final_output
|
| 712 |
], check=True)
|
| 713 |
|
| 714 |
-
|
| 715 |
-
if not os.path.exists(final_output):
|
| 716 |
-
raise Exception("Final video merge failed")
|
| 717 |
-
|
| 718 |
-
print("[PIPELINE] Final video created:", final_output)
|
| 719 |
-
|
| 720 |
-
# ---------------------------------------------------
|
| 721 |
-
# Generate report JSON
|
| 722 |
-
# ---------------------------------------------------
|
| 723 |
-
|
| 724 |
report = {
|
| 725 |
"status": "completed",
|
| 726 |
-
"questionsAnalyzed": len(questions_config)
|
|
|
|
| 727 |
}
|
| 728 |
-
|
| 729 |
report_path = os.path.join(output_dir, "report.json")
|
| 730 |
-
|
| 731 |
with open(report_path, "w") as f:
|
| 732 |
json.dump(report, f, indent=2)
|
| 733 |
|
| 734 |
-
print("[PIPELINE] Report saved:", report_path)
|
| 735 |
-
|
| 736 |
return final_output, report_path
|
|
|
|
| 628 |
|
| 629 |
|
| 630 |
##Main pipeline
|
| 631 |
+
# ... (Keep all your imports and utility functions at the top as they were) ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 632 |
|
| 633 |
+
def run_intervision_pipeline(video_path, questions_config, output_dir):
    """Run the full Intervision analysis pipeline.

    Steps:
      1. Extract baseline audio (first 10 s) and compute baseline features.
      2. Annotate the video (``process_full_video``).
      3. Transcribe and score each configured question segment.
      4. Merge the annotated video with the original audio track.
      5. Write the JSON report.

    Args:
        video_path: Path to the source interview video.
        questions_config: Sequence of dicts; each must provide
            'question_id', 'start_time', 'end_time', 'question_text',
            'ideal_answer'. Times are in seconds — TODO confirm with caller.
        output_dir: Directory for all generated artifacts (created if missing).

    Returns:
        Tuple ``(final_output, report_path)``: the merged MP4 and report JSON.

    Raises:
        RuntimeError: if an expected output file is missing after a step.
        subprocess.CalledProcessError: if an ffmpeg invocation fails.
    """
    os.makedirs(output_dir, exist_ok=True)
    print("[PIPELINE] Starting pipeline")
    print("[PIPELINE] Video path:", video_path)

    # ---------------------------------------------------
    # 1. Extract baseline audio (first 10 seconds)
    # ---------------------------------------------------
    baseline_wav = os.path.join(output_dir, "baseline.wav")
    print("[PIPELINE] Extracting baseline audio")
    subprocess.run([
        "ffmpeg", "-y", "-i", video_path, "-t", "10",
        "-vn", "-acodec", "pcm_s16le", "-ar", "16000", baseline_wav,
    ], check=True)
    # ffmpeg can exit 0 yet write nothing (e.g. no audio stream); fail fast
    # with a specific error instead of letting librosa raise something opaque.
    if not os.path.exists(baseline_wav):
        raise RuntimeError("Baseline audio extraction failed")

    y_b, sr_b = librosa.load(baseline_wav, sr=16000)
    baseline_features = extract_audio_features(y_b, sr_b)

    # ---------------------------------------------------
    # 2. Process and annotate video
    # ---------------------------------------------------
    print("[PIPELINE] Running video annotation")
    annotated_video_raw = process_full_video(video_path, output_dir, questions_config)
    if not annotated_video_raw or not os.path.exists(annotated_video_raw):
        raise RuntimeError("Annotated video was not generated")

    # ---------------------------------------------------
    # 3. Analyze each question segment for the report
    # ---------------------------------------------------
    list_of_answer_report = []
    # Decode the full audio track once; per-question segments are sliced
    # from it instead of re-running ffmpeg per question.
    full_audio, sr = librosa.load(video_path, sr=16000)

    for q in questions_config:
        print(f"[PIPELINE] Analyzing Question ID: {q['question_id']}")

        # Slice this question's answer window out of the full track.
        start_sample = int(q['start_time'] * sr)
        end_sample = int(q['end_time'] * sr)
        segment_audio = full_audio[start_sample:end_sample]

        temp_segment_path = os.path.join(output_dir, f"temp_{q['question_id']}.wav")
        sf.write(temp_segment_path, segment_audio, sr)

        try:
            # AI analysis: transcription plus text- and audio-based scoring.
            user_text = get_user_answer(temp_segment_path)
            similarity = compute_similarity_score(user_text, q['ideal_answer'])
            relevance = compute_relevance_score(q['question_text'], user_text)
            audio_scores = analyze_audio_segment(temp_segment_path, baseline=baseline_features)

            list_of_answer_report.append({
                "questionId": q['question_id'],
                "userAnswerText": user_text,
                "score": similarity,
                "relevance": relevance,
                "confidence": audio_scores["confidence_audio"],
                "stress": audio_scores["stress"],
                "clarity": audio_scores["clarity"],
                "pauses": audio_scores["pauses"],
                "toneOfVoice": audio_scores["tone_of_voice"],
                "status": "answered",
            })
        finally:
            # Always remove the temp WAV, even if analysis raised above;
            # the original only cleaned up on the success path.
            if os.path.exists(temp_segment_path):
                os.remove(temp_segment_path)

    # ---------------------------------------------------
    # 4. Merge annotated video with original audio
    # ---------------------------------------------------
    final_output = os.path.join(output_dir, "Intervision_Final_Report.mp4")
    print("[PIPELINE] Merging audio and annotated video")
    subprocess.run([
        'ffmpeg', '-y', '-i', annotated_video_raw, '-i', video_path,
        '-map', '0:v:0', '-map', '1:a:0', '-c:v', 'libx264', '-preset', 'veryfast',
        '-crf', '23', '-c:a', 'aac', '-shortest', final_output,
    ], check=True)
    if not os.path.exists(final_output):
        raise RuntimeError("Final video merge failed")
    print("[PIPELINE] Final video created:", final_output)

    # ---------------------------------------------------
    # 5. Save report JSON (key names match the consumer's schema)
    # ---------------------------------------------------
    report = {
        "status": "completed",
        "questionsAnalyzed": len(questions_config),
        "listOfAnswerReport": list_of_answer_report,
    }
    report_path = os.path.join(output_dir, "report.json")
    with open(report_path, "w") as f:
        json.dump(report, f, indent=2)
    print("[PIPELINE] Report saved:", report_path)

    return final_output, report_path
|