Fayza38 committed on
Commit
3b10697
·
verified ·
1 Parent(s): 23a7f39

Update pipeline.py

Browse files
Files changed (1) hide show
  1. pipeline.py +101 -77
pipeline.py CHANGED
##Main pipeline
def run_intervision_pipeline(video_path, questions_config, output_dir):
    """
    Run the Intervision analysis pipeline on an interview recording.

    For each configured question segment: extract its audio with ffmpeg,
    score the delivery against a 10-second baseline, transcribe it with the
    Whisper pipeline (`asr`), and score the answer text. The whole video is
    then annotated visually, re-muxed with the original audio, and a JSON
    report is written to `output_dir`.

    Parameters:
        video_path: Path to the source interview video.
        questions_config: Iterable of dicts, each with 'question_id',
            'start_time', 'end_time', 'question_text' and 'ideal_answer'.
        output_dir: Directory for intermediate files and final outputs.

    Returns:
        A human-readable status string (success or error description).
    """
    if not os.path.exists(video_path):
        return f"Error: Video file not found at {video_path}"

    os.makedirs(output_dir, exist_ok=True)

    # 1. Establish the audio baseline from the first 10 seconds.
    # NOTE(review): librosa decodes the video container directly here; this
    # requires an ffmpeg/audioread backend — confirm in the deploy environment.
    try:
        y_b, sr_b = librosa.load(video_path, sr=16000, duration=10)
        baseline = extract_audio_features(y_b, sr_b)
    except Exception as e:
        # Best-effort: scoring can proceed without a baseline.
        print(f"Baseline Warning: {e}")
        baseline = None

    final_reports = []
    audio_results_map = {}  # question_id -> audio scores, for the visual pass

    # 2. Pre-process each question (audio & text analysis only).
    for q in questions_config:
        q_id = q['question_id']
        wav_p = os.path.join(output_dir, f"q{q_id}.wav")
        duration = q["end_time"] - q["start_time"]

        try:
            # Extract the question's audio segment as 16 kHz mono PCM.
            subprocess.run([
                'ffmpeg', '-y', '-ss', str(q["start_time"]), '-t', str(duration),
                '-i', video_path, '-vn', '-acodec', 'pcm_s16le', '-ar', '16000', wav_p
            ], check=True, capture_output=True)

            y, sr = librosa.load(wav_p, sr=16000)
            a_scores = compute_audio_scores(extract_audio_features(y, sr), baseline)

            # Whisper transcription.
            transcription_data = asr(wav_p, chunk_length_s=30)
            transcription = transcription_data["text"].strip()

            # Text-based scores.
            similarity_score = compute_similarity_score(transcription, q["ideal_answer"])
            relevance_score = compute_relevance_score(q["question_text"], transcription)

            # Store results for the visual processing step.
            audio_results_map[q_id] = a_scores

            # Prepare the final JSON report entry.
            final_reports.append({
                "questionId": q_id,
                "userAnswerText": transcription,
                "toneOfVoice": a_scores["tone_of_voice"],
                "clarity": a_scores["clarity"],
                "stress": a_scores["stress"],
                "confidence": a_scores["confidence_audio"],  # We'll refine this after visual if needed
                "pauses": a_scores["pauses"],
                "score": similarity_score,
                "relevance": relevance_score
            })
        except Exception as e:
            # Best-effort: skip a failing question rather than abort the run.
            print(f"Error analyzing Question {q_id}: {e}")
            continue
        finally:
            # Fix: remove the temporary per-question wav (previously leaked
            # to disk on every run, failing or not).
            if os.path.exists(wav_p):
                try:
                    os.remove(wav_p)
                except OSError:
                    pass

    # 3. Process the entire video visually (annotations + overlays).
    try:
        annotated_video_raw = process_full_video(video_path, output_dir, questions_config, audio_results_map)

        # 4. Final merge: put the original audio back onto the annotated video.
        final_output = os.path.join(output_dir, "Intervision_Final_Report.mp4")
        subprocess.run([
            'ffmpeg', '-y', '-i', annotated_video_raw, '-i', video_path,
            '-map', '0:v:0', '-map', '1:a:0', '-c:v', 'libx264', '-preset', 'veryfast',
            '-crf', '22', '-c:a', 'aac', '-b:a', '192k', '-shortest', final_output
        ], check=True)

        # Save the JSON report.
        with open(os.path.join(output_dir, "report.json"), "w") as f:
            json.dump({"listOfAnswerReport": final_reports}, f, indent=4)

        return f"Success! Full video generated at {final_output}"

    except Exception as e:
        return f"Visual processing or merging failed: {e}"
##Main pipeline
def run_intervision_pipeline(video_path, questions_config, output_dir):
    """
    Run the full Intervision analysis pipeline.

    Steps:
    1. Extract baseline audio (first 10 seconds)
    2. Run video annotation
    3. Merge annotated video with original audio
    4. Generate report

    Parameters:
        video_path: Path to the source interview video.
        questions_config: List of per-question configuration dicts.
        output_dir: Directory for intermediate files and final outputs.

    Returns:
        Tuple of (final_video_path, report_json_path).

    Raises:
        FileNotFoundError: If the input video does not exist.
        RuntimeError: If an ffmpeg step or the annotation step fails
            to produce its output file.
        subprocess.CalledProcessError: If an ffmpeg invocation exits
            with a non-zero status.
    """
    # Fix: validate the input up front (this check existed in the previous
    # version and was dropped); without it a missing file only surfaces
    # as an opaque ffmpeg failure.
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found at {video_path}")

    os.makedirs(output_dir, exist_ok=True)

    print("[PIPELINE] Starting pipeline")
    print("[PIPELINE] Video path:", video_path)

    # ---------------------------------------------------
    # Extract baseline audio (first 10 seconds)
    # ---------------------------------------------------
    baseline_wav = os.path.join(output_dir, "baseline.wav")

    print("[PIPELINE] Extracting baseline audio")

    subprocess.run([
        "ffmpeg",
        "-y",
        "-i", video_path,
        "-t", "10",              # first 10 seconds only
        "-vn",                   # drop the video stream
        "-acodec", "pcm_s16le",
        "-ar", "16000",          # 16 kHz, matching the feature extractor
        baseline_wav
    ], check=True)

    if not os.path.exists(baseline_wav):
        # Fix: raise a specific exception type instead of bare Exception.
        raise RuntimeError("Baseline audio extraction failed")

    y_b, sr_b = librosa.load(baseline_wav, sr=16000)

    # NOTE(review): baseline_features is computed but never consumed below —
    # presumably intended for per-question audio scoring; confirm or remove.
    baseline_features = extract_audio_features(y_b, sr_b)

    # ---------------------------------------------------
    # Process video frames and annotate
    # ---------------------------------------------------
    print("[PIPELINE] Running video annotation")

    annotated_video_raw = process_full_video(
        video_path,
        output_dir,
        questions_config
    )

    if not os.path.exists(annotated_video_raw):
        raise RuntimeError("Annotated video was not generated")

    # ---------------------------------------------------
    # Merge annotated video with original audio
    # ---------------------------------------------------
    final_output = os.path.join(
        output_dir,
        "Intervision_Final_Report.mp4"
    )

    print("[PIPELINE] Merging audio and annotated video")

    # Take the video stream from the annotated render and the audio
    # stream from the original recording.
    subprocess.run([
        'ffmpeg', '-y',
        '-i', annotated_video_raw,
        '-i', video_path,
        '-map', '0:v:0',
        '-map', '1:a:0',
        '-c:v', 'libx264',
        '-preset', 'veryfast',
        '-crf', '23',
        '-c:a', 'aac',
        '-b:a', '160k',
        '-shortest',
        final_output
    ], check=True)

    if not os.path.exists(final_output):
        raise RuntimeError("Final video merge failed")

    print("[PIPELINE] Final video created:", final_output)

    # ---------------------------------------------------
    # Generate report JSON
    # ---------------------------------------------------
    # NOTE(review): unlike the previous revision, the report carries no
    # per-question analysis — only a completion summary; confirm intended.
    report = {
        "status": "completed",
        "questionsAnalyzed": len(questions_config)
    }

    report_path = os.path.join(output_dir, "report.json")

    with open(report_path, "w") as f:
        json.dump(report, f, indent=2)

    print("[PIPELINE] Report saved:", report_path)

    return final_output, report_path