"""Manual test script for running VideoProcessor with a selectable transcription service."""

import os
import sys
import time
import json
import argparse

# Make the directory containing this script importable so the behavior_backend package resolves.
sys.path.insert(0, os.path.abspath(os.path.dirname(__file__)))

from behavior_backend.app.services.processing.video_processor import VideoProcessor

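# Example invocation (script file name and video path are hypothetical):
#   python test_video_processor.py /path/to/interview.mp4 --service groq --frame-rate 5
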
def test_video_processor(video_path, language='en', service='whisper', backend='mediapipe', frame_rate=1):
    """
    Test the video processor with different transcription services.

    Args:
        video_path: Path to the video file
        language: Language code
        service: Transcription service to use ('whisper', 'groq', 'google_cloud', 'openai_whisper')
        backend: Backend to use for face detection
        frame_rate: Process every nth frame
    """
    print("\n=== Testing Video Processor ===")
    print(f"Video: {video_path}")
    print(f"Language: {language}")
    print(f"Transcription Service: {service}")
    print(f"Face Detection Backend: {backend}")
    print(f"Frame Rate: {frame_rate}")

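    # Build the processor and report which cloud transcription services are configured.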
    processor = VideoProcessor()
    available_services = list(processor.speech_service.cloud_transcription_service.available_recognizers.keys())
    print(f"Available cloud services: {', '.join(available_services)}")

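    # Wrap process_video_speech so the requested service is forwarded without
    # changing the two-argument signature that process_video calls it with.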
    original_process_video_speech = processor.speech_service.process_video_speech
    processor.speech_service.process_video_speech = (
        lambda video_path, language: original_process_video_speech(video_path, language, service)
    )

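    # Time the end-to-end run so different services and backends can be compared.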
    start_time = time.time()
    try:
        transcript, analysis_json = processor.process_video(
            video_path=video_path,
            frame_rate=frame_rate,
            backend=backend,
            language=language,
            generate_annotated_video=False
        )
        end_time = time.time()

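        # Report how long processing took and how much text was produced.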
        print(f"\nProcessing completed in {end_time - start_time:.2f} seconds")
        print(f"Transcript length: {len(transcript)} characters")

        # Derive output file names from the video file name, with the extension stripped.
        base_name = os.path.splitext(os.path.basename(video_path))[0]

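        # Persist the transcript and analysis so they can be inspected after the run.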
        transcript_file = f"{base_name}_{service}_transcript.txt"
        with open(transcript_file, 'w', encoding='utf-8') as f:
            f.write(transcript)
        print(f"Transcript saved to {transcript_file}")

        analysis_file = f"{base_name}_{service}_analysis.json"
        with open(analysis_file, 'w', encoding='utf-8') as f:
            f.write(analysis_json)
        print(f"Analysis saved to {analysis_file}")

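        # Print short previews so results can be sanity-checked without opening the files.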
        print("\nTranscript preview (first 500 characters):")
        print("-" * 80)
        print((transcript[:500] + "...") if len(transcript) > 500 else transcript)
        print("-" * 80)

        print("\nAnalysis preview:")
        print("-" * 80)
        analysis = json.loads(analysis_json)
        if 'summary' in analysis:
            print(f"Summary: {analysis['summary']}")
        if 'key_points' in analysis:
            print("\nKey Points:")
            for point in analysis['key_points'][:3]:
                print(f"- {point}")
            if len(analysis['key_points']) > 3:
                print(f"... and {len(analysis['key_points']) - 3} more key points")
        print("-" * 80)

        return True
    except Exception as e:
        print(f"Error: {e}")
        return False


def main():
    parser = argparse.ArgumentParser(description='Test video processor with different transcription services')
    parser.add_argument('video_path', help='Path to the video file')
    parser.add_argument('--language', '-l', default='en', help='Language code (default: en)')
    parser.add_argument('--service', '-s', default='whisper',
                        choices=['whisper', 'groq', 'google_cloud', 'openai_whisper'],
                        help='Transcription service to use (default: whisper)')
    parser.add_argument('--backend', '-b', default='mediapipe',
                        choices=['mediapipe', 'ssd', 'mtcnn'],
                        help='Backend to use for face detection (default: mediapipe)')
    parser.add_argument('--frame-rate', '-f', type=int, default=1,
                        help='Process every nth frame (default: 1)')

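    # Run the test and report success or failure through the process exit code.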
    args = parser.parse_args()
    success = test_video_processor(
        args.video_path,
        args.language,
        args.service,
        args.backend,
        args.frame_rate
    )
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()