File size: 4,567 Bytes
60efa5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from typing import List, Dict
from langchain_google_genai import ChatGoogleGenerativeAI
from analysis.prompt import _create_analysis_prompt
from dotenv import load_dotenv
import re
load_dotenv()

class LLMFusion:
    """Fuses video and audio analysis results using LLM to generate human-readable report"""
    
    def __init__(self):
        self.llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)
    
    def prepare_analysis_json(self, timeline: List[Dict]) -> Dict:
        analysis_data = {
            'total_intervals': len(timeline),
            'intervals': []
        }
        
        for interval in timeline:
            interval_summary = {
                'interval_id': interval['interval_id'],
                'time_range': interval['interval'],
                'video_analysis': interval.get('video_results', {}),
                'audio_analysis': interval.get('audio_results', {})
            }
            analysis_data['intervals'].append(interval_summary)
        
        return analysis_data
    
    def calculate_overall_scores(self, timeline: List[Dict]) -> Dict:
        """Calculate overall video and audio fake scores"""
        video_scores = []
        audio_scores = []
        
        for interval in timeline:
            if interval.get('video_results') and 'fake_score' in interval['video_results']:
                video_scores.append(interval['video_results']['fake_score'])
            
            if interval.get('audio_results') and 'fake_score' in interval['audio_results']:
                audio_scores.append(interval['audio_results']['fake_score'])
        
        overall_video = round(sum(video_scores) / len(video_scores), 3) if len(video_scores) > 0 else 0.0
        overall_audio = round(sum(audio_scores) / len(audio_scores), 3) if len(audio_scores) > 0 else 0.0
        
        if overall_video > 0 and overall_audio > 0:
            overall_combined = round((overall_video + overall_audio) / 2, 3)
        elif overall_video > 0:
            overall_combined = overall_video
        elif overall_audio > 0:
            overall_combined = overall_audio
        else:
            overall_combined = 0.0
        
        return {
            'overall_video_score': overall_video,
            'overall_audio_score': overall_audio,
            'overall_combined_score': overall_combined
        }
    
    def generate_report(self, timeline: List[Dict], video_info: Dict) -> Dict:
        analysis_json = self.prepare_analysis_json(timeline)
        overall_scores = self.calculate_overall_scores(timeline)
        
        prompt = _create_analysis_prompt(analysis_json, overall_scores, video_info)
        
        try:
            response = self.llm.invoke(prompt)
            llm_response = response.content 
        except Exception as e:
            print(f"LLM failed: {e}")
            llm_response = "Analysis failed."
        
        report = self._structure_report(llm_response, overall_scores, analysis_json)
        return report
    
    def _structure_report(self, llm_response: str, overall_scores: Dict, analysis_json: Dict) -> Dict:
        """Extract structured information from LLM response"""
        
        verdict = "DEEPFAKE" if overall_scores['overall_combined_score'] > 0.5 else "REAL"
        
        confidence = 75.0
        conf_match = re.search(r'(\d+)\s*%', llm_response)
        if conf_match:
            confidence = float(conf_match.group(1))
        
        suspicious_intervals = []
        for interval_data in analysis_json['intervals']:
            video_score = interval_data.get('video_analysis', {}).get('fake_score', 0)
            audio_score = interval_data.get('audio_analysis', {}).get('fake_score', 0)
            
            if video_score > 0.6 or audio_score > 0.6:
                suspicious_intervals.append({
                    'interval': interval_data['time_range'],
                    'video_score': video_score,
                    'audio_score': audio_score,
                    'video_regions': interval_data.get('video_analysis', {}).get('suspicious_regions', []),
                    'audio_regions': interval_data.get('audio_analysis', {}).get('suspicious_regions', [])
                })
        
        return {
            'verdict': verdict,
            'confidence': confidence,
            'overall_scores': overall_scores,
            'detailed_analysis': llm_response,
            'suspicious_intervals': suspicious_intervals,
            'total_intervals_analyzed': analysis_json['total_intervals']
        }