yalrashed committed on
Commit
e2cc090
·
verified ·
1 Parent(s): a6e1129

Upload analysis_post_processor.py

Browse files
src/analysis/analysis_post_processor.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import google.generativeai as genai
3
+ from pathlib import Path
4
+ import logging
5
+
6
# Module-level logger used by everything in this file.
logger = logging.getLogger(__name__)

# Configure the root logger at import time with an INFO threshold.
# basicConfig() does nothing if the root logger already has handlers.
logging.basicConfig(level=logging.INFO)
8
+
9
class AnalysisPostProcessor:
    """Post-process a chunk-generated screenplay analysis with Gemini.

    The input file is split into ``### Title ###`` sections, each section is
    sent to the model to remove redundancy, and the cleaned sections are
    written back out in the same layout.
    """

    def __init__(self, model_name: str = 'gemini-pro'):
        """Configure the Gemini client from the environment.

        Args:
            model_name: Model name passed to ``genai.GenerativeModel``.
                Defaults to ``'gemini-pro'``.

        Raises:
            ValueError: If ``GOOGLE_API_KEY`` is unset or empty.
        """
        api_key = os.getenv("GOOGLE_API_KEY")
        if not api_key:
            raise ValueError("GOOGLE_API_KEY not found")

        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)

    @staticmethod
    def _store_section(sections: dict, title, lines: list) -> None:
        """Record ``lines`` under ``title``, appending if the title repeats."""
        if not title:
            # Text before the first header (or under an empty title) is dropped.
            return
        body = '\n'.join(lines)
        if title in sections:
            # A repeated header must not overwrite what was already collected.
            sections[title] = sections[title] + '\n' + body
        else:
            sections[title] = body

    def read_sections(self, filepath: str) -> dict:
        """Read the analysis file and split it into titled sections.

        A section starts at a line of the form ``### Title ###`` and runs
        until the next such line. Text before the first header is discarded.
        When the same title occurs more than once, the bodies are joined with
        a newline in file order.

        Args:
            filepath: Path of the UTF-8 text file to read.

        Returns:
            A dict mapping each section title to its body text, ordered by
            first appearance of the title.
        """
        # Explicit encoding: the analysis is LLM-generated text and must not
        # depend on the platform's default locale encoding.
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        sections = {}
        current_section = None
        current_content = []

        for line in content.split('\n'):
            if line.startswith('### ') and line.endswith(' ###'):
                self._store_section(sections, current_section, current_content)
                current_section = line.strip('#').strip()
                current_content = []
            else:
                current_content.append(line)

        self._store_section(sections, current_section, current_content)
        return sections

    def clean_section(self, title: str, content: str) -> str:
        """Ask Gemini for a de-duplicated, coherent version of one section.

        Args:
            title: Section title, interpolated into the prompt.
            content: Original section body.

        Returns:
            The model's rewritten text. The original ``content`` is returned
            unchanged when it is blank, when the request fails, or when the
            model returns an empty reply.
        """
        if not content.strip():
            # Nothing to clean; avoid a pointless API call.
            return content

        # Built from explicit lines so source indentation never leaks into
        # the prompt text.
        prompt = "\n".join([
            f'You are processing a section of screenplay analysis titled "{title}".',
            "The original analysis was generated by analyzing chunks of the screenplay,",
            "which may have led to some redundancy and discontinuity.",
            "",
            "Your task:",
            "1. Remove any redundant observations",
            "2. Stitch together related insights that may be separated",
            "3. Ensure the analysis flows naturally from beginning to end",
            "4. Preserve ALL unique insights and specific examples",
            "5. Maintain the analytical depth while making it more coherent",
            "",
            f"Original {title} section:",
            f"{content}",
            "",
            "Provide the cleaned and coherent version maintaining the same analytical depth.",
        ])

        try:
            response = self.model.generate_content(prompt)
            # Accessing .text is kept inside the try so any failure it raises
            # falls back to the original content too.
            cleaned = response.text
        except Exception as e:
            logger.error("Error cleaning %s: %s", title, e)
            return content

        if not cleaned or not cleaned.strip():
            # An empty reply would erase the section; keep the original.
            logger.warning("Empty response while cleaning %s; keeping original", title)
            return content
        return cleaned

    def process_analysis(self, input_path: str, output_path: str) -> bool:
        """Clean every section of ``input_path`` and write ``output_path``.

        Args:
            input_path: Path of the raw analysis file.
            output_path: Path the cleaned analysis is written to (UTF-8).

        Returns:
            True on success, False if any step raised (the error is logged).
        """
        try:
            # Read and separate sections
            sections = self.read_sections(input_path)
            if not sections:
                logger.warning("No '### Title ###' sections found in %s", input_path)

            # Process each section
            cleaned_sections = {}
            for title, content in sections.items():
                logger.info("Processing %s", title)
                cleaned_sections[title] = self.clean_section(title, content)

            # Combine sections
            parts = ["SCREENPLAY CREATIVE ANALYSIS\n\n"]
            parts.extend(
                f"### {title} ###\n\n{content}\n\n"
                for title, content in cleaned_sections.items()
            )
            final_analysis = "".join(parts)

            # Save result
            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(final_analysis)

            logger.info("Cleaned analysis saved to: %s", output_path)
            return True

        except Exception as e:
            # Boundary handler: callers get a success flag, not an exception.
            logger.error("Error in post-processing: %s", e)
            return False
93
+
94
def main(input_file: str = "path/to/creative_analysis.txt",
         output_file: str = "path/to/cleaned_creative_analysis.txt") -> bool:
    """Run the post-processor on one analysis file.

    Args:
        input_file: Path of the raw analysis to clean.
        output_file: Path the cleaned analysis is written to.

    Returns:
        The success flag returned by ``process_analysis``.

    Raises:
        ValueError: Propagated from ``AnalysisPostProcessor()`` when
            ``GOOGLE_API_KEY`` is not set.
    """
    processor = AnalysisPostProcessor()
    return processor.process_analysis(input_file, output_file)


if __name__ == "__main__":
    import sys

    # Optional CLI overrides: script.py [input_file [output_file]].
    # Exit non-zero on failure so shell callers can detect it.
    sys.exit(0 if main(*sys.argv[1:3]) else 1)