| | import json |
| | import os |
| | from dotenv import load_dotenv |
| | from openai import OpenAI |
| | from tqdm import tqdm |
| | from scripts.regulatory_change_foundation import CONTEXT_CATEGORIES |
| | from scripts.utility_functions import call_nlp_service, render_prompt |
| |
|
| |
|
| | |
# Load environment variables from a local .env file (no-op if absent).
load_dotenv()

# OpenAI client configured from the environment; api_key will be None if
# OPENAI_API_KEY is unset — the first API call would then fail, not this line.
api_key = os.getenv("OPENAI_API_KEY")
openai_client = OpenAI(api_key=api_key)
| |
|
| |
|
def preprocess_text_with_nlp(text, max_chunk_size=512, overlap=50):
    """Chunk and preprocess raw text via the external NLP service.

    Args:
        text (str): Raw text to split and analyze.
        max_chunk_size (int): Maximum size of each chunk.
        overlap (int): Overlap between consecutive chunks.

    Returns:
        tuple: ``(chunks, preprocessed_data)`` taken from the service response.
    """
    # Bug fix: max_chunk_size and overlap were accepted but never forwarded,
    # so the service always fell back to its own defaults.
    # NOTE(review): assumes the service accepts these keys in its payload —
    # confirm against the "preprocess_text_with_nlp_llm" endpoint schema.
    payload = {
        "text": text,
        "max_chunk_size": max_chunk_size,
        "overlap": overlap,
    }
    result = call_nlp_service(payload, "preprocess_text_with_nlp_llm")
    return result["chunks"], result["preprocessed_data"]
| |
|
| |
|
def create_prompt(chunk, preprocessed_data):
    """Render the user prompt for one chunk, embedding its NLP metadata."""
    prompt = render_prompt(
        chunk,
        include_nlp=True,
        preprocessed_data=preprocessed_data,
    )
    return prompt
| |
|
| |
|
def search_for_regulatory_changes(chunks, preprocessed_data, subtitle):
    """Run each chunk through the LLM and collect detected regulatory changes.

    Args:
        chunks (list[str]): Text chunks produced by preprocessing.
        preprocessed_data: NLP metadata passed through to the prompt builder.
        subtitle (str): Section subtitle, recorded in each result's location.

    Returns:
        list[dict]: Parsed model outputs with ``changes_detected`` truthy,
        each augmented with ``location`` and ``source_text``. Chunks whose
        responses are empty or unparseable are skipped (best-effort).
    """
    results = []

    for chunk in chunks:
        response = openai_client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "system",
                    "content": "You are a legal expert specializing in analyzing German regulatory documents with a focus on identifying regulatory changes. Only return JSON output.",
                },
                {"role": "user", "content": create_prompt(chunk, preprocessed_data)},
            ],
            temperature=0.7,
            max_tokens=1024,
            # Bug fix: enforce JSON mode so the model cannot reply with prose
            # or markdown fences that json.loads would reject.
            response_format={"type": "json_object"},
        )

        content = response.choices[0].message.content
        # Bug fix: content may be None (e.g. refusal / truncation), which
        # previously raised an uncaught TypeError inside json.loads.
        if not content:
            continue

        # Keep the try body minimal: only the line that can raise.
        try:
            result = json.loads(content)
        except json.JSONDecodeError:
            # Deliberate best-effort: skip malformed responses.
            continue

        if result.get("changes_detected", False):
            result["location"] = {"subtitle": subtitle}
            result["source_text"] = chunk
            results.append(result)

    return results
| |
|
| |
|
def detect_regulatory_changes(text_content, subtitle):
    """
    Main function to detect regulatory changes from text content.

    Args:
        text_content (str): The raw text content to analyze
        subtitle (str): The subtitle associated with the content

    Returns:
        dict: Structured output containing detected changes and analysis summary
    """
    # Split the raw text into chunks and gather NLP metadata, then scan
    # every chunk for regulatory changes in a single pass.
    chunks, nlp_metadata = preprocess_text_with_nlp(text_content)
    return search_for_regulatory_changes(chunks, nlp_metadata, subtitle)
| |
|
| |
|
def llm_regulatory_change_detector(hierarchical_structure):
    """Analyze every section of a hierarchical document for regulatory changes.

    Args:
        hierarchical_structure (dict): Parsed document with a ``"sections"``
            list; each section has ``"subtitle"`` and ``"content"`` (str or
            list of str).

    Returns:
        dict: ``{"analysis_summary": {...}, "results": {subtitle: [changes]}}``.
        An empty report is returned for a falsy input.
    """
    analysis_summary = {
        "total_changes_detected": 0,
        "changes_by_type": {"additions": 0, "deletions": 0, "modifications": 0},
    }
    subtitles = {}

    # Bug fix: the original only built final_output inside the truthy branch,
    # so a falsy input hit an undefined name. Return an empty report instead.
    if not hierarchical_structure:
        return {"analysis_summary": analysis_summary, "results": subtitles}

    for section in tqdm(
        hierarchical_structure["sections"], desc="Analyzing Sections"
    ):
        subtitle = section["subtitle"]
        content = section["content"]
        if isinstance(content, list):
            content = "\n".join(content)

        changes = detect_regulatory_changes(content, subtitle)

        # Bug fix: setdefault accumulates results for repeated subtitles
        # instead of silently overwriting earlier sections' findings.
        # (Like the original, the entry exists even when no changes found.)
        bucket = subtitles.setdefault(subtitle, [])

        # Single pass over the detected changes: update the summary counters
        # and build the per-subtitle records together (the two original loops
        # were independent, so merging preserves behavior).
        for change in changes:
            classifications = change["classifications"]
            analysis_summary["total_changes_detected"] += len(classifications)
            for classification in classifications:
                change_type = classification["change_type"]
                analysis_summary["changes_by_type"][f"{change_type}s"] += 1
                change_subtype = (
                    "context"
                    if classification["change"] in CONTEXT_CATEGORIES
                    else "scope"
                )
                bucket.append(
                    {
                        "change": classification["change"],
                        "change_type": change_type,
                        "change_subtype": change_subtype,
                        "relevant_text": classification["relevant_text"],
                        "explanation": classification["explanation"],
                        "nlp_evidence": classification["evidence"],
                    }
                )

    return {"analysis_summary": analysis_summary, "results": subtitles}
| |
|