import base64
import re
from io import BytesIO
from typing import Dict, List, Optional, Tuple

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pdfplumber
from word2number import w2n

# Custom CSS for styling
css = """
:root {
    --low-color: #28a745;
    --medium-color: #ffc107;
    --high-color: #dc3545;
    --inactive-color: #e9ecef;
}
.risk-container {
    display: flex;
    flex-direction: column;
    gap: 12px;
    margin-bottom: 25px;
}
.risk-row {
    display: flex;
    align-items: center;
    background: white;
    border-radius: 8px;
    padding: 15px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    transition: all 0.3s ease;
}
.risk-row.active {
    transform: scale(1.02);
    box-shadow: 0 4px 8px rgba(0,0,0,0.15);
}
.risk-label {
    width: 100px;
    font-weight: 600;
    font-size: 16px;
    color: #495057;
}
.risk-score {
    width: 80px;
    font-size: 20px;
    font-weight: 700;
    text-align: center;
}
.risk-low { color: var(--low-color); }
.risk-medium { color: var(--medium-color); }
.risk-high { color: var(--high-color); }
.heatmap-container {
    flex-grow: 1;
    height: 30px;
    border-radius: 15px;
    overflow: hidden;
    position: relative;
}
.heatmap-bar {
    height: 100%;
    border-radius: 15px;
    transition: width 0.5s ease;
}
.risk-meter {
    position: absolute;
    right: 10px;
    top: 50%;
    transform: translateY(-50%);
    font-size: 12px;
    font-weight: 600;
    color: white;
    text-shadow: 0 1px 2px rgba(0,0,0,0.3);
}
.result-section {
    background: white;
    border-radius: 8px;
    padding: 20px;
    margin-bottom: 20px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.result-title {
    font-size: 18px;
    font-weight: 600;
    margin-bottom: 15px;
    color: #343a40;
    display: flex;
    align-items: center;
    gap: 8px;
}
.clause-item {
    margin-bottom: 8px;
    padding-left: 15px;
    position: relative;
}
.clause-item:before {
    content: "•";
    position: absolute;
    left: 0;
    color: #6c757d;
}
.penalty-amount {
    font-family: monospace;
    background: #f8f9fa;
    padding: 2px 6px;
    border-radius: 4px;
    margin-left: 5px;
}
.example-clause {
    background: #f8f9fa;
    padding: 12px;
    border-radius: 6px;
    margin-bottom: 10px;
    border-left: 3px solid #6c757d;
}
.example-number {
    font-weight: 600;
    margin-right: 8px;
    color: #6c757d;
}
"""


def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract the text of every page of a PDF using pdfplumber.

    Args:
        pdf_path: Path of the PDF file, passed to ``pdfplumber.open``.

    Returns:
        The text of all pages joined with newlines. A page for which
        ``extract_text()`` yields a falsy value contributes an empty string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Join with a newline rather than nothing: otherwise the last word of one
    # page is fused with the first word of the next, which breaks the
    # word-boundary matching done by count_keywords / find_penalty_values.
    return "\n".join(page_texts)


def count_keywords(text: str, keywords: List[str]) -> Dict[str, int]:
    """Count whole-word, case-insensitive occurrences of each keyword.

    Args:
        text: Text to search.
        keywords: Keywords to look for; each is regex-escaped and wrapped in
            ``\\b`` word boundaries before matching.

    Returns:
        Mapping of each keyword (as given) to its number of matches in *text*.
    """
    return {
        keyword: len(
            re.findall(
                r'\b' + re.escape(keyword) + r'\b',
                text,
                flags=re.IGNORECASE,
            )
        )
        for keyword in keywords
    }


def _parse_numeric_amount(amount_text: str) -> Optional[float]:
    """Parse a matched amount such as ``$1,000.50`` or ``5 %`` into a float."""
    digits = re.sub(r'[^\d.]', '', amount_text)
    try:
        return float(digits)
    except ValueError:
        # e.g. "$," leaves an empty string once non-digits are stripped.
        return None


def _parse_worded_amount(amount_text: str) -> Optional[float]:
    """Parse a matched phrase such as ``five thousand dollars`` via word2number."""
    # Only the words before the first "dollars" describe this amount; the
    # split is case-insensitive so "Five Thousand Dollars" is handled too.
    words = re.split(r'dollars', amount_text, maxsplit=1, flags=re.IGNORECASE)[0]
    try:
        return w2n.word_to_num(words.strip())
    except (ValueError, IndexError):
        # ValueError is raised by float-like parsing failures in word2number
        # (no number words / malformed number). IndexError is caught
        # defensively for odd word sequences -- TODO confirm against
        # word2number; the original code swallowed every exception here.
        return None


# (regex, parser) pairs, tried in this order; the order determines the order
# of values in the list returned by find_penalty_values.
_PENALTY_PATTERNS = (
    (r'\$\s*[\d,]+(?:\.\d+)?', _parse_numeric_amount),
    (r'(?:USD|usd)\s*[\d,]+(?:\.\d+)?', _parse_numeric_amount),
    # Accept a decimal part so "1.5%" is read as 1.5 instead of 5.
    (r'\d+(?:\.\d+)?\s*(?:percent|%)', _parse_numeric_amount),
    (r'(?:\b[a-z]+\s*)+dollars', _parse_worded_amount),
)


def find_penalty_values(text: str) -> List[float]:
    """Find penalty amounts in the text.

    Four kinds of mention are recognised, all case-insensitively:
    ``$``-prefixed numbers, ``USD``-prefixed numbers, percentages, and
    spelled-out amounts ending in "dollars". Percentages are returned as
    their plain numeric value alongside the currency amounts.

    Matches that cannot be converted to a number are skipped (best effort).

    Args:
        text: Text to scan.

    Returns:
        The parsed values, grouped by pattern kind in the order listed above
        and, within a kind, in order of appearance.
    """
    penalties = []
    for pattern, parse in _PENALTY_PATTERNS:
        for match in re.finditer(pattern, text, flags=re.IGNORECASE):
            value = parse(match.group())
            if value is not None:
                penalties.append(value)
    return penalties


def calculate_risk_score(
    penalty_count: int,
    penalty_values: List[float],
    obligation_count: int,
    delay_count: int,
) -> Tuple[float, str]:
    """Calculate a 0-100 risk score and its level from contract statistics.

    Score components:
        * 5 points per penalty mention, capped at 30.
        * Average penalty value (only when *penalty_values* is non-empty):
          40 if above 1,000,000; 25 if above 100,000; 15 if above 10,000;
          otherwise 5.
        * 2 points per obligation mention, capped at 20.
        * 10 points per delay mention, capped at 30.

    The total is capped at 100.

    Args:
        penalty_count: Number of penalty mentions.
        penalty_values: Parsed penalty amounts; may be empty.
        obligation_count: Number of obligation mentions.
        delay_count: Number of delay mentions.

    Returns:
        ``(score, level)`` where *level* is "Low" for a score below 30,
        "Medium" for a score below 70, and "High" otherwise.
    """
    score = min(penalty_count * 5, 30)

    if penalty_values:
        avg_penalty = sum(penalty_values) / len(penalty_values)
        if avg_penalty > 1000000:
            score += 40
        elif avg_penalty > 100000:
            score += 25
        elif avg_penalty > 10000:
            score += 15
        else:
            score += 5

    score += min(obligation_count * 2, 20)
    score += min(delay_count * 10, 30)
    score = min(score, 100)

    if score < 30:
        return score, "Low"
    if score < 70:
        return score, "Medium"
    return score, "High"


# NOTE(review): in the collapsed source the `def` keyword of
# create_risk_display ended this span; its signature and body continue on the
# following source lines, which are truncated and need manual restoration.
create_risk_display(risk_score: float, risk_level: str) -> str: """Create HTML display for all three risk levels""" risk_levels = ["Low", "Medium", "High"] colors = { "Low": "var(--low-color)", "Medium": "var(--medium-color)", "High": "var(--high-color)" } html_parts = [] html_parts.append("
Upload a contract PDF to analyze penalties, obligations, and delays