# Spaces: Sleeping
# (Page-header residue from the hosting site; not part of the program.)
import base64
import html
import re
from io import BytesIO
from typing import Dict, List, Tuple

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pdfplumber
from word2number import w2n
# Custom CSS for styling the Gradio page.
# The :root variables hold the colour used for each risk level (low / medium /
# high) plus the grey used for inactive rows; the remaining rules style the
# risk rows, heatmap bars and result cards emitted by the HTML helpers below.
css = """
:root {
    --low-color: #28a745;
    --medium-color: #ffc107;
    --high-color: #dc3545;
    --inactive-color: #e9ecef;
}
.risk-container {
    display: flex;
    flex-direction: column;
    gap: 12px;
    margin-bottom: 25px;
}
.risk-row {
    display: flex;
    align-items: center;
    background: white;
    border-radius: 8px;
    padding: 15px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
    transition: all 0.3s ease;
}
.risk-row.active {
    transform: scale(1.02);
    box-shadow: 0 4px 8px rgba(0,0,0,0.15);
}
.risk-label {
    width: 100px;
    font-weight: 600;
    font-size: 16px;
    color: #495057;
}
.risk-score {
    width: 80px;
    font-size: 20px;
    font-weight: 700;
    text-align: center;
}
.risk-low { color: var(--low-color); }
.risk-medium { color: var(--medium-color); }
.risk-high { color: var(--high-color); }
.heatmap-container {
    flex-grow: 1;
    height: 30px;
    border-radius: 15px;
    overflow: hidden;
    position: relative;
}
.heatmap-bar {
    height: 100%;
    border-radius: 15px;
    transition: width 0.5s ease;
}
.risk-meter {
    position: absolute;
    right: 10px;
    top: 50%;
    transform: translateY(-50%);
    font-size: 12px;
    font-weight: 600;
    color: white;
    text-shadow: 0 1px 2px rgba(0,0,0,0.3);
}
.result-section {
    background: white;
    border-radius: 8px;
    padding: 20px;
    margin-bottom: 20px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.result-title {
    font-size: 18px;
    font-weight: 600;
    margin-bottom: 15px;
    color: #343a40;
    display: flex;
    align-items: center;
    gap: 8px;
}
.clause-item {
    margin-bottom: 8px;
    padding-left: 15px;
    position: relative;
}
.clause-item:before {
    content: "•";
    position: absolute;
    left: 0;
    color: #6c757d;
}
.penalty-amount {
    font-family: monospace;
    background: #f8f9fa;
    padding: 2px 6px;
    border-radius: 4px;
    margin-left: 5px;
}
.example-clause {
    background: #f8f9fa;
    padding: 12px;
    border-radius: 6px;
    margin-bottom: 10px;
    border-left: 3px solid #6c757d;
}
.example-number {
    font-weight: 600;
    margin-right: 8px;
    color: #6c757d;
}
"""
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract the text of every page of a PDF using pdfplumber.

    Args:
        pdf_path: Path of the PDF file, passed straight to ``pdfplumber.open``.

    Returns:
        The text of all pages joined with newlines. A page for which
        ``extract_text()`` returns a falsy value contributes an empty string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Join with a newline so the last word of one page and the first word
        # of the next are not fused into a single token, which would skew the
        # word-boundary keyword counts and the sentence splitting downstream.
        return "\n".join(page.extract_text() or "" for page in pdf.pages)
def count_keywords(text: str, keywords: List[str]) -> Dict[str, int]:
    """Count whole-word, case-insensitive occurrences of each keyword in text.

    Multi-word keywords match across any run of whitespace, so a phrase that
    was wrapped over a line break in the extracted text is still counted.

    Args:
        text: The text to search.
        keywords: Words or phrases to look for.

    Returns:
        A dict mapping each keyword (as given) to its number of matches.
        A keyword that is empty or only whitespace is reported with count 0.
    """
    counts: Dict[str, int] = {}
    for keyword in keywords:
        words = keyword.split()
        if not words:
            # A blank keyword would reduce to r'\b\b' and count every word
            # boundary in the text; report zero instead.
            counts[keyword] = 0
            continue
        # Escape each word separately and allow arbitrary whitespace between
        # them ("liquidated\ndamages" must match "liquidated damages").
        pattern = r'\b' + r'\s+'.join(re.escape(word) for word in words) + r'\b'
        counts[keyword] = len(re.findall(pattern, text, flags=re.IGNORECASE))
    return counts
def find_penalty_values(text: str) -> List[float]:
    """Find monetary and percentage figures mentioned in the text.

    Four kinds of mention are recognised, case-insensitively:

    * ``$`` followed by digits (commas and a decimal part allowed),
    * ``USD`` followed by digits,
    * a number followed by ``percent`` or ``%``,
    * a spelled-out number followed by ``dollars`` (converted with
      ``w2n.word_to_num``).

    Args:
        text: The text to scan.

    Returns:
        The parsed values as floats, grouped by kind in the order listed
        above and in order of appearance within each kind. Mentions that
        cannot be converted to a number are skipped.

    NOTE(review): percentages are returned as bare numbers in the same list
    as currency amounts, so callers cannot tell "10%" from "$10".
    """
    numeric_patterns = [
        r'\$\s*[\d,]+(?:\.\d+)?',
        r'USD\s*[\d,]+(?:\.\d+)?',
        # Accept a decimal part so "1.5%" is read as 1.5 rather than 5.
        r'\d+(?:\.\d+)?\s*(?:percent|%)',
    ]
    number_words = '|'.join((
        'zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven',
        'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen', 'fourteen',
        'fifteen', 'sixteen', 'seventeen', 'eighteen', 'nineteen', 'twenty',
        'thirty', 'forty', 'fifty', 'sixty', 'seventy', 'eighty', 'ninety',
        'hundred', 'thousand', 'million', 'billion',
    ))
    # Only number words (plus a connecting "and", spaces or hyphens) may
    # precede "dollars", so surrounding prose is not fed to the converter.
    spelled_out = re.compile(
        r'\b(?:(?:' + number_words + r'|and)\b[\s-]*)+dollars\b',
        flags=re.IGNORECASE,
    )

    penalties: List[float] = []

    for pattern in numeric_patterns:
        for match in re.finditer(pattern, text, flags=re.IGNORECASE):
            digits = re.sub(r'[^\d.]', '', match.group())
            try:
                penalties.append(float(digits))
            except ValueError:
                # e.g. "$ ," leaves no digits at all; ignore the mention.
                continue

    for match in spelled_out.finditer(text):
        # Drop the trailing "dollars" (whatever its letter case).
        words = match.group()[:-len('dollars')].strip()
        try:
            penalties.append(float(w2n.word_to_num(words)))
        except (ValueError, ArithmeticError, LookupError):
            # The converter rejects phrases such as a lone "and" or repeated
            # magnitude words; skip those rather than abort the whole scan.
            continue

    return penalties
def calculate_risk_score(penalty_count: int, penalty_values: List[float], obligation_count: int, delay_count: int) -> Tuple[float, str]:
    """Combine clause counts and penalty sizes into a 0-100 risk score.

    Contributions:
      * penalties:   5 points each, capped at 30
      * mean penalty value (only when any values were found):
        40 above 1,000,000; 25 above 100,000; 15 above 10,000; otherwise 5
      * obligations: 2 points each, capped at 20
      * delays:      10 points each, capped at 30

    The sum is capped at 100. Returns ``(score, level)`` where level is
    "Low" below 30, "Medium" below 70 and "High" otherwise.
    """
    penalty_points = min(penalty_count * 5, 30)

    magnitude_points = 0
    if penalty_values:
        mean_value = sum(penalty_values) / len(penalty_values)
        # Tiers are checked from largest to smallest; the first one exceeded wins.
        for threshold, points in ((1000000, 40), (100000, 25), (10000, 15)):
            if mean_value > threshold:
                magnitude_points = points
                break
        else:
            magnitude_points = 5

    obligation_points = min(obligation_count * 2, 20)
    delay_points = min(delay_count * 10, 30)

    total = min(
        penalty_points + magnitude_points + obligation_points + delay_points,
        100,
    )

    if total >= 70:
        level = "High"
    elif total >= 30:
        level = "Medium"
    else:
        level = "Low"
    return total, level
def create_risk_display(risk_score: float, risk_level: str) -> str:
    """Build the HTML for the three-row Low / Medium / High risk display.

    The row whose level equals ``risk_level`` is marked active and shows
    ``risk_score`` as its percentage and bar width in that level's colour.
    The other rows show 0% with the inactive colour at reduced opacity.

    Args:
        risk_score: Score to show on the active row, used as a CSS width in
            percent.
        risk_level: "Low", "Medium" or "High". Any other value leaves every
            row inactive.

    Returns:
        An HTML fragment that relies on the classes defined in the page CSS.
    """
    colors = {
        "Low": "var(--low-color)",
        "Medium": "var(--medium-color)",
        "High": "var(--high-color)",
    }
    html_parts = ["<div class='risk-container'>"]
    for level, level_color in colors.items():
        active = level == risk_level
        score = risk_score if active else 0
        color = level_color if active else "var(--inactive-color)"
        opacity = "1" if active else "0.6"
        row_class = "risk-row active" if active else "risk-row"
        level_class = f"risk-{level.lower()}"
        # The class attribute of the bar must be closed with the same quote it
        # was opened with, otherwise the browser swallows the style attribute
        # and the bar gets neither its width nor its colour.
        html_parts.append(f"""
        <div class='{row_class}'>
            <div class='risk-label {level_class}'>{level} Risk</div>
            <div class='risk-score {level_class}'>{score:.1f}%</div>
            <div class='heatmap-container'>
                <div class='heatmap-bar'
                     style="width: {score}%; background: {color}; opacity: {opacity}">
                    <span class='risk-meter'>{score:.1f}%</span>
                </div>
            </div>
        </div>
        """)
    html_parts.append("</div>")
    return "\n".join(html_parts)
def format_clauses(counts: Dict[str, int]) -> str:
    """Render keyword counts as one ``clause-item`` div per keyword.

    Rows follow the dict's iteration order and are joined with newlines; an
    empty dict yields an empty string.
    """
    rows = []
    for keyword, total in counts.items():
        rows.append(
            f"<div class='clause-item'>{keyword}: <strong>{total}</strong></div>"
        )
    return "\n".join(rows)
def format_penalty_amounts(amounts: List[float]) -> str:
    """Render up to the first five amounts as dollar-formatted HTML rows.

    Each amount is shown with thousands separators and two decimals. When
    ``amounts`` is empty a grey "nothing found" notice is returned instead.
    """
    if amounts:
        rows = []
        for amount in amounts[:5]:
            rows.append(
                f"<div class='clause-item'><span class='penalty-amount'>${amount:,.2f}</span></div>"
            )
        return "\n".join(rows)
    return "<div style='color: #6c757d;'>No specific penalty amounts found</div>"
def format_examples(sentences: List[str]) -> str:
    """Render up to the first three sentences as numbered example blocks.

    The sentences come from the uploaded document, so each one is
    HTML-escaped before being placed in the markup; characters such as
    ``<`` or ``&`` are shown literally instead of being parsed as HTML.

    Args:
        sentences: Candidate sentences, in display order.

    Returns:
        An HTML fragment with one ``example-clause`` div per sentence, or a
        grey "nothing found" notice when ``sentences`` is empty.
    """
    if not sentences:
        return "<div style='color: #6c757d;'>No penalty clauses found</div>"
    return "\n".join(
        f"""
        <div class='example-clause'>
            <span class='example-number'>{number}.</span> {html.escape(sentence)}
        </div>
        """
        for number, sentence in enumerate(sentences[:3], start=1)
    )
def analyze_pdf(file_obj) -> List:
    """Analyse an uploaded contract PDF and build the six HTML result panels.

    Steps: extract the text, count penalty / obligation / delay keywords,
    collect penalty figures, compute the risk score, pick example sentences
    that mention a penalty keyword, and render everything as HTML.

    Args:
        file_obj: The value delivered by the upload component. Either an
            object exposing the file path as ``.name`` or the path itself is
            accepted. NOTE(review): which of the two arrives depends on the
            installed Gradio version; confirm against the deployment.

    Returns:
        A list of six HTML strings, in order: risk display, penalty counts,
        penalty amounts, obligation counts, delay counts, example clauses.
        If anything fails (including no file being uploaded) all six entries
        are the same error panel.
    """
    try:
        if file_obj is None:
            # Give the user an actionable message instead of the
            # AttributeError text that a missing upload would produce.
            raise ValueError("Please upload a PDF file before running the analysis.")

        # Extract text from the uploaded file
        pdf_path = getattr(file_obj, "name", file_obj)
        text = extract_text_from_pdf(pdf_path)

        # Define keywords to search for
        penalty_keywords = ["penalty", "fine", "forfeit", "liquidated damages", "breach"]
        obligation_keywords = ["shall", "must", "required to", "obligated to", "duty"]
        delay_keywords = ["delay", "late", "overdue", "extension", "time is of the essence"]

        # Count keyword occurrences
        penalty_counts = count_keywords(text, penalty_keywords)
        obligation_counts = count_keywords(text, obligation_keywords)
        delay_counts = count_keywords(text, delay_keywords)

        # Find penalty values
        penalty_values = find_penalty_values(text)

        # Calculate total counts
        total_penalties = sum(penalty_counts.values())
        total_obligations = sum(obligation_counts.values())
        total_delays = sum(delay_counts.values())

        # Calculate risk score
        risk_score, risk_level = calculate_risk_score(
            total_penalties, penalty_values, total_obligations, total_delays
        )

        # Generate risk display
        risk_display = create_risk_display(risk_score, risk_level)

        # Find example sentences with penalties. Whole-word matching keeps
        # the examples in line with the keyword counts (a plain substring
        # test would pick up "fine" inside "defined").
        penalty_pattern = re.compile(
            r'\b(?:'
            + '|'.join(
                r'\s+'.join(re.escape(word) for word in keyword.split())
                for keyword in penalty_keywords
            )
            + r')\b',
            flags=re.IGNORECASE,
        )
        penalty_sentences = [
            sentence.strip()
            for sentence in re.split(r'(?<=[.!?])\s+', text)
            if penalty_pattern.search(sentence)
        ]

        # Format all results
        penalty_html = f"""
        <div class='result-section'>
            <div class='result-title'>🔍 Penalty Clauses: <strong>{total_penalties}</strong> found</div>
            {format_clauses(penalty_counts)}
        </div>
        """
        amounts_html = f"""
        <div class='result-section'>
            <div class='result-title'>💰 Penalty Amounts: <strong>{len(penalty_values)}</strong> found</div>
            {format_penalty_amounts(penalty_values)}
        </div>
        """
        obligation_html = f"""
        <div class='result-section'>
            <div class='result-title'>⚖️ Obligation Clauses: <strong>{total_obligations}</strong> found</div>
            {format_clauses(obligation_counts)}
        </div>
        """
        delay_html = f"""
        <div class='result-section'>
            <div class='result-title'>⏱️ Delay Clauses: <strong>{total_delays}</strong> found</div>
            {format_clauses(delay_counts)}
        </div>
        """
        examples_html = f"""
        <div class='result-section'>
            <div class='result-title'>📝 Example Penalty Clauses</div>
            {format_examples(penalty_sentences)}
        </div>
        """
        return [
            risk_display,
            penalty_html,
            amounts_html,
            obligation_html,
            delay_html,
            examples_html,
        ]
    except Exception as e:
        # UI boundary: any failure is shown to the user in every output panel
        # rather than propagating. The message is escaped because it can
        # contain file names or other text that must not be parsed as HTML.
        error_html = f"""
        <div class='result-section' style='background: #fff3cd;'>
            <div class='result-title'>❌ Error</div>
            <div>{html.escape(str(e))}</div>
        </div>
        """
        return [error_html] * 6
# Create Gradio interface
# Layout: a header, then a row with the upload controls (left) and the risk
# summary (right), then a row with two columns of result panels, then the
# example clauses. The button runs analyze_pdf on the uploaded file and writes
# its six HTML strings to the six gr.HTML components listed in `outputs`,
# in the same order.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# that had lost its indentation; confirm that penalty_examples is meant to sit
# below the two-column row rather than inside the second column.
with gr.Blocks(css=css, title="PDF Contract Risk Analyzer") as demo:
    gr.Markdown("""
    <div style='text-align: center; margin-bottom: 30px;'>
        <h1 style='margin-bottom: 10px;'>📄 PDF Contract Risk Analyzer</h1>
        <p style='color: #6c757d;'>Upload a contract PDF to analyze penalties, obligations, and delays</p>
    </div>
    """)
    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            submit_btn = gr.Button("Analyze Contract", variant="primary")
        with gr.Column(scale=3):
            gr.Markdown("### 📊 Risk Assessment Summary")
            risk_display = gr.HTML()
    with gr.Row():
        with gr.Column():
            penalty_count = gr.HTML()
            penalty_amounts = gr.HTML()
        with gr.Column():
            obligation_count = gr.HTML()
            delay_count = gr.HTML()
    penalty_examples = gr.HTML()
    submit_btn.click(
        fn=analyze_pdf,
        inputs=file_input,
        outputs=[risk_display, penalty_count, penalty_amounts,
                 obligation_count, delay_count, penalty_examples],
    )

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()