Spaces:
Build error
Build error
| import os | |
| import csv | |
| import PyPDF2 | |
| import gradio as gr | |
| from transformers import pipeline | |
# Contract fields and clauses to extract; each entry is used verbatim as the
# label of the extracted value.
KEY_PARAMETERS = [
    # Contract term
    "Contract Start Date",
    "Contract End Date",
    # Commercial terms
    "Payment Terms",
    "Renewal Terms",
    # Clauses
    "Liability Clause",
    "Termination Clause",
    "Confidentiality Clause",
    "Indemnification Clause",
    # Legal venue
    "Governing Law",
    "Jurisdiction",
]
def load_contract(file_path):
    """Read a PDF file and return the text of all its pages as one string.

    Args:
        file_path: Path of the PDF file to open.

    Returns:
        The extracted text of every page, concatenated in page order with no
        separator added between pages. A page that yields no text contributes
        nothing to the result.
    """
    with open(file_path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        # `or ""` guards against a page whose extraction yields None or an
        # empty value (e.g. an image-only page), which would otherwise raise
        # TypeError when concatenated. join() also avoids rebuilding the
        # string once per page.
        return "".join(page.extract_text() or "" for page in reader.pages)
# Hugging Face pipelines, built once when the module is imported.
qa_pipeline = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)
summarization_pipeline = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
translation_pipeline = pipeline(
    task="translation_en_to_fr",
    model="Helsinki-NLP/opus-mt-en-fr",
)
def extract_parameters(context, question):
    """Ask the question-answering pipeline ``question`` about ``context``.

    Args:
        context: Text in which to look for the answer.
        question: Natural-language question to ask about the text.

    Returns:
        The answer string reported by the pipeline, or "Not found" when the
        context or question is blank or the pipeline reports no answer text.
    """
    # The pipeline rejects empty inputs with an exception; a document with no
    # extractable text should give "Not found" instead of aborting the run.
    if not context or not context.strip():
        return "Not found"
    if not question or not question.strip():
        return "Not found"

    result = qa_pipeline({"context": context, "question": question})
    answer = result.get("answer")
    # A blank answer carries no information, so treat it as a missing one.
    if not answer or not answer.strip():
        return "Not found"
    return answer
def summarize_contract(context):
    """Produce a summary of ``context`` with the summarization pipeline.

    Args:
        context: Text to summarize.

    Returns:
        The summary text (generated with ``min_length=50`` and
        ``max_length=200``), or an empty string when ``context`` is blank.
        Input longer than the model accepts is truncated, so for very long
        documents only the leading part is summarized.
    """
    # Nothing to summarize; skip the model call.
    if not context or not context.strip():
        return ""

    summaries = summarization_pipeline(
        context,
        max_length=200,
        min_length=50,
        do_sample=False,
        # Whole contracts routinely exceed the model's input limit; truncate
        # at the tokenizer instead of failing on over-long input.
        truncation=True,
    )
    return summaries[0]["summary_text"]
def translate_contract(context):
    """Translate ``context`` with the English-to-French translation pipeline.

    Args:
        context: English text to translate.

    Returns:
        The translated text, or an empty string when ``context`` is blank.
    """
    # Blank input has nothing to translate; do not ask the model to generate
    # output for it.
    if not context or not context.strip():
        return ""

    translations = translation_pipeline(context)
    return translations[0]["translation_text"]
def save_to_csv(file_name, data):
    """Write ``data`` as a two-column CSV file inside the ``output`` directory.

    The directory is created if it does not exist. The file starts with a
    ``Parameter,Value`` header row, followed by one row per item of ``data``.

    Args:
        file_name: Name of the CSV file to create under ``output``.
        data: Mapping of parameter name to value.

    Returns:
        The path of the written file (``output`` joined with ``file_name``).
    """
    output_dir = "output"
    destination = os.path.join(output_dir, file_name)
    os.makedirs(output_dir, exist_ok=True)

    with open(destination, mode="w", newline="", encoding="utf-8") as handle:
        rows = csv.writer(handle)
        rows.writerow(["Parameter", "Value"])
        rows.writerows(data.items())

    return destination
def process_contract(file_path):
    """Run the full analysis of one contract PDF and save the results as CSV.

    Loads the PDF text, asks one question per entry of ``KEY_PARAMETERS``,
    summarizes the text, translates the summary, and writes everything to a
    CSV file named ``results_<pdf base name>.csv``.

    Args:
        file_path: Path of the contract PDF.

    Returns:
        A ``(results, csv_path)`` tuple: ``results`` maps each parameter name
        (plus "Contract Summary" and "Contract Summary (French)") to its
        value, and ``csv_path`` is the path of the CSV file that was written.
    """
    full_context = load_contract(file_path)

    # One question per parameter, all asked against the same contract text.
    results = {
        param: extract_parameters(full_context, f"What is the {param}?")
        for param in KEY_PARAMETERS
    }

    summary = summarize_contract(full_context)
    results["Contract Summary"] = summary
    # The translation is made from the summary, not from the full contract.
    results["Contract Summary (French)"] = translate_contract(summary)

    base_name = os.path.splitext(os.path.basename(file_path))[0]
    # Return the path save_to_csv actually wrote to; the bare file name alone
    # does not point at the saved file.
    csv_path = save_to_csv(f"results_{base_name}.csv", results)
    return results, csv_path
def interface(file):
    """Handle an uploaded file: process it and format the results as text.

    Args:
        file: The uploaded file, either as a path string or as an object
            whose ``name`` attribute holds the path. Falsy when nothing was
            uploaded.

    Returns:
        A ``(text, output_file)`` tuple. ``text`` has one ``key: value`` line
        per result and ``output_file`` is the value returned by
        ``process_contract``. When no file is given, returns
        ``("No file provided", None)``.
    """
    if not file:
        return "No file provided", None

    # Depending on how the File component is configured, the upload arrives
    # either as a plain path string or as a file-like wrapper exposing .name.
    file_path = file if isinstance(file, str) else file.name

    results, output_file = process_contract(file_path)
    display_results = "\n".join(f"{key}: {value}" for key, value in results.items())
    return display_results, output_file
# Create Gradio Interface
# NOTE(review): the original indentation of this section was not preserved.
# The nesting below (all three components inside the Row, button underneath)
# is a reconstruction; confirm it against the intended layout.
with gr.Blocks() as demo:
    # Page title and short usage instructions.
    gr.Markdown("# Contract Intelligence")
    gr.Markdown("Upload a contract PDF to extract key parameters, generate a summary, and translate the summary into French.")
    with gr.Row():
        # Upload widget restricted to .pdf files.
        file_input = gr.File(label="Upload Contract PDF", file_types=[".pdf"])
        # Text boxes receiving the two values returned by interface().
        result_output = gr.Textbox(label="Extracted Information")
        download_link = gr.Textbox(label="Download CSV File Path")
    process_button = gr.Button("Process Contract")
    # Clicking the button calls interface() with the uploaded file and writes
    # its two return values into the text boxes, in order.
    process_button.click(interface, inputs=file_input, outputs=[result_output, download_link])

if __name__ == "__main__":
    # Start the app only when run as a script: bind to all network
    # interfaces on port 8080.
    demo.launch(server_name="0.0.0.0", server_port=8080)