# Streamlit front-end for the Semantic Document Analyzer
# (UI layer over backend.SemanticAnalyzer).
| import streamlit as st | |
| import pandas as pd | |
| import os | |
| import tempfile | |
| from backend import SemanticAnalyzer | |
# Configure the browser tab title and use the full-width layout.
st.set_page_config(page_title="Semantic Document Analyzer", layout="wide")

# Inject global CSS overrides to restyle Streamlit's default widgets
# (gradient background, gradient primary buttons, gradient h1 text).
# NOTE(review): `.css-1d391kg` targets an auto-generated Streamlit class
# name, which changes between Streamlit releases — fragile; confirm it
# still matches the intended container in the deployed version.
st.markdown("""
<style>
/* Premium Look & Feel */
.stApp {
    background: linear-gradient(to right, #f8f9fa, #e9ecef);
    font-family: 'Inter', sans-serif;
}
.stButton>button {
    background: linear-gradient(45deg, #4f46e5, #7c3aed);
    color: white;
    border: none;
    border-radius: 8px;
    padding: 0.75rem 1.5rem;
    font-weight: 600;
    transition: all 0.3s ease;
}
.stButton>button:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(79, 70, 229, 0.3);
}
div[data-testid="stMetricValue"] {
    color: #111827;
    font-weight: 700;
}
h1 {
    background: -webkit-linear-gradient(45deg, #1e3a8a, #3b82f6);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 800 !important;
}
.css-1d391kg {
    background-color: #ffffff;
    border-radius: 12px;
    padding: 1.5rem;
    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
}
</style>
""", unsafe_allow_html=True)
# Page heading plus an HTML "card" describing the app; raw HTML requires
# unsafe_allow_html (content is static/trusted, not user input).
st.title("🧠 Semantic Document Analyzer")
st.markdown("""
<div style='background-color: white; padding: 1.5rem; border-radius: 10px; box-shadow: 0 2px 5px rgba(0,0,0,0.05); margin-bottom: 2rem;'>
    <h4 style='margin-top:0'>Holistic Document Understanding</h4>
    <p style='color: #4b5563;'>
        This AI system leverages <b>Sentence-BERT</b> and <b>Cross-Encoders</b> to perform deep semantic analysis across long documents.
        It goes beyond simple keyword matching to understand context, detecting subtle contradictions and semantic duplicates.
    </p>
</div>
""", unsafe_allow_html=True)
# Sidebar: upload controls. Both widgets set module-level names that the
# main flow below reads on every rerun:
#   uploaded_files — list of UploadedFile objects (PDF only), or empty/None
#   analyze_btn    — True only on the rerun triggered by the button click
with st.sidebar:
    st.header("Upload Documents")
    uploaded_files = st.file_uploader("Upload PDF files", type=['pdf'], accept_multiple_files=True)
    analyze_btn = st.button("Analyze Documents", type="primary")
def _save_uploads(uploads, dest_dir):
    """Persist Streamlit upload buffers to disk; return the written paths.

    The backend expects filesystem paths, so each in-memory upload is
    copied into *dest_dir* under its original name.
    """
    paths = []
    for uploaded_file in uploads:
        path = os.path.join(dest_dir, uploaded_file.name)
        with open(path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        paths.append(path)
    return paths


def _build_report(results):
    """Assemble the analysis *results* dict into a Markdown report string.

    Mirrors exactly what the dashboard shows: document/chunk counts,
    duplicate pairs (text truncated to 100 chars) and contradiction pairs
    (full text). Built with join rather than repeated `+=` concatenation.
    """
    parts = [
        "# Semantic Analysis Report\n\n",
        f"Total Documents: {results['stats']['total_docs']}\n",
        f"Total Chunks: {results['stats']['total_chunks']}\n\n",
        "## Duplicates\n",
    ]
    if results['duplicates']:
        for d in results['duplicates']:
            parts.append(f"- Score: {d['score']:.4f}\n")
            parts.append(f" - Source A: {d['chunk_a']['source']} | \"{d['chunk_a']['text'][:100]}...\"\n")
            parts.append(f" - Source B: {d['chunk_b']['source']} | \"{d['chunk_b']['text'][:100]}...\"\n\n")
    else:
        parts.append("No duplicates found.\n\n")
    parts.append("## Contradictions\n")
    if results['contradictions']:
        for c in results['contradictions']:
            parts.append(f"- Confidence: {c['confidence']:.4f}\n")
            parts.append(f" - Source A: {c['chunk_a']['source']} | \"{c['chunk_a']['text']}\"\n")
            parts.append(f" - Source B: {c['chunk_b']['source']} | \"{c['chunk_b']['text']}\"\n\n")
    else:
        parts.append("No contradictions found.\n")
    return "".join(parts)


def _render_results(results):
    """Draw the dashboard: summary metrics, duplicates, contradictions, export."""
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Documents", results['stats']['total_docs'])
    with col2:
        st.metric("Total Text Chunks", results['stats']['total_chunks'])
    st.divider()

    # 1. Near-duplicate chunk pairs, each shown side by side in an expander.
    st.subheader(f"⚠️ Potential Duplicates Detected ({len(results['duplicates'])})")
    if results['duplicates']:
        for dup in results['duplicates']:
            with st.expander(f"Similarity Score: {dup['score']:.4f}"):
                c1, c2 = st.columns(2)
                with c1:
                    st.caption(f"Source: {dup['chunk_a']['source']}")
                    st.info(dup['chunk_a']['text'])
                with c2:
                    st.caption(f"Source: {dup['chunk_b']['source']}")
                    st.info(dup['chunk_b']['text'])
    else:
        st.success("No duplicates found.")
    st.divider()

    # 2. Contradictory chunk pairs (warning styling to distinguish from duplicates).
    st.subheader(f"🛑 Contradictions / Inconsistencies ({len(results['contradictions'])})")
    if results['contradictions']:
        for contra in results['contradictions']:
            with st.expander(f"Contradiction Confidence: {contra['confidence']:.4f}"):
                c1, c2 = st.columns(2)
                with c1:
                    st.caption(f"Source: {contra['chunk_a']['source']}")
                    st.warning(contra['chunk_a']['text'])
                with c2:
                    st.caption(f"Source: {contra['chunk_b']['source']}")
                    st.warning(contra['chunk_b']['text'])

    # Offer the same findings as a downloadable Markdown report.
    st.download_button(
        label="Download Report (Markdown)",
        data=_build_report(results),
        file_name="analysis_report.md",
        mime="text/markdown"
    )


if analyze_btn:
    if not uploaded_files:
        # Button pressed with nothing uploaded. (The original empty-list check
        # was unreachable: it sat inside `if analyze_btn and uploaded_files:`,
        # which already excludes an empty upload list.)
        st.error("Please upload at least one document.")
    else:
        with st.spinner("Processing documents... This may take a while for large files."):
            # TemporaryDirectory guarantees the copied PDFs are removed even on
            # failure — the original's cleanup was commented out, leaking a
            # temp dir per run.
            with tempfile.TemporaryDirectory() as temp_dir:
                file_paths = _save_uploads(uploaded_files, temp_dir)
                try:
                    analyzer = SemanticAnalyzer()
                    results = analyzer.analyze_documents(file_paths)
                    if "error" in results:
                        # Backend signals recoverable failures via an "error" key.
                        st.error(results["error"])
                    else:
                        _render_results(results)
                except Exception as e:
                    # Surface unexpected failures in the UI; traceback shown to
                    # aid debugging (consider hiding it in production).
                    st.error(f"An error occurred during analysis: {str(e)}")
                    import traceback
                    st.write(traceback.format_exc())
else:
    st.info("Upload documents and click Analyze to start.")