# Streamlit front-end for the Semantic Document Analyzer
# (UI layer over backend.SemanticAnalyzer).
| import streamlit as st | |
| import pandas as pd | |
| import os | |
| import tempfile | |
| from backend import SemanticAnalyzer | |
# Configure the browser tab title and use the full-width layout.
st.set_page_config(page_title="Semantic Document Analyzer", layout="wide")

# Inject global CSS overrides to restyle Streamlit's default widgets
# (gradient background, gradient primary buttons, gradient h1 text).
# NOTE(review): `.css-1d391kg` targets an auto-generated Streamlit class
# name, which changes between Streamlit releases — fragile; confirm it
# still matches the intended container in the deployed version.
st.markdown("""
<style>
/* Premium Look & Feel */
.stApp {
    background: linear-gradient(to right, #f8f9fa, #e9ecef);
    font-family: 'Inter', sans-serif;
}
.stButton>button {
    background: linear-gradient(45deg, #4f46e5, #7c3aed);
    color: white;
    border: none;
    border-radius: 8px;
    padding: 0.75rem 1.5rem;
    font-weight: 600;
    transition: all 0.3s ease;
}
.stButton>button:hover {
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(79, 70, 229, 0.3);
}
div[data-testid="stMetricValue"] {
    color: #111827;
    font-weight: 700;
}
h1 {
    background: -webkit-linear-gradient(45deg, #1e3a8a, #3b82f6);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 800 !important;
}
.css-1d391kg {
    background-color: #ffffff;
    border-radius: 12px;
    padding: 1.5rem;
    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
}
</style>
""", unsafe_allow_html=True)
# Page heading plus an HTML "card" describing the app; raw HTML requires
# unsafe_allow_html (content is static/trusted, not user input).
st.title("🧠 Semantic Document Analyzer")
st.markdown("""
<div style='background-color: white; padding: 1.5rem; border-radius: 10px; box-shadow: 0 2px 5px rgba(0,0,0,0.05); margin-bottom: 2rem;'>
    <h4 style='margin-top:0'>Holistic Document Understanding</h4>
    <p style='color: #4b5563;'>
        This AI system leverages <b>Sentence-BERT</b> and <b>Cross-Encoders</b> to perform deep semantic analysis across long documents.
        It goes beyond simple keyword matching to understand context, detecting subtle contradictions and semantic duplicates.
    </p>
</div>
""", unsafe_allow_html=True)
# Sidebar: upload controls. Both widgets set module-level names that the
# main flow below reads on every rerun:
#   uploaded_files — list of UploadedFile objects (PDF only), or empty/None
#   analyze_btn    — True only on the rerun triggered by the button click
with st.sidebar:
    st.header("Upload Documents")
    uploaded_files = st.file_uploader("Upload PDF files", type=['pdf'], accept_multiple_files=True)
    analyze_btn = st.button("Analyze Documents", type="primary")
def _save_uploads(uploads, dest_dir):
    """Persist Streamlit upload buffers to disk; return the written paths.

    The backend expects filesystem paths, so each in-memory upload is
    copied into *dest_dir* under its original name.
    """
    paths = []
    for uploaded_file in uploads:
        path = os.path.join(dest_dir, uploaded_file.name)
        with open(path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        paths.append(path)
    return paths


def _build_report(results):
    """Assemble the analysis *results* dict into a Markdown report string.

    Mirrors exactly what the dashboard shows: document/chunk counts,
    duplicate pairs (text truncated to 100 chars) and contradiction pairs
    (full text). Built with join rather than repeated `+=` concatenation.
    """
    parts = [
        "# Semantic Analysis Report\n\n",
        f"Total Documents: {results['stats']['total_docs']}\n",
        f"Total Chunks: {results['stats']['total_chunks']}\n\n",
        "## Duplicates\n",
    ]
    if results['duplicates']:
        for d in results['duplicates']:
            parts.append(f"- Score: {d['score']:.4f}\n")
            parts.append(f" - Source A: {d['chunk_a']['source']} | \"{d['chunk_a']['text'][:100]}...\"\n")
            parts.append(f" - Source B: {d['chunk_b']['source']} | \"{d['chunk_b']['text'][:100]}...\"\n\n")
    else:
        parts.append("No duplicates found.\n\n")
    parts.append("## Contradictions\n")
    if results['contradictions']:
        for c in results['contradictions']:
            parts.append(f"- Confidence: {c['confidence']:.4f}\n")
            parts.append(f" - Source A: {c['chunk_a']['source']} | \"{c['chunk_a']['text']}\"\n")
            parts.append(f" - Source B: {c['chunk_b']['source']} | \"{c['chunk_b']['text']}\"\n\n")
    else:
        parts.append("No contradictions found.\n")
    return "".join(parts)


def _render_results(results):
    """Draw the dashboard: summary metrics, duplicates, contradictions, export."""
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Total Documents", results['stats']['total_docs'])
    with col2:
        st.metric("Total Text Chunks", results['stats']['total_chunks'])
    st.divider()

    # 1. Near-duplicate chunk pairs, each shown side by side in an expander.
    st.subheader(f"⚠️ Potential Duplicates Detected ({len(results['duplicates'])})")
    if results['duplicates']:
        for dup in results['duplicates']:
            with st.expander(f"Similarity Score: {dup['score']:.4f}"):
                c1, c2 = st.columns(2)
                with c1:
                    st.caption(f"Source: {dup['chunk_a']['source']}")
                    st.info(dup['chunk_a']['text'])
                with c2:
                    st.caption(f"Source: {dup['chunk_b']['source']}")
                    st.info(dup['chunk_b']['text'])
    else:
        st.success("No duplicates found.")
    st.divider()

    # 2. Contradictory chunk pairs (warning styling to distinguish from duplicates).
    st.subheader(f"🛑 Contradictions / Inconsistencies ({len(results['contradictions'])})")
    if results['contradictions']:
        for contra in results['contradictions']:
            with st.expander(f"Contradiction Confidence: {contra['confidence']:.4f}"):
                c1, c2 = st.columns(2)
                with c1:
                    st.caption(f"Source: {contra['chunk_a']['source']}")
                    st.warning(contra['chunk_a']['text'])
                with c2:
                    st.caption(f"Source: {contra['chunk_b']['source']}")
                    st.warning(contra['chunk_b']['text'])

    # Offer the same findings as a downloadable Markdown report.
    st.download_button(
        label="Download Report (Markdown)",
        data=_build_report(results),
        file_name="analysis_report.md",
        mime="text/markdown"
    )


if analyze_btn:
    if not uploaded_files:
        # Button pressed with nothing uploaded. (The original empty-list check
        # was unreachable: it sat inside `if analyze_btn and uploaded_files:`,
        # which already excludes an empty upload list.)
        st.error("Please upload at least one document.")
    else:
        with st.spinner("Processing documents... This may take a while for large files."):
            # TemporaryDirectory guarantees the copied PDFs are removed even on
            # failure — the original's cleanup was commented out, leaking a
            # temp dir per run.
            with tempfile.TemporaryDirectory() as temp_dir:
                file_paths = _save_uploads(uploaded_files, temp_dir)
                try:
                    analyzer = SemanticAnalyzer()
                    results = analyzer.analyze_documents(file_paths)
                    if "error" in results:
                        # Backend signals recoverable failures via an "error" key.
                        st.error(results["error"])
                    else:
                        _render_results(results)
                except Exception as e:
                    # Surface unexpected failures in the UI; traceback shown to
                    # aid debugging (consider hiding it in production).
                    st.error(f"An error occurred during analysis: {str(e)}")
                    import traceback
                    st.write(traceback.format_exc())
else:
    st.info("Upload documents and click Analyze to start.")