Spaces:
Sleeping
Sleeping
import os
import time
from pathlib import Path

import requests
import streamlit as st
# Base URL of the backend API that stores documents and runs the pipeline.
API_URL = "http://localhost:8000"

# ----------------------------------
# Setup folders (UI only)
# ----------------------------------
BASE_DIR = Path.cwd()
QUERY_DIR = BASE_DIR / "queries"
QUERY_DIR.mkdir(parents=True, exist_ok=True)

# ----------------------------------
# Session state init
# ----------------------------------
# Values that must survive Streamlit reruns are kept in st.session_state.
if "saved_docs" not in st.session_state:
    st.session_state.saved_docs = []
if "saved_once" not in st.session_state:
    st.session_state.saved_once = set()  # prevents duplicates

# Alias of the session list: appending to it updates the session state too.
saved_docs = st.session_state.saved_docs

# ----------------------------------
# UI
# ----------------------------------
st.set_page_config(page_title="Exam Pipeline", layout="wide")
st.title("π Exam Question Processing Pipeline")

# The key used to be a hard-coded, non-empty placeholder, which made the later
# "API key required" check unreachable and sent the placeholder to the backend.
# Read it from the GROQ_API_KEY environment variable instead, and fall back to
# a masked input field when the variable is unset or blank.
api_key = os.environ.get("GROQ_API_KEY", "").strip()
if not api_key:
    api_key = st.text_input("Groq API key", type="password").strip()

num_docs = st.number_input(
    "Number of documents",
    min_value=1,
    step=1,
)
# ----------------------------------
# Upload Section
# ----------------------------------
# Upper bound (seconds) for one upload request so that an unreachable backend
# produces an error message instead of hanging the page.
UPLOAD_TIMEOUT_S = 60


def _upload_to_api(endpoint, files):
    """Upload files to the backend and return the path of the stored PDF.

    Args:
        endpoint: API route name appended to ``API_URL`` (without a leading slash).
        files: Value passed unchanged as the ``files`` argument of ``requests.post``.

    Returns:
        The ``path`` value from the JSON response.

    Any failure (connection problem, non-200 status, non-JSON body, an
    ``error`` field, or a missing ``path``) is reported with ``st.error`` and
    the script run is halted with ``st.stop()``.
    """
    try:
        res = requests.post(
            f"{API_URL}/{endpoint}",
            files=files,
            timeout=UPLOAD_TIMEOUT_S,
        )
    except requests.RequestException as exc:
        st.error(f"Could not reach the API at {API_URL}: {exc}")
        st.stop()

    if res.status_code != 200:
        st.error(res.text)
        st.stop()

    try:
        data = res.json()
    except ValueError:
        # Raised by requests when the body is not valid JSON.
        st.error(f"API returned a non-JSON response: {res.text}")
        st.stop()

    if not isinstance(data, dict):
        st.error(f"API returned an unexpected response: {data!r}")
        st.stop()
    if "error" in data:
        st.error(data["error"])
        st.stop()

    pdf_path = data.get("path")
    if not pdf_path:
        st.error("API response did not include the saved file path")
        st.stop()
    return pdf_path


for i in range(num_docs):
    st.subheader(f"Document {i+1}")
    doc_type = st.radio(
        "Input type",
        ["Images", "PDF"],
        key=f"type_{i}",
    )

    # -------- Images --------
    if doc_type == "Images":
        uploaded_images = st.file_uploader(
            "Upload images",
            type=["png", "jpg", "jpeg"],
            accept_multiple_files=True,
            key=f"img_{i}",
        )
        if uploaded_images and st.button(f"Save Images as PDF (Doc {i+1})"):
            # The slot index plus the file names identify this upload, so the
            # same set of images is only sent to the backend once per session.
            unique_key = f"img_{i}_{','.join(img.name for img in uploaded_images)}"
            if unique_key in st.session_state.saved_once:
                st.info("Images already saved for this document.")
            else:
                pdf_path = _upload_to_api(
                    "images-to-pdf",
                    [
                        ("files", (img.name, img.getvalue(), img.type))
                        for img in uploaded_images
                    ],
                )
                saved_docs.append(pdf_path)
                st.session_state.saved_once.add(unique_key)
                st.success(f"Saved β {pdf_path}")

    # -------- PDF --------
    else:
        uploaded_pdf = st.file_uploader(
            "Upload PDF",
            type=["pdf"],
            key=f"pdf_{i}",
        )
        if uploaded_pdf:
            # A PDF is saved as soon as it is selected; the key stops it from
            # being uploaded again on later reruns.
            unique_key = f"pdf_{i}_{uploaded_pdf.name}"
            if unique_key in st.session_state.saved_once:
                st.info("PDF already saved for this document.")
            else:
                pdf_path = _upload_to_api(
                    "save-pdf",
                    {
                        "file": (
                            uploaded_pdf.name,
                            uploaded_pdf.getvalue(),
                            "application/pdf",
                        )
                    },
                )
                saved_docs.append(pdf_path)
                st.session_state.saved_once.add(unique_key)
                st.success(f"Saved β {pdf_path}")
# ----------------------------------
# Run Pipeline
# ----------------------------------
# Upper bound (seconds) for a single API request, and the pause between polls.
PIPELINE_TIMEOUT_S = 30
POLL_INTERVAL_S = 1

# NOTE(review): the "π"/"β" glyphs in the labels and status strings of this
# section look like mis-encoded emoji. They are kept exactly as found because
# the status comparisons must match what the backend sends - confirm them
# against the API before changing anything.
STATUS_COMPLETED = "β Completed"
STATUS_ERROR_PREFIX = "β"


def _pipeline_request(method, endpoint, **kwargs):
    """Send one request to the backend and return its decoded JSON object.

    Args:
        method: HTTP method name, e.g. ``"GET"`` or ``"POST"``.
        endpoint: Path appended to ``API_URL`` (with a leading slash).
        **kwargs: Extra arguments forwarded to ``requests.request``.

    Returns:
        The response body decoded as a ``dict``.

    A connection failure, a non-200 status, a non-JSON body or a body that is
    not a JSON object is reported with ``st.error`` and the script run is
    halted with ``st.stop()``.
    """
    try:
        response = requests.request(
            method,
            f"{API_URL}{endpoint}",
            timeout=PIPELINE_TIMEOUT_S,
            **kwargs,
        )
    except requests.RequestException as exc:
        st.error(f"Could not reach the API at {API_URL}: {exc}")
        st.stop()

    if response.status_code != 200:
        st.error(response.text)
        st.stop()

    try:
        payload = response.json()
    except ValueError:
        # Raised by requests when the body is not valid JSON.
        st.error(f"API returned a non-JSON response: {response.text}")
        st.stop()

    if not isinstance(payload, dict):
        st.error(f"API returned an unexpected response: {payload!r}")
        st.stop()
    return payload


# The finished result lives in session state: clicking a download button
# reruns the script, and on that rerun the "Run" button reads False, so
# anything rendered only inside its branch would disappear.
if "pipeline_result" not in st.session_state:
    st.session_state.pipeline_result = None

if st.button("π Run Full Pipeline"):
    if not api_key:
        st.error("β API key required")
        st.stop()
    if not saved_docs:
        st.error("β Please upload images or PDFs first")
        st.stop()

    # Forget the previous run so stale downloads are never offered.
    st.session_state.pipeline_result = None

    # ---------------- START PIPELINE ----------------
    # NOTE(review): the key travels as a URL query parameter, which commonly
    # ends up in access logs - consider a header once the backend accepts one.
    start_data = _pipeline_request(
        "POST",
        "/run-pipeline",
        params={"api_key": api_key},
        json=saved_docs,
    )
    job_id = start_data.get("job_id")
    if not job_id:
        st.error("β Failed to start pipeline")
        st.stop()

    # ---------------- STATUS POLLING ----------------
    st.subheader("π Pipeline Progress")
    status_box = st.empty()
    final_result = None
    while True:
        status_data = _pipeline_request("GET", f"/job-status/{job_id}")
        status_text = str(status_data.get("status", "Unknown status"))
        status_box.info(status_text)

        # Test the exact "completed" status before the error prefix: the
        # completed string begins with the same glyph as the prefix, so the
        # prefix test would otherwise report every success as a failure.
        if status_text == STATUS_COMPLETED:
            final_result = status_data.get("result")
            break
        if status_text.startswith(STATUS_ERROR_PREFIX):
            st.error(status_text)
            st.stop()

        time.sleep(POLL_INTERVAL_S)

    # ---------------- FINAL OUTPUT ----------------
    if not final_result or not isinstance(final_result, dict):
        st.error("Pipeline finished but no result returned")
        st.stop()

    st.session_state.pipeline_result = final_result
    st.success("β Pipeline completed successfully!")

# ---------------- DOWNLOADS ----------------
# Rendered on every rerun while a result is stored, so both files stay
# downloadable after the first download click.
pipeline_result = st.session_state.pipeline_result
if pipeline_result:
    downloads = (
        (
            "π₯ Download Final PDF",
            pipeline_result.get("final_pdf"),
            "Exam_Frequency_Report.pdf",
        ),
        (
            "π₯ Download Frequency JSON",
            pipeline_result.get("frequency_json"),
            "output_frequency.json",
        ),
    )
    for label, source_path, download_name in downloads:
        if not source_path:
            st.error(f"Pipeline result has no file path for {download_name}")
            continue
        try:
            # The paths come from the backend; reading them here only works
            # when this UI can see the same filesystem.
            file_bytes = Path(source_path).read_bytes()
        except OSError as exc:
            st.error(f"Could not read {source_path}: {exc}")
            continue
        st.download_button(label, file_bytes, file_name=download_name)
# ----------------------------------
# Debug view
# ----------------------------------
# Lists the PDF files currently present in the local queries/ folder.
st.subheader("π PDFs available in queries/")
pdf_listing = list(map(str, QUERY_DIR.glob("*.pdf")))
st.write(pdf_listing)

# Lists the paths recorded in this session's saved_docs.
st.subheader("π Recently Uploaded PDFs (This Session Only)")
if not saved_docs:
    st.info("No documents uploaded in this session.")
else:
    st.write(saved_docs)