# MindFlow_AI / app.py
# NatalieMonged's picture
# Update app.py
# 49ad7b0 verified
import streamlit as st
import os
import tempfile
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import PromptTemplate
# GLOBAL UI & STYLING CONFIGURATION
# ===================================
# Page-level settings are applied before any other Streamlit command runs.
# The page icon is a real emoji: Streamlit interprets a non-emoji string as an
# image path/URL, so a mis-encoded icon would not render.
st.set_page_config(page_title="MindFlow AI", layout="wide", page_icon="💡")

# Button styling: a blue primary colour with a darker shade on hover.
# The CSS is injected as raw HTML, hence unsafe_allow_html=True.
st.markdown("""
<style>
div.stButton > button:first-child {
background-color: #5DADE2;
color: white;
transition: all 0.3s ease;
}
div.stButton > button:first-child:hover {
background-color: #2E86C1;
border-color: #2E86C1;
color: #FFFFFF;
}
</style>
""", unsafe_allow_html=True)

# Branding header: loads the web fonts, then renders the centred product title
# and its subtitle as a single HTML heading.
st.markdown("""
<style>
@import url('https://fonts.googleapis.com/css2?family=Fredoka+One&family=Montserrat:wght@400;700&display=swap');
</style>
<div style='text-align: center; margin-bottom: 20px;'>
<h1 style='font-family: "Fredoka One", cursive; font-size: 60px; color: #5D6D7E; letter-spacing: 2px; margin-bottom: 0px;'>
MindFlow <span style='color: #85C1E9;'>AI</span> <br>
<span style='text-align: center; font-family: "Segoe UI";font-size: 30px; color: #666;'>Driven Assistant Summarization</span>
</h1>
</div>
""", unsafe_allow_html=True)
# BACKEND & MODEL INITIALIZATION
# ====================================
# The Gemini client reads its credentials from the GOOGLE_API_KEY environment
# variable. Fail with a readable message instead of a bare KeyError when the
# key has not been configured (e.g. a missing Space secret).
if not os.environ.get("GOOGLE_API_KEY"):
    st.error(
        "GOOGLE_API_KEY is not set. Add it to the environment "
        "(or to the Space secrets) and reload the app."
    )
    st.stop()

# Initialize Google Gemini Model
# Temperature 0.01 is utilized to minimize variance and ensure factual consistency
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.01)
# SIDEBAR: Audience Type Controls
# The selected value ("Beginner" or "Expert") is stored in `audience_type`
# and later substituted into the summarization and evaluation prompts.
# NOTE(review): indentation was lost in the reviewed copy; every statement
# below is assumed to belong to the sidebar -- confirm against the original.
with st.sidebar:
    st.title("User Settings")
    st.markdown("### **Target Audience:**")
    # The visible heading is the markdown line above. The widget still gets a
    # real (collapsed) label because Streamlit advises against empty labels
    # for accessibility reasons.
    audience_type = st.radio(
        "Target Audience",
        ["Beginner", "Expert"],
        label_visibility="collapsed",
    )
    st.info(f"Targeting: {audience_type} level.")
# Data Ingestion Layer
# Drag and drop a PDF OR paste text manually. Both paths write into
# `full_text`; because the paste tab is evaluated second, pasted text takes
# precedence over an uploaded PDF when both are present.
st.header("Input Source")
tab1, tab2 = st.tabs(["📄 Upload PDF", "✍️ Paste Text"])
full_text = ""

# Handling PDF uploads using LangChain loaders and temporary disk storage
with tab1:
    uploaded_file = st.file_uploader("Upload PDF Document", type="pdf")
    if uploaded_file:
        # PyPDFLoader works on a file path, so the upload is spooled to disk.
        # delete=False keeps the file after the handle is closed; it is
        # removed explicitly below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_path = tmp_file.name
        try:
            # Extract the text of every page and merge it into one string.
            pages = PyPDFLoader(tmp_path).load()
            full_text = " ".join(page.page_content for page in pages)
        finally:
            # Always clean up the temporary copy, even if parsing fails.
            os.remove(tmp_path)

# Handling direct text input
with tab2:
    manual_text = st.text_area("Paste your article or text here:", height=300)
    if manual_text:
        full_text = manual_text
# PROCESSING PIPELINE: Summarization & Evaluation
# ================================================
# Flow: split the input into chunks, summarize the leading chunks for the
# selected audience, then ask the same model to grade that summary.

# Splitter settings (in characters) and the number of leading chunks that are
# actually sent to the model.
CHUNK_SIZE = 4000
CHUNK_OVERLAP = 300
MAX_CHUNKS = 2
# Maximum number of source characters handed to the evaluation prompt.
EVAL_SOURCE_CHARS = 4000

# Persona-driven prompt for the audience-specific summary.
# Placeholders: {audience}, {text}.
# Stray citation markers ("[cite: N]") and an unmatched "**" left over from
# pasted text were removed so they cannot leak into the model output.
SUMMARY_TEMPLATE = """
[STRICT AUDIT MODE: ZERO HALLUCINATION TOLERANCE]
You are an elite expert. Your ONLY source of truth is the provided text.
CRITICAL RESTRICTION:
If a concept (like 'Regularization', 'Overfitting', or 'Lasso') is NOT explicitly mentioned in the source text,
you are FORBIDDEN from mentioning it, even if it is factually related to the topic.
Failure to follow this will result in an inaccurate evaluation.
Act as an elite educational consultant and technical expert. Your goal is to transform complex information into a high-quality summary perfectly tailored for a {audience} audience.
Target Persona:
- If Audience is "Beginner": You are a supportive teacher. Use simple analogies, avoid technical jargon unless explained, and focus on the "Big Picture" and "Why it matters." Use friendly, encouraging tone and clear bullet points.
- If Audience is "Expert": You are a senior researcher. Use precise academic terminology, focus on methodology, data results, and nuanced conclusions. Maintain high information density and professional tone.
Task Instructions:
1. Core Essence: Extract the most critical information without losing the original context.
2. Structural Integrity: Organize the output with clear headers (e.g., "Overview", "Key Findings", "Implications").
3. Contextual Adaptation:
- For Beginners: Include a "Simple Definition" section for complex terms.
- For Experts: Include a "Technical Highlights" section focusing on metrics or logic.
4. Faithfulness: Ensure ( 100% ) accuracy to the source text; do not hallucinate or add external information.
CONSTRAINTS:
- STRICT ADHERENCE: Do NOT include any information, concepts, or terms that are NOT present in the source text.
- NO OUTSIDE KNOWLEDGE: Even if you know more about the topic, ignore it.
- FORBIDDEN TOPICS: If the source text does not mention things like 'Regularization' or 'Overfitting', you MUST NOT mention them.
- AUDIENCE ADAPTATION:
- If {audience} is Beginner: Explain ONLY the concepts in the text using simple analogies.
- If {audience} is Expert: Focus ONLY on the technical details provided in the text.
Source Text:
{text}
Final Output Requirements:
- Format: Professional Markdown.
- Language: Clear and Concise English.
- Accuracy: Maintain strict adherence to the facts provided in the document.
"""

# AI-as-a-judge prompt. Placeholders: {level}, {source}, {summary}.
EVAL_TEMPLATE = """
As an AI Auditor, evaluate the summary against the source text.
Return a Markdown table with scores (1-5) and justifications.
| Criterion | Score | Justification |
| :--- | :--- | :--- |
| Faithfulness | | |
| Coherence | | |
| Audience Alignment | | |
Level: {level}
Source: {source}
Summary: {summary}
"""

if st.button("Generate & Evaluate"):
    if full_text.strip():
        with st.spinner("Processing..."):
            # CHUNKING PHASE
            # Only the first MAX_CHUNKS chunks are summarized, which bounds
            # the prompt size regardless of document length.
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=CHUNK_SIZE,
                chunk_overlap=CHUNK_OVERLAP,
            )
            chunks = text_splitter.split_text(full_text)
            if len(chunks) > MAX_CHUNKS:
                # Make the truncation visible instead of silently dropping text.
                st.info(
                    f"Long document: only the first {MAX_CHUNKS} of "
                    f"{len(chunks)} sections are summarized."
                )
            text_to_process = " ".join(chunks[:MAX_CHUNKS])

            # TAILORED SUMMARIZATION PHASE
            summary_chain = PromptTemplate.from_template(SUMMARY_TEMPLATE) | llm
            summary_output = summary_chain.invoke(
                {"audience": audience_type, "text": text_to_process}
            )
            st.subheader(f"📝 Summary for {audience_type}")
            st.markdown(summary_output.content)
            st.divider()

            # AUTOMATED AI-AS-A-JUDGE EVALUATION PHASE
            # A second chain audits the generated summary against a bounded
            # prefix of the text that was summarized.
            eval_chain = PromptTemplate.from_template(EVAL_TEMPLATE) | llm
            eval_output = eval_chain.invoke({
                "level": audience_type,
                "source": text_to_process[:EVAL_SOURCE_CHARS],
                "summary": summary_output.content,
            })
            st.subheader("📊 Automated Quality Evaluation")
            st.markdown(eval_output.content)
    else:
        st.warning("Please upload a PDF or paste some text first!")