DexterSptizu's picture
Create app.py
4abd716 verified
import streamlit as st
from langchain_community.document_loaders import WebBaseLoader
from openai import OpenAI
from sentence_transformers import SentenceTransformer
# Seed the session-state entries this script reads later. A key that already
# exists (i.e. survived an earlier rerun) is left untouched.
for _state_key, _initial_value in (
    ("openai_summary", None),   # text of the last generated summary, if any
    ("show_summary", False),    # flag flipped by toggle_summary()
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
def toggle_summary():
    """Flip the boolean ``show_summary`` flag stored in Streamlit session state."""
    currently_shown = st.session_state.show_summary
    st.session_state.show_summary = not currently_shown
# Page-level settings: page title, wide layout, sidebar expanded on load.
_page_settings = {
    "page_title": "🦜 LangChain Document Explorer",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)
# Custom CSS for the main container, buttons, and one additional element.
# It is passed with unsafe_allow_html=True so the <style> tag is sent as HTML.
# NOTE(review): `.css-1d391kg` looks like an auto-generated class name and may
# not match anything on other Streamlit versions — confirm the intended target.
_custom_css = (
    "\n"
    "<style>\n"
    ".main {\n"
    "padding: 2rem;\n"
    "}\n"
    ".stButton>button {\n"
    "width: 100%;\n"
    "margin-top: 1rem;\n"
    "}\n"
    ".css-1d391kg {\n"
    "padding: 1rem;\n"
    "}\n"
    "</style>\n"
)
st.markdown(_custom_css, unsafe_allow_html=True)
# Main title and a short introduction shown at the top of the page.
# Fixes the "Webscrapping" typo; the intro now says "Enter" because the page
# only offers a URL text input, not an upload widget.
st.title("🦜 Web Scraping and Summarizing using OpenAI")
st.markdown("""
Explore web content with AI-powered analysis and processing.
Enter a URL to get started!
""")
# Sidebar: OpenAI API-key entry, a short usage guide, and a footer.
# The emoji in the header, guide heading and footer were mis-encoded
# (mojibake) and are restored to the intended characters here.
with st.sidebar:
    st.header("⚙️ Configuration")
    # type="password" keeps the key masked in the input box.
    openai_api_key = st.text_input("OpenAI API Key:", type="password")
    st.markdown("---")
    # Built from single-line pieces so the markdown body carries no leading
    # indentation from the enclosing `with` block.
    st.markdown(
        "\n"
        "### 📖 Quick Guide\n"
        "1. Enter your OpenAI API key\n"
        "2. Input a webpage URL\n"
        "3. Explore different analyses in the tabs\n"
    )
    st.markdown("---")
    st.markdown("Made with ❤️ using LangChain 0.3 & Streamlit 1.41.0")
# Main content area: URL entry and document loading.
@st.cache_data(show_spinner=False, ttl=600)
def _load_webpage(page_url):
    """Fetch ``page_url`` with WebBaseLoader and return the loaded documents.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the page would be downloaded again on each button click.
    Results are cached per URL and expire after ten minutes so that changes
    to the page are eventually picked up.
    """
    return WebBaseLoader(web_paths=[page_url]).load()


url = st.text_input("🔗 Enter webpage URL:", "https://python.langchain.com/docs/")

# `docs` stays None when no URL is given or loading fails; the code further
# down uses its truthiness to decide whether to render the tabs.
docs = None
if url:
    try:
        with st.spinner("Loading webpage..."):
            docs = _load_webpage(url)
        st.success("✅ Webpage loaded successfully!")
    except Exception as e:
        # UI boundary: report any loader/network failure instead of crashing.
        st.error(f"❌ Error loading webpage: {str(e)}")
# Process and display content in tabs.
@st.cache_resource(show_spinner=False)
def _get_embedding_model(model_name="all-MiniLM-L6-v2"):
    """Return a SentenceTransformer for ``model_name``, built once and reused.

    Streamlit reruns the script on every interaction; caching the model
    avoids re-instantiating it on each rerun.
    """
    return SentenceTransformer(model_name)


if docs:
    # Text of every loaded document joined with spaces; all three tabs use it.
    full_text = " ".join(doc.page_content for doc in docs)
    tabs = st.tabs(["📄 Original Content", "🤖 AI Analysis", "📊 Embeddings"])

    # Original Content Tab
    with tabs[0]:
        st.markdown("### Original Web Content")
        st.markdown(full_text)

    # AI Analysis Tab
    with tabs[1]:
        if openai_api_key:
            st.markdown("### AI Content Analysis")
            if st.button("Generate AI Summary", key="generate_summary"):
                try:
                    with st.spinner("Generating AI summary..."):
                        client = OpenAI(api_key=openai_api_key)
                        # NOTE: full_text is sent untruncated; a failure from
                        # the API (e.g. for an over-long page) is reported by
                        # the except branch below.
                        response = client.chat.completions.create(
                            model="gpt-3.5-turbo",
                            messages=[
                                {"role": "system", "content": "Create a detailed writeup with key points and insights from the following text. Be grounded in the given text"},
                                {"role": "user", "content": full_text},
                            ],
                            max_tokens=500,
                        )
                    st.session_state.openai_summary = response.choices[0].message.content
                    # Remember which page this summary belongs to so it is not
                    # shown under a different URL after the user changes it.
                    st.session_state.openai_summary_url = url
                except Exception as e:
                    # UI boundary: surface API/auth/network errors to the user.
                    st.error(f"❌ Error generating summary: {str(e)}")

            # Display the stored summary only if it was generated for the
            # page currently loaded.
            summary_is_current = st.session_state.get("openai_summary_url") == url
            if st.session_state.get("openai_summary") and summary_is_current:
                st.markdown("#### 📝 AI-Generated Summary")
                st.markdown(st.session_state.openai_summary)
        else:
            st.warning("⚠️ Please enter your OpenAI API key in the sidebar to use AI analysis.")

    # Embeddings Tab
    with tabs[2]:
        st.markdown("### Document Embeddings")
        try:
            with st.spinner("Generating embeddings..."):
                model = _get_embedding_model()
                # NOTE(review): the whole page is encoded as one string; the
                # model may truncate long inputs — confirm this is intended.
                embeddings = model.encode(full_text)
            st.markdown(f"**Embeddings Shape**: {embeddings.shape}")
            st.markdown("#### Embedding Vector Preview")
            st.write(embeddings[:10])  # Show first 10 dimensions

            # Summary statistics of the embedding vector.
            import numpy as np
            st.markdown("#### Embedding Statistics")
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Mean", f"{np.mean(embeddings):.4f}")
            with col2:
                st.metric("Std Dev", f"{np.std(embeddings):.4f}")
            with col3:
                st.metric("Dimensions", embeddings.shape[0])
        except Exception as e:
            # UI boundary: model download/encoding failures are shown inline.
            st.error(f"❌ Error generating embeddings: {str(e)}")
else:
    # Reached when no URL was entered or loading produced no documents.
    st.info("👆 Please enter a URL above to get started!")