"""Streamlit app: load a web page, summarize it with OpenAI, preview embeddings.

The script is executed top to bottom by Streamlit on every user interaction,
so expensive work (page download, model construction, encoding) is wrapped in
Streamlit caches instead of being repeated on each rerun.
"""

import numpy as np
import streamlit as st
from langchain_community.document_loaders import WebBaseLoader
from openai import OpenAI
from sentence_transformers import SentenceTransformer

EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
SUMMARY_MODEL_NAME = "gpt-3.5-turbo"
SUMMARY_MAX_TOKENS = 500
SUMMARY_SYSTEM_PROMPT = (
    "Create a detailed writeup with key points and insights from the "
    "following text. Be grounded in the given text"
)
# How long a downloaded page is reused before it is fetched again (seconds).
PAGE_CACHE_TTL_SECONDS = 600

INTRO_MARKDOWN = (
    "Explore web content with AI-powered analysis and processing.\n"
    "Upload a URL to get started!"
)
QUICK_GUIDE_MARKDOWN = (
    "### 📖 Quick Guide\n"
    "1. Enter your OpenAI API key\n"
    "2. Input a webpage URL\n"
    "3. Explore different analyses in the tabs"
)

# Set page configuration. Kept directly after the imports so it runs before
# any other Streamlit call in the script.
st.set_page_config(
    page_title="🦜 LangChain Document Explorer",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Initialize session state. ``summary_source_url`` records which URL the
# stored summary was generated for, so a summary is never shown for a
# different page.
_SESSION_DEFAULTS = {
    "openai_summary": None,
    "show_summary": False,
    "summary_source_url": None,
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default


def toggle_summary():
    """Flip the ``show_summary`` flag stored in the Streamlit session state."""
    st.session_state.show_summary = not st.session_state.show_summary


@st.cache_data(ttl=PAGE_CACHE_TTL_SECONDS, show_spinner=False)
def load_page_text(page_url: str) -> str:
    """Download ``page_url`` and return its documents' text joined by spaces.

    Cached per URL so that widget interactions do not trigger a new download.
    Exceptions raised by the loader propagate to the caller.
    """
    documents = WebBaseLoader(web_paths=[page_url]).load()
    return " ".join(doc.page_content for doc in documents)


@st.cache_resource(show_spinner=False)
def get_embedding_model() -> SentenceTransformer:
    """Build the sentence-embedding model once and reuse it across reruns."""
    return SentenceTransformer(EMBEDDING_MODEL_NAME)


@st.cache_data(show_spinner=False)
def embed_text(text: str) -> np.ndarray:
    """Encode ``text`` into a single embedding vector (cached per text)."""
    # NOTE(review): the model presumably truncates input beyond its maximum
    # sequence length, so for long pages the vector may only reflect the
    # beginning of the text - confirm against the model card.
    return get_embedding_model().encode(text)


def generate_summary(api_key: str, text: str) -> str:
    """Ask the OpenAI chat model for a grounded writeup of ``text``.

    Exceptions raised by the OpenAI client propagate to the caller.
    """
    client = OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model=SUMMARY_MODEL_NAME,
        messages=[
            {"role": "system", "content": SUMMARY_SYSTEM_PROMPT},
            {"role": "user", "content": text},
        ],
        max_tokens=SUMMARY_MAX_TOKENS,
    )
    return response.choices[0].message.content


# Custom CSS for better styling (the style payload is currently empty).
st.markdown(""" """, unsafe_allow_html=True)

# Main title with emoji
st.title("🦜 Webscrapping and Summarizing using OpenAI")
st.markdown(INTRO_MARKDOWN)

# Sidebar configuration
with st.sidebar:
    st.header("⚙️ Configuration")
    openai_api_key = st.text_input("OpenAI API Key:", type="password")
    st.markdown("---")
    st.markdown(QUICK_GUIDE_MARKDOWN)
    st.markdown("---")
    st.markdown("Made with ❤️ using LangChain 0.3 & Streamlit 1.41.0")

# Main content area
url = st.text_input("🔗 Enter webpage URL:", "https://python.langchain.com/docs/")

# Drop a summary that belongs to a previously entered URL.
if st.session_state.summary_source_url != url:
    st.session_state.openai_summary = None
    st.session_state.summary_source_url = url

# Document loading
full_text = None
if url:
    try:
        with st.spinner("Loading webpage..."):
            full_text = load_page_text(url)
        st.success("✅ Webpage loaded successfully!")
    except Exception as e:  # UI boundary: report any loader failure to the user
        st.error(f"❌ Error loading webpage: {e}")

# Process and display content in tabs
if full_text is not None:
    original_tab, analysis_tab, embeddings_tab = st.tabs(
        ["📄 Original Content", "🤖 AI Analysis", "📊 Embeddings"]
    )

    # Original Content Tab
    with original_tab:
        st.markdown("### Original Web Content")
        st.markdown(full_text)

    # AI Analysis Tab
    with analysis_tab:
        if openai_api_key:
            st.markdown("### AI Content Analysis")
            if st.button("Generate AI Summary", key="generate_summary"):
                try:
                    with st.spinner("Generating AI summary..."):
                        st.session_state.openai_summary = generate_summary(
                            openai_api_key, full_text
                        )
                except Exception as e:  # UI boundary: surface API errors
                    st.error(f"❌ Error generating summary: {e}")

            # Display OpenAI summary if available
            if st.session_state.openai_summary:
                st.markdown("#### 📝 AI-Generated Summary")
                st.markdown(st.session_state.openai_summary)
        else:
            st.warning(
                "⚠️ Please enter your OpenAI API key in the sidebar to use AI analysis."
            )

    # Embeddings Tab
    with embeddings_tab:
        st.markdown("### Document Embeddings")
        try:
            with st.spinner("Generating embeddings..."):
                embeddings = embed_text(full_text)

            st.markdown(f"**Embeddings Shape**: {embeddings.shape}")
            st.markdown("#### Embedding Vector Preview")
            st.write(embeddings[:10])  # Show first 10 dimensions

            # Visualize embedding statistics
            st.markdown("#### Embedding Statistics")
            mean_col, std_col, dim_col = st.columns(3)
            with mean_col:
                st.metric("Mean", f"{np.mean(embeddings):.4f}")
            with std_col:
                st.metric("Std Dev", f"{np.std(embeddings):.4f}")
            with dim_col:
                st.metric("Dimensions", embeddings.shape[0])
        except Exception as e:  # UI boundary: model download/encode failures
            st.error(f"❌ Error generating embeddings: {e}")
elif not url:
    # Only prompt for a URL when none was entered; a failed load has already
    # been reported above and should not be followed by this hint.
    st.info("👆 Please enter a URL above to get started!")