|
import streamlit as st |
|
from langchain_community.document_loaders import WebBaseLoader |
|
from openai import OpenAI |
|
from sentence_transformers import SentenceTransformer |
|
|
|
|
|
# Seed per-session defaults so later attribute reads on st.session_state
# never hit a missing key. Existing values are left untouched on reruns.
_SESSION_DEFAULTS = {
    'openai_summary': None,   # text of the last generated AI summary, if any
    'show_summary': False,    # boolean flag flipped by toggle_summary()
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
|
|
|
def toggle_summary():
    """Invert the boolean ``show_summary`` flag stored in Streamlit session state.

    Takes no arguments and returns nothing; the only effect is the write to
    ``st.session_state.show_summary``.
    """
    state = st.session_state
    state.show_summary = not state.show_summary
|
|
|
|
|
# Page-level settings: browser-tab title, wide layout, sidebar open on load.
# NOTE(review): the title emoji was mis-decoded ("π¦"); the surviving bytes
# match U+1F99C (parrot) — confirm that is the intended glyph.
st.set_page_config(
    page_title="🦜 LangChain Document Explorer",
    layout="wide",
    initial_sidebar_state="expanded",
)
|
|
|
|
|
# Custom stylesheet injected as raw HTML: pads the main area, makes buttons
# full-width with a top margin, and pads one additional container.
# NOTE(review): `.css-1d391kg` looks like an auto-generated class name;
# confirm it still matches an element in the Streamlit version in use.
custom_css = """
<style>
.main {
    padding: 2rem;
}
.stButton>button {
    width: 100%;
    margin-top: 1rem;
}
.css-1d391kg {
    padding: 1rem;
}
</style>
"""
# unsafe_allow_html is required, otherwise the <style> tag is escaped.
st.markdown(custom_css, unsafe_allow_html=True)
|
|
|
|
|
# Page heading followed by a short call to action.
# NOTE(review): the heading emoji was mis-decoded ("π¦"); the surviving bytes
# match U+1F99C (parrot) — confirm that is the intended glyph.
st.title("🦜 Webscrapping and Summarizing using OpenAI")
st.markdown("""
Explore web content with AI-powered analysis and processing.
Upload a URL to get started!
""")
|
|
|
|
|
# Sidebar: API-key entry, a short usage guide, and a footer.
with st.sidebar:
    st.header("⚙️ Configuration")
    # Masked input; an empty string (falsy) means no key has been entered.
    openai_api_key = st.text_input("OpenAI API Key:", type="password")

    st.markdown("---")
    # NOTE(review): the bare "π" before "Quick Guide" is a mis-decoded emoji
    # whose original glyph cannot be recovered from the remaining bytes, so
    # it is left untouched — replace it with the intended emoji.
    st.markdown("""
### π Quick Guide
1. Enter your OpenAI API key
2. Input a webpage URL
3. Explore different analyses in the tabs
""")

    st.markdown("---")
    st.markdown("Made with ❤️ using LangChain 0.3 & Streamlit 1.41.0")
|
|
|
|
|
# Address of the page to analyse, pre-filled with the LangChain docs URL.
# NOTE(review): the label's leading "π" looks like a mis-decoded emoji; the
# original glyph cannot be recovered from here, so the text is kept as is.
default_url = "https://python.langchain.com/docs/"
url = st.text_input("π Enter webpage URL:", value=default_url)
|
|
|
|
|
@st.cache_data(show_spinner=False, ttl=600)
def _load_documents(page_url: str):
    """Fetch ``page_url`` with WebBaseLoader and return the loaded documents.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is cached per URL (for 10 minutes) instead of re-downloading
    the page each time. Exceptions propagate to the caller and are not cached.
    """
    return WebBaseLoader(web_paths=[page_url]).load()


# `docs` stays None when no URL was given or loading failed; the rest of the
# script branches on its truthiness.
docs = None
if url:
    try:
        with st.spinner("Loading webpage..."):
            docs = _load_documents(url)
        # The literal was previously split across two lines by a mis-decoded
        # emoji byte, which made the statement a syntax error.
        st.success("✅ Webpage loaded successfully!")
    except Exception as e:
        # Top-level UI boundary: report any failure instead of crashing.
        st.error(f"❌ Error loading webpage: {e}")
|
|
|
|
|
@st.cache_resource(show_spinner=False)
def _load_embedding_model(model_name: str = 'all-MiniLM-L6-v2'):
    """Return a SentenceTransformer for ``model_name``.

    Cached with ``st.cache_resource`` so script reruns reuse the same model
    object instead of loading it again on every widget interaction.
    """
    return SentenceTransformer(model_name)


@st.cache_data(show_spinner=False)
def _embed_text(text: str):
    """Encode ``text`` with the shared model; cached per distinct text."""
    return _load_embedding_model().encode(text)


if docs:
    # Joined once up front because all three tabs read it.
    full_text = " ".join(doc.page_content for doc in docs)

    # NOTE(review): the bare "π" prefixes in this block (tab labels, summary
    # heading, final hint) are mis-decoded emoji whose original glyphs cannot
    # be recovered from the remaining bytes; they are left untouched.
    tabs = st.tabs(["π Original Content", "🤖 AI Analysis", "π Embeddings"])

    # --- Tab 1: raw page text -------------------------------------------
    with tabs[0]:
        st.markdown("### Original Web Content")
        st.markdown(full_text)

    # --- Tab 2: OpenAI summary ------------------------------------------
    with tabs[1]:
        if openai_api_key:
            st.markdown("### AI Content Analysis")

            if st.button("Generate AI Summary", key="generate_summary"):
                try:
                    with st.spinner("Generating AI summary..."):
                        client = OpenAI(api_key=openai_api_key)
                        # The whole page text is sent; a very long page may
                        # exceed the model's context window, in which case
                        # the API error is reported below.
                        response = client.chat.completions.create(
                            model="gpt-3.5-turbo",
                            messages=[
                                {"role": "system", "content": "Create a detailed writeup with key points and insights from the following text. Be grounded in the given text"},
                                {"role": "user", "content": full_text},
                            ],
                            max_tokens=500,
                        )
                    st.session_state.openai_summary = response.choices[0].message.content
                    # Remember which page the summary describes so it is not
                    # shown after the user switches to a different URL.
                    st.session_state.openai_summary_url = url
                except Exception as e:
                    st.error(f"❌ Error generating summary: {e}")

            # The summary lives in session state so it survives reruns, but
            # it is only displayed for the page it was generated from.
            summary_matches_page = st.session_state.get("openai_summary_url") == url
            if st.session_state.openai_summary and summary_matches_page:
                st.markdown("#### π AI-Generated Summary")
                st.markdown(st.session_state.openai_summary)
        else:
            st.warning("⚠️ Please enter your OpenAI API key in the sidebar to use AI analysis.")

    # --- Tab 3: sentence embedding of the page text ---------------------
    with tabs[2]:
        st.markdown("### Document Embeddings")
        try:
            with st.spinner("Generating embeddings..."):
                embeddings = _embed_text(full_text)

            st.markdown(f"**Embeddings Shape**: {embeddings.shape}")
            st.markdown("#### Embedding Vector Preview")
            st.write(embeddings[:10])

            st.markdown("#### Embedding Statistics")
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Mean", f"{embeddings.mean():.4f}")
            with col2:
                st.metric("Std Dev", f"{embeddings.std():.4f}")
            with col3:
                st.metric("Dimensions", embeddings.shape[0])
        except Exception as e:
            st.error(f"❌ Error generating embeddings: {e}")
else:
    # Reached when no documents are available (empty URL or failed load).
    st.info("π Please enter a URL above to get started!")
|
|