"""Streamlit app that answers a question from a documentation website.

The app fetches the documentation homepage, follows a limited number of
links found on it, picks the page that best matches the user's question and
shows an excerpt of that page as the answer.
"""

import re
from urllib.parse import urldefrag, urljoin

import requests
import streamlit as st
import trafilatura
from bs4 import BeautifulSoup
from smolagents import create_agent

# Seconds to wait for any single HTTP request; without a timeout a stalled
# server would block the Streamlit worker forever.
REQUEST_TIMEOUT_SECONDS = 10
# Only the first N distinct links on the homepage are fetched.
MAX_LINKS_TO_SCAN = 10
# Question words shorter than this are ignored by the keyword fallback
# (a crude way to skip words such as "how", "do", "my").
MIN_KEYWORD_LENGTH = 4
# Number of characters of the article quoted in the generated answer.
ANSWER_CONTEXT_CHARS = 500


# Streamlit UI
def main():
    """Render the page, collect the user's input and display the answer."""
    st.set_page_config(page_title="AI Documentation Assistant", layout="wide")
    st.title("📖 AI Documentation Assistant")
    st.write(
        "Enter the top-level URL of your documentation, and I'll find the "
        "most relevant article to answer your question."
    )

    # User input
    doc_url = st.text_input(
        "🔗 Documentation URL (Homepage)", "https://example.com/docs"
    )
    user_question = st.text_area("❓ Your Question", "How do I reset my password?")

    if st.button("🔍 Find Answer"):
        with st.spinner("Searching for relevant information..."):
            article_url, extracted_text = find_relevant_article(
                doc_url, user_question
            )

        if article_url:
            answer = generate_answer(user_question, extracted_text)
            st.success("✅ Answer Found!")
            st.write(answer)
            st.write(f"[🔗 Read Full Article]({article_url})")
        else:
            st.error("⚠️ No relevant articles found.")


def _fetch_html(url):
    """Return the body of *url* as text, or ``None`` if it cannot be fetched.

    Network failures, malformed URLs and non-200 responses all yield
    ``None`` so callers can simply skip the page.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    return response.text


def _collect_links(base_url, html):
    """Return distinct absolute links in *html* that contain *base_url*.

    Relative hrefs are resolved against *base_url* and fragments are dropped
    so that ``page`` and ``page#section`` count as one link. Document order
    is preserved.
    """
    soup = BeautifulSoup(html, "html.parser")
    links = []
    seen = set()
    for anchor in soup.find_all("a", href=True):
        absolute, _fragment = urldefrag(urljoin(base_url, anchor["href"]))
        if base_url in absolute and absolute not in seen:
            seen.add(absolute)
            links.append(absolute)
    return links


def _words(text):
    """Return the set of lowercase word tokens in *text*."""
    return set(re.findall(r"\w+", text.lower()))


# Step 3 & 4: Crawling and Finding the Most Relevant Article
def find_relevant_article(base_url, question):
    """Crawl the documentation homepage and pick the most relevant article.

    A page that contains the whole question verbatim (case-insensitive) is
    returned immediately. If no page does, the page sharing the most
    keywords with the question is returned instead.

    Args:
        base_url: URL of the documentation homepage.
        question: The user's question.

    Returns:
        A ``(url, text)`` tuple for the chosen article. ``(None, None)`` when
        the homepage cannot be fetched, and ``(None, "")`` when the question
        is blank or no scanned page matches.
    """
    needle = question.strip().lower()
    if not needle:
        # An empty string is a substring of every page, so it would
        # "match" the first link regardless of content.
        return None, ""

    homepage = _fetch_html(base_url)
    if homepage is None:
        return None, None

    keywords = {word for word in _words(needle) if len(word) >= MIN_KEYWORD_LENGTH}

    best_match = None
    best_text = ""
    best_score = 0
    # Limit to the first few links for now
    for link in _collect_links(base_url, homepage)[:MAX_LINKS_TO_SCAN]:
        page_html = _fetch_html(link)
        if page_html is None:
            continue
        page_text = trafilatura.extract(page_html)
        if not page_text:
            continue

        if needle in page_text.lower():
            # Stop at first verbatim match
            return link, page_text

        score = len(keywords & _words(page_text))
        if score > best_score:
            best_match, best_text, best_score = link, page_text, score

    return best_match, best_text


# Step 5: Generate Answer using `smolagents`
def generate_answer(question, context):
    """Build an agent and run it on the question and article text.

    Args:
        question: The user's question.
        context: Text extracted from the selected article.

    Returns:
        Whatever the agent returns when called with the context and question.
    """

    def answer_logic(state):
        """Agent logic to answer based on context."""
        # Truncating for brevity; tolerate a missing (None) context.
        excerpt = (state["context"] or "")[:ANSWER_CONTEXT_CHARS]
        return f"Based on the documentation, here is my answer: {excerpt}..."

    # Create the agent
    # NOTE(review): verify that the installed smolagents version exports
    # `create_agent` and accepts these keyword arguments.
    agent = create_agent(
        name="QA_Agent",
        description="Answers questions based on documentation content.",
        process=answer_logic,
    )

    # Run the agent
    response = agent({"context": context, "question": question})
    return response


if __name__ == "__main__":
    main()