# gdnjr5233-YOLOer's picture
# Create app.py
# 608b5af verified
import streamlit as st
from htmlrag import clean_html, build_block_tree
from htmlrag import EmbedHTMLPruner
from htmlrag import GenHTMLPruner
import torch
# Title
st.title("HtmlRAG Demo - HTML Cleaning and Query Rewriting")

# ---------------------------------------------------------------------------
# Section 1: HTML cleaning.
# The user uploads an HTML file; the raw markup is shown as-is and, when the
# "Clean HTML" button is pressed, it is passed through htmlrag's clean_html
# and build_block_tree and the results are rendered.
# ---------------------------------------------------------------------------
st.header("HTML Document Comparison Before and After Cleaning")

# Upload an HTML document
html_file = st.file_uploader("Upload an HTML file", type=["html"])

if html_file is not None:
    # Decode the upload. Uploaded HTML is not guaranteed to be UTF-8, so fall
    # back to a lossy decode with a visible warning instead of letting a
    # UnicodeDecodeError abort the whole script run.
    raw_bytes = html_file.getvalue()
    try:
        raw_html = raw_bytes.decode("utf-8")
    except UnicodeDecodeError:
        raw_html = raw_bytes.decode("utf-8", errors="replace")
        st.warning(
            "The uploaded file is not valid UTF-8; "
            "undecodable bytes were replaced."
        )

    # Display the original HTML content
    st.subheader("Original HTML Content")
    st.code(raw_html, language="html")

    # HtmlRAG cleaning (runs only on the script run triggered by the button)
    if st.button("Clean HTML"):
        # Clean the HTML using the clean_html function
        simplified_html = clean_html(raw_html)
        st.subheader("Cleaned HTML Content")
        st.code(simplified_html, language="html")

        # Compare the original HTML and the cleaned HTML. Nothing is
        # highlighted in the views above, so report concrete size figures
        # rather than promising a highlight that does not exist.
        st.subheader("Comparison")
        original_len = len(raw_html)
        cleaned_len = len(simplified_html)
        st.write(f"Original length: {original_len} characters")
        st.write(f"Cleaned length: {cleaned_len} characters")
        if original_len:  # avoid dividing by zero for an empty upload
            reduction = 1 - cleaned_len / original_len
            st.write(f"Size reduction: {reduction:.1%}")

        # Build the HTML block tree and display it. build_block_tree returns
        # a pair; its second element rebinds simplified_html.
        block_tree, simplified_html = build_block_tree(
            simplified_html, max_node_words=10
        )
        st.subheader("Block Tree")
        for block in block_tree:
            # Each entry is indexed positionally: content, path, leaf flag.
            st.write(f"Block Content: {block[0]}")
            st.write(f"Block Path: {block[1]}")
            st.write(f"Is Leaf: {block[2]}")
            st.write("---")
# ---------------------------------------------------------------------------
# Section 2: query rewriting and web search results visualization.
# Reads a query from the user and shows the (currently unchanged) rewritten
# query next to a fixed list of simulated search results.
# ---------------------------------------------------------------------------
st.header("Query Rewriting and Web Search Results Visualization")

# Input a query. Surrounding whitespace is dropped so that a blank-only entry
# is treated the same as no entry at all.
query = st.text_input("Enter a query:").strip()

if query:
    # Display the rewritten query.
    # NOTE: no rewriting is performed yet; the query is passed through as-is.
    rewritten_query = query
    st.subheader("Rewritten Query")
    st.write(rewritten_query)

    # Simulated search results (hard-coded placeholders; nothing is fetched).
    # The entries carry no "Result N:" prefix because the display loop below
    # already numbers them; keeping both printed every label twice.
    search_results = [
        "Bellagio is a luxury hotel.",
        "It was built in 1998.",
        "The Bellagio is on the Las Vegas Strip.",
    ]

    # Display the search results
    st.subheader("Search Results")
    for position, result in enumerate(search_results, start=1):
        st.write(f"Result {position}:")
        st.write(result)
        st.write("---")

    # Display a comparison of the original query and rewritten query
    st.subheader("Query Comparison")
    st.write(f"Original Query: {query}")
    st.write(f"Rewritten Query: {rewritten_query}")