gdnjr5233-YOLOer committed on
Commit
608b5af
·
verified ·
1 Parent(s): 641561b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -0
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit demo app for HtmlRAG.

The page has two sections:

1. Upload an HTML file, show it, and on request show the output of
   ``clean_html`` together with the blocks returned by ``build_block_tree``.
2. Enter a query and show it next to a fixed list of sample search results.
   No rewriting or searching is performed here; both are placeholders.

Run with ``streamlit run app.py``.
"""
import streamlit as st
from htmlrag import clean_html, build_block_tree
from htmlrag import EmbedHTMLPruner
from htmlrag import GenHTMLPruner
import torch

# NOTE(review): EmbedHTMLPruner, GenHTMLPruner and torch are imported but not
# referenced anywhere in this script; they are kept so the module's import
# behaviour is unchanged. Confirm whether they can be dropped.

# Title
st.title("HtmlRAG Demo - HTML Cleaning and Query Rewriting")

# ---------------------------------------------------------------------------
# Section 1: HTML document comparison before and after cleaning
# ---------------------------------------------------------------------------
st.header("HTML Document Comparison Before and After Cleaning")

# Upload an HTML document (both common extensions are accepted).
html_file = st.file_uploader("Upload an HTML file", type=["html", "htm"])

if html_file is not None:
    # Uploaded pages are not guaranteed to be UTF-8; substitute undecodable
    # bytes instead of letting UnicodeDecodeError abort the whole script run.
    raw_html = html_file.getvalue().decode("utf-8", errors="replace")
    st.subheader("Original HTML Content")
    st.code(raw_html, language="html")

    # HtmlRAG cleaning. Everything that needs the cleaned HTML lives inside
    # this branch, because `simplified_html` only exists after the click.
    if st.button("Clean HTML"):
        simplified_html = clean_html(raw_html)
        st.subheader("Cleaned HTML Content")
        st.code(simplified_html, language="html")

        # Report what the cleaning step actually changed. The script does no
        # highlighting, so state the measurable size difference instead.
        st.subheader("Comparison")
        st.write(
            f"Original length: {len(raw_html)} characters; "
            f"cleaned length: {len(simplified_html)} characters."
        )

        # Build the HTML block tree and display it. Each entry is indexed as
        # (content, path, is_leaf), following the labels written below.
        block_tree, simplified_html = build_block_tree(
            simplified_html, max_node_words=10
        )
        st.subheader("Block Tree")
        for block in block_tree:
            st.write(f"Block Content: {block[0]}")
            st.write(f"Block Path: {block[1]}")
            st.write(f"Is Leaf: {block[2]}")
            st.write("---")

# ---------------------------------------------------------------------------
# Section 2: Query rewriting and web search results visualization
# ---------------------------------------------------------------------------
st.header("Query Rewriting and Web Search Results Visualization")

# Strip surrounding whitespace so a blank-looking query is treated as empty.
query = st.text_input("Enter a query:").strip()

if query:
    # Placeholder: no rewriting is implemented, the query is echoed unchanged.
    rewritten_query = query
    st.subheader("Rewritten Query")
    st.write(rewritten_query)

    # Simulated search results (static sample data, no search is performed).
    search_results = [
        "Result 1: Bellagio is a luxury hotel.",
        "Result 2: It was built in 1998.",
        "Result 3: The Bellagio is on the Las Vegas Strip.",
    ]

    # Display the search results
    st.subheader("Search Results")
    for position, result in enumerate(search_results, start=1):
        st.write(f"Result {position}:")
        st.write(result)
        st.write("---")

    # Display a comparison of the original query and rewritten query
    st.subheader("Query Comparison")
    st.write(f"Original Query: {query}")
    st.write(f"Rewritten Query: {rewritten_query}")