Spaces:
Sleeping
Sleeping
orionweller
committed on
Commit
·
da8206b
1
Parent(s):
c2fe84b
right java
Browse files- app.py +79 -23
- packages.txt +1 -1
app.py
CHANGED
@@ -6,37 +6,74 @@ import os
|
|
6 |
if not os.path.exists('msmarco-passage'):
|
7 |
os.system('python -c "from pyserini.search import LuceneSearcher; LuceneSearcher.from_prebuilt_index(\'msmarco-passage\')"')
|
8 |
|
9 |
-
|
10 |
-
searcher = LuceneSearcher('msmarco-passage')
|
11 |
searcher.set_bm25(k1=0.9, b=0.4)
|
12 |
|
13 |
-
|
14 |
def search_pyserini(query):
|
15 |
try:
|
16 |
hits = searcher.search(query, k=10)
|
17 |
results = []
|
18 |
for i, hit in enumerate(hits):
|
19 |
doc = searcher.doc(hit.docid)
|
20 |
-
content = doc.raw()
|
21 |
-
results.append(
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
24 |
except Exception as e:
|
25 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
css = """
|
28 |
.gradio-container {
|
29 |
font-family: 'Arial', sans-serif;
|
30 |
}
|
31 |
-
.
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
border: 1px solid #
|
38 |
-
border-radius:
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
}
|
41 |
"""
|
42 |
|
@@ -55,16 +92,35 @@ with gr.Blocks(css=css) as iface:
|
|
55 |
search_button = gr.Button("Search", variant="primary")
|
56 |
|
57 |
with gr.Row():
|
58 |
-
output = gr.
|
59 |
-
lines=20,
|
60 |
-
label="Search Results",
|
61 |
-
elem_classes=["output-text"]
|
62 |
-
)
|
63 |
|
64 |
search_button.click(
|
65 |
fn=search_pyserini,
|
66 |
inputs=query_input,
|
67 |
-
outputs=output
|
|
|
68 |
)
|
69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
iface.launch()
|
|
|
6 |
if not os.path.exists('msmarco-passage'):
|
7 |
os.system('python -c "from pyserini.search import LuceneSearcher; LuceneSearcher.from_prebuilt_index(\'msmarco-passage\')"')
|
8 |
|
9 |
+
searcher = LuceneSearcher.from_prebuilt_index('msmarco-passage')
|
|
|
10 |
searcher.set_bm25(k1=0.9, b=0.4)
|
11 |
|
|
|
12 |
def search_pyserini(query):
    """Search the module-level ``searcher`` for ``query`` and collect the hits.

    Args:
        query: The query string passed straight to ``searcher.search``.

    Returns:
        A list with one dict per hit, in the order the searcher returned
        them, with the keys ``rank`` (starting at 1), ``doc_id``, ``score``
        and ``content`` (the raw stored document). If anything raises while
        searching or fetching documents, a single-element list
        ``[{"error": <message>}]`` is returned instead.
    """
    try:
        return [
            {
                "rank": position,
                "doc_id": hit.docid,
                "score": hit.score,
                "content": searcher.doc(hit.docid).raw(),
            }
            for position, hit in enumerate(searcher.search(query, k=10), start=1)
        ]
    except Exception as exc:
        # Errors are reported as data so the caller can display them.
        return [{"error": str(exc)}]
|
28 |
+
|
29 |
+
def format_results(results):
    """Render search results, or an error, as an HTML fragment.

    Args:
        results: A list of dicts. Either a list whose first entry has an
            ``error`` key, or one dict per hit with the keys ``rank``,
            ``doc_id``, ``score`` and ``content``.

    Returns:
        An HTML string: a ``<div class='error'>`` when the first entry has
        an ``error`` key, otherwise a ``<div class='results-container'>``
        wrapping one ``<div class='result-item'>`` per result. An empty
        list yields an empty container.
    """
    # Local import keeps this function self-contained; the accumulator is
    # deliberately not called ``html`` so it cannot shadow the module.
    from html import escape

    if isinstance(results, list) and results and "error" in results[0]:
        message = escape(str(results[0]["error"]))
        return f"<div class='error'>An error occurred: {message}</div>"

    # Text values are escaped so that markup characters inside a document,
    # an id or an error message are displayed literally instead of being
    # interpreted as HTML by the browser.
    items = [
        f"""
        <div class='result-item'>
            <h3>Rank {result['rank']} (Score: {result['score']:.4f})</h3>
            <p class='doc-id'>Doc ID: {escape(str(result['doc_id']))}</p>
            <p class='content'>{escape(str(result['content']))}</p>
        </div>
        """
        for result in results
    ]
    return "<div class='results-container'>" + "".join(items) + "</div>"
|
44 |
|
45 |
css = """
|
46 |
.gradio-container {
|
47 |
font-family: 'Arial', sans-serif;
|
48 |
}
|
49 |
+
.results-container {
|
50 |
+
display: flex;
|
51 |
+
flex-wrap: wrap;
|
52 |
+
gap: 20px;
|
53 |
+
}
|
54 |
+
.result-item {
|
55 |
+
border: 1px solid #ddd;
|
56 |
+
border-radius: 8px;
|
57 |
+
padding: 15px;
|
58 |
+
width: calc(50% - 10px);
|
59 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
60 |
+
}
|
61 |
+
.result-item h3 {
|
62 |
+
margin-top: 0;
|
63 |
+
color: #333;
|
64 |
+
}
|
65 |
+
.doc-id {
|
66 |
+
font-size: 0.9em;
|
67 |
+
color: #666;
|
68 |
+
margin-bottom: 10px;
|
69 |
+
}
|
70 |
+
.content {
|
71 |
+
font-size: 0.95em;
|
72 |
+
line-height: 1.4;
|
73 |
+
}
|
74 |
+
.error {
|
75 |
+
color: red;
|
76 |
+
font-weight: bold;
|
77 |
}
|
78 |
"""
|
79 |
|
|
|
92 |
search_button = gr.Button("Search", variant="primary")
|
93 |
|
94 |
with gr.Row():
|
95 |
+
output = gr.HTML(label="Search Results")
|
|
|
|
|
|
|
|
|
96 |
|
97 |
search_button.click(
|
98 |
fn=search_pyserini,
|
99 |
inputs=query_input,
|
100 |
+
outputs=output,
|
101 |
+
_js="(results) => format_results(results)" # Client-side formatting
|
102 |
)
|
103 |
|
104 |
+
# Add the JavaScript function to format results
|
105 |
+
iface.load(js="""
|
106 |
+
function format_results(results) {
|
107 |
+
if (Array.isArray(results) && results.length > 0 && results[0].hasOwnProperty('error')) {
|
108 |
+
return `<div class='error'>An error occurred: ${results[0].error}</div>`;
|
109 |
+
}
|
110 |
+
|
111 |
+
let html = "<div class='results-container'>";
|
112 |
+
for (let result of results) {
|
113 |
+
html += `
|
114 |
+
<div class='result-item'>
|
115 |
+
<h3>Rank ${result.rank} (Score: ${result.score.toFixed(4)})</h3>
|
116 |
+
<p class='doc-id'>Doc ID: ${result.doc_id}</p>
|
117 |
+
<p class='content'>${result.content}</p>
|
118 |
+
</div>
|
119 |
+
`;
|
120 |
+
}
|
121 |
+
html += "</div>";
|
122 |
+
return html;
|
123 |
+
}
|
124 |
+
""")
|
125 |
+
|
126 |
iface.launch()
|
packages.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
openjdk-21-jdk
|