Spaces:
Runtime error
Runtime error
Sean MacAvaney
committed on
Commit
•
cf494b2
1
Parent(s):
9889763
update
Browse files
README.md
CHANGED
@@ -11,7 +11,62 @@ models:
|
|
11 |
- macavaney/doc2query-t5-base-msmarco
|
12 |
---
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
This is a demonstration of [PyTerrier's Doc2Query package](https://github.com/terrierteam/pyterrier_doc2query). Doc2Query generates
|
15 |
queries for a document, which can then be appended to a document's text before indexing to boost important terms and add missing terms.
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
Try it below!
|
|
|
11 |
- macavaney/doc2query-t5-base-msmarco
|
12 |
---
|
13 |
|
14 |
+
<style>
|
15 |
+
.transformer {
|
16 |
+
display: inline-block;
|
17 |
+
background: #8facdb;
|
18 |
+
position: relative;
|
19 |
+
height: 60px;
|
20 |
+
line-height: 60px;
|
21 |
+
padding: 0 24px;
|
22 |
+
margin: 0 18px;
|
23 |
+
color: #333;
|
24 |
+
}
|
25 |
+
.transformer::before {
|
26 |
+
content: "";
|
27 |
+
position: absolute;
|
28 |
+
bottom: 0;
|
29 |
+
top: 0;
|
30 |
+
left: -15px;
|
31 |
+
border-top: 30px solid #8facdb;
|
32 |
+
border-bottom: 30px solid #8facdb;
|
33 |
+
border-left: 15px solid transparent;
|
34 |
+
}
|
35 |
+
.transformer::after {
|
36 |
+
content: "";
|
37 |
+
position: absolute;
|
38 |
+
bottom: 0;
|
39 |
+
top: 0;
|
40 |
+
right: -15px;
|
41 |
+
border-top: 30px solid transparent;
|
42 |
+
border-bottom: 30px solid transparent;
|
43 |
+
border-left: 15px solid #8facdb;
|
44 |
+
}
|
45 |
+
.df {
|
46 |
+
width: 24px;
|
47 |
+
line-height: 24px;
|
48 |
+
text-align: center;
|
49 |
+
border: 3px double #888;
|
50 |
+
background-color: #eee;
|
51 |
+
color: #333;
|
52 |
+
border-radius: 4px;
|
53 |
+
display: inline-block;
|
54 |
+
box-sizing: content-box;
|
55 |
+
}
|
56 |
+
.pipeline {
|
57 |
+
text-align: center;
|
58 |
+
}
|
59 |
+
</style>
|
60 |
+
|
61 |
This is a demonstration of [PyTerrier's Doc2Query package](https://github.com/terrierteam/pyterrier_doc2query). Doc2Query generates
|
62 |
queries for a document, which can then be appended to a document's text before indexing to boost important terms and add missing terms.
|
63 |
|
64 |
+
Doc2Query functions as a `D→D` (document-to-document) transformer and can be used in pipelines accordingly.
|
65 |
+
|
66 |
+
<div class="pipeline">
|
67 |
+
<div class="df" title="Document Frame">D</div>
|
68 |
+
<div class="transformer">Doc2Query</div>
|
69 |
+
<div class="df" title="Document Frame">D</div>
|
70 |
+
</div>
|
71 |
+
|
72 |
Try it below!
|
app.py
CHANGED
@@ -2,7 +2,9 @@ import pandas as pd
|
|
2 |
import gradio as gr
|
3 |
from pyterrier_doc2query import Doc2Query
|
4 |
|
5 |
-
|
|
|
|
|
6 |
|
7 |
def df2code(df):
|
8 |
rows = []
|
@@ -14,7 +16,7 @@ def df2code(df):
|
|
14 |
])'''
|
15 |
|
16 |
def predict(input, model, append, num_samples):
|
17 |
-
assert model ==
|
18 |
doc2query.append = append
|
19 |
doc2query.num_samples = num_samples
|
20 |
code = f'''
|
@@ -33,6 +35,8 @@ example_inp = pd.DataFrame([
|
|
33 |
{'docno': '0', 'text': 'The presence of communication amid scientific minds was equally important to the success of the Manhattan Project as scientific intellect was. The only cloud hanging over the impressive achievement of the atomic researchers and engineers is what their success truly meant; hundreds of thousands of innocent lives obliterated.'}
|
34 |
])
|
35 |
|
|
|
|
|
36 |
gr.Interface(
|
37 |
predict,
|
38 |
inputs=[gr.Dataframe(
|
@@ -44,8 +48,8 @@ gr.Interface(
|
|
44 |
label='Pipeline Input',
|
45 |
value=example_inp,
|
46 |
), gr.Dropdown(
|
47 |
-
choices=[
|
48 |
-
value=
|
49 |
label='Model',
|
50 |
interactive=False,
|
51 |
), gr.Checkbox(
|
@@ -65,10 +69,10 @@ gr.Interface(
|
|
65 |
row_count=1,
|
66 |
wrap=True,
|
67 |
label='Pipeline Output',
|
68 |
-
value=
|
69 |
-
), gr.Markdown()],
|
70 |
title="π PyTerrier: Doc2Query",
|
71 |
-
description=open('README.md', 'rt').read().split('---\n')[-1],
|
72 |
allow_flagging='never',
|
73 |
css="table.font-mono td { white-space: pre-line; }",
|
74 |
).launch(share=False)
|
|
|
2 |
import gradio as gr
|
3 |
from pyterrier_doc2query import Doc2Query
|
4 |
|
5 |
+
MODEL = 'macavaney/doc2query-t5-base-msmarco'
|
6 |
+
|
7 |
+
doc2query = Doc2Query(MODEL, append=True, num_samples=5)
|
8 |
|
9 |
def df2code(df):
|
10 |
rows = []
|
|
|
16 |
])'''
|
17 |
|
18 |
def predict(input, model, append, num_samples):
|
19 |
+
assert model == MODEL
|
20 |
doc2query.append = append
|
21 |
doc2query.num_samples = num_samples
|
22 |
code = f'''
|
|
|
35 |
{'docno': '0', 'text': 'The presence of communication amid scientific minds was equally important to the success of the Manhattan Project as scientific intellect was. The only cloud hanging over the impressive achievement of the atomic researchers and engineers is what their success truly meant; hundreds of thousands of innocent lives obliterated.'}
|
36 |
])
|
37 |
|
38 |
+
example_out = predict(example_inp, MODEL, doc2query.append, doc2query.num_samples)
|
39 |
+
|
40 |
gr.Interface(
|
41 |
predict,
|
42 |
inputs=[gr.Dataframe(
|
|
|
48 |
label='Pipeline Input',
|
49 |
value=example_inp,
|
50 |
), gr.Dropdown(
|
51 |
+
choices=[MODEL],
|
52 |
+
value=MODEL,
|
53 |
label='Model',
|
54 |
interactive=False,
|
55 |
), gr.Checkbox(
|
|
|
69 |
row_count=1,
|
70 |
wrap=True,
|
71 |
label='Pipeline Output',
|
72 |
+
value=example_out[0],
|
73 |
+
), gr.Markdown(value=example_out[1])],
|
74 |
title="π PyTerrier: Doc2Query",
|
75 |
+
description=open('README.md', 'rt').read().split('\n---\n')[-1],
|
76 |
allow_flagging='never',
|
77 |
css="table.font-mono td { white-space: pre-line; }",
|
78 |
).launch(share=False)
|