File size: 3,046 Bytes
9889763
68be317
 
 
cf494b2
 
 
68be317
cba50c7
 
 
 
 
 
 
 
 
871af30
cf494b2
cba50c7
adbdb15
cba50c7
506b8cf
cba50c7
 
 
 
871af30
cba50c7
 
 
 
68be317
9889763
0071fbe
 
 
9889763
 
cf494b2
 
68be317
 
cba50c7
0f58367
 
 
cba50c7
 
 
9889763
871af30
cf494b2
 
871af30
 
cba50c7
9889763
cba50c7
adbdb15
 
 
9889763
adbdb15
 
cba50c7
 
0f58367
 
9889763
cba50c7
 
 
cf494b2
 
02864b5
cf494b2
506b8cf
232edb2
9889763
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import pandas as pd
import gradio as gr
from pyterrier_doc2query import Doc2Query

# Identifier of the doc2query checkpoint handed to Doc2Query (presumably a
# Hugging Face model id; verify). It is the only choice offered in the UI's
# model dropdown, and predict() rejects any other value.
MODEL = 'macavaney/doc2query-t5-base-msmarco'

# Single shared Doc2Query instance, built once at import time. predict()
# overwrites its `append` and `num_samples` attributes on every request, and
# the values set here seed the initial state of the "Append" checkbox and the
# "# Queries" slider.
doc2query = Doc2Query(MODEL, append=True, num_samples=5)

def df2code(df):
  """Render a DataFrame as Python source that rebuilds it from a list of dicts.

  Args:
    df: The frame to render. Every row becomes one ``{column: value}`` dict
      literal, indented by two spaces and followed by a comma.

  Returns:
    A string that starts with ``pd.DataFrame([``, lists one dict per line and
    ends with ``])``. An empty frame yields a single blank line between the
    brackets.
  """
  # Pair plain tuples with the real column labels. Named tuples (the
  # itertuples default) silently rename any label that is not a valid Python
  # identifier (e.g. 'doc id' -> '_0'), which would make the generated
  # snippet build a frame with the wrong column names.
  columns = list(df.columns)
  rows = '\n'.join(
    f'  {dict(zip(columns, values))},'
    for values in df.itertuples(index=False, name=None)
  )
  return f'''pd.DataFrame([
{rows}
])'''

def predict(input, model, append, num_samples):
  """Run the shared Doc2Query transformer and build a matching code snippet.

  Args:
    input: DataFrame of documents to process (the UI supplies ``docno`` and
      ``text`` columns). The name shadows the builtin but is kept so that
      keyword callers continue to work.
    model: Model identifier; must equal ``MODEL`` because only one
      transformer instance is loaded.
    append: Value assigned to ``doc2query.append`` before the run.
    num_samples: Value assigned to ``doc2query.num_samples`` before the run;
      coerced to ``int``.

  Returns:
    A ``(result, code)`` tuple: ``result`` is whatever ``doc2query(input)``
    returns, and ``code`` is a Markdown string containing a Python snippet
    that reproduces the call with the same settings and input.

  Raises:
    ValueError: If ``model`` is not ``MODEL``.
  """
  # Explicit check instead of `assert`, which is stripped under `python -O`
  # and would let an unsupported model name through silently.
  if model != MODEL:
    raise ValueError(f'Unsupported model {model!r}; expected {MODEL!r}')
  # The slider may hand over a float such as 5.0 (NOTE(review): depends on the
  # Gradio version); normalise so both the model setting and the rendered
  # snippet carry a plain integer.
  num_samples = int(num_samples)
  # The transformer is shared module state, so these settings persist until
  # the next call overwrites them.
  doc2query.append = append
  doc2query.num_samples = num_samples
  code = f'''
**Code:**

```python
import pandas as pd
from pyterrier_doc2query import Doc2Query
doc2query = Doc2Query({repr(model)}, append={append}, num_samples={num_samples})
doc2query({df2code(input)})
```
'''
  return (doc2query(input), code)

# Sample input shown when the demo first loads: three short passages, each
# with a `docno` identifier and its `text`.
example_inp = pd.DataFrame([
  {'docno': '0', 'text': 'The presence of communication amid scientific minds was equally important to the success of the Manhattan Project as scientific intellect was. The only cloud hanging over the impressive achievement of the atomic researchers and engineers is what their success truly meant; hundreds of thousands of innocent lives obliterated.'},
  {'docno': '86', 'text': 'Usually, you can feel the pain reverberating from the upper portion of the left side of your abdomen towards the left side of your ribcage. Irritation on the Spleen – There is a chance that your spleen has already ruptured because of various reasons and this can cause some pains on the left rib cage.'},
  {'docno': '985', 'text': 'Continue on Hollins Ferry Road to Patapsco Avenue. Make a right onto Patapsco Avenue for approximately 2.5 miles. The courthouse is at the corner of Patapsco Avenue and 7th Street. The commissioner\'s office is on the first (ground) floor.'}
])

# Run the pipeline once at import time with the transformer's current settings
# so the output table and the code snippet are pre-populated in the UI.
# example_out is the (result, markdown) tuple returned by predict().
example_out = predict(example_inp, MODEL, doc2query.append, doc2query.num_samples)

# The page description is everything after the last `---` separator line in
# README.md (presumably the text that follows the Space's metadata header;
# confirm against the README). Read it through a context manager with an
# explicit encoding so the handle is closed promptly and decoding does not
# depend on the platform locale.
with open('README.md', 'rt', encoding='utf-8') as readme_file:
    readme_description = readme_file.read().split('\n---\n')[-1]

# Raw strings keep the CSS backslashes (`\[`, `\.`) literal without relying on
# Python passing through unrecognised escape sequences, which is deprecated.
# The concatenated value is identical to the original single-line stylesheet.
custom_css = (
    r"table.font-mono td, table.font-mono th { white-space: pre-line; font-size: 11px; line-height: 16px; } "
    r"table.font-mono td input { width: 95%; } "
    r"th .cursor-pointer {display: none;} "
    r"th .min-h-\[2\.3rem\] {min-height: inherit;}"
)

# Build the demo UI and start serving it. Inputs map positionally onto
# predict(input, model, append, num_samples); outputs map onto the
# (result, code) tuple it returns.
gr.Interface(
    predict,
    inputs=[gr.Dataframe(
      headers=["docno", "text"],
      datatype=["str", "str"],
      col_count=(2, "fixed"),
      row_count=1,
      wrap=True,
      label='Pipeline Input',
      value=example_inp,
    ), gr.Dropdown(
      # Only one model is loaded, so the dropdown is informational.
      choices=[MODEL],
      value=MODEL,
      label='Model',
      interactive=False,
    ), gr.Checkbox(
      value=doc2query.append,
      label="Append",
    ), gr.Slider(
      minimum=1,
      maximum=10,
      value=doc2query.num_samples,
      step=1.,
      label='# Queries'
    )],
    outputs=[gr.Dataframe(
      headers=["docno", "text", "querygen"],
      datatype=["str", "str", "str"],
      col_count=3,
      row_count=1,
      wrap=True,
      label='Pipeline Output',
      value=example_out[0],
    ), gr.Markdown(value=example_out[1])],
    # The title previously held mis-decoded bytes of the dog emoji (U+1F415).
    title="🐕 PyTerrier: Doc2Query",
    description=readme_description,
    allow_flagging='never',
    css=custom_css,
).launch(share=False)