File size: 2,044 Bytes
70f85dc
 
 
 
 
 
4692c69
70f85dc
 
 
 
 
 
 
 
 
 
 
 
4692c69
70f85dc
 
 
4692c69
70f85dc
4692c69
70f85dc
 
 
 
 
 
 
 
 
 
 
 
 
4692c69
 
 
 
 
70f85dc
 
4692c69
 
70f85dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c0bdf6a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import pandas as pd
import gradio as gr
import pyterrier as pt
pt.init()
from pyterrier_gradio import Demo, MarkdownFile, interface, df2code, code2md, EX_Q

# Module-level retriever created once via from_dataset('vaswani', 'terrier_stemmed').
# predict() reuses this single object and reconfigures it on every call by
# writing to retr.controls, so its settings persist between calls.
retr = pt.TerrierRetrieve.from_dataset('vaswani', 'terrier_stemmed')

# Both constants are handed to code2md() together with the generated snippet.
# COLAB_NAME is a notebook file name; COLAB_INSTALL is the one-line pip
# install command (no leading/trailing whitespace).
COLAB_NAME = 'pyterrier_retrieve.ipynb'
COLAB_INSTALL = '!pip install -q python-terrier'

def predict(input, _, wmodel, num_results, pipe_text):
  """Run retrieval for the given queries and build the equivalent code snippet.

  Args:
    input: query frame; passed unchanged to the pipeline and rendered into the
      snippet with ``df2code``.
    _: ignored. Presumably the value of the non-interactive "Index" dropdown,
      which is the first control in the UI list -- confirm against ``Demo``.
    wmodel: weighting model name, written to ``retr.controls["wmodel"]``.
    num_results: requested number of results; coerced to ``int`` before use.
    pipe_text: when truthy, ``pt.text.get_text`` is appended to the pipeline.

  Returns:
    A ``(results, markdown)`` tuple: the pipeline output with ``score`` rounded
    to two decimals, and the result of ``code2md`` for the generated snippet.
  """
  # The slider is declared with a float step, so the value may arrive as a
  # float. Coerce once so neither the control string ("4", not "4.0") nor the
  # generated snippet (num_results=5, not 5.0) carries a float.
  num_results = int(num_results)

  # NOTE(review): `retr` is a module-level object shared by all calls; these
  # writes persist after the function returns.
  retr.controls["wmodel"] = wmodel
  # Stored as num_results - 1; presumably "end" is a zero-based inclusive rank
  # -- confirm against the Terrier controls documentation.
  retr.controls["end"] = str(num_results - 1)

  code = f'''import pandas as pd
import pyterrier as pt ; pt.init()

retr = pt.TerrierRetrieve.from_dataset('vaswani', 'terrier_stemmed', wmodel={repr(wmodel)}, num_results={num_results})
'''
  pipeline = retr
  if pipe_text:
    # Keep the executed pipeline and the displayed snippet in lockstep.
    pipeline = pipeline >> pt.text.get_text(pt.get_dataset('irds:vaswani'), 'text')
    code += f'''
pipeline = retr >> pt.text.get_text(pt.get_dataset('irds:vaswani'), 'text')

pipeline({df2code(input)})'''
  else:
    code += f'''
retr({df2code(input)})'''

  res = pipeline(input)
  # Round scores for display only.
  res['score'] = res['score'].map(lambda x: round(x, 2))
  return (res, code2md(code, COLAB_INSTALL, COLAB_NAME))

# Build the page pieces one at a time, in the same order the nested call
# would evaluate them, then assemble and launch.
readme_section = MarkdownFile('README.md')

# Query frame given to Demo as its second argument (presumably the example
# input shown in the UI -- confirm against pyterrier_gradio.Demo).
example_queries = pd.DataFrame(
  [
    ['1', 'measurement of dielectric constant of liquids by the use of microwave techniques'],
    ['2', 'mathematical analysis and design details of waveguide fed microwave radiations'],
    ['3', 'use of digital computers in the design of band pass filters having given phase and attenuation characteristics'],
  ],
  columns=['qid', 'query'],
)

# Controls, listed in the order predict() names its parameters after `input`:
# _, wmodel, num_results, pipe_text.
index_picker = gr.Dropdown(
  choices=['vaswani stemmed'],
  value='vaswani stemmed',
  label='Index',
  interactive=False,
)
wmodel_picker = gr.Dropdown(
  choices=['TF_IDF', 'BM25', 'PL2', 'DPH'],
  value='BM25',
  label='Retrieval Model',
)
num_results_slider = gr.Slider(
  minimum=1,
  maximum=10,
  value=5,
  step=1.,
  label='# Results'
)
get_text_toggle = gr.Checkbox(
  value=True,
  label="Include get_text in pipeline",
)

retrieve_demo = Demo(
  predict,
  example_queries,
  [index_picker, wmodel_picker, num_results_slider, get_text_toggle],
  scale=2/3
)

wrapup_section = MarkdownFile('wrapup.md')

interface(readme_section, retrieve_demo, wrapup_section).launch(share=False)