Spaces:

terrierteam
/

doc2query

Runtime error

App Files Community

Sean MacAvaney committed on Oct 29, 2022

Commit

cf494b2

1 Parent(s): 9889763

update

Browse files

Files changed (2) hide show

README.md +55 -0
app.py +11 -7

README.md CHANGED Viewed

@@ -11,7 +11,62 @@ models:
 - macavaney/doc2query-t5-base-msmarco
 ---
 This is a demonstration of [PyTerrier's Doc2Query package](https://github.com/terrierteam/pyterrier_doc2query). Doc2Query generates
 queries for a document, which can then be appended to a document's text before indexing to boost important terms and add missing terms.
 Try it below!

 - macavaney/doc2query-t5-base-msmarco
 ---
+<style>
+.transformer {
+  display: inline-block;
+  background: #8facdb;
+  position: relative;
+  height: 60px;
+  line-height: 60px;
+  padding: 0 24px;
+  margin: 0 18px;
+  color: #333;
+}
+.transformer::before {
+  content: "";
+  position: absolute;
+  bottom: 0;
+  top: 0;
+  left: -15px;
+  border-top: 30px solid #8facdb;
+  border-bottom: 30px solid #8facdb;
+  border-left: 15px solid transparent;
+}
+.transformer::after {
+  content: "";
+  position: absolute;
+  bottom: 0;
+  top: 0;
+  right: -15px;
+  border-top: 30px solid transparent;
+  border-bottom: 30px solid transparent;
+  border-left: 15px solid #8facdb;
+}
+.df {
+  width: 24px;
+  line-height: 24px;
+  text-align: center;
+  border: 3px double #888;
+  background-color: #eee;
+  color: #333;
+  border-radius: 4px;
+  display: inline-block;
+  box-sizing: content-box;
+}
+.pipeline {
+  text-align: center;
+}
+</style>
 This is a demonstration of [PyTerrier's Doc2Query package](https://github.com/terrierteam/pyterrier_doc2query). Doc2Query generates
 queries for a document, which can then be appended to a document's text before indexing to boost important terms and add missing terms.
+Doc2Query functions as a `D→D` (document-to-document) transformer and can be used in pipelines accordingly.
+<div class="pipeline">
+  <div class="df" title="Document Frame">D</div>
+  <div class="transformer">Doc2Query</div>
+  <div class="df" title="Document Frame">D</div>
+</div>
 Try it below!

app.py CHANGED Viewed

@@ -2,7 +2,9 @@ import pandas as pd
 import gradio as gr
 from pyterrier_doc2query import Doc2Query
-doc2query = Doc2Query('macavaney/doc2query-t5-base-msmarco', append=True, num_samples=5)
 def df2code(df):
   rows = []
@@ -14,7 +16,7 @@ def df2code(df):
 ])'''
 def predict(input, model, append, num_samples):
-  assert model == 'macavaney/doc2query-t5-base-msmarco'
   doc2query.append = append
   doc2query.num_samples = num_samples
   code = f'''
@@ -33,6 +35,8 @@ example_inp = pd.DataFrame([
   {'docno': '0', 'text': 'The presence of communication amid scientific minds was equally important to the success of the Manhattan Project as scientific intellect was. The only cloud hanging over the impressive achievement of the atomic researchers and engineers is what their success truly meant; hundreds of thousands of innocent lives obliterated.'}
 ])
 gr.Interface(
     predict,
     inputs=[gr.Dataframe(
@@ -44,8 +48,8 @@ gr.Interface(
       label='Pipeline Input',
       value=example_inp,
     ), gr.Dropdown(
-      choices=['macavaney/doc2query-t5-base-msmarco'],
-      value='macavaney/doc2query-t5-base-msmarco',
       label='Model',
       interactive=False,
     ), gr.Checkbox(
@@ -65,10 +69,10 @@ gr.Interface(
       row_count=1,
       wrap=True,
       label='Pipeline Output',
-      value=doc2query(example_inp),
-    ), gr.Markdown()],
     title="🐕 PyTerrier: Doc2Query",
-    description=open('README.md', 'rt').read().split('---\n')[-1],
     allow_flagging='never',
     css="table.font-mono td { white-space: pre-line; }",
 ).launch(share=False)

 import gradio as gr
 from pyterrier_doc2query import Doc2Query
+MODEL = 'macavaney/doc2query-t5-base-msmarco'
+doc2query = Doc2Query(MODEL, append=True, num_samples=5)
 def df2code(df):
   rows = []
 ])'''
 def predict(input, model, append, num_samples):
+  assert model == MODEL
   doc2query.append = append
   doc2query.num_samples = num_samples
   code = f'''
   {'docno': '0', 'text': 'The presence of communication amid scientific minds was equally important to the success of the Manhattan Project as scientific intellect was. The only cloud hanging over the impressive achievement of the atomic researchers and engineers is what their success truly meant; hundreds of thousands of innocent lives obliterated.'}
 ])
+example_out = predict(example_inp, MODEL, doc2query.append, doc2query.num_samples)
 gr.Interface(
     predict,
     inputs=[gr.Dataframe(
       label='Pipeline Input',
       value=example_inp,
     ), gr.Dropdown(
+      choices=[MODEL],
+      value=MODEL,
       label='Model',
       interactive=False,
     ), gr.Checkbox(
       row_count=1,
       wrap=True,
       label='Pipeline Output',
+      value=example_out[0],
+    ), gr.Markdown(value=example_out[1])],
     title="🐕 PyTerrier: Doc2Query",
+    description=open('README.md', 'rt').read().split('\n---\n')[-1],
     allow_flagging='never',
     css="table.font-mono td { white-space: pre-line; }",
 ).launch(share=False)