Sheshera Mysore committed on
Commit
5dc6194
1 Parent(s): 136e376

First commit.

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .idea/*
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Maple Paper Recommender
3
- emoji: 🦀
4
  colorFrom: purple
5
  colorTo: indigo
6
  sdk: streamlit
 
1
  ---
2
  title: Maple Paper Recommender
3
+ emoji: 🍁
4
  colorFrom: purple
5
  colorTo: indigo
6
  sdk: streamlit
app.py ADDED
@@ -0,0 +1,491 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Build an editable user profile based recommender.
3
+ - Read the users json and read their paper reps and keyphrases into memory.
4
+ - Read the candidates document (first stage retrieval) and
5
+ sentence embeddings into memory (second stage retrieval).
6
+ - Display the keyphrases to users and ask them to check it.
7
+ - Use the keyphrases and sentence embeddings to compute keyphrase values.
8
+ - Display the keyphrase selection box to users for retrieval.
9
+ - Use the selected keyphrases for performing retrieval.
10
+ """
11
+ import copy
12
+ import json
13
+ import pickle
14
+ import joblib
15
+ import os
16
+ import collections
17
+
18
+ import streamlit as st
19
+ import numpy as np
20
+ from scipy.spatial import distance
21
+ from scipy import special
22
+ from sklearn.neighbors import NearestNeighbors
23
+ from sentence_transformers import SentenceTransformer, models
24
+ import torch
25
+ import ot
26
+ # import seaborn as sns
27
+ import matplotlib
28
+ matplotlib.use('Agg')
29
+ import matplotlib.pyplot as plt
30
+ plt.rcParams['figure.dpi'] = 400
31
+ plt.rcParams.update({'axes.labelsize': 'small'})
32
+
33
+
34
+ in_path = './data'
35
+
36
+
37
+ ########################################
38
+ # BACKEND CODE #
39
+ ########################################
40
def read_user(seed_json):
    """
    Given the seed json for the user read the embedded
    documents for the user. The values are stored in st.session_state
    on the first call and served from there on later calls.
    :param seed_json: dict; parsed seed json, must have the keys
        'username' and 'user_kps'.
    :return: tuple(doc_vectors_user, pid2idx_user, pid2sent_vectors_user, user_kps)
        doc_vectors_user: array loaded from embeds-{username}-doc.npy
        pid2idx_user: object loaded from pid2idx-{username}-doc.json
        pid2sent_vectors_user: OrderedDict of the sentence vectors keyed
            by pid, inserted in sorted pid order.
        user_kps: seed_json['user_kps']
    """
    # NOTE(review): the cache is keyed only on the presence of 'doc_vectors_user',
    # so a different seed json passed later in the same session gets the first
    # user's data -- confirm this is intended.
    if 'doc_vectors_user' in st.session_state:
        return (st.session_state.doc_vectors_user, st.session_state.pid2idx_user,
                st.session_state.pid2sent_vectors_user, st.session_state.user_kps)

    uname = seed_json['username']
    user_kps = seed_json['user_kps']
    user_dir = os.path.join(in_path, 'users', uname)
    # Read document vectors.
    doc_vectors_user = np.load(os.path.join(user_dir, f'embeds-{uname}-doc.npy'))
    with open(os.path.join(user_dir, f'pid2idx-{uname}-doc.json'), 'r') as fp:
        pid2idx_user = json.load(fp)
    # Read sentence vectors and fix their order by sorting on pid.
    pid2sent_vectors = joblib.load(os.path.join(user_dir, f'embeds-{uname}-sent.pickle'))
    pid2sent_vectors_user = collections.OrderedDict(
        (pid, pid2sent_vectors[pid]) for pid in sorted(pid2sent_vectors))
    st.session_state['doc_vectors_user'] = doc_vectors_user
    st.session_state['pid2idx_user'] = pid2idx_user
    st.session_state['pid2sent_vectors_user'] = pid2sent_vectors_user
    st.session_state['user_kps'] = user_kps
    # Return the sorted mapping (not the raw loaded one) so the first call
    # returns the same object as every cached call.
    return doc_vectors_user, pid2idx_user, pid2sent_vectors_user, user_kps
67
+
68
+
69
def first_stage_ranked_docs(user_doc_queries, to_rank=30):
    """
    Retrieve candidate papers near the user's seed document vectors.
    The ranked pids are stored in st.session_state on the first call and
    returned from there afterwards.
    :param user_doc_queries: 2d array with one query document vector per row.
    :param to_rank: int; number of neighbors fetched per query vector.
    :return: list of unique candidate pids, gathered rank position by
        rank position across the queries.
    """
    if 'first_stage_ret_pids' in st.session_state:
        return st.session_state.first_stage_ret_pids

    cand_dir = os.path.join(in_path, 'cands')
    # Load the candidate document vectors and the pid -> row index map.
    cand_vectors = np.load(os.path.join(cand_dir, 'embeds-s2orccompsci-100k.npy'))
    with open(os.path.join(cand_dir, 'pid2idx-s2orccompsci-100k.pickle'), 'rb') as fp:
        pid2idx_cands = pickle.load(fp)
    idx2pid_cands = {row_idx: pid for pid, row_idx in pid2idx_cands.items()}
    # Build the nearest neighbors index over the candidates.
    neighbors = NearestNeighbors(n_neighbors=to_rank)
    neighbors.fit(cand_vectors)
    st.session_state['neighbors'] = neighbors
    st.session_state['idx2pid_cands'] = idx2pid_cands

    # Neighbors for every query document; only the indices are used.
    _, nearest_idxs = neighbors.kneighbors(user_doc_queries, return_distance=True)

    # Walk rank position first and query second so that the best hits of
    # every query come before the second best hits of any query.
    top_pids = []
    seen_pids = set()
    num_queries = user_doc_queries.shape[0]
    for rank_pos in range(to_rank):
        for query_i in range(num_queries):
            pid = idx2pid_cands[nearest_idxs[query_i, rank_pos]]
            if pid in seen_pids:  # Same paper retrieved for several queries.
                continue
            seen_pids.add(pid)
            top_pids.append(pid)
    st.session_state['first_stage_ret_pids'] = top_pids
    return top_pids
104
+
105
+
106
def read_kp_encoder(in_path):
    """
    Read the kp encoder model from disk. The model is built once and
    stored in st.session_state['kp_enc_model']; later calls return the
    stored model.
    :param in_path: string; directory containing models/kp_encoder_cur_best.pt
    :return: SentenceTransformer; transformer with the saved weights
        loaded followed by mean pooling.
    """
    if 'kp_enc_model' in st.session_state:
        return st.session_state.kp_enc_model

    word_embedding_model = models.Transformer('allenai/scibert_scivocab_uncased', max_seq_length=512)
    trained_model_fname = os.path.join(in_path, 'models', 'kp_encoder_cur_best.pt')
    # Without a GPU the saved tensors have to be remapped onto the cpu.
    if torch.cuda.is_available():
        saved_model = torch.load(trained_model_fname)
    else:
        saved_model = torch.load(trained_model_fname, map_location=torch.device('cpu'))
    word_embedding_model.auto_model.load_state_dict(saved_model)
    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), pooling_mode='mean')
    kp_enc_model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
    st.session_state['kp_enc_model'] = kp_enc_model
    # Return the freshly built model too; previously the first call fell
    # off the end of the function and returned None.
    return kp_enc_model
125
+
126
+
127
def read_candidates(in_path):
    """
    Read the candidate abstracts and candidate sentence vectors from
    disk. Both are stored in st.session_state on the first call and
    returned from there afterwards.
    :param in_path: string; directory containing the cands directory.
    :return: tuple(pid2abstract, pid2sent_vectors); the two objects as
        unpickled from disk.
    """
    if 'pid2abstract' in st.session_state:
        return st.session_state.pid2abstract, st.session_state.pid2sent_vectors_cands

    cands_dir = os.path.join(in_path, 'cands')
    with open(os.path.join(cands_dir, 'abstracts-s2orccompsci-100k.pickle'), 'rb') as fp:
        pid2abstract = pickle.load(fp)
    # Sentence vectors of the candidates.
    pid2sent_vectors = joblib.load(os.path.join(cands_dir, f'embeds-sent-s2orccompsci-100k.pickle'))
    st.session_state['pid2sent_vectors_cands'] = pid2sent_vectors
    st.session_state['pid2abstract'] = pid2abstract
    return pid2abstract, pid2sent_vectors
143
+
144
+
145
def get_kp_embeddings(profile_keyphrases):
    """
    Embed the passed profile keyphrases. Embeddings are kept in
    st.session_state['kp_vectors_user'] across calls and only the
    keyphrases which are not in there yet are encoded.
    Expects st.session_state['kp_enc_model'] to be set already.
    :param profile_keyphrases: list(string)
    :return: OrderedDict(string: vector); every keyphrase embedded so far
        in the session, which includes all of profile_keyphrases.
    """
    kp_enc_model = st.session_state['kp_enc_model']
    if 'kp_vectors_user' not in st.session_state:
        st.session_state['kp_vectors_user'] = collections.OrderedDict()
    kp_vectors_user = st.session_state.kp_vectors_user
    # dict.fromkeys drops repeated keyphrases while keeping their order.
    uncached_kps = [kp for kp in dict.fromkeys(profile_keyphrases) if kp not in kp_vectors_user]
    # Skip the encoder when there is nothing new to embed instead of
    # calling it with an empty batch.
    if uncached_kps:
        kp_embeddings = kp_enc_model.encode(uncached_kps)
        for i, kp in enumerate(uncached_kps):
            kp_vectors_user[kp] = kp_embeddings[i, :]
    return kp_vectors_user
165
+
166
+
167
def generate_profile_values(profile_keyphrases):
    """
    Compute a value vector for every profile keyphrase:
    - Embed the keyphrases (or fetch their cached embeddings)
    - Stack the user's seed sentence embeddings from the session state
    - Solve for a transport plan between sentences and keyphrases using
      euclidean distances and uniform weights on both sides
    - Barycenter project the sentences through the plan so every
      keyphrase gets a vector in the sentence embedding space
    :param profile_keyphrases: list(string)
    :return: tuple(kp2valvectors, tplan)
        kp2valvectors: dict(string: vector); keyphrase to value vector.
        tplan: the transport plan; rows are sentences, columns keyphrases.
    """
    kp2embedding = get_kp_embeddings(profile_keyphrases)
    # Sentence embeddings of all the seed papers stacked row wise.
    sent_embeds = np.vstack(list(st.session_state.pid2sent_vectors_user.values()))
    # Keyphrase embeddings in the order the keyphrases were passed.
    kp_embeds = np.vstack([kp2embedding[kp] for kp in profile_keyphrases])
    num_sents, num_kps = sent_embeds.shape[0], kp_embeds.shape[0]
    # Transport plan from sentences to keyphrases.
    pair_dists = distance.cdist(sent_embeds, kp_embeds, 'euclidean')
    a_distr = [1 / num_sents] * num_sents
    b_distr = [1 / num_kps] * num_kps
    tplan = ot.bregman.sinkhorn_epsilon_scaling(a_distr, b_distr, pair_dists, 0.05, numItermax=2000)
    # Barycenter projection: len(profile_keyphrases) x embedding_dim, each
    # row divided by the total plan mass of its keyphrase.
    proj_kp_vectors = np.matmul(sent_embeds.T, tplan).T
    kp_mass = np.sum(tplan, axis=0)
    kp_value_vectors = proj_kp_vectors/kp_mass[:, np.newaxis]
    # Return as a dict.
    kp2valvectors = {kp: kp_value_vectors[row, :] for row, kp in enumerate(profile_keyphrases)}
    return kp2valvectors, tplan
199
+
200
+
201
def second_stage_ranked_docs(selected_query_kps, first_stage_pids, pid2abstract, pid2sent_reps_cand, to_rank=30):
    """
    Re-rank the first stage candidates with the selected profile keyphrases.
    Every candidate is scored with a transport cost between the value
    vectors of its closest keyphrases and its sentence vectors; lower
    cost ranks higher. Value vectors are read from
    st.session_state.kp2val_vectors.
    :param selected_query_kps: list(string); keyphrases used for ranking.
    :param first_stage_pids: list(string)
    :param pid2abstract: dict(pid: paperd); paperd has 'title' and 'abstract'.
    :param pid2sent_reps_cand: dict(pid: 2d array of sentence vectors)
    :param to_rank: int; number of papers to return.
    :return: OrderedDict(title: dict) in ranked order; each dict has the
        keys 'title', 'kp_explanations' and 'abstract'.
    """
    num_selected = len(selected_query_kps)
    if num_selected >= 3:  # Use 20% of keyphrases for scoring or 3 whichever is larger
        topk = max(int(len(st.session_state.kp2val_vectors)*0.2), 3)
    else:
        topk = num_selected
    query_kp_values = np.vstack([st.session_state.kp2val_vectors[kp] for kp in selected_query_kps])
    pid2topkdist = dict()
    # Nothing fills this at the moment so every explanation list is empty.
    pid2kp_expls = collections.defaultdict(list)
    for pid in first_stage_pids:
        sent_reps = pid2sent_reps_cand[pid]
        pair_dists = distance.cdist(query_kp_values, sent_reps)
        # Keyphrase rows of the topk smallest keyphrase-sentence distances.
        kp_ind, _ = np.unravel_index(np.argsort(pair_dists, axis=None), pair_dists.shape)
        kp_ind = list(set(kp_ind[:topk].tolist()))
        sub_pair_dists = pair_dists[kp_ind, :]
        # Keyphrases closer to some sentence get more mass; sentences are uniform.
        a_distr = special.softmax(-1*np.min(sub_pair_dists, axis=1))
        num_sents = sent_reps.shape[0]
        b_distr = [1 / num_sents] * num_sents
        tplan = ot.bregman.sinkhorn_epsilon_scaling(a_distr, b_distr, sub_pair_dists, 0.05)
        pid2topkdist[pid] = np.sum(sub_pair_dists * tplan)

    top_pids = sorted(pid2topkdist, key=pid2topkdist.get)

    # Get the docs; keyed on title so papers sharing a title collapse into one.
    retrieved_papers = collections.OrderedDict()
    for pid in top_pids:
        paperd = pid2abstract[pid]
        retrieved_papers[paperd['title']] = {
            'title': paperd['title'],
            'kp_explanations': pid2kp_expls[pid],
            'abstract': paperd['abstract']
        }
        if len(retrieved_papers) == to_rank:
            break
    return retrieved_papers
247
+
248
+
249
+ ########################################
250
+ # HELPER CODE #
251
+ ########################################
252
def parse_input_kps(unparsed_kps, initial_user_kps):
    """
    Function to parse the input keyphrase string.
    Keyphrases are split on commas and stripped; empty entries (from
    trailing or repeated commas) and repeated keyphrases are dropped
    while the order of first occurrence is kept.
    :param unparsed_kps: string; comma separated keyphrases.
    :param initial_user_kps: list(string); used when the input has no
        keyphrase in it.
    :return: list(string); the parsed keyphrases, or a shallow copy of
        initial_user_kps when nothing could be parsed.
    """
    parsed_user_kps = []
    uniq_kps = set()
    for kp in unparsed_kps.split(','):
        kp = kp.strip()
        # Skip blanks so 'a, b,' does not produce an empty keyphrase.
        if kp and kp not in uniq_kps:
            parsed_user_kps.append(kp)
            uniq_kps.add(kp)
    if not parsed_user_kps:  # Empty or separator-only input: use the initial kps
        return copy.copy(initial_user_kps)
    return parsed_user_kps
269
+
270
+
271
+ # def plot_sent_kp_alignment(tplan, kp_labels, sent_labels):
272
+ # """
273
+ # Plot the sentence keyphrase alignment.
274
+ # :return:
275
+ # """
276
+ # fig, ax = plt.subplots()
277
+ # h = sns.heatmap(tplan.T, linewidths=.3, xticklabels=sent_labels,
278
+ # yticklabels=kp_labels, cmap='Blues')
279
+ # h.tick_params('y', labelsize=5)
280
+ # h.tick_params('x', labelsize=2)
281
+ # plt.tight_layout()
282
+ # return fig
283
+
284
+
285
def multiselect_title_formatter(title):
    """
    Shorten a title for display in the multi-select: keep at most the
    first five whitespace separated words and append an ellipsis.
    :param title: string
    :return: string: formatted title
    """
    leading_words = title.split()[:5]
    return '{}...'.format(' '.join(leading_words))
293
+
294
+
295
def format_abstract(paperd, to_display=3, markdown=True):
    """
    Given a dict with title and abstract return
    a formatted text for rendering with markdown.
    :param paperd: dict; has 'title' (string) and 'abstract'
        (list(string), one sentence per item). May have
        'kp_explanations' (list(string)).
    :param to_display: int; number of abstract sentences to show. An
        ellipsis is appended only when sentences were left out.
    :param markdown: bool; if True return a html paragraph with title,
        abstract and keyphrase explanations, else a plain
        'Title: ...; Abstract: ...' string.
    :return: string
    """
    abstract_sents = paperd['abstract']
    if len(abstract_sents) <= to_display:  # Everything fits, nothing is cut off.
        sents = ' '.join(abstract_sents)
    else:
        sents = ' '.join(abstract_sents[:to_display]) + '...'
    # Seed papers carry no explanations; treat that as an empty list.
    kp_expl = ', '.join(paperd.get('kp_explanations', []))
    # Read the title from the argument; this used to read a module level
    # variable named `paper` which only happened to exist at the call sites.
    if markdown:
        par = '<p><b>Title</b>: <i>{:s}</i><br><b>Abstract</b>: {:s}<br><i>{:s}</i></p>'.\
            format(paperd['title'], sents, kp_expl)
    else:
        par = 'Title: {:s}; Abstract: {:s}'.format(paperd['title'], sents)
    return par
317
+
318
+
319
def perp_result_json():
    """
    Serialize the session so far: for every iteration the profile
    descriptors which were selected and the saved flag of every paper
    shown in that iteration. Reads i_selections, i_resultps, i_savedps
    and tuning_i from st.session_state.
    :return: string; json object keyed on the 0-based iteration index,
        each value has 'iteration', 'profile_selections' and
        'saved_papers'.
    """
    state = st.session_state
    num_iters = state.tuning_i
    # Every iteration must have selections, results and saved flags.
    assert(len(state.i_selections) == len(state.i_resultps)
           == len(state.i_savedps) == num_iters)
    result_json = {}
    iter_records = zip(range(num_iters), state.i_selections, state.i_savedps.values())
    for iter_i, selections, saved_flags in iter_records:
        result_json[iter_i] = {
            'iteration': iter_i,
            'profile_selections': copy.deepcopy(selections),
            'saved_papers': copy.deepcopy(saved_flags)
        }
    return json.dumps(result_json)
342
+
343
+
344
+ ########################################
345
+ # APP CODE #
346
+ ########################################
347
st.title('\U0001F341 Maple Paper Recommender \U0001F341')
st.markdown(
    '\U0001F341 Maple \U0001F341 makes controllable paper recommendations personalized to you based on a \U0001F331 seed set '
    '\U0001F331 of papers. The seed set of papers is used to build a \U0001F9D1 personalized profile \U0001F9D1 of keyphrases '
    'which describe the seed papers. These are your profile descriptors. You can change your recommendations by editing '
    'the list of descriptors, or including or excluding descriptors.')

# Candidate papers and the keyphrase encoder; both are cached in the
# session state by the readers.
pid2abstract_cands, pid2sent_vectors_cands = read_candidates(in_path)
kp_encoding_model = read_kp_encoder(in_path)

# One time set up of the session state (a run is every time the script
# runs, an iteration is every time recs are requested).
# NOTE(review): the source this was recovered from had lost its indentation;
# all five assignments are assumed to sit under the `if` -- confirm.
if 'tuning_i' not in st.session_state:
    initial_state = {
        # Number of recommendation iterations so far.
        'tuning_i': 0,
        # Profile keyphrases at every run.
        'run_user_kps': [],
        # Profile selections at each iteration.
        'i_selections': [],
        # dict of dicts: tuning_i: dict(paper_title: paper)
        'i_resultps': {},
        # dict of dicts: tuning_i: dict(paper_title: saved or not bool)
        'i_savedps': collections.defaultdict(dict),
    }
    for state_key, state_value in initial_state.items():
        st.session_state[state_key] = state_value
370
+
371
+ # Ask user to upload a set of seed query papers.
372
# NOTE(review): nesting reconstructed from a source which had lost its
# indentation -- confirm against the original file.
with st.sidebar:
    uploaded_file = st.file_uploader("\U0001F331 Upload seed papers",
                                     type='json',
                                     help='Upload a json file with titles and abstracts of the papers to '
                                          'include in your profile.')
    if uploaded_file is not None:
        user_papers = json.load(uploaded_file)
        # Embeddings and keyphrases of the uploaded user.
        doc_vectors_user, pid2idx_user, pid2sent_vectors_user, user_kps = read_user(user_papers)
        # Log the keyphrases of this run.
        st.session_state.run_user_kps.append(copy.copy(user_kps))
        display_profile_kps = ', '.join(user_kps)
        # First stage retrieval with the seed document vectors as queries.
        first_stage_ret_pids = first_stage_ranked_docs(user_doc_queries=doc_vectors_user, to_rank=100)
        with st.expander("Examine seed papers"):
            st.markdown(f'**Initial profile descriptors**:')
            st.markdown(display_profile_kps)
            st.markdown('**Seed papers**:')
            # The loop variable keeps the name `paper`; other code in the
            # file reads a module level variable of that name.
            for paper in user_papers['papers']:
                par = format_abstract(paperd=paper, to_display=6)
                st.markdown(par, unsafe_allow_html=True)

    st.markdown('\u2b50 Saved papers')
394
+
395
# NOTE(review): nesting reconstructed from a source which had lost its
# indentation -- confirm against the original file.
if uploaded_file is not None:
    # Create a text box where users can see their profile keyphrases.
    st.subheader('\U0001F4DD Seed paper descriptors')
    with st.form('profile_kps'):
        input_kps = st.text_area('Edit seed descriptors:', display_profile_kps,
                                 help='Edit the profile descriptors if they are redundant, incomplete, nonsensical, '
                                      'or dont describe the seed papers. OR if you would like the descriptors to '
                                      'capture aspects of the seed papers that the descriptors dont currently capture.',
                                 placeholder='If left empty initial profile descriptors will be used...')
        input_user_kps = parse_input_kps(unparsed_kps=input_kps, initial_user_kps=user_kps)
        col1, col2, col3 = st.columns([1, 1, 1])
        with col2:
            generate_profile = st.form_submit_button('\U0001F9D1 Generate profile \U0001F9D1')

    if generate_profile:
        # Reuse the stored value vectors only if they were computed for exactly
        # this set of keyphrases. Comparing against run_user_kps[-1] does not
        # work for this: the sidebar appends the initial keyphrases on every
        # run, so going back to the initial keyphrases after an edit would
        # reuse vectors of the edited set and fail later on the missing keys.
        profile_is_current = ('kp2val_vectors' in st.session_state and
                              set(st.session_state.kp2val_vectors) == set(input_user_kps))
        if not profile_is_current:
            kp2val_vectors, user_tplan = generate_profile_values(profile_keyphrases=input_user_kps)
            st.session_state['kp2val_vectors'] = kp2val_vectors
            st.session_state['user_tplan'] = user_tplan
        st.session_state.run_user_kps.append(copy.copy(input_user_kps))

    # Create a multiselect dropdown
    if 'kp2val_vectors' in st.session_state:
        st.subheader('\U0001F9D1 Profile descriptors for ranking')
        with st.form('profile_input'):
            profile_selections = st.multiselect(label='Add or remove profile descriptors to use for recommendations:',
                                                default=input_user_kps,  # Use all the values by default.
                                                options=input_user_kps,
                                                help='Items selected here will be used for creating your '
                                                     'recommended list')
            col1, col2, col3 = st.columns([1, 1, 1])
            with col2:
                generate_recs = st.form_submit_button('\U0001F9ED Recommend papers \U0001F9ED')

        # Use the uploaded files to create a ranked list of items.
        if generate_recs and profile_selections:
            st.session_state.tuning_i += 1
            st.session_state.i_selections.append(copy.deepcopy(profile_selections))
            top_papers = second_stage_ranked_docs(first_stage_pids=first_stage_ret_pids,
                                                  selected_query_kps=profile_selections,
                                                  pid2abstract=pid2abstract_cands,
                                                  pid2sent_reps_cand=pid2sent_vectors_cands,
                                                  to_rank=30)
            st.session_state.i_resultps[st.session_state.tuning_i] = copy.deepcopy(top_papers)

        # Read off from the result cache and allow users to save some papers.
        if st.session_state.tuning_i in st.session_state.i_resultps:
            cached_top_papers = st.session_state.i_resultps[st.session_state.tuning_i]
            for paper in cached_top_papers.values():
                # This statement ensures correctness for when users unselect a previously selected item.
                st.session_state.i_savedps[st.session_state.tuning_i][paper['title']] = False
                dcol1, dcol2 = st.columns([1, 16])
                with dcol1:
                    save_paper = st.checkbox('\u2b50', key=paper['title'])
                with dcol2:
                    plabel = format_abstract(paperd=paper, to_display=2, markdown=True)
                    st.markdown(plabel, unsafe_allow_html=True)
                    with st.expander('See more..'):
                        full_abstract = ' '.join(paper['abstract'])
                        st.markdown(full_abstract, unsafe_allow_html=True)
                if save_paper:
                    st.session_state.i_savedps[st.session_state.tuning_i].update({paper['title']: True})

    # Print the saved papers across iterations in the sidebar.
    with st.sidebar:
        with st.expander("Examine saved papers"):
            for iteration, savedps in st.session_state.i_savedps.items():
                st.markdown('Iteration: {:}'.format(iteration))
                for papert, saved in savedps.items():
                    if saved:
                        # The style attribute is quoted properly now; it had
                        # a stray closing quote and no opening one.
                        fpapert = '<p style="color:Gray;">- {:}</p>'.format(papert)
                        st.markdown('{:}'.format(fpapert), unsafe_allow_html=True)
        if st.session_state.tuning_i > 0:
            # Serialize once and use it for both the download and the preview.
            session_results = perp_result_json()
            # 'json' alone is not a MIME type; 'application/json' is.
            st.download_button('Download papers', session_results, mime='application/json',
                               help='Download the papers saved in the session.')
            with st.expander("Copy saved papers to clipboard"):
                st.write(json.loads(session_results))
491
+
data/cands/abstracts-s2orccompsci-100k.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e5fb34095b5540449e9f8796a14d7e6a943517c8c79bcb39f8eb8b99ff082b9
3
+ size 193435012
data/cands/embeds-s2orccompsci-100k.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4bda4f92e298644fe1487fe8b663fe1cbcff6761619c29111573074ffbf7a6f
3
+ size 614400128
data/cands/embeds-sent-s2orccompsci-100k.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2b247db248fe8e6ac81683164715253b683c49c1a1b85b9d9df3b00f0e3a7e9
3
+ size 2187387328
data/cands/pid2idx-s2orccompsci-100k.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41378d5d68130d7e24bbc63b9f8e0dd035698de1ee17737b3784def1447b440f
3
+ size 2130983
data/models/kp_encoder_cur_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dae993a087cecd4e1e03888fe5685b086c17f0db7224d718927556cb18b28bb
3
+ size 439760375
data/users/hzamani/abstracts-hzamani-forecite.jsonl ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"title": "You can't pick your neighbors, or can you? When and how to rely on retrieval in the $k$NN-LM", "abstract": ["Retrieval-enhanced language models (LMs), which condition their predictions on text retrieved from large external datastores, have re-cently shown signi\ufb01cant perplexity improvements compared to standard LMs.", "One such approach, the k NN-LM, interpolates any existing LM\u2019s predictions with the output of a k nearest neighbors model and requires no additional training.", "In this paper, we explore the importance of lexical and semantic matching in the context of items retrieved by k NN-LM.", "We \ufb01nd two trends: (1) the presence of large overlapping n -grams between the datastore and evaluation set plays an important fac-tor in strong performance, even when the datastore is derived from the training data; and (2) the k NN-LM is most bene\ufb01cial when retrieved items have high semantic similarity with the query.", "Based on our analysis, we de\ufb01ne a new formulation of the k NN-LM that uses retrieval quality to assign the interpolation coef\ufb01cient.", "We empirically measure the effectiveness of our approach on two English language modeling datasets, Wikitext-103 and PG-19.", "Our re-formulation of the k NN-LM is bene\ufb01cial in both cases, and leads to nearly 4% improvement in perplexity on the Wikitext-103 test set."], "forecite_tags": ["word retrieval", "discriminative language modeling", "word retrieval", "word retrieval", "word retrieval", "two-stage language model searching", "word retrieval"]}
2
+ {"title": "Maruna Bot: An Extensible Retrieval-Focused Framework for Task-Oriented Dialogues", "abstract": ["We present Maruna Bot, a Task-Oriented Dialogue System (TODS) that assists people in cooking or Do-It-Yourself (DIY) tasks using either a speech-only or multi-modal (speech and screen) interface.", "Building such a system is challenging, because it touches many research areas including language understanding, text generation, task planning, dialogue state tracking, question answering, multi-modal retrieval, instruction summarization, robustness, and result presentation, among others.", "Our bot lets users choose their desired tasks with flexible phrases, uses multi-stage intent classification, asks clarifying questions to improve retrieval, supports in-task and open-domain Question Answering throughout the conversation, effectively maintains the task status, performs query expansion and instruction re-ranking using both textual and visual signals."], "forecite_tags": ["dialogue system", "dialogue systems", "dialogue systems"]}
3
+ {"title": "Conversational Information Seeking", "abstract": ["Conversational information seeking (CIS) is concerned with a sequence of interactions between one or more users and an information system.", "Interactions in CIS are primarily based on natural language dialogue, while they may include other types of interactions, such as click, touch, and body gestures.", "This monograph provides a thorough overview of CIS definitions, applications, interactions, interfaces, design, implementation, and evaluation.", "This monograph views CIS applications as including conversational search, conversational question answering, and conversational recommendation.", "Our aim is to provide an overview of past research related to CIS, introduce the current state-of-the-art in CIS, highlight the challenges still being faced in the community.", "and suggest future directions."], "forecite_tags": ["conversational interfaces", "conversational interactivity", "conversational interfaces", "conversational interfaces", "conversational interactivity", "conversational interactivity"]}
4
+ {"title": "Curriculum Learning for Dense Retrieval Distillation", "abstract": ["Recent work has shown that more effective dense retrieval models can be obtained by distilling ranking knowledge from an existing base re-ranking model.", "In this paper, we propose a generic curriculum learning based optimization framework called CL-DRD that controls the difficulty level of training data produced by the re-ranking (teacher) model.", "CL-DRD iteratively optimizes the dense retrieval (student) model by increasing the difficulty of the knowledge distillation data made available to it.", "In more detail, we initially provide the student model coarse-grained preference pairs between documents in the teacher's ranking, and progressively move towards finer-grained pairwise document ordering requirements.", "In our experiments, we apply a simple implementation of the CL-DRD framework to enhance two state-of-the-art dense retrieval models.", "Experiments on three public passage retrieval datasets demonstrate the effectiveness of our proposed framework."], "forecite_tags": ["ranked retrieval", "curriculum learning", "ranked retrieval", "ranked retrieval", "ranked retrieval", "ranked retrieval"]}
5
+ {"title": "Stochastic Optimization of Text Set Generation for Learning Multiple Query Intent Representations", "abstract": ["Learning multiple intent representations for queries has potential applications in facet generation, document ranking, search result diversification, and search explanation.", "The state-of-the-art model for this task assumes that there is a sequence of intent representations.", "In this paper, we argue that the model should not be penalized as long as it generates an accurate and complete set of intent representations.", "Based on this intuition, we propose a stochastic permutation invariant approach for optimizing such networks.", "We extrinsically evaluate the proposed approach on a facet generation task and demonstrate significant improvements compared to competitive baselines.", "Our analysis shows that the proposed permutation invariant approach has the highest impact on queries with more potential intents."], "forecite_tags": ["faceted search", "textual representations", "term networks", "permutation invariant training", "term networks", "faceted search"]}
6
+ {"title": "The cardioprotective effects of nano\u2010curcumin against doxorubicin\u2010induced cardiotoxicity: A systematic review", "abstract": ["Although the chemotherapeutic drug, doxorubicin, is commonly used to treat various malignant tumors, its clinical use is restricted because of its toxicity especially cardiotoxicity.", "The use of curcumin may alleviate some of the doxorubicin\u2010induced cardiotoxic effects.", "Especially, using the nano\u2010formulation of curcumin can overcome the poor bioavailability of curcumin and enhance its physicochemical properties regarding its efficacy.", "In this study, we systematically reviewed the potential cardioprotective effects of nano\u2010curcumin against the doxorubicin\u2010induced cardiotoxicity.", "A systematic search was accomplished based on Preferred Reporting Items for Systematic Reviews and Meta\u2010Analyses guidelines for the identification of all relevant articles on \u201cthe role of nano\u2010curcumin on doxorubicin\u2010induced cardiotoxicity\u201d in the electronic databases of Scopus, PubMed, and Web of Science up to July 2021.", "One hundred and sixty\u2010nine articles were screened following a predefined set of inclusion and exclusion criteria.", "Ten eligible scientific papers were finally included in the present systematic review.", "The administration of doxorubicin reduced the body and heart weights of mice/rats compared to the control groups.", "In contrast, the combined treatment of doxorubicin and nano\u2010curcumin increased the body and heart weights of animals compared with the doxorubicin\u2010treated groups alone.", "Furthermore, doxorubicin could significantly induce the biochemical and histological changes in the cardiac tissue; however, coadministration of nano\u2010curcumin formulation demonstrated a pattern opposite to the doxorubicin\u2010induced changes.", "The coadministration of nano\u2010curcumin alleviates the doxorubicin\u2010induced cardiotoxicity through various 
mechanisms including antioxidant, anti\u2010inflammatory, and antiapoptotic effects.", "Also, the cardioprotective effect of nano\u2010curcumin formulation against doxorubicin\u2010induced cardiotoxicity was higher than free curcumin."], "forecite_tags": ["therapeutic targets", "cytotoxic effect", "therapeutic targets", "therapeutic targets", "biomedical abstract", "biomedical abstract", "biomedical abstract", "therapeutic targets", "therapeutic targets", "therapeutic targets", "therapeutic targets", "therapeutic targets"]}
7
+ {"title": "Multi-Task Retrieval-Augmented Text Generation with Relevance Sampling", "abstract": ["This paper studies multi-task training of retrieval-augmented generation models for knowledge-intensive tasks.", "We propose to clean the training set by utilizing a distinct property of knowledge-intensive generation: The connection of query-answer pairs to items in the knowledge base.", "We \ufb01lter training examples via a threshold of con\ufb01dence on the relevance labels, whether a pair is answerable by the knowledge base or not.", "We train a single Fusion-in-Decoder (FiD) generator on seven combined tasks of the KILT benchmark.", "The experimental results suggest that our simple yet effective approach substantially improves competitive baselines on two strongly imbalanced tasks; and shows either smaller improvements or no signi\ufb01cant regression on the remaining tasks.", "Furthermore, we demonstrate our multi-task training with relevance label sampling scales well with increased model capacity and achieves state-of-the-art results in \ufb01ve out of seven KILT tasks."], "forecite_tags": ["retrieval tasks", "retrieval tasks", "relevance networks", "word learning", "retrieval tasks", "relevance networks"]}
8
+ {"title": "Revisiting Open Domain Query Facet Extraction and Generation", "abstract": ["Web search queries can often be characterized by various facets.", "Extracting and generating query facets has various real-world applications, such as displaying facets to users in a search interface, search result diversification, clarifying question generation, and enabling exploratory search.", "In this work, we revisit the task of query facet extraction and generation and study various formulations of this task, including facet extraction as sequence labeling, facet generation as autoregressive text generation or extreme multi-label classification.", "We conduct extensive experiments and demonstrate that these approaches lead to complementary sets of facets.", "We also explored various aggregation approaches based on relevance and diversity to combine the facet sets produced by different formulations of the task.", "The approaches presented in this paper outperform state-of-the-art baselines in terms of both precision and recall.", "We confirm the quality of the proposed methods through manual annotation.", "Since there is no open-source software for facet extraction and generation, we release a toolkit named Faspect, that includes various model implementations for this task."], "forecite_tags": ["query retrieval", "faceted search", "unsupervised query segmentation", "faceted search", "faceted search", "unsupervised query segmentation", "annotation engine", "natural language toolkit"]}
9
+ {"title": "Conversational Information Seeking: Theory and Application", "abstract": ["Conversational information seeking (CIS) involves interaction sequences between one or more users and an information system.", "Interactions in CIS are primarily based on natural language dialogue, while they may include other types of interactions, such as click, touch, and body gestures.", "CIS recently attracted significant attention and advancements continue to be made.", "This tutorial follows the content of the recent Conversational Information Seeking book authored by several of the tutorial presenters.", "The tutorial aims to be an introduction to CIS for newcomers to CIS in addition to the recent advanced topics and state-of-the-art approaches for students and researchers with moderate knowledge of the topic.", "A significant part of the tutorial is dedicated to hands-on experiences based on toolkits developed by the presenters for conversational passage retrieval and multi-modal task-oriented dialogues.", "The outcomes of this tutorial include theoretical and practical knowledge, including a forum to meet researchers interested in CIS."], "forecite_tags": ["conversational interfaces", "conversational interactivity", "conversational interactivity", "conversational interactivity", "conversational interactivity", "conversational interfaces", "conversational interactivity"]}
10
+ {"title": "MIMICS-Duo: Offline & Online Evaluation of Search Clarification", "abstract": ["Asking clarification questions is an active area of research; however, resources for training and evaluating search clarification methods are not sufficient.", "To address this issue, we describe MIMICS-Duo, a new freely available dataset of 306 search queries with multiple clarifications (a total of 1,034 query-clarification pairs).", "MIMICS-Duo contains fine-grained annotations on clarification questions and their candidate answers and enhances the existing MIMICS datasets by enabling multi-dimensional evaluation of search clarification methods, including online and offline evaluation.", "We conduct extensive analysis to demonstrate the relationship between offline and online search clarification datasets and outline several research directions enabled by MIMICS-Duo.", "We believe that this resource will help researchers better understand clarification in search."], "forecite_tags": ["exploratory search tasks", "semantic searches", "exploratory search tasks", "exploratory search tasks", "exploratory search tasks"]}
11
+ {"title": "Are We There Yet? A Decision Framework for Replacing Term Based Retrieval with Dense Retrieval Systems", "abstract": ["Recently, several dense retrieval (DR) models have demonstrated competitive performance to term-based retrieval that are ubiquitous in search systems.", "In contrast to term-based matching, DR projects queries and documents into a dense vector space and retrieves results via (approximate) nearest neighbor search.", "Deploying a new system, such as DR, inevitably involves tradeoffs in aspects of its performance.", "Established retrieval systems running at scale are usually well understood in terms of effectiveness and costs, such as query latency, indexing throughput, or storage requirements.", "In this work, we propose a framework with a set of criteria that go beyond simple effectiveness measures to thoroughly compare two retrieval systems with the explicit goal of assessing the readiness of one system to replace the other.", "This includes careful tradeoff considerations between effectiveness and various cost factors.", "Furthermore, we describe guardrail criteria, since even a system that is better on average may have systematic failures on a minority of queries.", "The guardrails check for failures on certain query characteristics and novel failure types that are only possible in dense retrieval systems.", "We demonstrate our decision framework on a Web ranking scenario.", "In that scenario, state-of-the-art DR models have surprisingly strong results, not only on average performance but passing an extensive set of guardrail tests, showing robustness on different query characteristics, lexical matching, generalization, and number of regressions.", "DR with approximate nearest neighbor search has comparable low query latency to term-based systems.", "The main reason to reject current DR models in this scenario is the cost of vectorization, which is much higher than the cost of building a traditional index.", "It is impossible to 
predict whether DR will become ubiquitous in the future, but one way this is possible is through repeated applications of decision processes such as the one presented here."], "forecite_tags": ["similarity-based retrieval", "similarity-based retrieval", "similarity-based retrieval", "scalable data retrieval", "information retrieval performance", "ranked retrieval", "ranked retrieval", "ranked retrieval", "ranking framework", "information retrieval models", "similarity-based retrieval", "ranked retrieval", "ranked retrieval"]}
12
+ {"title": "Stochastic Retrieval-Conditioned Reranking", "abstract": ["The multi-stage cascaded architecture has been adopted by many search engines for efficient and effective retrieval.", "This architecture consists of a stack of retrieval and reranking models in which efficient retrieval models are followed by effective (neural) learning-to-rank models.", "The optimization of these learning-to-rank models is loosely connected to the early stage retrieval models.", "This paper draws theoretical connections between the early stage retrieval and late stage reranking models by deriving expected reranking performance conditioned on the early stage retrieval results.", "Our findings shed light on optimization of both retrieval and reranking models.", "As a result, we also introduce a novel loss function for training reranking models that leads to significant improvements on multiple public benchmarks.", "Our findings provide theoretical and empirical guidelines for developing multi-stage cascaded retrieval models."], "forecite_tags": ["ranked retrieval", "ranked retrieval", "ranked retrieval", "ranked retrieval", "ranked retrieval", "ranked retrieval", "ranked retrieval"]}
13
+ {"title": "Predicting Prerequisite Relations for Unseen Concepts", "abstract": ["Concept prerequisite learning (CPL) plays a key role in developing technologies that assist people to learn a new complex topic or concept.", "Previous work commonly assumes that all concepts are given at training time and solely focuses on predicting the unseen prerequisite relationships between them.", "However, many real-world scenarios deal with concepts that are left undiscovered at training time, which is relatively unexplored.", "This paper studies this problem and proposes a novel alternating knowledge distillation approach to take advantage of both contentand graph-based models for this task.", "Extensive experiments on three public benchmarks demonstrate up to 10% improvements in terms of F1 score."], "forecite_tags": ["learning concepts", "learning concepts", "pre-existing knowledge", "structured knowledge", "knowledge enhancement"]}
14
+ {"title": "Entrance Surface Dose Measurement at Thyroid and Parotid Gland Regions in Cone-Beam Computed Tomography and Panoramic Radiography", "abstract": ["Purpose: Ionizing radiation-absorbed doses is a crucial concern in Cone-Beam Computed Tomography (CBCT) and panoramic radiography.", "This study aimed to evaluate and compare the Entrance Skin Doses (ESD) of thyroid and parotid gland regions in CBCT and panoramic radiography in Yazd province, Iran.", "\nMaterials and Methods: In this cross-sectional study, 332 patients were included, who were then divided into two age groups (adult and pediatric) and underwent dental CBCT and panoramic radiography.", "Twelve Thermoluminescence Dosimeters (TLD- GR200) were used for each patient to measure the ESD of thyroid and parotid glands.", "The differences between the ESD values in CBCT and panoramic examinations as well as between the adults and children groups were evaluated by one-way ANOVA and Man-Whitney tests.", "\nResults: The mean and Standard Deviation (SD) values of ESD in panoramic imaging were equal to 61 \u00b1 4 and 290 \u00b1 12 \u00b5Gy for the thyroid and parotid glands of the adult groups, respectively.", "Notably, these values for CBCT were significantly higher (P<0.01), as 377 \u00b1 139 and 1554 \u00b1 177 \u00b5Gy, respectively.", "Moreover, the mean ESD values in the panoramic examination were 41 \u00b1 3 and 190 \u00b1 16 \u00b5Gy for thyroid and parotid glands for the children group, while they were 350 \u00b1 120 and 990 \u00b1 107 \u00b5Gy in CBCT (P<0.01), respectively.", "The ESD values in the parotid gland were approximately 3.4 (2.8-4.1) and 4.7 (4.6-4.8) times greater than those for CBCT and panoramic examinations, respectively.", "\nConclusion: Although CBCT provides supplementary diagnostic advantages, the thyroid and parotid glands\u2019 doses are higher than panoramic radiography.", "Therefore, the risks and benefits of each method should be considered before their prescription."], 
"forecite_tags": ["cone beam computed tomography", "cone beam computed tomography", "dental radiographs", "thyroid gland", "radiologist assessment", "thyroid gland", "thyroid gland", "thyroid gland", "thyroid gland", "thyroid gland", "radiologist assessment"]}
15
+ {"title": "Estimating the risks of exposure-induced death associated with common computed tomography procedures", "abstract": ["Background : This study aimed to assess the risks of exposure - induced death (REID) in patients and embryos during CT examinations in Yazd province (Iran).", "Materials and Methods: Data on the exposure parameters were retrospectively collected from six imaging institutions.", "In total, 932 patients were included in this study and for each patient, organ doses were then estimated using ImpactDose software.", "The REIDs were calculated by BEIR VII risk model and using PCXMC software.", "In the case of gestational irradiation, excess cancer risk of 0.006% per mSv was taken into account in terms of the ICRP 84 recommendations, to calculate the excess childhood cancer risk imposed on the embryo.", "Results: The highest estimated organ doses for abdomen - pelvis, routine chest, chest HRCT, brain, and sinus examinations were obtained as 12.82 mSv for kidneys, 12.09 mSv for thymus, 13.16 mSv for thymus, 29.71 mSv for brain, and 11.70 mSv for oral mucosa, respectively.", "Across all procedures, abdomen - pelvis CT scan induced the highest excess REID to the patients (240 deaths per million).", "The highest delivered dose to the fetus was roughly 35 mSv, which was lower than the threshold dose proposed by ICRP (100 mSv) for the induction of malformations.", "However, the associated excess fatal childhood cancer risk of 2122 incidence per million scans can be a subject of concern for public health experts.", "Conclusion: Based on the results, although death risks related to induced cancer from CT scans were negligible, this risk can be relatively significant for children exposed during the fetal period."], "forecite_tags": ["ct image", "ct images", "radiation dose", "risk assessment framework", "radiation dose", "radiation dose", "pelvic ct images", "radiation dose", "radiation dose", "radiation exposure"]}
16
+ {"title": "Retrieval-Enhanced Machine Learning", "abstract": ["Although information access systems have long supportedpeople in accomplishing a wide range of tasks, we propose broadening the scope of users of information access systems to include task-driven machines, such as machine learning models.", "In this way, the core principles of indexing, representation, retrieval, and ranking can be applied and extended to substantially improve model generalization, scalability, robustness, and interpretability.", "We describe a generic retrieval-enhanced machine learning (REML) framework, which includes a number of existing models as special cases.", "REML challenges information retrieval conventions, presenting opportunities for novel advances in core areas, including optimization.", "The REML research agenda lays a foundation for a new style of information access research and paves a path towards advancing machine learning and artificial intelligence."], "forecite_tags": ["retrieval model", "retrieval model", "retrieval model", "intelligent retrieval", "retrieval model"]}
17
+ {"title": "Analyzing clarification in asynchronous information\u2010seeking conversations", "abstract": ["This research analyzes human\u2010generated clarification questions to provide insights into how they are used to disambiguate and provide a better understanding of information needs.", "A set of clarification questions is extracted from posts on the Stack Exchange platform.", "Novel taxonomy is defined for the annotation of the questions and their responses.", "We investigate the clarification questions in terms of whether they add any information to the post (the initial question posted by the asker) and the accepted answer, which is the answer chosen by the asker.", "After identifying, which clarification questions are more useful, we investigated the characteristics of these questions in terms of their types and patterns.", "Non\u2010useful clarification questions are identified, and their patterns are compared with useful clarifications.", "Our analysis indicates that the most useful clarification questions have similar patterns, regardless of topic.", "This research contributes to an understanding of clarification in conversations and can provide insight for clarification dialogues in conversational search scenarios and for the possible system generation of clarification requests in information\u2010seeking conversations."], "forecite_tags": ["user comprehension", "conversational threads", "question answering", "question answering", "natural language questions", "user comprehension", "conversational threads", "conversational threads"]}
18
+ {"title": "DISAPERE: A Dataset for Discourse Structure in Peer Review Discussions", "abstract": ["At the foundation of scientific evaluation is the labor-intensive process of peer review.", "This critical task requires participants to consume vast amounts of highly technical text.", "Prior work has annotated different aspects of review argumentation, but discourse relations between reviews and rebuttals have yet to be examined.", "We present DISAPERE, a labeled dataset of 20k sentences contained in 506 review-rebuttal pairs in English, annotated by experts.", "DISAPERE synthesizes label sets from prior work and extends them to include fine-grained annotation of the rebuttal sentences, characterizing their context in the review and the authors\u2019 stance towards review arguments.", "Further, we annotate every review and rebuttal sentence.", "We show that discourse cues from rebuttals can shed light on the quality and interpretation of reviews.", "Further, an understanding of the argumentative strategies employed by the reviewers and authors provides useful signal for area chairs and other decision makers."], "forecite_tags": ["textual reviews", "argumentation mining", "argumentation mining", "argumentation mining", "argumentation mining", "argumentation mining", "discourse processing", "textual reviews"]}
19
+ {"title": "FiD-Light: Efficient and Effective Retrieval-Augmented Text Generation", "abstract": ["Retrieval-augmented generation models offer many bene\ufb01ts over standalone language models: besides a textual answer to a given query they provide provenance items retrieved from an updateable knowledge base.", "However, they are also more complex systems and need to handle long inputs.", "In this work, we introduce FiD-Light to strongly increase the ef\ufb01ciency of the state-of-the-art retrieval-augmented FiD model, while maintaining the same level of effectiveness.", "Our FiD-Light model constrains the information \ufb02ow from the encoder (which encodes passages separately) to the decoder (using concatenated encoded representations).", "Fur-thermore, we adapt FiD-Light with re-ranking capabilities through textual source pointers, to improve the top-ranked provenance precision.", "Our experiments on a diverse set of seven knowledge intensive tasks (KILT) show FiD-Light consistently improves the Pareto frontier between query latency and effectiveness.", "FiD-Light with source pointing sets substantial new state-of-the-art results on six KILT tasks for combined text generation and provenance retrieval evaluation, while maintaining reasonable ef\ufb01ciency."], "forecite_tags": ["natural language generation", "attention-based neural machine translation", "attention-based neural machine translation", "sentence encoders", "ranked retrieval", "attention-based neural machine translation", "attention-based neural machine translation"]}
20
+ {"title": "Generalizing Discriminative Retrieval Models using Generative Tasks", "abstract": ["Information Retrieval has a long history of applying either discriminative or generative modeling to retrieval and ranking tasks.", "Recent developments in transformer architectures and multi-task learning techniques have dramatically improved our ability to train effective neural models capable of resolving a wide variety of tasks using either of these paradigms.", "In this paper, we propose a novel multi-task learning approach which can be used to produce more effective neural ranking models.", "The key idea is to improve the quality of the underlying transformer model by cross-training a retrieval task and one or more complementary language generation tasks.", "By targeting the training on the encoding layer in the transformer architecture, our experimental results show that the proposed multi-task learning approach consistently improves retrieval effectiveness on the targeted collection and can easily be re-targeted to new ranking tasks.", "We provide an in-depth analysis showing how multi-task learning modifies model behaviors, resulting in more general models."], "forecite_tags": ["neural ranking models", "neural ranking models", "neural ranking models", "sequence-to-sequence learning", "neural ranking models", "neural ranking models"]}
21
+ {"title": "Explaining Documents' Relevance to Search Queries", "abstract": ["RAZIEH RAHIMI, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA YOUNGWOO KIM, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA HAMED ZAMANI, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA JAMES ALLAN, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA"], "forecite_tags": ["document retrieval"]}
data/users/hzamani/abstracts-hzamani-forecite.txt ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 0
2
+ Retrieval-enhanced language models (LMs), which condition their predictions on text retrieved from large external datastores, have re-cently shown significant perplexity improvements compared to standard LMs.
3
+ [('word retrieval', '2.4194'), ('two-stage language model searching', '2.5523'), ('information retrieval models', '2.5805'), ('sentence retrieval', '2.5908'), ('similarity-based retrieval', '2.5979'), ('discriminative language models', '2.6257'), ('discriminative language modeling', '2.6339'), ('lexical database', '2.6401'), ('similarity based retrieval', '2.6414'), ('retrieval model', '2.6519')]
4
+
5
+ One such approach, the k NN-LM, interpolates any existing LM’s predictions with the output of a k nearest neighbors model and requires no additional training.
6
+ [('discriminative language modeling', '2.5731'), ('two-stage language model searching', '2.6024'), ('word retrieval', '2.6099'), ('discriminative language models', '2.6121'), ('n-gram language models', '2.6342'), ('recurrent neural network language model', '2.6521'), ('language modeling', '2.6809'), ('neural network based language models', '2.6864'), ('neural language model', '2.6921'), ('information retrieval models', '2.6965')]
7
+
8
+ In this paper, we explore the importance of lexical and semantic matching in the context of items retrieved by k NN-LM.
9
+ [('word retrieval', '2.4256'), ('word similarity', '2.5723'), ('two-stage language model searching', '2.5733'), ('keyword similarity', '2.6087'), ('similarity-based retrieval', '2.6210'), ('lexical database', '2.6265'), ('discriminative language models', '2.6479'), ('keyword matching', '2.6626'), ('semantically related words', '2.6704'), ('similarity based retrieval', '2.6730')]
10
+
11
+ We find two trends: (1) the presence of large overlapping n -grams between the datastore and evaluation set plays an important fac-tor in strong performance, even when the datastore is derived from the training data; and (2) the k NN-LM is most beneficial when retrieved items have high semantic similarity with the query.
12
+ [('word retrieval', '2.4718'), ('two-stage language model searching', '2.5771'), ('similarity-based retrieval', '2.5945'), ('discriminative language models', '2.6232'), ('discriminative language modeling', '2.6407'), ('lexical database', '2.6413'), ('similarity based retrieval', '2.6428'), ('word2vec model', '2.6433'), ('word representation features', '2.6489'), ('sentence retrieval', '2.6674')]
13
+
14
+ Based on our analysis, we define a new formulation of the k NN-LM that uses retrieval quality to assign the interpolation coefficient.
15
+ [('word retrieval', '2.5377'), ('similarity-based retrieval', '2.5453'), ('similarity based retrieval', '2.5777'), ('retrieval model', '2.6155'), ('information retrieval models', '2.6391'), ('two-stage language model searching', '2.6556'), ('sentence retrieval', '2.6773'), ('semantic similarity score', '2.6782'), ('context-aware retrieval', '2.6923'), ('probabilistic retrieval model', '2.6980')]
16
+
17
+ We empirically measure the effectiveness of our approach on two English language modeling datasets, Wikitext-103 and PG-19.
18
+ [('two-stage language model searching', '2.5419'), ('word retrieval', '2.5424'), ('n-gram language models', '2.5469'), ('discriminative language modeling', '2.5509'), ('discriminative language models', '2.5519'), ('word2vec model', '2.5523'), ('n-gram language model', '2.6080'), ('word models', '2.6121'), ('information retrieval models', '2.6299'), ('factored language models', '2.6477')]
19
+
20
+ Our re-formulation of the k NN-LM is beneficial in both cases, and leads to nearly 4% improvement in perplexity on the Wikitext-103 test set.
21
+ [('word retrieval', '2.4559'), ('two-stage language model searching', '2.5910'), ('discriminative language modeling', '2.6010'), ('sentence retrieval', '2.6332'), ('word2vec model', '2.6338'), ('discriminative language models', '2.6381'), ('n-gram language models', '2.6444'), ('similarity-based retrieval', '2.6535'), ('word representation features', '2.6567'), ('word2vec', '2.6580')]
22
+
23
+ 1
24
+ We present Maruna Bot, a Task-Oriented Dialogue System (TODS) that assists people in cooking or Do-It-Yourself (DIY) tasks using either a speech-only or multi-modal (speech and screen) interface.
25
+ [('dialogue system', '2.5569'), ('dialogue systems', '2.5615'), ('conversational interfaces', '2.6068'), ('dialog systems', '2.6545'), ('conversational agent', '2.6638'), ('spoken dialogue system', '2.6669'), ('dialogue agent', '2.6710'), ('spoken dialog systems', '2.6754'), ('conversational systems', '2.6786'), ('natural language interfaces', '2.6872')]
26
+
27
+ Building such a system is challenging, because it touches many research areas including language understanding, text generation, task planning, dialogue state tracking, question answering, multi-modal retrieval, instruction summarization, robustness, and result presentation, among others.
28
+ [('dialogue systems', '2.5884'), ('dialogue system', '2.6094'), ('dialog systems', '2.6962'), ('natural language interfaces', '2.6982'), ('dialogue model', '2.7054'), ('automated dialogue systems', '2.7093'), ('dialog model', '2.7250'), ('natural language interface', '2.7251'), ('spoken dialogue system', '2.7440'), ('conversational systems', '2.7610')]
29
+
30
+ Our bot lets users choose their desired tasks with flexible phrases, uses multi-stage intent classification, asks clarifying questions to improve retrieval, supports in-task and open-domain Question Answering throughout the conversation, effectively maintains the task status, performs query expansion and instruction re-ranking using both textual and visual signals.
31
+ [('dialogue systems', '2.5606'), ('dialogue system', '2.5752'), ('question answering task', '2.5936'), ('dialog systems', '2.6423'), ('natural language interfaces', '2.6503'), ('natural language interface', '2.6921'), ('automated dialogue systems', '2.6962'), ('dialogue manager', '2.7031'), ('dialog model', '2.7146'), ('conversational interfaces', '2.7268')]
32
+
33
+ 2
34
+ Conversational information seeking (CIS) is concerned with a sequence of interactions between one or more users and an information system.
35
+ [('conversational interfaces', '2.3386'), ('conversational interactivity', '2.3717'), ('conversational systems', '2.4243'), ('interactive communication', '2.5259'), ('natural language interaction', '2.5718'), ('conversational framework', '2.5951'), ('conversational participants', '2.5953'), ('conversation system', '2.6005'), ('context awareness', '2.6514'), ('information seeking', '2.6530')]
36
+
37
+ Interactions in CIS are primarily based on natural language dialogue, while they may include other types of interactions, such as click, touch, and body gestures.
38
+ [('conversational interactivity', '2.3443'), ('conversational interfaces', '2.3658'), ('natural language interaction', '2.5097'), ('gesture-based interaction', '2.5466'), ('gesture interaction', '2.5528'), ('gesture-based interfaces', '2.5743'), ('gesture based interaction', '2.5759'), ('conversational systems', '2.5838'), ('nonverbal interface system', '2.5856'), ('interactive communication', '2.6019')]
39
+
40
+ This monograph provides a thorough overview of CIS definitions, applications, interactions, interfaces, design, implementation, and evaluation.
41
+ [('conversational interfaces', '2.3651'), ('conversational interactivity', '2.4326'), ('context-sensitive user interfaces', '2.5113'), ('intelligent interface', '2.5648'), ('interaction design', '2.5767'), ('human interface', '2.5780'), ('interaction designs', '2.5790'), ('context-awareness', '2.5810'), ('active human interface', '2.5942'), ('context awareness', '2.5952')]
42
+
43
+ This monograph views CIS applications as including conversational search, conversational question answering, and conversational recommendation.
44
+ [('conversational interfaces', '2.3836'), ('conversational systems', '2.4336'), ('conversational interactivity', '2.4474'), ('conversational framework', '2.5867'), ('conversation system', '2.6573'), ('contextual inquiry', '2.6694'), ('conversational participants', '2.6820'), ('natural language interaction', '2.6828'), ('conversational agents', '2.6951'), ('dialogue systems', '2.7080')]
45
+
46
+ Our aim is to provide an overview of past research related to CIS, introduce the current state-of-the-art in CIS, highlight the challenges still being faced in the community.
47
+ [('conversational interactivity', '2.5124'), ('conversational interfaces', '2.5324'), ('context-awareness', '2.6188'), ('context awareness', '2.6198'), ('conversational framework', '2.6432'), ('conversational systems', '2.6493'), ('interactive communication', '2.6581'), ('context-sensitive user interfaces', '2.6596'), ('awareness systems', '2.6937'), ('knowledge usability', '2.7030')]
48
+
49
+ and suggest future directions.
50
+ [('conversational interactivity', '2.6979'), ('context awareness', '2.7914'), ('context-awareness', '2.8078'), ('conversational framework', '2.8078'), ('interactive communication', '2.8084'), ('awareness systems', '2.8160'), ('conversational interfaces', '2.8168'), ('informational support', '2.8324'), ('itc-sense', '2.8491'), ('information centric', '2.8558')]
51
+
52
+ 3
53
+ Recent work has shown that more effective dense retrieval models can be obtained by distilling ranking knowledge from an existing base re-ranking model.
54
+ [('ranked retrieval', '2.4517'), ('discriminative reranking', '2.6016'), ('flexible retrieval experience', '2.6083'), ('retrieval model', '2.6099'), ('knowledge retrieval', '2.6342'), ('expertise retrieval', '2.6908'), ('ranking models', '2.7144'), ('similarity based retrieval', '2.7206'), ('curriculum learning', '2.7225'), ('ranking model', '2.7261')]
55
+
56
+ In this paper, we propose a generic curriculum learning based optimization framework called CL-DRD that controls the difficulty level of training data produced by the re-ranking (teacher) model.
57
+ [('curriculum learning', '2.5266'), ('curriculum learning strategy', '2.6248'), ('learning contents', '2.6308'), ('ranked retrieval', '2.6722'), ('flexible retrieval experience', '2.6734'), ('discriminative reranking', '2.6949'), ('curriculum', '2.7324'), ('learning content', '2.7438'), ('retrieval model', '2.7511'), ('memory-based learning', '2.7514')]
58
+
59
+ CL-DRD iteratively optimizes the dense retrieval (student) model by increasing the difficulty of the knowledge distillation data made available to it.
60
+ [('ranked retrieval', '2.5209'), ('flexible retrieval experience', '2.6393'), ('knowledge retrieval', '2.6440'), ('curriculum learning', '2.6900'), ('learning contents', '2.6910'), ('retrieval model', '2.7044'), ('discriminative reranking', '2.7063'), ('expertise retrieval', '2.7077'), ('content-based retrieval', '2.7207'), ('interactive retrieval', '2.7256')]
61
+
62
+ In more detail, we initially provide the student model coarse-grained preference pairs between documents in the teacher's ranking, and progressively move towards finer-grained pairwise document ordering requirements.
63
+ [('ranked retrieval', '2.5957'), ('ranking objects', '2.6752'), ('discriminative reranking', '2.7212'), ('similarity-based retrieval', '2.7385'), ('neural ranking models', '2.7410'), ('content-based retrieval', '2.7421'), ('similarity based retrieval', '2.7460'), ('flexible retrieval experience', '2.7488'), ('document retrieval', '2.7519'), ('ranking model', '2.7546')]
64
+
65
+ In our experiments, we apply a simple implementation of the CL-DRD framework to enhance two state-of-the-art dense retrieval models.
66
+ [('ranked retrieval', '2.5848'), ('flexible retrieval experience', '2.5967'), ('content-based retrieval', '2.6732'), ('interactive retrieval', '2.6815'), ('retrieval model', '2.6826'), ('content based retrieval', '2.7093'), ('similarity based retrieval', '2.7119'), ('similarity-based retrieval', '2.7138'), ('document retrieval', '2.7207'), ('automatic retrieval', '2.7238')]
67
+
68
+ Experiments on three public passage retrieval datasets demonstrate the effectiveness of our proposed framework.
69
+ [('ranked retrieval', '2.4705'), ('document retrieval', '2.5858'), ('flexible retrieval experience', '2.5919'), ('content-based retrieval', '2.6594'), ('similarity based retrieval', '2.6637'), ('similarity-based retrieval', '2.6736'), ('content based retrieval', '2.6868'), ('context-aware retrieval', '2.6898'), ('interactive retrieval', '2.6946'), ('content retrieval', '2.7055')]
70
+
71
+ 4
72
+ Learning multiple intent representations for queries has potential applications in facet generation, document ranking, search result diversification, and search explanation.
73
+ [('faceted search', '2.5518'), ('semantic searches', '2.5881'), ('query retrieval', '2.6112'), ('semantic search', '2.6112'), ('information retrieval models', '2.6626'), ('knowledge retrieval', '2.6684'), ('context-aware retrieval', '2.6725'), ('knowledge-based query expansion', '2.6738'), ('semantic-based searching', '2.6911'), ('contextual query expansion', '2.6923')]
74
+
75
+ The state-of-the-art model for this task assumes that there is a sequence of intent representations.
76
+ [('textual representations', '2.6996'), ('interactive intent modeling', '2.7238'), ('minutiae-based representation', '2.7379'), ('term networks', '2.7379'), ('relevance networks', '2.7460'), ('faceted search', '2.7720'), ('knowledge retrieval', '2.7768'), ('semantic search', '2.7822'), ('directed information', '2.7902'), ('context-aware retrieval', '2.7962')]
77
+
78
+ In this paper, we argue that the model should not be penalized as long as it generates an accurate and complete set of intent representations.
79
+ [('term networks', '2.6810'), ('relevance networks', '2.7342'), ('minutiae-based representation', '2.7476'), ('textual representations', '2.7603'), ('word models', '2.7738'), ('directed information', '2.7992'), ('interactive intent modeling', '2.8029'), ('text-based models', '2.8111'), ('semantic search', '2.8126'), ('faceted search', '2.8135')]
80
+
81
+ Based on this intuition, we propose a stochastic permutation invariant approach for optimizing such networks.
82
+ [('permutation invariant training', '2.6155'), ('term networks', '2.7114'), ('directed information', '2.7711'), ('stochastic automata networks', '2.7773'), ('siamese networks', '2.7951'), ('associative learning', '2.7997'), ('symbol-level network', '2.8102'), ('minutiae-based representation', '2.8172'), ('stochastic architecture', '2.8200'), ('rearrangeable networks', '2.8206')]
83
+
84
+ We extrinsically evaluate the proposed approach on a facet generation task and demonstrate significant improvements compared to competitive baselines.
85
+ [('term networks', '2.7370'), ('relevance networks', '2.8073'), ('faceted navigation', '2.8240'), ('faceted search', '2.8298'), ('keyword generation', '2.8916'), ('visual synset', '2.9051'), ('symbol-level network', '2.9068'), ('permutation invariant training', '2.9078'), ('semantic enrichment', '2.9155'), ('confusion network', '2.9158')]
86
+
87
+ Our analysis shows that the proposed permutation invariant approach has the highest impact on queries with more potential intents.
88
+ [('faceted search', '2.7021'), ('maximum relevance', '2.7506'), ('semantic search', '2.7652'), ('semantic-based searching', '2.7664'), ('semantic searches', '2.7753'), ('term networks', '2.7800'), ('concept-based query expansion', '2.7835'), ('relevance networks', '2.7836'), ('context-aware retrieval', '2.7889'), ('faceted navigation', '2.7898')]
89
+
90
+ 5
91
+ Although the chemotherapeutic drug, doxorubicin, is commonly used to treat various malignant tumors, its clinical use is restricted because of its toxicity especially cardiotoxicity.
92
+ [('therapeutic targets', '2.9195'), ('drug targets', '2.9507'), ('chemotherapy', '2.9624'), ('antiangiogenic therapy', '2.9894'), ('tumor responses', '3.0519'), ('drug compounds', '3.0647'), ('tumor response', '3.0836'), ('tumor purity', '3.0969'), ('drug development', '3.1036'), ('cytotoxicity', '3.1064')]
93
+
94
+ The use of curcumin may alleviate some of the doxorubicin‐induced cardiotoxic effects.
95
+ [('cytotoxic effect', '3.0268'), ('cytotoxicity', '3.0372'), ('therapeutic targets', '3.0809'), ('antiangiogenic therapy', '3.1135'), ('drug targets', '3.1199'), ('chemotherapy', '3.1264'), ('drug compounds', '3.1554'), ('further clinical studies', '3.1573'), ('biomedical abstract', '3.1768'), ('tumor purity', '3.1785')]
96
+
97
+ Especially, using the nano‐formulation of curcumin can overcome the poor bioavailability of curcumin and enhance its physicochemical properties regarding its efficacy.
98
+ [('therapeutic targets', '3.2133'), ('biological molecules', '3.2210'), ('biomaterials', '3.2264'), ('drug development', '3.2298'), ('drug design', '3.2496'), ('bioavailability', '3.2517'), ('drug targets', '3.2534'), ('drug delivery', '3.2677'), ('drug discovery', '3.2729'), ('bioactive conformers', '3.3025')]
99
+
100
+ In this study, we systematically reviewed the potential cardioprotective effects of nano‐curcumin against the doxorubicin‐induced cardiotoxicity.
101
+ [('therapeutic targets', '3.0424'), ('cytotoxicity', '3.0522'), ('cytotoxic effect', '3.0594'), ('biomedical abstract', '3.1003'), ('drug targets', '3.1080'), ('further clinical studies', '3.1250'), ('antiangiogenic therapy', '3.1329'), ('drug development', '3.1461'), ('drug discovery', '3.1561'), ('biomedical question', '3.1569')]
102
+
103
+ A systematic search was accomplished based on Preferred Reporting Items for Systematic Reviews and Meta‐Analyses guidelines for the identification of all relevant articles on “the role of nano‐curcumin on doxorubicin‐induced cardiotoxicity” in the electronic databases of Scopus, PubMed, and Web of Science up to July 2021.
104
+ [('biomedical abstract', '2.8096'), ('therapeutic targets', '2.8438'), ('pubmed', '2.8537'), ('further clinical studies', '2.8954'), ('reviewed literature', '2.9550'), ('drug development', '2.9559'), ('drug targets', '2.9643'), ('comprehensive literature review', '2.9714'), ('pubmed abstracts', '2.9765'), ('clinical studies', '2.9860')]
105
+
106
+ One hundred and sixty‐nine articles were screened following a predefined set of inclusion and exclusion criteria.
107
+ [('biomedical abstract', '2.8758'), ('therapeutic targets', '2.9134'), ('drug development', '2.9787'), ('further clinical studies', '2.9957'), ('pubmed', '3.0269'), ('drug design', '3.0300'), ('drug targets', '3.0394'), ('drug discovery', '3.0420'), ('biomedical question', '3.0619'), ('biological molecules', '3.0832')]
108
+
109
+ Ten eligible scientific papers were finally included in the present systematic review.
110
+ [('biomedical abstract', '2.7445'), ('pubmed', '2.7502'), ('reviewed literature', '2.8006'), ('pubmed abstracts', '2.8373'), ('further clinical studies', '2.8470'), ('therapeutic targets', '2.8824'), ('biomedical literature', '2.9036'), ('clinical studies', '2.9088'), ('biomedical articles', '2.9177'), ('medline abstracts', '2.9265')]
111
+
112
+ The administration of doxorubicin reduced the body and heart weights of mice/rats compared to the control groups.
113
+ [('therapeutic targets', '2.9305'), ('biomedical abstract', '2.9816'), ('drug targets', '2.9905'), ('biomedical question', '3.0190'), ('chemotherapy', '3.0336'), ('further clinical studies', '3.0386'), ('drug development', '3.0404'), ('biological molecules', '3.0417'), ('cytotoxic effect', '3.0454'), ('antiangiogenic therapy', '3.0464')]
114
+
115
+ In contrast, the combined treatment of doxorubicin and nano‐curcumin increased the body and heart weights of animals compared with the doxorubicin‐treated groups alone.
116
+ [('therapeutic targets', '2.9934'), ('drug targets', '3.0321'), ('antiangiogenic therapy', '3.0775'), ('chemotherapy', '3.0809'), ('drug compounds', '3.0907'), ('biomedical abstract', '3.0912'), ('cytotoxicity', '3.0938'), ('biological molecules', '3.0943'), ('cytotoxic effect', '3.0964'), ('further clinical studies', '3.0995')]
117
+
118
+ Furthermore, doxorubicin could significantly induce the biochemical and histological changes in the cardiac tissue; however, coadministration of nano‐curcumin formulation demonstrated a pattern opposite to the doxorubicin‐induced changes.
119
+ [('therapeutic targets', '3.0611'), ('cytotoxic effect', '3.0958'), ('cytotoxicity', '3.0959'), ('biological molecules', '3.0986'), ('drug targets', '3.1020'), ('biomedical abstract', '3.1047'), ('biomedical question', '3.1263'), ('drug development', '3.1436'), ('drug discovery', '3.1479'), ('further clinical studies', '3.1488')]
120
+
121
+ The coadministration of nano‐curcumin alleviates the doxorubicin‐induced cardiotoxicity through various mechanisms including antioxidant, anti‐inflammatory, and antiapoptotic effects.
122
+ [('therapeutic targets', '3.1087'), ('cytotoxicity', '3.1104'), ('cytotoxic effect', '3.1171'), ('biological molecules', '3.1816'), ('antiangiogenic therapy', '3.1928'), ('drug targets', '3.1951'), ('drug discovery', '3.2154'), ('drug compounds', '3.2202'), ('biomedical abstract', '3.2261'), ('drug development', '3.2316')]
123
+
124
+ Also, the cardioprotective effect of nano‐curcumin formulation against doxorubicin‐induced cardiotoxicity was higher than free curcumin.
125
+ [('therapeutic targets', '3.0909'), ('cytotoxicity', '3.1030'), ('cytotoxic effect', '3.1128'), ('drug targets', '3.1348'), ('antiangiogenic therapy', '3.1440'), ('biological molecules', '3.1615'), ('biomedical abstract', '3.1664'), ('drug compounds', '3.1732'), ('drug discovery', '3.1756'), ('drug development', '3.1829')]
126
+
127
+ 6
128
+ This paper studies multi-task training of retrieval-augmented generation models for knowledge-intensive tasks.
129
+ [('retrieval tasks', '2.7197'), ('question answering task', '2.8333'), ('retrieval task', '2.8527'), ('relevance networks', '2.8607'), ('sentence retrieval', '2.8682'), ('retrieval model', '2.8734'), ('recurrent tasks', '2.8794'), ('information retrieval tasks', '2.8869'), ('learning tasks', '2.8966'), ('word retrieval', '2.8985')]
130
+
131
+ We propose to clean the training set by utilizing a distinct property of knowledge-intensive generation: The connection of query-answer pairs to items in the knowledge base.
132
+ [('retrieval tasks', '2.7616'), ('question answering task', '2.7769'), ('semantic enrichment', '2.8169'), ('relevance networks', '2.8298'), ('question generation', '2.8311'), ('word confusion networks', '2.8465'), ('sentence retrieval', '2.8473'), ('sentence generation', '2.8475'), ('knowledge retrieval', '2.8552'), ('retrieval task', '2.8554')]
133
+
134
+ We filter training examples via a threshold of confidence on the relevance labels, whether a pair is answerable by the knowledge base or not.
135
+ [('relevance networks', '2.7229'), ('word confusion networks', '2.7639'), ('question answering task', '2.7791'), ('retrieval tasks', '2.7814'), ('sentence retrieval', '2.8008'), ('maximal relevance', '2.8312'), ('maximum relevance', '2.8393'), ('knowledge retrieval', '2.8471'), ('semantic relevance', '2.8472'), ('semantic enrichment', '2.8476')]
136
+
137
+ We train a single Fusion-in-Decoder (FiD) generator on seven combined tasks of the KILT benchmark.
138
+ [('word learning', '2.8535'), ('word confusion networks', '2.9074'), ('sequence-to-sequence learning', '2.9143'), ('sentence retrieval', '2.9182'), ('word retrieval', '2.9382'), ('training corpus', '2.9408'), ('lexical transducer', '2.9450'), ('multi-task training', '2.9503'), ('sequence training', '2.9548'), ('lexical entrainment', '2.9578')]
139
+
140
+ The experimental results suggest that our simple yet effective approach substantially improves competitive baselines on two strongly imbalanced tasks; and shows either smaller improvements or no significant regression on the remaining tasks.
141
+ [('retrieval tasks', '2.8309'), ('multi-task training', '2.8375'), ('relevance networks', '2.8468'), ('recurrent tasks', '2.8637'), ('learning tasks', '2.8769'), ('prediction tasks', '2.8797'), ('enhanced learning', '2.8835'), ('sentence retrieval', '2.8882'), ('word confusion networks', '2.8896'), ('training corpus', '2.8903')]
142
+
143
+ Furthermore, we demonstrate our multi-task training with relevance label sampling scales well with increased model capacity and achieves state-of-the-art results in five out of seven KILT tasks.
144
+ [('relevance networks', '2.7894'), ('term networks', '2.8331'), ('multi-task training', '2.8377'), ('word confusion networks', '2.8669'), ('sequence-to-sequence learning', '2.8749'), ('word learning', '2.8784'), ('retrieval tasks', '2.8808'), ('maximum relevance', '2.8865'), ('recurrent tasks', '2.8909'), ('semantic enrichment', '2.8915')]
145
+
146
+ 7
147
+ Web search queries can often be characterized by various facets.
148
+ [('query retrieval', '2.5662'), ('faceted search', '2.6265'), ('web query interfaces', '2.6523'), ('query generation', '2.7070'), ('unsupervised query segmentation', '2.7254'), ('query engines', '2.7714'), ('context-aware query suggestion', '2.7831'), ('web search queries', '2.7911'), ('interactive retrieval', '2.8018'), ('semantic searches', '2.8049')]
149
+
150
+ Extracting and generating query facets has various real-world applications, such as displaying facets to users in a search interface, search result diversification, clarifying question generation, and enabling exploratory search.
151
+ [('faceted search', '2.5903'), ('query retrieval', '2.6846'), ('exploratory search tasks', '2.6918'), ('interactive retrieval', '2.7203'), ('query generation', '2.7281'), ('context-aware query suggestion', '2.7354'), ('unsupervised query segmentation', '2.7472'), ('exploratory search', '2.7509'), ('exploratory searches', '2.7601'), ('faceted navigation', '2.7637')]
152
+
153
+ In this work, we revisit the task of query facet extraction and generation and study various formulations of this task, including facet extraction as sequence labeling, facet generation as autoregressive text generation or extreme multi-label classification.
154
+ [('unsupervised query segmentation', '2.7828'), ('keyword generation', '2.7875'), ('faceted search', '2.8394'), ('query retrieval', '2.8476'), ('query generation', '2.8537'), ('faceted navigation', '2.8599'), ('term networks', '2.8766'), ('interactive retrieval', '2.8795'), ('query segmentation', '2.9093'), ('automatic question generation', '2.9135')]
155
+
156
+ We conduct extensive experiments and demonstrate that these approaches lead to complementary sets of facets.
157
+ [('faceted search', '2.8412'), ('faceted navigation', '2.8922'), ('term networks', '2.9104'), ('faceted interface', '2.9250'), ('unsupervised query segmentation', '2.9262'), ('relevance networks', '2.9540'), ('keyword generation', '2.9722'), ('query retrieval', '2.9781'), ('query generation', '2.9943'), ('interactive retrieval', '3.0018')]
158
+
159
+ We also explored various aggregation approaches based on relevance and diversity to combine the facet sets produced by different formulations of the task.
160
+ [('faceted search', '2.7853'), ('faceted navigation', '2.9008'), ('query retrieval', '2.9105'), ('interactive retrieval', '2.9176'), ('relevance networks', '2.9177'), ('term networks', '2.9221'), ('context-aware retrieval', '2.9311'), ('concept-based query expansion', '2.9424'), ('faceted interface', '2.9463'), ('unsupervised query segmentation', '2.9476')]
161
+
162
+ The approaches presented in this paper outperform state-of-the-art baselines in terms of both precision and recall.
163
+ [('unsupervised query segmentation', '2.8656'), ('interactive retrieval', '2.8839'), ('query retrieval', '2.8854'), ('term networks', '2.8908'), ('relevance networks', '2.9285'), ('context-aware retrieval', '2.9367'), ('faceted navigation', '2.9547'), ('flexible retrieval experience', '2.9560'), ('faceted search', '2.9716'), ('automatic retrieval', '2.9775')]
164
+
165
+ We confirm the quality of the proposed methods through manual annotation.
166
+ [('annotation engine', '2.6800'), ('automated annotation', '2.7217'), ('automatic annotation', '2.7313'), ('annotated keywords', '2.7652'), ('annotators', '2.7675'), ('user-generated annotations', '2.7801'), ('manual annotation', '2.7878'), ('multi-level annotation', '2.7952'), ('automated semantic annotation', '2.7958'), ('deep annotation', '2.8055')]
167
+
168
+ Since there is no open-source software for facet extraction and generation, we release a toolkit named Faspect, that includes various model implementations for this task.
169
+ [('natural language toolkit', '2.7976'), ('visual query language', '2.8187'), ('annotation engine', '2.8297'), ('text-based models', '2.8376'), ('interactive retrieval', '2.8558'), ('faceted navigation', '2.8588'), ('unsupervised query segmentation', '2.8617'), ('faceted interface', '2.8697'), ('faceted search', '2.8719'), ('textual interface', '2.9016')]
170
+
171
+ 8
172
+ Conversational information seeking (CIS) involves interaction sequences between one or more users and an information system.
173
+ [('conversational interfaces', '2.3339'), ('conversational interactivity', '2.3406'), ('conversational systems', '2.3923'), ('interactive communication', '2.4647'), ('conversational participants', '2.5485'), ('natural language interaction', '2.5605'), ('conversational framework', '2.5707'), ('speech interaction', '2.6090'), ('conversation system', '2.6199'), ('conversational agent', '2.6681')]
174
+
175
+ Interactions in CIS are primarily based on natural language dialogue, while they may include other types of interactions, such as click, touch, and body gestures.
176
+ [('conversational interactivity', '2.3216'), ('conversational interfaces', '2.3877'), ('gesture-based interaction', '2.5063'), ('gesture interaction', '2.5121'), ('natural language interaction', '2.5127'), ('gesture based interaction', '2.5374'), ('gesture-based interfaces', '2.5611'), ('speech interaction', '2.5716'), ('interactive communication', '2.5726'), ('nonverbal interface system', '2.5826')]
177
+
178
+ CIS recently attracted significant attention and advancements continue to be made.
179
+ [('conversational interactivity', '2.5257'), ('conversational interfaces', '2.5958'), ('interactive communication', '2.6650'), ('active human interface', '2.6988'), ('information seeking', '2.7301'), ('user interactivity', '2.7341'), ('conversational framework', '2.7350'), ('context awareness', '2.7403'), ('conversational systems', '2.7428'), ('information seeker', '2.7548')]
180
+
181
+ This tutorial follows the content of the recent Conversational Information Seeking book authored by several of the tutorial presenters.
182
+ [('conversational interactivity', '2.3888'), ('conversational interfaces', '2.4549'), ('conversational framework', '2.4709'), ('information seeking', '2.5502'), ('interactive communication', '2.5506'), ('conversational systems', '2.5699'), ('conversational participants', '2.6188'), ('information seeker', '2.6432'), ('guiding information', '2.6465'), ('contextual help', '2.6642')]
183
+
184
+ The tutorial aims to be an introduction to CIS for newcomers to CIS in addition to the recent advanced topics and state-of-the-art approaches for students and researchers with moderate knowledge of the topic.
185
+ [('conversational interactivity', '2.4237'), ('conversational interfaces', '2.4559'), ('conversational framework', '2.5495'), ('interactive communication', '2.5657'), ('conversational systems', '2.6057'), ('active human interface', '2.6093'), ('natural language interaction', '2.6657'), ('user interactivity', '2.6703'), ('context awareness', '2.6787'), ('human interface', '2.6976')]
186
+
187
+ A significant part of the tutorial is dedicated to hands-on experiences based on toolkits developed by the presenters for conversational passage retrieval and multi-modal task-oriented dialogues.
188
+ [('conversational interfaces', '2.3351'), ('conversational interactivity', '2.3612'), ('conversational framework', '2.5164'), ('conversational systems', '2.5465'), ('active human interface', '2.5574'), ('interactive communication', '2.5930'), ('multimodal user interfaces', '2.6149'), ('natural language interaction', '2.6208'), ('context-sensitive user interfaces', '2.6261'), ('user interactivity', '2.6285')]
189
+
190
+ The outcomes of this tutorial include theoretical and practical knowledge, including a forum to meet researchers interested in CIS.
191
+ [('conversational interactivity', '2.3977'), ('conversational framework', '2.4638'), ('conversational interfaces', '2.4892'), ('interactive communication', '2.5454'), ('conversational systems', '2.5722'), ('natural language interaction', '2.6381'), ('user interactivity', '2.6399'), ('user interactions', '2.6555'), ('conversational participants', '2.6556'), ('interaction designs', '2.6620')]
192
+
193
+ 9
194
+ Asking clarification questions is an active area of research; however, resources for training and evaluating search clarification methods are not sufficient.
195
+ [('exploratory search tasks', '2.5892'), ('question answering task', '2.6986'), ('question answering', '2.7042'), ('question search', '2.7141'), ('semantic searches', '2.7144'), ('semantic search', '2.7186'), ('semantic enrichment', '2.7209'), ('exploratory searches', '2.7256'), ('information retrieval tasks', '2.7305'), ('exploratory search', '2.7493')]
196
+
197
+ To address this issue, we describe MIMICS-Duo, a new freely available dataset of 306 search queries with multiple clarifications (a total of 1,034 query-clarification pairs).
198
+ [('semantic searches', '2.6938'), ('semantic search', '2.7307'), ('question search', '2.7337'), ('exploratory search tasks', '2.7557'), ('knowledge retrieval', '2.7686'), ('interactive retrieval', '2.7998'), ('exploratory searches', '2.8001'), ('two-stage language model searching', '2.8022'), ('annotated keywords', '2.8074'), ('exploratory search', '2.8170')]
199
+
200
+ MIMICS-Duo contains fine-grained annotations on clarification questions and their candidate answers and enhances the existing MIMICS datasets by enabling multi-dimensional evaluation of search clarification methods, including online and offline evaluation.
201
+ [('exploratory search tasks', '2.7006'), ('annotated keywords', '2.7119'), ('semantic enrichment', '2.7476'), ('semantic searches', '2.7594'), ('relevance networks', '2.7677'), ('semantic search', '2.7710'), ('interactive retrieval', '2.7732'), ('term networks', '2.7912'), ('knowledge retrieval', '2.8059'), ('question search', '2.8079')]
202
+
203
+ We conduct extensive analysis to demonstrate the relationship between offline and online search clarification datasets and outline several research directions enabled by MIMICS-Duo.
204
+ [('exploratory search tasks', '2.6099'), ('semantic searches', '2.6675'), ('semantic search', '2.6878'), ('exploratory searches', '2.6900'), ('interactive retrieval', '2.7208'), ('exploratory search', '2.7210'), ('semantic enrichment', '2.7214'), ('annotated keywords', '2.7362'), ('knowledge retrieval', '2.7484'), ('information retrieval tasks', '2.7526')]
205
+
206
+ We believe that this resource will help researchers better understand clarification in search.
207
+ [('exploratory search tasks', '2.6023'), ('semantic searches', '2.6204'), ('semantic search', '2.6346'), ('exploratory searches', '2.6657'), ('semantic enrichment', '2.6884'), ('exploratory search', '2.6887'), ('information search', '2.7096'), ('search paradigm', '2.7577'), ('question search', '2.7592'), ('information retrieval tasks', '2.7735')]
208
+
209
+ 10
210
+ Recently, several dense retrieval (DR) models have demonstrated competitive performance to term-based retrieval that are ubiquitous in search systems.
211
+ [('similarity-based retrieval', '2.4625'), ('information retrieval models', '2.4834'), ('ranked retrieval', '2.4993'), ('probabilistic retrieval model', '2.5215'), ('information retrieval technologies', '2.5341'), ('content-based retrieval', '2.5428'), ('similarity based retrieval', '2.5533'), ('interactive retrieval', '2.5574'), ('query retrieval', '2.5706'), ('information retrieval systems', '2.5794')]
212
+
213
+ In contrast to term-based matching, DR projects queries and documents into a dense vector space and retrieves results via (approximate) nearest neighbor search.
214
+ [('similarity-based retrieval', '2.3587'), ('similarity based retrieval', '2.4435'), ('ranked retrieval', '2.4888'), ('content-based retrieval', '2.4897'), ('query retrieval', '2.5168'), ('content-based search', '2.5432'), ('document retrieval', '2.5496'), ('keyword-based searches', '2.5642'), ('keyword-based search', '2.5701'), ('interactive retrieval', '2.5792')]
215
+
216
+ Deploying a new system, such as DR, inevitably involves tradeoffs in aspects of its performance.
217
+ [('similarity-based retrieval', '2.4966'), ('ranked retrieval', '2.5061'), ('information retrieval technologies', '2.5266'), ('content-based retrieval', '2.5384'), ('scalable data retrieval', '2.5456'), ('interactive retrieval', '2.5533'), ('information retrieval systems', '2.5801'), ('retrieval systems', '2.5819'), ('probabilistic retrieval model', '2.5823'), ('similarity based retrieval', '2.5880')]
218
+
219
+ Established retrieval systems running at scale are usually well understood in terms of effectiveness and costs, such as query latency, indexing throughput, or storage requirements.
220
+ [('scalable data retrieval', '2.4391'), ('information retrieval performance', '2.5095'), ('similarity-based retrieval', '2.5231'), ('ranked retrieval', '2.5250'), ('retrieval efficiency', '2.5635'), ('content-based retrieval', '2.5709'), ('retrieval systems', '2.5786'), ('information retrieval technologies', '2.5877'), ('data retrieval efficiency', '2.5880'), ('similarity based retrieval', '2.5938')]
221
+
222
+ In this work, we propose a framework with a set of criteria that go beyond simple effectiveness measures to thoroughly compare two retrieval systems with the explicit goal of assessing the readiness of one system to replace the other.
223
+ [('information retrieval performance', '2.5029'), ('similarity-based retrieval', '2.5105'), ('ranked retrieval', '2.5239'), ('retrieval systems', '2.5257'), ('information retrieval technologies', '2.5341'), ('information retrieval evaluation', '2.5568'), ('information retrieval systems', '2.5660'), ('information retrieval models', '2.5746'), ('retrieval relevance', '2.5756'), ('retrieval metric', '2.5778')]
224
+
225
+ This includes careful tradeoff considerations between effectiveness and various cost factors.
226
+ [('ranked retrieval', '2.5885'), ('information retrieval performance', '2.6251'), ('retrieval quality', '2.6277'), ('retrieval efficiency', '2.6442'), ('retrieval relevance', '2.6479'), ('cost-based query optimization', '2.6577'), ('retrieval systems', '2.6744'), ('information retrieval effectiveness', '2.7020'), ('similarity-based retrieval', '2.7021'), ('retrieval metric', '2.7033')]
227
+
228
+ Furthermore, we describe guardrail criteria, since even a system that is better on average may have systematic failures on a minority of queries.
229
+ [('ranked retrieval', '2.5185'), ('query retrieval', '2.5625'), ('similarity-based retrieval', '2.5661'), ('retrieval quality', '2.6164'), ('probabilistic retrieval model', '2.6336'), ('retrieval systems', '2.6343'), ('information retrieval performance', '2.6463'), ('information retrieval models', '2.6480'), ('retrieval accuracy', '2.6493'), ('interactive retrieval', '2.6502')]
230
+
231
+ The guardrails check for failures on certain query characteristics and novel failure types that are only possible in dense retrieval systems.
232
+ [('ranked retrieval', '2.5368'), ('similarity-based retrieval', '2.5596'), ('retrieval systems', '2.6076'), ('query retrieval', '2.6235'), ('similarity based retrieval', '2.6511'), ('intelligent retrieval', '2.6536'), ('content-based retrieval', '2.6554'), ('interactive retrieval', '2.6572'), ('information retrieval technologies', '2.6628'), ('probabilistic retrieval model', '2.6658')]
233
+
234
+ We demonstrate our decision framework on a Web ranking scenario.
235
+ [('ranking framework', '2.4571'), ('ranking algorithms', '2.5078'), ('ranked retrieval', '2.5149'), ('search ranking', '2.5264'), ('link-based ranking algorithms', '2.5281'), ('ranking queries', '2.5354'), ('ranking accuracy', '2.5458'), ('ranking model', '2.5926'), ('ranking approaches', '2.5956'), ('ranking models', '2.5978')]
236
+
237
+ In that scenario, state-of-the-art DR models have surprisingly strong results, not only on average performance but passing an extensive set of guardrail tests, showing robustness on different query characteristics, lexical matching, generalization, and number of regressions.
238
+ [('information retrieval models', '2.4305'), ('query retrieval', '2.4768'), ('ranked retrieval', '2.4776'), ('similarity-based retrieval', '2.4799'), ('probabilistic retrieval model', '2.5294'), ('information retrieval performance', '2.5461'), ('similarity based retrieval', '2.5838'), ('information retrieval technologies', '2.5933'), ('content-based retrieval', '2.5962'), ('interactive retrieval', '2.6017')]
239
+
240
+ DR with approximate nearest neighbor search has comparable low query latency to term-based systems.
241
+ [('similarity-based retrieval', '2.4049'), ('similarity based retrieval', '2.4702'), ('approximate retrieval', '2.5250'), ('similarity-based queries', '2.5530'), ('ranked retrieval', '2.5745'), ('nearest-neighbor queries', '2.5825'), ('keyword-based search', '2.5907'), ('keyword-based searches', '2.5968'), ('approximate nearest neighbor search', '2.6010'), ('content-based search', '2.6075')]
242
+
243
+ The main reason to reject current DR models in this scenario is the cost of vectorization, which is much higher than the cost of building a traditional index.
244
+ [('ranked retrieval', '2.4929'), ('similarity-based retrieval', '2.5153'), ('content-based retrieval', '2.5870'), ('information retrieval models', '2.5934'), ('probabilistic retrieval model', '2.5981'), ('similarity based retrieval', '2.6082'), ('query-driven indexing', '2.6170'), ('query retrieval', '2.6173'), ('scalable data retrieval', '2.6186'), ('interactive retrieval', '2.6263')]
245
+
246
+ It is impossible to predict whether DR will become ubiquitous in the future, but one way this is possible is through repeated applications of decision processes such as the one presented here.
247
+ [('ranked retrieval', '2.5310'), ('similarity-based retrieval', '2.5663'), ('information retrieval technologies', '2.5775'), ('retrieval systems', '2.5847'), ('content-based retrieval', '2.5856'), ('interactive retrieval', '2.6055'), ('probabilistic retrieval model', '2.6218'), ('intelligent retrieval', '2.6224'), ('information retrieval systems', '2.6249'), ('content-based search', '2.6289')]
248
+
249
+ 11
250
+ The multi-stage cascaded architecture has been adopted by many search engines for efficient and effective retrieval.
251
+ [('ranked retrieval', '2.2334'), ('flexible retrieval experience', '2.4484'), ('retrieval model', '2.4583'), ('interactive retrieval', '2.5241'), ('retrieval functions', '2.5734'), ('intelligent retrieval', '2.6041'), ('retrieval systems', '2.6142'), ('similarity-based retrieval', '2.6450'), ('probabilistic retrieval model', '2.6462'), ('information retrieval performance', '2.6522')]
252
+
253
+ This architecture consists of a stack of retrieval and reranking models in which efficient retrieval models are followed by effective (neural) learning-to-rank models.
254
+ [('ranked retrieval', '2.2322'), ('retrieval model', '2.3204'), ('retrieval functions', '2.5168'), ('information retrieval models', '2.5929'), ('ranking models', '2.6038'), ('neural ranking models', '2.6288'), ('probabilistic retrieval model', '2.6448'), ('ranking model', '2.6451'), ('flexible retrieval experience', '2.6543'), ('retrieval function', '2.6611')]
255
+
256
+ The optimization of these learning-to-rank models is loosely connected to the early stage retrieval models.
257
+ [('ranked retrieval', '2.2430'), ('retrieval model', '2.2758'), ('retrieval functions', '2.4976'), ('ranking models', '2.5381'), ('ranking model', '2.5767'), ('information retrieval models', '2.5933'), ('reranking model', '2.6166'), ('retrieval tasks', '2.6325'), ('discriminative reranking', '2.6397'), ('retrieval function', '2.6430')]
258
+
259
+ This paper draws theoretical connections between the early stage retrieval and late stage reranking models by deriving expected reranking performance conditioned on the early stage retrieval results.
260
+ [('ranked retrieval', '2.2460'), ('retrieval model', '2.3545'), ('retrieval functions', '2.4910'), ('discriminative reranking', '2.5641'), ('retrieval measures', '2.5661'), ('reranking model', '2.5775'), ('retrieval bias', '2.6054'), ('retrieval tasks', '2.6085'), ('retrieval function', '2.6287'), ('ranking models', '2.6368')]
261
+
262
+ Our findings shed light on optimization of both retrieval and reranking models.
263
+ [('ranked retrieval', '2.1985'), ('retrieval model', '2.3458'), ('retrieval functions', '2.4939'), ('discriminative reranking', '2.6145'), ('retrieval function', '2.6471'), ('retrieval tasks', '2.6502'), ('retrieval measures', '2.6586'), ('information retrieval models', '2.6765'), ('flexible retrieval experience', '2.6786'), ('retrieval bias', '2.6814')]
264
+
265
+ As a result, we also introduce a novel loss function for training reranking models that leads to significant improvements on multiple public benchmarks.
266
+ [('ranked retrieval', '2.3530'), ('retrieval model', '2.5577'), ('discriminative reranking', '2.5665'), ('retrieval functions', '2.6021'), ('reranking model', '2.7234'), ('retrieval function', '2.7248'), ('retrieval bias', '2.7367'), ('neural ranking models', '2.7511'), ('ranking models', '2.7520'), ('retrieval tasks', '2.7692')]
267
+
268
+ Our findings provide theoretical and empirical guidelines for developing multi-stage cascaded retrieval models.
269
+ [('ranked retrieval', '2.3170'), ('retrieval model', '2.3414'), ('retrieval functions', '2.5340'), ('flexible retrieval experience', '2.6181'), ('retrieval measures', '2.6335'), ('retrieval tasks', '2.6498'), ('information retrieval models', '2.6501'), ('retrieval function', '2.6781'), ('discriminative reranking', '2.6932'), ('retrieval dynamics', '2.7005')]
270
+
271
+ 12
272
+ Concept prerequisite learning (CPL) plays a key role in developing technologies that assist people to learn a new complex topic or concept.
273
+ [('learning concepts', '2.2419'), ('conceptual development', '2.5223'), ('procedural knowledge', '2.5393'), ('declarative and procedural knowledge', '2.5509'), ('learning contents', '2.5635'), ('knowledge building', '2.5658'), ('constructivist learning', '2.5675'), ('cognitive learning', '2.5796'), ('project-based learning', '2.5848'), ('knowledge acquisition', '2.5865')]
274
+
275
+ Previous work commonly assumes that all concepts are given at training time and solely focuses on predicting the unseen prerequisite relationships between them.
276
+ [('learning concepts', '2.3131'), ('pre-existing knowledge', '2.4376'), ('declarative and procedural knowledge', '2.4576'), ('procedural and declarative knowledge', '2.4660'), ('procedural knowledge', '2.4937'), ('task-specific knowledge', '2.5183'), ('a-priori knowledge', '2.5228'), ('conceptual development', '2.5666'), ('constructivist learning', '2.5721'), ('knowledge acquisition', '2.5752')]
277
+
278
+ However, many real-world scenarios deal with concepts that are left undiscovered at training time, which is relatively unexplored.
279
+ [('pre-existing knowledge', '2.4195'), ('learning concepts', '2.4199'), ('task-specific knowledge', '2.5278'), ('a-priori knowledge', '2.5289'), ('procedural knowledge', '2.5442'), ('declarative and procedural knowledge', '2.5563'), ('delayed knowledge', '2.5700'), ('procedural and declarative knowledge', '2.5768'), ('structured knowledge', '2.5946'), ('constructivist learning', '2.6058')]
280
+
281
+ This paper studies this problem and proposes a novel alternating knowledge distillation approach to take advantage of both content- and graph-based models for this task.
282
+ [('structured knowledge', '2.5107'), ('knowledge enhancement', '2.5316'), ('knowledge structures', '2.5638'), ('learning concepts', '2.5645'), ('task-specific knowledge', '2.5646'), ('knowledge structure', '2.5798'), ('knowledge maps', '2.6030'), ('knowledge components', '2.6122'), ('knowledge-based model', '2.6264'), ('knowledge retrieval', '2.6293')]
283
+
284
+ Extensive experiments on three public benchmarks demonstrate up to 10% improvements in terms of F1 score.
285
+ [('knowledge enhancement', '2.7054'), ('similarity scores', '2.7216'), ('semantic enrichment', '2.7364'), ('similarity score', '2.7417'), ('computational advances', '2.7448'), ('learning concepts', '2.7788'), ('semantic similarity score', '2.7835'), ('comprehension performance', '2.7842'), ('cognitive information', '2.7880'), ('memory-based learning', '2.7883')]
286
+
287
+ 13
288
+ Purpose: Ionizing radiation-absorbed doses is a crucial concern in Cone-Beam Computed Tomography (CBCT) and panoramic radiography.
289
+ [('cone beam computed tomography', '2.5730'), ('radiation dose', '2.7669'), ('computed tomography', '2.8038'), ('radio tomographic imaging', '2.8289'), ('ct image', '2.8825'), ('computed tomography angiography', '2.8904'), ('quantitative radiology', '2.9078'), ('radiologist assessment', '2.9181'), ('radiomics', '2.9291'), ('radiation intensity', '2.9443')]
290
+
291
+ This study aimed to evaluate and compare the Entrance Skin Doses (ESD) of thyroid and parotid gland regions in CBCT and panoramic radiography in Yazd province, Iran.
292
+ [('cone beam computed tomography', '2.8476'), ('thyroid gland', '2.8962'), ('radiologist assessment', '2.9073'), ('computed tomography', '2.9097'), ('thyroid nodules', '2.9127'), ('ct image', '2.9393'), ('radiology reports', '2.9434'), ('dental radiographs', '2.9449'), ('quantitative radiology', '2.9644'), ('computed tomography angiography', '2.9738')]
293
+
294
+
295
+ Materials and Methods: In this cross-sectional study, 332 patients were included, who were then divided into two age groups (adult and pediatric) and underwent dental CBCT and panoramic radiography.
296
+ [('dental radiographs', '2.8979'), ('radiologist assessment', '2.9400'), ('cone beam computed tomography', '2.9431'), ('radiology reports', '3.0180'), ('computed tomography', '3.0321'), ('radiation dose', '3.0597'), ('ct image', '3.0615'), ('thyroid nodules', '3.0616'), ('quantitative radiology', '3.0665'), ('computed tomography angiography', '3.0888')]
297
+
298
+ Twelve Thermoluminescence Dosimeters (TLD-GR200) were used for each patient to measure the ESD of thyroid and parotid glands.
299
+ [('thyroid gland', '2.8463'), ('cone beam computed tomography', '2.8587'), ('thyroid nodules', '2.8694'), ('radiologist assessment', '2.9514'), ('computed tomography', '2.9571'), ('quantitative radiology', '2.9632'), ('radiation dose', '2.9666'), ('radiomics', '2.9742'), ('computed tomography angiography', '2.9932'), ('ct image', '3.0106')]
300
+
301
+ The differences between the ESD values in CBCT and panoramic examinations as well as between the adults and children groups were evaluated by one-way ANOVA and Mann-Whitney tests.
302
+ [('radiologist assessment', '2.8391'), ('cone beam computed tomography', '2.8622'), ('computed tomography', '2.9083'), ('quantitative radiology', '2.9347'), ('dental radiographs', '2.9482'), ('computed tomography angiography', '2.9574'), ('thyroid gland', '2.9690'), ('radiology reports', '2.9766'), ('radiomics', '2.9854'), ('cephalometric analysis', '2.9861')]
303
+
304
+
305
+ Results: The mean and Standard Deviation (SD) values of ESD in panoramic imaging were equal to 61 ± 4 and 290 ± 12 µGy for the thyroid and parotid glands of the adult groups, respectively.
306
+ [('thyroid gland', '2.8110'), ('thyroid nodules', '2.8709'), ('cone beam computed tomography', '2.8879'), ('radiologist assessment', '2.9456'), ('computed tomography', '2.9600'), ('dental radiographs', '2.9944'), ('quantitative radiology', '3.0026'), ('ct image', '3.0127'), ('computed tomography angiography', '3.0143'), ('gland segmentation', '3.0459')]
307
+
308
+ Notably, these values for CBCT were significantly higher (P<0.01), as 377 ± 139 and 1554 ± 177 µGy, respectively.
309
+ [('thyroid gland', '2.9452'), ('radiation dose', '2.9656'), ('radiologist assessment', '2.9744'), ('cone beam computed tomography', '2.9841'), ('thyroid nodules', '2.9950'), ('radiomics', '3.0492'), ('quantitative radiology', '3.0547'), ('dental radiographs', '3.0567'), ('computed tomography', '3.0574'), ('radiology reports', '3.0714')]
310
+
311
+ Moreover, the mean ESD values in the panoramic examination were 41 ± 3 and 190 ± 16 µGy for thyroid and parotid glands for the children group, while they were 350 ± 120 and 990 ± 107 µGy in CBCT (P<0.01), respectively.
312
+ [('thyroid gland', '2.8006'), ('thyroid nodules', '2.8598'), ('cone beam computed tomography', '2.9579'), ('radiologist assessment', '2.9712'), ('dental radiographs', '3.0119'), ('radiation dose', '3.0223'), ('computed tomography', '3.0390'), ('radiology reports', '3.0603'), ('radiomics', '3.0615'), ('ct image', '3.0697')]
313
+
314
+ The ESD values in the parotid gland were approximately 3.4 (2.8-4.1) and 4.7 (4.6-4.8) times greater than those for CBCT and panoramic examinations, respectively.
315
+ [('thyroid gland', '2.8125'), ('thyroid nodules', '2.8608'), ('radiologist assessment', '3.0113'), ('cone beam computed tomography', '3.0162'), ('dental radiographs', '3.0246'), ('computed tomography', '3.0931'), ('radiation dose', '3.1021'), ('radiomics', '3.1140'), ('ct image', '3.1140'), ('radiology reports', '3.1184')]
316
+
317
+
318
+ Conclusion: Although CBCT provides supplementary diagnostic advantages, the thyroid and parotid glands’ doses are higher than panoramic radiography.
319
+ [('thyroid gland', '2.7889'), ('radiologist assessment', '2.8358'), ('cone beam computed tomography', '2.8439'), ('thyroid nodules', '2.8511'), ('computed tomography', '2.9069'), ('dental radiographs', '2.9388'), ('radiomics', '2.9497'), ('ct image', '2.9575'), ('computed tomography angiography', '2.9677'), ('radiology reports', '2.9692')]
320
+
321
+ Therefore, the risks and benefits of each method should be considered before their prescription.
322
+ [('radiologist assessment', '2.9044'), ('radiation dose', '2.9634'), ('cone beam computed tomography', '3.0386'), ('radiology reports', '3.0537'), ('dental radiographs', '3.0683'), ('radiomics', '3.0795'), ('computed tomography', '3.0977'), ('quantitative radiology', '3.1038'), ('thyroid nodules', '3.1108'), ('thyroid gland', '3.1254')]
323
+
324
+ 14
325
+ Background: This study aimed to assess the risks of exposure-induced death (REID) in patients and embryos during CT examinations in Yazd province (Iran).
326
+ [('ct image', '2.7832'), ('snomed ct', '2.7833'), ('ct images', '2.7875'), ('ct scans', '2.8173'), ('ct scan', '2.8255'), ('radiation exposure', '2.8263'), ('radiation dose', '2.8662'), ('abdominal ct images', '2.8666'), ('ct data', '2.8748'), ('pelvic ct images', '2.8849')]
327
+
328
+ Materials and Methods: Data on the exposure parameters were retrospectively collected from six imaging institutions.
329
+ [('ct images', '2.8151'), ('radiation dose', '2.8301'), ('ct image', '2.8376'), ('ct scans', '2.8462'), ('snomed ct', '2.8542'), ('ct scan', '2.8751'), ('ct data', '2.8768'), ('radiation exposure', '2.8903'), ('abdominal ct images', '2.9109'), ('medical imaging', '2.9192')]
330
+
331
+ In total, 932 patients were included in this study and for each patient, organ doses were then estimated using ImpactDose software.
332
+ [('radiation dose', '2.7694'), ('snomed ct', '2.8919'), ('radiation exposure', '2.8922'), ('ct image', '2.9187'), ('ct images', '2.9325'), ('radiology reports', '2.9386'), ('pelvic ct images', '2.9414'), ('ct scans', '2.9449'), ('ct scan', '2.9530'), ('ct data', '2.9563')]
333
+
334
+ The REIDs were calculated by BEIR VII risk model and using PCXMC software.
335
+ [('risk assessment framework', '2.6617'), ('risk assessment model', '2.7201'), ('patient risk', '2.7714'), ('risk levels', '2.7893'), ('radiation dose', '2.8029'), ('hazard analysis', '2.8042'), ('snomed ct', '2.8248'), ('risk estimation', '2.8251'), ('dose-response data', '2.8286'), ('dose distributions', '2.8323')]
336
+
337
+ In the case of gestational irradiation, excess cancer risk of 0.006% per mSv was taken into account in terms of the ICRP 84 recommendations, to calculate the excess childhood cancer risk imposed on the embryo.
338
+ [('radiation dose', '2.6403'), ('radiation exposure', '2.7355'), ('radiation effects', '2.8257'), ('radiation intensity', '2.8841'), ('tumor purity', '2.8998'), ('patient risk', '2.9286'), ('radiomics', '2.9612'), ('national cancer institute', '2.9632'), ('risk levels', '2.9812'), ('risk assessment framework', '2.9877')]
339
+
340
+ Results: The highest estimated organ doses for abdomen-pelvis, routine chest, chest HRCT, brain, and sinus examinations were obtained as 12.82 mSv for kidneys, 12.09 mSv for thymus, 13.16 mSv for thymus, 29.71 mSv for brain, and 11.70 mSv for oral mucosa, respectively.
341
+ [('radiation dose', '2.8069'), ('ct image', '2.8070'), ('abdominal ct images', '2.8146'), ('pelvic ct images', '2.8159'), ('ct scan', '2.8311'), ('ct colonography', '2.8313'), ('ct images', '2.8356'), ('ct scans', '2.8439'), ('snomed ct', '2.8595'), ('radiation exposure', '2.9157')]
342
+
343
+ Across all procedures, abdomen-pelvis CT scan induced the highest excess REID to the patients (240 deaths per million).
344
+ [('pelvic ct images', '2.6698'), ('ct scans', '2.6997'), ('ct scan', '2.7043'), ('abdominal ct images', '2.7055'), ('ct image', '2.7214'), ('ct images', '2.7362'), ('ct colonography', '2.7427'), ('snomed ct', '2.7682'), ('preoperative images', '2.8378'), ('computed tomography', '2.8784')]
345
+
346
+ The highest delivered dose to the fetus was roughly 35 mSv, which was lower than the threshold dose proposed by ICRP (100 mSv) for the induction of malformations.
347
+ [('radiation dose', '2.6921'), ('radiation exposure', '2.8405'), ('ct image', '2.9215'), ('snomed ct', '2.9294'), ('tumor purity', '2.9322'), ('radiation effects', '2.9396'), ('ct images', '2.9419'), ('dangerous operations', '2.9445'), ('fetal head', '2.9522'), ('pelvic ct images', '2.9543')]
348
+
349
+ However, the associated excess fatal childhood cancer risk of 2122 incidence per million scans can be a subject of concern for public health experts.
350
+ [('radiation dose', '2.8571'), ('radiation exposure', '2.9046'), ('disease risk', '2.9388'), ('national cancer institute', '2.9404'), ('patient risk', '2.9456'), ('ct scans', '2.9535'), ('snomed ct', '2.9590'), ('tumor purity', '2.9806'), ('identified risks', '2.9828'), ('epidemiological studies', '2.9844')]
351
+
352
+ Conclusion: Based on the results, although death risks related to induced cancer from CT scans were negligible, this risk can be relatively significant for children exposed during the fetal period.
353
+ [('radiation exposure', '2.8768'), ('radiation dose', '2.8882'), ('ct scans', '2.9153'), ('ct image', '2.9362'), ('ct images', '2.9395'), ('ct scan', '2.9531'), ('snomed ct', '2.9626'), ('radiation effects', '2.9778'), ('pelvic ct images', '2.9788'), ('tumor purity', '2.9820')]
354
+
355
+ 15
356
+ Although information access systems have long supported people in accomplishing a wide range of tasks, we propose broadening the scope of users of information access systems to include task-driven machines, such as machine learning models.
357
+ [('retrieval model', '2.5240'), ('intelligent retrieval', '2.5252'), ('retrieval systems', '2.5288'), ('knowledge retrieval', '2.5543'), ('interactive retrieval', '2.5852'), ('automatic retrieval', '2.5858'), ('retrieval functions', '2.5921'), ('content-based retrieval', '2.5924'), ('flexible retrieval experience', '2.5925'), ('information retrieval systems', '2.5975')]
358
+
359
+ In this way, the core principles of indexing, representation, retrieval, and ranking can be applied and extended to substantially improve model generalization, scalability, robustness, and interpretability.
360
+ [('retrieval model', '2.3558'), ('ranked retrieval', '2.4575'), ('retrieval functions', '2.4780'), ('intelligent retrieval', '2.5009'), ('knowledge retrieval', '2.5132'), ('interactive retrieval', '2.5406'), ('automatic retrieval', '2.5439'), ('content-based retrieval', '2.5481'), ('similarity based retrieval', '2.5613'), ('flexible retrieval experience', '2.5663')]
361
+
362
+ We describe a generic retrieval-enhanced machine learning (REML) framework, which includes a number of existing models as special cases.
363
+ [('retrieval model', '2.2553'), ('retrieval functions', '2.4336'), ('ranked retrieval', '2.4832'), ('intelligent retrieval', '2.4840'), ('probabilistic retrieval model', '2.5081'), ('content-based retrieval', '2.5204'), ('retrieval systems', '2.5218'), ('knowledge retrieval', '2.5440'), ('information retrieval models', '2.5452'), ('automatic retrieval', '2.5463')]
364
+
365
+ REML challenges information retrieval conventions, presenting opportunities for novel advances in core areas, including optimization.
366
+ [('intelligent retrieval', '2.4780'), ('retrieval model', '2.4796'), ('retrieval functions', '2.4801'), ('ranked retrieval', '2.4858'), ('retrieval systems', '2.5278'), ('content-based retrieval', '2.5321'), ('retrievals', '2.5421'), ('interactive retrieval', '2.5785'), ('information retrieval algorithms', '2.5787'), ('content based retrieval', '2.5935')]
367
+
368
+ The REML research agenda lays a foundation for a new style of information access research and paves a path towards advancing machine learning and artificial intelligence.
369
+ [('retrieval model', '2.5410'), ('intelligent information', '2.5611'), ('intelligent retrieval', '2.5758'), ('knowledge retrieval', '2.5897'), ('retrieval functions', '2.6144'), ('content-based retrieval', '2.6248'), ('machine intelligence', '2.6316'), ('intelligent data', '2.6407'), ('augmented information', '2.6485'), ('maximum relevance', '2.6542')]
370
+
371
+ 16
372
+ This research analyzes human‐generated clarification questions to provide insights into how they are used to disambiguate and provide a better understanding of information needs.
373
+ [('user comprehension', '2.6499'), ('user understanding', '2.6971'), ('information seekers', '2.7045'), ('information seeking', '2.7150'), ('question answering', '2.7236'), ('improving information', '2.7564'), ('conversational threads', '2.7793'), ('explanation system', '2.7872'), ('informational needs', '2.7932'), ('natural language questions', '2.8043')]
374
+
375
+ A set of clarification questions is extracted from posts on the Stack Exchange platform.
376
+ [('conversational threads', '2.7278'), ('question answering', '2.7458'), ('discussion forums', '2.7508'), ('interactive discussion', '2.7988'), ('discussion forum', '2.8012'), ('online discussions', '2.8211'), ('forums', '2.8242'), ('user comprehension', '2.8251'), ('stack overflow questions', '2.8263'), ('online discussion forums', '2.8317')]
377
+
378
+ Novel taxonomy is defined for the annotation of the questions and their responses.
379
+ [('question answering', '2.7691'), ('conversational threads', '2.7744'), ('question classification', '2.7833'), ('user-generated annotations', '2.8021'), ('interactive discussion', '2.8079'), ('annotation study', '2.8341'), ('natural language description', '2.8408'), ('natural language questions', '2.8467'), ('question answering task', '2.8542'), ('user comments', '2.8543')]
380
+
381
+ We investigate the clarification questions in terms of whether they add any information to the post (the initial question posted by the asker) and the accepted answer, which is the answer chosen by the asker.
382
+ [('question answering', '2.6659'), ('user comprehension', '2.6939'), ('conversational threads', '2.7321'), ('interactive discussion', '2.7770'), ('natural language questions', '2.7829'), ('information seeking', '2.7896'), ('answering systems', '2.7933'), ('user understanding', '2.7937'), ('askers', '2.7954'), ('incremental elaboration', '2.8107')]
383
+
384
+ After identifying which clarification questions are more useful, we investigated the characteristics of these questions in terms of their types and patterns.
385
+ [('natural language questions', '2.7298'), ('question answering', '2.7682'), ('user comprehension', '2.7704'), ('conversational threads', '2.8015'), ('question classification', '2.8237'), ('information seekers', '2.8467'), ('information seeking', '2.8495'), ('question answering task', '2.8575'), ('user understanding', '2.8855'), ('answering systems', '2.8967')]
386
+
387
+ Non‐useful clarification questions are identified, and their patterns are compared with useful clarifications.
388
+ [('user comprehension', '2.7280'), ('conversational threads', '2.7363'), ('question answering', '2.7408'), ('information seeking', '2.7538'), ('information seekers', '2.7672'), ('natural language questions', '2.7844'), ('incremental elaboration', '2.7853'), ('informational needs', '2.8078'), ('improving information', '2.8081'), ('user understanding', '2.8245')]
389
+
390
+ Our analysis indicates that the most useful clarification questions have similar patterns, regardless of topic.
391
+ [('conversational threads', '2.5707'), ('information seeking', '2.6157'), ('question answering', '2.6275'), ('user comprehension', '2.6365'), ('information seekers', '2.6395'), ('natural language questions', '2.6814'), ('informational needs', '2.7081'), ('user understanding', '2.7091'), ('improving information', '2.7102'), ('textual relevance', '2.7143')]
392
+
393
+ This research contributes to an understanding of clarification in conversations and can provide insight for clarification dialogues in conversational search scenarios and for the possible system generation of clarification requests in information‐seeking conversations.
394
+ [('conversational threads', '2.5252'), ('conversational interfaces', '2.6182'), ('interactive discussion', '2.6246'), ('conversation structure', '2.6507'), ('conversational systems', '2.6701'), ('conversational framework', '2.6955'), ('information seeking', '2.7017'), ('information seekers', '2.7069'), ('conversation system', '2.7251'), ('crowd-powered conversational assistant', '2.7340')]
395
+
396
+ 17
397
+ At the foundation of scientific evaluation is the labor-intensive process of peer review.
398
+ [('textual reviews', '2.6462'), ('citations', '2.7389'), ('scientific journals', '2.7841'), ('scientific literature', '2.7867'), ('scientific references', '2.8225'), ('bibliometric study', '2.8243'), ('journal impact', '2.8425'), ('scientific articles', '2.8454'), ('reviewers', '2.8495'), ('reviewers', '2.8495')]
399
+
400
+ This critical task requires participants to consume vast amounts of highly technical text.
401
+ [('argumentation mining', '2.7383'), ('textual reviews', '2.7680'), ('discourse processing', '2.7993'), ('discourse trees', '2.8379'), ('in-depth discussion', '2.8466'), ('citations', '2.8583'), ('discourse tree', '2.8829'), ('citation networks', '2.8844'), ('citation network', '2.8904'), ('argumentative structure', '2.8912')]
402
+
403
+ Prior work has annotated different aspects of review argumentation, but discourse relations between reviews and rebuttals have yet to be examined.
404
+ [('argumentation mining', '2.6895'), ('argumentative structure', '2.7285'), ('discourse trees', '2.7311'), ('discourse processing', '2.7318'), ('discourse analysis', '2.7430'), ('textual reviews', '2.7477'), ('discourse tree', '2.8009'), ('discourse segmentation', '2.8390'), ('discourse context', '2.8412'), ('citation context analysis', '2.8425')]
405
+
406
+ We present DISAPERE, a labeled dataset of 20k sentences contained in 506 review-rebuttal pairs in English, annotated by experts.
407
+ [('argumentation mining', '2.7269'), ('textual reviews', '2.7443'), ('annotated corpus', '2.7585'), ('discourse trees', '2.7765'), ('discourse processing', '2.8188'), ('annotated dataset', '2.8294'), ('discourse segmentation', '2.8350'), ('discourse tree', '2.8591'), ('citations', '2.8846'), ('annotated corpora', '2.8866')]
408
+
409
+ DISAPERE synthesizes label sets from prior work and extends them to include fine-grained annotation of the rebuttal sentences, characterizing their context in the review and the authors’ stance towards review arguments.
410
+ [('argumentation mining', '2.6445'), ('discourse trees', '2.7377'), ('textual reviews', '2.7563'), ('annotated corpus', '2.7663'), ('discourse processing', '2.7714'), ('argumentative structure', '2.7857'), ('discourse segmentation', '2.7876'), ('citation context analysis', '2.7977'), ('discourse tree', '2.8237'), ('annotation analysis', '2.8237')]
411
+
412
+ Further, we annotate every review and rebuttal sentence.
413
+ [('argumentation mining', '2.7465'), ('textual reviews', '2.7659'), ('discourse trees', '2.7863'), ('discourse processing', '2.8047'), ('discourse segmentation', '2.8154'), ('argumentative structure', '2.8188'), ('annotated corpus', '2.8426'), ('discourse tree', '2.8449'), ('discourse analysis', '2.8628'), ('annotation analysis', '2.8669')]
414
+
415
+ We show that discourse cues from rebuttals can shed light on the quality and interpretation of reviews.
416
+ [('discourse processing', '2.6747'), ('discourse analysis', '2.6975'), ('argumentative structure', '2.7335'), ('discourse trees', '2.7371'), ('textual reviews', '2.7417'), ('argumentation mining', '2.7521'), ('discourse context', '2.7718'), ('citation context analysis', '2.8024'), ('discourse structure', '2.8129'), ('citations', '2.8151')]
417
+
418
+ Further, an understanding of the argumentative strategies employed by the reviewers and authors provides useful signal for area chairs and other decision makers.
419
+ [('textual reviews', '2.6933'), ('argumentative structure', '2.6946'), ('argumentation mining', '2.7119'), ('citations', '2.7430'), ('in-depth discussion', '2.7486'), ('argumentation theory', '2.7554'), ('discourse analysis', '2.8121'), ('citation context analysis', '2.8318'), ('discourse processing', '2.8712'), ('scientific literature', '2.8722')]
420
+
421
+ 18
422
+ Retrieval-augmented generation models offer many benefits over standalone language models: besides a textual answer to a given query they provide provenance items retrieved from an updateable knowledge base.
423
+ [('natural language generation', '2.9254'), ('natural language generator', '2.9263'), ('automated generation', '2.9336'), ('text-based models', '2.9510'), ('natural language generation system', '2.9519'), ('attention-based neural machine translation', '2.9668'), ('sentence retrieval', '2.9690'), ('two-stage language model searching', '2.9690'), ('knowledge retrieval', '2.9740'), ('source-to-source translation', '2.9744')]
424
+
425
+ However, they are also more complex systems and need to handle long inputs.
426
+ [('attention-based neural machine translation', '2.8489'), ('sequence-to-sequence models', '2.8899'), ('sequence-to-sequence learning', '2.8977'), ('neural sequence-to-sequence models', '2.9278'), ('neural sequence-to-sequence model', '2.9287'), ('natural language generator', '2.9561'), ('sequence-to-sequence model', '2.9567'), ('natural language generation system', '2.9607'), ('unsupervised neural machine translation', '2.9672'), ('automated generation', '2.9674')]
427
+
428
+ In this work, we introduce FiD-Light to strongly increase the efficiency of the state-of-the-art retrieval-augmented FiD model, while maintaining the same level of effectiveness.
429
+ [('attention-based neural machine translation', '2.7833'), ('sentence retrieval', '2.8708'), ('sequence-to-sequence learning', '2.8798'), ('unsupervised neural machine translation', '2.8803'), ('sentence encoders', '2.8972'), ('sequence-to-sequence models', '2.9031'), ('neural sequence-to-sequence models', '2.9211'), ('neural sequence-to-sequence model', '2.9302'), ('hierarchical pitman-yor language model', '2.9383'), ('discriminative language modeling', '2.9411')]
430
+
431
+ Our FiD-Light model constrains the information flow from the encoder (which encodes passages separately) to the decoder (using concatenated encoded representations).
432
+ [('sentence encoders', '2.6388'), ('attention-based neural machine translation', '2.7172'), ('sequence encoder', '2.7881'), ('neural sequence-to-sequence model', '2.8219'), ('progressive encoding', '2.8292'), ('neural sequence-to-sequence models', '2.8332'), ('unsupervised neural machine translation', '2.8350'), ('sequence-to-sequence learning', '2.8428'), ('selective decode-and-forward', '2.8594'), ('layered encoding', '2.8615')]
433
+
434
+ Furthermore, we adapt FiD-Light with re-ranking capabilities through textual source pointers, to improve the top-ranked provenance precision.
435
+ [('ranked retrieval', '2.7236'), ('sentence retrieval', '2.7902'), ('attention-based neural machine translation', '2.7908'), ('ranking', '2.8201'), ('context-aware retrieval', '2.8230'), ('neural ranking models', '2.8395'), ('content retrieval', '2.8490'), ('sequence-to-sequence learning', '2.8605'), ('unsupervised neural machine translation', '2.8620'), ('textual representations', '2.8625')]
436
+
437
+ Our experiments on a diverse set of seven knowledge intensive tasks (KILT) show FiD-Light consistently improves the Pareto frontier between query latency and effectiveness.
438
+ [('attention-based neural machine translation', '2.8231'), ('sentence retrieval', '2.8390'), ('sequence-to-sequence learning', '2.8393'), ('information retrieval performance', '2.8555'), ('content retrieval', '2.8569'), ('interactive retrieval', '2.8694'), ('ranked retrieval', '2.8704'), ('corpus', '2.8753'), ('context-aware retrieval', '2.8754'), ('knowledge retrieval', '2.8935')]
439
+
440
+ FiD-Light with source pointing sets substantial new state-of-the-art results on six KILT tasks for combined text generation and provenance retrieval evaluation, while maintaining reasonable efficiency.
441
+ [('attention-based neural machine translation', '2.8618'), ('sentence retrieval', '2.8688'), ('text retrieval conference', '2.9006'), ('source-to-source translation', '2.9006'), ('context-aware retrieval', '2.9008'), ('content retrieval', '2.9017'), ('sequence-to-sequence learning', '2.9071'), ('automated generation', '2.9167'), ('interactive retrieval', '2.9181'), ('unsupervised neural machine translation', '2.9225')]
442
+
443
+ 19
444
+ Information Retrieval has a long history of applying either discriminative or generative modeling to retrieval and ranking tasks.
445
+ [('neural ranking models', '2.6455'), ('ranked retrieval', '2.7252'), ('discriminative language models', '2.7574'), ('discriminative language modeling', '2.7610'), ('discriminative reranking', '2.7697'), ('word retrieval', '2.7719'), ('information retrieval models', '2.7771'), ('retrieval task', '2.8198'), ('relevance networks', '2.8299'), ('retrieval tasks', '2.8372')]
446
+
447
+ Recent developments in transformer architectures and multi-task learning techniques have dramatically improved our ability to train effective neural models capable of resolving a wide variety of tasks using either of these paradigms.
448
+ [('neural ranking models', '2.6959'), ('sequence-to-sequence learning', '2.7914'), ('term networks', '2.8025'), ('neural sequence-to-sequence model', '2.8218'), ('multi-task cnn', '2.8235'), ('neural sequence-to-sequence models', '2.8243'), ('relevance networks', '2.8283'), ('hierarchical task networks', '2.8296'), ('sentence encoders', '2.8541'), ('representation learning', '2.8634')]
449
+
450
+ In this paper, we propose a novel multi-task learning approach which can be used to produce more effective neural ranking models.
451
+ [('neural ranking models', '2.5656'), ('sequence-to-sequence learning', '2.7754'), ('term networks', '2.7884'), ('neural sequence-to-sequence models', '2.8023'), ('neural sequence-to-sequence model', '2.8052'), ('relevance networks', '2.8062'), ('neural language models', '2.8195'), ('neural language model', '2.8303'), ('sentence encoders', '2.8387'), ('discriminative language modeling', '2.8413')]
452
+
453
+ The key idea is to improve the quality of the underlying transformer model by cross-training a retrieval task and one or more complementary language generation tasks.
454
+ [('sequence-to-sequence learning', '2.8403'), ('neural ranking models', '2.8518'), ('neural sequence-to-sequence model', '2.8905'), ('unsupervised neural machine translation', '2.8925'), ('neural machine translation model', '2.9055'), ('attention-based neural machine translation', '2.9144'), ('neural sequence-to-sequence models', '2.9157'), ('sentence encoders', '2.9194'), ('sentence generation', '2.9231'), ('generating sentences', '2.9462')]
455
+
456
+ By targeting the training on the encoding layer in the transformer architecture, our experimental results show that the proposed multi-task learning approach consistently improves retrieval effectiveness on the targeted collection and can easily be re-targeted to new ranking tasks.
457
+ [('neural ranking models', '2.7618'), ('sequence-to-sequence learning', '2.7991'), ('multi-task cnn', '2.8009'), ('transform learning', '2.8373'), ('sentence encoders', '2.8379'), ('hierarchical task networks', '2.8477'), ('term networks', '2.8534'), ('neural sequence-to-sequence model', '2.8683'), ('unsupervised neural machine translation', '2.8753'), ('neural sequence-to-sequence models', '2.8842')]
458
+
459
+ We provide an in-depth analysis showing how multi-task learning modifies model behaviors, resulting in more general models.
460
+ [('neural ranking models', '2.6894'), ('multi-task cnn', '2.7016'), ('transform learning', '2.7195'), ('term networks', '2.7412'), ('hierarchical task networks', '2.7414'), ('multi-task learning', '2.7719'), ('sequence-to-sequence learning', '2.7745'), ('representation learning', '2.7747'), ('relevance networks', '2.7770'), ('task learning', '2.7840')]
461
+
462
+ 20
463
+ RAZIEH RAHIMI, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA YOUNGWOO KIM, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA HAMED ZAMANI, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA JAMES ALLAN, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA
464
+ [('document retrieval', '2.2691'), ('document relevance', '2.3111'), ('document search', '2.3932'), ('relevance information', '2.4311'), ('information retrieval algorithms', '2.4850'), ('information retrieval models', '2.4930'), ('information retrieval', '2.4937'), ('automatic relevance determination', '2.4952'), ('information retrieval technologies', '2.4960'), ('information retrieval tasks', '2.5095')]
465
+
data/users/hzamani/embeds-hzamani-doc.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2af4832caa6efd69279f559727a1b6ae01409c484d85efe5475568c94468a6ef
3
+ size 129152
data/users/hzamani/embeds-hzamani-sent.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6ed41e4f7e25e0ca6c8573c0cf0607067894e227f76edc4814933f6008068dd
3
+ size 452617
data/users/hzamani/pid2idx-hzamani-doc.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"0": 0, "1": 1, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, "9": 9, "10": 10, "11": 11, "12": 12, "13": 13, "14": 14, "15": 15, "16": 16, "17": 17, "18": 18, "19": 19, "20": 20}
data/users/hzamani/seedset-hzamani.json ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "username": "hzamani",
3
+ "s2_authorid": "2499986",
4
+ "papers": [
5
+ {
6
+ "title": "You can't pick your neighbors, or can you? When and how to rely on retrieval in the $k$NN-LM",
7
+ "abstract": [
8
+ "Retrieval-enhanced language models (LMs), which condition their predictions on text retrieved from large external datastores, have re-cently shown signi\ufb01cant perplexity improvements compared to standard LMs.",
9
+ "One such approach, the k NN-LM, interpolates any existing LM\u2019s predictions with the output of a k nearest neighbors model and requires no additional training.",
10
+ "In this paper, we explore the importance of lexical and semantic matching in the context of items retrieved by k NN-LM.",
11
+ "We \ufb01nd two trends: (1) the presence of large overlapping n -grams between the datastore and evaluation set plays an important fac-tor in strong performance, even when the datastore is derived from the training data; and (2) the k NN-LM is most bene\ufb01cial when retrieved items have high semantic similarity with the query.",
12
+ "Based on our analysis, we de\ufb01ne a new formulation of the k NN-LM that uses retrieval quality to assign the interpolation coef\ufb01cient.",
13
+ "We empirically measure the effectiveness of our approach on two English language modeling datasets, Wikitext-103 and PG-19.",
14
+ "Our re-formulation of the k NN-LM is bene\ufb01cial in both cases, and leads to nearly 4% improvement in perplexity on the Wikitext-103 test set."
15
+ ]
16
+ },
17
+ {
18
+ "title": "Maruna Bot: An Extensible Retrieval-Focused Framework for Task-Oriented Dialogues",
19
+ "abstract": [
20
+ "We present Maruna Bot, a Task-Oriented Dialogue System (TODS) that assists people in cooking or Do-It-Yourself (DIY) tasks using either a speech-only or multi-modal (speech and screen) interface.",
21
+ "Building such a system is challenging, because it touches many research areas including language understanding, text generation, task planning, dialogue state tracking, question answering, multi-modal retrieval, instruction summarization, robustness, and result presentation, among others.",
22
+ "Our bot lets users choose their desired tasks with flexible phrases, uses multi-stage intent classification, asks clarifying questions to improve retrieval, supports in-task and open-domain Question Answering throughout the conversation, effectively maintains the task status, performs query expansion and instruction re-ranking using both textual and visual signals."
23
+ ]
24
+ },
25
+ {
26
+ "title": "Conversational Information Seeking",
27
+ "abstract": [
28
+ "Conversational information seeking (CIS) is concerned with a sequence of interactions between one or more users and an information system.",
29
+ "Interactions in CIS are primarily based on natural language dialogue, while they may include other types of interactions, such as click, touch, and body gestures.",
30
+ "This monograph provides a thorough overview of CIS definitions, applications, interactions, interfaces, design, implementation, and evaluation.",
31
+ "This monograph views CIS applications as including conversational search, conversational question answering, and conversational recommendation.",
32
+ "Our aim is to provide an overview of past research related to CIS, introduce the current state-of-the-art in CIS, highlight the challenges still being faced in the community.",
33
+ "and suggest future directions."
34
+ ]
35
+ },
36
+ {
37
+ "title": "Curriculum Learning for Dense Retrieval Distillation",
38
+ "abstract": [
39
+ "Recent work has shown that more effective dense retrieval models can be obtained by distilling ranking knowledge from an existing base re-ranking model.",
40
+ "In this paper, we propose a generic curriculum learning based optimization framework called CL-DRD that controls the difficulty level of training data produced by the re-ranking (teacher) model.",
41
+ "CL-DRD iteratively optimizes the dense retrieval (student) model by increasing the difficulty of the knowledge distillation data made available to it.",
42
+ "In more detail, we initially provide the student model coarse-grained preference pairs between documents in the teacher's ranking, and progressively move towards finer-grained pairwise document ordering requirements.",
43
+ "In our experiments, we apply a simple implementation of the CL-DRD framework to enhance two state-of-the-art dense retrieval models.",
44
+ "Experiments on three public passage retrieval datasets demonstrate the effectiveness of our proposed framework."
45
+ ]
46
+ },
47
+ {
48
+ "title": "Stochastic Optimization of Text Set Generation for Learning Multiple Query Intent Representations",
49
+ "abstract": [
50
+ "Learning multiple intent representations for queries has potential applications in facet generation, document ranking, search result diversification, and search explanation.",
51
+ "The state-of-the-art model for this task assumes that there is a sequence of intent representations.",
52
+ "In this paper, we argue that the model should not be penalized as long as it generates an accurate and complete set of intent representations.",
53
+ "Based on this intuition, we propose a stochastic permutation invariant approach for optimizing such networks.",
54
+ "We extrinsically evaluate the proposed approach on a facet generation task and demonstrate significant improvements compared to competitive baselines.",
55
+ "Our analysis shows that the proposed permutation invariant approach has the highest impact on queries with more potential intents."
56
+ ]
57
+ },
58
+ {
59
+ "title": "The cardioprotective effects of nano\u2010curcumin against doxorubicin\u2010induced cardiotoxicity: A systematic review",
60
+ "abstract": [
61
+ "Although the chemotherapeutic drug, doxorubicin, is commonly used to treat various malignant tumors, its clinical use is restricted because of its toxicity especially cardiotoxicity.",
62
+ "The use of curcumin may alleviate some of the doxorubicin\u2010induced cardiotoxic effects.",
63
+ "Especially, using the nano\u2010formulation of curcumin can overcome the poor bioavailability of curcumin and enhance its physicochemical properties regarding its efficacy.",
64
+ "In this study, we systematically reviewed the potential cardioprotective effects of nano\u2010curcumin against the doxorubicin\u2010induced cardiotoxicity.",
65
+ "A systematic search was accomplished based on Preferred Reporting Items for Systematic Reviews and Meta\u2010Analyses guidelines for the identification of all relevant articles on \u201cthe role of nano\u2010curcumin on doxorubicin\u2010induced cardiotoxicity\u201d in the electronic databases of Scopus, PubMed, and Web of Science up to July 2021.",
66
+ "One hundred and sixty\u2010nine articles were screened following a predefined set of inclusion and exclusion criteria.",
67
+ "Ten eligible scientific papers were finally included in the present systematic review.",
68
+ "The administration of doxorubicin reduced the body and heart weights of mice/rats compared to the control groups.",
69
+ "In contrast, the combined treatment of doxorubicin and nano\u2010curcumin increased the body and heart weights of animals compared with the doxorubicin\u2010treated groups alone.",
70
+ "Furthermore, doxorubicin could significantly induce the biochemical and histological changes in the cardiac tissue; however, coadministration of nano\u2010curcumin formulation demonstrated a pattern opposite to the doxorubicin\u2010induced changes.",
71
+ "The coadministration of nano\u2010curcumin alleviates the doxorubicin\u2010induced cardiotoxicity through various mechanisms including antioxidant, anti\u2010inflammatory, and antiapoptotic effects.",
72
+ "Also, the cardioprotective effect of nano\u2010curcumin formulation against doxorubicin\u2010induced cardiotoxicity was higher than free curcumin."
73
+ ]
74
+ },
75
+ {
76
+ "title": "Multi-Task Retrieval-Augmented Text Generation with Relevance Sampling",
77
+ "abstract": [
78
+ "This paper studies multi-task training of retrieval-augmented generation models for knowledge-intensive tasks.",
79
+ "We propose to clean the training set by utilizing a distinct property of knowledge-intensive generation: The connection of query-answer pairs to items in the knowledge base.",
80
+ "We \ufb01lter training examples via a threshold of con\ufb01dence on the relevance labels, whether a pair is answerable by the knowledge base or not.",
81
+ "We train a single Fusion-in-Decoder (FiD) generator on seven combined tasks of the KILT benchmark.",
82
+ "The experimental results suggest that our simple yet effective approach substantially improves competitive baselines on two strongly imbalanced tasks; and shows either smaller improvements or no signi\ufb01cant regression on the remaining tasks.",
83
+ "Furthermore, we demonstrate our multi-task training with relevance label sampling scales well with increased model capacity and achieves state-of-the-art results in \ufb01ve out of seven KILT tasks."
84
+ ]
85
+ },
86
+ {
87
+ "title": "Revisiting Open Domain Query Facet Extraction and Generation",
88
+ "abstract": [
89
+ "Web search queries can often be characterized by various facets.",
90
+ "Extracting and generating query facets has various real-world applications, such as displaying facets to users in a search interface, search result diversification, clarifying question generation, and enabling exploratory search.",
91
+ "In this work, we revisit the task of query facet extraction and generation and study various formulations of this task, including facet extraction as sequence labeling, facet generation as autoregressive text generation or extreme multi-label classification.",
92
+ "We conduct extensive experiments and demonstrate that these approaches lead to complementary sets of facets.",
93
+ "We also explored various aggregation approaches based on relevance and diversity to combine the facet sets produced by different formulations of the task.",
94
+ "The approaches presented in this paper outperform state-of-the-art baselines in terms of both precision and recall.",
95
+ "We confirm the quality of the proposed methods through manual annotation.",
96
+ "Since there is no open-source software for facet extraction and generation, we release a toolkit named Faspect, that includes various model implementations for this task."
97
+ ]
98
+ },
99
+ {
100
+ "title": "Conversational Information Seeking: Theory and Application",
101
+ "abstract": [
102
+ "Conversational information seeking (CIS) involves interaction sequences between one or more users and an information system.",
103
+ "Interactions in CIS are primarily based on natural language dialogue, while they may include other types of interactions, such as click, touch, and body gestures.",
104
+ "CIS recently attracted significant attention and advancements continue to be made.",
105
+ "This tutorial follows the content of the recent Conversational Information Seeking book authored by several of the tutorial presenters.",
106
+ "The tutorial aims to be an introduction to CIS for newcomers to CIS in addition to the recent advanced topics and state-of-the-art approaches for students and researchers with moderate knowledge of the topic.",
107
+ "A significant part of the tutorial is dedicated to hands-on experiences based on toolkits developed by the presenters for conversational passage retrieval and multi-modal task-oriented dialogues.",
108
+ "The outcomes of this tutorial include theoretical and practical knowledge, including a forum to meet researchers interested in CIS."
109
+ ]
110
+ },
111
+ {
112
+ "title": "MIMICS-Duo: Offline & Online Evaluation of Search Clarification",
113
+ "abstract": [
114
+ "Asking clarification questions is an active area of research; however, resources for training and evaluating search clarification methods are not sufficient.",
115
+ "To address this issue, we describe MIMICS-Duo, a new freely available dataset of 306 search queries with multiple clarifications (a total of 1,034 query-clarification pairs).",
116
+ "MIMICS-Duo contains fine-grained annotations on clarification questions and their candidate answers and enhances the existing MIMICS datasets by enabling multi-dimensional evaluation of search clarification methods, including online and offline evaluation.",
117
+ "We conduct extensive analysis to demonstrate the relationship between offline and online search clarification datasets and outline several research directions enabled by MIMICS-Duo.",
118
+ "We believe that this resource will help researchers better understand clarification in search."
119
+ ]
120
+ },
121
+ {
122
+ "title": "Are We There Yet? A Decision Framework for Replacing Term Based Retrieval with Dense Retrieval Systems",
123
+ "abstract": [
124
+ "Recently, several dense retrieval (DR) models have demonstrated competitive performance to term-based retrieval that are ubiquitous in search systems.",
125
+ "In contrast to term-based matching, DR projects queries and documents into a dense vector space and retrieves results via (approximate) nearest neighbor search.",
126
+ "Deploying a new system, such as DR, inevitably involves tradeoffs in aspects of its performance.",
127
+ "Established retrieval systems running at scale are usually well understood in terms of effectiveness and costs, such as query latency, indexing throughput, or storage requirements.",
128
+ "In this work, we propose a framework with a set of criteria that go beyond simple effectiveness measures to thoroughly compare two retrieval systems with the explicit goal of assessing the readiness of one system to replace the other.",
129
+ "This includes careful tradeoff considerations between effectiveness and various cost factors.",
130
+ "Furthermore, we describe guardrail criteria, since even a system that is better on average may have systematic failures on a minority of queries.",
131
+ "The guardrails check for failures on certain query characteristics and novel failure types that are only possible in dense retrieval systems.",
132
+ "We demonstrate our decision framework on a Web ranking scenario.",
133
+ "In that scenario, state-of-the-art DR models have surprisingly strong results, not only on average performance but passing an extensive set of guardrail tests, showing robustness on different query characteristics, lexical matching, generalization, and number of regressions.",
134
+ "DR with approximate nearest neighbor search has comparable low query latency to term-based systems.",
135
+ "The main reason to reject current DR models in this scenario is the cost of vectorization, which is much higher than the cost of building a traditional index.",
136
+ "It is impossible to predict whether DR will become ubiquitous in the future, but one way this is possible is through repeated applications of decision processes such as the one presented here."
137
+ ]
138
+ },
139
+ {
140
+ "title": "Stochastic Retrieval-Conditioned Reranking",
141
+ "abstract": [
142
+ "The multi-stage cascaded architecture has been adopted by many search engines for efficient and effective retrieval.",
143
+ "This architecture consists of a stack of retrieval and reranking models in which efficient retrieval models are followed by effective (neural) learning-to-rank models.",
144
+ "The optimization of these learning-to-rank models is loosely connected to the early stage retrieval models.",
145
+ "This paper draws theoretical connections between the early stage retrieval and late stage reranking models by deriving expected reranking performance conditioned on the early stage retrieval results.",
146
+ "Our findings shed light on optimization of both retrieval and reranking models.",
147
+ "As a result, we also introduce a novel loss function for training reranking models that leads to significant improvements on multiple public benchmarks.",
148
+ "Our findings provide theoretical and empirical guidelines for developing multi-stage cascaded retrieval models."
149
+ ]
150
+ },
151
+ {
152
+ "title": "Predicting Prerequisite Relations for Unseen Concepts",
153
+ "abstract": [
154
+ "Concept prerequisite learning (CPL) plays a key role in developing technologies that assist people to learn a new complex topic or concept.",
155
+ "Previous work commonly assumes that all concepts are given at training time and solely focuses on predicting the unseen prerequisite relationships between them.",
156
+ "However, many real-world scenarios deal with concepts that are left undiscovered at training time, which is relatively unexplored.",
157
+ "This paper studies this problem and proposes a novel alternating knowledge distillation approach to take advantage of both contentand graph-based models for this task.",
158
+ "Extensive experiments on three public benchmarks demonstrate up to 10% improvements in terms of F1 score."
159
+ ]
160
+ },
161
+ {
162
+ "title": "Entrance Surface Dose Measurement at Thyroid and Parotid Gland Regions in Cone-Beam Computed Tomography and Panoramic Radiography",
163
+ "abstract": [
164
+ "Purpose: Ionizing radiation-absorbed doses is a crucial concern in Cone-Beam Computed Tomography (CBCT) and panoramic radiography.",
165
+ "This study aimed to evaluate and compare the Entrance Skin Doses (ESD) of thyroid and parotid gland regions in CBCT and panoramic radiography in Yazd province, Iran.",
166
+ "\nMaterials and Methods: In this cross-sectional study, 332 patients were included, who were then divided into two age groups (adult and pediatric) and underwent dental CBCT and panoramic radiography.",
167
+ "Twelve Thermoluminescence Dosimeters (TLD- GR200) were used for each patient to measure the ESD of thyroid and parotid glands.",
168
+ "The differences between the ESD values in CBCT and panoramic examinations as well as between the adults and children groups were evaluated by one-way ANOVA and Man-Whitney tests.",
169
+ "\nResults: The mean and Standard Deviation (SD) values of ESD in panoramic imaging were equal to 61 \u00b1 4 and 290 \u00b1 12 \u00b5Gy for the thyroid and parotid glands of the adult groups, respectively.",
170
+ "Notably, these values for CBCT were significantly higher (P<0.01), as 377 \u00b1 139 and 1554 \u00b1 177 \u00b5Gy, respectively.",
171
+ "Moreover, the mean ESD values in the panoramic examination were 41 \u00b1 3 and 190 \u00b1 16 \u00b5Gy for thyroid and parotid glands for the children group, while they were 350 \u00b1 120 and 990 \u00b1 107 \u00b5Gy in CBCT (P<0.01), respectively.",
172
+ "The ESD values in the parotid gland were approximately 3.4 (2.8-4.1) and 4.7 (4.6-4.8) times greater than those for CBCT and panoramic examinations, respectively.",
173
+ "\nConclusion: Although CBCT provides supplementary diagnostic advantages, the thyroid and parotid glands\u2019 doses are higher than panoramic radiography.",
174
+ "Therefore, the risks and benefits of each method should be considered before their prescription."
175
+ ]
176
+ },
177
+ {
178
+ "title": "Estimating the risks of exposure-induced death associated with common computed tomography procedures",
179
+ "abstract": [
180
+ "Background : This study aimed to assess the risks of exposure - induced death (REID) in patients and embryos during CT examinations in Yazd province (Iran).",
181
+ "Materials and Methods: Data on the exposure parameters were retrospectively collected from six imaging institutions.",
182
+ "In total, 932 patients were included in this study and for each patient, organ doses were then estimated using ImpactDose software.",
183
+ "The REIDs were calculated by BEIR VII risk model and using PCXMC software.",
184
+ "In the case of gestational irradiation, excess cancer risk of 0.006% per mSv was taken into account in terms of the ICRP 84 recommendations, to calculate the excess childhood cancer risk imposed on the embryo.",
185
+ "Results: The highest estimated organ doses for abdomen - pelvis, routine chest, chest HRCT, brain, and sinus examinations were obtained as 12.82 mSv for kidneys, 12.09 mSv for thymus, 13.16 mSv for thymus, 29.71 mSv for brain, and 11.70 mSv for oral mucosa, respectively.",
186
+ "Across all procedures, abdomen - pelvis CT scan induced the highest excess REID to the patients (240 deaths per million).",
187
+ "The highest delivered dose to the fetus was roughly 35 mSv, which was lower than the threshold dose proposed by ICRP (100 mSv) for the induction of malformations.",
188
+ "However, the associated excess fatal childhood cancer risk of 2122 incidence per million scans can be a subject of concern for public health experts.",
189
+ "Conclusion: Based on the results, although death risks related to induced cancer from CT scans were negligible, this risk can be relatively significant for children exposed during the fetal period."
190
+ ]
191
+ },
192
+ {
193
+ "title": "Retrieval-Enhanced Machine Learning",
194
+ "abstract": [
195
+ "Although information access systems have long supportedpeople in accomplishing a wide range of tasks, we propose broadening the scope of users of information access systems to include task-driven machines, such as machine learning models.",
196
+ "In this way, the core principles of indexing, representation, retrieval, and ranking can be applied and extended to substantially improve model generalization, scalability, robustness, and interpretability.",
197
+ "We describe a generic retrieval-enhanced machine learning (REML) framework, which includes a number of existing models as special cases.",
198
+ "REML challenges information retrieval conventions, presenting opportunities for novel advances in core areas, including optimization.",
199
+ "The REML research agenda lays a foundation for a new style of information access research and paves a path towards advancing machine learning and artificial intelligence."
200
+ ]
201
+ },
202
+ {
203
+ "title": "Analyzing clarification in asynchronous information\u2010seeking conversations",
204
+ "abstract": [
205
+ "This research analyzes human\u2010generated clarification questions to provide insights into how they are used to disambiguate and provide a better understanding of information needs.",
206
+ "A set of clarification questions is extracted from posts on the Stack Exchange platform.",
207
+ "Novel taxonomy is defined for the annotation of the questions and their responses.",
208
+ "We investigate the clarification questions in terms of whether they add any information to the post (the initial question posted by the asker) and the accepted answer, which is the answer chosen by the asker.",
209
+ "After identifying, which clarification questions are more useful, we investigated the characteristics of these questions in terms of their types and patterns.",
210
+ "Non\u2010useful clarification questions are identified, and their patterns are compared with useful clarifications.",
211
+ "Our analysis indicates that the most useful clarification questions have similar patterns, regardless of topic.",
212
+ "This research contributes to an understanding of clarification in conversations and can provide insight for clarification dialogues in conversational search scenarios and for the possible system generation of clarification requests in information\u2010seeking conversations."
213
+ ]
214
+ },
215
+ {
216
+ "title": "DISAPERE: A Dataset for Discourse Structure in Peer Review Discussions",
217
+ "abstract": [
218
+ "At the foundation of scientific evaluation is the labor-intensive process of peer review.",
219
+ "This critical task requires participants to consume vast amounts of highly technical text.",
220
+ "Prior work has annotated different aspects of review argumentation, but discourse relations between reviews and rebuttals have yet to be examined.",
221
+ "We present DISAPERE, a labeled dataset of 20k sentences contained in 506 review-rebuttal pairs in English, annotated by experts.",
222
+ "DISAPERE synthesizes label sets from prior work and extends them to include fine-grained annotation of the rebuttal sentences, characterizing their context in the review and the authors\u2019 stance towards review arguments.",
223
+ "Further, we annotate every review and rebuttal sentence.",
224
+ "We show that discourse cues from rebuttals can shed light on the quality and interpretation of reviews.",
225
+ "Further, an understanding of the argumentative strategies employed by the reviewers and authors provides useful signal for area chairs and other decision makers."
226
+ ]
227
+ },
228
+ {
229
+ "title": "FiD-Light: Efficient and Effective Retrieval-Augmented Text Generation",
230
+ "abstract": [
231
+ "Retrieval-augmented generation models offer many bene\ufb01ts over standalone language models: besides a textual answer to a given query they provide provenance items retrieved from an updateable knowledge base.",
232
+ "However, they are also more complex systems and need to handle long inputs.",
233
+ "In this work, we introduce FiD-Light to strongly increase the ef\ufb01ciency of the state-of-the-art retrieval-augmented FiD model, while maintaining the same level of effectiveness.",
234
+ "Our FiD-Light model constrains the information \ufb02ow from the encoder (which encodes passages separately) to the decoder (using concatenated encoded representations).",
235
+ "Fur-thermore, we adapt FiD-Light with re-ranking capabilities through textual source pointers, to improve the top-ranked provenance precision.",
236
+ "Our experiments on a diverse set of seven knowledge intensive tasks (KILT) show FiD-Light consistently improves the Pareto frontier between query latency and effectiveness.",
237
+ "FiD-Light with source pointing sets substantial new state-of-the-art results on six KILT tasks for combined text generation and provenance retrieval evaluation, while maintaining reasonable ef\ufb01ciency."
238
+ ]
239
+ },
240
+ {
241
+ "title": "Generalizing Discriminative Retrieval Models using Generative Tasks",
242
+ "abstract": [
243
+ "Information Retrieval has a long history of applying either discriminative or generative modeling to retrieval and ranking tasks.",
244
+ "Recent developments in transformer architectures and multi-task learning techniques have dramatically improved our ability to train effective neural models capable of resolving a wide variety of tasks using either of these paradigms.",
245
+ "In this paper, we propose a novel multi-task learning approach which can be used to produce more effective neural ranking models.",
246
+ "The key idea is to improve the quality of the underlying transformer model by cross-training a retrieval task and one or more complementary language generation tasks.",
247
+ "By targeting the training on the encoding layer in the transformer architecture, our experimental results show that the proposed multi-task learning approach consistently improves retrieval effectiveness on the targeted collection and can easily be re-targeted to new ranking tasks.",
248
+ "We provide an in-depth analysis showing how multi-task learning modifies model behaviors, resulting in more general models."
249
+ ]
250
+ },
251
+ {
252
+ "title": "Explaining Documents' Relevance to Search Queries",
253
+ "abstract": [
254
+ "RAZIEH RAHIMI, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA YOUNGWOO KIM, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA HAMED ZAMANI, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA JAMES ALLAN, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA"
255
+ ]
256
+ }
257
+ ],
258
+ "user_kps": [
259
+ "argumentation mining",
260
+ "attention-based neural machine translation",
261
+ "cone beam computed tomography",
262
+ "conversational interactivity",
263
+ "conversational interfaces",
264
+ "dialogue systems",
265
+ "discriminative language modeling",
266
+ "exploratory search tasks",
267
+ "faceted search",
268
+ "learning concepts",
269
+ "neural ranking models",
270
+ "question answering",
271
+ "radiation dose",
272
+ "ranked retrieval",
273
+ "retrieval model",
274
+ "retrieval tasks",
275
+ "similarity-based retrieval",
276
+ "term networks",
277
+ "therapeutic targets",
278
+ "word retrieval"
279
+ ]
280
+ }
data/users/hzamani/sents-hzamani.txt ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Title: You can't pick your neighbors, or can you? When and how to rely on retrieval in the $k$NN-LM
2
+ 0: Retrieval-enhanced language models (LMs), which condition their predictions on text retrieved from large external datastores, have recently shown significant perplexity improvements compared to standard LMs.
3
+ 1: One such approach, the kNN-LM, interpolates any existing LM’s predictions with the output of a k nearest neighbors model and requires no additional training.
4
+ 2: In this paper, we explore the importance of lexical and semantic matching in the context of items retrieved by kNN-LM.
5
+ 3: We find two trends: (1) the presence of large overlapping n-grams between the datastore and evaluation set plays an important factor in strong performance, even when the datastore is derived from the training data; and (2) the kNN-LM is most beneficial when retrieved items have high semantic similarity with the query.
6
+ 4: Based on our analysis, we define a new formulation of the kNN-LM that uses retrieval quality to assign the interpolation coefficient.
7
+ 5: We empirically measure the effectiveness of our approach on two English language modeling datasets, Wikitext-103 and PG-19.
8
+ 6: Our re-formulation of the kNN-LM is beneficial in both cases, and leads to nearly 4% improvement in perplexity on the Wikitext-103 test set.
9
+
10
+ Title: Maruna Bot: An Extensible Retrieval-Focused Framework for Task-Oriented Dialogues
11
+ 7: We present Maruna Bot, a Task-Oriented Dialogue System (TODS) that assists people in cooking or Do-It-Yourself (DIY) tasks using either a speech-only or multi-modal (speech and screen) interface.
12
+ 8: Building such a system is challenging, because it touches many research areas including language understanding, text generation, task planning, dialogue state tracking, question answering, multi-modal retrieval, instruction summarization, robustness, and result presentation, among others.
13
+ 9: Our bot lets users choose their desired tasks with flexible phrases, uses multi-stage intent classification, asks clarifying questions to improve retrieval, supports in-task and open-domain Question Answering throughout the conversation, effectively maintains the task status, performs query expansion and instruction re-ranking using both textual and visual signals.
14
+
15
+ Title: Conversational Information Seeking
16
+ 10: Conversational information seeking (CIS) is concerned with a sequence of interactions between one or more users and an information system.
17
+ 11: Interactions in CIS are primarily based on natural language dialogue, while they may include other types of interactions, such as click, touch, and body gestures.
18
+ 12: This monograph provides a thorough overview of CIS definitions, applications, interactions, interfaces, design, implementation, and evaluation.
19
+ 13: This monograph views CIS applications as including conversational search, conversational question answering, and conversational recommendation.
20
+ 14: Our aim is to provide an overview of past research related to CIS, introduce the current state-of-the-art in CIS, highlight the challenges still being faced in the community.
21
+ 15: and suggest future directions.
22
+
23
+ Title: Curriculum Learning for Dense Retrieval Distillation
24
+ 16: Recent work has shown that more effective dense retrieval models can be obtained by distilling ranking knowledge from an existing base re-ranking model.
25
+ 17: In this paper, we propose a generic curriculum learning based optimization framework called CL-DRD that controls the difficulty level of training data produced by the re-ranking (teacher) model.
26
+ 18: CL-DRD iteratively optimizes the dense retrieval (student) model by increasing the difficulty of the knowledge distillation data made available to it.
27
+ 19: In more detail, we initially provide the student model coarse-grained preference pairs between documents in the teacher's ranking, and progressively move towards finer-grained pairwise document ordering requirements.
28
+ 20: In our experiments, we apply a simple implementation of the CL-DRD framework to enhance two state-of-the-art dense retrieval models.
29
+ 21: Experiments on three public passage retrieval datasets demonstrate the effectiveness of our proposed framework.
30
+
31
+ Title: Stochastic Optimization of Text Set Generation for Learning Multiple Query Intent Representations
32
+ 22: Learning multiple intent representations for queries has potential applications in facet generation, document ranking, search result diversification, and search explanation.
33
+ 23: The state-of-the-art model for this task assumes that there is a sequence of intent representations.
34
+ 24: In this paper, we argue that the model should not be penalized as long as it generates an accurate and complete set of intent representations.
35
+ 25: Based on this intuition, we propose a stochastic permutation invariant approach for optimizing such networks.
36
+ 26: We extrinsically evaluate the proposed approach on a facet generation task and demonstrate significant improvements compared to competitive baselines.
37
+ 27: Our analysis shows that the proposed permutation invariant approach has the highest impact on queries with more potential intents.
38
+
39
+ Title: The cardioprotective effects of nano‐curcumin against doxorubicin‐induced cardiotoxicity: A systematic review
40
+ 28: Although the chemotherapeutic drug, doxorubicin, is commonly used to treat various malignant tumors, its clinical use is restricted because of its toxicity especially cardiotoxicity.
41
+ 29: The use of curcumin may alleviate some of the doxorubicin‐induced cardiotoxic effects.
42
+ 30: Especially, using the nano‐formulation of curcumin can overcome the poor bioavailability of curcumin and enhance its physicochemical properties regarding its efficacy.
43
+ 31: In this study, we systematically reviewed the potential cardioprotective effects of nano‐curcumin against the doxorubicin‐induced cardiotoxicity.
44
+ 32: A systematic search was accomplished based on Preferred Reporting Items for Systematic Reviews and Meta‐Analyses guidelines for the identification of all relevant articles on “the role of nano‐curcumin on doxorubicin‐induced cardiotoxicity” in the electronic databases of Scopus, PubMed, and Web of Science up to July 2021.
45
+ 33: One hundred and sixty‐nine articles were screened following a predefined set of inclusion and exclusion criteria.
46
+ 34: Ten eligible scientific papers were finally included in the present systematic review.
47
+ 35: The administration of doxorubicin reduced the body and heart weights of mice/rats compared to the control groups.
48
+ 36: In contrast, the combined treatment of doxorubicin and nano‐curcumin increased the body and heart weights of animals compared with the doxorubicin‐treated groups alone.
49
+ 37: Furthermore, doxorubicin could significantly induce the biochemical and histological changes in the cardiac tissue; however, coadministration of nano‐curcumin formulation demonstrated a pattern opposite to the doxorubicin‐induced changes.
50
+ 38: The coadministration of nano‐curcumin alleviates the doxorubicin‐induced cardiotoxicity through various mechanisms including antioxidant, anti‐inflammatory, and antiapoptotic effects.
51
+ 39: Also, the cardioprotective effect of nano‐curcumin formulation against doxorubicin‐induced cardiotoxicity was higher than free curcumin.
52
+
53
+ Title: Multi-Task Retrieval-Augmented Text Generation with Relevance Sampling
54
+ 40: This paper studies multi-task training of retrieval-augmented generation models for knowledge-intensive tasks.
55
+ 41: We propose to clean the training set by utilizing a distinct property of knowledge-intensive generation: The connection of query-answer pairs to items in the knowledge base.
56
+ 42: We filter training examples via a threshold of confidence on the relevance labels, whether a pair is answerable by the knowledge base or not.
57
+ 43: We train a single Fusion-in-Decoder (FiD) generator on seven combined tasks of the KILT benchmark.
58
+ 44: The experimental results suggest that our simple yet effective approach substantially improves competitive baselines on two strongly imbalanced tasks; and shows either smaller improvements or no significant regression on the remaining tasks.
59
+ 45: Furthermore, we demonstrate our multi-task training with relevance label sampling scales well with increased model capacity and achieves state-of-the-art results in five out of seven KILT tasks.
60
+
61
+ Title: Revisiting Open Domain Query Facet Extraction and Generation
62
+ 46: Web search queries can often be characterized by various facets.
63
+ 47: Extracting and generating query facets has various real-world applications, such as displaying facets to users in a search interface, search result diversification, clarifying question generation, and enabling exploratory search.
64
+ 48: In this work, we revisit the task of query facet extraction and generation and study various formulations of this task, including facet extraction as sequence labeling, facet generation as autoregressive text generation or extreme multi-label classification.
65
+ 49: We conduct extensive experiments and demonstrate that these approaches lead to complementary sets of facets.
66
+ 50: We also explored various aggregation approaches based on relevance and diversity to combine the facet sets produced by different formulations of the task.
67
+ 51: The approaches presented in this paper outperform state-of-the-art baselines in terms of both precision and recall.
68
+ 52: We confirm the quality of the proposed methods through manual annotation.
69
+ 53: Since there is no open-source software for facet extraction and generation, we release a toolkit named Faspect, that includes various model implementations for this task.
70
+
71
+ Title: Conversational Information Seeking: Theory and Application
72
+ 54: Conversational information seeking (CIS) involves interaction sequences between one or more users and an information system.
73
+ 55: Interactions in CIS are primarily based on natural language dialogue, while they may include other types of interactions, such as click, touch, and body gestures.
74
+ 56: CIS recently attracted significant attention and advancements continue to be made.
75
+ 57: This tutorial follows the content of the recent Conversational Information Seeking book authored by several of the tutorial presenters.
76
+ 58: The tutorial aims to be an introduction to CIS for newcomers to CIS in addition to the recent advanced topics and state-of-the-art approaches for students and researchers with moderate knowledge of the topic.
77
+ 59: A significant part of the tutorial is dedicated to hands-on experiences based on toolkits developed by the presenters for conversational passage retrieval and multi-modal task-oriented dialogues.
78
+ 60: The outcomes of this tutorial include theoretical and practical knowledge, including a forum to meet researchers interested in CIS.
79
+
80
+ Title: MIMICS-Duo: Offline & Online Evaluation of Search Clarification
81
+ 61: Asking clarification questions is an active area of research; however, resources for training and evaluating search clarification methods are not sufficient.
82
+ 62: To address this issue, we describe MIMICS-Duo, a new freely available dataset of 306 search queries with multiple clarifications (a total of 1,034 query-clarification pairs).
83
+ 63: MIMICS-Duo contains fine-grained annotations on clarification questions and their candidate answers and enhances the existing MIMICS datasets by enabling multi-dimensional evaluation of search clarification methods, including online and offline evaluation.
84
+ 64: We conduct extensive analysis to demonstrate the relationship between offline and online search clarification datasets and outline several research directions enabled by MIMICS-Duo.
85
+ 65: We believe that this resource will help researchers better understand clarification in search.
86
+
87
+ Title: Are We There Yet? A Decision Framework for Replacing Term Based Retrieval with Dense Retrieval Systems
88
+ 66: Recently, several dense retrieval (DR) models have demonstrated competitive performance to term-based retrieval that are ubiquitous in search systems.
89
+ 67: In contrast to term-based matching, DR projects queries and documents into a dense vector space and retrieves results via (approximate) nearest neighbor search.
90
+ 68: Deploying a new system, such as DR, inevitably involves tradeoffs in aspects of its performance.
91
+ 69: Established retrieval systems running at scale are usually well understood in terms of effectiveness and costs, such as query latency, indexing throughput, or storage requirements.
92
+ 70: In this work, we propose a framework with a set of criteria that go beyond simple effectiveness measures to thoroughly compare two retrieval systems with the explicit goal of assessing the readiness of one system to replace the other.
93
+ 71: This includes careful tradeoff considerations between effectiveness and various cost factors.
94
+ 72: Furthermore, we describe guardrail criteria, since even a system that is better on average may have systematic failures on a minority of queries.
95
+ 73: The guardrails check for failures on certain query characteristics and novel failure types that are only possible in dense retrieval systems.
96
+ 74: We demonstrate our decision framework on a Web ranking scenario.
97
+ 75: In that scenario, state-of-the-art DR models have surprisingly strong results, not only on average performance but passing an extensive set of guardrail tests, showing robustness on different query characteristics, lexical matching, generalization, and number of regressions.
98
+ 76: DR with approximate nearest neighbor search has comparable low query latency to term-based systems.
99
+ 77: The main reason to reject current DR models in this scenario is the cost of vectorization, which is much higher than the cost of building a traditional index.
100
+ 78: It is impossible to predict whether DR will become ubiquitous in the future, but one way this is possible is through repeated applications of decision processes such as the one presented here.
101
+
102
+ Title: Stochastic Retrieval-Conditioned Reranking
103
+ 79: The multi-stage cascaded architecture has been adopted by many search engines for efficient and effective retrieval.
104
+ 80: This architecture consists of a stack of retrieval and reranking models in which efficient retrieval models are followed by effective (neural) learning-to-rank models.
105
+ 81: The optimization of these learning-to-rank models is loosely connected to the early stage retrieval models.
106
+ 82: This paper draws theoretical connections between the early stage retrieval and late stage reranking models by deriving expected reranking performance conditioned on the early stage retrieval results.
107
+ 83: Our findings shed light on optimization of both retrieval and reranking models.
108
+ 84: As a result, we also introduce a novel loss function for training reranking models that leads to significant improvements on multiple public benchmarks.
109
+ 85: Our findings provide theoretical and empirical guidelines for developing multi-stage cascaded retrieval models.
110
+
111
+ Title: Predicting Prerequisite Relations for Unseen Concepts
112
+ 86: Concept prerequisite learning (CPL) plays a key role in developing technologies that assist people to learn a new complex topic or concept.
113
+ 87: Previous work commonly assumes that all concepts are given at training time and solely focuses on predicting the unseen prerequisite relationships between them.
114
+ 88: However, many real-world scenarios deal with concepts that are left undiscovered at training time, which is relatively unexplored.
115
+ 89: This paper studies this problem and proposes a novel alternating knowledge distillation approach to take advantage of both content- and graph-based models for this task.
116
+ 90: Extensive experiments on three public benchmarks demonstrate up to 10% improvements in terms of F1 score.
117
+
118
+ Title: Entrance Surface Dose Measurement at Thyroid and Parotid Gland Regions in Cone-Beam Computed Tomography and Panoramic Radiography
119
+ 91: Purpose: Ionizing radiation-absorbed doses is a crucial concern in Cone-Beam Computed Tomography (CBCT) and panoramic radiography.
120
+ 92: This study aimed to evaluate and compare the Entrance Skin Doses (ESD) of thyroid and parotid gland regions in CBCT and panoramic radiography in Yazd province, Iran.
121
+ 93:
122
+ Materials and Methods: In this cross-sectional study, 332 patients were included, who were then divided into two age groups (adult and pediatric) and underwent dental CBCT and panoramic radiography.
123
+ 94: Twelve Thermoluminescence Dosimeters (TLD- GR200) were used for each patient to measure the ESD of thyroid and parotid glands.
124
+ 95: The differences between the ESD values in CBCT and panoramic examinations as well as between the adults and children groups were evaluated by one-way ANOVA and Mann-Whitney tests.
125
+ 96:
126
+ Results: The mean and Standard Deviation (SD) values of ESD in panoramic imaging were equal to 61 ± 4 and 290 ± 12 µGy for the thyroid and parotid glands of the adult groups, respectively.
127
+ 97: Notably, these values for CBCT were significantly higher (P<0.01), as 377 ± 139 and 1554 ± 177 µGy, respectively.
128
+ 98: Moreover, the mean ESD values in the panoramic examination were 41 ± 3 and 190 ± 16 µGy for thyroid and parotid glands for the children group, while they were 350 ± 120 and 990 ± 107 µGy in CBCT (P<0.01), respectively.
129
+ 99: The ESD values in the parotid gland were approximately 3.4 (2.8-4.1) and 4.7 (4.6-4.8) times greater than those for CBCT and panoramic examinations, respectively.
130
+ 100:
131
+ Conclusion: Although CBCT provides supplementary diagnostic advantages, the thyroid and parotid glands’ doses are higher than panoramic radiography.
132
+ 101: Therefore, the risks and benefits of each method should be considered before their prescription.
133
+
134
+ Title: Estimating the risks of exposure-induced death associated with common computed tomography procedures
135
+ 102: Background: This study aimed to assess the risks of exposure-induced death (REID) in patients and embryos during CT examinations in Yazd province (Iran).
136
+ 103: Materials and Methods: Data on the exposure parameters were retrospectively collected from six imaging institutions.
137
+ 104: In total, 932 patients were included in this study and for each patient, organ doses were then estimated using ImpactDose software.
138
+ 105: The REIDs were calculated by BEIR VII risk model and using PCXMC software.
139
+ 106: In the case of gestational irradiation, excess cancer risk of 0.006% per mSv was taken into account in terms of the ICRP 84 recommendations, to calculate the excess childhood cancer risk imposed on the embryo.
140
+ 107: Results: The highest estimated organ doses for abdomen - pelvis, routine chest, chest HRCT, brain, and sinus examinations were obtained as 12.82 mSv for kidneys, 12.09 mSv for thymus, 13.16 mSv for thymus, 29.71 mSv for brain, and 11.70 mSv for oral mucosa, respectively.
141
+ 108: Across all procedures, abdomen - pelvis CT scan induced the highest excess REID to the patients (240 deaths per million).
142
+ 109: The highest delivered dose to the fetus was roughly 35 mSv, which was lower than the threshold dose proposed by ICRP (100 mSv) for the induction of malformations.
143
+ 110: However, the associated excess fatal childhood cancer risk of 2122 incidence per million scans can be a subject of concern for public health experts.
144
+ 111: Conclusion: Based on the results, although death risks related to induced cancer from CT scans were negligible, this risk can be relatively significant for children exposed during the fetal period.
145
+
146
+ Title: Retrieval-Enhanced Machine Learning
147
+ 112: Although information access systems have long supported people in accomplishing a wide range of tasks, we propose broadening the scope of users of information access systems to include task-driven machines, such as machine learning models.
148
+ 113: In this way, the core principles of indexing, representation, retrieval, and ranking can be applied and extended to substantially improve model generalization, scalability, robustness, and interpretability.
149
+ 114: We describe a generic retrieval-enhanced machine learning (REML) framework, which includes a number of existing models as special cases.
150
+ 115: REML challenges information retrieval conventions, presenting opportunities for novel advances in core areas, including optimization.
151
+ 116: The REML research agenda lays a foundation for a new style of information access research and paves a path towards advancing machine learning and artificial intelligence.
152
+
153
+ Title: Analyzing clarification in asynchronous information‐seeking conversations
154
+ 117: This research analyzes human‐generated clarification questions to provide insights into how they are used to disambiguate and provide a better understanding of information needs.
155
+ 118: A set of clarification questions is extracted from posts on the Stack Exchange platform.
156
+ 119: Novel taxonomy is defined for the annotation of the questions and their responses.
157
+ 120: We investigate the clarification questions in terms of whether they add any information to the post (the initial question posted by the asker) and the accepted answer, which is the answer chosen by the asker.
158
+ 121: After identifying, which clarification questions are more useful, we investigated the characteristics of these questions in terms of their types and patterns.
159
+ 122: Non‐useful clarification questions are identified, and their patterns are compared with useful clarifications.
160
+ 123: Our analysis indicates that the most useful clarification questions have similar patterns, regardless of topic.
161
+ 124: This research contributes to an understanding of clarification in conversations and can provide insight for clarification dialogues in conversational search scenarios and for the possible system generation of clarification requests in information‐seeking conversations.
162
+
163
+ Title: DISAPERE: A Dataset for Discourse Structure in Peer Review Discussions
164
+ 125: At the foundation of scientific evaluation is the labor-intensive process of peer review.
165
+ 126: This critical task requires participants to consume vast amounts of highly technical text.
166
+ 127: Prior work has annotated different aspects of review argumentation, but discourse relations between reviews and rebuttals have yet to be examined.
167
+ 128: We present DISAPERE, a labeled dataset of 20k sentences contained in 506 review-rebuttal pairs in English, annotated by experts.
168
+ 129: DISAPERE synthesizes label sets from prior work and extends them to include fine-grained annotation of the rebuttal sentences, characterizing their context in the review and the authors’ stance towards review arguments.
169
+ 130: Further, we annotate every review and rebuttal sentence.
170
+ 131: We show that discourse cues from rebuttals can shed light on the quality and interpretation of reviews.
171
+ 132: Further, an understanding of the argumentative strategies employed by the reviewers and authors provides useful signal for area chairs and other decision makers.
172
+
173
+ Title: FiD-Light: Efficient and Effective Retrieval-Augmented Text Generation
174
+ 133: Retrieval-augmented generation models offer many benefits over standalone language models: besides a textual answer to a given query they provide provenance items retrieved from an updateable knowledge base.
175
+ 134: However, they are also more complex systems and need to handle long inputs.
176
+ 135: In this work, we introduce FiD-Light to strongly increase the efficiency of the state-of-the-art retrieval-augmented FiD model, while maintaining the same level of effectiveness.
177
+ 136: Our FiD-Light model constrains the information flow from the encoder (which encodes passages separately) to the decoder (using concatenated encoded representations).
178
+ 137: Furthermore, we adapt FiD-Light with re-ranking capabilities through textual source pointers, to improve the top-ranked provenance precision.
179
+ 138: Our experiments on a diverse set of seven knowledge intensive tasks (KILT) show FiD-Light consistently improves the Pareto frontier between query latency and effectiveness.
180
+ 139: FiD-Light with source pointing sets substantial new state-of-the-art results on six KILT tasks for combined text generation and provenance retrieval evaluation, while maintaining reasonable efficiency.
181
+
182
+ Title: Generalizing Discriminative Retrieval Models using Generative Tasks
183
+ 140: Information Retrieval has a long history of applying either discriminative or generative modeling to retrieval and ranking tasks.
184
+ 141: Recent developments in transformer architectures and multi-task learning techniques have dramatically improved our ability to train effective neural models capable of resolving a wide variety of tasks using either of these paradigms.
185
+ 142: In this paper, we propose a novel multi-task learning approach which can be used to produce more effective neural ranking models.
186
+ 143: The key idea is to improve the quality of the underlying transformer model by cross-training a retrieval task and one or more complementary language generation tasks.
187
+ 144: By targeting the training on the encoding layer in the transformer architecture, our experimental results show that the proposed multi-task learning approach consistently improves retrieval effectiveness on the targeted collection and can easily be re-targeted to new ranking tasks.
188
+ 145: We provide an in-depth analysis showing how multi-task learning modifies model behaviors, resulting in more general models.
189
+
190
+ Title: Explaining Documents' Relevance to Search Queries
191
+ 146: RAZIEH RAHIMI, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA YOUNGWOO KIM, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA HAMED ZAMANI, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA JAMES ALLAN, Center for Intelligent Information Retrieval, University of Massachusetts Amherst, USA
192
+
data/users/hzamani/tplan-hzamani.png ADDED
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ joblib
2
+ geomloss==0.2.4
3
+ matplotlib==3.3.4
4
+ matplotlib-inline==0.1.2
5
+ nltk==3.6.2
6
+ numpy==1.22.3
7
+ pandas==1.3.1
8
+ POT==0.7.0
9
+ scikit-learn==0.24.2
10
+ sentence-transformers==1.2.0
11
+ sentencepiece==0.1.95
12
+ tokenizers==0.10.2
13
+ torch==1.8.1
14
+ tornado==6.1
15
+ transformers==4.5.1