ahmad21omar committed on
Commit
8e97619
·
1 Parent(s): 1c973d4

import am anfang

Browse files
Files changed (1) hide show
  1. app.py +31 -22
app.py CHANGED
@@ -25,6 +25,36 @@ import re
25
  import nltk
26
  nltk.download("stopwords", quiet=True)
27
  from nltk.corpus import stopwords as nltk_stopwords
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  LANGUAGE = "english"
30
  word_splitter = re.compile(r"(?u)\b\w\w+\b").findall
@@ -147,20 +177,11 @@ def run_counting(
147
  doc_texts=doc_texts,
148
  )
149
 
150
- from nlp4web_codebase.ir.data_loaders.sciq import load_sciq
151
  sciq = load_sciq()
152
  counting = run_counting(documents=iter(sciq.corpus), ndocs=len(sciq.corpus))
153
 
154
  """### BM25 Index"""
155
 
156
- from __future__ import annotations
157
- from dataclasses import asdict, dataclass
158
- import math
159
- import os
160
- from typing import Iterable, List, Optional, Type
161
- import tqdm
162
- from nlp4web_codebase.ir.data_loaders.dm import Document
163
-
164
 
165
  @dataclass
166
  class BM25Index(InvertedIndex):
@@ -257,9 +278,7 @@ bm25_index.save("output/bm25_index")
257
 
258
  """### BM25 Retriever"""
259
 
260
- from nlp4web_codebase.ir.models import BaseRetriever
261
- from typing import Type
262
- from abc import abstractmethod
263
 
264
 
265
  class BaseInvertedIndexRetriever(BaseRetriever):
@@ -330,10 +349,6 @@ Tune b and k1 on the **dev** split of SciQ using the metric MAP@10. The evaluati
330
  $${\displaystyle {\text{score}}(D,Q)=\sum _{i=1}^{n}{\text{IDF}}(q_{i})\cdot {\frac {f(q_{i},D)\cdot (k_{1}+1)}{f(q_{i},D)+k_{1}\cdot \left(1-b+b\cdot {\frac {|D|}{\text{avgdl}}}\right)}}}$$
331
  """
332
 
333
- from nlp4web_codebase.ir.data_loaders import Split
334
- import pytrec_eval
335
- import numpy as np
336
-
337
 
338
 
339
  def evaluate_map(rankings: Dict[str, Dict[str, float]], split=Split.dev) -> float:
@@ -346,7 +361,6 @@ def evaluate_map(rankings: Dict[str, Dict[str, float]], split=Split.dev) -> floa
346
  """Example of using the pre-requisite code:"""
347
 
348
  # Loading dataset:
349
- from nlp4web_codebase.ir.data_loaders.sciq import load_sciq
350
  sciq = load_sciq()
351
  counting = run_counting(documents=iter(sciq.corpus), ndocs=len(sciq.corpus))
352
 
@@ -444,7 +458,6 @@ print(plots_b["Y"][1])
444
  print(plots_k1)
445
  print(plots_b)
446
 
447
- from matplotlib import pyplot as plt
448
  plt.plot(plots_b["X"], plots_b["Y"], label="b")
449
  plt.plot(plots_k1["X"], plots_k1["Y"], label="k1")
450
  plt.ylabel("MAP")
@@ -486,7 +499,6 @@ Convert the matrix \begin{bmatrix}
486
  \end{bmatrix} to a `csc_matrix` by specifying `data`, `indices`, `indptr` and `shape`.
487
  """
488
 
489
- from scipy.sparse._csc import csc_matrix
490
  input_matrix = [[0, 1, 0, 3], [10, 2, 1, 0], [0, 0, 0, 9]]
491
  data = None
492
  indices = None
@@ -760,9 +772,6 @@ def search(query: str) -> List[Hit]:
760
  ```
761
  """
762
 
763
- import gradio as gr
764
- from typing import TypedDict
765
-
766
  class Hit(TypedDict):
767
  cid: str
768
  score: float
 
25
  import nltk
26
  nltk.download("stopwords", quiet=True)
27
  from nltk.corpus import stopwords as nltk_stopwords
28
+ from nlp4web_codebase.ir.data_loaders.sciq import load_sciq
29
+
30
+
31
+ import gradio as gr
32
+ from typing import TypedDict
33
+
34
+
35
+ from __future__ import annotations
36
+ from dataclasses import asdict, dataclass
37
+ import math
38
+ import os
39
+ from typing import Iterable, List, Optional, Type
40
+ import tqdm
41
+ from nlp4web_codebase.ir.data_loaders.dm import Document
42
+
43
+ from nlp4web_codebase.ir.models import BaseRetriever
44
+ from typing import Type
45
+ from abc import abstractmethod
46
+
47
+
48
+ from nlp4web_codebase.ir.data_loaders import Split
49
+ import pytrec_eval
50
+ import numpy as np
51
+
52
+
53
+ from matplotlib import pyplot as plt
54
+
55
+ from scipy.sparse._csc import csc_matrix
56
+
57
+
58
 
59
  LANGUAGE = "english"
60
  word_splitter = re.compile(r"(?u)\b\w\w+\b").findall
 
177
  doc_texts=doc_texts,
178
  )
179
 
 
180
  sciq = load_sciq()
181
  counting = run_counting(documents=iter(sciq.corpus), ndocs=len(sciq.corpus))
182
 
183
  """### BM25 Index"""
184
 
 
 
 
 
 
 
 
 
185
 
186
  @dataclass
187
  class BM25Index(InvertedIndex):
 
278
 
279
  """### BM25 Retriever"""
280
 
281
+
 
 
282
 
283
 
284
  class BaseInvertedIndexRetriever(BaseRetriever):
 
349
  $${\displaystyle {\text{score}}(D,Q)=\sum _{i=1}^{n}{\text{IDF}}(q_{i})\cdot {\frac {f(q_{i},D)\cdot (k_{1}+1)}{f(q_{i},D)+k_{1}\cdot \left(1-b+b\cdot {\frac {|D|}{\text{avgdl}}}\right)}}}$$
350
  """
351
 
 
 
 
 
352
 
353
 
354
  def evaluate_map(rankings: Dict[str, Dict[str, float]], split=Split.dev) -> float:
 
361
  """Example of using the pre-requisite code:"""
362
 
363
  # Loading dataset:
 
364
  sciq = load_sciq()
365
  counting = run_counting(documents=iter(sciq.corpus), ndocs=len(sciq.corpus))
366
 
 
458
  print(plots_k1)
459
  print(plots_b)
460
 
 
461
  plt.plot(plots_b["X"], plots_b["Y"], label="b")
462
  plt.plot(plots_k1["X"], plots_k1["Y"], label="k1")
463
  plt.ylabel("MAP")
 
499
  \end{bmatrix} to a `csc_matrix` by specifying `data`, `indices`, `indptr` and `shape`.
500
  """
501
 
 
502
  input_matrix = [[0, 1, 0, 3], [10, 2, 1, 0], [0, 0, 0, 9]]
503
  data = None
504
  indices = None
 
772
  ```
773
  """
774
 
 
 
 
775
  class Hit(TypedDict):
776
  cid: str
777
  score: float