akaafridi committed
Commit 41038fb · verified · 1 Parent(s): 36c6dc4

Update src/ranker.py

Files changed (1):
  1. src/ranker.py +4 -9
src/ranker.py CHANGED
@@ -1,7 +1,6 @@
 """
 ranker.py
 ---------
-
 This module implements functionality for ranking candidate sentences by
 their relevance to a given claim. The ranking is performed by
 embedding both the claim and the candidate sentences into a semantic
@@ -17,12 +16,10 @@ approach still yields reasonable relevance orderings without requiring
 deep learning dependencies.
 
 Example:
-
 >>> from ranker import rank_sentences
 >>> ranked = rank_sentences("Cats are adorable pets", ["Cats purr when happy", "Airplanes fly"], top_k=1)
 >>> print(ranked[0][0])
 ... # prints the sentence most similar to the claim
-
 """
 
 from __future__ import annotations
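
Note: the docstring above only sketches the intended behaviour (embed the claim and the candidates, then order candidates by cosine similarity). As a rough, hypothetical illustration of that transformer path, something along the lines below would work; only the model name comes from the module itself, while the helper name and the normalisation details are assumptions, not the repository's code.

import numpy as np
from sentence_transformers import SentenceTransformer

def rank_by_embedding(claim, candidates, top_k=10):
    # Encode the claim together with the candidates in one batch
    candidates = list(candidates)
    model = SentenceTransformer("all-MiniLM-L6-v2")
    vectors = model.encode([claim] + candidates)
    claim_vec, cand_vecs = vectors[0], vectors[1:]
    # Cosine similarity of each candidate against the claim
    sims = cand_vecs @ claim_vec / (
        np.linalg.norm(cand_vecs, axis=1) * np.linalg.norm(claim_vec) + 1e-12
    )
    order = np.argsort(sims)[::-1][:top_k]
    return [(candidates[i], float(sims[i])) for i in order]
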
@@ -40,7 +37,6 @@ _use_transformers = False
 
 def _load_sentence_transformer(model_name: str = "all-MiniLM-L6-v2"):
     """Load the sentence transformer model lazily.
-
     Attempts to import and instantiate the specified sentence
     transformer model. If the import fails, sets a flag to indicate
     fallback use of scikit-learn.
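
For context, the lazy-load-and-fallback pattern this docstring describes usually looks roughly like the sketch below. The flag and model names mirror the diff, but the body is an assumption rather than the repository's actual implementation.

_st_model = None
_use_transformers = False  # module-level flags, as in the diff above

def _load_sentence_transformer(model_name: str = "all-MiniLM-L6-v2"):
    """Instantiate the model once; fall back to scikit-learn if unavailable."""
    global _st_model, _use_transformers
    if _st_model is not None:
        return _st_model
    try:
        from sentence_transformers import SentenceTransformer
        _st_model = SentenceTransformer(model_name)
        _use_transformers = True
    except ImportError:
        # Missing dependency: signal that TF-IDF ranking should be used instead
        _use_transformers = False
        _st_model = None
    return _st_model
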
@@ -71,7 +67,6 @@ def _embed_with_st(texts: Iterable[str]) -> np.ndarray:
 
 def _rank_with_tfidf(claim: str, candidates: List[str], top_k: int) -> List[Tuple[str, float]]:
     """Rank candidates using TF-IDF cosine similarity.
-
     This fallback method uses scikit-learn's TfidfVectorizer to
     construct vectors for the claim and candidates and then computes
     pairwise cosine similarity. It does not require any heavy
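
The TF-IDF fallback described here can be reproduced with a few lines of scikit-learn. The sketch below is a plausible reading of the docstring, not the repository's exact implementation; the vectorizer settings are defaults and the function name is illustrative.

from typing import List, Tuple

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def rank_with_tfidf(claim: str, candidates: List[str], top_k: int) -> List[Tuple[str, float]]:
    # One shared vocabulary over the claim plus every candidate
    matrix = TfidfVectorizer().fit_transform([claim] + candidates)
    # Row 0 is the claim; the remaining rows are the candidates
    sims = cosine_similarity(matrix[0], matrix[1:]).ravel()
    order = sims.argsort()[::-1][:top_k]
    return [(candidates[i], float(sims[i])) for i in order]
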
@@ -97,10 +92,8 @@ def rank_sentences(claim: str, sentences: Iterable[str], top_k: int = 10) -> List[Tuple[str, float]]:
     ----------
     claim:
         The short textual claim against which candidates are compared.
-
     sentences:
         An iterable of candidate sentences to score.
-
     top_k:
         The maximum number of top-ranked sentences to return. If the
         number of candidates is less than ``top_k``, all candidates are
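
Assuming the return value is a list of (sentence, score) pairs, as the signature and the doctest suggest, the documented ``top_k`` behaviour can be exercised like this (hypothetical call, not taken from the repository):

from ranker import rank_sentences

# Only two candidates, so top_k=5 simply returns both, best match first.
pairs = rank_sentences(
    "Cats are adorable pets",
    ["Cats purr when happy", "Airplanes fly"],
    top_k=5,
)
for sentence, score in pairs:
    print(f"{score:.3f}  {sentence}")
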
@@ -116,6 +109,9 @@ def rank_sentences(claim: str, sentences: Iterable[str], top_k: int = 10) -> List[Tuple[str, float]]:
     ranking, the scores may be lower but are still comparable within
     the same run.
     """
+    # IMPORTANT: declare globals before any usage in this function
+    global _use_transformers, _st_model
+
     # Convert the iterable to a list so we can index and iterate
     candidates = list(sentences)
     if not candidates:
@@ -144,9 +140,8 @@ def rank_sentences(claim: str, sentences: Iterable[str], top_k: int = 10) -> List[Tuple[str, float]]:
             exc,
         )
         # Mark the transformer as unusable for subsequent calls
-        global _use_transformers
        _use_transformers = False
        _st_model = None
 
     # Fallback to TF-IDF ranking
-    return _rank_with_tfidf(claim, candidates, top_k)
+    return _rank_with_tfidf(claim, candidates, top_k)
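
The substantive change in this commit is moving the ``global`` declaration to the top of ``rank_sentences``. Python rejects a function in which a name is read or assigned before a later ``global`` statement for it, so the old placement inside the ``except`` block can fail at import time if the flag is used earlier in the function, which matches the new "declare globals before any usage" comment. A minimal, self-contained demonstration (example names only, not the repository's exact code):

# Compile the problematic pattern from a string so the error can be caught.
src = (
    "def broken():\n"
    "    if _use_transformers:\n"     # the name is used here ...
    "        pass\n"
    "    global _use_transformers\n"  # ... before it is declared global
    "    _use_transformers = False\n"
)
try:
    compile(src, "<demo>", "exec")
except SyntaxError as err:
    print(err)  # name '_use_transformers' is used prior to global declaration
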
 