heymenn committed on
Commit
672b726
·
1 Parent(s): 460f7e9

Fix ETSI search TS issue

Browse files
Files changed (1) hide show
  1. classes.py +20 -6
classes.py CHANGED
@@ -202,22 +202,22 @@ class ETSISpecFinder:
202
 
203
  return f"Specification {doc_id} not found"
204
 
205
- def _get_wki_id_candidates(self, doc_id: str, version: str = None) -> list:
206
- """Return a list of candidate wki_ids for a spec version (best match first)."""
207
  if version:
208
  version_str = version
209
  else:
210
  # Derive version from the FTP PDF URL
211
  pdf_url = self.search_document(doc_id)
212
  if "not found" in pdf_url.lower():
213
- return []
214
  parts = pdf_url.rstrip("/").split("/")
215
  version_folder = parts[-2] # e.g. "18.04.00_60"
216
  v_parts = version_folder.split("_")[0].split(".") # ["18", "04", "00"]
217
  try:
218
  version_str = f"{int(v_parts[0])}.{int(v_parts[1])}.{int(v_parts[2])}"
219
  except (ValueError, IndexError):
220
- return []
221
 
222
  def fetch_candidates():
223
  spec_num = doc_id.split("-")[0].replace(" ", "")
@@ -243,7 +243,7 @@ class ETSISpecFinder:
243
  return []
244
 
245
  candidates = list(dict.fromkeys(fetch_candidates())) # single call, deduped
246
- return candidates
247
 
248
  def _authenticate_eol(self, wki_id: str) -> requests.Session:
249
  """Create a requests.Session authenticated to the ETSI EOL portal."""
@@ -274,10 +274,16 @@ class ETSISpecFinder:
274
 
275
  def search_document_docx(self, doc_id: str, version: str = None) -> str:
276
  """Download an ETSI spec as DOCX and return the local file path."""
277
- candidates = self._get_wki_id_candidates(doc_id, version)
278
  if not candidates:
279
  return f"Specification {doc_id} not found"
280
 
 
 
 
 
 
 
281
  # Authenticate once — cookies are auth tokens, not wki_id-specific
282
  auth_session = self._authenticate_eol(candidates[0])
283
 
@@ -333,6 +339,14 @@ class ETSISpecFinder:
333
  print(f" wki_id={wki_id}: DOCX spec mismatch (expected {spec_num}), trying next")
334
  return None
335
 
 
 
 
 
 
 
 
 
336
  docx_url = matching_urls[0]
337
  dl = session.get(docx_url, headers={"Referer": r4.url}, verify=False, timeout=60)
338
  filename = docx_url.split("/")[-1]
 
202
 
203
  return f"Specification {doc_id} not found"
204
 
205
+ def _get_wki_id_candidates(self, doc_id: str, version: str = None) -> tuple:
206
+ """Return (candidates, version_str) for a spec version (best match first)."""
207
  if version:
208
  version_str = version
209
  else:
210
  # Derive version from the FTP PDF URL
211
  pdf_url = self.search_document(doc_id)
212
  if "not found" in pdf_url.lower():
213
+ return [], ""
214
  parts = pdf_url.rstrip("/").split("/")
215
  version_folder = parts[-2] # e.g. "18.04.00_60"
216
  v_parts = version_folder.split("_")[0].split(".") # ["18", "04", "00"]
217
  try:
218
  version_str = f"{int(v_parts[0])}.{int(v_parts[1])}.{int(v_parts[2])}"
219
  except (ValueError, IndexError):
220
+ return [], ""
221
 
222
  def fetch_candidates():
223
  spec_num = doc_id.split("-")[0].replace(" ", "")
 
243
  return []
244
 
245
  candidates = list(dict.fromkeys(fetch_candidates())) # single call, deduped
246
+ return candidates, version_str
247
 
248
  def _authenticate_eol(self, wki_id: str) -> requests.Session:
249
  """Create a requests.Session authenticated to the ETSI EOL portal."""
 
274
 
275
  def search_document_docx(self, doc_id: str, version: str = None) -> str:
276
  """Download an ETSI spec as DOCX and return the local file path."""
277
+ candidates, version_str = self._get_wki_id_candidates(doc_id, version)
278
  if not candidates:
279
  return f"Specification {doc_id} not found"
280
 
281
+ # Build zero-padded version tag for filename matching, e.g. "1.2.1" -> "010201"
282
+ try:
283
+ version_tag = "".join(f"{int(p):02d}" for p in version_str.split("."))
284
+ except (ValueError, AttributeError):
285
+ version_tag = ""
286
+
287
  # Authenticate once — cookies are auth tokens, not wki_id-specific
288
  auth_session = self._authenticate_eol(candidates[0])
289
 
 
339
  print(f" wki_id={wki_id}: DOCX spec mismatch (expected {spec_num}), trying next")
340
  return None
341
 
342
+ if version_tag:
343
+ versioned_urls = [u for u in matching_urls if version_tag in u.split("/")[-1]]
344
+ if not versioned_urls:
345
+ found_names = [u.split("/")[-1] for u in matching_urls]
346
+ print(f" wki_id={wki_id}: DOCX version mismatch (expected {version_str} / tag {version_tag}, got {found_names}), trying next")
347
+ return None
348
+ matching_urls = versioned_urls
349
+
350
  docx_url = matching_urls[0]
351
  dl = session.get(docx_url, headers={"Referer": r4.url}, verify=False, timeout=60)
352
  filename = docx_url.split("/")[-1]