Spaces:
Running
Running
Fix ETSI search TS issue
Browse files
- classes.py +20 -6
classes.py
CHANGED
|
@@ -202,22 +202,22 @@ class ETSISpecFinder:
|
|
| 202 |
|
| 203 |
return f"Specification {doc_id} not found"
|
| 204 |
|
| 205 |
-
def _get_wki_id_candidates(self, doc_id: str, version: str = None) -> list:
|
| 206 |
-
"""Return wki_id candidates for a spec version (best match first)."""
|
| 207 |
if version:
|
| 208 |
version_str = version
|
| 209 |
else:
|
| 210 |
# Derive version from the FTP PDF URL
|
| 211 |
pdf_url = self.search_document(doc_id)
|
| 212 |
if "not found" in pdf_url.lower():
|
| 213 |
-
return []
|
| 214 |
parts = pdf_url.rstrip("/").split("/")
|
| 215 |
version_folder = parts[-2] # e.g. "18.04.00_60"
|
| 216 |
v_parts = version_folder.split("_")[0].split(".") # ["18", "04", "00"]
|
| 217 |
try:
|
| 218 |
version_str = f"{int(v_parts[0])}.{int(v_parts[1])}.{int(v_parts[2])}"
|
| 219 |
except (ValueError, IndexError):
|
| 220 |
-
return []
|
| 221 |
|
| 222 |
def fetch_candidates():
|
| 223 |
spec_num = doc_id.split("-")[0].replace(" ", "")
|
|
@@ -243,7 +243,7 @@ class ETSISpecFinder:
|
|
| 243 |
return []
|
| 244 |
|
| 245 |
candidates = list(dict.fromkeys(fetch_candidates())) # single call, deduped
|
| 246 |
-
return candidates
|
| 247 |
|
| 248 |
def _authenticate_eol(self, wki_id: str) -> requests.Session:
|
| 249 |
"""Create a requests.Session authenticated to the ETSI EOL portal."""
|
|
@@ -274,10 +274,16 @@ class ETSISpecFinder:
|
|
| 274 |
|
| 275 |
def search_document_docx(self, doc_id: str, version: str = None) -> str:
|
| 276 |
"""Download an ETSI spec as DOCX and return the local file path."""
|
| 277 |
-
candidates = self._get_wki_id_candidates(doc_id, version)
|
| 278 |
if not candidates:
|
| 279 |
return f"Specification {doc_id} not found"
|
| 280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
# Authenticate once — cookies are auth tokens, not wki_id-specific
|
| 282 |
auth_session = self._authenticate_eol(candidates[0])
|
| 283 |
|
|
@@ -333,6 +339,14 @@ class ETSISpecFinder:
|
|
| 333 |
print(f" wki_id={wki_id}: DOCX spec mismatch (expected {spec_num}), trying next")
|
| 334 |
return None
|
| 335 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
docx_url = matching_urls[0]
|
| 337 |
dl = session.get(docx_url, headers={"Referer": r4.url}, verify=False, timeout=60)
|
| 338 |
filename = docx_url.split("/")[-1]
|
|
|
|
| 202 |
|
| 203 |
return f"Specification {doc_id} not found"
|
| 204 |
|
| 205 |
+
def _get_wki_id_candidates(self, doc_id: str, version: str = None) -> tuple:
|
| 206 |
+
"""Return (candidates, version_str) for a spec version (best match first)."""
|
| 207 |
if version:
|
| 208 |
version_str = version
|
| 209 |
else:
|
| 210 |
# Derive version from the FTP PDF URL
|
| 211 |
pdf_url = self.search_document(doc_id)
|
| 212 |
if "not found" in pdf_url.lower():
|
| 213 |
+
return [], ""
|
| 214 |
parts = pdf_url.rstrip("/").split("/")
|
| 215 |
version_folder = parts[-2] # e.g. "18.04.00_60"
|
| 216 |
v_parts = version_folder.split("_")[0].split(".") # ["18", "04", "00"]
|
| 217 |
try:
|
| 218 |
version_str = f"{int(v_parts[0])}.{int(v_parts[1])}.{int(v_parts[2])}"
|
| 219 |
except (ValueError, IndexError):
|
| 220 |
+
return [], ""
|
| 221 |
|
| 222 |
def fetch_candidates():
|
| 223 |
spec_num = doc_id.split("-")[0].replace(" ", "")
|
|
|
|
| 243 |
return []
|
| 244 |
|
| 245 |
candidates = list(dict.fromkeys(fetch_candidates())) # single call, deduped
|
| 246 |
+
return candidates, version_str
|
| 247 |
|
| 248 |
def _authenticate_eol(self, wki_id: str) -> requests.Session:
|
| 249 |
"""Create a requests.Session authenticated to the ETSI EOL portal."""
|
|
|
|
| 274 |
|
| 275 |
def search_document_docx(self, doc_id: str, version: str = None) -> str:
|
| 276 |
"""Download an ETSI spec as DOCX and return the local file path."""
|
| 277 |
+
candidates, version_str = self._get_wki_id_candidates(doc_id, version)
|
| 278 |
if not candidates:
|
| 279 |
return f"Specification {doc_id} not found"
|
| 280 |
|
| 281 |
+
# Build zero-padded version tag for filename matching, e.g. "1.2.1" -> "010201"
|
| 282 |
+
try:
|
| 283 |
+
version_tag = "".join(f"{int(p):02d}" for p in version_str.split("."))
|
| 284 |
+
except (ValueError, AttributeError):
|
| 285 |
+
version_tag = ""
|
| 286 |
+
|
| 287 |
# Authenticate once — cookies are auth tokens, not wki_id-specific
|
| 288 |
auth_session = self._authenticate_eol(candidates[0])
|
| 289 |
|
|
|
|
| 339 |
print(f" wki_id={wki_id}: DOCX spec mismatch (expected {spec_num}), trying next")
|
| 340 |
return None
|
| 341 |
|
| 342 |
+
if version_tag:
|
| 343 |
+
versioned_urls = [u for u in matching_urls if version_tag in u.split("/")[-1]]
|
| 344 |
+
if not versioned_urls:
|
| 345 |
+
found_names = [u.split("/")[-1] for u in matching_urls]
|
| 346 |
+
print(f" wki_id={wki_id}: DOCX version mismatch (expected {version_str} / tag {version_tag}, got {found_names}), trying next")
|
| 347 |
+
return None
|
| 348 |
+
matching_urls = versioned_urls
|
| 349 |
+
|
| 350 |
docx_url = matching_urls[0]
|
| 351 |
dl = session.get(docx_url, headers={"Referer": r4.url}, verify=False, timeout=60)
|
| 352 |
filename = docx_url.split("/")[-1]
|