Spaces:
Sleeping
Sleeping
fix wrong wki_id retrieved
Browse files
- classes.py +7 -1
classes.py
CHANGED
|
@@ -319,7 +319,13 @@ class ETSISpecFinder:
|
|
| 319 |
print(f" wki_id={wki_id}: DOCX not found in page, trying next")
|
| 320 |
continue
|
| 321 |
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
dl = session.get(docx_url, headers={"Referer": r4.url}, verify=False, timeout=60)
|
| 324 |
filename = docx_url.split("/")[-1]
|
| 325 |
tmp_path = f"/tmp/{filename}"
|
|
|
|
| 319 |
print(f" wki_id={wki_id}: DOCX not found in page, trying next")
|
| 320 |
continue
|
| 321 |
|
| 322 |
+
# Verify the DOCX belongs to the requested spec (e.g. "102 223" → "102223")
|
| 323 |
+
spec_num = doc_id.split("-")[0].replace(" ", "")
|
| 324 |
+
matching_urls = [u for u in docx_urls if spec_num in u.split("/")[-1]]
|
| 325 |
+
if not matching_urls:
|
| 326 |
+
print(f" wki_id={wki_id}: DOCX spec mismatch (expected {spec_num} in filename), trying next")
|
| 327 |
+
continue
|
| 328 |
+
docx_url = matching_urls[0]
|
| 329 |
dl = session.get(docx_url, headers={"Referer": r4.url}, verify=False, timeout=60)
|
| 330 |
filename = docx_url.split("/")[-1]
|
| 331 |
tmp_path = f"/tmp/{filename}"
|