heymenn committed on
Commit
14e31ef
·
1 Parent(s): 801e72f

fix wrong wki_id retrieved

Browse files
Files changed (1) hide show
  1. classes.py +7 -1
classes.py CHANGED
@@ -319,7 +319,13 @@ class ETSISpecFinder:
319
  print(f" wki_id={wki_id}: DOCX not found in page, trying next")
320
  continue
321
 
322
- docx_url = docx_urls[0]
 
 
 
 
 
 
323
  dl = session.get(docx_url, headers={"Referer": r4.url}, verify=False, timeout=60)
324
  filename = docx_url.split("/")[-1]
325
  tmp_path = f"/tmp/{filename}"
 
319
  print(f" wki_id={wki_id}: DOCX not found in page, trying next")
320
  continue
321
 
322
+ # Verify the DOCX belongs to the requested spec (e.g. "102 223" → "102223")
323
+ spec_num = doc_id.split("-")[0].replace(" ", "")
324
+ matching_urls = [u for u in docx_urls if spec_num in u.split("/")[-1]]
325
+ if not matching_urls:
326
+ print(f" wki_id={wki_id}: DOCX spec mismatch (expected {spec_num} in filename), trying next")
327
+ continue
328
+ docx_url = matching_urls[0]
329
  dl = session.get(docx_url, headers={"Referer": r4.url}, verify=False, timeout=60)
330
  filename = docx_url.split("/")[-1]
331
  tmp_path = f"/tmp/{filename}"