Jofthomas committed on
Commit
6504d4f
·
1 Parent(s): 5bb7b59
Files changed (1) hide show
  1. app.py +27 -9
app.py CHANGED
@@ -59,6 +59,14 @@ def _default_headers(cookie: Optional[str]) -> dict:
59
  "Pragma": "no-cache",
60
  "Connection": "keep-alive",
61
  "Referer": "https://www.linkedin.com/jobs/",
 
 
 
 
 
 
 
 
62
  }
63
  if cookie:
64
  headers["Cookie"] = cookie
@@ -280,8 +288,12 @@ def _build_search_params(
280
  params["geoId"] = str(geo_id)
281
 
282
  # Sort: relevance (R) or date (DD)
283
- if sort_by and sort_by.lower() in {"relevance", "date"}:
284
- params["sortBy"] = "R" if sort_by.lower() == "relevance" else "DD"
 
 
 
 
285
 
286
  # Time posted
287
  if date_posted:
@@ -318,18 +330,24 @@ def _search_page(
318
  base_url = "https://www.linkedin.com/jobs/search/?" + urlencode(params)
319
  logger.debug("GET main page: %s", base_url)
320
  resp = client.get(base_url, follow_redirects=True, timeout=20.0)
321
- resp.raise_for_status()
322
  logger.debug(
323
  "Main page status=%d bytes=%d content-type=%s",
324
  resp.status_code,
325
  len(resp.content),
326
  resp.headers.get("content-type"),
327
  )
328
- block_hint = _detect_block_or_wall(resp.text)
329
- if block_hint:
330
- logger.warning("Main page may be blocked/walled (hint=%r)", block_hint)
331
- jobs = _parse_jobs_from_html(resp.text)
332
- logger.debug("Parsed %d jobs from main page", len(jobs))
 
 
 
 
 
 
 
333
 
334
  # If nothing parsed, try the fragment endpoint as a fallback regardless of page
335
  if len(jobs) == 0:
@@ -356,7 +374,7 @@ def _search_page(
356
  if len(jobs) == 0:
357
  logger.info(
358
  "Zero jobs after main+fragment. Body sample: %s",
359
- _summarize_body(resp.text or frag_resp.text or ""),
360
  )
361
 
362
  return jobs
 
59
  "Pragma": "no-cache",
60
  "Connection": "keep-alive",
61
  "Referer": "https://www.linkedin.com/jobs/",
62
+ "Accept-Encoding": "gzip, deflate, br, zstd",
63
+ "Upgrade-Insecure-Requests": "1",
64
+ "sec-ch-ua": '"Chromium";v="125", "Not.A/Brand";v="24", "Google Chrome";v="125"',
65
+ "sec-ch-ua-mobile": "?0",
66
+ "sec-ch-ua-platform": '"macOS"',
67
+ "Sec-Fetch-Site": "same-origin",
68
+ "Sec-Fetch-Mode": "navigate",
69
+ "Sec-Fetch-Dest": "document",
70
  }
71
  if cookie:
72
  headers["Cookie"] = cookie
 
288
  params["geoId"] = str(geo_id)
289
 
290
  # Sort: relevance (R) or date (DD)
291
+ if sort_by:
292
+ sb = sort_by.lower()
293
+ if sb in {"relevance", "r"}:
294
+ params["sortBy"] = "R"
295
+ elif sb in {"date", "recent", "dd"}:
296
+ params["sortBy"] = "DD"
297
 
298
  # Time posted
299
  if date_posted:
 
330
  base_url = "https://www.linkedin.com/jobs/search/?" + urlencode(params)
331
  logger.debug("GET main page: %s", base_url)
332
  resp = client.get(base_url, follow_redirects=True, timeout=20.0)
 
333
  logger.debug(
334
  "Main page status=%d bytes=%d content-type=%s",
335
  resp.status_code,
336
  len(resp.content),
337
  resp.headers.get("content-type"),
338
  )
339
+ jobs: list[JobPosting] = []
340
+ if resp.status_code == 200:
341
+ block_hint = _detect_block_or_wall(resp.text)
342
+ if block_hint:
343
+ logger.warning("Main page may be blocked/walled (hint=%r)", block_hint)
344
+ jobs = _parse_jobs_from_html(resp.text)
345
+ logger.debug("Parsed %d jobs from main page", len(jobs))
346
+ elif resp.status_code in (999, 401, 403, 429):
347
+ logger.warning("Main page blocked with status=%d; will try fragment", resp.status_code)
348
+ else:
349
+ # For other errors, raise to caller
350
+ resp.raise_for_status()
351
 
352
  # If nothing parsed, try the fragment endpoint as a fallback regardless of page
353
  if len(jobs) == 0:
 
374
  if len(jobs) == 0:
375
  logger.info(
376
  "Zero jobs after main+fragment. Body sample: %s",
377
+ _summarize_body(resp.text if resp is not None and resp.text else (frag_resp.text if frag_resp is not None else "")),
378
  )
379
 
380
  return jobs