#!/usr/bin/env python3 """ Debug script to examine eBoard page structure """ import asyncio from playwright.async_api import async_playwright from playwright_stealth import Stealth from bs4 import BeautifulSoup from urllib.parse import urljoin import re async def main(): url = "https://simbli.eboardsolutions.com/SB_Meetings/SB_MeetingListing.aspx?S=2088" base_url = "https://simbli.eboardsolutions.com" print(f"Loading: {url}\n") async with async_playwright() as p: browser = await p.chromium.launch( headless=True, args=[ '--disable-blink-features=AutomationControlled', '--disable-dev-shm-usage', '--no-sandbox' ] ) user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36' context = await browser.new_context( viewport={'width': 1920, 'height': 1080}, user_agent=user_agent, locale='en-US', timezone_id='America/Chicago', ) page = await context.new_page() # Apply stealth stealth = Stealth() await stealth.apply_stealth_async(page) # Navigate response = await page.goto(url, wait_until='networkidle', timeout=60000) print(f"Response status: {response.status}") # Wait for JavaScript await page.wait_for_timeout(5000) content = await page.content() print(f"Page size: {len(content)} bytes\n") # Save full HTML for inspection with open('/tmp/eboard_page.html', 'w') as f: f.write(content) print("Saved full HTML to /tmp/eboard_page.html\n") # Parse with BeautifulSoup soup = BeautifulSoup(content, 'html.parser') # Find all links all_links = soup.find_all('a', href=True) print(f"Total links found: {len(all_links)}\n") # Categorize links mid_links = [] meetingdetail_links = [] pdf_links = [] other_links = [] for link in all_links: href = link.get('href', '') text = link.get_text().strip() if 'MID=' in href.upper(): mid_links.append((href, text)) elif 'meetingdetail' in href.lower(): meetingdetail_links.append((href, text)) elif href.lower().endswith('.pdf'): pdf_links.append((href, text)) elif href and not href.startswith('#') and not href.startswith('javascript:'): other_links.append((href, text[:50])) print(f"Links with MID=: {len(mid_links)}") for href, text in mid_links[:10]: print(f" - {text[:60]}: {href[:80]}") print(f"\nLinks with 'meetingdetail': {len(meetingdetail_links)}") for href, text in meetingdetail_links[:10]: print(f" - {text[:60]}: {href[:80]}") print(f"\nPDF links: {len(pdf_links)}") for href, text in pdf_links[:10]: print(f" - {text[:60]}: {href[:80]}") print(f"\nOther significant links: {len(other_links)}") for href, text in other_links[:20]: print(f" - {text[:60]}: {href[:80]}") # Look for ASP.NET ViewState and other dynamic content indicators print("\n" + "="*80) print("Page Analysis:") print("="*80) viewstate = soup.find('input', {'id': '__VIEWSTATE'}) if viewstate: print(f"✓ ASP.NET ViewState present ({len(viewstate.get('value', ''))} chars)") # Look for tables or grids that might contain meetings tables = soup.find_all('table') print(f"Tables found: {len(tables)}") for i, table in enumerate(tables[:5]): rows = table.find_all('tr') print(f" Table {i+1}: {len(rows)} rows") if rows: first_row_text = rows[0].get_text().strip()[:100] print(f" First row: {first_row_text}") # Look for JavaScript-rendered content scripts = soup.find_all('script') print(f"\nJavaScript blocks: {len(scripts)}") # Check for common eBoard element IDs meeting_list_elem = soup.find(id=re.compile(r'meeting.*list', re.I)) if meeting_list_elem: print(f"✓ Found element with 'meeting' and 'list' in ID: {meeting_list_elem.get('id')}") await browser.close() if __name__ == "__main__": asyncio.run(main())