|
|
|
from fastapi import FastAPI, HTTPException, Query |
|
from pydantic import BaseModel |
|
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeout |
|
from typing import List, Optional |
|
import datetime |
|
import logging |
|
|
|
# Root logger at INFO so scraper progress and per-item warnings are visible
# by default when the service runs.
logging.basicConfig(level=logging.INFO)

# ASGI application instance; served by uvicorn/hypercorn in deployment.
app = FastAPI(title="RealEstateSnap", version="0.3.0")
|
|
|
class Listing(BaseModel):
    """A single real-estate listing, normalized across scraped platforms.

    Optional fields default to ``None`` explicitly: under Pydantic v2 an
    ``Optional[...]`` annotation without a default is a *required* field,
    which would reject rows where the scraper could not extract the value.
    """

    title: str                      # listing headline as shown on the source site
    price: Optional[str] = None     # raw price text (e.g. "$1,800"); None if absent
    address: Optional[str] = None   # street address; scrapers currently never fill this
    bedrooms: Optional[str] = None  # bedroom count as text; currently never filled
    bathrooms: Optional[str] = None # bathroom count as text; currently never filled
    listing_url: str                # URL of the original listing
    image_url: Optional[str] = None # thumbnail URL; currently never filled
    platform: str                   # source platform id: "craigslist" or "kijiji"
    timestamp: str                  # ISO-8601 UTC time at which the item was scraped
|
|
|
async def scrape_craigslist(location: str, limit: int = 10) -> List[Listing]:
    """Scrape apartment listings ("apa" category) from Craigslist.

    Args:
        location: City name; spaces are stripped and it is lower-cased to
            form the regional subdomain (e.g. "new york" -> "newyork").
        limit: Maximum number of result rows to parse.

    Returns:
        Up to ``limit`` Listing objects. Rows with no title link are skipped.
    """
    listings: List[Listing] = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            site = location.replace(' ', '').lower()
            url = f"https://{site}.craigslist.org/search/apa"
            logging.info("Scraping Craigslist: %s", url)
            await page.goto(url)
            items = await page.query_selector_all(".result-row")
            for item in items[:limit]:
                try:
                    # ElementHandle.inner_text()/get_attribute() take no selector
                    # argument — locate the child element first.
                    anchor = await item.query_selector(".result-title")
                    if anchor is None:
                        continue  # malformed row: nothing to report
                    title = await anchor.inner_text()
                    href = await anchor.get_attribute("href")
                    if not href:
                        continue  # no link -> listing_url (required) can't be set
                    price_el = await item.query_selector(".result-price")
                    price = (await price_el.inner_text()).strip() if price_el else None
                    listings.append(Listing(
                        title=title.strip(),
                        price=price,
                        address=None,
                        bedrooms=None,
                        bathrooms=None,
                        listing_url=href,
                        image_url=None,
                        platform="craigslist",
                        # timezone-aware UTC; datetime.utcnow() is deprecated (3.12+)
                        timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat()
                    ))
                except PlaywrightTimeout:
                    logging.warning("Timeout - skipping a Craigslist item")
        finally:
            # Always release the browser, even if navigation/parsing raises.
            await browser.close()
    return listings
|
|
|
async def scrape_kijiji(location: str, limit: int = 10) -> List[Listing]:
    """Scrape apartment/condo listings from Kijiji (Canada).

    Args:
        location: City name; spaces become hyphens and it is lower-cased to
            form the URL slug (e.g. "north york" -> "north-york").
        limit: Maximum number of result cards to parse.

    Returns:
        Up to ``limit`` Listing objects. Cards with no title link are skipped.
    """
    listings: List[Listing] = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            city = location.replace(' ', '-').lower()
            url = f"https://www.kijiji.ca/b-apartments-condos/{city}/c37l1700271"
            logging.info("Scraping Kijiji: %s", url)
            await page.goto(url)
            cards = await page.query_selector_all(".search-item")
            for card in cards[:limit]:
                try:
                    # ElementHandle.inner_text()/get_attribute() take no selector
                    # argument — locate the child elements first.
                    anchor = await card.query_selector("a.title")
                    if anchor is None:
                        continue  # malformed card: nothing to report
                    title = await anchor.inner_text()
                    href = await anchor.get_attribute("href")
                    if not href:
                        continue  # no link -> listing_url (required) can't be set
                    price_el = await card.query_selector(".price")
                    price = (await price_el.inner_text()).strip() if price_el else None
                    # Kijiji hrefs are usually site-relative; only prefix those.
                    full_url = href if href.startswith("http") else f"https://www.kijiji.ca{href}"
                    listings.append(Listing(
                        title=title.strip(),
                        price=price,
                        address=None,
                        bedrooms=None,
                        bathrooms=None,
                        listing_url=full_url,
                        image_url=None,
                        platform="kijiji",
                        # timezone-aware UTC; datetime.utcnow() is deprecated (3.12+)
                        timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat()
                    ))
                except PlaywrightTimeout:
                    logging.warning("Timeout - skipping a Kijiji item")
        finally:
            # Always release the browser, even if navigation/parsing raises.
            await browser.close()
    return listings
|
|
|
@app.get("/realestate", response_model=List[Listing])
async def get_listings(
    location: str = Query(..., description="City name or ZIP/postal code"),
    platform: Optional[List[str]] = Query(
        None,
        description="Platforms to scrape: craigslist, kijiji. Defaults to all."
    ),
    limit: int = Query(
        10, ge=1, le=50,
        description="Maximum number of listings per platform."
    )
):
    """Scrape the requested platforms live and return combined listings.

    Raises:
        HTTPException 400: an unsupported platform name was requested.
        HTTPException 500: a selected platform's scrape raised.
        HTTPException 404: every scrape succeeded but yielded no listings.
    """
    supported = {"craigslist", "kijiji"}
    selected = [name.lower() for name in platform] if platform else sorted(supported)

    # Fail fast on typos instead of silently ignoring them (which previously
    # surfaced as a misleading 404 "No listings found").
    unknown = set(selected) - supported
    if unknown:
        raise HTTPException(
            status_code=400,
            detail=f"Unsupported platform(s): {', '.join(sorted(unknown))}"
        )

    logging.info("Platforms selected: %s", selected)

    results: List[Listing] = []

    if "craigslist" in selected:
        try:
            results += await scrape_craigslist(location, limit)
        except Exception as e:
            logging.error("Craigslist scrape failed: %s", e)
            raise HTTPException(status_code=500, detail="Craigslist scrape failed")

    if "kijiji" in selected:
        try:
            results += await scrape_kijiji(location, limit)
        except Exception as e:
            logging.error("Kijiji scrape failed: %s", e)
            raise HTTPException(status_code=500, detail="Kijiji scrape failed")

    if not results:
        raise HTTPException(status_code=404, detail="No listings found")
    return results
|
|