Zok213 committed on
Commit
f2826d8
·
1 Parent(s): a705291
Files changed (3) hide show
  1. __init__.py +0 -0
  2. app.py +25 -2
  3. scraper.py +2 -0
__init__.py ADDED
File without changes
app.py CHANGED
@@ -1,19 +1,42 @@
1
- from fastapi import FastAPI, Query
2
  from typing import Optional
3
  from datetime import date
4
  import psycopg2
5
  import os
6
  from dotenv import load_dotenv
 
 
7
 
8
  # Load environment variables from .env file (optional, for local development)
9
  load_dotenv()
10
-
11
  app = FastAPI()
12
 
13
  def get_db_connection():
14
  conn = psycopg2.connect(os.getenv('DB_CONNECTION_STRING'))
15
  return conn
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  @app.get("/data")
18
  def get_data(
19
  start_date: Optional[date] = Query(None),
 
1
+ from fastapi import FastAPI, Query, BackgroundTasks
2
  from typing import Optional
3
  from datetime import date
4
  import psycopg2
5
  import os
6
  from dotenv import load_dotenv
7
+ # Import your scraper function
8
+ from scraper import get_china_cpi
9
 
10
  # Load environment variables from .env file (optional, for local development)
11
  load_dotenv()
 
12
  app = FastAPI()
13
 
14
  def get_db_connection():
15
  conn = psycopg2.connect(os.getenv('DB_CONNECTION_STRING'))
16
  return conn
17
 
18
+ # Add a new endpoint to run the scraper
19
+ @app.get("/run-scraper")
20
+ async def run_scraper(background_tasks: BackgroundTasks):
21
+ """Run the China CPI scraper in the background"""
22
+ background_tasks.add_task(get_china_cpi)
23
+ return {"message": "Scraper started in background"}
24
+
25
+ # Add a root endpoint
26
+ @app.get("/")
27
+ def root():
28
+ """Root endpoint with API information"""
29
+ return {
30
+ "message": "China CPI API",
31
+ "endpoints": [
32
+ {"path": "/", "method": "GET", "description": "This information"},
33
+ {"path": "/run-scraper", "method": "GET", "description": "Trigger the data scraper"},
34
+ {"path": "/data", "method": "GET", "description": "Get CPI data with optional filters"},
35
+ {"path": "/latest", "method": "GET", "description": "Get the latest CPI data"}
36
+ ]
37
+ }
38
+
39
+ # Your existing endpoints
40
  @app.get("/data")
41
  def get_data(
42
  start_date: Optional[date] = Query(None),
scraper.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from playwright.sync_api import sync_playwright
2
  import pandas as pd
3
  from datetime import datetime
@@ -50,6 +51,7 @@ def get_china_cpi(output_dir='data/raw/'):
50
  current_row_count = len(page.query_selector_all('#eventHistoryTable459 tbody tr'))
51
  if current_row_count == previous_row_count:
52
  break
 
53
  page.evaluate('document.querySelector("#showMoreHistory459 a").click()')
54
  page.wait_for_function(
55
  f"document.querySelectorAll('#eventHistoryTable459 tbody tr').length > {current_row_count}",
 
1
+ # Scraper module: defines get_china_cpi(), which app.py imports
2
  from playwright.sync_api import sync_playwright
3
  import pandas as pd
4
  from datetime import datetime
 
51
  current_row_count = len(page.query_selector_all('#eventHistoryTable459 tbody tr'))
52
  if current_row_count == previous_row_count:
53
  break
54
+ # Click the "show more" link via JavaScript to load additional history rows
55
  page.evaluate('document.querySelector("#showMoreHistory459 a").click()')
56
  page.wait_for_function(
57
  f"document.querySelectorAll('#eventHistoryTable459 tbody tr').length > {current_row_count}",