Zok213
committed on
Commit
·
f2826d8
1
Parent(s):
a705291
fix
Browse files
- __init__.py +0 -0
- app.py +25 -2
- scraper.py +2 -0
__init__.py
ADDED
File without changes
|
app.py
CHANGED
@@ -1,19 +1,42 @@
|
|
1 |
-
from fastapi import FastAPI, Query
|
2 |
from typing import Optional
|
3 |
from datetime import date
|
4 |
import psycopg2
|
5 |
import os
|
6 |
from dotenv import load_dotenv
|
|
|
|
|
7 |
|
8 |
# Load environment variables from .env file (optional, for local development)
|
9 |
load_dotenv()
|
10 |
-
|
11 |
app = FastAPI()
|
12 |
|
13 |
def get_db_connection():
|
14 |
conn = psycopg2.connect(os.getenv('DB_CONNECTION_STRING'))
|
15 |
return conn
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
@app.get("/data")
|
18 |
def get_data(
|
19 |
start_date: Optional[date] = Query(None),
|
|
|
1 |
+
from fastapi import FastAPI, Query, BackgroundTasks
|
2 |
from typing import Optional
|
3 |
from datetime import date
|
4 |
import psycopg2
|
5 |
import os
|
6 |
from dotenv import load_dotenv
|
7 |
+
# Import your scraper function
|
8 |
+
from scraper import get_china_cpi
|
9 |
|
10 |
# Load environment variables from .env file (optional, for local development)
|
11 |
load_dotenv()
|
|
|
12 |
app = FastAPI()
|
13 |
|
14 |
def get_db_connection():
|
15 |
conn = psycopg2.connect(os.getenv('DB_CONNECTION_STRING'))
|
16 |
return conn
|
17 |
|
18 |
+
# Add a new endpoint to run the scraper
|
19 |
+
@app.get("/run-scraper")
|
20 |
+
async def run_scraper(background_tasks: BackgroundTasks):
|
21 |
+
"""Run the China CPI scraper in the background"""
|
22 |
+
background_tasks.add_task(get_china_cpi)
|
23 |
+
return {"message": "Scraper started in background"}
|
24 |
+
|
25 |
+
# Add a root endpoint
|
26 |
+
@app.get("/")
|
27 |
+
def root():
|
28 |
+
"""Root endpoint with API information"""
|
29 |
+
return {
|
30 |
+
"message": "China CPI API",
|
31 |
+
"endpoints": [
|
32 |
+
{"path": "/", "method": "GET", "description": "This information"},
|
33 |
+
{"path": "/run-scraper", "method": "GET", "description": "Trigger the data scraper"},
|
34 |
+
{"path": "/data", "method": "GET", "description": "Get CPI data with optional filters"},
|
35 |
+
{"path": "/latest", "method": "GET", "description": "Get the latest CPI data"}
|
36 |
+
]
|
37 |
+
}
|
38 |
+
|
39 |
+
# Your existing endpoints
|
40 |
@app.get("/data")
|
41 |
def get_data(
|
42 |
start_date: Optional[date] = Query(None),
|
scraper.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
from playwright.sync_api import sync_playwright
|
2 |
import pandas as pd
|
3 |
from datetime import datetime
|
@@ -50,6 +51,7 @@ def get_china_cpi(output_dir='data/raw/'):
|
|
50 |
current_row_count = len(page.query_selector_all('#eventHistoryTable459 tbody tr'))
|
51 |
if current_row_count == previous_row_count:
|
52 |
break
|
|
|
53 |
page.evaluate('document.querySelector("#showMoreHistory459 a").click()')
|
54 |
page.wait_for_function(
|
55 |
f"document.querySelectorAll('#eventHistoryTable459 tbody tr').length > {current_row_count}",
|
|
|
1 |
+
# Ensure scraper.py contains a complete function that can be imported
|
2 |
from playwright.sync_api import sync_playwright
|
3 |
import pandas as pd
|
4 |
from datetime import datetime
|
|
|
51 |
current_row_count = len(page.query_selector_all('#eventHistoryTable459 tbody tr'))
|
52 |
if current_row_count == previous_row_count:
|
53 |
break
|
54 |
+
# Fix the JavaScript method name
|
55 |
page.evaluate('document.querySelector("#showMoreHistory459 a").click()')
|
56 |
page.wait_for_function(
|
57 |
f"document.querySelectorAll('#eventHistoryTable459 tbody tr').length > {current_row_count}",
|