Spaces:
Runtime error
Runtime error
import requests | |
from datetime import datetime | |
from airflow import DAG | |
from airflow.operators.python import PythonOperator | |
from CrawDag.models import News | |
from CrawDag.crawling import CrawlingTask | |
from CrawDag.scraping import ScrapingTask | |
from CrawDag.saving import SavingTask | |
from CrawDag.sending import SendingTask | |
import pytz | |
# Time zone the pipeline is scheduled in. A pytz zone must be attached with
# ``localize()``; passing it as ``tzinfo=`` to the ``datetime`` constructor
# selects a historical local-mean-time offset instead of the current one.
_LOCAL_TZ = pytz.timezone('Asia/Ho_Chi_Minh')

# News pipeline: crawl -> scrape -> save -> send, executed once per month.
with DAG(
    dag_id='CrawDag',
    description='Crawling news from multiple sources',
    # For a 30-minute cadence use schedule_interval='*/30 * * * *' instead.
    schedule_interval='@monthly',
    # The start date must be a fixed point in time. With a dynamic value such
    # as datetime.now() the date moves forward on every DAG parse, so
    # "start_date + one schedule interval" is never reached and the scheduler
    # never creates a run.
    start_date=_LOCAL_TZ.localize(datetime(2025, 1, 1)),
    # Do not backfill one run for every month elapsed since start_date.
    catchup=False,
) as dag:
    # Each task object is built once, when the scheduler parses this file;
    # only its bound ``execute`` method runs when the task instance executes.
    # ``provide_context`` is intentionally omitted: it is deprecated since
    # Airflow 2.0, where the context is passed to the callable automatically.
    crawl_task = PythonOperator(
        task_id='crawl_task',
        python_callable=CrawlingTask('crawl_task').execute,
    )

    scrape_task = PythonOperator(
        task_id='scrape_task',
        python_callable=ScrapingTask('scrape_task').execute,
    )

    save_task = PythonOperator(
        task_id='save_task',
        python_callable=SavingTask('save_task').execute,
    )

    sent_task = PythonOperator(
        task_id='sent_task',
        python_callable=SendingTask('sent_task').execute,
    )

    # Strictly linear pipeline: every stage runs after the previous one.
    crawl_task >> scrape_task >> save_task >> sent_task