alvanli committed
Commit a40bda5 • Parent(s): 513505c

mv subset
Files changed:
- main.py +1 -1
- utilities/user_defined_functions.py +3 -1
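The diff below matches the commit message: the year-stamped `subset` name moves from module scope, where it is evaluated once at import, into the functions that use it, where it is evaluated on every call. For a job that keeps running across a year boundary the two behave differently; a minimal sketch of that difference (the names here are illustrative, not taken from the repo):

```python
from datetime import datetime

# Before: the subset name is fixed when the module is imported.
SUBSET_AT_IMPORT = f"year_{datetime.now().year}"

def subset_old() -> str:
    # Keeps returning the import-time year, even after the year rolls over.
    return SUBSET_AT_IMPORT

def subset_new() -> str:
    # After the change: recomputed per call, so it always names the current year.
    return f"year_{datetime.now().year}"
```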
main.py CHANGED

@@ -22,7 +22,6 @@ subreddit = os.environ["SUBREDDIT"]
 username = os.environ["USERNAME"]
 dataset_name = f"{username}/reddit-{subreddit}"
 comment_dataset_name = f"{username}/reddit-comments-{subreddit}"
-subset = f"year_{datetime.now().year}"

 dataset_readme_path = "README.md"

@@ -37,6 +36,7 @@ logger = setup_logger(__name__)

 def upload(new_df, dataset, hf_dataset_name):
     date = datetime.now().strftime('%Y-%m-%d')
+    subset = f"year_{datetime.now().year}"

     if 'train' in dataset.keys():
         old_df = dataset['train'].to_pandas() if 'train' in dataset.keys() else pd.DataFrame()
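The rest of `upload()` is not shown in this hunk, so how `subset` is consumed downstream is not visible here. A common pattern with the datasets library is to use such a value as the config name when pushing, roughly like this (the function name and the `push_to_hub` call are assumptions for illustration, not the repo's confirmed code, and `config_name`/`split` need a reasonably recent datasets release):

```python
from datetime import datetime
import pandas as pd
from datasets import Dataset

def push_year_subset(new_df: pd.DataFrame, hf_dataset_name: str) -> None:
    # Hypothetical sketch: write the new rows under a year-named config/subset.
    subset = f"year_{datetime.now().year}"
    Dataset.from_pandas(new_df).push_to_hub(
        hf_dataset_name,     # e.g. f"{username}/reddit-{subreddit}"
        config_name=subset,  # "year_2024", "year_2025", ...
        split="train",
    )
```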
utilities/user_defined_functions.py CHANGED

@@ -16,7 +16,7 @@ subreddit = os.environ["SUBREDDIT"]
 username = os.environ["USERNAME"]
 dataset_name = f"{username}/reddit-{subreddit}"
 comment_dataset_name = f"{username}/reddit-comments-{subreddit}"
-subset = f"year_{datetime.now().year}"
+

 frequency = os.environ.get("FREQUENCY", '').lower()
 if frequency not in ["daily", "hourly"]:
@@ -75,6 +75,7 @@ def load_or_create_dataset():
     Raises:
         FileNotFoundError: If the dataset cannot be loaded or created.
     """
+    subset = f"year_{datetime.now().year}"
     # Load the existing dataset from the Hugging Face hub or create a new one
     try:
         logger.debug(f"Trying to download {dataset_name}")
@@ -97,6 +98,7 @@ def load_or_create_dataset():


 def load_or_create_comment_dataset():
+    subset = f"year_{datetime.now().year}"
     # Load the existing dataset from the Hugging Face hub or create a new one
     try:
         logger.debug(f"Trying to download {comment_dataset_name}")
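On the load side, computing `subset` inside `load_or_create_dataset()` and `load_or_create_comment_dataset()` means each run targets the current year's config. Only the first lines of those functions appear in the hunks above, so the following is a hedged sketch of how a load-or-create path is typically written with `load_dataset`, not the repo's actual body (the caught exceptions are also an assumption):

```python
from datetime import datetime
from datasets import Dataset, DatasetDict, load_dataset

def load_or_create_sketch(repo_id: str) -> DatasetDict:
    # The subset is computed at call time, mirroring the change above.
    subset = f"year_{datetime.now().year}"
    try:
        # Fetch only the current year's config from the Hub.
        return load_dataset(repo_id, subset)
    except (FileNotFoundError, ValueError):
        # First run in a new year (or a brand-new repo): start from empty.
        return DatasetDict({"train": Dataset.from_dict({})})
```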