alvanli committed
Commit a40bda5 • Parent(s): 513505c

mv subset
Files changed:
- main.py +1 -1
- utilities/user_defined_functions.py +3 -1
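The diff below matches the commit message: the year-stamped `subset` name moves from module scope, where it is evaluated once at import, into the functions that use it, where it is evaluated on every call. For a job that keeps running across a year boundary the two behave differently; a minimal sketch of that difference (the names here are illustrative, not taken from the repo):

```python
from datetime import datetime

# Before: the subset name is fixed when the module is imported.
SUBSET_AT_IMPORT = f"year_{datetime.now().year}"

def subset_old() -> str:
    # Keeps returning the import-time year, even after the year rolls over.
    return SUBSET_AT_IMPORT

def subset_new() -> str:
    # After the change: recomputed per call, so it always names the current year.
    return f"year_{datetime.now().year}"
```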
main.py CHANGED

@@ -22,7 +22,6 @@ subreddit = os.environ["SUBREDDIT"]
 username = os.environ["USERNAME"]
 dataset_name = f"{username}/reddit-{subreddit}"
 comment_dataset_name = f"{username}/reddit-comments-{subreddit}"
-subset = f"year_{datetime.now().year}"

 dataset_readme_path = "README.md"

@@ -37,6 +36,7 @@ logger = setup_logger(__name__)

 def upload(new_df, dataset, hf_dataset_name):
     date = datetime.now().strftime('%Y-%m-%d')
+    subset = f"year_{datetime.now().year}"

     if 'train' in dataset.keys():
         old_df = dataset['train'].to_pandas() if 'train' in dataset.keys() else pd.DataFrame()
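The rest of `upload()` is not shown in this hunk, so how `subset` is consumed downstream is not visible here. A common pattern with the datasets library is to use such a value as the config name when pushing, roughly like this (the function name and the `push_to_hub` call are assumptions for illustration, not the repo's confirmed code, and `config_name`/`split` need a reasonably recent datasets release):

```python
from datetime import datetime
import pandas as pd
from datasets import Dataset

def push_year_subset(new_df: pd.DataFrame, hf_dataset_name: str) -> None:
    # Hypothetical sketch: write the new rows under a year-named config/subset.
    subset = f"year_{datetime.now().year}"
    Dataset.from_pandas(new_df).push_to_hub(
        hf_dataset_name,     # e.g. f"{username}/reddit-{subreddit}"
        config_name=subset,  # "year_2024", "year_2025", ...
        split="train",
    )
```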
utilities/user_defined_functions.py CHANGED

@@ -16,7 +16,7 @@ subreddit = os.environ["SUBREDDIT"]
 username = os.environ["USERNAME"]
 dataset_name = f"{username}/reddit-{subreddit}"
 comment_dataset_name = f"{username}/reddit-comments-{subreddit}"
-subset = f"year_{datetime.now().year}"
+

 frequency = os.environ.get("FREQUENCY", '').lower()
 if frequency not in ["daily", "hourly"]:
@@ -75,6 +75,7 @@ def load_or_create_dataset():
     Raises:
         FileNotFoundError: If the dataset cannot be loaded or created.
     """
+    subset = f"year_{datetime.now().year}"
     # Load the existing dataset from the Hugging Face hub or create a new one
     try:
         logger.debug(f"Trying to download {dataset_name}")
@@ -97,6 +98,7 @@ def load_or_create_dataset():


 def load_or_create_comment_dataset():
+    subset = f"year_{datetime.now().year}"
     # Load the existing dataset from the Hugging Face hub or create a new one
     try:
         logger.debug(f"Trying to download {comment_dataset_name}")
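On the load side, computing `subset` inside `load_or_create_dataset()` and `load_or_create_comment_dataset()` means each run targets the current year's config. Only the first lines of those functions appear in the hunks above, so the following is a hedged sketch of how a load-or-create path is typically written with `load_dataset`, not the repo's actual body (the caught exceptions are also an assumption):

```python
from datetime import datetime
from datasets import Dataset, DatasetDict, load_dataset

def load_or_create_sketch(repo_id: str) -> DatasetDict:
    # The subset is computed at call time, mirroring the change above.
    subset = f"year_{datetime.now().year}"
    try:
        # Fetch only the current year's config from the Hub.
        return load_dataset(repo_id, subset)
    except (FileNotFoundError, ValueError):
        # First run in a new year (or a brand-new repo): start from empty.
        return DatasetDict({"train": Dataset.from_dict({})})
```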