Spaces:
Running
Running
File size: 1,696 Bytes
7841ce0 41c5156 7aebf2b de2a82e 7841ce0 aba41f2 7aebf2b aba41f2 41c5156 de2a82e 41c5156 7841ce0 41c5156 7841ce0 41c5156 de2a82e 41c5156 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
from typing import Tuple
import pandas as pd
import random
from datetime import datetime, timedelta
from dataset.download import presentation_data_schema
from whale_viewer import WHALE_CLASSES
def generate_fake_data(df:pd.DataFrame, num_fake:int) -> pd.DataFrame:
    """
    Append randomly generated observations to an existing DataFrame.

    Each synthetic row holds, in this order: a latitude drawn uniformly
    from [-60, 60], a longitude drawn uniformly from [-180, 180], a species
    picked from ``WHALE_CLASSES``, a contact email picked from a small fixed
    list, and a date between 2018-01-01 and 2025-01-01 (both inclusive).

    Args:
        df (pd.DataFrame): Existing observations; it is not modified.
        num_fake (int): How many synthetic rows to create.

    Returns:
        pd.DataFrame: A new frame holding the rows of ``df`` followed by
            the synthetic rows, with a freshly numbered index.
    """
    # Fixed pool of observer addresses used for the synthetic rows.
    observer_emails = (
        'dr.marine@oceanic.org', 'whale.research@deepblue.org',
        'observer@sea.net', 'super@whale.org',
    )

    # Window from which observation dates are sampled (whole days).
    earliest = datetime(2018, 1, 1)
    latest = datetime(2025, 1, 1)
    span_in_days = (latest - earliest).days

    # The list literal is evaluated left to right, so the random draws per
    # row happen in the order lat, lon, species, email, date.
    rows = [
        [
            random.uniform(-60, 60),  # latitude, staying away from the poles
            random.uniform(-180, 180),  # longitude
            random.choice(WHALE_CLASSES),
            random.choice(observer_emails),
            earliest + timedelta(days=random.randint(0, span_in_days)),
        ]
        for _ in range(num_fake)
    ]

    # NOTE(review): presentation_data_schema is presumably a mapping of
    # column name -> dtype whose key order matches the row layout above
    # (lat, lon, species, email, date) -- confirm against dataset.download.
    fake_frame = pd.DataFrame(rows, columns=presentation_data_schema)
    fake_frame = fake_frame.astype(presentation_data_schema)

    # ignore_index=True renumbers the combined rows from 0.
    return pd.concat([df, fake_frame], ignore_index=True)