Spaces:
Running
Running
import pandas as pd | |
from datasets import load_dataset | |
def get_listings(limit: int = 100):
    """Load Airbnb listings from the Hugging Face hub as plain dictionaries.

    Streams the first ``limit`` rows of the ``train`` split of the
    ``MongoDB/airbnb_embeddings`` dataset, replaces null-like values
    (``NaN``, ``NaT``, ...) with ``None``, and passes every row through the
    ``Listing`` model (defined elsewhere in the project) before converting
    it back to a dictionary.

    Args:
        limit: Maximum number of rows to take from the stream. Defaults to
            100.

    Returns:
        A list with one dictionary per listing (empty when the stream yields
        no rows), or ``None`` when a ``ValidationError`` is raised while
        building the ``Listing`` objects; the error is printed in that case.
    """
    dataset = load_dataset("MongoDB/airbnb_embeddings", streaming=True, split="train")
    dataset_df = pd.DataFrame(dataset.take(limit))
    print("Columns:", dataset_df.columns)

    records = dataset_df.to_dict(orient="records")

    def _is_missing(value):
        # pd.isnull() on a container returns an element-wise array whose
        # truth value is ambiguous, so only scalars are ever tested.
        return pd.api.types.is_scalar(value) and pd.isnull(value)

    # Replace null-like values (e.g. `NaT`) with None so the model can
    # treat them as missing optional fields.
    for record in records:
        for key, value in record.items():
            if isinstance(value, list):
                # List-valued field: clean each element individually.
                record[key] = [None if _is_missing(v) else v for v in value]
            elif _is_missing(value):
                record[key] = None

    try:
        # Convert each dictionary to a Listing instance and back to a dict.
        listings = [Listing(**record).dict() for record in records]
    except ValidationError as e:
        print(e)
        return None

    # Show the keys of a single datapoint, if there is one.
    if listings:
        print(listings[0].keys())
    return listings