Spaces:
Runtime error
Runtime error
"""Tests for the pandas source.""" | |
import pandas as pd | |
from ...schema import schema | |
from .pandas_source import PANDAS_INDEX_COLUMN, PandasDataset | |
from .source import SourceSchema | |
def test_simple_dataframe() -> None:
    """Check the schema and rows produced for a DataFrame built without an explicit index."""
    records = [
        {'name': 'a', 'age': 1},
        {'name': 'b', 'age': 2},
        {'name': 'c', 'age': 3},
    ]
    dataset = PandasDataset(pd.DataFrame.from_records(records))
    dataset.setup()

    # The index is expected to appear as an extra int64 column next to the data columns.
    actual_schema = dataset.source_schema()
    expected_fields = schema({
        PANDAS_INDEX_COLUMN: 'int64',
        'name': 'string',
        'age': 'int64',
    }).fields
    assert actual_schema == SourceSchema(fields=expected_fields, num_items=3)

    # Each emitted item is the input record plus its positional index (0, 1, 2).
    actual_rows = list(dataset.process())
    expected_rows = [
        {PANDAS_INDEX_COLUMN: position, **record}
        for position, record in enumerate(records)
    ]
    assert actual_rows == expected_rows
def test_simple_dataframe_with_index() -> None:
    """Check the schema and rows produced for a DataFrame built with string index labels."""
    labels = ['id1', 'id2', 'id3']
    records = [
        {'name': 'a', 'age': 1},
        {'name': 'b', 'age': 2},
        {'name': 'c', 'age': 3},
    ]
    frame = pd.DataFrame.from_records(records, index=labels)
    dataset = PandasDataset(frame)
    dataset.setup()

    # With string labels, the index column is expected to be typed as 'string'.
    actual_schema = dataset.source_schema()
    expected_schema = SourceSchema(
        fields=schema({
            PANDAS_INDEX_COLUMN: 'string',
            'name': 'string',
            'age': 'int64',
        }).fields,
        num_items=3,
    )
    assert actual_schema == expected_schema

    # The PANDAS_INDEX_COLUMN aligns with the pandas index.
    actual_rows = list(dataset.process())
    expected_rows = [
        {PANDAS_INDEX_COLUMN: label, **record}
        for label, record in zip(labels, records)
    ]
    assert actual_rows == expected_rows