| | import pandas as pd |
| | import pytest |
| | from langflow.schema.data import Data |
| | from langflow.schema.dataframe import DataFrame |
| |
|
| |
|
@pytest.fixture
def sample_data_objects() -> list[Data]:
    """Build three Data records, each carrying ``name``, ``age`` and ``city`` keys."""
    people = (
        ("John", 30, "New York"),
        ("Jane", 25, "Boston"),
        ("Bob", 35, "Chicago"),
    )
    return [Data(data={"name": name, "age": age, "city": city}) for name, age, city in people]
| |
|
| |
|
@pytest.fixture
def sample_dataset(sample_data_objects) -> DataFrame:
    """Wrap the records from the ``sample_data_objects`` fixture in a DataFrame."""
    frame = DataFrame(sample_data_objects)
    return frame
| |
|
| |
|
def test_from_data_list_basic():
    """Build a DataFrame from two Data objects and check its type, shape and cells."""
    john = Data(data={"name": "John", "age": 30})
    jane = Data(data={"name": "Jane", "age": 25})
    frame = DataFrame([john, jane])

    # The result must be an instance of both the langflow and the pandas class.
    assert isinstance(frame, DataFrame)
    assert isinstance(frame, pd.DataFrame)

    # Two rows, columns in the key order of the input records.
    assert len(frame) == 2
    assert list(frame.columns) == ["name", "age"]

    first_row = frame.iloc[0]
    assert first_row["name"] == "John"
    second_row = frame.iloc[1]
    assert second_row["age"] == 25
| |
|
| |
|
def test_from_data_list_empty():
    """An empty list yields a DataFrame instance with zero rows."""
    frame = DataFrame([])

    assert isinstance(frame, DataFrame)
    assert len(frame) == 0
| |
|
| |
|
def test_from_data_list_missing_fields():
    """Records with different keys give the union of columns, with NA in the gaps."""
    with_age = Data(data={"name": "John", "age": 30})
    with_city = Data(data={"name": "Jane", "city": "Boston"})
    frame = DataFrame([with_age, with_city])

    assert isinstance(frame, DataFrame)
    assert set(frame.columns) == {"name", "age", "city"}

    # Each record lacks one key; the matching cell must read back as NA.
    second_row = frame.iloc[1]
    assert pd.isna(second_row["age"])
    first_row = frame.iloc[0]
    assert pd.isna(first_row["city"])
| |
|
| |
|
def test_from_data_list_nested_data():
    """Dictionary values are kept as dicts inside the resulting column."""
    people = (
        ("John", "New York", "10001"),
        ("Jane", "Boston", "02108"),
    )
    records = [
        Data(data={"name": name, "address": {"city": city, "zip": zip_code}})
        for name, city, zip_code in people
    ]
    frame = DataFrame(records)

    assert isinstance(frame, DataFrame)

    # The nested mapping must still be a dict and keep its contents.
    first_address = frame["address"][0]
    assert isinstance(first_address, dict)
    assert first_address["city"] == "New York"
| |
|
| |
|
def test_to_data_list_basic(sample_dataset, sample_data_objects):
    """``to_data_list`` returns one Data per row whose payload equals the source record."""
    converted_items = sample_dataset.to_data_list()

    assert isinstance(converted_items, list)
    for item in converted_items:
        assert isinstance(item, Data)
    assert len(converted_items) == len(sample_data_objects)

    # Compare row by row against the objects the frame was built from.
    for source, roundtripped in zip(sample_data_objects, converted_items, strict=False):
        assert source.data == roundtripped.data
| |
|
| |
|
def test_to_data_list_empty():
    """A DataFrame built with no arguments converts to an empty list."""
    converted_items = DataFrame().to_data_list()

    assert isinstance(converted_items, list)
    assert len(converted_items) == 0
| |
|
| |
|
def test_to_data_list_modified_data(sample_dataset):
    """A column added and a cell edited on the frame both show up in ``to_data_list``."""
    # Add a column, then overwrite the first row's age, before converting.
    sample_dataset["new_column"] = [1, 2, 3]
    age_position = sample_dataset.columns.get_loc("age")
    sample_dataset.iloc[0, age_position] = 31

    converted_items = sample_dataset.to_data_list()

    assert isinstance(converted_items, list)
    for item in converted_items:
        assert isinstance(item, Data)

    # Both modifications must be visible in the first converted record.
    first_payload = converted_items[0].data
    assert first_payload["new_column"] == 1
    assert first_payload["age"] == 31
| |
|
| |
|
def test_dataset_pandas_operations(sample_dataset):
    """Boolean filtering, column aggregation and group-by all work on the DataFrame."""
    # Filtering with a boolean mask must keep the langflow DataFrame type.
    older_than_30 = sample_dataset["age"] > 30
    filtered = sample_dataset[older_than_30]
    assert isinstance(filtered, DataFrame), f"Expected DataFrame, got {type(filtered)}"
    assert len(filtered) == 1
    only_match = filtered.iloc[0]
    assert only_match["name"] == "Bob"

    # Aggregating a single column.
    ages = sample_dataset["age"]
    assert ages.mean() == 30

    # Grouping by city and averaging age yields a pandas frame with three groups.
    per_city = sample_dataset.groupby("city").agg({"age": "mean"})
    assert isinstance(per_city, pd.DataFrame)
    assert len(per_city) == 3
| |
|
| |
|
def test_dataset_with_null_values():
    """``None`` values read back as NA from the frame and again after ``to_data_list``."""
    missing_age = Data(data={"name": "John", "age": None})
    missing_name = Data(data={"name": None, "age": 25})
    frame = DataFrame([missing_age, missing_name])

    # NA is visible when reading cells directly from the frame.
    first_row = frame.iloc[0]
    assert pd.isna(first_row["age"])
    second_row = frame.iloc[1]
    assert pd.isna(second_row["name"])

    # NA is still reported after converting the rows back to Data objects.
    result = frame.to_data_list()
    assert pd.isna(result[0].data["age"]), f"Expected NaN, got {result[0].data['age']}"
    assert pd.isna(result[1].data["name"]), f"Expected NaN, got {result[1].data['name']}"
| |
|
| |
|
def test_dataset_type_preservation():
    """Each value returns from a DataFrame round trip as an instance of its original type."""
    payload = {
        "int_val": 1,
        "float_val": 1.5,
        "str_val": "test",
        "bool_val": True,
        "list_val": [1, 2, 3],
        "dict_val": {"key": "value"},
    }
    frame = DataFrame([Data(data=payload)])
    roundtripped = frame.to_data_list()[0].data

    # Key -> Python type the round-tripped value must be an instance of.
    expected_types = {
        "int_val": int,
        "float_val": float,
        "str_val": str,
        "bool_val": bool,
        "list_val": list,
        "dict_val": dict,
    }
    for key, expected_type in expected_types.items():
        assert isinstance(roundtripped[key], expected_type)
| |
|
| |
|
def test_add_row_with_dict(sample_dataset):
    """``add_row`` with a plain dict returns a DataFrame ending in that row."""
    alice = {"name": "Alice", "age": 28, "city": "Seattle"}
    extended = sample_dataset.add_row(alice)

    assert isinstance(extended, DataFrame)
    # The fixture's length is read after the call; the result must be one row longer.
    assert len(extended) == len(sample_dataset) + 1

    last_row = extended.iloc[-1]
    assert last_row["name"] == "Alice"
    assert last_row["age"] == 28
    assert last_row["city"] == "Seattle"
| |
|
| |
|
def test_add_row_with_data_object(sample_dataset):
    """``add_row`` with a Data object returns a DataFrame ending in that row."""
    alice = Data(data={"name": "Alice", "age": 28, "city": "Seattle"})
    extended = sample_dataset.add_row(alice)

    assert isinstance(extended, DataFrame)
    # The fixture's length is read after the call; the result must be one row longer.
    assert len(extended) == len(sample_dataset) + 1

    last_row = extended.iloc[-1]
    assert last_row["name"] == "Alice"
    assert last_row["age"] == 28
    assert last_row["city"] == "Seattle"
| |
|
| |
|
def test_add_rows_with_dicts(sample_dataset):
    """``add_rows`` with two dicts returns a DataFrame ending in those rows, in order."""
    alice = {"name": "Alice", "age": 28, "city": "Seattle"}
    charlie = {"name": "Charlie", "age": 32, "city": "Portland"}
    extended = sample_dataset.add_rows([alice, charlie])

    assert isinstance(extended, DataFrame)
    assert len(extended) == len(sample_dataset) + 2

    # The new rows sit at the end, in the order they were passed.
    second_to_last = extended.iloc[-2]
    assert second_to_last["name"] == "Alice"
    last_row = extended.iloc[-1]
    assert last_row["name"] == "Charlie"
| |
|
| |
|
def test_add_rows_with_data_objects(sample_dataset):
    """``add_rows`` with two Data objects returns a DataFrame ending in those rows, in order."""
    alice = Data(data={"name": "Alice", "age": 28, "city": "Seattle"})
    charlie = Data(data={"name": "Charlie", "age": 32, "city": "Portland"})
    extended = sample_dataset.add_rows([alice, charlie])

    assert isinstance(extended, DataFrame)
    assert len(extended) == len(sample_dataset) + 2

    # The new rows sit at the end, in the order they were passed.
    second_to_last = extended.iloc[-2]
    assert second_to_last["name"] == "Alice"
    last_row = extended.iloc[-1]
    assert last_row["name"] == "Charlie"
| |
|
| |
|
def test_add_rows_mixed_types(sample_dataset):
    """``add_rows`` accepts a list that mixes a plain dict with a Data object."""
    alice_as_dict = {"name": "Alice", "age": 28, "city": "Seattle"}
    charlie_as_data = Data(data={"name": "Charlie", "age": 32, "city": "Portland"})
    extended = sample_dataset.add_rows([alice_as_dict, charlie_as_data])

    assert isinstance(extended, DataFrame)
    assert len(extended) == len(sample_dataset) + 2

    # The new rows sit at the end, in the order they were passed.
    second_to_last = extended.iloc[-2]
    assert second_to_last["name"] == "Alice"
    last_row = extended.iloc[-1]
    assert last_row["name"] == "Charlie"
| |
|
| |
|
def test_init_with_data_objects():
    """The constructor accepts a list of Data objects."""
    john = Data(data={"name": "John", "age": 30})
    jane = Data(data={"name": "Jane", "age": 25})
    frame = DataFrame([john, jane])

    assert isinstance(frame, DataFrame)
    assert len(frame) == 2
    assert list(frame.columns) == ["name", "age"]

    first_row = frame.iloc[0]
    assert first_row["name"] == "John"
    second_row = frame.iloc[1]
    assert second_row["age"] == 25
| |
|
| |
|
def test_init_with_dicts():
    """The constructor accepts a list of plain dictionaries, one per row."""
    row_dicts = [
        {"name": "John", "age": 30},
        {"name": "Jane", "age": 25},
    ]
    frame = DataFrame(row_dicts)

    assert isinstance(frame, DataFrame)
    assert len(frame) == 2
    assert list(frame.columns) == ["name", "age"]

    first_row = frame.iloc[0]
    assert first_row["name"] == "John"
    second_row = frame.iloc[1]
    assert second_row["age"] == 25
| |
|
| |
|
def test_init_with_dict_of_lists():
    """The constructor accepts a column-oriented dict mapping names to value lists."""
    columns = {
        "name": ["John", "Jane"],
        "age": [30, 25],
    }
    frame = DataFrame(columns)

    assert isinstance(frame, DataFrame)
    assert len(frame) == 2
    assert list(frame.columns) == ["name", "age"]

    first_row = frame.iloc[0]
    assert first_row["name"] == "John"
    second_row = frame.iloc[1]
    assert second_row["age"] == 25
| |
|
| |
|
def test_init_with_pandas_dataframe():
    """The constructor accepts an existing pandas DataFrame."""
    source_frame = pd.DataFrame({"name": ["John", "Jane"], "age": [30, 25]})
    frame = DataFrame(source_frame)

    # The wrapped result must be the langflow type with the same shape and cells.
    assert isinstance(frame, DataFrame)
    assert len(frame) == 2
    assert list(frame.columns) == ["name", "age"]

    first_row = frame.iloc[0]
    assert first_row["name"] == "John"
    second_row = frame.iloc[1]
    assert second_row["age"] == 25
| |
|
| |
|
def test_init_with_none():
    """Passing ``None`` to the constructor yields a DataFrame with zero rows."""
    frame = DataFrame(None)

    assert isinstance(frame, DataFrame)
    assert len(frame) == 0
| |
|
| |
|
def test_init_with_invalid_list():
    """A list mixing a dict with a Data object is rejected with ``ValueError``."""
    mixed_items = [
        {"name": "John", "age": 30},
        Data(data={"name": "Jane", "age": 25}),
    ]
    expected_message = "List items must be either all Data objects or all dictionaries"

    with pytest.raises(ValueError, match=expected_message):
        DataFrame(mixed_items)
| |
|
| |
|
def test_init_with_kwargs():
    """Keyword arguments such as ``index`` are honoured by the constructor."""
    columns = {"name": ["John", "Jane"], "age": [30, 25]}
    frame = DataFrame(data=columns, index=["a", "b"])

    assert isinstance(frame, DataFrame)
    assert len(frame) == 2
    assert list(frame.index) == ["a", "b"]

    # Rows must be addressable by the custom index labels.
    row_a = frame.loc["a"]
    assert row_a["name"] == "John"
    row_b = frame.loc["b"]
    assert row_b["age"] == 25
| |
|