"""Tests for embedchain's CsvLoader."""
import csv | |
import os | |
import pathlib | |
import tempfile | |
from unittest.mock import MagicMock, patch | |
import pytest | |
from embedchain.loaders.csv import CsvLoader | |
def test_load_data(delimiter):
    """
    Test that CsvLoader loads a CSV file given as a plain filesystem path.

    Writes a header and three rows to a temporary file using ``delimiter``,
    loads the file, and checks every record's content string together with
    its ``url`` and ``row`` metadata.

    Args:
        delimiter: Field separator handed to ``csv.writer``. It is injected by
            pytest; the fixture or parametrization that provides it is not
            visible in this part of the file.
    """
    header = ["Name", "Age", "Occupation"]
    rows = [
        ["Alice", "28", "Engineer"],
        ["Bob", "35", "Doctor"],
        ["Charlie", "22", "Student"],
    ]
    expected_contents = [
        "Name: Alice, Age: 28, Occupation: Engineer",
        "Name: Bob, Age: 35, Occupation: Doctor",
        "Name: Charlie, Age: 22, Occupation: Student",
    ]

    # delete=False keeps the file on disk after the handle is closed; leaving
    # the ``with`` block flushes and closes it before the loader opens it.
    with tempfile.NamedTemporaryFile(mode="w+", newline="", delete=False) as tmpfile:
        writer = csv.writer(tmpfile, delimiter=delimiter)
        writer.writerow(header)
        writer.writerows(rows)
        filename = tmpfile.name

    try:
        # Loading CSV using CsvLoader
        loader = CsvLoader()
        result = loader.load_data(filename)
        data = result["data"]

        # Assertions
        assert len(data) == 3
        for row_number, (record, expected) in enumerate(zip(data, expected_contents), start=1):
            assert record["content"] == expected
            assert record["meta_data"]["url"] == filename
            assert record["meta_data"]["row"] == row_number
    finally:
        # Remove the temporary file even when loading or an assertion fails.
        os.unlink(filename)
def test_load_data_with_file_uri(delimiter):
    """
    Test that CsvLoader loads a CSV file given as a ``file://`` URI.

    Writes a header and three rows to a temporary file using ``delimiter``,
    converts its path to a file URI, loads it, and checks every record's
    content string together with its ``url`` and ``row`` metadata. The ``url``
    metadata is expected to equal the URI that was passed in.

    Args:
        delimiter: Field separator handed to ``csv.writer``. It is injected by
            pytest; the fixture or parametrization that provides it is not
            visible in this part of the file.
    """
    header = ["Name", "Age", "Occupation"]
    rows = [
        ["Alice", "28", "Engineer"],
        ["Bob", "35", "Doctor"],
        ["Charlie", "22", "Student"],
    ]
    expected_contents = [
        "Name: Alice, Age: 28, Occupation: Engineer",
        "Name: Bob, Age: 35, Occupation: Doctor",
        "Name: Charlie, Age: 22, Occupation: Student",
    ]

    # delete=False keeps the file on disk after the handle is closed; leaving
    # the ``with`` block flushes and closes it before the loader opens it.
    with tempfile.NamedTemporaryFile(mode="w+", newline="", delete=False) as tmpfile:
        writer = csv.writer(tmpfile, delimiter=delimiter)
        writer.writerow(header)
        writer.writerows(rows)
        tmp_path = tmpfile.name

    # The loader receives the URI; the real path is kept for cleanup.
    filename = pathlib.Path(tmp_path).as_uri()  # Convert path to file URI

    try:
        # Loading CSV using CsvLoader
        loader = CsvLoader()
        result = loader.load_data(filename)
        data = result["data"]

        # Assertions
        assert len(data) == 3
        for row_number, (record, expected) in enumerate(zip(data, expected_contents), start=1):
            assert record["content"] == expected
            assert record["meta_data"]["url"] == filename
            assert record["meta_data"]["row"] == row_number
    finally:
        # Remove the temporary file even when loading or an assertion fails.
        os.unlink(tmp_path)
def test_get_file_content(content):
    """
    Test that ``CsvLoader._get_file_content`` raises ``ValueError`` for ``content``.

    Args:
        content: Input the loader is expected to reject. It is injected by
            pytest; the fixture or parametrization that provides it is not
            visible in this part of the file.
    """
    # Build the loader outside the ``raises`` block so that only the call
    # under test can satisfy the expected ValueError.
    loader = CsvLoader()
    with pytest.raises(ValueError):
        loader._get_file_content(content)
def test_get_file_content_http(content):
    """
    Test _get_file_content method of CsvLoader for http and https URLs

    ``requests.get`` is patched so no network access happens. The test checks
    that it is called exactly once with ``content``, that ``raise_for_status``
    is called on the response, and that the returned file-like object yields
    the response text.

    Args:
        content: URL handed to the loader. It is injected by pytest; the
            fixture or parametrization that provides it is not visible in
            this part of the file.
    """
    csv_text = "Name,Age,Occupation\nAlice,28,Engineer\nBob,35,Doctor\nCharlie,22,Student"

    fake_response = MagicMock()
    fake_response.text = csv_text

    with patch("requests.get", return_value=fake_response) as fake_get:
        stream = CsvLoader()._get_file_content(content)

        fake_get.assert_called_once_with(content)
        fake_response.raise_for_status.assert_called_once()
        assert stream.read() == fake_response.text