Spaces:
No application file
No application file
File size: 4,123 Bytes
a85c9b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import csv
import os
import pathlib
import tempfile
from unittest.mock import MagicMock, patch
import pytest
from embedchain.loaders.csv import CsvLoader
@pytest.mark.parametrize("delimiter", [",", "\t", ";", "|"])
def test_load_data(delimiter):
    """
    Test csv loader

    Tests that file is loaded, metadata is correct and content is correct
    for every parametrized delimiter.
    """
    # Creating temporary CSV file. delete=False keeps the file on disk once the
    # handle is closed, so it can be reopened by name afterwards.
    with tempfile.NamedTemporaryFile(mode="w+", newline="", delete=False) as tmpfile:
        writer = csv.writer(tmpfile, delimiter=delimiter)
        writer.writerow(["Name", "Age", "Occupation"])
        writer.writerow(["Alice", "28", "Engineer"])
        writer.writerow(["Bob", "35", "Doctor"])
        writer.writerow(["Charlie", "22", "Student"])
        filename = tmpfile.name

    # Leaving the `with` block closes (and flushes) the file before it is read
    # again; reopening or unlinking a still-open temporary file is not portable.
    try:
        # Loading CSV using CsvLoader
        loader = CsvLoader()
        result = loader.load_data(filename)
        data = result["data"]

        # Assertions
        expected_contents = [
            "Name: Alice, Age: 28, Occupation: Engineer",
            "Name: Bob, Age: 35, Occupation: Doctor",
            "Name: Charlie, Age: 22, Occupation: Student",
        ]
        assert len(data) == 3
        for row_number, (entry, expected) in enumerate(zip(data, expected_contents), start=1):
            assert entry["content"] == expected
            assert entry["meta_data"]["url"] == filename
            assert entry["meta_data"]["row"] == row_number
    finally:
        # Cleaning up the temporary file, even when an assertion above fails
        os.unlink(filename)
@pytest.mark.parametrize("delimiter", [",", "\t", ";", "|"])
def test_load_data_with_file_uri(delimiter):
    """
    Test csv loader with file URI

    Tests that file is loaded, metadata is correct and content is correct
    when the file is addressed through a ``file://`` URI.
    """
    # Creating temporary CSV file. delete=False keeps the file on disk once the
    # handle is closed, so it can be reopened afterwards.
    with tempfile.NamedTemporaryFile(mode="w+", newline="", delete=False) as tmpfile:
        writer = csv.writer(tmpfile, delimiter=delimiter)
        writer.writerow(["Name", "Age", "Occupation"])
        writer.writerow(["Alice", "28", "Engineer"])
        writer.writerow(["Bob", "35", "Doctor"])
        writer.writerow(["Charlie", "22", "Student"])
        tmp_path = tmpfile.name

    # Leaving the `with` block closes (and flushes) the file before it is read
    # again; reopening or unlinking a still-open temporary file is not portable.
    try:
        filename = pathlib.Path(tmp_path).as_uri()  # Convert path to file URI

        # Loading CSV using CsvLoader
        loader = CsvLoader()
        result = loader.load_data(filename)
        data = result["data"]

        # Assertions
        expected_contents = [
            "Name: Alice, Age: 28, Occupation: Engineer",
            "Name: Bob, Age: 35, Occupation: Doctor",
            "Name: Charlie, Age: 22, Occupation: Student",
        ]
        assert len(data) == 3
        for row_number, (entry, expected) in enumerate(zip(data, expected_contents), start=1):
            assert entry["content"] == expected
            assert entry["meta_data"]["url"] == filename
            assert entry["meta_data"]["row"] == row_number
    finally:
        # Cleaning up the temporary file (by filesystem path, not by URI),
        # even when an assertion above fails
        os.unlink(tmp_path)
@pytest.mark.parametrize("content", ["ftp://example.com", "sftp://example.com", "mailto://example.com"])
def test_get_file_content(content):
    """
    Test _get_file_content method of CsvLoader for ftp, sftp and mailto URLs

    Tests that a ValueError is raised for each of the parametrized URLs.
    """
    # Build the loader outside the raises block so that only the call under
    # test can satisfy the expected ValueError.
    loader = CsvLoader()
    with pytest.raises(ValueError):
        loader._get_file_content(content)
@pytest.mark.parametrize("content", ["http://example.com", "https://example.com"])
def test_get_file_content_http(content):
    """
    Test _get_file_content method of CsvLoader for http and https URLs

    ``requests.get`` is patched to return a canned response; the test checks
    that it is called exactly once with the URL, that ``raise_for_status`` is
    invoked on the response, and that the returned file-like object yields
    the response text.
    """
    fake_response = MagicMock()
    fake_response.text = "Name,Age,Occupation\nAlice,28,Engineer\nBob,35,Doctor\nCharlie,22,Student"

    with patch("requests.get", return_value=fake_response) as fake_get:
        file_content = CsvLoader()._get_file_content(content)

        fake_get.assert_called_once_with(content)
        fake_response.raise_for_status.assert_called_once()
        assert file_content.read() == fake_response.text
|