Spaces:
No application file
No application file
File size: 3,714 Bytes
a85c9b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import hashlib
import pytest
from embedchain.loaders.json import JSONLoader
def test_load_data(mocker):
    """Check the document structure returned by ``load_data`` for a file name.

    NOTE(review): ``JSONLoader.load_data`` itself is patched below, so these
    assertions validate the stubbed return value rather than the loader's
    real logic -- confirm whether that is intended.
    """
    content = "temp.json"
    texts = ["content1", "content2"]
    doc_id = hashlib.sha256((content + ", ".join(texts)).encode()).hexdigest()

    def build_entries():
        # Fresh list on every call so the stub and the expectation never share objects.
        return [{"content": text, "meta_data": {"url": content}} for text in texts]

    mocker.patch(
        "embedchain.loaders.json.JSONLoader.load_data",
        return_value={"doc_id": doc_id, "data": build_entries()},
    )

    loader = JSONLoader()
    result = loader.load_data(content)

    for key in ("doc_id", "data"):
        assert key in result
    assert result["data"] == build_entries()
    assert result["doc_id"] == doc_id
def test_load_data_url(mocker):
    """Load from a URL with the file check, JSON reader and HTTP call all stubbed."""
    content = "https://example.com/posts.json"
    texts = ("content1", "content2")

    # Make os.path.isfile report False for every path.
    mocker.patch("os.path.isfile", return_value=False)
    mocker.patch(
        "embedchain.loaders.json.JSONReader.load_data",
        return_value=[{"text": text} for text in texts],
    )

    # Stubbed HTTP response: status 200 with a two-entry JSON payload.
    response = mocker.Mock()
    response.status_code = 200
    response.json.return_value = {"document1": "content1", "document2": "content2"}
    mocker.patch("requests.get", return_value=response)

    result = JSONLoader.load_data(content)

    assert "doc_id" in result
    assert "data" in result
    assert result["data"] == [
        {"content": text, "meta_data": {"url": content}} for text in texts
    ]

    digest_input = content + ", ".join(texts)
    assert result["doc_id"] == hashlib.sha256(digest_input.encode()).hexdigest()
def test_load_data_invalid_string_content(mocker):
    """Expect ``ValueError`` for the malformed string ``123: 345}``.

    ``os.path.isfile`` is stubbed to return False and ``requests.get`` is
    replaced with a mock so no real file or network access is involved.
    """
    malformed = "123: 345}"

    mocker.patch("os.path.isfile", return_value=False)
    mocker.patch("requests.get")

    expected_message = "Invalid content to load json data from"
    with pytest.raises(ValueError, match=expected_message):
        JSONLoader.load_data(malformed)
def test_load_data_invalid_url(mocker):
    """Expect ``ValueError`` naming the URL, with ``requests.get`` stubbed to return 404.

    ``os.path.isfile`` is stubbed to return False so the input is not
    reported as a local file.
    """
    import re  # local import: only needed to escape the URL for ``match``

    content = "http://invalid-url.com/"

    mocker.patch("os.path.isfile", return_value=False)
    mock_response = mocker.Mock()
    mock_response.status_code = 404
    mocker.patch("requests.get", return_value=mock_response)

    # ``match`` is applied as a regular expression, so escape the message to
    # compare the URL's ``.`` characters literally instead of as wildcards.
    expected_message = re.escape(f"Invalid content to load json data from: {content}")
    with pytest.raises(ValueError, match=expected_message):
        JSONLoader.load_data(content)
def test_load_data_from_json_string(mocker):
    """Load from a raw JSON string.

    The expected ``url`` metadata is the SHA-256 hex digest of the input
    string, and the expected ``doc_id`` hashes that digest joined with the
    stubbed reader texts.
    """
    content = '{"foo": "bar"}'
    texts = ("content1", "content2")
    content_digest = hashlib.sha256(content.encode("utf-8")).hexdigest()

    # Make os.path.isfile report False for every path.
    mocker.patch("os.path.isfile", return_value=False)
    mocker.patch(
        "embedchain.loaders.json.JSONReader.load_data",
        return_value=[{"text": text} for text in texts],
    )

    result = JSONLoader.load_data(content)

    assert "doc_id" in result
    assert "data" in result
    assert result["data"] == [
        {"content": text, "meta_data": {"url": content_digest}} for text in texts
    ]

    digest_input = content_digest + ", ".join(texts)
    assert result["doc_id"] == hashlib.sha256(digest_input.encode()).hexdigest()
|