embedchain / tests /loaders /test_json.py
rajesh1501's picture
Upload folder using huggingface_hub
a85c9b8 verified
import hashlib
import pytest
from embedchain.loaders.json import JSONLoader
def test_load_data(mocker):
content = "temp.json"
mock_document = {
"doc_id": hashlib.sha256((content + ", ".join(["content1", "content2"])).encode()).hexdigest(),
"data": [
{"content": "content1", "meta_data": {"url": content}},
{"content": "content2", "meta_data": {"url": content}},
],
}
mocker.patch("embedchain.loaders.json.JSONLoader.load_data", return_value=mock_document)
json_loader = JSONLoader()
result = json_loader.load_data(content)
assert "doc_id" in result
assert "data" in result
expected_data = [
{"content": "content1", "meta_data": {"url": content}},
{"content": "content2", "meta_data": {"url": content}},
]
assert result["data"] == expected_data
expected_doc_id = hashlib.sha256((content + ", ".join(["content1", "content2"])).encode()).hexdigest()
assert result["doc_id"] == expected_doc_id
def test_load_data_url(mocker):
content = "https://example.com/posts.json"
mocker.patch("os.path.isfile", return_value=False)
mocker.patch(
"embedchain.loaders.json.JSONReader.load_data",
return_value=[
{
"text": "content1",
},
{
"text": "content2",
},
],
)
mock_response = mocker.Mock()
mock_response.status_code = 200
mock_response.json.return_value = {"document1": "content1", "document2": "content2"}
mocker.patch("requests.get", return_value=mock_response)
result = JSONLoader.load_data(content)
assert "doc_id" in result
assert "data" in result
expected_data = [
{"content": "content1", "meta_data": {"url": content}},
{"content": "content2", "meta_data": {"url": content}},
]
assert result["data"] == expected_data
expected_doc_id = hashlib.sha256((content + ", ".join(["content1", "content2"])).encode()).hexdigest()
assert result["doc_id"] == expected_doc_id
def test_load_data_invalid_string_content(mocker):
mocker.patch("os.path.isfile", return_value=False)
mocker.patch("requests.get")
content = "123: 345}"
with pytest.raises(ValueError, match="Invalid content to load json data from"):
JSONLoader.load_data(content)
def test_load_data_invalid_url(mocker):
mocker.patch("os.path.isfile", return_value=False)
mock_response = mocker.Mock()
mock_response.status_code = 404
mocker.patch("requests.get", return_value=mock_response)
content = "http://invalid-url.com/"
with pytest.raises(ValueError, match=f"Invalid content to load json data from: {content}"):
JSONLoader.load_data(content)
def test_load_data_from_json_string(mocker):
content = '{"foo": "bar"}'
content_url_str = hashlib.sha256((content).encode("utf-8")).hexdigest()
mocker.patch("os.path.isfile", return_value=False)
mocker.patch(
"embedchain.loaders.json.JSONReader.load_data",
return_value=[
{
"text": "content1",
},
{
"text": "content2",
},
],
)
result = JSONLoader.load_data(content)
assert "doc_id" in result
assert "data" in result
expected_data = [
{"content": "content1", "meta_data": {"url": content_url_str}},
{"content": "content2", "meta_data": {"url": content_url_str}},
]
assert result["data"] == expected_data
expected_doc_id = hashlib.sha256((content_url_str + ", ".join(["content1", "content2"])).encode()).hexdigest()
assert result["doc_id"] == expected_doc_id