File size: 3,714 Bytes
a85c9b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import hashlib

import pytest

from embedchain.loaders.json import JSONLoader


def test_load_data(mocker):
    content = "temp.json"

    mock_document = {
        "doc_id": hashlib.sha256((content + ", ".join(["content1", "content2"])).encode()).hexdigest(),
        "data": [
            {"content": "content1", "meta_data": {"url": content}},
            {"content": "content2", "meta_data": {"url": content}},
        ],
    }

    mocker.patch("embedchain.loaders.json.JSONLoader.load_data", return_value=mock_document)

    json_loader = JSONLoader()

    result = json_loader.load_data(content)

    assert "doc_id" in result
    assert "data" in result

    expected_data = [
        {"content": "content1", "meta_data": {"url": content}},
        {"content": "content2", "meta_data": {"url": content}},
    ]

    assert result["data"] == expected_data

    expected_doc_id = hashlib.sha256((content + ", ".join(["content1", "content2"])).encode()).hexdigest()
    assert result["doc_id"] == expected_doc_id


def test_load_data_url(mocker):
    content = "https://example.com/posts.json"

    mocker.patch("os.path.isfile", return_value=False)
    mocker.patch(
        "embedchain.loaders.json.JSONReader.load_data",
        return_value=[
            {
                "text": "content1",
            },
            {
                "text": "content2",
            },
        ],
    )

    mock_response = mocker.Mock()
    mock_response.status_code = 200
    mock_response.json.return_value = {"document1": "content1", "document2": "content2"}

    mocker.patch("requests.get", return_value=mock_response)

    result = JSONLoader.load_data(content)

    assert "doc_id" in result
    assert "data" in result

    expected_data = [
        {"content": "content1", "meta_data": {"url": content}},
        {"content": "content2", "meta_data": {"url": content}},
    ]

    assert result["data"] == expected_data

    expected_doc_id = hashlib.sha256((content + ", ".join(["content1", "content2"])).encode()).hexdigest()
    assert result["doc_id"] == expected_doc_id


def test_load_data_invalid_string_content(mocker):
    mocker.patch("os.path.isfile", return_value=False)
    mocker.patch("requests.get")

    content = "123: 345}"

    with pytest.raises(ValueError, match="Invalid content to load json data from"):
        JSONLoader.load_data(content)


def test_load_data_invalid_url(mocker):
    mocker.patch("os.path.isfile", return_value=False)

    mock_response = mocker.Mock()
    mock_response.status_code = 404
    mocker.patch("requests.get", return_value=mock_response)

    content = "http://invalid-url.com/"

    with pytest.raises(ValueError, match=f"Invalid content to load json data from: {content}"):
        JSONLoader.load_data(content)


def test_load_data_from_json_string(mocker):
    content = '{"foo": "bar"}'

    content_url_str = hashlib.sha256((content).encode("utf-8")).hexdigest()

    mocker.patch("os.path.isfile", return_value=False)
    mocker.patch(
        "embedchain.loaders.json.JSONReader.load_data",
        return_value=[
            {
                "text": "content1",
            },
            {
                "text": "content2",
            },
        ],
    )

    result = JSONLoader.load_data(content)

    assert "doc_id" in result
    assert "data" in result

    expected_data = [
        {"content": "content1", "meta_data": {"url": content_url_str}},
        {"content": "content2", "meta_data": {"url": content_url_str}},
    ]

    assert result["data"] == expected_data

    expected_doc_id = hashlib.sha256((content_url_str + ", ".join(["content1", "content2"])).encode()).hexdigest()
    assert result["doc_id"] == expected_doc_id