Spaces:
Runtime error
Runtime error
File size: 1,097 Bytes
7f7b773 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
from pathlib import Path
from langchain.schema import Document
import json
from typing import Iterable
def save_docs_to_jsonl(array: Iterable[Document], path: str) -> None:
    """Save Document objects to ``data.jsonl`` in a folder, one JSON object per line.

    The folder (including any missing parents) is created when it does not
    exist yet. The file is opened in write mode, so an existing ``data.jsonl``
    in that folder is overwritten.

    Args:
        array: An iterable of Document objects. Each one is serialized by
            calling its ``json()`` method.
        path: The path to the folder where the output file should be.

    Returns:
        None
    """
    folder = Path(path)
    folder.mkdir(parents=True, exist_ok=True)
    # Join with pathlib instead of f"{path}/data.jsonl": with an empty ``path``
    # the f-string yields "/data.jsonl" (filesystem root) rather than a file
    # inside the folder that was just created.
    with open(folder / "data.jsonl", "w", encoding="utf-8") as jsonl_file:
        for doc in array:
            jsonl_file.write(doc.json() + "\n")
def load_docs_from_jsonl(path: str) -> Iterable[Document]:
    """Load Document objects from the ``data.jsonl`` file in a folder.

    Every non-blank line is parsed as one JSON object whose keys are passed
    to ``Document`` as keyword arguments. Blank or whitespace-only lines are
    skipped.

    Args:
        path: The path to the folder where the input file is.

    Returns:
        A list of Document objects, in file order.

    Raises:
        FileNotFoundError: If ``data.jsonl`` does not exist in ``path``.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    array = []
    # Join with pathlib instead of f"{path}/data.jsonl": with an empty ``path``
    # the f-string would point at "/data.jsonl" in the filesystem root.
    with open(Path(path) / "data.jsonl", "r", encoding="utf-8") as jsonl_file:
        for line in jsonl_file:
            # A stray empty line (e.g. an extra trailing newline) is not a
            # record; json.loads would raise on it.
            if not line.strip():
                continue
            array.append(Document(**json.loads(line)))
    return array
|