import json
from pathlib import Path
from typing import Iterable

from langchain.schema import Document

# Name of the file written to / read from inside the caller-supplied folder.
_JSONL_FILENAME = "data.jsonl"


def save_docs_to_jsonl(array: Iterable[Document], path: str) -> None:
    """Save Document objects to ``<path>/data.jsonl``, one JSON object per line.

    The target folder is created (including missing parents) if it does not
    exist. An existing ``data.jsonl`` in that folder is overwritten.

    Args:
        array: An iterable of Document objects.
        path: The path to the folder where the output file should be.

    Returns:
        None
    """
    folder = Path(path)
    folder.mkdir(parents=True, exist_ok=True)
    with open(folder / _JSONL_FILENAME, "w", encoding="utf-8") as jsonl_file:
        for doc in array:
            jsonl_file.write(doc.json() + "\n")


def load_docs_from_jsonl(path: str) -> Iterable[Document]:
    """Load Document objects from ``<path>/data.jsonl``.

    Each non-blank line is parsed as a JSON object and its keys are passed as
    keyword arguments to ``Document``. Blank or whitespace-only lines are
    skipped.

    Args:
        path: The path to the folder where the input file is.

    Returns:
        A list of Document objects, in file order.

    Raises:
        FileNotFoundError: If ``<path>/data.jsonl`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    docs = []
    with open(Path(path) / _JSONL_FILENAME, "r", encoding="utf-8") as jsonl_file:
        for line in jsonl_file:
            # Tolerate blank lines (e.g. from hand-editing or concatenating
            # files); json.loads would otherwise raise on them.
            if not line.strip():
                continue
            docs.append(Document(**json.loads(line)))
    return docs