"""Streamlit page that loads local article text files and previews one document.

The files under ``doc_dir`` are converted to Haystack documents (cleaned with
``clean_wiki_text`` and split into paragraphs) and the second resulting
document is rendered on the page.
"""
import json

import streamlit as st
import validators
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import FARMReader, TfidfRetriever
from haystack.nodes.connector import Crawler
from haystack.pipelines import ExtractiveQAPipeline
from haystack.utils import (
    clean_wiki_text,
    convert_files_to_docs,
    fetch_archive_from_http,
)

# Directory holding the article text files to convert.
doc_dir = './article_txt_got'

# Created here but not populated in this section of the script.
document_store = InMemoryDocumentStore()

# Convert every file under doc_dir, cleaning the text and splitting it into
# paragraphs so each paragraph becomes its own entry in `docs`.
docs = convert_files_to_docs(
    dir_path=doc_dir,
    clean_func=clean_wiki_text,
    split_paragraphs=True,
)

# Preview the second converted document. Guard the index so that an empty or
# single-document result shows a readable message instead of an IndexError.
# NOTE(review): index 1 (not 0) is kept from the original -- confirm intent.
if len(docs) > 1:
    st.write(docs[1])
else:
    st.warning(
        f"Expected at least 2 documents from '{doc_dir}', found {len(docs)}."
    )