import os

import openai
import pinecone
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
import streamlit as st

st.header("Document Question Answering")

# The user types the path of the directory to index (for example
# '/content/data').  The text area is a multi-line widget, so surrounding
# whitespace and trailing newlines are stripped before the value is used
# as a filesystem path.
directory = st.text_area("").strip()


def load_docs(directory: str):
    """Load the documents found in *directory*.

    Args:
        directory: Path handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader(directory).load()`` returns
        (presumably a list of LangChain documents -- confirm against the
        installed LangChain version).
    """
    loader = DirectoryLoader(directory)
    documents = loader.load()
    return documents


def split_docs(documents, chunk_size: int = 1000, chunk_overlap: int = 20):
    """Split *documents* into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents as returned by :func:`load_docs`.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The result of ``text_splitter.split_documents(documents)``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    docs = text_splitter.split_documents(documents)
    return docs


if directory:
    if not os.path.isdir(directory):
        # Report a bad path in the UI instead of letting the loader fail
        # (or silently index nothing) and still calling the embeddings API.
        st.error(f"Directory not found: {directory}")
    else:
        documents = load_docs(directory)
        st.write(len(documents))

        docs = split_docs(documents)
        print(len(docs))

        embeddings = OpenAIEmbeddings(model_name="ada")

        # Embed a fixed sample string and show the length of the result
        # (presumably the embedding dimensionality -- confirm).
        query_result = embeddings.embed_query("Hello world")
        st.write(len(query_result))