table_and_text_QA / utils /functions.py
ainur1's picture
fix
79787b2
raw
history blame
900 Bytes
import pandas as pd
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
def make_descriptions(table, tittle):
    """Build the list of table descriptions for a CSV table.

    Args:
        table: Location of the CSV table; anything ``pandas.read_csv``
            accepts. It is stored unchanged under the ``'path'`` key.
        tittle: Title of the table (the spelling is part of the existing
            interface and of the returned dict keys, so it is kept).

    Returns:
        A list of two dicts, each with the keys ``'path'``, ``'number'``,
        ``'columns'`` and ``'tittle'``. The first entry is a fixed
        placeholder description; the second describes ``table``.
    """
    # Only the column names are needed, so read just the header row
    # instead of parsing (and holding in memory) every data row.
    header = pd.read_csv(table, nrows=0)

    # Hard-coded entry that does not correspond to a real file.
    # NOTE(review): presumably a dummy/distractor entry for the consumer of
    # these descriptions -- confirm against the caller.
    placeholder_description = {
        'path': 'random',
        'number': 1,
        'columns': ["clothes", "animals", "students"],
        'tittle': "fashionable student clothes",
    }
    table_description = {
        'path': table,
        'number': 2,
        'columns': list(header.columns),
        'tittle': tittle,
    }
    return [placeholder_description, table_description]
def make_documens(pdf):
    """Load a PDF and return it as a list of split document chunks.

    The PDF is loaded with ``PyPDFLoader`` and the resulting documents are
    passed through a ``CharacterTextSplitter`` configured with
    ``chunk_size=500``, ``chunk_overlap=0`` and a newline separator.

    Args:
        pdf: The PDF location handed to ``PyPDFLoader``.

    Returns:
        The documents produced by ``split_documents``.
    """
    splitter = CharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=0,
        separator='\n',
    )
    pages = PyPDFLoader(pdf).load()
    return splitter.split_documents(pages)