UldisKK committed on
Commit
874cdc5
1 Parent(s): 44218ea

add pdf reader

Browse files
Files changed (1) hide show
  1. app.py +12 -0
app.py CHANGED
@@ -26,3 +26,15 @@ print(current_working_directory)
26
  st.write('current dir:', current_working_directory)
27
  arr = os.listdir('.')
28
  st.write('dir contents:',arr)
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  st.write('current dir:', current_working_directory)
27
  arr = os.listdir('.')
28
  st.write('dir contents:',arr)
29
+
30
+ def print_response(response: str):
31
+ print("\n".join(textwrap.wrap(response, width=100)))
32
+
33
+ pdf_loader = UnstructuredPDFLoader("./pdfs/Predicting issue types on GitHub.pdf")
34
+ pdf_pages = pdf_loader.load_and_split()
35
+ st.write('total pages from PDFs:', len(pdf_pages))
36
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=512)
37
+ texts = text_splitter.split_documents(pdf_pages)
38
+ st.write('total chunks from pages:', len(texts))
39
+
40
+