Kushwanth Chowday Kandala committed on
Commit
940c185
1 Parent(s): 2e70e8d

PyPDF2 to read PDF data and print pages

Browse files
Files changed (2) hide show
  1. app.py +14 -2
  2. requirements.txt +4 -0
app.py CHANGED
@@ -4,6 +4,8 @@ from streamlit_chat import message
4
  import numpy as np
5
  import pandas as pd
6
  from io import StringIO
 
 
7
  # import json
8
 
9
  # st.config(PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION="python")
@@ -134,6 +136,12 @@ for i in st.session_state["chat_history"]:
134
  # from pinecone import Index, GRPCIndex
135
  # pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
136
  # st.text(pinecone)
 
 
 
 
 
 
137
  with st.sidebar:
138
  st.markdown("""
139
  ***Follow this steps***
@@ -157,5 +165,9 @@ with st.sidebar:
157
  # st.write(string_data)
158
 
159
  # Can be used wherever a "file-like" object is accepted:
160
- dataframe = pd.read_csv(uploaded_file)
161
- st.write(dataframe)
 
 
 
 
 
4
  import numpy as np
5
  import pandas as pd
6
  from io import StringIO
7
+ import PyPDF2
8
+ from tqdm import tqdm
9
  # import json
10
 
11
  # st.config(PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION="python")
 
136
  # from pinecone import Index, GRPCIndex
137
  # pinecone.init(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
138
  # st.text(pinecone)
139
+
140
def print_out(pages):
    """Write the extracted text of every page to the app via ``st.write``.

    Args:
        pages: A sequence of page objects, each exposing ``extract_text()``
            (the caller passes ``PyPDF2.PdfReader(...).pages``).

    Each page is emitted as ``"Page <index> : <text>"``, where the index is
    zero-based and the text has leading/trailing whitespace stripped.
    """
    # enumerate() yields the same zero-based index the original
    # range(len(pages)) loop produced, without indexing back into pages.
    for i, page in enumerate(pages):
        text = page.extract_text().strip()
        st.write(f"Page {i} : {text}")
144
+
145
  with st.sidebar:
146
  st.markdown("""
147
  ***Follow this steps***
 
165
  # st.write(string_data)
166
 
167
  # Can be used wherever a "file-like" object is accepted:
168
+ # dataframe = pd.read_csv(uploaded_file)
169
+ # st.write(dataframe)
170
+ reader = PyPDF2.PdfReader(uploaded_file)
171
+ pages = reader.pages
172
+ print_out(pages)
173
+
requirements.txt CHANGED
@@ -5,3 +5,7 @@ torch
5
  streamlit-chat-media
6
  streamlit-chat
7
  transformers
 
 
 
 
 
5
  streamlit-chat-media
6
  streamlit-chat
7
  transformers
8
+ PyPDF2
9
+ ratelimit
10
+ backoff
11
+ tqdm