akarshrajsingh7 committed on
Commit
3233632
·
1 Parent(s): 63f05af

streamlit pdf viewer - trial

Browse files
Files changed (2) hide show
  1. app.py +51 -94
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import streamlit as st
2
  from PyPDF2 import PdfReader
3
  from concurrent.futures import ThreadPoolExecutor
@@ -5,11 +7,16 @@ from base64 import b64encode
5
  from fpdf import FPDF
6
  import io, string, re, math
7
  from io import StringIO
8
- # from streamlit_pdf_viewer import pdf_viewer
 
9
 
10
  # Importing the Fastify Class
11
  from fast_reader import Fastify_Reader
12
 
 
 
 
 
13
  def pdf_extract_text(pdf_docs):
14
  '''
15
  Basic function for extracting text from the PDFs
@@ -21,10 +28,11 @@ def pdf_extract_text(pdf_docs):
21
  text += page.extract_text()
22
  return text
23
 
24
- def text_to_pdf_fastify(text):
25
  '''
26
  Basic function to apply fastification on the input text and convert it to bytes for PDF rendering
27
  '''
 
28
  # Applying the Fastify Logic
29
  bold_text = Fastify_Reader(text).fastify()
30
  bold_text = bold_text.encode('latin-1', 'ignore').decode('latin-1') #since fpdf works with latin-1 encoding
@@ -36,10 +44,11 @@ def text_to_pdf_fastify(text):
36
  pdf.multi_cell(0, 10, txt = bold_text, markdown=True)
37
  return bytes(pdf.output())
38
 
39
- def text_to_pdf(text):
40
  '''
41
  Basic function on the input text and convert it to bytes for PDF rendering
42
  '''
 
43
  text = text.encode('latin-1', 'ignore').decode('latin-1') #since fpdf works with latin-1 encoding
44
  # Creating the PDF
45
  pdf = FPDF()
@@ -80,31 +89,6 @@ with tab1:
80
  with st.spinner("Processing"):
81
  text = user_input
82
 
83
- # Generating base64 encoded text bytes for PDF rendering
84
- original_pdf = b64encode(text_to_pdf(text)).decode("utf-8")
85
- base64_pdf = b64encode(text_to_pdf_fastify(text)).decode("utf-8")
86
-
87
- # Embedding the PDFs in the HTML
88
- original_display = f'<embed src="data:application/pdf;base64,{original_pdf}" width = "100%" height = 600 type="application/pdf" download="original.pdf">'
89
- pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width = "100%" height = 600 type="application/pdf" download="Modified.pdf">'
90
-
91
- # original_display = f"""<embed class="pdfobject" type="application/pdf" title="Original PDF" src="data:application/pdf;base64,{original_pdf}" style=" width: 100%; height: 150%;">"""
92
- # pdf_display = f"""<embed class="pdfobject" type="application/pdf" title="Modified PDF" src="data:application/pdf;base64,{base64_pdf}" style=" width: 100%; height: 150%;">"""
93
-
94
- # original_display = f'<iframe src="data:application/pdf;base64,{original_pdf}" width = "100%" height = 600 type="application/pdf" download="original.pdf"></iframe>'
95
- # pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width = "100%" height = 600 type="application/pdf" download="Modified.pdf"></iframe>'
96
-
97
- # original_display = f"""
98
- # <div class="pdf-container" style="height: 500px;">
99
- # <embed class="pdfobject" type="application/pdf" title="Modified PDF" src="data:application/pdf;base64,{original_pdf}" style="width: 100%; height: 100%;">
100
- # </div>
101
- # """
102
- # pdf_display = f"""
103
- # <div class="pdf-container" style="height: 500px;">
104
- # <embed class="pdfobject" type="application/pdf" title="Modified PDF" src="data:application/pdf;base64,{base64_pdf}" style="width: 100%; height: 100%;">
105
- # </div>
106
- # """
107
-
108
  # Compare Logic implementation
109
  if compare:
110
  col1, col2, col3 = st.columns(3)
@@ -115,34 +99,32 @@ with tab1:
115
  col1, col2 = st.columns([1, 1], gap="small")
116
  with col1:
117
  with st.container(border = True):
118
- st.markdown("<div style='text-align: center;'><strong>Original PDF viewer</strong></div>", unsafe_allow_html=True)
119
- st.markdown(original_display, unsafe_allow_html=True)
 
120
  with col2:
121
  with st.container(border = True):
122
  st.markdown("<div style='text-align: center;'><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
123
- st.markdown(pdf_display, unsafe_allow_html=True)
 
124
 
125
- # Browser Cache Note
126
- st.markdown(f"""
127
- <div style='background-color: #FFD580; border-radius: 5px;'>
128
- <p style='color: black;'><strong>Note</strong> - {note_text}</p>
129
- </div>
130
- """, unsafe_allow_html=True)
131
  else:
132
- # No Comparisons
133
  col1, col2, col3 = st.columns(3)
134
- with col2:
135
  st.download_button(label="Download Fastified PDF", data=text_to_pdf_fastify(text), file_name='output.pdf', mime='application/pdf')
136
- with st.container(border = True):
137
- st.markdown("<div style='text-align: center;'><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
138
- st.markdown(pdf_display, unsafe_allow_html=True)
 
 
139
 
140
- # Browser Cache Note
141
- st.markdown(f"""
142
- <div style='background-color: #FFD580; border-radius: 5px;'>
143
- <p style='color: black;'><strong>Note</strong> - {note_text}</p>
144
- </div>
145
- """, unsafe_allow_html=True)
146
 
147
  # Added support for PDFs having text
148
  with tab2:
@@ -157,34 +139,6 @@ with tab2:
157
  with st.spinner("Processing"):
158
  text = pdf_extract_text(uploaded_file)
159
 
160
- # Generating base64 encoded text bytes for PDF rendering
161
- original_pdf = b64encode(text_to_pdf(text)).decode("utf-8")
162
- base64_pdf = b64encode(text_to_pdf_fastify(text)).decode("utf-8")
163
-
164
- # Embedding the PDFs in the HTML
165
- original_display = f'<embed src="data:application/pdf;base64,{original_pdf}" width = "100%" height = 600 type="application/pdf" download="original.pdf">'
166
- pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width = "100%" height = 600 type="application/pdf" download="Modified.pdf">'
167
-
168
-
169
- # original_display = f"""<embed class="pdfobject" type="application/pdf" title="Original PDF" src="data:application/pdf;base64,{original_pdf}" style=" width: 100%; height: 150%;">"""
170
- # pdf_display = f"""<embed class="pdfobject" type="application/pdf" title="Modified PDF" src="data:application/pdf;base64,{base64_pdf}" style=" width: 100%; height: 150%;">"""
171
-
172
- # original_display = f'<iframe src="data:application/pdf;base64,{original_pdf}" width = "100%" height = 600 type="application/pdf" download="original.pdf"></iframe>'
173
- # pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width = "100%" height = 600 type="application/pdf" download="Modified.pdf"></iframe>'
174
-
175
- # original_display = f"""
176
- # <div class="pdf-container" style="height: 500px;">
177
- # <embed class="pdfobject" type="application/pdf" title="Modified PDF" src="data:application/pdf;base64,{original_pdf}" style="width: 100%; height: 100%;">
178
- # </div>
179
- # """
180
- # pdf_display = f"""
181
- # <div class="pdf-container" style="height: 500px;">
182
- # <embed class="pdfobject" type="application/pdf" title="Modified PDF" src="data:application/pdf;base64,{base64_pdf}" style="width: 100%; height: 100%;">
183
- # </div>
184
- # """
185
-
186
-
187
-
188
  # Compare Logic implementation
189
  if compare:
190
  col1, col2, col3 = st.columns(3)
@@ -193,33 +147,36 @@ with tab2:
193
 
194
  # Side by Side comparison
195
  col1, col2 = st.columns([1, 1], gap="small")
 
196
  with col1:
197
  with st.container(border = True):
198
  st.markdown("<div style='text-align: center;'><strong>Original PDF viewer</strong></div>", unsafe_allow_html=True)
199
- st.markdown(original_display, unsafe_allow_html=True)
200
  with col2:
201
  with st.container(border = True):
202
  st.markdown("<div style='text-align: center;'><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
203
- st.markdown(pdf_display, unsafe_allow_html=True)
204
- # Browser Cache Note
205
- st.markdown(f"""
206
- <div style='background-color: #FFD580; border-radius: 5px;'>
207
- <p style='color: black;'><strong>Note</strong> - {note_text}</p>
208
- </div>
209
- """, unsafe_allow_html=True)
 
210
  else:
211
  # No Comparison
212
  col1, col2, col3 = st.columns(3)
213
- with col2:
214
  st.download_button(label="Download Fastified PDF", data=text_to_pdf_fastify(text), file_name='output.pdf', mime='application/pdf')
215
- with st.container(border = True):
216
- st.markdown("<div style='text-align: center;'><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
217
- st.markdown(pdf_display, unsafe_allow_html=True)
218
 
219
- # Browser Cache Note
220
- st.markdown(f"""
221
- <div style='background-color: #FFD580; border-radius: 5px;'>
222
- <p style='color: black;'><strong>Note</strong> - {note_text}</p>
223
- </div>
224
- """, unsafe_allow_html=True)
 
 
 
 
225
 
 
1
+ import warnings
2
+ warnings.filterwarnings("ignore")
3
  import streamlit as st
4
  from PyPDF2 import PdfReader
5
  from concurrent.futures import ThreadPoolExecutor
 
7
  from fpdf import FPDF
8
  import io, string, re, math
9
  from io import StringIO
10
+ from streamlit_pdf_viewer import pdf_viewer
11
+ #import deepcopy
12
 
13
  # Importing the Fastify Class
14
  from fast_reader import Fastify_Reader
15
 
16
+ def text_to_binary(text):
17
+ return ''.join(bin(ord(char))[2:].zfill(8) for char in text)
18
+
19
+
20
  def pdf_extract_text(pdf_docs):
21
  '''
22
  Basic function for extracting text from the PDFs
 
28
  text += page.extract_text()
29
  return text
30
 
31
+ def text_to_pdf_fastify(txt):
32
  '''
33
  Basic function to apply fastification on the input text and convert it to bytes for PDF rendering
34
  '''
35
+ text = (txt + '.')[:-1]
36
  # Applying the Fastify Logic
37
  bold_text = Fastify_Reader(text).fastify()
38
  bold_text = bold_text.encode('latin-1', 'ignore').decode('latin-1') #since fpdf works with latin-1 encoding
 
44
  pdf.multi_cell(0, 10, txt = bold_text, markdown=True)
45
  return bytes(pdf.output())
46
 
47
+ def text_to_pdf(txt):
48
  '''
49
  Basic function on the input text and convert it to bytes for PDF rendering
50
  '''
51
+ text = (txt + '.')[:-1]
52
  text = text.encode('latin-1', 'ignore').decode('latin-1') #since fpdf works with latin-1 encoding
53
  # Creating the PDF
54
  pdf = FPDF()
 
89
  with st.spinner("Processing"):
90
  text = user_input
91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  # Compare Logic implementation
93
  if compare:
94
  col1, col2, col3 = st.columns(3)
 
99
  col1, col2 = st.columns([1, 1], gap="small")
100
  with col1:
101
  with st.container(border = True):
102
+ st.markdown("<div style='text-align: center;'><strong>Original PDF viewer</strong></div>", unsafe_allow_html=True)
103
+ # st.markdown(original_display, unsafe_allow_html=True)
104
+ pdf_viewer(input = text_to_pdf(text), width = 600)
105
  with col2:
106
  with st.container(border = True):
107
  st.markdown("<div style='text-align: center;'><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
108
+ # st.markdown(pdf_display, unsafe_allow_html=True)
109
+ pdf_viewer(text_to_pdf_fastify(text), width = 600)
110
 
111
+ # # Browser Cache Note
112
+ # st.markdown(f"""
113
+ # <div style='background-color: #FFD580; border-radius: 5px;'>
114
+ # <p style='color: black;'><strong>Note</strong> - {note_text}</p>
115
+ # </div>
116
+ # """, unsafe_allow_html=True)
117
  else:
118
+ # # No Comparisons
119
  col1, col2, col3 = st.columns(3)
120
+ with col1:
121
  st.download_button(label="Download Fastified PDF", data=text_to_pdf_fastify(text), file_name='output.pdf', mime='application/pdf')
122
+ with st.container():
123
+
124
+ st.markdown("<div><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
125
+ # st.markdown(pdf_display, unsafe_allow_html=True)
126
+ pdf_viewer(text_to_pdf_fastify(text), width = 600)
127
 
 
 
 
 
 
 
128
 
129
  # Added support for PDFs having text
130
  with tab2:
 
139
  with st.spinner("Processing"):
140
  text = pdf_extract_text(uploaded_file)
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  # Compare Logic implementation
143
  if compare:
144
  col1, col2, col3 = st.columns(3)
 
147
 
148
  # Side by Side comparison
149
  col1, col2 = st.columns([1, 1], gap="small")
150
+
151
  with col1:
152
  with st.container(border = True):
153
  st.markdown("<div style='text-align: center;'><strong>Original PDF viewer</strong></div>", unsafe_allow_html=True)
154
+ pdf_viewer(text_to_pdf(text), width = 600)
155
  with col2:
156
  with st.container(border = True):
157
  st.markdown("<div style='text-align: center;'><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
158
+ # st.markdown(pdf_display, unsafe_allow_html=True)
159
+ pdf_viewer(text_to_pdf_fastify(text), width = 600)
160
+ # # Browser Cache Note
161
+ # st.markdown(f"""
162
+ # <div style='background-color: #FFD580; border-radius: 5px;'>
163
+ # <p style='color: black;'><strong>Note</strong> - {note_text}</p>
164
+ # </div>
165
+ # """, unsafe_allow_html=True)
166
  else:
167
  # No Comparison
168
  col1, col2, col3 = st.columns(3)
169
+ with col1:
170
  st.download_button(label="Download Fastified PDF", data=text_to_pdf_fastify(text), file_name='output.pdf', mime='application/pdf')
 
 
 
171
 
172
+ with st.container():
173
+ st.markdown("<div><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
174
+ pdf_viewer(text_to_pdf_fastify(text), width = 500)
175
+
176
+ # # Browser Cache Note
177
+ # st.markdown(f"""
178
+ # <div style='background-color: #FFD580; border-radius: 5px;'>
179
+ # <p style='color: black;'><strong>Note</strong> - {note_text}</p>
180
+ # </div>
181
+ # """, unsafe_allow_html=True)
182
 
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  PyPDF2==3.0.1
2
  fpdf2==2.7.8
 
 
1
  PyPDF2==3.0.1
2
  fpdf2==2.7.8
3
+ streamlit-pdf-viewer==0.0.7