Liam Kilroy committed on
Commit
2d29e07
·
1 Parent(s): 0f52c1f

replace link option with direct file upload

Browse files
Files changed (1) hide show
  1. pdf_summarizer/app.py +28 -5
pdf_summarizer/app.py CHANGED
@@ -1,22 +1,45 @@
1
  import streamlit as st
2
  from transformers import pipeline
3
  import tensorflow as tf
4
-
5
 
6
 
7
 
8
  file_link: str = st.text_area("Put the PDF url here")
9
  pipe = pipeline("summarization") #import a specific, lightweight model.
10
 
11
- def url_checker(file_link) -> bool:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  pdf_substring = ".pdf" or "/pdf"
13
- if pdf_substring not in file_link:
14
- st.write("Incorrect file type detected. Please link to pdfs only - file must end in .pdf")
15
  return True
16
  #this is dumb with incorrect return types
17
  url_checker(file_link)
18
 
19
- #write a function for file ingestion and text extraction.
20
 
21
 
22
  def text_extract(file) -> list:
 
1
  import streamlit as st
2
  from transformers import pipeline
3
  import tensorflow as tf
4
+ #from scraping_utils import FileImporter
5
 
6
 
7
 
8
  file_link: str = st.text_area("Put the PDF url here")
9
  pipe = pipeline("summarization") #import a specific, lightweight model.
10
 
11
+ #add file uploader field
12
+ #add pdf filetype checker
13
+ uploaded_file = st.file_uploader("Choose a file")
14
+ if uploaded_file is not None:
15
+ # To read file as bytes:
16
+ bytes_data = uploaded_file.getvalue()
17
+ st.write(bytes_data)
18
+
19
+ # To convert to a string based IO:
20
+ stringio = StringIO(uploaded_file.getvalue().decode("utf-8"))
21
+ st.write(stringio)
22
+
23
+ # To read file as string:
24
+ string_data = stringio.read()
25
+ st.write(string_data)
26
+
27
+ # Can be used wherever a "file-like" object is accepted:
28
+ dataframe = pd.read_csv(uploaded_file)
29
+ st.write(dataframe)
30
+
31
+
32
+
33
+
34
+ def url_checker(uploaded_file) -> bool:
35
  pdf_substring = ".pdf" or "/pdf"
36
+ if pdf_substring not in uploaded_file:
37
+ st.write("Incorrect file type detected. Please upload pdfs only - file must end in .pdf")
38
  return True
39
  #this is dumb with incorrect return types
40
  url_checker(file_link)
41
 
42
+
43
 
44
 
45
  def text_extract(file) -> list: