venkatchoudharyala committed on
Commit
12cc604
1 Parent(s): aa17f32

Upload 7 files

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [theme]
2
+ base = "dark"
3
+ font = "monospace"
WebComponents/ArticleExtractor.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytesseract
2
+ from PIL import Image
3
+ from io import BytesIO
4
+ #import easyocr
5
+
6
+ # Function to perform OCR with Tesseract
7
def ocr_with_tesseract(image):
    """Run Tesseract OCR on ``image`` and return the recognised text.

    The image is handed unchanged to ``pytesseract.image_to_string`` and
    the value that call produces is returned as-is.
    """
    return pytesseract.image_to_string(image)
11
+
12
+ # Function to perform OCR with EasyOCR (disabled: kept inside a string literal, never executed)
13
+ '''
14
+ def ocr_with_easyocr(image):
15
+ # Convert the image to bytes for easyocr
16
+ image_bytes = image.tobytes()
17
+
18
+ # Perform OCR on the image using easyocr
19
+ reader = easyocr.Reader(['en']) # You can add other languages as needed
20
+ result = reader.readtext(image_bytes)
21
+
22
+ # Extract text from OCR result
23
+ extracted_text = [text[1] for text in result]
24
+
25
+ return extracted_text
26
+ '''
app.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from headline_gen.Control import ServerInit
3
+ import os
4
+ import nltk
5
+
6
def main():
    """Render the landing page and make sure the headline server is booted.

    The object returned by ``ServerInit("Start")`` is stored in
    ``st.session_state["Server"]`` the first time the key is missing, so
    later reruns of the same session reuse it. Pressing "Upload Text"
    switches to ``pages/Extraction.py``.
    """
    st.title("Abstract Page")
    if "Server" not in st.session_state:
        with st.spinner("Booting Server"):
            st.session_state["Server"] = ServerInit("Start")
    # Deliberately not in an ``else`` branch: the run that boots the server
    # must also draw the button, otherwise the page ends up with no widget
    # the user could click to trigger the next rerun.
    if st.button("Upload Text"):
        st.switch_page("pages/Extraction.py")


if __name__ == "__main__":
    main()
packages.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ tesseract-ocr
2
+ tesseract-ocr-por
3
+ default-jdk
4
+ default-jre
pages/Extraction.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from WebComponents import ArticleExtractor as ocr
3
+
4
+ from streamlit_cropper import st_cropper
5
+ from PIL import Image
6
+
7
st.set_page_config(layout="wide")


def main():
    """Collect article text, typed by hand or OCR'd from an uploaded image.

    Two tabs are shown. "Manual" offers a plain text area; "OCR" lets the
    user upload a PNG/JPG, crop it, and edit the text Tesseract extracted
    from the crop. Submitting "Generate HL" in either tab appends the text
    to ``st.session_state['Questions']`` and switches to ``pages/Final.py``.

    NOTE(review): the listing this was taken from had lost its indentation.
    The nesting used here (``Flag`` initialised together with ``Questions``;
    the OCR form placed directly under ``if Image_File``) is a
    reconstruction -- confirm against the intended layout.
    """
    tab1, tab2 = st.tabs(['Manual', 'OCR'])
    if 'Questions' not in st.session_state:
        st.session_state['Questions'] = []
        st.session_state['Flag'] = False

    with tab1:
        with st.form("Manual", clear_on_submit=True):
            Question = st.text_area("Question")
            st.write(Question)
            col1, col2 = st.columns(2)
            with col1:
                # No handler needed: ``clear_on_submit`` already empties the
                # text area when this button submits the form.
                st.form_submit_button("Reset")
            with col2:
                if st.form_submit_button("Generate HL"):
                    st.session_state['Questions'].append(Question)
                    st.switch_page("pages/Final.py")

    with tab2:
        # The uploader's key is a counter; bumping it (see "Reset" below)
        # makes Streamlit create a fresh, empty uploader widget.
        if "file_uploader_key" not in st.session_state:
            st.session_state["file_uploader_key"] = 0
        Image_File = st.file_uploader(
            "Upload the Image",
            type=['png', 'jpg'],
            key=st.session_state["file_uploader_key"],
        )
        if Image_File:
            col1, col2 = st.columns(2)
            with col1:
                img = Image.open(Image_File)
                cropped_img = st_cropper(
                    img,
                    realtime_update=True,
                    box_color="#FF0012",
                    aspect_ratio=(5, 2),
                )

            with col2:
                st.write("Preview")
                # ``Image.thumbnail`` resizes in place and returns None.
                # Shrink a copy so the OCR step below still receives the
                # crop at its full resolution.
                preview_img = cropped_img.copy()
                preview_img.thumbnail((580, 580))
                st.image(preview_img)

            ExText = ocr.ocr_with_tesseract(cropped_img)
            with st.form("OCR", clear_on_submit=True):
                Question = st.text_area("Question", value=ExText)
                col1, col2 = st.columns(2)
                with col1:
                    if st.form_submit_button("Reset"):
                        # Discard the uploaded file by switching uploader key.
                        st.session_state["file_uploader_key"] += 1
                        st.rerun()
                with col2:
                    if st.form_submit_button("Generate HL"):
                        st.session_state['Questions'].append(Question)
                        st.switch_page("pages/Final.py")


if __name__ == "__main__":
    main()
pages/Final.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from headline_gen.Control import Generate
3
+
4
def main():
    """Show the headline generated for the most recently submitted text.

    Reads the last entry of ``st.session_state['Questions']`` and passes it,
    together with ``st.session_state["Server"]``, to ``Generate``. An error
    message is shown instead when no text has been submitted yet or when the
    ``"Server"`` entry is missing from the session state.
    """
    # An empty list is treated like a missing key: the Extraction page stores
    # ``[]`` under 'Questions' before anything is submitted, and indexing
    # ``[-1]`` on it would raise IndexError.
    if 'Questions' not in st.session_state or not st.session_state['Questions']:
        st.error("Please Navigate back to Extraction Page and Upload your Article")
        return

    with st.spinner("Generating Head Line!!!"):
        if 'Server' in st.session_state:
            st.success(Generate(st.session_state['Questions'][-1], st.session_state["Server"]))
        else:
            st.error("Server is not Booted")


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ streamlit-cropper
2
+ pytesseract
3
+ textacy
4
+ regex
5
+ nltk
6
+ scipy==1.12.0
7
+ gensim
8
+ networkx
9
+ headline-gen