msr2903 committed on
Commit
bffe4e4
·
1 Parent(s): b143c19

Update pages.py

Browse files
__pycache__/config.cpython-311.pyc ADDED
Binary file (1.75 kB). View file
 
__pycache__/pages.cpython-311.pyc ADDED
Binary file (2.42 kB). View file
 
__pycache__/pages.cpython-312.pyc ADDED
Binary file (2.44 kB). View file
 
__pycache__/section_extract.cpython-311.pyc ADDED
Binary file (13.1 kB). View file
 
__pycache__/section_extract.cpython-312.pyc ADDED
Binary file (11.3 kB). View file
 
pages.py CHANGED
@@ -1,6 +1,5 @@
1
  import streamlit as st
2
  from section_extract import find_cover, find_underwriter, find_section
3
- from config import keywords_dict, stop_keywords, anti_keywords
4
 
5
  def home():
6
  st.title("Prospectus Lens")
@@ -15,33 +14,22 @@ def cover():
15
  def underwriter():
16
  find_underwriter(
17
  uploaded_file=st.session_state.get("uploaded_file"),
18
- section_name="underwriter",
19
- keywords_dict=keywords_dict
20
  )
21
 
22
  def income_statement():
23
  find_section(
24
  uploaded_file=st.session_state.get("uploaded_file"),
25
  section_name="income_statement",
26
- keywords_dict=keywords_dict,
27
- stop_keywords=stop_keywords,
28
- anti_keywords=anti_keywords
29
  )
30
 
31
  def balance_sheet():
32
  find_section(
33
  uploaded_file=st.session_state.get("uploaded_file"),
34
  section_name="balance_sheet",
35
- keywords_dict=keywords_dict,
36
- stop_keywords=stop_keywords,
37
- anti_keywords=anti_keywords
38
  )
39
 
40
  def cash_flow():
41
  find_section(
42
  uploaded_file=st.session_state.get("uploaded_file"),
43
  section_name="cash_flow",
44
- keywords_dict=keywords_dict,
45
- stop_keywords=stop_keywords,
46
- anti_keywords=anti_keywords
47
  )
 
1
  import streamlit as st
2
  from section_extract import find_cover, find_underwriter, find_section
 
3
 
4
  def home():
5
  st.title("Prospectus Lens")
 
14
  def underwriter():
15
  find_underwriter(
16
  uploaded_file=st.session_state.get("uploaded_file"),
 
 
17
  )
18
 
19
  def income_statement():
20
  find_section(
21
  uploaded_file=st.session_state.get("uploaded_file"),
22
  section_name="income_statement",
 
 
 
23
  )
24
 
25
  def balance_sheet():
26
  find_section(
27
  uploaded_file=st.session_state.get("uploaded_file"),
28
  section_name="balance_sheet",
 
 
 
29
  )
30
 
31
  def cash_flow():
32
  find_section(
33
  uploaded_file=st.session_state.get("uploaded_file"),
34
  section_name="cash_flow",
 
 
 
35
  )
section_extract.py CHANGED
@@ -3,6 +3,7 @@ import re
3
  from PyPDF2 import PdfReader, PdfWriter
4
  from streamlit_pdf_viewer import pdf_viewer
5
  import streamlit as st
 
6
 
7
  def find_cover(uploaded_file):
8
  """
@@ -14,8 +15,8 @@ def find_cover(uploaded_file):
14
  Returns:
15
  None
16
  """
17
- section_title = "Cover"
18
- st.title(section_title)
19
 
20
  if uploaded_file:
21
  try:
@@ -27,7 +28,7 @@ def find_cover(uploaded_file):
27
  pdf_writer.add_page(first_page)
28
 
29
  # Save the first page to a temporary file
30
- temp_first_page_path = os.path.join(f"temp_{section_title.lower()}.pdf")
31
  with open(temp_first_page_path, "wb") as f:
32
  pdf_writer.write(f)
33
 
@@ -39,19 +40,18 @@ def find_cover(uploaded_file):
39
  st.warning("Please upload a PDF on the Home page first.")
40
 
41
 
42
- def find_underwriter(uploaded_file, section_name, keywords_dict):
43
  """
44
  Searches for pages in a PDF containing specific keywords for the 'underwriter' section and displays them,
45
  starting from the last 2/3 of the PDF to improve performance.
46
 
47
  Parameters:
48
  uploaded_file: The uploaded PDF file.
49
- section_name: The name of the section (e.g., "Underwriter").
50
- keywords_dict: Dictionary containing keyword sets for different sections.
51
 
52
  Returns:
53
  None
54
  """
 
55
  st.title(section_name.title())
56
 
57
  keyword_sets = keywords_dict.get(section_name, [])
@@ -92,20 +92,18 @@ def find_underwriter(uploaded_file, section_name, keywords_dict):
92
  st.warning("Please upload a PDF on the Home page first.")
93
 
94
 
95
- def find_section(uploaded_file, section_name, keywords_dict, stop_keywords, anti_keywords):
96
  """
97
  Extracts and displays sections of a PDF based on keyword matches.
98
 
99
  Parameters:
100
  uploaded_file: The uploaded PDF file (Streamlit file uploader object).
101
  section_name: The name of the section to search for (e.g., "income_statement").
102
- keywords_dict: A dictionary containing keyword sets for different sections.
103
- stop_keywords: A dictionary of keywords to indicate where extraction should stop.
104
- anti_keywords: A dictionary of keywords to exclude specific pages from the results.
105
 
106
  Returns:
107
  bool: True if processing completed without interruptions; False if stopped or an error occurred.
108
  """
 
109
  st.title(section_name.replace("_", " ").title())
110
 
111
  if uploaded_file:
 
3
  from PyPDF2 import PdfReader, PdfWriter
4
  from streamlit_pdf_viewer import pdf_viewer
5
  import streamlit as st
6
+ from config import keywords_dict, stop_keywords, anti_keywords
7
 
8
  def find_cover(uploaded_file):
9
  """
 
15
  Returns:
16
  None
17
  """
18
+ section_title = "cover"
19
+ st.title(section_title.title())
20
 
21
  if uploaded_file:
22
  try:
 
28
  pdf_writer.add_page(first_page)
29
 
30
  # Save the first page to a temporary file
31
+ temp_first_page_path = os.path.join(f"temp_{section_title}.pdf")
32
  with open(temp_first_page_path, "wb") as f:
33
  pdf_writer.write(f)
34
 
 
40
  st.warning("Please upload a PDF on the Home page first.")
41
 
42
 
43
+ def find_underwriter(uploaded_file):
44
  """
45
  Searches for pages in a PDF containing specific keywords for the 'underwriter' section and displays them,
46
  starting from the last 2/3 of the PDF to improve performance.
47
 
48
  Parameters:
49
  uploaded_file: The uploaded PDF file.
 
 
50
 
51
  Returns:
52
  None
53
  """
54
+ section_name = "underwriter"
55
  st.title(section_name.title())
56
 
57
  keyword_sets = keywords_dict.get(section_name, [])
 
92
  st.warning("Please upload a PDF on the Home page first.")
93
 
94
 
95
+ def find_section(uploaded_file, section_name):
96
  """
97
  Extracts and displays sections of a PDF based on keyword matches.
98
 
99
  Parameters:
100
  uploaded_file: The uploaded PDF file (Streamlit file uploader object).
101
  section_name: The name of the section to search for (e.g., "income_statement").
 
 
 
102
 
103
  Returns:
104
  bool: True if processing completed without interruptions; False if stopped or an error occurred.
105
  """
106
+
107
  st.title(section_name.replace("_", " ").title())
108
 
109
  if uploaded_file: