Manishkumaryadav committed on
Commit
c45a056
·
verified ·
1 Parent(s): 804bb8a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -26
app.py CHANGED
@@ -1,48 +1,46 @@
1
  import os
2
  import streamlit as st
3
  import fitz
4
- import openai
5
  import sqlite3
6
- from langchain.embeddings import OpenAIEmbeddings
7
- from langchain.vectorstores import FAISS
8
- from langchain.text_splitter import RecursiveCharacterTextSplitter
9
  import pdfplumber
 
 
 
10
 
11
- # Initialize once
12
  @st.cache_resource
13
  def init_system():
14
  # 1. Process PDF
15
  process_pdf("Q1FY24.pdf")
16
 
17
- # 2. Load pre-processed data
18
- embeddings = OpenAIEmbeddings(openai_api_key="sk-schoolaiassistant-IJAus8rOlO5f3hnrBcyuT3BlbkFJ60gsZPoeRzVR0bwKuABN")
 
 
19
  vector_store = FAISS.load_local("faiss_index", embeddings)
20
 
21
- # 3. Connect SQL
22
  conn = sqlite3.connect('metric_table.db')
23
  return vector_store, conn
24
 
25
  def process_pdf(pdf_path):
26
- # Structured Data
27
  conn = sqlite3.connect('metric_table.db')
28
  cursor = conn.cursor()
29
  cursor.execute('''CREATE TABLE IF NOT EXISTS metric_table
30
  (metric TEXT, quarter TEXT, value REAL)''')
31
 
32
- # Unstructured Data
33
- full_text = ""
34
- doc = fitz.open(pdf_path)
35
-
36
- with pdfplumber.open(pdf_path) as pdf:
37
- for page_num, page in enumerate(pdf.pages):
38
- # Structured extraction
39
- if "Financial Performance Summary" in page.extract_text():
40
- tables = page.extract_tables()
41
- # Add to SQL (example)
42
 
43
- # ... (Add full processing logic from previous code)
 
 
 
 
44
 
45
- # Save vector store
46
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
47
  chunks = splitter.split_text(full_text)
48
  embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
@@ -58,14 +56,16 @@ def main():
58
  query = st.text_input("Ask financial question:")
59
 
60
  if query:
61
- # Hybrid query logic
62
- if any(keyword in query.lower() for keyword in ["trend", "margin", "growth"]):
63
  cursor = conn.cursor()
64
  cursor.execute(f"SELECT * FROM metric_table WHERE metric LIKE '%{query}%'")
65
- st.table(cursor.fetchall())
 
 
66
  else:
67
- docs = vector_store.similarity_search(query)
68
- st.write(docs[0].page_content)
69
 
70
  if __name__ == "__main__":
71
  main()
 
1
  import os
2
  import streamlit as st
3
  import fitz
 
4
  import sqlite3
 
 
 
5
  import pdfplumber
6
+ from langchain_community.vectorstores import FAISS
7
+ from langchain_openai import OpenAIEmbeddings
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
 
10
+ # Initialize system
11
  @st.cache_resource
12
  def init_system():
13
  # 1. Process PDF
14
  process_pdf("Q1FY24.pdf")
15
 
16
+ # 2. Load embeddings with secure API key
17
+ embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
18
+
19
+ # 3. Load vector store
20
  vector_store = FAISS.load_local("faiss_index", embeddings)
21
 
22
+ # 4. Connect SQL
23
  conn = sqlite3.connect('metric_table.db')
24
  return vector_store, conn
25
 
26
  def process_pdf(pdf_path):
27
+ # Structured Data Extraction
28
  conn = sqlite3.connect('metric_table.db')
29
  cursor = conn.cursor()
30
  cursor.execute('''CREATE TABLE IF NOT EXISTS metric_table
31
  (metric TEXT, quarter TEXT, value REAL)''')
32
 
33
+ # Example metric insertion (add full extraction logic)
34
+ cursor.execute("INSERT INTO metric_table VALUES ('Revenue', 'Q1 FY24', 19.8)")
35
+ conn.commit()
 
 
 
 
 
 
 
36
 
37
+ # Unstructured Data Processing
38
+ full_text = ""
39
+ with fitz.open(pdf_path) as doc:
40
+ for page in doc:
41
+ full_text += page.get_text()
42
 
43
+ # Text Chunking & Embedding
44
  splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
45
  chunks = splitter.split_text(full_text)
46
  embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
 
56
  query = st.text_input("Ask financial question:")
57
 
58
  if query:
59
+ # Structured data queries
60
+ if any(kw in query.lower() for kw in ["trend", "margin", "revenue"]):
61
  cursor = conn.cursor()
62
  cursor.execute(f"SELECT * FROM metric_table WHERE metric LIKE '%{query}%'")
63
+ results = cursor.fetchall()
64
+ st.table(results if results else "No matching metrics found")
65
+ # Unstructured data queries
66
  else:
67
+ docs = vector_store.similarity_search(query, k=1)
68
+ st.write(docs[0].page_content if docs else "No relevant information found")
69
 
70
  if __name__ == "__main__":
71
  main()