Spaces:

rolwinpinto
/

Legally-Finance

Sleeping

App Files Files Community

Legally-Finance / app.py

rolwinpinto

Update app.py

7c52f56 verified 11 months ago

raw

history blame contribute delete

5.97 kB

	import os
	import PyPDF2
	import matplotlib.pyplot as plt
	from io import BytesIO
	from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
	from llama_index.embeddings.fastembed import FastEmbedEmbedding
	from llama_index.llms.gemini import Gemini
	import re
	import streamlit as st

	# Global LlamaIndex configuration: FastEmbed supplies the embedding model and
	# Gemini is the LLM. The Gemini API key is read from the GOOGLE_API_KEY
	# environment variable when this module is loaded.
	Settings.embed_model = FastEmbedEmbedding(
	    model_name="BAAI/bge-small-en-v1.5",
	)
	Settings.llm = Gemini(
	    model_name="models/gemini-pro",
	    temperature=0.5,
	    api_key=os.getenv("GOOGLE_API_KEY"),
	)

	def write_to_file(content, filename="./files/uploaded.pdf"):
	    """Write ``content`` to ``filename`` in binary mode.

	    The parent directory of ``filename`` is created first if it does not
	    exist. An existing file at ``filename`` is overwritten.

	    Args:
	        content: Bytes-like data to write.
	        filename: Destination path; may be a bare file name with no
	            directory component.
	    """
	    parent_dir = os.path.dirname(filename)
	    # A bare file name has an empty dirname, and os.makedirs("") raises
	    # FileNotFoundError, so only create directories when there is one.
	    if parent_dir:
	        os.makedirs(parent_dir, exist_ok=True)
	    with open(filename, "wb") as f:
	        f.write(content)

	def extract_financial_data(document_text, lookahead=5):
	    """Heuristically extract revenue figures and period labels from text.

	    A line containing "revenue" or "sales" (case-insensitive) triggers a scan
	    of the lines that follow it; every number found in that window is recorded
	    as a revenue value. Independently, every line containing "Q1".."Q4" or
	    "FY" followed by four digits is recorded, stripped, as a date label.
	    Both lists are finally truncated to the shorter length so they can be
	    plotted against each other. The pairing is purely positional.

	    Args:
	        document_text: Document text with lines separated by newlines.
	        lookahead: How many lines after a keyword line are scanned for
	            numbers. Values below zero are treated as zero.

	    Returns:
	        A dict with keys "Revenue" (list of float) and "Date" (list of str),
	        both of the same length.
	    """
	    number_pattern = re.compile(r'\$?\d+(?:,\d{3})*(?:\.\d+)?')
	    period_pattern = re.compile(r'Q[1-4]|FY\s*\d{4}')
	    # "total revenue" needs no separate entry: it always contains "revenue".
	    keywords = ("revenue", "sales")
	    window = max(lookahead, 0)

	    revenues = []
	    dates = []
	    lines = document_text.split("\n")

	    for i, line in enumerate(lines):
	        lowered = line.lower()
	        if any(keyword in lowered for keyword in keywords):
	            # Slicing clamps at the end of the document, so a keyword close
	            # to the last line no longer indexes past the end of `lines`.
	            for following in lines[i + 1:i + 1 + window]:
	                for match in number_pattern.findall(following):
	                    # Every match is digits with optional "$", "," and one
	                    # decimal part, so float() cannot fail after cleanup.
	                    revenues.append(float(match.replace("$", "").replace(",", "")))

	        if period_pattern.search(line):
	            dates.append(line.strip())

	    # Keep the two series the same length for plotting.
	    paired = min(len(revenues), len(dates))
	    return {"Revenue": revenues[:paired], "Date": dates[:paired]}

	def ingest_documents():
	    """Load and return the documents found in the ``./files/`` directory.

	    Loading is delegated to ``SimpleDirectoryReader.load_data``.
	    """
	    return SimpleDirectoryReader("./files/").load_data()

	def load_data(documents):
	    """Build and return a ``VectorStoreIndex`` from ``documents``."""
	    return VectorStoreIndex.from_documents(documents)

	def generate_summary(index, document_text, query, target_language):
	    """Ask the index's query engine for a financial analysis of the document.

	    A single prompt is sent that embeds the user's query, the full document
	    text and the target language. The prompt asks the model to translate the
	    query to English, analyse the document, and answer in the target language.

	    Args:
	        index: Object exposing ``as_query_engine()``.
	        document_text: Text of the document, inserted verbatim in the prompt.
	        query: The user's question, in any language.
	        target_language: Language identifier interpolated into the prompt.

	    Returns:
	        The ``response`` attribute of the query engine's result.
	    """
	    engine = index.as_query_engine()

	    # One prompt covers translation of the query, the analysis itself, and
	    # translation of the answer back into the requested language.
	    prompt = f"""
	You are a financial analyst and translator. Your task is to translate the following query into English,
	analyze the financial document based on the translated query, and then translate the response back into {target_language}.

	Query: {query}

	Document: {document_text}

	Please cover the following aspects:
	1. Revenue and profit trends
	2. Key financial metrics
	3. Comparison with previous periods
	4. Future outlook or forecasts
	5. Any notable financial risks or opportunities

	Provide a clear, concise, and professional response in {target_language}.
	"""

	    answer = engine.query(prompt)
	    return answer.response

	def generate_comparison_graph(data):
	    """Render a revenue-over-period line chart in the Streamlit page.

	    Args:
	        data: Mapping with keys "Date" (x-axis labels) and "Revenue"
	            (y values). If either list is empty, a notice is written to the
	            page and nothing is plotted.

	    Note:
	        The y-axis label says "in millions", but the values are plotted
	        exactly as given; no scaling is applied here.
	    """
	    if not data["Date"] or not data["Revenue"]:
	        st.write("Insufficient data for generating the revenue comparison graph.")
	        return

	    fig, ax = plt.subplots(figsize=(10, 6))
	    try:
	        ax.plot(data["Date"], data["Revenue"], marker="o", linestyle="-", color="b", label="Revenue")
	        ax.set_title("Revenue Comparison")
	        ax.set_xlabel("Date")
	        ax.set_ylabel("Revenue (in millions)")
	        ax.grid(True)
	        ax.legend()
	        # Period labels are whole text lines, so tilt them to limit overlap.
	        plt.xticks(rotation=45, ha="right")
	        plt.tight_layout()
	        st.pyplot(fig)
	    finally:
	        # pyplot keeps every figure it creates until it is closed. Without
	        # this, each Streamlit rerun would leave another figure in memory.
	        plt.close(fig)

	# Streamlit app
	def main():
	    """Render the Streamlit page and run the analysis pipeline.

	    Flow: show the uploader and language selector. Once a file is uploaded,
	    extract its text, save the raw bytes via ``write_to_file``, extract
	    revenue data, and build the index from the saved files. When the user
	    enters a query, show the generated analysis followed by the revenue chart
	    (or a notice when no revenue data was found).
	    """
	    st.title("Fortune 500 Financial Document Analyzer")
	    st.write("Upload a financial document, ask questions in your preferred language, and get detailed analysis!")

	    uploaded_file = st.file_uploader("Choose a financial document file", type=["pdf"])

	    # Display name -> language code; the code is what gets sent in the prompt.
	    languages = {
	        'English': 'en',
	        'Hindi': 'hi',
	        'Kannada': 'kn',
	        'Spanish': 'es',
	        'French': 'fr',
	        'German': 'de',
	    }
	    selected_language = st.selectbox("Select your preferred language", list(languages))
	    target_language = languages[selected_language]

	    # Nothing else to do until a document has been uploaded.
	    if uploaded_file is None:
	        return

	    file_bytes = uploaded_file.getvalue()
	    if uploaded_file.type == "application/pdf":
	        pdf_reader = PyPDF2.PdfReader(BytesIO(file_bytes))
	        # Join once instead of repeated "+="; "or ''" guards against a page
	        # whose text extraction yields None.
	        document_text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
	    else:
	        document_text = file_bytes.decode("utf-8")

	    # Persist the upload so ingest_documents() can read it from ./files/.
	    write_to_file(file_bytes)

	    st.write("Analyzing financial document...")

	    # Extract financial data
	    financial_data = extract_financial_data(document_text)

	    # Ingest documents for summarization and query-driven analysis
	    documents = ingest_documents()
	    index = load_data(documents)

	    # Prompt for the query in the language the user selected
	    query = st.text_input(f"Enter your financial analysis query in {selected_language}", "")
	    if not query:
	        return

	    summary = generate_summary(index, document_text, query, target_language)
	    st.write(f"## Financial Analysis Result (in {selected_language})")
	    st.write(summary)

	    # Display revenue comparison graph
	    if financial_data["Revenue"] and financial_data["Date"]:
	        st.write("## Revenue Comparison")
	        generate_comparison_graph(financial_data)
	    else:
	        st.write("No revenue data found for comparison.")

	# Start the app only when this file is run as the main module, not when it
	# is imported.
	if __name__ == "__main__":
	    main()