ahmedelbeshry commited on
Commit
7e02cc7
1 Parent(s): c2f18c6

Upload 23 files

Browse files
Files changed (23) hide show
  1. .gitattributes +35 -35
  2. .gitignore +0 -0
  3. Dockerfile +26 -0
  4. README.md +11 -11
  5. __init__.py +667 -0
  6. app.log +0 -0
  7. app.py +544 -0
  8. app2.py +560 -0
  9. bm25retriever.pkl +3 -0
  10. chain.py +28 -0
  11. chat.py +667 -0
  12. chatflask.py +646 -0
  13. config.py +18 -0
  14. embeddings.py +62 -0
  15. flasktest.py +49 -0
  16. index.html +70 -0
  17. llm.py +45 -0
  18. logging_config.py +38 -0
  19. main.py +100 -0
  20. rag.py +114 -0
  21. requirements.txt +30 -0
  22. retriever.py +53 -0
  23. tools.py +188 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
Binary file (38 Bytes). View file
 
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official Python runtime as a parent image
FROM python:3.11-slim

# Set environment variables to avoid interactive prompts
ENV DEBIAN_FRONTEND=noninteractive

# Set the working directory in the container
WORKDIR /app

# Copy the current directory contents into the container at /app
COPY . /app

# Install any needed packages specified in requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the .env file into the image so the app can read its settings.
# NOTE(review): baking secrets into an image layer is a security risk --
# prefer supplying them at runtime (e.g. `docker run --env-file .env`).
COPY .env .env

# Expose the port the Flask app runs on
EXPOSE 5000

# Expose the port the Streamlit app runs on
EXPOSE 8501

# Start the Streamlit app. NOTE(review): despite exposing both ports above,
# this CMD launches ONLY Streamlit; the Flask app is never started here.
CMD ["streamlit", "run", "app.py"]
README.md CHANGED
@@ -1,11 +1,11 @@
1
- ---
2
- title: Financial Chatbot
3
- emoji: 🚀
4
- colorFrom: indigo
5
- colorTo: blue
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Financial Chatbot
3
+ emoji: 🚀
4
+ colorFrom: indigo
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ license: mit
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__init__.py ADDED
@@ -0,0 +1,667 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import yfinance as yf
4
+ import pandas as pd
5
+ from datetime import datetime, timedelta
6
+ import logging
7
+ from concurrent.futures import ThreadPoolExecutor, as_completed
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ from config import Config
10
+ import numpy as np
11
+ from typing import Optional, Tuple, List, Dict
12
+ from rag import get_answer
13
+ import time
14
+ from tenacity import retry, stop_after_attempt, wait_exponential
15
+
16
+ # Set up logging
17
+ logging.basicConfig(level=logging.DEBUG,
18
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
19
+ handlers=[logging.FileHandler("app.log"),
20
+ logging.StreamHandler()])
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ # Initialize the Gemini model
25
+ llm = ChatGoogleGenerativeAI(api_key=Config.GEMINI_API_KEY, model="gemini-1.5-flash-latest", temperature=0.5)
26
+
27
+ # Configuration for Google Custom Search API
28
+ GOOGLE_API_KEY = Config.GOOGLE_API_KEY
29
+ SEARCH_ENGINE_ID = Config.SEARCH_ENGINE_ID
30
+
31
+
32
@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=2, max=8), reraise=True)
def invoke_llm(prompt):
    """Send *prompt* to the shared Gemini model.

    Transient failures are retried up to 5 times with exponential back-off
    (2-8 s); the final exception is re-raised if every attempt fails.
    """
    response = llm.invoke(prompt)
    return response
35
+
36
+
37
class DataSummarizer:
    """Helpers that gather web/search context and compute technical indicators.

    The ``calculate_*`` methods expect an OHLCV DataFrame with lowercase
    columns (``date``, ``open``, ``high``, ``low``, ``close``, ``volume``).
    Every method logs its runtime and returns ``None`` on failure instead of
    raising, so callers must handle ``None`` results.

    Fix over the original: both outbound HTTP calls now carry a timeout —
    ``requests.get`` without one can block the caller indefinitely on a
    stalled connection.
    """

    # Upper bound (seconds) for outbound HTTP calls.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        pass

    def google_search(self, query: str) -> Optional[str]:
        """Query the Google Custom Search API and return an LLM-written summary.

        Returns None when the HTTP request or the summarization fails.
        """
        start_time = time.time()
        try:
            url = "https://www.googleapis.com/customsearch/v1"
            params = {
                'key': GOOGLE_API_KEY,
                'cx': SEARCH_ENGINE_ID,
                'q': query
            }
            # Fix: bounded timeout so a stuck request cannot hang the caller.
            response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            search_results = response.json()
            logger.info("google_search took %.2f seconds", time.time() - start_time)

            # Summarize the search results using Gemini
            items = search_results.get('items', [])
            content = "\n\n".join([f"{item.get('title', '')}\n{item.get('snippet', '')}" for item in items])
            prompt = f"Summarize the following search results:\n\n{content}"
            summary_response = invoke_llm(prompt)
            return summary_response.content.strip()
        except Exception as e:
            logger.error(f"Error during Google Search API request: {e}")
            return None

    def extract_content_from_item(self, item: Dict) -> Optional[str]:
        """Join a search-result item's title and snippet into one text block."""
        try:
            snippet = item.get('snippet', '')
            title = item.get('title', '')
            return f"{title}\n{snippet}"
        except Exception as e:
            logger.error(f"Error extracting content from item: {e}")
            return None

    def calculate_moving_average(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
        """Simple moving average of the close price (NaN for the first window-1 rows)."""
        start_time = time.time()
        try:
            result = df['close'].rolling(window=window).mean()
            logger.info("calculate_moving_average took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating moving average: {e}")
            return None

    def calculate_rsi(self, df: pd.DataFrame, window: int = 14) -> Optional[pd.Series]:
        """Relative Strength Index from simple rolling means of gains/losses."""
        start_time = time.time()
        try:
            delta = df['close'].diff()
            gain = delta.where(delta > 0, 0).rolling(window=window).mean()
            loss = -delta.where(delta < 0, 0).rolling(window=window).mean()
            rs = gain / loss
            result = 100 - (100 / (1 + rs))
            logger.info("calculate_rsi took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating RSI: {e}")
            return None

    def calculate_ema(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
        """Exponential moving average of the close price."""
        start_time = time.time()
        try:
            result = df['close'].ewm(span=window, adjust=False).mean()
            logger.info("calculate_ema took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating EMA: {e}")
            return None

    def calculate_bollinger_bands(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.DataFrame]:
        """Bollinger Bands: rolling mean +/- 2 rolling standard deviations.

        Returns a DataFrame with columns 'MA', 'Upper Band', 'Lower Band'.
        """
        start_time = time.time()
        try:
            ma = df['close'].rolling(window=window).mean()
            std = df['close'].rolling(window=window).std()
            upper_band = ma + (std * 2)
            lower_band = ma - (std * 2)
            result = pd.DataFrame({'MA': ma, 'Upper Band': upper_band, 'Lower Band': lower_band})
            logger.info("calculate_bollinger_bands took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating Bollinger Bands: {e}")
            return None

    def calculate_macd(self, df: pd.DataFrame, short_window: int = 12, long_window: int = 26, signal_window: int = 9) -> \
            Optional[pd.DataFrame]:
        """MACD (short EMA - long EMA) and its signal line.

        Returns a DataFrame with columns 'MACD' and 'Signal Line'.
        """
        start_time = time.time()
        try:
            short_ema = df['close'].ewm(span=short_window, adjust=False).mean()
            long_ema = df['close'].ewm(span=long_window, adjust=False).mean()
            macd = short_ema - long_ema
            signal = macd.ewm(span=signal_window, adjust=False).mean()
            result = pd.DataFrame({'MACD': macd, 'Signal Line': signal})
            logger.info("calculate_macd took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating MACD: {e}")
            return None

    def calculate_volatility(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
        """Rolling volatility of log returns, scaled by sqrt(window).

        NOTE(review): annualized volatility conventionally scales by
        sqrt(252); sqrt(window) is kept here to preserve existing behavior —
        confirm which was intended.
        """
        start_time = time.time()
        try:
            log_returns = np.log(df['close'] / df['close'].shift(1))
            result = log_returns.rolling(window=window).std() * np.sqrt(window)
            logger.info("calculate_volatility took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating volatility: {e}")
            return None

    def calculate_atr(self, df: pd.DataFrame, window: int = 14) -> Optional[pd.Series]:
        """Average True Range: rolling mean of the true range."""
        start_time = time.time()
        try:
            high_low = df['high'] - df['low']
            high_close = np.abs(df['high'] - df['close'].shift())
            low_close = np.abs(df['low'] - df['close'].shift())
            true_range = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
            result = true_range.rolling(window=window).mean()
            logger.info("calculate_atr took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating ATR: {e}")
            return None

    def calculate_obv(self, df: pd.DataFrame) -> Optional[pd.Series]:
        """On-Balance Volume: cumulative signed volume by close-price direction."""
        start_time = time.time()
        try:
            result = (np.sign(df['close'].diff()) * df['volume']).fillna(0).cumsum()
            logger.info("calculate_obv took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating OBV: {e}")
            return None

    def calculate_yearly_summary(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Per-year mean/max/min close and total volume.

        NOTE: adds a 'year' column to *df* in place (visible to the caller).
        """
        start_time = time.time()
        try:
            df['year'] = pd.to_datetime(df['date']).dt.year
            yearly_summary = df.groupby('year').agg({
                'close': ['mean', 'max', 'min'],
                'volume': 'sum'
            })
            # Flatten the MultiIndex columns to e.g. 'close_mean', 'volume_sum'.
            yearly_summary.columns = ['_'.join(col) for col in yearly_summary.columns]
            logger.info("calculate_yearly_summary took %.2f seconds", time.time() - start_time)
            return yearly_summary
        except Exception as e:
            logger.error(f"Error calculating yearly summary: {e}")
            return None

    def get_full_last_year(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Rows from Jan 1 to Dec 31 of the previous calendar year.

        Assumes df['date'] holds datetime.date values (as produced by
        fetch_stock_data_yahoo) — TODO confirm for other callers.
        """
        start_time = time.time()
        try:
            today = datetime.today().date()
            last_year_start = datetime(today.year - 1, 1, 1).date()
            last_year_end = datetime(today.year - 1, 12, 31).date()
            mask = (df['date'] >= last_year_start) & (df['date'] <= last_year_end)
            result = df.loc[mask]
            logger.info("get_full_last_year took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error filtering data for the last year: {e}")
            return None

    def calculate_ytd_performance(self, df: pd.DataFrame) -> Optional[float]:
        """Percent change from the first open to the last close of the current year.

        Returns None (via the except path) when there are no rows this year.
        """
        start_time = time.time()
        try:
            today = datetime.today().date()
            year_start = datetime(today.year, 1, 1).date()
            mask = (df['date'] >= year_start) & (df['date'] <= today)
            ytd_data = df.loc[mask]
            opening_price = ytd_data.iloc[0]['open']
            closing_price = ytd_data.iloc[-1]['close']
            result = ((closing_price - opening_price) / opening_price) * 100
            logger.info("calculate_ytd_performance took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating YTD performance: {e}")
            return None

    def calculate_pe_ratio(self, current_price: float, eps: float) -> Optional[float]:
        """Price/earnings ratio; None when eps is zero (division undefined)."""
        start_time = time.time()
        try:
            if eps == 0:
                raise ValueError("EPS cannot be zero for P/E ratio calculation.")
            result = current_price / eps
            logger.info("calculate_pe_ratio took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating P/E ratio: {e}")
            return None

    def fetch_google_snippet(self, query: str) -> Optional[str]:
        """Scrape the answer snippet from a Google results page.

        Fragile by nature: depends on Google's current CSS class names.
        Returns "Snippet not found." when no known snippet container matches,
        and None on request/parse errors.
        """
        try:
            search_url = f"https://www.google.com/search?q={query}"
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
            }
            # Fix: bounded timeout so a stuck request cannot hang the caller.
            response = requests.get(search_url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            soup = BeautifulSoup(response.text, 'html.parser')
            snippet_classes = [
                'BNeawe iBp4i AP7Wnd',
                'BNeawe s3v9rd AP7Wnd',
                'BVG0Nb',
                'kno-rdesc'
            ]
            snippet = None
            for cls in snippet_classes:
                snippet = soup.find('div', class_=cls)
                if snippet:
                    break
            return snippet.get_text() if snippet else "Snippet not found."
        except Exception as e:
            logger.error(f"Error fetching Google snippet: {e}")
            return None
252
+
253
+
254
def extract_ticker_from_response(response: str) -> Optional[str]:
    """Pull a ticker symbol out of an LLM reply.

    Replies shaped like "... is **AAPL**." yield "AAPL"; any other reply is
    returned whole with surrounding whitespace stripped. Returns None if an
    unexpected error occurs.
    """
    start_time = time.time()
    try:
        has_bold_marker = "is **" in response and "**." in response
        if has_bold_marker:
            ticker = response.split("is **")[1].split("**.")[0].strip()
        else:
            ticker = response.strip()
        logger.info("extract_ticker_from_response took %.2f seconds", time.time() - start_time)
        return ticker
    except Exception as e:
        logger.error(f"Error extracting ticker from response: {e}")
        return None
267
+
268
+
269
def detect_translate_entity_and_ticker(query: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Run the four-step LLM pipeline over a user query.

    Steps: detect the language, translate to English when needed, extract the
    company entity, then resolve its stock ticker. Returns a 4-tuple
    (language, entity, translated_query, ticker); later elements are None as
    soon as a step fails, and (None, None, None, None) on unexpected errors.
    """
    try:
        start_time = time.time()

        # Step 1: Detect Language
        detected_language = invoke_llm(f"Detect the language for the following text: {query}").content.strip()
        logger.info(f"Language detected: {detected_language}")

        # Step 2: Translate to English (if necessary)
        translated_query = query
        if detected_language != "English":
            translated_query = invoke_llm(f"Translate the following text to English: {query}").content.strip()
            logger.info(f"Translation completed: {translated_query}")
            print(f"Translation: {translated_query}")

        # Step 3: Detect Entity
        entity_reply = invoke_llm(f"Detect the entity in the following text that is a company name: {translated_query}")
        detected_entity = entity_reply.content.strip()
        logger.info(f"Entity detected: {detected_entity}")
        print(f"Entity: {detected_entity}")

        if not detected_entity:
            logger.error("No entity detected")
            return detected_language, None, translated_query, None

        # Step 4: Get Stock Ticker
        ticker_reply = invoke_llm(f"What is the stock ticker symbol for the company {detected_entity}?")
        stock_ticker = extract_ticker_from_response(ticker_reply.content.strip())

        if not stock_ticker:
            logger.error("No stock ticker detected")
            return detected_language, detected_entity, translated_query, None

        logger.info("detect_translate_entity_and_ticker took %.2f seconds", time.time() - start_time)
        return detected_language, detected_entity, translated_query, stock_ticker
    except Exception as e:
        logger.error(f"Error in detecting, translating, or extracting entity and ticker: {e}")
        return None, None, None, None
313
+
314
+
315
def fetch_stock_data_yahoo(symbol: str) -> pd.DataFrame:
    """Download ~3 years of daily OHLCV history for *symbol* via yfinance.

    Columns are normalized to lowercase and the index flattened into a
    'date' column of datetime.date values. Returns an empty DataFrame on
    any failure.
    """
    start_time = time.time()
    try:
        ticker = yf.Ticker(symbol)
        logger.info(f"Fetching data for symbol: {symbol}")

        end_date = datetime.now()
        start_date = end_date - timedelta(days=3 * 365)

        history_df = ticker.history(start=start_date, end=end_date)
        if history_df.empty:
            raise ValueError(f"No historical data found for symbol: {symbol}")

        rename_map = {"Open": "open", "High": "high", "Low": "low", "Close": "close", "Volume": "volume"}
        history_df = history_df.rename(columns=rename_map)

        # Turn the DatetimeIndex into a plain 'date' column of date objects.
        history_df.reset_index(inplace=True)
        history_df['date'] = history_df['Date'].dt.date
        history_df = history_df.drop(columns=['Date'])
        history_df = history_df[['date', 'open', 'high', 'low', 'close', 'volume']]

        if 'close' not in history_df.columns:
            raise KeyError("The historical data must contain a 'close' column.")

        logger.info("fetch_stock_data_yahoo took %.2f seconds", time.time() - start_time)
        return history_df
    except Exception as e:
        logger.error(f"Failed to fetch stock data for {symbol} from Yahoo Finance: {e}")
        return pd.DataFrame()
345
+
346
+
347
def fetch_current_stock_price(symbol: str) -> Optional[float]:
    """Look up the live price for *symbol* via yfinance; None on failure
    (including when 'currentPrice' is absent from the info dict)."""
    start_time = time.time()
    try:
        price = yf.Ticker(symbol).info['currentPrice']
        logger.info("fetch_current_stock_price took %.2f seconds", time.time() - start_time)
        return price
    except Exception as e:
        logger.error(f"Failed to fetch current stock price for {symbol}: {e}")
        return None
357
+
358
+
359
def format_stock_data_for_gemini(stock_data: pd.DataFrame) -> str:
    """Render an OHLCV DataFrame as the plain-text table used in LLM prompts.

    Fix: rows are collected in a list and joined once — repeated string
    concatenation in the original was O(n^2) over three years of daily rows.
    Returns "No historical data available." for an empty frame and
    "Error formatting stock data." on unexpected errors.
    """
    start_time = time.time()
    try:
        if stock_data.empty:
            return "No historical data available."

        header = ("Historical stock data for the last three years:\n\n"
                  "Date | Open | High | Low | Close | Volume\n"
                  "------------------------------------------------------\n")
        rows = [
            f"{row['date']} | {row['open']:.2f} | {row['high']:.2f} | {row['low']:.2f} | {row['close']:.2f} | {int(row['volume'])}\n"
            for _, row in stock_data.iterrows()
        ]
        formatted_data = header + "".join(rows)

        logger.info("format_stock_data_for_gemini took %.2f seconds", time.time() - start_time)
        return formatted_data
    except Exception as e:
        logger.error(f"Error formatting stock data for Gemini: {e}")
        return "Error formatting stock data."
377
+
378
+
379
def fetch_company_info_yahoo(symbol: str) -> Dict:
    """Fetch company profile fields for *symbol* from Yahoo Finance.

    Returns a dict with friendly keys (name, sector, ...) where missing
    fields default to "N/A", or {"error": ...} on invalid input / failure.
    """
    start_time = time.time()
    try:
        if not symbol:
            return {"error": "Invalid symbol"}

        info = yf.Ticker(symbol).info
        logger.info("fetch_company_info_yahoo took %.2f seconds", time.time() - start_time)

        # Output key -> yfinance info key.
        field_map = {
            "name": "longName",
            "sector": "sector",
            "industry": "industry",
            "marketCap": "marketCap",
            "summary": "longBusinessSummary",
            "website": "website",
            "address": "address1",
            "city": "city",
            "state": "state",
            "country": "country",
            "phone": "phone",
        }
        return {out_key: info.get(src_key, "N/A") for out_key, src_key in field_map.items()}
    except Exception as e:
        logger.error(f"Error fetching company info for {symbol}: {e}")
        return {"error": str(e)}
404
+
405
+
406
def format_company_info_for_gemini(company_info: Dict) -> str:
    """Render a company-info dict as the labelled text block used in prompts.

    Propagates an upstream {"error": ...} dict as an error string; missing
    keys fall into the except path and yield "Error formatting company info.".
    """
    start_time = time.time()
    try:
        if "error" in company_info:
            return f"Error fetching company info: {company_info['error']}"

        parts = [
            "\nCompany Information:\n",
            f"Name: {company_info['name']}\n",
            f"Sector: {company_info['sector']}\n",
            f"Industry: {company_info['industry']}\n",
            f"Market Cap: {company_info['marketCap']}\n",
            f"Summary: {company_info['summary']}\n",
            f"Website: {company_info['website']}\n",
            f"Address: {company_info['address']}, {company_info['city']}, {company_info['state']}, {company_info['country']}\n",
            f"Phone: {company_info['phone']}\n",
        ]
        formatted_info = "".join(parts)

        logger.info("format_company_info_for_gemini took %.2f seconds", time.time() - start_time)
        return formatted_info
    except Exception as e:
        logger.error(f"Error formatting company info for Gemini: {e}")
        return "Error formatting company info."
427
+
428
+
429
def fetch_company_news_yahoo(symbol: str) -> List[Dict]:
    """Return the latest Yahoo Finance news items for *symbol*.

    An empty news feed is treated as a failure; every failure path returns [].
    """
    start_time = time.time()
    try:
        articles = yf.Ticker(symbol).news
        if not articles:
            raise ValueError(f"No news found for symbol: {symbol}")
        logger.info("fetch_company_news_yahoo took %.2f seconds", time.time() - start_time)
        return articles
    except Exception as e:
        logger.error(f"Failed to fetch news for {symbol} from Yahoo Finance: {e}")
        return []
441
+
442
+
443
def format_company_news_for_gemini(news: List[Dict]) -> str:
    """Render a list of news-article dicts as the text block used in prompts.

    Each article must provide title/publisher/link/providerPublishTime; a
    missing key routes through the except path and yields the error string.
    """
    start_time = time.time()
    try:
        if not news:
            return "No news available."

        article_blocks = [
            f"Title: {article['title']}\n"
            f"Publisher: {article['publisher']}\n"
            f"Link: {article['link']}\n"
            f"Published: {article['providerPublishTime']}\n\n"
            for article in news
        ]
        formatted_news = "Latest company news:\n\n" + "".join(article_blocks)

        logger.info("format_company_news_for_gemini took %.2f seconds", time.time() - start_time)
        return formatted_news
    except Exception as e:
        logger.error(f"Error formatting company news for Gemini: {e}")
        return "Error formatting company news."
461
+
462
+
463
def send_to_gemini_for_summarization(content: str) -> str:
    """Ask the LLM to summarize *content*.

    Bug fix: the original ran ``" ".join(content)`` unconditionally, which
    splays a plain string into space-separated characters ("abc" -> "a b c")
    even though the parameter is annotated ``str``. Joining is now applied
    only to non-string iterables, so both str and list-of-str callers get a
    sensible prompt.
    """
    start_time = time.time()
    try:
        unified_content = content if isinstance(content, str) else " ".join(content)
        prompt = f"Summarize the main points of this article.\n\n{unified_content}"
        response = invoke_llm(prompt)
        logger.info("send_to_gemini_for_summarization took %.2f seconds", time.time() - start_time)
        return response.content.strip()
    except Exception as e:
        logger.error(f"Error sending content to Gemini for summarization: {e}")
        return "Error summarizing content."
474
+
475
+
476
def answer_question_with_data(question: str, data: Dict) -> str:
    """Build the financial-advisor prompt from *data* and ask the LLM *question*.

    Returns the stripped model reply, or "Error answering question." on failure.
    """
    start_time = time.time()
    try:
        data_str = "".join(f"{key}:\n{value}\n\n" for key, value in data.items())

        prompt = (f"You are a financial advisor. Begin your answer by stating that and only give the answer after.\n"
                  f"Using the following data, answer this question: {question}\n\nData:\n{data_str}\n"
                  f"Make your answer in the best form and professional.\n"
                  f"Don't say anything about the source of the data.\n"
                  f"If you don't have the data to answer, say this data is not available yet. If the data is not available in the stock history data, say this was a weekend and there is no data for it.")
        response = invoke_llm(prompt)
        logger.info("answer_question_with_data took %.2f seconds", time.time() - start_time)
        return response.content.strip()
    except Exception as e:
        logger.error(f"Error answering question with data: {e}")
        return "Error answering question."
494
+
495
+
496
def calculate_metrics(stock_data: pd.DataFrame, summarizer: 'DataSummarizer', company_info: Dict) -> Dict[str, str]:
    """Compute every technical indicator and render each as a display string.

    Fixes over the original:
    - The two near-identical dict literals (with / without "P/E Ratio") are
      collapsed into one base dict plus a conditional insertion.
    - When ``calculate_pe_ratio`` returns None (e.g. bad inputs), the P/E key
      is simply omitted; previously ``f"{None:.2f}"`` raised and discarded
      ALL metrics via the except path.

    Returns {"Error": ...} only when a core indicator genuinely fails
    (e.g. a calculate_* helper returned None and .to_string() raised).
    """
    start_time = time.time()
    try:
        moving_average = summarizer.calculate_moving_average(stock_data)
        rsi = summarizer.calculate_rsi(stock_data)
        ema = summarizer.calculate_ema(stock_data)
        bollinger_bands = summarizer.calculate_bollinger_bands(stock_data)
        macd = summarizer.calculate_macd(stock_data)
        volatility = summarizer.calculate_volatility(stock_data)
        atr = summarizer.calculate_atr(stock_data)
        obv = summarizer.calculate_obv(stock_data)
        yearly_summary = summarizer.calculate_yearly_summary(stock_data)
        ytd_performance = summarizer.calculate_ytd_performance(stock_data)

        formatted_metrics = {
            "Moving Average": moving_average.to_string(),
            "RSI": rsi.to_string(),
            "EMA": ema.to_string(),
            "Bollinger Bands": bollinger_bands.to_string(),
            "MACD": macd.to_string(),
            "Volatility": volatility.to_string(),
            "ATR": atr.to_string(),
            "OBV": obv.to_string(),
            "Yearly Summary": yearly_summary.to_string(),
            "YTD Performance": f"{ytd_performance:.2f}%",
        }

        # P/E only when earnings-per-share is available and non-zero
        # (falsy eps — None or 0 — is skipped, matching the original).
        eps = company_info.get('trailingEps', None)
        if eps:
            current_price = stock_data.iloc[-1]['close']
            pe_ratio = summarizer.calculate_pe_ratio(current_price, eps)
            if pe_ratio is not None:
                formatted_metrics["P/E Ratio"] = f"{pe_ratio:.2f}"

        logger.info("calculate_metrics took %.2f seconds", time.time() - start_time)
        return formatted_metrics
    except Exception as e:
        logger.error(f"Error calculating metrics: {e}")
        return {"Error": "Error calculating metrics"}
546
+
547
+
548
def prepare_data(formatted_stock_data: str, formatted_company_info: str, formatted_company_news: str,
                 google_results: str, formatted_metrics: Dict[str, str], google_snippet: str, rag_response: str) -> \
        Dict[str, str]:
    """Assemble every data source into the flat dict fed to the LLM.

    The metrics dict is stored whole under "Calculations" AND flattened into
    the top level, so each metric is addressable by its own name.
    """
    start_time = time.time()
    collected_data = {
        "Formatted Stock Data": formatted_stock_data,
        "Formatted Company Info": formatted_company_info,
        "Formatted Company News": formatted_company_news,
        "Google Search Results": google_results,
        "Google Snippet": google_snippet,
        "RAG Response": rag_response,
        "Calculations": formatted_metrics,
        **formatted_metrics,
    }
    logger.info("prepare_data took %.2f seconds", time.time() - start_time)
    return collected_data
564
+
565
+
566
def main():
    """Interactive console loop for the financial chatbot.

    Each turn: detect language/entity/ticker from the user's input, fan the
    independent data fetches out to a thread pool, fold everything into one
    dict, and ask the LLM to answer with that context. Conversation history
    is replayed into every prompt. Type exit/quit/bye to stop.
    """
    print("Welcome to the Financial Data Chatbot. How can I assist you today?")

    summarizer = DataSummarizer()
    conversation_history = []

    while True:
        user_input = input("You: ")

        if user_input.lower() in ['exit', 'quit', 'bye']:
            print("Goodbye! Have a great day!")
            break

        conversation_history.append(f"You: {user_input}")

        try:
            # Detect language, entity, translation, and stock ticker
            language, entity, translation, stock_ticker = detect_translate_entity_and_ticker(user_input)

            logger.info(
                f"Detected Language: {language}, Entity: {entity}, Translation: {translation}, Stock Ticker: {stock_ticker}")

            if entity and stock_ticker:
                # Ticker found: fetch market data, company profile, news, RAG
                # answer and Google context concurrently (all blocking I/O).
                with ThreadPoolExecutor() as executor:
                    futures = {
                        executor.submit(fetch_stock_data_yahoo, stock_ticker): "stock_data",
                        executor.submit(fetch_company_info_yahoo, stock_ticker): "company_info",
                        executor.submit(fetch_company_news_yahoo, stock_ticker): "company_news",
                        executor.submit(fetch_current_stock_price, stock_ticker): "current_stock_price",
                        executor.submit(get_answer, user_input): "rag_response",
                        executor.submit(summarizer.google_search, user_input): "google_results",
                        executor.submit(summarizer.fetch_google_snippet, user_input): "google_snippet"
                    }
                    results = {futures[future]: future.result() for future in as_completed(futures)}

                stock_data = results.get("stock_data", pd.DataFrame())
                formatted_stock_data = format_stock_data_for_gemini(
                    stock_data) if not stock_data.empty else "No historical data available."

                company_info = results.get("company_info", {})
                formatted_company_info = format_company_info_for_gemini(
                    company_info) if company_info else "No company info available."

                company_news = results.get("company_news", [])
                formatted_company_news = format_company_news_for_gemini(
                    company_news) if company_news else "No news available."

                current_stock_price = results.get("current_stock_price", None)

                formatted_metrics = calculate_metrics(stock_data, summarizer,
                                                      company_info) if not stock_data.empty else {
                    "Error": "No stock data for metrics"}

                google_results = results.get("google_results", "No additional news found through Google Search.")
                google_snippet = results.get("google_snippet", "Snippet not found.")

                rag_response = results.get("rag_response", "No response from RAG.")

                collected_data = prepare_data(formatted_stock_data, formatted_company_info, formatted_company_news,
                                              google_results, formatted_metrics, google_snippet, rag_response)
                collected_data[
                    "Current Stock Price"] = f"${current_stock_price:.2f}" if current_stock_price is not None else "N/A"

                # Replay the whole conversation so the LLM keeps context;
                # the translated query is used when a ticker was resolved.
                conversation_history.append(f"RAG Response: {rag_response}")
                history_context = "\n".join(conversation_history)

                answer = answer_question_with_data(f"{history_context}\n\nUser's query: {translation}", collected_data)

                print(f"\nBot: {answer}")
                conversation_history.append(f"Bot: {answer}")

            else:
                # No ticker: answer from RAG + Google context only.
                with ThreadPoolExecutor() as executor:
                    futures = {
                        executor.submit(get_answer, user_input): "rag_response",
                        executor.submit(summarizer.google_search, user_input): "google_results",
                        executor.submit(summarizer.fetch_google_snippet, user_input): "google_snippet"
                    }
                    results = {futures[future]: future.result() for future in as_completed(futures)}

                google_results = results.get("google_results", "No additional news found through Google Search.")
                google_snippet = results.get("google_snippet", "Snippet not found.")
                rag_response = results.get("rag_response", "No response from RAG.")

                collected_data = prepare_data("", "", "", google_results, {}, google_snippet, rag_response)

                conversation_history.append(f"RAG Response: {rag_response}")
                history_context = "\n".join(conversation_history)

                answer = answer_question_with_data(f"{history_context}\n\nUser's query: {user_input}", collected_data)

                print(f"\nBot: {answer}")
                conversation_history.append(f"Bot: {answer}")

        except Exception as e:
            # Catch-all keeps the REPL alive across any per-turn failure.
            logger.error(f"An error occurred: {e}")
            response = "An error occurred while processing your request. Please try again later."
            print(f"Bot: {response}")
            conversation_history.append(f"Bot: {response}")

if __name__ == "__main__":
    main()
app.log ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,544 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import yfinance as yf
5
+ import pandas as pd
6
+ from datetime import datetime, timedelta
7
+ import logging
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+ from langchain_google_genai import ChatGoogleGenerativeAI
10
+ from config import Config
11
+ import numpy as np
12
+ from typing import Optional, Tuple, List, Dict
13
+ from rag import get_answer
14
+ import time
15
+ from tenacity import retry, stop_after_attempt, wait_exponential
16
+ import threading
17
+ import streamlit as st
18
+ import json
19
+
20
+ # Initialize Flask app
21
+ app = Flask(__name__)
22
+
23
+ # Set up logging
24
+ logging.basicConfig(level=logging.DEBUG,
25
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
26
+ handlers=[logging.FileHandler("app.log"),
27
+ logging.StreamHandler()])
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+ # Initialize the Gemini model
32
+ llm = ChatGoogleGenerativeAI(api_key=Config.GEMINI_API_KEY, model="gemini-1.5-flash-latest", temperature=0.5)
33
+
34
+ # Configuration for Google Custom Search API
35
+ GOOGLE_API_KEY = Config.GOOGLE_API_KEY
36
+ SEARCH_ENGINE_ID = Config.SEARCH_ENGINE_ID
37
+
38
@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=2, max=8), reraise=True)
def invoke_llm(prompt):
    """Send *prompt* to the shared Gemini client, retrying up to 5 times with
    exponential backoff (2-8s between attempts); the final failure is re-raised
    to the caller because of ``reraise=True``."""
    return llm.invoke(prompt)
41
+
42
class DataSummarizer:
    """Stateless helper bundling Google-search summarization with the
    technical indicators computed over an OHLCV DataFrame.

    All ``calculate_*`` methods expect the lowercase columns produced by
    ``fetch_stock_data_yahoo`` ('date', 'open', 'high', 'low', 'close',
    'volume') and return ``None`` after logging on any failure.
    """

    def google_search(self, query: str) -> Optional[str]:
        """Run *query* through the Google Custom Search API and return an
        LLM-written summary of the result titles/snippets (None on failure)."""
        try:
            url = "https://www.googleapis.com/customsearch/v1"
            params = {
                'key': GOOGLE_API_KEY,
                'cx': SEARCH_ENGINE_ID,
                'q': query
            }
            response = requests.get(url, params=params)
            response.raise_for_status()
            search_results = response.json()
            items = search_results.get('items', [])
            content = "\n\n".join([f"{item.get('title', '')}\n{item.get('snippet', '')}" for item in items])
            prompt = f"Summarize the following search results:\n\n{content}"
            summary_response = invoke_llm(prompt)
            return summary_response.content.strip()
        except Exception as e:
            logger.error(f"Error during Google Search API request: {e}")
            return None

    def extract_content_from_item(self, item: Dict) -> Optional[str]:
        """Return "title\\nsnippet" for a single search-result item."""
        try:
            snippet = item.get('snippet', '')
            title = item.get('title', '')
            return f"{title}\n{snippet}"
        except Exception as e:
            logger.error(f"Error extracting content from item: {e}")
            return None

    def calculate_moving_average(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
        """Simple moving average of the close over *window* rows."""
        try:
            result = df['close'].rolling(window=window).mean()
            return result
        except Exception as e:
            logger.error(f"Error calculating moving average: {e}")
            return None

    def calculate_rsi(self, df: pd.DataFrame, window: int = 14) -> Optional[pd.Series]:
        """Relative Strength Index (0-100) from simple rolling means of
        gains and losses over *window* rows."""
        try:
            delta = df['close'].diff()
            gain = delta.where(delta > 0, 0).rolling(window=window).mean()
            loss = -delta.where(delta < 0, 0).rolling(window=window).mean()
            rs = gain / loss
            result = 100 - (100 / (1 + rs))
            return result
        except Exception as e:
            logger.error(f"Error calculating RSI: {e}")
            return None

    def calculate_ema(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
        """Exponential moving average of the close with span *window*."""
        try:
            result = df['close'].ewm(span=window, adjust=False).mean()
            return result
        except Exception as e:
            logger.error(f"Error calculating EMA: {e}")
            return None

    def calculate_bollinger_bands(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.DataFrame]:
        """Bollinger Bands: rolling mean +/- two rolling standard deviations."""
        try:
            ma = df['close'].rolling(window=window).mean()
            std = df['close'].rolling(window=window).std()
            upper_band = ma + (std * 2)
            lower_band = ma - (std * 2)
            result = pd.DataFrame({'MA': ma, 'Upper Band': upper_band, 'Lower Band': lower_band})
            return result
        except Exception as e:
            logger.error(f"Error calculating Bollinger Bands: {e}")
            return None

    def calculate_macd(self, df: pd.DataFrame, short_window: int = 12, long_window: int = 26, signal_window: int = 9) -> Optional[pd.DataFrame]:
        """MACD line (short/long EMA difference) plus its signal-line EMA."""
        try:
            short_ema = df['close'].ewm(span=short_window, adjust=False).mean()
            long_ema = df['close'].ewm(span=long_window, adjust=False).mean()
            macd = short_ema - long_ema
            signal = macd.ewm(span=signal_window, adjust=False).mean()
            result = pd.DataFrame({'MACD': macd, 'Signal Line': signal})
            return result
        except Exception as e:
            logger.error(f"Error calculating MACD: {e}")
            return None

    def calculate_volatility(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
        """Rolling standard deviation of log returns scaled by sqrt(window)."""
        try:
            log_returns = np.log(df['close'] / df['close'].shift(1))
            result = log_returns.rolling(window=window).std() * np.sqrt(window)
            return result
        except Exception as e:
            logger.error(f"Error calculating volatility: {e}")
            return None

    def calculate_atr(self, df: pd.DataFrame, window: int = 14) -> Optional[pd.Series]:
        """Average True Range: rolling mean of the true range over *window* rows."""
        try:
            high_low = df['high'] - df['low']
            high_close = np.abs(df['high'] - df['close'].shift())
            low_close = np.abs(df['low'] - df['close'].shift())
            true_range = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
            result = true_range.rolling(window=window).mean()
            return result
        except Exception as e:
            logger.error(f"Error calculating ATR: {e}")
            return None

    def calculate_obv(self, df: pd.DataFrame) -> Optional[pd.Series]:
        """On-Balance Volume: cumulative volume signed by each close's direction."""
        try:
            result = (np.sign(df['close'].diff()) * df['volume']).fillna(0).cumsum()
            return result
        except Exception as e:
            logger.error(f"Error calculating OBV: {e}")
            return None

    def calculate_yearly_summary(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Per-year mean/max/min close and total volume.

        NOTE: mutates the caller's frame by adding a 'year' column.
        """
        try:
            df['year'] = pd.to_datetime(df['date']).dt.year
            yearly_summary = df.groupby('year').agg({
                'close': ['mean', 'max', 'min'],
                'volume': 'sum'
            })
            # Flatten the two-level ('close','mean') columns to 'close_mean' etc.
            yearly_summary.columns = ['_'.join(col) for col in yearly_summary.columns]
            return yearly_summary
        except Exception as e:
            logger.error(f"Error calculating yearly summary: {e}")
            return None

    def get_full_last_year(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Rows spanning the whole previous calendar year.

        Assumes 'date' holds ``datetime.date`` values (as produced by
        fetch_stock_data_yahoo) so the comparisons are like-typed.
        """
        try:
            today = datetime.today().date()
            last_year_start = datetime(today.year - 1, 1, 1).date()
            last_year_end = datetime(today.year - 1, 12, 31).date()
            mask = (df['date'] >= last_year_start) & (df['date'] <= last_year_end)
            result = df.loc[mask]
            return result
        except Exception as e:
            logger.error(f"Error filtering data for the last year: {e}")
            return None

    def calculate_ytd_performance(self, df: pd.DataFrame) -> Optional[float]:
        """Percent change from the first open to the last close of the
        current year (IndexError on an empty YTD slice is logged → None)."""
        try:
            today = datetime.today().date()
            year_start = datetime(today.year, 1, 1).date()
            mask = (df['date'] >= year_start) & (df['date'] <= today)
            ytd_data = df.loc[mask]
            opening_price = ytd_data.iloc[0]['open']
            closing_price = ytd_data.iloc[-1]['close']
            result = ((closing_price - opening_price) / opening_price) * 100
            return result
        except Exception as e:
            logger.error(f"Error calculating YTD performance: {e}")
            return None

    def calculate_pe_ratio(self, current_price: float, eps: float) -> Optional[float]:
        """Price/earnings ratio; a zero EPS is rejected (logged, returns None)."""
        try:
            if eps == 0:
                raise ValueError("EPS cannot be zero for P/E ratio calculation.")
            result = current_price / eps
            return result
        except Exception as e:
            logger.error(f"Error calculating P/E ratio: {e}")
            return None

    def fetch_google_snippet(self, query: str) -> Optional[str]:
        """Scrape the Google results page for *query* and return the first
        answer-box snippet among a set of known CSS classes, the literal
        "Snippet not found." when none match, or None on a request error."""
        try:
            search_url = f"https://www.google.com/search?q={query}"
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
            }
            response = requests.get(search_url, headers=headers)
            soup = BeautifulSoup(response.text, 'html.parser')
            # CSS classes Google has used for answer boxes / knowledge panels;
            # brittle by nature — NOTE(review): verify these still match.
            snippet_classes = [
                'BNeawe iBp4i AP7Wnd',
                'BNeawe s3v9rd AP7Wnd',
                'BVG0Nb',
                'kno-rdesc'
            ]
            snippet = None
            for cls in snippet_classes:
                snippet = soup.find('div', class_=cls)
                if snippet:
                    break
            return snippet.get_text() if snippet else "Snippet not found."
        except Exception as e:
            logger.error(f"Error fetching Google snippet: {e}")
            return None
225
+
226
def extract_ticker_from_response(response: str) -> Optional[str]:
    """Pull a ticker symbol out of an LLM reply.

    Replies shaped like "... is **TSLA**. ..." yield the bolded symbol;
    any other reply comes back stripped of surrounding whitespace.
    Returns None only if extraction raises unexpectedly.
    """
    marker_open, marker_close = "is **", "**."
    try:
        if marker_open not in response or marker_close not in response:
            return response.strip()
        bolded = response.split(marker_open)[1]
        return bolded.split(marker_close)[0].strip()
    except Exception as exc:
        logger.error(f"Error extracting ticker from response: {exc}")
        return None
234
+
235
def detect_translate_entity_and_ticker(query: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Run the four-stage LLM pipeline: language detection → translation to
    English → company-entity detection → ticker lookup.

    Returns ``(detected_language, detected_entity, translated_query,
    stock_ticker)``. Later elements are None as soon as a stage finds nothing;
    all four are None on an unexpected error.
    """
    try:
        # Step 1: Detect Language
        prompt = f"Detect the language for the following text: {query}"
        response = invoke_llm(prompt)
        detected_language = response.content.strip()

        # Step 2: Translate to English (if necessary)
        # NOTE(review): assumes the model replies with the bare word "English";
        # any extra wording forces an unnecessary translation round-trip.
        translated_query = query
        if detected_language != "English":
            prompt = f"Translate the following text to English: {query}"
            response = invoke_llm(prompt)
            translated_query = response.content.strip()

        # Step 3: Detect Entity
        prompt = f"Detect the entity in the following text that is a company name: {translated_query}"
        response = invoke_llm(prompt)
        detected_entity = response.content.strip()

        if not detected_entity:
            return detected_language, None, translated_query, None

        # Step 4: Get Stock Ticker
        prompt = f"What is the stock ticker symbol for the company {detected_entity}?"
        response = invoke_llm(prompt)
        stock_ticker = extract_ticker_from_response(response.content.strip())

        if not stock_ticker:
            return detected_language, detected_entity, translated_query, None

        return detected_language, detected_entity, translated_query, stock_ticker
    except Exception as e:
        logger.error(f"Error in detecting, translating, or extracting entity and ticker: {e}")
        return None, None, None, None
269
+
270
def fetch_stock_data_yahoo(symbol: str) -> pd.DataFrame:
    """Download roughly three years of daily OHLCV history for *symbol*.

    Columns are normalized to lowercase and the index is flattened into a
    'date' column of ``datetime.date`` values. Returns an empty DataFrame
    after logging if anything fails.
    """
    try:
        now = datetime.now()
        history = yf.Ticker(symbol).history(start=now - timedelta(days=3 * 365), end=now)
        history = history.rename(
            columns={"Open": "open", "High": "high", "Low": "low", "Close": "close", "Volume": "volume"}
        )
        # Move the DatetimeIndex into a plain 'date' column of date objects.
        history.reset_index(inplace=True)
        history['date'] = history['Date'].dt.date
        history = history.drop(columns=['Date'])
        return history[['date', 'open', 'high', 'low', 'close', 'volume']]
    except Exception as exc:
        logger.error(f"Failed to fetch stock data for {symbol} from Yahoo Finance: {exc}")
        return pd.DataFrame()
285
+
286
def fetch_current_stock_price(symbol: str) -> Optional[float]:
    """Return the latest available price for *symbol*, or None.

    Yahoo's ``info`` mapping does not always carry ``currentPrice`` (ETFs,
    indices, some foreign listings), and the original direct indexing raised
    KeyError — losing the price entirely. Probe the usual fields in order of
    freshness before giving up.
    """
    try:
        info = yf.Ticker(symbol).info
        for field in ("currentPrice", "regularMarketPrice", "previousClose"):
            price = info.get(field)
            if price is not None:
                return price
        logger.error(f"Failed to fetch current stock price for {symbol}: no price field in info")
        return None
    except Exception as e:
        logger.error(f"Failed to fetch current stock price for {symbol}: {e}")
        return None
294
+
295
def format_stock_data_for_gemini(stock_data: pd.DataFrame) -> str:
    """Render OHLCV rows as a plain-text table for the LLM prompt.

    Expects the columns produced by ``fetch_stock_data_yahoo`` ('date',
    'open', 'high', 'low', 'close', 'volume'). Returns a placeholder for an
    empty frame and an error string if formatting fails.
    """
    try:
        if stock_data.empty:
            return "No historical data available."
        # Collect rows in a list and join once: the original's repeated
        # `formatted_data += ...` is quadratic over ~750 daily rows.
        lines = [
            "Historical stock data for the last three years:\n",
            "Date | Open | High | Low | Close | Volume",
            "-" * 54,
        ]
        for row in stock_data.itertuples(index=False):
            lines.append(
                f"{row.date} | {row.open:.2f} | {row.high:.2f} | {row.low:.2f} | {row.close:.2f} | {int(row.volume)}"
            )
        return "\n".join(lines) + "\n"
    except Exception as e:
        logger.error(f"Error formatting stock data for Gemini: {e}")
        return "Error formatting stock data."
308
+
309
def fetch_company_info_yahoo(symbol: str) -> Dict:
    """Fetch a company profile from Yahoo Finance.

    Returns a dict of profile fields, each defaulting to "N/A" when Yahoo
    omits it, or ``{"error": message}`` if the lookup fails.
    """
    # Output key -> key inside yfinance's `info` mapping.
    field_map = {
        "name": "longName",
        "sector": "sector",
        "industry": "industry",
        "marketCap": "marketCap",
        "summary": "longBusinessSummary",
        "website": "website",
        "address": "address1",
        "city": "city",
        "state": "state",
        "country": "country",
        "phone": "phone",
    }
    try:
        info = yf.Ticker(symbol).info
        return {out_key: info.get(src_key, "N/A") for out_key, src_key in field_map.items()}
    except Exception as exc:
        logger.error(f"Error fetching company info for {symbol}: {exc}")
        return {"error": str(exc)}
329
+
330
def format_company_info_for_gemini(company_info: Dict) -> str:
    """Turn a company-profile dict into a readable text block for the prompt.

    A dict carrying an 'error' key produces an error message instead; any
    missing profile field (KeyError) yields the generic failure string.
    """
    try:
        if "error" in company_info:
            return f"Error fetching company info: {company_info['error']}"
        segments = [
            "",
            "Company Information:",
            f"Name: {company_info['name']}",
            f"Sector: {company_info['sector']}",
            f"Industry: {company_info['industry']}",
            f"Market Cap: {company_info['marketCap']}",
            f"Summary: {company_info['summary']}",
            f"Website: {company_info['website']}",
            f"Address: {company_info['address']}, {company_info['city']}, {company_info['state']}, {company_info['country']}",
            f"Phone: {company_info['phone']}",
            "",
        ]
        # Leading/trailing "" entries reproduce the original's surrounding newlines.
        return "\n".join(segments)
    except Exception as exc:
        logger.error(f"Error formatting company info for Gemini: {exc}")
        return "Error formatting company info."
347
+
348
def fetch_company_news_yahoo(symbol: str) -> List[Dict]:
    """Return Yahoo Finance's latest news items for *symbol*.

    An empty list is returned both when Yahoo has no news and when the
    request fails (the failure is logged).
    """
    try:
        articles = yf.Ticker(symbol).news
        if not articles:
            return []
        return articles
    except Exception as exc:
        logger.error(f"Failed to fetch news for {symbol} from Yahoo Finance: {exc}")
        return []
356
+
357
def format_company_news_for_gemini(news: List[Dict]) -> str:
    """Render news articles as a text block for the LLM prompt.

    Returns "No news available." for an empty list. Yahoo articles sometimes
    omit fields; the original's direct ``article['title']`` indexing let a
    single malformed article abort the whole listing with a KeyError, so each
    field now falls back to "N/A". Rows are joined once instead of built by
    repeated string concatenation.
    """
    try:
        if not news:
            return "No news available."
        blocks = ["Latest company news:\n\n"]
        for article in news:
            blocks.append(f"Title: {article.get('title', 'N/A')}\n"
                          f"Publisher: {article.get('publisher', 'N/A')}\n"
                          f"Link: {article.get('link', 'N/A')}\n"
                          f"Published: {article.get('providerPublishTime', 'N/A')}\n\n")
        return "".join(blocks)
    except Exception as e:
        logger.error(f"Error formatting company news for Gemini: {e}")
        return "Error formatting company news."
371
+
372
def send_to_gemini_for_summarization(content) -> str:
    """Ask Gemini to summarize *content* and return the summary text.

    Accepts a single string or an iterable of strings. The original signature
    claimed ``content: str`` yet unconditionally ran ``" ".join(content)``,
    which splices a space between every *character* of a plain string; join
    only when a non-string iterable is supplied.
    Returns "Error summarizing content." on failure.
    """
    try:
        unified_content = content if isinstance(content, str) else " ".join(content)
        prompt = f"Summarize the main points of this article.\n\n{unified_content}"
        response = invoke_llm(prompt)
        return response.content.strip()
    except Exception as e:
        logger.error(f"Error sending content to Gemini for summarization: {e}")
        return "Error summarizing content."
381
+
382
def answer_question_with_data(question: str, data: Dict) -> str:
    """Compose a financial-advisor prompt from *question* plus every
    collected data section and return Gemini's answer.

    Returns "Error answering question." if the LLM call fails.
    """
    try:
        sections = [f"{key}:\n{value}\n\n" for key, value in data.items()]
        data_str = "".join(sections)
        prompt = (f"You are a financial advisor. Begin your answer and only give the answer after.\n"
                  f"Using the following data, answer this question: {question}\n\nData:\n{data_str}\n"
                  f"Make your answer in the best form and professional.\n"
                  f"Don't say anything about the source of the data.\n"
                  f"If you don't have the data to answer, say this data is not available yet. If the data is not available in the stock history data, say this was a weekend and there is no data for it.")
        response = invoke_llm(prompt)
        return response.content.strip()
    except Exception as exc:
        logger.error(f"Error answering question with data: {exc}")
        return "Error answering question."
397
+
398
def calculate_metrics(stock_data: pd.DataFrame, summarizer: DataSummarizer, company_info: Dict) -> Dict[str, str]:
    """Compute the technical indicators and render each one to text.

    Returns a mapping of metric name -> human-readable string. On total
    failure returns ``{"Error": ...}`` so callers can still merge the result.

    Fixes over the original: (1) the two near-identical dict literals are
    collapsed; (2) every ``calculate_*`` helper returns None on failure and
    the original called ``.to_string()`` / ``:.2f`` formatting on it
    unconditionally, so a single failed indicator wiped out ALL metrics —
    None results are now simply omitted.
    """
    try:
        raw_metrics = {
            "Moving Average": summarizer.calculate_moving_average(stock_data),
            "RSI": summarizer.calculate_rsi(stock_data),
            "EMA": summarizer.calculate_ema(stock_data),
            "Bollinger Bands": summarizer.calculate_bollinger_bands(stock_data),
            "MACD": summarizer.calculate_macd(stock_data),
            "Volatility": summarizer.calculate_volatility(stock_data),
            "ATR": summarizer.calculate_atr(stock_data),
            "OBV": summarizer.calculate_obv(stock_data),
            "Yearly Summary": summarizer.calculate_yearly_summary(stock_data),
        }
        formatted_metrics = {name: value.to_string() for name, value in raw_metrics.items() if value is not None}

        ytd_performance = summarizer.calculate_ytd_performance(stock_data)
        if ytd_performance is not None:
            formatted_metrics["YTD Performance"] = f"{ytd_performance:.2f}%"

        # P/E only when the company reports a usable (non-zero) trailing EPS.
        eps = company_info.get('trailingEps', None)
        if eps:
            current_price = stock_data.iloc[-1]['close']
            pe_ratio = summarizer.calculate_pe_ratio(current_price, eps)
            if pe_ratio is not None:
                formatted_metrics["P/E Ratio"] = f"{pe_ratio:.2f}"
        return formatted_metrics
    except Exception as e:
        logger.error(f"Error calculating metrics: {e}")
        return {"Error": "Error calculating metrics"}
444
+
445
def prepare_data(formatted_stock_data: str, formatted_company_info: str, formatted_company_news: str,
                 google_results: str, formatted_metrics: Dict[str, str], google_snippet: str, rag_response: str) -> Dict[str, str]:
    """Assemble every data source into the single dict fed to the LLM.

    The metrics dict is kept whole under "Calculations" and additionally
    flattened into the top level, so each metric is reachable by name.
    """
    merged = {
        "Formatted Stock Data": formatted_stock_data,
        "Formatted Company Info": formatted_company_info,
        "Formatted Company News": formatted_company_news,
        "Google Search Results": google_results,
        "Google Snippet": google_snippet,
        "RAG Response": rag_response,
        "Calculations": formatted_metrics,
        **formatted_metrics,
    }
    return merged
458
+
459
@app.route('/ask', methods=['POST'])
def ask():
    """POST /ask — answer a financial question from JSON body {"question": ...}.

    Pipeline: the LLM detects language / company / ticker. When a ticker is
    found, Yahoo Finance data, news, current price, Google search, snippet
    scraping and the RAG lookup all run concurrently and the combined data is
    handed to the LLM; otherwise only the search + RAG sources are used.
    Responds with {"answer": ...} or a 500 {"error": ...} payload.
    """
    try:
        user_input = request.json.get('question')
        summarizer = DataSummarizer()
        # NOTE(review): `language` is detected but never used below.
        language, entity, translation, stock_ticker = detect_translate_entity_and_ticker(user_input)
        if entity and stock_ticker:
            # Company identified: fan out all data fetches in parallel.
            with ThreadPoolExecutor() as executor:
                futures = {
                    executor.submit(fetch_stock_data_yahoo, stock_ticker): "stock_data",
                    executor.submit(fetch_company_info_yahoo, stock_ticker): "company_info",
                    executor.submit(fetch_company_news_yahoo, stock_ticker): "company_news",
                    executor.submit(fetch_current_stock_price, stock_ticker): "current_stock_price",
                    executor.submit(get_answer, user_input): "rag_response",
                    executor.submit(summarizer.google_search, user_input): "google_results",
                    executor.submit(summarizer.fetch_google_snippet, user_input): "google_snippet"
                }
                # Map each completed future back to its label.
                results = {futures[future]: future.result() for future in as_completed(futures)}
            stock_data = results.get("stock_data", pd.DataFrame())
            formatted_stock_data = format_stock_data_for_gemini(stock_data) if not stock_data.empty else "No historical data available."
            company_info = results.get("company_info", {})
            formatted_company_info = format_company_info_for_gemini(company_info) if company_info else "No company info available."
            company_news = results.get("company_news", [])
            formatted_company_news = format_company_news_for_gemini(company_news) if company_news else "No news available."
            current_stock_price = results.get("current_stock_price", None)
            formatted_metrics = calculate_metrics(stock_data, summarizer, company_info) if not stock_data.empty else {"Error": "No stock data for metrics"}
            google_results = results.get("google_results", "No additional news found through Google Search.")
            google_snippet = results.get("google_snippet", "Snippet not found.")
            rag_response = results.get("rag_response", "No response from RAG.")
            collected_data = prepare_data(formatted_stock_data, formatted_company_info, formatted_company_news, google_results, formatted_metrics, google_snippet, rag_response)
            collected_data["Current Stock Price"] = f"${current_stock_price:.2f}" if current_stock_price is not None else "N/A"
            # The translated (English) query is used for the ticker path.
            answer = answer_question_with_data(f"{translation}", collected_data)
            return jsonify({"answer": answer})
        else:
            # No company detected: answer from web search + RAG only.
            with ThreadPoolExecutor() as executor:
                futures = {
                    executor.submit(get_answer, user_input): "rag_response",
                    executor.submit(summarizer.google_search, user_input): "google_results",
                    executor.submit(summarizer.fetch_google_snippet, user_input): "google_snippet"
                }
                results = {futures[future]: future.result() for future in as_completed(futures)}
            google_results = results.get("google_results", "No additional news found through Google Search.")
            google_snippet = results.get("google_snippet", "Snippet not found.")
            rag_response = results.get("rag_response", "No response from RAG.")
            collected_data = prepare_data("", "", "", google_results, {}, google_snippet, rag_response)
            answer = answer_question_with_data(f"{user_input}", collected_data)
            return jsonify({"answer": answer})
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return jsonify({"error": "An error occurred while processing your request. Please try again later."}), 500
509
+
510
+ # Streamlit App
511
def send_question_to_api(question):
    """POST *question* to the local Flask /ask endpoint and return the answer
    text, or a short error description for a non-200 response."""
    url = 'http://localhost:5000/ask'
    headers = {'Content-Type': 'application/json'}
    payload = json.dumps({'question': question})
    response = requests.post(url, headers=headers, data=payload)
    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"
    return response.json().get('answer')
520
+
521
def run_streamlit():
    """Minimal Streamlit front end: sends questions to the local Flask /ask
    endpoint via send_question_to_api and keeps a Q/A history across reruns."""
    st.title("Financial Data Chatbot Tester")
    st.write("Enter your question below and get a response from the chatbot.")
    # st.session_state survives Streamlit's script reruns; seed history once.
    if 'history' not in st.session_state:
        st.session_state.history = []
    user_input = st.text_input("Your question:", "")
    if st.button("Submit"):
        if user_input:
            with st.spinner('Getting the answer...'):
                answer = send_question_to_api(user_input)
                st.session_state.history.append((user_input, answer))
                st.success(answer)
        else:
            st.warning("Please enter a question before submitting.")
    # Replay the whole conversation below the input on every rerun.
    if st.session_state.history:
        st.write("### History")
        for idx, (question, answer) in enumerate(st.session_state.history, 1):
            st.write(f"**Q{idx}:** {question}")
            st.write(f"**A{idx}:** {answer}")
            st.write("---")
541
+
542
if __name__ == '__main__':
    # Serve the Flask API on a background thread so the Streamlit UI below can
    # reach http://localhost:5000/ask from the same process.
    # NOTE(review): presumably launched via `streamlit run`; confirm the Flask
    # thread is not started again on Streamlit script reruns.
    threading.Thread(target=lambda: app.run(host='0.0.0.0', port=5000)).start()
    run_streamlit()
app2.py ADDED
@@ -0,0 +1,560 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import yfinance as yf
4
+ import pandas as pd
5
+ from datetime import datetime, timedelta
6
+ import logging
7
+ from concurrent.futures import ThreadPoolExecutor, as_completed
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ from config import Config
10
+ import numpy as np
11
+ from typing import Optional, Tuple, List, Dict
12
+ from rag import get_answer
13
+
14
+ # Set up logging
15
+ logging.basicConfig(level=logging.DEBUG,
16
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
17
+ handlers=[logging.FileHandler("app.log"),
18
+ logging.StreamHandler()])
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # Initialize the Gemini model
23
+ llm = ChatGoogleGenerativeAI(api_key=Config.GEMINI_API_KEY, model="gemini-1.5-flash-latest", temperature=0.5)
24
+
25
+ # Configuration for Google Custom Search API
26
+ GOOGLE_API_KEY = Config.GOOGLE_API_KEY
27
+ SEARCH_ENGINE_ID = Config.SEARCH_ENGINE_ID
28
+
29
def fetch_google_snippet(query: str) -> Optional[str]:
    """Scrape google.com search results for *query* and return the text of
    the first answer-box div among the known CSS classes, the literal
    "Snippet not found." when none match, or None on a request/parse error."""
    known_classes = (
        'BNeawe iBp4i AP7Wnd',
        'BNeawe s3v9rd AP7Wnd',
        'BVG0Nb',
        'kno-rdesc',
    )
    try:
        page = requests.get(
            f"https://www.google.com/search?q={query}",
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
            },
        )
        soup = BeautifulSoup(page.text, 'html.parser')
        for css_class in known_classes:
            hit = soup.find('div', class_=css_class)
            if hit:
                return hit.get_text()
        return "Snippet not found."
    except Exception as exc:
        logger.error(f"Error fetching Google snippet: {exc}")
        return None
51
+
52
+ class DataSummarizer:
53
+ def __init__(self):
54
+ pass
55
+
56
+ def google_search(self, query: str) -> Optional[Dict]:
57
+ try:
58
+ url = "https://www.googleapis.com/customsearch/v1"
59
+ params = {
60
+ 'key': GOOGLE_API_KEY,
61
+ 'cx': SEARCH_ENGINE_ID,
62
+ 'q': query
63
+ }
64
+ response = requests.get(url, params=params)
65
+ response.raise_for_status()
66
+ return response.json()
67
+ except Exception as e:
68
+ logger.error(f"Error during Google Search API request: {e}")
69
+ return None
70
+
71
+ def extract_content_from_item(self, item: Dict) -> Optional[str]:
72
+ try:
73
+ snippet = item.get('snippet', '')
74
+ title = item.get('title', '')
75
+ return f"{title}\n{snippet}"
76
+ except Exception as e:
77
+ logger.error(f"Error extracting content from item: {e}")
78
+ return None
79
+
80
+ def calculate_moving_average(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
81
+ try:
82
+ return df['close'].rolling(window=window).mean()
83
+ except Exception as e:
84
+ logger.error(f"Error calculating moving average: {e}")
85
+ return None
86
+
87
+ def calculate_rsi(self, df: pd.DataFrame, window: int = 14) -> Optional[pd.Series]:
88
+ try:
89
+ delta = df['close'].diff()
90
+ gain = delta.where(delta > 0, 0).rolling(window=window).mean()
91
+ loss = -delta.where(delta < 0, 0).rolling(window=window).mean()
92
+ rs = gain / loss
93
+ return 100 - (100 / (1 + rs))
94
+ except Exception as e:
95
+ logger.error(f"Error calculating RSI: {e}")
96
+ return None
97
+
98
+ def calculate_ema(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
99
+ try:
100
+ return df['close'].ewm(span=window, adjust=False).mean()
101
+ except Exception as e:
102
+ logger.error(f"Error calculating EMA: {e}")
103
+ return None
104
+
105
+ def calculate_bollinger_bands(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.DataFrame]:
106
+ try:
107
+ ma = df['close'].rolling(window=window).mean()
108
+ std = df['close'].rolling(window=window).std()
109
+ upper_band = ma + (std * 2)
110
+ lower_band = ma - (std * 2)
111
+ return pd.DataFrame({'MA': ma, 'Upper Band': upper_band, 'Lower Band': lower_band})
112
+ except Exception as e:
113
+ logger.error(f"Error calculating Bollinger Bands: {e}")
114
+ return None
115
+
116
+ def calculate_macd(self, df: pd.DataFrame, short_window: int = 12, long_window: int = 26, signal_window: int = 9) -> \
117
+ Optional[pd.DataFrame]:
118
+ try:
119
+ short_ema = df['close'].ewm(span=short_window, adjust=False).mean()
120
+ long_ema = df['close'].ewm(span=long_window, adjust=False).mean()
121
+ macd = short_ema - long_ema
122
+ signal = macd.ewm(span=signal_window, adjust=False).mean()
123
+ return pd.DataFrame({'MACD': macd, 'Signal Line': signal})
124
+ except Exception as e:
125
+ logger.error(f"Error calculating MACD: {e}")
126
+ return None
127
+
128
+ def calculate_volatility(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
129
+ try:
130
+ log_returns = np.log(df['close'] / df['close'].shift(1))
131
+ return log_returns.rolling(window=window).std() * np.sqrt(window)
132
+ except Exception as e:
133
+ logger.error(f"Error calculating volatility: {e}")
134
+ return None
135
+
136
+ def calculate_atr(self, df: pd.DataFrame, window: int = 14) -> Optional[pd.Series]:
137
+ try:
138
+ high_low = df['high'] - df['low']
139
+ high_close = np.abs(df['high'] - df['close'].shift())
140
+ low_close = np.abs(df['low'] - df['close'].shift())
141
+ true_range = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
142
+ return true_range.rolling(window=window).mean()
143
+ except Exception as e:
144
+ logger.error(f"Error calculating ATR: {e}")
145
+ return None
146
+
147
+ def calculate_obv(self, df: pd.DataFrame) -> Optional[pd.Series]:
148
+ try:
149
+ return (np.sign(df['close'].diff()) * df['volume']).fillna(0).cumsum()
150
+ except Exception as e:
151
+ logger.error(f"Error calculating OBV: {e}")
152
+ return None
153
+
154
+ def calculate_yearly_summary(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
155
+ try:
156
+ df['year'] = pd.to_datetime(df['date']).dt.year
157
+ yearly_summary = df.groupby('year').agg({
158
+ 'close': ['mean', 'max', 'min'],
159
+ 'volume': 'sum'
160
+ })
161
+ yearly_summary.columns = ['_'.join(col) for col in yearly_summary.columns]
162
+ return yearly_summary
163
+ except Exception as e:
164
+ logger.error(f"Error calculating yearly summary: {e}")
165
+ return None
166
+
167
+ def get_full_last_year(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
168
+ try:
169
+ today = datetime.today().date()
170
+ last_year_start = datetime(today.year - 1, 1, 1).date()
171
+ last_year_end = datetime(today.year - 1, 12, 31).date()
172
+ mask = (df['date'] >= last_year_start) & (df['date'] <= last_year_end)
173
+ return df.loc[mask]
174
+ except Exception as e:
175
+ logger.error(f"Error filtering data for the last year: {e}")
176
+ return None
177
+
178
+ def calculate_ytd_performance(self, df: pd.DataFrame) -> Optional[float]:
179
+ try:
180
+ today = datetime.today().date()
181
+ year_start = datetime(today.year, 1, 1).date()
182
+ mask = (df['date'] >= year_start) & (df['date'] <= today)
183
+ ytd_data = df.loc[mask]
184
+ opening_price = ytd_data.iloc[0]['open']
185
+ closing_price = ytd_data.iloc[-1]['close']
186
+ return ((closing_price - opening_price) / opening_price) * 100
187
+ except Exception as e:
188
+ logger.error(f"Error calculating YTD performance: {e}")
189
+ return None
190
+
191
+ def calculate_pe_ratio(self, current_price: float, eps: float) -> Optional[float]:
192
+ try:
193
+ if eps == 0:
194
+ raise ValueError("EPS cannot be zero for P/E ratio calculation.")
195
+ return current_price / eps
196
+ except Exception as e:
197
+ logger.error(f"Error calculating P/E ratio: {e}")
198
+ return None
199
+
200
    def fetch_google_snippet(self, query: str) -> Optional[str]:
        """Fetch a Google answer-box snippet for `query`.

        Thin wrapper that delegates to a module-level `fetch_google_snippet`
        helper; returns None when that helper raises.
        NOTE(review): the module-level helper is not visible in this file
        excerpt — if none exists, the call raises NameError (caught below)
        and this always returns None. Confirm the definition/import.
        """
        try:
            return fetch_google_snippet(query)
        except Exception as e:
            logger.error(f"Error fetching Google snippet: {e}")
            return None
206
+
207
def extract_ticker_from_response(response: str) -> Optional[str]:
    """Pull a ticker out of an LLM answer shaped like '... is **TICK**. ...'.

    Falls back to the stripped raw response when the bold markers are
    absent; returns None only if parsing itself raises.
    """
    try:
        has_bold_ticker = "is **" in response and "**." in response
        if not has_bold_ticker:
            return response.strip()
        return response.split("is **")[1].split("**.")[0].strip()
    except Exception as e:
        logger.error(f"Error extracting ticker from response: {e}")
        return None
215
+
216
def detect_translate_entity_and_ticker(query: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Run a four-step LLM pipeline over the raw user query.

    Returns (detected_language, detected_entity, translated_query,
    stock_ticker); all four are None when any step raises.
    """
    try:
        # Step 1: language identification.
        prompt = f"Detect the language for the following text: {query}"
        response = llm.invoke(prompt)
        detected_language = response.content.strip()

        # Step 2: translate to English only when necessary.
        translated_query = query
        if detected_language != "English":
            prompt = f"Translate the following text to English: {query}"
            response = llm.invoke(prompt)
            translated_query = response.content.strip()

        # Step 3: company-name extraction from the (translated) text.
        prompt = f"Detect the entity in the following text that is a company name: {translated_query}"
        response = llm.invoke(prompt)
        detected_entity = response.content.strip()

        # Step 4: map the company name to a ticker symbol.
        prompt = f"What is the stock ticker symbol for the company {detected_entity}?"
        response = llm.invoke(prompt)
        stock_ticker = extract_ticker_from_response(response.content.strip())

        return detected_language, detected_entity, translated_query, stock_ticker
    except Exception as e:
        logger.error(f"Error in detecting, translating, or extracting entity and ticker: {e}")
        return None, None, None, None
240
+
241
def fetch_stock_data_yahoo(symbol: str) -> pd.DataFrame:
    """Download ~3 years of daily OHLCV for `symbol` from Yahoo Finance.

    Returns a DataFrame with columns date/open/high/low/close/volume,
    or an empty DataFrame on any failure (logged).
    """
    try:
        stock = yf.Ticker(symbol)
        logger.info(f"Fetching data for symbol: {symbol}")

        end_date = datetime.now()
        start_date = end_date - timedelta(days=3 * 365)

        historical_data = stock.history(start=start_date, end=end_date)
        if historical_data.empty:
            raise ValueError(f"No historical data found for symbol: {symbol}")

        # Normalize Yahoo's capitalized column names to the lowercase schema
        # used by the indicator helpers.
        historical_data = historical_data.rename(
            columns={"Open": "open", "High": "high", "Low": "low", "Close": "close", "Volume": "volume"}
        )

        # Move the DatetimeIndex into a plain date-only `date` column.
        historical_data.reset_index(inplace=True)
        historical_data['date'] = historical_data['Date'].dt.date
        historical_data = historical_data.drop(columns=['Date'])
        historical_data = historical_data[['date', 'open', 'high', 'low', 'close', 'volume']]

        if 'close' not in historical_data.columns:
            raise KeyError("The historical data must contain a 'close' column.")

        return historical_data
    except Exception as e:
        logger.error(f"Failed to fetch stock data for {symbol} from Yahoo Finance: {e}")
        return pd.DataFrame()
269
+
270
def fetch_current_stock_price(symbol: str) -> Optional[float]:
    """Return Yahoo Finance's live 'currentPrice' for `symbol`, or None.

    A missing 'currentPrice' key raises KeyError, which is caught and
    logged like any other failure.
    """
    try:
        stock = yf.Ticker(symbol)
        return stock.info['currentPrice']
    except Exception as e:
        logger.error(f"Failed to fetch current stock price for {symbol}: {e}")
        return None
277
+
278
def format_stock_data_for_gemini(stock_data: pd.DataFrame) -> str:
    """Render the OHLCV frame as a plain-text table for the LLM prompt.

    Fix: builds the table with a single join instead of repeated string
    concatenation (quadratic over thousands of daily rows), and iterates
    with itertuples rather than the much slower iterrows. Output is
    byte-identical to the original formatting.
    """
    try:
        if stock_data.empty:
            return "No historical data available."

        header = ("Historical stock data for the last three years:\n\n"
                  "Date | Open | High | Low | Close | Volume\n"
                  "------------------------------------------------------\n")
        rows = [
            f"{row.date} | {row.open:.2f} | {row.high:.2f} | {row.low:.2f} | {row.close:.2f} | {int(row.volume)}\n"
            for row in stock_data.itertuples(index=False)
        ]
        return header + "".join(rows)
    except Exception as e:
        logger.error(f"Error formatting stock data for Gemini: {e}")
        return "Error formatting stock data."
294
+
295
def fetch_company_info_yahoo(symbol: str) -> Dict:
    """Fetch company profile fields from Yahoo Finance.

    Returns a dict of name/sector/industry/marketCap/summary/contact
    fields ('N/A' for anything missing), or {"error": ...} on failure.
    """
    try:
        if not symbol:
            return {"error": "Invalid symbol"}

        stock = yf.Ticker(symbol)
        company_info = stock.info
        return {
            "name": company_info.get("longName", "N/A"),
            "sector": company_info.get("sector", "N/A"),
            "industry": company_info.get("industry", "N/A"),
            "marketCap": company_info.get("marketCap", "N/A"),
            "summary": company_info.get("longBusinessSummary", "N/A"),
            "website": company_info.get("website", "N/A"),
            "address": company_info.get("address1", "N/A"),
            "city": company_info.get("city", "N/A"),
            "state": company_info.get("state", "N/A"),
            "country": company_info.get("country", "N/A"),
            "phone": company_info.get("phone", "N/A")
        }
    except Exception as e:
        logger.error(f"Error fetching company info for {symbol}: {e}")
        return {"error": str(e)}
318
+
319
def format_company_info_for_gemini(company_info: Dict) -> str:
    """Render the company-info dict as a labelled text block for the prompt.

    An {"error": ...} dict is reported verbatim; formatting failures fall
    back to a generic error string.
    """
    try:
        if "error" in company_info:
            return f"Error fetching company info: {company_info['error']}"

        lines = [
            "\nCompany Information:",
            f"Name: {company_info['name']}",
            f"Sector: {company_info['sector']}",
            f"Industry: {company_info['industry']}",
            f"Market Cap: {company_info['marketCap']}",
            f"Summary: {company_info['summary']}",
            f"Website: {company_info['website']}",
            f"Address: {company_info['address']}, {company_info['city']}, {company_info['state']}, {company_info['country']}",
            f"Phone: {company_info['phone']}",
        ]
        return "\n".join(lines) + "\n"
    except Exception as e:
        logger.error(f"Error formatting company info for Gemini: {e}")
        return "Error formatting company info."
338
+
339
def fetch_company_news_yahoo(symbol: str) -> List[Dict]:
    """Return Yahoo Finance's latest news items for `symbol` ([] on failure).

    An empty feed is deliberately raised and logged so the caller always
    receives a consistent empty list.
    """
    try:
        stock = yf.Ticker(symbol)
        news = stock.news
        if not news:
            raise ValueError(f"No news found for symbol: {symbol}")
        return news
    except Exception as e:
        logger.error(f"Failed to fetch news for {symbol} from Yahoo Finance: {e}")
        return []
349
+
350
def format_company_news_for_gemini(news: List[Dict]) -> str:
    """Render a list of Yahoo Finance news items as prompt text.

    Fixes: a missing field no longer aborts formatting — news items from
    yfinance do not always carry every key, and the original raised
    KeyError on the first incomplete article (collapsing everything to the
    generic error string). Also builds the result with join instead of
    quadratic string concatenation.
    """
    try:
        if not news:
            return "No news available."

        blocks = [
            (f"Title: {article.get('title', 'N/A')}\n"
             f"Publisher: {article.get('publisher', 'N/A')}\n"
             f"Link: {article.get('link', 'N/A')}\n"
             f"Published: {article.get('providerPublishTime', 'N/A')}\n\n")
            for article in news
        ]
        return "Latest company news:\n\n" + "".join(blocks)
    except Exception as e:
        logger.error(f"Error formatting company news for Gemini: {e}")
        return "Error formatting company news."
366
+
367
def send_to_gemini_for_summarization(content: str) -> str:
    """Ask the LLM to summarize `content`; returns the stripped reply text.

    Bug fix: the original did `" ".join(content)` on a string argument
    (the caller passes a formatted string), which interleaves a space
    between every single character of the prompt. Joining is now applied
    only when a list/tuple of fragments is supplied.
    """
    try:
        if isinstance(content, (list, tuple)):
            unified_content = " ".join(content)
        else:
            unified_content = content
        prompt = f"Summarize the main points of this article.\n\n{unified_content}"
        response = llm.invoke(prompt)
        return response.content.strip()
    except Exception as e:
        logger.error(f"Error sending content to Gemini for summarization: {e}")
        return "Error summarizing content."
376
+
377
def answer_question_with_data(question: str, data: Dict) -> str:
    """Answer `question` with the LLM, grounding it in the collected data.

    `data` maps section names to preformatted text; everything is inlined
    into a single "financial advisor" prompt. Returns a generic error
    string if the model call fails.
    """
    try:
        # Flatten every data section into "key:\nvalue" blocks for the prompt.
        data_str = ""
        for key, value in data.items():
            data_str += f"{key}:\n{value}\n\n"

        prompt = (f"You are a financial advisor. Begin your answer by stating that and only give the answer after.\n"
                  f"Using the following data, answer this question: {question}\n\nData:\n{data_str}\n"
                  f"Make your answer in the best form and professional.\n"
                  f"Don't say anything about the source of the data.\n"
                  f"If you don't have the data to answer, say this data is not available yet. If the data is not available in the stock history data, say this was a weekend and there is no data for it.")
        response = llm.invoke(prompt)
        return response.content.strip()
    except Exception as e:
        logger.error(f"Error answering question with data: {e}")
        return "Error answering question."
393
+
394
def format_google_results(google_results: Optional[Dict], summarizer: DataSummarizer, query: str) -> str:
    """Combine Custom Search items and a scraped answer-box snippet into text.

    Two snippet queries are tried; the plain query is the fallback when the
    "answer only" variant returns nothing useful.
    """
    try:
        if google_results:
            google_content = [summarizer.extract_content_from_item(item) for item in google_results.get('items', [])]
            formatted_google_content = "\n\n".join(google_content)
        else:
            formatted_google_content = "No additional news found through Google Search."

        snippet_query1 = f"{query} I want the answer only"
        snippet_query2 = f"{query}"

        google_snippet1 = summarizer.fetch_google_snippet(snippet_query1)
        google_snippet2 = summarizer.fetch_google_snippet(snippet_query2)

        # Prefer the targeted query's snippet when it actually found one.
        google_snippet = google_snippet1 if google_snippet1 and google_snippet1 != "Snippet not found." else google_snippet2
        formatted_google_content += f"\n\nGoogle Snippet: {google_snippet}"

        return formatted_google_content
    except Exception as e:
        logger.error(f"Error formatting Google results: {e}")
        return "Error formatting Google results."
415
+
416
def calculate_metrics(stock_data: pd.DataFrame, summarizer: DataSummarizer, company_info: Dict) -> Dict[str, str]:
    """Compute every technical indicator and return them as display strings.

    The P/E ratio is appended only when the company reports a truthy
    'trailingEps'; the original duplicated the entire dict literal just to
    add that one key. Any failure (e.g. a metric helper returning None,
    making .to_string() raise) is logged and yields an error dict.
    """
    try:
        formatted_metrics = {
            "Moving Average": summarizer.calculate_moving_average(stock_data).to_string(),
            "RSI": summarizer.calculate_rsi(stock_data).to_string(),
            "EMA": summarizer.calculate_ema(stock_data).to_string(),
            "Bollinger Bands": summarizer.calculate_bollinger_bands(stock_data).to_string(),
            "MACD": summarizer.calculate_macd(stock_data).to_string(),
            "Volatility": summarizer.calculate_volatility(stock_data).to_string(),
            "ATR": summarizer.calculate_atr(stock_data).to_string(),
            "OBV": summarizer.calculate_obv(stock_data).to_string(),
            "Yearly Summary": summarizer.calculate_yearly_summary(stock_data).to_string(),
            "YTD Performance": f"{summarizer.calculate_ytd_performance(stock_data):.2f}%",
        }

        eps = company_info.get('trailingEps', None)
        if eps:
            current_price = stock_data.iloc[-1]['close']
            pe_ratio = summarizer.calculate_pe_ratio(current_price, eps)
            formatted_metrics["P/E Ratio"] = f"{pe_ratio:.2f}"

        return formatted_metrics
    except Exception as e:
        logger.error(f"Error calculating metrics: {e}")
        return {"Error": "Error calculating metrics"}
464
+
465
def prepare_data(formatted_stock_data: str, formatted_company_info: str, formatted_company_news: str,
                 summarized_google_content: str, formatted_metrics: Dict[str, str]) -> Dict[str, str]:
    """Assemble every formatted data source into the dict fed to the LLM.

    Each metric is flattened in as its own key (unchanged behavior); the
    combined "Calculations" entry is now rendered as text — the original
    stored the raw dict there, violating the declared Dict[str, str]
    contract and duplicating the same data in two shapes.
    """
    collected_data = {
        "Formatted Stock Data": formatted_stock_data,
        "Formatted Company Info": formatted_company_info,
        "Formatted Company News": formatted_company_news,
        "Google Search Results": summarized_google_content,
        "Calculations": "\n".join(f"{name}:\n{value}" for name, value in formatted_metrics.items()),
    }
    # Flatten each metric in as its own top-level key as well.
    collected_data.update(formatted_metrics)
    return collected_data
476
+
477
def translate_response(response: str, target_language: str) -> str:
    """Translate `response` into `target_language` via the LLM.

    On any failure the untranslated text is returned so the caller always
    has something to show the user.
    """
    try:
        translation_prompt = f"Translate the following text to {target_language}: {response}"
        result = llm.invoke(translation_prompt)
        return result.content.strip()
    except Exception as e:
        logger.error(f"Error translating response: {e}")
        return response
485
+
486
def main():
    """Interactive console loop for the financial-data chatbot.

    For each user query: detect language/entity/ticker via the LLM, fetch
    all external data sources concurrently, compute indicator strings,
    merge everything (plus a RAG answer) into one context dict, ask the
    LLM for the final answer, and translate it back when the query was not
    in English. Exits on 'exit'/'quit'/'bye'.
    """
    print("Welcome to the Financial Data Chatbot. How can I assist you today?")

    summarizer = DataSummarizer()
    conversation_history = []  # running transcript fed back into each prompt

    while True:
        user_input = input("You: ")

        if user_input.lower() in ['exit', 'quit', 'bye']:
            print("Goodbye! Have a great day!")
            break

        conversation_history.append(f"You: {user_input}")

        try:
            # Detect language, entity, translation, and stock ticker
            language, entity, translation, stock_ticker = detect_translate_entity_and_ticker(user_input)

            if language and entity and translation and stock_ticker:
                # Fan out the five independent network fetches in parallel.
                with ThreadPoolExecutor() as executor:
                    futures = {
                        executor.submit(fetch_stock_data_yahoo, stock_ticker): "stock_data",
                        executor.submit(fetch_company_info_yahoo, stock_ticker): "company_info",
                        executor.submit(fetch_company_news_yahoo, stock_ticker): "company_news",
                        executor.submit(fetch_current_stock_price, stock_ticker): "current_stock_price",
                        executor.submit(summarizer.google_search, f"{user_input} latest financial news"): "google_results"
                    }
                    results = {futures[future]: future.result() for future in as_completed(futures)}

                stock_data = results["stock_data"]
                formatted_stock_data = format_stock_data_for_gemini(stock_data)
                company_info = results["company_info"]
                formatted_company_info = format_company_info_for_gemini(company_info)
                company_news = results["company_news"]
                formatted_company_news = format_company_news_for_gemini(company_news)
                current_stock_price = results["current_stock_price"]

                google_results = results["google_results"]
                formatted_google_content = format_google_results(google_results, summarizer, user_input)
                summarized_google_content = send_to_gemini_for_summarization(formatted_google_content)

                formatted_metrics = calculate_metrics(stock_data, summarizer, company_info)

                collected_data = prepare_data(formatted_stock_data, formatted_company_info, formatted_company_news,
                                              summarized_google_content, formatted_metrics)
                collected_data["Current Stock Price"] = f"${current_stock_price:.2f}" if current_stock_price else "N/A"

                # Augment with the retrieval-based answer from the RAG pipeline.
                rag_response = get_answer(user_input)
                collected_data["RAG Response"] = rag_response

                conversation_history.append(f"RAG Response: {rag_response}")
                history_context = "\n".join(conversation_history)

                answer = answer_question_with_data(f"{history_context}\n\nUser's query: {user_input}", collected_data)

                # Reply in the user's own language when it wasn't English.
                if language != "English":
                    answer = translate_response(answer, language)

                print(f"\nBot: {answer}")
                conversation_history.append(f"Bot: {answer}")

            else:
                response = "I'm sorry, I couldn't process your request. Could you please rephrase?"
                print(f"Bot: {response}")
                conversation_history.append(f"Bot: {response}")

        except Exception as e:
            logger.error(f"An error occurred: {e}")
            response = "An error occurred while processing your request. Please try again later."
            print(f"Bot: {response}")
            conversation_history.append(f"Bot: {response}")


if __name__ == "__main__":
    main()
bm25retriever.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df285be2ae20135ec5219dd34edf52abe2c630b6372f33f1502e48fd52042526
3
+ size 4215997
chain.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.prompts import (
2
+ SystemMessagePromptTemplate,
3
+ HumanMessagePromptTemplate,
4
+ ChatPromptTemplate,
5
+ MessagesPlaceholder
6
+ )
7
+ from langchain.chains import ConversationChain
8
+
9
class Chain:
    """Small convenience wrapper around a LangChain chat model.

    Optionally holds a conversation-memory object used to build a
    history-aware ConversationChain.
    """

    def __init__(self, llm, history=None):
        # NOTE(review): `self.history` is only assigned when `history` is
        # given, so `get_chain_with_history` raises AttributeError on an
        # instance constructed without one — confirm callers always pass it.
        self.llm = llm
        # self.chain = self.get_conversational_chain()
        if history is not None:
            self.history = history

    def run_conversational_chain(self, prompt_template):
        """Send a single prompt to the model and return its text content."""
        ans = self.llm.invoke(prompt_template).content

        return ans

    def get_chain_with_history(self):
        """Build a ConversationChain that answers strictly from provided context."""
        system_msg_template = SystemMessagePromptTemplate.from_template(template="""Answer the question as truthfully as possible using the provided context,
        and if the answer is not contained within the text below, say 'I don't know'""")
        human_msg_template = HumanMessagePromptTemplate.from_template(template="{input}")
        # Prompt layout: system rules, then the running history, then the new input.
        prompt_template = ChatPromptTemplate.from_messages([system_msg_template, MessagesPlaceholder(variable_name="history"), human_msg_template])
        conversation = ConversationChain(memory=self.history, prompt=prompt_template, llm=self.llm, verbose=True)
        return conversation
chat.py ADDED
@@ -0,0 +1,667 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import yfinance as yf
4
+ import pandas as pd
5
+ from datetime import datetime, timedelta
6
+ import logging
7
+ from concurrent.futures import ThreadPoolExecutor, as_completed
8
+ from langchain_google_genai import ChatGoogleGenerativeAI
9
+ from config import Config
10
+ import numpy as np
11
+ from typing import Optional, Tuple, List, Dict
12
+ from rag import get_answer
13
+ import time
14
+ from tenacity import retry, stop_after_attempt, wait_exponential
15
+
16
+ # Set up logging
17
+ logging.basicConfig(level=logging.DEBUG,
18
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
19
+ handlers=[logging.FileHandler("app.log"),
20
+ logging.StreamHandler()])
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+ # Initialize the Gemini model
25
+ llm = ChatGoogleGenerativeAI(api_key=Config.GEMINI_API_KEY, model="gemini-1.5-flash-latest", temperature=0.5)
26
+
27
+ # Configuration for Google Custom Search API
28
+ GOOGLE_API_KEY = Config.GOOGLE_API_KEY
29
+ SEARCH_ENGINE_ID = Config.SEARCH_ENGINE_ID
30
+
31
+
32
@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=2, max=8), reraise=True)
def invoke_llm(prompt):
    """Call the Gemini model, retrying up to 5 times with 2-8s exponential backoff.

    With reraise=True the last underlying exception propagates after the
    final attempt instead of a tenacity RetryError.
    """
    return llm.invoke(prompt)
35
+
36
+
37
class DataSummarizer:
    """Stateless helper bundling Google-search summarization and the
    technical-indicator calculations used to build the LLM prompt.

    Every method catches its own exceptions, logs them, and returns None
    (or a sentinel string) so one failed metric never aborts the pipeline.
    Most methods also log their wall-clock duration at INFO level.
    """

    def __init__(self):
        # No per-instance state; the class only namespaces the helpers.
        pass

    def google_search(self, query: str) -> Optional[str]:
        """Run a Custom Search query and return a Gemini summary of the hits."""
        start_time = time.time()
        try:
            url = "https://www.googleapis.com/customsearch/v1"
            params = {
                'key': GOOGLE_API_KEY,
                'cx': SEARCH_ENGINE_ID,
                'q': query
            }
            response = requests.get(url, params=params)
            response.raise_for_status()
            search_results = response.json()
            logger.info("google_search took %.2f seconds", time.time() - start_time)

            # Summarize the search results using Gemini
            items = search_results.get('items', [])
            content = "\n\n".join([f"{item.get('title', '')}\n{item.get('snippet', '')}" for item in items])
            prompt = f"Summarize the following search results:\n\n{content}"
            summary_response = invoke_llm(prompt)
            return summary_response.content.strip()
        except Exception as e:
            logger.error(f"Error during Google Search API request: {e}")
            return None

    def extract_content_from_item(self, item: Dict) -> Optional[str]:
        """Collapse one search-result item to 'title<newline>snippet'."""
        try:
            snippet = item.get('snippet', '')
            title = item.get('title', '')
            return f"{title}\n{snippet}"
        except Exception as e:
            logger.error(f"Error extracting content from item: {e}")
            return None

    def calculate_moving_average(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
        """Simple moving average of 'close' (first window-1 rows are NaN)."""
        start_time = time.time()
        try:
            result = df['close'].rolling(window=window).mean()
            logger.info("calculate_moving_average took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating moving average: {e}")
            return None

    def calculate_rsi(self, df: pd.DataFrame, window: int = 14) -> Optional[pd.Series]:
        """Relative Strength Index over `window` periods (0-100 scale)."""
        start_time = time.time()
        try:
            delta = df['close'].diff()
            gain = delta.where(delta > 0, 0).rolling(window=window).mean()
            loss = -delta.where(delta < 0, 0).rolling(window=window).mean()
            rs = gain / loss
            result = 100 - (100 / (1 + rs))
            logger.info("calculate_rsi took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating RSI: {e}")
            return None

    def calculate_ema(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
        """Exponential moving average of 'close' with span=`window`."""
        start_time = time.time()
        try:
            result = df['close'].ewm(span=window, adjust=False).mean()
            logger.info("calculate_ema took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating EMA: {e}")
            return None

    def calculate_bollinger_bands(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.DataFrame]:
        """Bollinger Bands: rolling mean +/- 2 rolling standard deviations."""
        start_time = time.time()
        try:
            ma = df['close'].rolling(window=window).mean()
            std = df['close'].rolling(window=window).std()
            upper_band = ma + (std * 2)
            lower_band = ma - (std * 2)
            result = pd.DataFrame({'MA': ma, 'Upper Band': upper_band, 'Lower Band': lower_band})
            logger.info("calculate_bollinger_bands took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating Bollinger Bands: {e}")
            return None

    def calculate_macd(self, df: pd.DataFrame, short_window: int = 12, long_window: int = 26, signal_window: int = 9) -> \
            Optional[pd.DataFrame]:
        """MACD (short EMA minus long EMA) plus its EMA signal line."""
        start_time = time.time()
        try:
            short_ema = df['close'].ewm(span=short_window, adjust=False).mean()
            long_ema = df['close'].ewm(span=long_window, adjust=False).mean()
            macd = short_ema - long_ema
            signal = macd.ewm(span=signal_window, adjust=False).mean()
            result = pd.DataFrame({'MACD': macd, 'Signal Line': signal})
            logger.info("calculate_macd took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating MACD: {e}")
            return None

    def calculate_volatility(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
        """Rolling std-dev of log returns scaled by sqrt(window)."""
        start_time = time.time()
        try:
            log_returns = np.log(df['close'] / df['close'].shift(1))
            result = log_returns.rolling(window=window).std() * np.sqrt(window)
            logger.info("calculate_volatility took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating volatility: {e}")
            return None

    def calculate_atr(self, df: pd.DataFrame, window: int = 14) -> Optional[pd.Series]:
        """Average True Range: rolling mean of the per-row true range."""
        start_time = time.time()
        try:
            high_low = df['high'] - df['low']
            high_close = np.abs(df['high'] - df['close'].shift())
            low_close = np.abs(df['low'] - df['close'].shift())
            true_range = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
            result = true_range.rolling(window=window).mean()
            logger.info("calculate_atr took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating ATR: {e}")
            return None

    def calculate_obv(self, df: pd.DataFrame) -> Optional[pd.Series]:
        """On-Balance Volume: cumulative volume signed by the close move."""
        start_time = time.time()
        try:
            result = (np.sign(df['close'].diff()) * df['volume']).fillna(0).cumsum()
            logger.info("calculate_obv took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating OBV: {e}")
            return None

    def calculate_yearly_summary(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Per-calendar-year close mean/max/min and total volume.

        NOTE(review): adds a 'year' column to the caller's DataFrame in
        place — consider copying first if that side effect is unwanted.
        """
        start_time = time.time()
        try:
            df['year'] = pd.to_datetime(df['date']).dt.year
            yearly_summary = df.groupby('year').agg({
                'close': ['mean', 'max', 'min'],
                'volume': 'sum'
            })
            # Flatten ('close', 'mean') -> 'close_mean' etc.
            yearly_summary.columns = ['_'.join(col) for col in yearly_summary.columns]
            logger.info("calculate_yearly_summary took %.2f seconds", time.time() - start_time)
            return yearly_summary
        except Exception as e:
            logger.error(f"Error calculating yearly summary: {e}")
            return None

    def get_full_last_year(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Rows dated within the previous calendar year (inclusive)."""
        start_time = time.time()
        try:
            today = datetime.today().date()
            last_year_start = datetime(today.year - 1, 1, 1).date()
            last_year_end = datetime(today.year - 1, 12, 31).date()
            mask = (df['date'] >= last_year_start) & (df['date'] <= last_year_end)
            result = df.loc[mask]
            logger.info("get_full_last_year took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error filtering data for the last year: {e}")
            return None

    def calculate_ytd_performance(self, df: pd.DataFrame) -> Optional[float]:
        """Percent change from the year's first open to the latest close."""
        start_time = time.time()
        try:
            today = datetime.today().date()
            year_start = datetime(today.year, 1, 1).date()
            mask = (df['date'] >= year_start) & (df['date'] <= today)
            ytd_data = df.loc[mask]
            # Empty YTD selection makes iloc[0] raise -> logged, returns None.
            opening_price = ytd_data.iloc[0]['open']
            closing_price = ytd_data.iloc[-1]['close']
            result = ((closing_price - opening_price) / opening_price) * 100
            logger.info("calculate_ytd_performance took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating YTD performance: {e}")
            return None

    def calculate_pe_ratio(self, current_price: float, eps: float) -> Optional[float]:
        """Price/earnings ratio; zero EPS is rejected (logged, returns None)."""
        start_time = time.time()
        try:
            if eps == 0:
                raise ValueError("EPS cannot be zero for P/E ratio calculation.")
            result = current_price / eps
            logger.info("calculate_pe_ratio took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating P/E ratio: {e}")
            return None

    def fetch_google_snippet(self, query: str) -> Optional[str]:
        """Scrape google.com search HTML for an answer-box snippet.

        Tries several known snippet CSS classes; inherently fragile since
        Google's markup changes frequently. Returns the sentinel string
        "Snippet not found." when no known class matches.
        """
        try:
            search_url = f"https://www.google.com/search?q={query}"
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
            }
            response = requests.get(search_url, headers=headers)
            soup = BeautifulSoup(response.text, 'html.parser')
            snippet_classes = [
                'BNeawe iBp4i AP7Wnd',
                'BNeawe s3v9rd AP7Wnd',
                'BVG0Nb',
                'kno-rdesc'
            ]
            snippet = None
            for cls in snippet_classes:
                snippet = soup.find('div', class_=cls)
                if snippet:
                    break
            return snippet.get_text() if snippet else "Snippet not found."
        except Exception as e:
            logger.error(f"Error fetching Google snippet: {e}")
            return None
+ return None
252
+
253
+
254
def extract_ticker_from_response(response: str) -> Optional[str]:
    """Extract a ticker from an LLM answer shaped like '... is **TICK**. ...'.

    Falls back to the stripped raw response when the bold markers are
    absent; timing is logged at INFO level, failures at ERROR.
    """
    start_time = time.time()
    try:
        if "is **" in response and "**." in response:
            ticker = response.split("is **")[1].split("**.")[0].strip()
        else:
            ticker = response.strip()
        logger.info("extract_ticker_from_response took %.2f seconds", time.time() - start_time)
        return ticker
    except Exception as e:
        logger.error(f"Error extracting ticker from response: {e}")
        return None
+ return None
267
+
268
+
269
def detect_translate_entity_and_ticker(query: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Run the four-step LLM pipeline with early-exit checks.

    Returns (detected_language, detected_entity, translated_query,
    stock_ticker). Unlike the sibling implementation, a missing entity or
    ticker returns the partial results with None in the missing slots;
    only a raised exception yields (None, None, None, None).
    """
    try:
        start_time = time.time()

        # Step 1: Detect Language
        prompt = f"Detect the language for the following text: {query}"
        response = invoke_llm(prompt)
        detected_language = response.content.strip()
        logger.info(f"Language detected: {detected_language}")

        # Step 2: Translate to English (if necessary)
        translated_query = query
        if detected_language != "English":
            prompt = f"Translate the following text to English: {query}"
            response = invoke_llm(prompt)
            translated_query = response.content.strip()
            logger.info(f"Translation completed: {translated_query}")
            print(f"Translation: {translated_query}")

        # Step 3: Detect Entity
        prompt = f"Detect the entity in the following text that is a company name: {translated_query}"
        response = invoke_llm(prompt)
        detected_entity = response.content.strip()
        logger.info(f"Entity detected: {detected_entity}")
        print(f"Entity: {detected_entity}")

        if not detected_entity:
            logger.error("No entity detected")
            return detected_language, None, translated_query, None

        # Step 4: Get Stock Ticker
        prompt = f"What is the stock ticker symbol for the company {detected_entity}?"
        response = invoke_llm(prompt)
        stock_ticker = extract_ticker_from_response(response.content.strip())

        if not stock_ticker:
            logger.error("No stock ticker detected")
            return detected_language, detected_entity, translated_query, None

        logger.info("detect_translate_entity_and_ticker took %.2f seconds", time.time() - start_time)
        return detected_language, detected_entity, translated_query, stock_ticker
    except Exception as e:
        logger.error(f"Error in detecting, translating, or extracting entity and ticker: {e}")
        return None, None, None, None
+ return None, None, None, None
313
+
314
+
315
def fetch_stock_data_yahoo(symbol: str) -> pd.DataFrame:
    """Download roughly three years of daily OHLCV history for *symbol*.

    Returns a DataFrame with columns ``[date, open, high, low, close, volume]``
    (``date`` as ``datetime.date``), or an empty DataFrame when the download
    fails or yields no rows.
    """
    t0 = time.time()
    try:
        ticker = yf.Ticker(symbol)
        logger.info(f"Fetching data for symbol: {symbol}")

        now = datetime.now()
        history = ticker.history(start=now - timedelta(days=3 * 365), end=now)
        if history.empty:
            raise ValueError(f"No historical data found for symbol: {symbol}")

        # Normalize Yahoo's capitalized column names and flatten the index.
        frame = history.rename(
            columns={"Open": "open", "High": "high", "Low": "low", "Close": "close", "Volume": "volume"}
        ).reset_index()
        frame['date'] = frame['Date'].dt.date
        frame = frame.drop(columns=['Date'])[['date', 'open', 'high', 'low', 'close', 'volume']]

        if 'close' not in frame.columns:
            raise KeyError("The historical data must contain a 'close' column.")

        logger.info("fetch_stock_data_yahoo took %.2f seconds", time.time() - t0)
        return frame
    except Exception as e:
        logger.error(f"Failed to fetch stock data for {symbol} from Yahoo Finance: {e}")
        return pd.DataFrame()
345
+
346
+
347
def fetch_current_stock_price(symbol: str) -> Optional[float]:
    """Return the latest trading price for *symbol*, or None when unavailable.

    Fix: the original indexed ``info['currentPrice']`` directly, which raises
    KeyError (swallowed into a None) for instruments that do not publish that
    field (ETFs, indices, some foreign listings). Those typically expose
    ``regularMarketPrice`` instead, so that is used as a fallback.
    """
    start_time = time.time()
    try:
        info = yf.Ticker(symbol).info
        price = info.get('currentPrice')
        if price is None:
            # Fallback field used by Yahoo for non-equity instruments.
            price = info.get('regularMarketPrice')
        logger.info("fetch_current_stock_price took %.2f seconds", time.time() - start_time)
        return price
    except Exception as e:
        logger.error(f"Failed to fetch current stock price for {symbol}: {e}")
        return None
357
+
358
+
359
def format_stock_data_for_gemini(stock_data: pd.DataFrame) -> str:
    """Render the OHLCV DataFrame as a plain-text table for the LLM prompt.

    Fix: the original appended to a string with ``+=`` inside the row loop,
    which is quadratic over ~750 daily rows; the rows are now collected in a
    list and joined once. The emitted text is byte-identical.
    """
    start_time = time.time()
    try:
        if stock_data.empty:
            return "No historical data available."

        lines = [
            "Historical stock data for the last three years:\n",
            "Date | Open | High | Low | Close | Volume",
            "------------------------------------------------------",
        ]
        for _, row in stock_data.iterrows():
            lines.append(
                f"{row['date']} | {row['open']:.2f} | {row['high']:.2f} | "
                f"{row['low']:.2f} | {row['close']:.2f} | {int(row['volume'])}"
            )
        # Single join keeps this O(n); trailing newline matches the original.
        formatted_data = "\n".join(lines) + "\n"

        logger.info("format_stock_data_for_gemini took %.2f seconds", time.time() - start_time)
        return formatted_data
    except Exception as e:
        logger.error(f"Error formatting stock data for Gemini: {e}")
        return "Error formatting stock data."
377
+
378
+
379
def fetch_company_info_yahoo(symbol: str) -> Dict:
    """Fetch descriptive company metadata for *symbol* from Yahoo Finance.

    Returns a dict of profile fields (each "N/A" when Yahoo omits it), or a
    dict with a single "error" key when the symbol is falsy or the fetch fails.
    """
    t0 = time.time()
    try:
        if not symbol:
            return {"error": "Invalid symbol"}

        info = yf.Ticker(symbol).info
        # Output key -> Yahoo field name; every lookup shares the "N/A" default.
        field_map = {
            "name": "longName",
            "sector": "sector",
            "industry": "industry",
            "marketCap": "marketCap",
            "summary": "longBusinessSummary",
            "website": "website",
            "address": "address1",
            "city": "city",
            "state": "state",
            "country": "country",
            "phone": "phone",
        }
        profile = {out_key: info.get(src_key, "N/A") for out_key, src_key in field_map.items()}
        logger.info("fetch_company_info_yahoo took %.2f seconds", time.time() - t0)
        return profile
    except Exception as e:
        logger.error(f"Error fetching company info for {symbol}: {e}")
        return {"error": str(e)}
404
+
405
+
406
def format_company_info_for_gemini(company_info: Dict) -> str:
    """Render the company-profile dict as a text section for the LLM prompt.

    A dict carrying an "error" key is turned into an error line instead.
    """
    t0 = time.time()
    try:
        if "error" in company_info:
            return f"Error fetching company info: {company_info['error']}"

        lines = [
            "",  # leading blank line, as in the original output
            "Company Information:",
            f"Name: {company_info['name']}",
            f"Sector: {company_info['sector']}",
            f"Industry: {company_info['industry']}",
            f"Market Cap: {company_info['marketCap']}",
            f"Summary: {company_info['summary']}",
            f"Website: {company_info['website']}",
            f"Address: {company_info['address']}, {company_info['city']}, "
            f"{company_info['state']}, {company_info['country']}",
            f"Phone: {company_info['phone']}",
        ]
        formatted_info = "\n".join(lines) + "\n"

        logger.info("format_company_info_for_gemini took %.2f seconds", time.time() - t0)
        return formatted_info
    except Exception as e:
        logger.error(f"Error formatting company info for Gemini: {e}")
        return "Error formatting company info."
427
+
428
+
429
def fetch_company_news_yahoo(symbol: str) -> List[Dict]:
    """Return the latest Yahoo Finance news items for *symbol*, or [] on failure."""
    t0 = time.time()
    try:
        articles = yf.Ticker(symbol).news
        if not articles:
            # An empty feed is treated like a fetch failure.
            raise ValueError(f"No news found for symbol: {symbol}")
        logger.info("fetch_company_news_yahoo took %.2f seconds", time.time() - t0)
        return articles
    except Exception as e:
        logger.error(f"Failed to fetch news for {symbol} from Yahoo Finance: {e}")
        return []
441
+
442
+
443
def format_company_news_for_gemini(news: List[Dict]) -> str:
    """Render news articles as a text section for the LLM prompt.

    Fixes: the original indexed ``article['title']`` etc. directly, so one
    article missing a field raised KeyError and discarded the entire listing
    (Yahoo news items do not always carry every key); fields now fall back to
    "N/A". ``providerPublishTime`` — a unix epoch in Yahoo's payload — is now
    rendered as a readable UTC timestamp instead of a raw integer.
    """
    start_time = time.time()
    try:
        if not news:
            return "No news available."

        parts = ["Latest company news:\n"]
        for article in news:
            published = article.get('providerPublishTime', 'N/A')
            if isinstance(published, (int, float)):
                published = datetime.utcfromtimestamp(published).strftime('%Y-%m-%d %H:%M UTC')
            parts.append(
                f"Title: {article.get('title', 'N/A')}\n"
                f"Publisher: {article.get('publisher', 'N/A')}\n"
                f"Link: {article.get('link', 'N/A')}\n"
                f"Published: {published}\n"
            )
        formatted_news = "\n".join(parts) + "\n"

        logger.info("format_company_news_for_gemini took %.2f seconds", time.time() - start_time)
        return formatted_news
    except Exception as e:
        logger.error(f"Error formatting company news for Gemini: {e}")
        return "Error formatting company news."
461
+
462
+
463
def send_to_gemini_for_summarization(content) -> str:
    """Ask the LLM to summarize *content* (a string, or a list of text chunks).

    Fix: the original ran ``" ".join(content)`` unconditionally. Because str
    is itself iterable, passing a plain string (as the old ``content: str``
    annotation suggested) interleaved a space between every single character
    before it reached the model. Strings now pass through untouched; only
    real sequences of chunks are joined.
    """
    start_time = time.time()
    try:
        unified_content = content if isinstance(content, str) else " ".join(content)
        prompt = f"Summarize the main points of this article.\n\n{unified_content}"
        response = invoke_llm(prompt)
        logger.info("send_to_gemini_for_summarization took %.2f seconds", time.time() - start_time)
        return response.content.strip()
    except Exception as e:
        logger.error(f"Error sending content to Gemini for summarization: {e}")
        return "Error summarizing content."
474
+
475
+
476
def answer_question_with_data(question: str, data: Dict) -> str:
    """Compose a financial-advisor prompt from the collected *data* and ask the LLM.

    Each entry of *data* is rendered as ``key:\\nvalue`` and embedded in a
    fixed instruction template; returns the model's stripped reply or an
    error placeholder.
    """
    t0 = time.time()
    try:
        data_str = "".join(f"{key}:\n{value}\n\n" for key, value in data.items())

        prompt = (f"You are a financial advisor. Begin your answer by stating that and only give the answer after.\n"
                  f"Using the following data, answer this question: {question}\n\nData:\n{data_str}\n"
                  f"Make your answer in the best form and professional.\n"
                  f"Don't say anything about the source of the data.\n"
                  f"If you don't have the data to answer, say this data is not available yet. If the data is not available in the stock history data, say this was a weekend and there is no data for it.")
        response = invoke_llm(prompt)
        logger.info("answer_question_with_data took %.2f seconds", time.time() - t0)
        return response.content.strip()
    except Exception as e:
        logger.error(f"Error answering question with data: {e}")
        return "Error answering question."
494
+
495
+
496
def calculate_metrics(stock_data: pd.DataFrame, summarizer: DataSummarizer, company_info: Dict) -> Dict[str, str]:
    """Compute the technical-indicator bundle embedded in the LLM prompt.

    Returns a mapping of metric name -> pre-rendered string. The "P/E Ratio"
    entry is added only when the company reports a non-zero trailing EPS.
    On any failure a single ``{"Error": ...}`` entry is returned.

    Fix: the original duplicated the ten common dict entries verbatim in both
    branches of the EPS check; they are now built once and the P/E entry is
    appended conditionally (same keys, same order, same values).
    """
    start_time = time.time()
    try:
        formatted_metrics = {
            "Moving Average": summarizer.calculate_moving_average(stock_data).to_string(),
            "RSI": summarizer.calculate_rsi(stock_data).to_string(),
            "EMA": summarizer.calculate_ema(stock_data).to_string(),
            "Bollinger Bands": summarizer.calculate_bollinger_bands(stock_data).to_string(),
            "MACD": summarizer.calculate_macd(stock_data).to_string(),
            "Volatility": summarizer.calculate_volatility(stock_data).to_string(),
            "ATR": summarizer.calculate_atr(stock_data).to_string(),
            "OBV": summarizer.calculate_obv(stock_data).to_string(),
            "Yearly Summary": summarizer.calculate_yearly_summary(stock_data).to_string(),
            "YTD Performance": f"{summarizer.calculate_ytd_performance(stock_data):.2f}%",
        }

        eps = company_info.get('trailingEps', None)
        if eps:  # truthiness deliberately skips both missing and zero EPS
            current_price = stock_data.iloc[-1]['close']
            pe_ratio = summarizer.calculate_pe_ratio(current_price, eps)
            formatted_metrics["P/E Ratio"] = f"{pe_ratio:.2f}"

        logger.info("calculate_metrics took %.2f seconds", time.time() - start_time)
        return formatted_metrics
    except Exception as e:
        logger.error(f"Error calculating metrics: {e}")
        return {"Error": "Error calculating metrics"}
546
+
547
+
548
def prepare_data(formatted_stock_data: str, formatted_company_info: str, formatted_company_news: str,
                 google_results: str, formatted_metrics: Dict[str, str], google_snippet: str, rag_response: str) -> \
        Dict[str, str]:
    """Assemble every collected data source into one dict for the LLM prompt."""
    t0 = time.time()
    collected_data = {
        "Formatted Stock Data": formatted_stock_data,
        "Formatted Company Info": formatted_company_info,
        "Formatted Company News": formatted_company_news,
        "Google Search Results": google_results,
        "Google Snippet": google_snippet,
        "RAG Response": rag_response,
        # NOTE(review): the metrics are included twice — nested under
        # "Calculations" AND merged flat below — so they appear twice in the
        # final prompt. Preserved as-is; confirm before removing either.
        "Calculations": formatted_metrics,
        **formatted_metrics,
    }
    logger.info("prepare_data took %.2f seconds", time.time() - t0)
    return collected_data
564
+
565
+
566
def main():
    """Interactive REPL: answer financial questions from Yahoo data, Google search and RAG.

    Reads queries from stdin until the user types exit/quit/bye. Every turn
    (user input, RAG response, bot answer) is appended to a running transcript
    that is prepended to the next prompt so the LLM sees conversation context.
    """
    print("Welcome to the Financial Data Chatbot. How can I assist you today?")

    summarizer = DataSummarizer()
    conversation_history = []  # running transcript fed back into each prompt

    while True:
        user_input = input("You: ")

        if user_input.lower() in ['exit', 'quit', 'bye']:
            print("Goodbye! Have a great day!")
            break

        conversation_history.append(f"You: {user_input}")

        try:
            # Detect language, entity, translation, and stock ticker
            language, entity, translation, stock_ticker = detect_translate_entity_and_ticker(user_input)

            logger.info(
                f"Detected Language: {language}, Entity: {entity}, Translation: {translation}, Stock Ticker: {stock_ticker}")

            if entity and stock_ticker:
                # Company question: fan out all data fetches in parallel and
                # collect results keyed by the label each future was given.
                with ThreadPoolExecutor() as executor:
                    futures = {
                        executor.submit(fetch_stock_data_yahoo, stock_ticker): "stock_data",
                        executor.submit(fetch_company_info_yahoo, stock_ticker): "company_info",
                        executor.submit(fetch_company_news_yahoo, stock_ticker): "company_news",
                        executor.submit(fetch_current_stock_price, stock_ticker): "current_stock_price",
                        executor.submit(get_answer, user_input): "rag_response",
                        executor.submit(summarizer.google_search, user_input): "google_results",
                        executor.submit(summarizer.fetch_google_snippet, user_input): "google_snippet"
                    }
                    results = {futures[future]: future.result() for future in as_completed(futures)}

                # Each source degrades to a placeholder string when it failed.
                stock_data = results.get("stock_data", pd.DataFrame())
                formatted_stock_data = format_stock_data_for_gemini(
                    stock_data) if not stock_data.empty else "No historical data available."

                company_info = results.get("company_info", {})
                formatted_company_info = format_company_info_for_gemini(
                    company_info) if company_info else "No company info available."

                company_news = results.get("company_news", [])
                formatted_company_news = format_company_news_for_gemini(
                    company_news) if company_news else "No news available."

                current_stock_price = results.get("current_stock_price", None)

                formatted_metrics = calculate_metrics(stock_data, summarizer,
                                                      company_info) if not stock_data.empty else {
                    "Error": "No stock data for metrics"}

                google_results = results.get("google_results", "No additional news found through Google Search.")
                google_snippet = results.get("google_snippet", "Snippet not found.")

                rag_response = results.get("rag_response", "No response from RAG.")

                collected_data = prepare_data(formatted_stock_data, formatted_company_info, formatted_company_news,
                                              google_results, formatted_metrics, google_snippet, rag_response)
                collected_data[
                    "Current Stock Price"] = f"${current_stock_price:.2f}" if current_stock_price is not None else "N/A"

                conversation_history.append(f"RAG Response: {rag_response}")
                history_context = "\n".join(conversation_history)

                # Query the LLM with the English translation so the prompt
                # template and the question share a language.
                answer = answer_question_with_data(f"{history_context}\n\nUser's query: {translation}", collected_data)

                print(f"\nBot: {answer}")
                conversation_history.append(f"Bot: {answer}")

            else:
                # No company/ticker detected: fall back to search + RAG only.
                with ThreadPoolExecutor() as executor:
                    futures = {
                        executor.submit(get_answer, user_input): "rag_response",
                        executor.submit(summarizer.google_search, user_input): "google_results",
                        executor.submit(summarizer.fetch_google_snippet, user_input): "google_snippet"
                    }
                    results = {futures[future]: future.result() for future in as_completed(futures)}

                google_results = results.get("google_results", "No additional news found through Google Search.")
                google_snippet = results.get("google_snippet", "Snippet not found.")
                rag_response = results.get("rag_response", "No response from RAG.")

                collected_data = prepare_data("", "", "", google_results, {}, google_snippet, rag_response)

                conversation_history.append(f"RAG Response: {rag_response}")
                history_context = "\n".join(conversation_history)

                answer = answer_question_with_data(f"{history_context}\n\nUser's query: {user_input}", collected_data)

                print(f"\nBot: {answer}")
                conversation_history.append(f"Bot: {answer}")

        except Exception as e:
            # Top-level guard: any per-turn failure is reported and the loop
            # continues rather than killing the chatbot.
            logger.error(f"An error occurred: {e}")
            response = "An error occurred while processing your request. Please try again later."
            print(f"Bot: {response}")
            conversation_history.append(f"Bot: {response}")


if __name__ == "__main__":
    main()
chatflask.py ADDED
@@ -0,0 +1,646 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import yfinance as yf
5
+ import pandas as pd
6
+ from datetime import datetime, timedelta
7
+ import logging
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
+ from langchain_google_genai import ChatGoogleGenerativeAI
10
+ from config import Config
11
+ import numpy as np
12
+ from typing import Optional, Tuple, List, Dict
13
+ from rag import get_answer
14
+ import time
15
+ from tenacity import retry, stop_after_attempt, wait_exponential
16
+
17
# Initialize Flask app
app = Flask(__name__)

# Set up logging: DEBUG and above is written both to app.log and to the
# console stream handler.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                    handlers=[logging.FileHandler("app.log"),
                              logging.StreamHandler()])

logger = logging.getLogger(__name__)

# Initialize the Gemini model (temperature 0.5 — moderately deterministic).
llm = ChatGoogleGenerativeAI(api_key=Config.GEMINI_API_KEY, model="gemini-1.5-flash-latest", temperature=0.5)

# Configuration for Google Custom Search API (key + engine id from Config).
GOOGLE_API_KEY = Config.GOOGLE_API_KEY
SEARCH_ENGINE_ID = Config.SEARCH_ENGINE_ID
34
+
35
+
36
@retry(stop=stop_after_attempt(5), wait=wait_exponential(multiplier=1, min=2, max=8), reraise=True)
def invoke_llm(prompt):
    """Call the Gemini model with *prompt*, retrying up to 5 times with
    exponential backoff (2s..8s) on any exception; the final failure is
    re-raised to the caller (``reraise=True``)."""
    return llm.invoke(prompt)
39
+
40
+
41
class DataSummarizer:
    """Bundles technical-indicator math plus Google search / snippet scraping.

    All indicator methods take the OHLCV DataFrame produced by
    ``fetch_stock_data_yahoo`` (lower-case columns, ``date`` as date objects),
    return a pandas object, and return None on any internal failure.

    Fix: ``calculate_yearly_summary`` previously wrote a ``'year'`` column
    into the caller's DataFrame (mutating a shared argument); it now groups
    on a derived Series and leaves the input untouched.
    """

    def __init__(self):
        pass

    def google_search(self, query: str) -> Optional[str]:
        """Run a Custom Search query and return an LLM summary of the hits."""
        start_time = time.time()
        try:
            url = "https://www.googleapis.com/customsearch/v1"
            params = {
                'key': GOOGLE_API_KEY,
                'cx': SEARCH_ENGINE_ID,
                'q': query
            }
            response = requests.get(url, params=params)
            response.raise_for_status()
            search_results = response.json()
            logger.info("google_search took %.2f seconds", time.time() - start_time)

            # Summarize title + snippet of every hit with Gemini.
            items = search_results.get('items', [])
            content = "\n\n".join([f"{item.get('title', '')}\n{item.get('snippet', '')}" for item in items])
            prompt = f"Summarize the following search results:\n\n{content}"
            summary_response = invoke_llm(prompt)
            return summary_response.content.strip()
        except Exception as e:
            logger.error(f"Error during Google Search API request: {e}")
            return None

    def extract_content_from_item(self, item: Dict) -> Optional[str]:
        """Return "title\\nsnippet" for one search-result item (None on error)."""
        try:
            snippet = item.get('snippet', '')
            title = item.get('title', '')
            return f"{title}\n{snippet}"
        except Exception as e:
            logger.error(f"Error extracting content from item: {e}")
            return None

    def calculate_moving_average(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
        """Simple moving average of 'close' over *window* rows."""
        start_time = time.time()
        try:
            result = df['close'].rolling(window=window).mean()
            logger.info("calculate_moving_average took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating moving average: {e}")
            return None

    def calculate_rsi(self, df: pd.DataFrame, window: int = 14) -> Optional[pd.Series]:
        """Relative Strength Index over *window* rows (simple-mean variant)."""
        start_time = time.time()
        try:
            delta = df['close'].diff()
            gain = delta.where(delta > 0, 0).rolling(window=window).mean()
            loss = -delta.where(delta < 0, 0).rolling(window=window).mean()
            rs = gain / loss
            result = 100 - (100 / (1 + rs))
            logger.info("calculate_rsi took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating RSI: {e}")
            return None

    def calculate_ema(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
        """Exponential moving average of 'close' with span *window*."""
        start_time = time.time()
        try:
            result = df['close'].ewm(span=window, adjust=False).mean()
            logger.info("calculate_ema took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating EMA: {e}")
            return None

    def calculate_bollinger_bands(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.DataFrame]:
        """Bollinger bands: rolling mean +/- 2 rolling standard deviations."""
        start_time = time.time()
        try:
            ma = df['close'].rolling(window=window).mean()
            std = df['close'].rolling(window=window).std()
            upper_band = ma + (std * 2)
            lower_band = ma - (std * 2)
            result = pd.DataFrame({'MA': ma, 'Upper Band': upper_band, 'Lower Band': lower_band})
            logger.info("calculate_bollinger_bands took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating Bollinger Bands: {e}")
            return None

    def calculate_macd(self, df: pd.DataFrame, short_window: int = 12, long_window: int = 26, signal_window: int = 9) -> \
            Optional[pd.DataFrame]:
        """MACD line (fast EMA - slow EMA) and its signal-line EMA."""
        start_time = time.time()
        try:
            short_ema = df['close'].ewm(span=short_window, adjust=False).mean()
            long_ema = df['close'].ewm(span=long_window, adjust=False).mean()
            macd = short_ema - long_ema
            signal = macd.ewm(span=signal_window, adjust=False).mean()
            result = pd.DataFrame({'MACD': macd, 'Signal Line': signal})
            logger.info("calculate_macd took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating MACD: {e}")
            return None

    def calculate_volatility(self, df: pd.DataFrame, window: int = 20) -> Optional[pd.Series]:
        """Rolling std-dev of log returns, scaled by sqrt(window)."""
        start_time = time.time()
        try:
            log_returns = np.log(df['close'] / df['close'].shift(1))
            result = log_returns.rolling(window=window).std() * np.sqrt(window)
            logger.info("calculate_volatility took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating volatility: {e}")
            return None

    def calculate_atr(self, df: pd.DataFrame, window: int = 14) -> Optional[pd.Series]:
        """Average True Range: rolling mean of the true range over *window* rows."""
        start_time = time.time()
        try:
            high_low = df['high'] - df['low']
            high_close = np.abs(df['high'] - df['close'].shift())
            low_close = np.abs(df['low'] - df['close'].shift())
            true_range = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
            result = true_range.rolling(window=window).mean()
            logger.info("calculate_atr took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating ATR: {e}")
            return None

    def calculate_obv(self, df: pd.DataFrame) -> Optional[pd.Series]:
        """On-Balance Volume: cumulative signed volume by close direction."""
        start_time = time.time()
        try:
            result = (np.sign(df['close'].diff()) * df['volume']).fillna(0).cumsum()
            logger.info("calculate_obv took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating OBV: {e}")
            return None

    def calculate_yearly_summary(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Per-year mean/max/min close and total volume.

        Groups on a Series derived from 'date' instead of writing a 'year'
        column into the caller's DataFrame (the original mutated its input).
        """
        start_time = time.time()
        try:
            years = pd.to_datetime(df['date']).dt.year.rename('year')
            yearly_summary = df.groupby(years).agg({
                'close': ['mean', 'max', 'min'],
                'volume': 'sum'
            })
            yearly_summary.columns = ['_'.join(col) for col in yearly_summary.columns]
            logger.info("calculate_yearly_summary took %.2f seconds", time.time() - start_time)
            return yearly_summary
        except Exception as e:
            logger.error(f"Error calculating yearly summary: {e}")
            return None

    def get_full_last_year(self, df: pd.DataFrame) -> Optional[pd.DataFrame]:
        """Rows whose 'date' falls inside the previous calendar year."""
        start_time = time.time()
        try:
            today = datetime.today().date()
            last_year_start = datetime(today.year - 1, 1, 1).date()
            last_year_end = datetime(today.year - 1, 12, 31).date()
            mask = (df['date'] >= last_year_start) & (df['date'] <= last_year_end)
            result = df.loc[mask]
            logger.info("get_full_last_year took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error filtering data for the last year: {e}")
            return None

    def calculate_ytd_performance(self, df: pd.DataFrame) -> Optional[float]:
        """Percent change from the year's first open to the latest close.

        An empty YTD slice raises IndexError on iloc[0], which is caught and
        reported as None.
        """
        start_time = time.time()
        try:
            today = datetime.today().date()
            year_start = datetime(today.year, 1, 1).date()
            mask = (df['date'] >= year_start) & (df['date'] <= today)
            ytd_data = df.loc[mask]
            opening_price = ytd_data.iloc[0]['open']
            closing_price = ytd_data.iloc[-1]['close']
            result = ((closing_price - opening_price) / opening_price) * 100
            logger.info("calculate_ytd_performance took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating YTD performance: {e}")
            return None

    def calculate_pe_ratio(self, current_price: float, eps: float) -> Optional[float]:
        """Price / earnings-per-share; None when eps is zero or on error."""
        start_time = time.time()
        try:
            if eps == 0:
                raise ValueError("EPS cannot be zero for P/E ratio calculation.")
            result = current_price / eps
            logger.info("calculate_pe_ratio took %.2f seconds", time.time() - start_time)
            return result
        except Exception as e:
            logger.error(f"Error calculating P/E ratio: {e}")
            return None

    def fetch_google_snippet(self, query: str) -> Optional[str]:
        """Scrape the answer snippet from a Google results page.

        NOTE(review): screen-scraping Google is brittle — the CSS class names
        below change without notice and requests may be blocked.
        """
        try:
            search_url = f"https://www.google.com/search?q={query}"
            headers = {
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
            }
            response = requests.get(search_url, headers=headers)
            soup = BeautifulSoup(response.text, 'html.parser')
            # Known snippet container classes, tried in priority order.
            snippet_classes = [
                'BNeawe iBp4i AP7Wnd',
                'BNeawe s3v9rd AP7Wnd',
                'BVG0Nb',
                'kno-rdesc'
            ]
            snippet = None
            for cls in snippet_classes:
                snippet = soup.find('div', class_=cls)
                if snippet:
                    break
            return snippet.get_text() if snippet else "Snippet not found."
        except Exception as e:
            logger.error(f"Error fetching Google snippet: {e}")
            return None
256
+
257
+
258
def extract_ticker_from_response(response: str) -> Optional[str]:
    """Pull a stock ticker symbol out of an LLM reply.

    The model frequently wraps the symbol in markdown as ``... is **TSLA**.``;
    when that pattern is present the bolded token is extracted, otherwise the
    whole reply (stripped) is treated as the ticker. Returns None on failure.
    """
    t0 = time.time()
    try:
        has_bold_marker = "is **" in response and "**." in response
        ticker = (
            response.split("is **")[1].split("**.")[0].strip()
            if has_bold_marker
            else response.strip()
        )
        logger.info("extract_ticker_from_response took %.2f seconds", time.time() - t0)
        return ticker
    except Exception as e:
        logger.error(f"Error extracting ticker from response: {e}")
        return None
271
+
272
+
273
def detect_translate_entity_and_ticker(query: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Run the LLM pipeline: language detection -> translation -> entity -> ticker.

    Returns ``(language, entity, translated_query, ticker)``; any stage that
    fails yields None for that element and every later one. On an unexpected
    exception all four elements are None.

    Fix: the original left debug ``print()`` calls in place, which wrote the
    intermediate translation/entity straight to the server's stdout; those
    now go through the logger only.
    """
    try:
        start_time = time.time()

        # Step 1: Detect Language
        response = invoke_llm(f"Detect the language for the following text: {query}")
        detected_language = response.content.strip()
        logger.info(f"Language detected: {detected_language}")

        # Step 2: Translate to English (if necessary). NOTE(review): this
        # relies on the model answering exactly "English" for English text.
        translated_query = query
        if detected_language != "English":
            response = invoke_llm(f"Translate the following text to English: {query}")
            translated_query = response.content.strip()
            logger.info(f"Translation completed: {translated_query}")

        # Step 3: Detect the company entity mentioned in the query.
        response = invoke_llm(f"Detect the entity in the following text that is a company name: {translated_query}")
        detected_entity = response.content.strip()
        logger.info(f"Entity detected: {detected_entity}")

        if not detected_entity:
            logger.error("No entity detected")
            return detected_language, None, translated_query, None

        # Step 4: Map the company name to its stock ticker symbol.
        response = invoke_llm(f"What is the stock ticker symbol for the company {detected_entity}?")
        stock_ticker = extract_ticker_from_response(response.content.strip())

        if not stock_ticker:
            logger.error("No stock ticker detected")
            return detected_language, detected_entity, translated_query, None

        logger.info("detect_translate_entity_and_ticker took %.2f seconds", time.time() - start_time)
        return detected_language, detected_entity, translated_query, stock_ticker
    except Exception as e:
        logger.error(f"Error in detecting, translating, or extracting entity and ticker: {e}")
        return None, None, None, None
317
+
318
+
319
def fetch_stock_data_yahoo(symbol: str) -> pd.DataFrame:
    """Download roughly three years of daily OHLCV history for *symbol*.

    Returns a DataFrame with columns ``[date, open, high, low, close, volume]``
    (``date`` as ``datetime.date``), or an empty DataFrame when the download
    fails or yields no rows.
    """
    t0 = time.time()
    try:
        ticker = yf.Ticker(symbol)
        logger.info(f"Fetching data for symbol: {symbol}")

        now = datetime.now()
        history = ticker.history(start=now - timedelta(days=3 * 365), end=now)
        if history.empty:
            raise ValueError(f"No historical data found for symbol: {symbol}")

        # Normalize Yahoo's capitalized column names and flatten the index.
        frame = history.rename(
            columns={"Open": "open", "High": "high", "Low": "low", "Close": "close", "Volume": "volume"}
        ).reset_index()
        frame['date'] = frame['Date'].dt.date
        frame = frame.drop(columns=['Date'])[['date', 'open', 'high', 'low', 'close', 'volume']]

        if 'close' not in frame.columns:
            raise KeyError("The historical data must contain a 'close' column.")

        logger.info("fetch_stock_data_yahoo took %.2f seconds", time.time() - t0)
        return frame
    except Exception as e:
        logger.error(f"Failed to fetch stock data for {symbol} from Yahoo Finance: {e}")
        return pd.DataFrame()
349
+
350
+
351
def fetch_current_stock_price(symbol: str) -> Optional[float]:
    """Return the latest trading price for *symbol*, or None when unavailable.

    Fix: the original indexed ``info['currentPrice']`` directly, which raises
    KeyError (swallowed into a None) for instruments that do not publish that
    field (ETFs, indices, some foreign listings). Those typically expose
    ``regularMarketPrice`` instead, so that is used as a fallback.
    """
    start_time = time.time()
    try:
        info = yf.Ticker(symbol).info
        price = info.get('currentPrice')
        if price is None:
            # Fallback field used by Yahoo for non-equity instruments.
            price = info.get('regularMarketPrice')
        logger.info("fetch_current_stock_price took %.2f seconds", time.time() - start_time)
        return price
    except Exception as e:
        logger.error(f"Failed to fetch current stock price for {symbol}: {e}")
        return None
361
+
362
+
363
def format_stock_data_for_gemini(stock_data: pd.DataFrame) -> str:
    """Render the OHLCV DataFrame as a plain-text table for the LLM prompt.

    Fix: the original appended to a string with ``+=`` inside the row loop,
    which is quadratic over ~750 daily rows; the rows are now collected in a
    list and joined once. The emitted text is byte-identical.
    """
    start_time = time.time()
    try:
        if stock_data.empty:
            return "No historical data available."

        lines = [
            "Historical stock data for the last three years:\n",
            "Date | Open | High | Low | Close | Volume",
            "------------------------------------------------------",
        ]
        for _, row in stock_data.iterrows():
            lines.append(
                f"{row['date']} | {row['open']:.2f} | {row['high']:.2f} | "
                f"{row['low']:.2f} | {row['close']:.2f} | {int(row['volume'])}"
            )
        # Single join keeps this O(n); trailing newline matches the original.
        formatted_data = "\n".join(lines) + "\n"

        logger.info("format_stock_data_for_gemini took %.2f seconds", time.time() - start_time)
        return formatted_data
    except Exception as e:
        logger.error(f"Error formatting stock data for Gemini: {e}")
        return "Error formatting stock data."
381
+
382
+
383
def fetch_company_info_yahoo(symbol: str) -> Dict:
    """Fetch a normalized company-profile dict for *symbol* from Yahoo Finance.

    Missing fields default to "N/A"; failures return {"error": message}.
    """
    started = time.time()
    try:
        if not symbol:
            return {"error": "Invalid symbol"}

        raw = yf.Ticker(symbol).info
        logger.info("fetch_company_info_yahoo took %.2f seconds", time.time() - started)

        # Map our output keys to yfinance's field names.
        field_map = {
            "name": "longName",
            "sector": "sector",
            "industry": "industry",
            "marketCap": "marketCap",
            "summary": "longBusinessSummary",
            "website": "website",
            "address": "address1",
            "city": "city",
            "state": "state",
            "country": "country",
            "phone": "phone",
        }
        return {out_key: raw.get(src_key, "N/A") for out_key, src_key in field_map.items()}
    except Exception as exc:
        logger.error(f"Error fetching company info for {symbol}: {exc}")
        return {"error": str(exc)}
408
+
409
+
410
def format_company_info_for_gemini(company_info: Dict) -> str:
    """Render the company-profile dict as labeled lines for the LLM prompt."""
    started = time.time()
    try:
        if "error" in company_info:
            return f"Error fetching company info: {company_info['error']}"

        lines = [
            "",
            "Company Information:",
            f"Name: {company_info['name']}",
            f"Sector: {company_info['sector']}",
            f"Industry: {company_info['industry']}",
            f"Market Cap: {company_info['marketCap']}",
            f"Summary: {company_info['summary']}",
            f"Website: {company_info['website']}",
            f"Address: {company_info['address']}, {company_info['city']}, {company_info['state']}, {company_info['country']}",
            f"Phone: {company_info['phone']}",
        ]
        logger.info("format_company_info_for_gemini took %.2f seconds", time.time() - started)
        return "\n".join(lines) + "\n"
    except Exception as exc:
        logger.error(f"Error formatting company info for Gemini: {exc}")
        return "Error formatting company info."
431
+
432
+
433
def fetch_company_news_yahoo(symbol: str) -> List[Dict]:
    """Fetch the latest news items for *symbol*; returns [] on any failure."""
    started = time.time()
    try:
        articles = yf.Ticker(symbol).news
        if not articles:
            raise ValueError(f"No news found for symbol: {symbol}")
        logger.info("fetch_company_news_yahoo took %.2f seconds", time.time() - started)
        return articles
    except Exception as exc:
        logger.error(f"Failed to fetch news for {symbol} from Yahoo Finance: {exc}")
        return []
445
+
446
+
447
def format_company_news_for_gemini(news: List[Dict]) -> str:
    """Render a list of Yahoo news items as labeled text for the LLM prompt.

    Fix: the original indexed article fields directly (article['title'], ...),
    so a single item missing any field raised KeyError and discarded the whole
    news section. Use .get() with "N/A" defaults so one malformed item cannot
    wipe out all the news.
    """
    start_time = time.time()
    try:
        if not news:
            return "No news available."

        formatted_news = "Latest company news:\n\n"
        for article in news:
            formatted_news += (f"Title: {article.get('title', 'N/A')}\n"
                               f"Publisher: {article.get('publisher', 'N/A')}\n"
                               f"Link: {article.get('link', 'N/A')}\n"
                               f"Published: {article.get('providerPublishTime', 'N/A')}\n\n")

        logger.info("format_company_news_for_gemini took %.2f seconds", time.time() - start_time)
        return formatted_news
    except Exception as e:
        logger.error(f"Error formatting company news for Gemini: {e}")
        return "Error formatting company news."
465
+
466
+
467
def send_to_gemini_for_summarization(content: str) -> str:
    """Ask the LLM to summarize *content* (a string, or an iterable of strings).

    Fix: the original always did `" ".join(content)`; joining a plain string
    inserts a space between every character ("abc" -> "a b c"), corrupting the
    prompt. Join only when the input is not already a string.
    """
    start_time = time.time()
    try:
        unified_content = content if isinstance(content, str) else " ".join(content)
        prompt = f"Summarize the main points of this article.\n\n{unified_content}"
        response = invoke_llm(prompt)
        logger.info("send_to_gemini_for_summarization took %.2f seconds", time.time() - start_time)
        return response.content.strip()
    except Exception as e:
        logger.error(f"Error sending content to Gemini for summarization: {e}")
        return "Error summarizing content."
478
+
479
+
480
def answer_question_with_data(question: str, data: Dict) -> str:
    """Build a financial-advisor prompt from *data* and ask the LLM *question*."""
    started = time.time()
    try:
        data_str = "".join(f"{key}:\n{value}\n\n" for key, value in data.items())

        prompt = (f"You are a financial advisor. Begin your answer and only give the answer after.\n"
                  f"Using the following data, answer this question: {question}\n\nData:\n{data_str}\n"
                  f"Make your answer in the best form and professional.\n"
                  f"Don't say anything about the source of the data.\n"
                  f"If you don't have the data to answer, say this data is not available yet. If the data is not available in the stock history data, say this was a weekend and there is no data for it.")
        response = invoke_llm(prompt)
        logger.info("answer_question_with_data took %.2f seconds", time.time() - started)
        return response.content.strip()
    except Exception as exc:
        logger.error(f"Error answering question with data: {exc}")
        return "Error answering question."
498
+
499
+
500
def calculate_metrics(stock_data: pd.DataFrame, summarizer: DataSummarizer, company_info: Dict) -> Dict[str, str]:
    """Compute technical indicators over *stock_data* and format them as strings.

    P/E Ratio is included only when the company reports a truthy trailingEps.
    Returns {"Error": ...} on any failure.

    Fix: the original duplicated the entire metrics dict in both branches of
    the EPS check; build it once and conditionally append the P/E entry.
    """
    start_time = time.time()
    try:
        formatted_metrics = {
            "Moving Average": summarizer.calculate_moving_average(stock_data).to_string(),
            "RSI": summarizer.calculate_rsi(stock_data).to_string(),
            "EMA": summarizer.calculate_ema(stock_data).to_string(),
            "Bollinger Bands": summarizer.calculate_bollinger_bands(stock_data).to_string(),
            "MACD": summarizer.calculate_macd(stock_data).to_string(),
            "Volatility": summarizer.calculate_volatility(stock_data).to_string(),
            "ATR": summarizer.calculate_atr(stock_data).to_string(),
            "OBV": summarizer.calculate_obv(stock_data).to_string(),
            "Yearly Summary": summarizer.calculate_yearly_summary(stock_data).to_string(),
            "YTD Performance": f"{summarizer.calculate_ytd_performance(stock_data):.2f}%",
        }

        # P/E needs earnings-per-share; skip when absent or zero (avoids
        # division by zero in the ratio).
        eps = company_info.get('trailingEps', None)
        if eps:
            current_price = stock_data.iloc[-1]['close']
            pe_ratio = summarizer.calculate_pe_ratio(current_price, eps)
            formatted_metrics["P/E Ratio"] = f"{pe_ratio:.2f}"

        logger.info("calculate_metrics took %.2f seconds", time.time() - start_time)
        return formatted_metrics
    except Exception as e:
        logger.error(f"Error calculating metrics: {e}")
        return {"Error": "Error calculating metrics"}
550
+
551
+
552
def prepare_data(formatted_stock_data: str, formatted_company_info: str, formatted_company_news: str,
                 google_results: str, formatted_metrics: Dict[str, str], google_snippet: str, rag_response: str) -> \
        Dict[str, str]:
    """Assemble all gathered context into one flat dict for the answer prompt.

    NOTE(review): the metrics appear twice — nested under "Calculations" and
    flattened into the top level via update(). Presumably intentional prompt
    emphasis, but confirm before relying on either form.
    """
    start_time = time.time()
    collected_data = {
        "Formatted Stock Data": formatted_stock_data,
        "Formatted Company Info": formatted_company_info,
        "Formatted Company News": formatted_company_news,
        "Google Search Results": google_results,
        "Google Snippet": google_snippet,
        "RAG Response": rag_response,
        "Calculations": formatted_metrics
    }
    collected_data.update(formatted_metrics)
    logger.info("prepare_data took %.2f seconds", time.time() - start_time)
    return collected_data
568
+
569
+
570
@app.route('/ask', methods=['POST'])
def ask():
    """Flask endpoint: answer a financial question posted as JSON {"question": ...}.

    Pipeline: detect language/entity/ticker; if a ticker is found, fan out data
    fetches concurrently, format everything, and prompt the LLM; otherwise fall
    back to RAG + Google search only. Returns {"answer": ...} or a 500 error.
    """
    try:
        user_input = request.json.get('question')
        logger.info(f"Received question: {user_input}")

        summarizer = DataSummarizer()

        # Detect language, entity, translation, and stock ticker
        language, entity, translation, stock_ticker = detect_translate_entity_and_ticker(user_input)

        logger.info(f"Detected Language: {language}, Entity: {entity}, Translation: {translation}, Stock Ticker: {stock_ticker}")

        if entity and stock_ticker:
            # Ticker path: fetch market data, company info/news, RAG and web
            # results in parallel; futures map back to result names.
            with ThreadPoolExecutor() as executor:
                futures = {
                    executor.submit(fetch_stock_data_yahoo, stock_ticker): "stock_data",
                    executor.submit(fetch_company_info_yahoo, stock_ticker): "company_info",
                    executor.submit(fetch_company_news_yahoo, stock_ticker): "company_news",
                    executor.submit(fetch_current_stock_price, stock_ticker): "current_stock_price",
                    executor.submit(get_answer, user_input): "rag_response",
                    executor.submit(summarizer.google_search, user_input): "google_results",
                    executor.submit(summarizer.fetch_google_snippet, user_input): "google_snippet"
                }
                results = {futures[future]: future.result() for future in as_completed(futures)}

            stock_data = results.get("stock_data", pd.DataFrame())
            formatted_stock_data = format_stock_data_for_gemini(stock_data) if not stock_data.empty else "No historical data available."

            company_info = results.get("company_info", {})
            formatted_company_info = format_company_info_for_gemini(company_info) if company_info else "No company info available."

            company_news = results.get("company_news", [])
            formatted_company_news = format_company_news_for_gemini(company_news) if company_news else "No news available."

            current_stock_price = results.get("current_stock_price", None)

            formatted_metrics = calculate_metrics(stock_data, summarizer, company_info) if not stock_data.empty else {"Error": "No stock data for metrics"}

            google_results = results.get("google_results", "No additional news found through Google Search.")
            google_snippet = results.get("google_snippet", "Snippet not found.")
            rag_response = results.get("rag_response", "No response from RAG.")

            collected_data = prepare_data(formatted_stock_data, formatted_company_info, formatted_company_news,
                                          google_results, formatted_metrics, google_snippet, rag_response)
            collected_data["Current Stock Price"] = f"${current_stock_price:.2f}" if current_stock_price is not None else "N/A"

            # Use the (possibly translated-to-English) question for the prompt.
            answer = answer_question_with_data(f"{translation}", collected_data)

            return jsonify({"answer": answer})

        else:
            # No ticker detected: answer from RAG + web search only.
            with ThreadPoolExecutor() as executor:
                futures = {
                    executor.submit(get_answer, user_input): "rag_response",
                    executor.submit(summarizer.google_search, user_input): "google_results",
                    executor.submit(summarizer.fetch_google_snippet, user_input): "google_snippet"
                }
                results = {futures[future]: future.result() for future in as_completed(futures)}

            google_results = results.get("google_results", "No additional news found through Google Search.")
            google_snippet = results.get("google_snippet", "Snippet not found.")
            rag_response = results.get("rag_response", "No response from RAG.")

            collected_data = prepare_data("", "", "", google_results, {}, google_snippet, rag_response)

            answer = answer_question_with_data(f"{user_input}", collected_data)

            return jsonify({"answer": answer})

    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return jsonify({"error": "An error occurred while processing your request. Please try again later."}), 500
643
+
644
+
645
if __name__ == '__main__':
    # Bind on all interfaces so the containerized service is reachable.
    app.run(host='0.0.0.0', port=5000)
config.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+
4
+ # Load environment variables from .env file
5
+ load_dotenv()
6
+
7
class Config:
    """Application configuration: API keys/secrets read from the environment.

    Values come from os.getenv and are therefore None when the corresponding
    variable is missing; callers must handle that.
    """
    GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")            # Google Gemini LLM
    ALPHA_VANTAGE_KEY = os.getenv("ALPHA_VANTAGE_KEY")      # Alpha Vantage market data
    YAHOO_FINANCE_API_KEY = os.getenv("YAHOO_FINANCE_API_KEY")
    FINNHUB_API_KEY = os.getenv("FINNHUB_API_KEY")
    POLYGON_API_KEY = os.getenv("POLYGON_API_KEY")
    SECRET_KEY = os.getenv("SECRET_KEY")                    # web-framework session secret
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")            # Google Custom Search
    SEARCH_ENGINE_ID = os.getenv("SEARCH_ENGINE_ID")
    # Add any additional configuration variables here
+
embeddings.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import google.generativeai as genai
2
+ from dotenv import load_dotenv
3
+ import os
4
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
+ from langchain_cohere import CohereEmbeddings
6
+ from langchain_openai import OpenAIEmbeddings
7
+ from langchain.embeddings import HuggingFaceEmbeddings
8
+
9
+ load_dotenv()
10
+
11
class Embeddings:
    """Factory wrapper that builds a LangChain embedding model by provider name.

    Supported (emb, model) combinations:
        google, models/embedding-001
        openai, openai
        cohere, cohere
        hf, all-MiniLM-L6-v2
        hf, BAAI/bge-large-en-v1.5
        hf, Alibaba-NLP/gte-large-en-v1.5, True

    Fix: an unknown provider previously made get_embedding() fall through and
    return None, which only surfaced later as an opaque AttributeError inside
    get_emb_len(); now it raises a clear ValueError immediately.
    """

    def __init__(self, emb, model, trust_remote=False, normalize=False):
        # Provider key ('google' | 'openai' | 'cohere' | 'hf') and model name.
        self.emb = emb
        self.model = model
        self.trust_remote = trust_remote      # pass trust_remote_code for HF models that need it
        self.normalize = normalize            # L2-normalize HF embeddings if True
        self.embedding = self.get_embedding()
        self.seq_len = self.get_emb_len()     # embedding dimensionality

    def get_emb_len(self):
        """Return the embedding vector dimensionality (probes with a dummy query)."""
        return len(self.embedding.embed_query('hi how are you'))

    def google_embedding(self):
        """Build a Google Generative AI embedding model (needs GOOGLE_API_KEY)."""
        genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
        return GoogleGenerativeAIEmbeddings(model=self.model)

    def openai_embedding(self):
        """Build an OpenAI embedding model (needs OPENAI_API_KEY)."""
        return OpenAIEmbeddings(api_key=os.getenv("OPENAI_API_KEY"))

    def cohere_embedding(self):
        """Build a Cohere embedding model (needs COHERE_API_KEY)."""
        return CohereEmbeddings(cohere_api_key=os.getenv("COHERE_API_KEY"))

    def hf_embedding(self):
        """Build a local HuggingFace sentence-transformers embedding model."""
        model_args = {'trust_remote_code': True} if self.trust_remote else {}
        encode_args = {'normalize_embeddings': True} if self.normalize else {}
        return HuggingFaceEmbeddings(model_name=self.model, model_kwargs=model_args, encode_kwargs=encode_args)

    def get_embedding(self):
        """Dispatch to the provider-specific builder; raise on unknown provider."""
        if self.emb == 'google':
            return self.google_embedding()
        elif self.emb == 'openai':
            return self.openai_embedding()
        elif self.emb == 'cohere':
            return self.cohere_embedding()
        elif self.emb == 'hf':
            return self.hf_embedding()
        raise ValueError(f"Unknown embedding provider: {self.emb!r} "
                         "(expected 'google', 'openai', 'cohere', or 'hf')")
flasktest.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import requests
3
+ import json
4
+
5
+
6
def send_question_to_api(question):
    """POST *question* to the local /ask endpoint and return its answer string.

    Non-200 responses are returned as a formatted error string.
    """
    response = requests.post(
        'http://localhost:5000/ask',
        headers={'Content-Type': 'application/json'},
        data=json.dumps({'question': question}),
    )

    if response.status_code != 200:
        return f"Error: {response.status_code} - {response.text}"
    return response.json().get('answer')
17
+
18
+
19
def main():
    """Streamlit test UI: submit questions to the Flask chatbot and show history."""
    st.title("Financial Data Chatbot Tester")

    st.write("Enter your question below and get a response from the chatbot.")

    # Initialize session state to store question history across reruns.
    if 'history' not in st.session_state:
        st.session_state.history = []

    user_input = st.text_input("Your question:", "")

    if st.button("Submit"):
        if user_input:
            with st.spinner('Getting the answer...'):
                answer = send_question_to_api(user_input)
                st.session_state.history.append((user_input, answer))
                st.success(answer)
        else:
            st.warning("Please enter a question before submitting.")

    # Display the history of questions and answers (oldest first).
    if st.session_state.history:
        st.write("### History")
        for idx, (question, answer) in enumerate(st.session_state.history, 1):
            st.write(f"**Q{idx}:** {question}")
            st.write(f"**A{idx}:** {answer}")
            st.write("---")


if __name__ == '__main__':
    main()
index.html ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Chatbot</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            background-color: #f4f4f9;
            margin: 40px;
            text-align: center;
        }
        input[type="text"] {
            width: 300px;
            padding: 10px;
            font-size: 16px;
            margin-top: 20px;
            border: 2px solid #ccc;
            border-radius: 5px;
        }
        button {
            background-color: #4CAF50;
            color: white;
            padding: 10px 20px;
            margin-top: 10px;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            font-size: 16px;
        }
        button:hover {
            background-color: #45a049;
        }
        p {
            margin-top: 20px;
            font-size: 18px;
            color: #333;
        }
    </style>
</head>
<body>
    <h1>Chatbot Interface</h1>
    <input type="text" id="question" placeholder="Ask a question...">
    <button onclick="askQuestion()">Ask</button>
    <p id="answer">Answer will appear here...</p>

    <script>
        // Send the typed question to the backend /chat/ endpoint (the FastAPI
        // route in rag.py) and render the JSON answer in the <p> below.
        async function askQuestion() {
            const questionInput = document.getElementById('question');
            const answerDisplay = document.getElementById('answer');
            const question = questionInput.value;

            const response = await fetch('/chat/', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify({ question: question })
            });
            if (response.ok) {
                const data = await response.json();
                answerDisplay.textContent = 'Answer: ' + data.answer;
            } else {
                answerDisplay.textContent = 'Error: Unable to fetch answer.';
            }
        }
    </script>
</body>
</html>
llm.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_google_genai import ChatGoogleGenerativeAI
2
+ import google.generativeai as genai
3
+ from langchain.chat_models import ChatOpenAI
4
+ from langchain_groq import ChatGroq
5
+ import vertexai
6
+ from langchain_google_vertexai import ChatVertexAI
7
+
8
+ from dotenv import load_dotenv
9
+ import os
10
+
11
+ load_dotenv()
12
+
13
+ genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
14
+
15
class LLM:
    """Factory wrapper that builds a LangChain chat model by provider name.

    Providers: 'gemini', 'vertex', 'openai', 'mixtral', 'llama'. Each branch
    falls back to a sensible default model when none is given.

    Fixes: the 'mixtral' branch previously overwrote any caller-supplied model
    unconditionally (inconsistent with every other branch), and an unknown
    provider left self.llm unset, deferring the failure to an opaque
    AttributeError in get_llm(); both now behave consistently.
    """

    def __init__(self, llm, model=None):
        if llm == 'gemini':
            if model is None:
                model = "gemini-pro"
            self.llm = ChatGoogleGenerativeAI(model=model, temperature=0.3)
        elif llm == 'vertex':
            vertexai.init(project="website-254017", location="us-central1")
            if model is None:
                model = "gemini-1.5-pro-preview-0514"
            self.llm = ChatVertexAI(model_name=model, temperature=0, max_tokens=8000)
        elif llm == 'openai':
            if model is None:
                model = 'gpt-3.5-turbo-0125'
            self.llm = ChatOpenAI(api_key=os.getenv("OPENAI_API_KEY"), model=model)
        elif llm == 'mixtral':
            if model is None:
                model = "mixtral-8x7b-32768"
            # NOTE: env var is spelled GROK_API_KEY in the project's .env.
            self.llm = ChatGroq(temperature=0, groq_api_key=os.getenv("GROK_API_KEY"), model_name=model)
        elif llm == 'llama':
            if model is None:
                model = 'llama3-8b-8192'
            self.llm = ChatGroq(temperature=0, groq_api_key=os.getenv("GROK_API_KEY"), model_name=model)
        else:
            raise ValueError(f"Unknown LLM provider: {llm!r} "
                             "(expected 'gemini', 'vertex', 'openai', 'mixtral', or 'llama')")

    def get_llm(self):
        """Return the constructed LangChain chat model."""
        return self.llm
43
+
44
+
45
+
logging_config.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging.config
2
+
3
def setup_logging():
    """Configure root logging: DEBUG to both console and financial_adviser.log."""
    logging_config = {
        'version': 1,
        # Keep third-party/module loggers active alongside this config.
        'disable_existing_loggers': False,
        'formatters': {
            'standard': {
                'format': '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            },
        },
        'handlers': {
            'console': {
                'level': 'DEBUG',
                'class': 'logging.StreamHandler',
                'formatter': 'standard',
            },
            'file': {
                'level': 'DEBUG',
                'class': 'logging.FileHandler',
                'filename': 'financial_adviser.log',
                'formatter': 'standard',
            },
        },
        'loggers': {
            # Root logger: everything propagates here.
            '': {
                'handlers': ['console', 'file'],
                'level': 'DEBUG',
                'propagate': True,
            },
        },
    }

    logging.config.dictConfig(logging_config)

# Initialize the logger on import so importing modules can just use it.
setup_logging()
logger = logging.getLogger(__name__)
main.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from langchain.chains.conversation.memory import ConversationBufferWindowMemory
4
+ from data_extraction import Extraction
5
+ import nest_asyncio
6
+ from chunking import Chunker
7
+ from embeddings import Embeddings
8
+ from vectorstore import VectorDB
9
+ from retriever import Retriever, CreateBM25Retriever
10
+ from llm import LLM
11
+ from langchain_core.prompts import PromptTemplate
12
+ from chain import Chain
13
+ from streamlit_chat import message
14
+
15
# --- Streamlit session state and shared pipeline objects (run on import) ---

# Chat transcript shown in the UI; seeded with a greeting from the bot.
if 'responses' not in st.session_state:
    st.session_state['responses'] = ["How can I assist you?"]

if 'requests' not in st.session_state:
    st.session_state['requests'] = []

# Rolling 3-turn conversation memory for the LLM chain.
if 'buffer_memory' not in st.session_state:
    st.session_state.buffer_memory = ConversationBufferWindowMemory(k=3, return_messages=True)

# Allow nested event loops (Streamlit + async extraction libraries).
nest_asyncio.apply()
ext = Extraction('fast')
chnk = Chunker(chunk_size=1000, chunk_overlap=200)
emb = Embeddings("hf", "all-MiniLM-L6-v2")
_llm = LLM('vertex').get_llm()
ch = Chain(_llm, st.session_state.buffer_memory)
conversation = ch.get_chain_with_history()
31
+
32
def query_refiner(conversation, query):
    """Rewrite *query* into a standalone question using the prior *conversation*.

    Fix: the original assignment ended with a trailing comma, which made
    `prompt` a one-element tuple instead of a string, so a tuple was passed to
    the LLM's invoke(). The comma is removed so a plain string is sent.
    """
    prompt = f"Given the following user query and historical user queries, rephrase the users current query to form a meaningful and clear question.Previously user has asked the following: \n{conversation}\n\n User's Current Query: {query}. What will be the refined query? Only provide the query without any extra details or explanations."
    ans = _llm.invoke(prompt).content
    return ans
36
+
37
def get_conversation_string():
    """Return the user's prior questions as newline-separated 'Human:' lines."""
    turns = []
    # One prior request exists for each response beyond the seeded greeting.
    for idx in range(len(st.session_state['responses']) - 1):
        turns.append("Human: " + st.session_state['requests'][idx] + "\n")
    return "".join(turns)
43
+
44
def main():
    """Streamlit chat-with-PDF app: upload PDFs, index them, then chat via RAG."""
    inp_dir = "./inputs"
    db = 'pinecone'
    db_dir = 'pineconedb'
    st.set_page_config("Chat PDF")
    st.header("Chat with PDF")

    response_container = st.container()
    textcontainer = st.container()
    ret = None
    with textcontainer:
        query = st.text_input("Query: ", key="input")
        if query:
            # Lazily build the retriever on first query of this rerun.
            if ret is None:
                ret = Retriever(db, db_dir, emb.embedding, 'ensemble', 5)
            with st.spinner("typing..."):
                conversation_string = get_conversation_string()
                # Rewrite follow-up questions into standalone queries using
                # the prior conversation.
                if len(st.session_state['responses']) != 0:
                    refined_query = query_refiner(conversation_string, query)
                else:
                    refined_query = query
                st.subheader("Refined Query:")
                st.write(refined_query)
                # Retrieve context with the refined query, but answer the
                # user's original wording.
                context, context_list = ret.get_context(refined_query)
                response = conversation.predict(input=f"Context:\n {context} \n\n Query:\n{query}")
                # response += '\n' + "Source: " + src
                st.session_state.requests.append(query)
                st.session_state.responses.append(response)

    with response_container:
        if st.session_state['responses']:
            for i in range(len(st.session_state['responses'])):
                message(st.session_state['responses'][i], key=str(i))
                if i < len(st.session_state['requests']):
                    message(st.session_state["requests"][i], is_user=True, key=str(i) + '_user')

    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
        pdfs = []
        if pdf_docs:
            # Persist uploads to disk so the extraction pipeline can read them.
            for pdf_file in pdf_docs:
                filename = pdf_file.name
                path = os.path.join(inp_dir, filename)
                with open(path, "wb") as f:
                    f.write(pdf_file.getvalue())
                pdfs.append(path)

            # Extract -> chunk -> embed into the vector store, plus a BM25
            # index pickled for the ensemble retriever.
            with st.spinner("Processing..."):
                texts, metas = ext.get_text(pdfs)
                docs = chnk.get_chunks(texts, metas)
                vs = VectorDB(db, emb.embedding, db_dir, docs=docs)
                bm = CreateBM25Retriever(docs)
                st.success("Done")

if __name__ == "__main__":
    main()
+ main()
rag.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from embeddings import Embeddings
4
+ from chain import Chain
5
+ from llm import LLM
6
+ from retriever import Retriever
7
+ from fastapi import FastAPI, HTTPException
8
+ from fastapi.responses import HTMLResponse
9
+ from functools import lru_cache
10
+ from tools import *
11
+ import re
12
+
13
# --- Module-level RAG pipeline objects (built once on import) ---
emb = Embeddings("hf", "all-MiniLM-L6-v2")
llm = LLM('gemini').get_llm()
ch = Chain(llm, None)
ret = Retriever('pinecone', 'pinecone', emb.embedding, 'ensemble', 5)

# Module-level flag: whether the current question was asked in Arabic.
# Set by get_answer() before calling investment_banker().
is_arabic = False
19
+
20
+ @lru_cache()
21
+ def investment_banker(query):
22
+ global is_arabic
23
+ context, context_list = ret.get_context(query)
24
+ if not is_arabic:
25
+ prompt_template = f"""
26
+ You are an investment banker and financial advisor.
27
+ Answer the question as detailed as possible from the provided context and make sure to provide all the details.
28
+ Answer only from the context. If the answer is not in provided context, say "Answer not in context".\n\n
29
+ Context:\n {context}\n\n
30
+ Question: \n{query}\n
31
+
32
+ Answer:
33
+ """
34
+ else:
35
+ prompt_template = f"""
36
+ You are an investment banker and financial advisor.
37
+ Answer the question as detailed as possible from the provided context and make sure to provide all the details.
38
+ Answer only from the context. If the answer is not in provided context, say "Answer not in context".
39
+ Return the answer in Arabic only.\n\n
40
+ Context:\n {context}\n\n
41
+ Question: \n{query}\n
42
+
43
+ Answer:
44
+ """
45
+ response = ch.run_conversational_chain(prompt_template)
46
+ is_arabic = False
47
+ return response
48
+
49
def check_arabic(s):
    """Return True when *s* contains at least one Arabic-block character."""
    return bool(re.search(r'[\u0600-\u06FF]', s))
55
+
56
+ history = ""
57
+
58
@lru_cache()
def refine_query(query, conversation):
    """Ask the LLM to turn *query* into a standalone English question.

    Cached on the (query, conversation) pair, so repeated identical turns
    skip the LLM round-trip.
    """
    prompt = f"""Given the following user query and historical user conversation with banker.
    If the current user query is in arabic, convert it to english and then proceed.
    If conversation history is empty return the current query as it is.
    If the query is a continuation of previous conversation then only rephrase the users current query to form a meaningful and clear question.
    Otherwise return the user query as it is.
    Previously user and banker had the following conversation: \n{conversation}\n\n User's Current Query: {query}.
    What will be the refined query? Only provide the query without any extra details or explanations."""
    return llm.invoke(prompt).content
69
+
70
+
71
def get_answer(query):
    """Full RAG turn: detect language, refine the query, answer, log history."""
    global history
    global is_arabic

    is_arabic = check_arabic(query)
    refined = refine_query(query, history)
    answer = investment_banker(refined)

    # Append this exchange to the running transcript used for refinement.
    history += "Human: " + refined + "\n"
    history += "Banker: " + answer + "\n"
    return answer
82
if __name__ == "__main__":
    # Fix: get_answer() was called without its required `query` argument,
    # so running this module directly always raised TypeError. Take the
    # question from the command line, with a sample default.
    import sys
    question = " ".join(sys.argv[1:]) or "What services does an investment banker provide?"
    response = get_answer(question)
    print(response)
85
+ # app = FastAPI()
86
+
87
+ # class Query(BaseModel):
88
+ # question: str
89
+
90
+ # @app.post("/chat/")
91
+ # async def chat(query: Query):
92
+ # global history
93
+ # global is_arabic
94
+
95
+ # try:
96
+
97
+ # is_arabic = check_arabic(query.question)
98
+ # ref_query = refine_query(query.question, history)
99
+
100
+
101
+ # print(query.question, ref_query)
102
+ # print(is_arabic)
103
+ # ans = investment_banker(ref_query)
104
+ # history += "Human: "+ ref_query + "\n"
105
+ # history += "Banker: "+ ans + "\n"
106
+ # return {"question": query.question, "answer": ans}
107
+ # except Exception as e:
108
+ # raise HTTPException(status_code=500, detail=str(e))
109
+
110
+
111
+ # @app.get("/", response_class=HTMLResponse)
112
+ # async def read_index():
113
+ # with open('index.html', 'r') as f:
114
+ # return HTMLResponse(content=f.read())
requirements.txt ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pydantic
2
+ langchain
3
+ yfinance
4
+ langchain_google_genai
5
+ langchain_openai
6
+ langchain_cohere
7
+ google-generativeai
8
+ langchain_groq
9
+ python-dotenv
10
+ vertexai
11
+ langchain_pinecone
12
+ qdrant_client
13
+ uvicorn
14
+ langchain-community
15
+ langchain_google_vertexai
16
+ sentence-transformers
17
+ rank_bm25
18
+ matplotlib
19
+ pandas
20
+ numpy
21
+ requests
22
+ spacy
23
+ transformers
24
+ torch
25
+ sentencepiece
26
+ streamlit
27
+ flask
28
+ bs4
29
+ tenacity
30
+ loguru
retriever.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.retrievers import BM25Retriever, EnsembleRetriever
2
+ from langchain.vectorstores import FAISS, Chroma, Qdrant
3
+ from qdrant_client import QdrantClient
4
+ from langchain_pinecone import PineconeVectorStore
5
+ import os
6
+ from dotenv import load_dotenv
7
+ import pickle
8
+
9
+ load_dotenv()
10
+
11
class CreateBM25Retriever:
    """Build a BM25 retriever over *docs* and pickle it to bm25retriever.pkl.

    The pickle is later loaded by Retriever when strategy == 'ensemble'.
    """
    def __init__(self, docs):
        self.bm25_retriever = BM25Retriever.from_documents(docs)
        # Persist so serving processes can reuse the index without the docs.
        with open('bm25retriever.pkl', 'wb') as outp:
            pickle.dump(self.bm25_retriever, outp, pickle.HIGHEST_PROTOCOL)
16
+
17
class Retriever:
    """Load a vector store (FAISS/Chroma/Qdrant/Pinecone) and expose retrieval.

    With strategy 'ensemble', combines the vector retriever with a pickled
    BM25 retriever (created by CreateBM25Retriever) weighted 0.4/0.6.
    """
    def __init__(self, db, per_dir, embeddings, strategy, k, collection_name="mydocuments"):
        self.db = db
        self.strategy = strategy
        self.per_dir = per_dir  # persistence directory / local path for the store
        if self.db == 'faiss':
            self.db_ = FAISS.load_local(self.per_dir, embeddings, allow_dangerous_deserialization=True)
        elif self.db == 'chroma':
            self.db_ = Chroma(persist_directory=self.per_dir, embedding_function=embeddings)
        elif self.db == 'qdrant':
            self.db_ = Qdrant(client=QdrantClient(path=self.per_dir), collection_name=collection_name, embeddings=embeddings)
        elif self.db == 'pinecone':
            self.db_ = PineconeVectorStore(pinecone_api_key=os.getenv("PINECONE_API_KEY"), index_name=collection_name, embedding=embeddings)
        self.retriever = self.db_.as_retriever(search_kwargs={"k": k})

        if strategy == 'ensemble':
            # Load the BM25 index pickled at build time and fuse both
            # retrievers (sparse 0.4 / dense 0.6).
            with open('bm25retriever.pkl', 'rb') as inp:
                self.bm25_retriever = pickle.load(inp)
            self.bm25_retriever.k = k
            self.retriever = EnsembleRetriever(retrievers=[self.bm25_retriever, self.retriever],
                                               weights=[0.4, 0.6])

    def get_docs(self, query):
        """Return the top-k relevant documents for *query*."""
        return self.retriever.get_relevant_documents(query)

    def get_context(self, query):
        """Return (concatenated context with sources, list of raw chunk texts)."""
        docs = self.get_docs(query)
        context = ""
        context_list = []
        # src = []
        for txt in docs:
            context += '\n\n' + txt.page_content + "\n" + "Source: " + txt.metadata['source']
            context_list.append(txt.page_content)
            # src.append(txt.metadata['source'])
        # src = max(set(src), key=src.count)
        return context, context_list
53
+
tools.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+ from datetime import datetime, timedelta
3
+ import yfinance as yf
4
+ from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate, AIMessagePromptTemplate
5
+ from pydantic import BaseModel, Field
6
+ from langchain.tools import BaseTool
7
+ from typing import Optional, Type
8
+ from typing import List
9
+ from functools import lru_cache
10
+
11
+
12
@lru_cache()
def get_stock_price(symbol):
    """Return (price, currency) for *symbol*: today's close rounded to 2 dp.

    NOTE(review): lru_cache memoizes per process, so a long-running app will
    keep serving the first price fetched — confirm that staleness is intended.

    Raises:
        ValueError: If yfinance returns no rows for the symbol.
    """
    ticker = yf.Ticker(symbol)
    todays_data = ticker.history(period='1d')
    if todays_data.empty:
        # Previously an unknown/bad symbol surfaced as an opaque IndexError.
        raise ValueError(f"No price data returned for symbol {symbol!r}")
    # .iloc[0]: todays_data['Close'][0] used the deprecated positional
    # fallback on a date-indexed Series; positional access must be explicit.
    price = round(todays_data['Close'].iloc[0], 2)
    currency = ticker.info['currency']
    return price, currency
19
+
20
@lru_cache()
def get_stock_data_yahoo(ticker):
    """Return one year of daily price history for *ticker* via yfinance.

    Results are memoized per process by lru_cache.
    """
    return yf.Ticker(ticker).history(period="1y")
25
+
26
@lru_cache()
def get_company_profile_yahoo(ticker):
    """Return a compact company profile dict for *ticker*.

    Keys: name, sector, industry, marketCap, website, description — each
    pulled with ``.get`` from yfinance's ``info`` mapping, so missing
    fields come back as None.
    """
    info = yf.Ticker(ticker).info
    field_map = {
        "name": "shortName",
        "sector": "sector",
        "industry": "industry",
        "marketCap": "marketCap",
        "website": "website",
        "description": "longBusinessSummary",
    }
    return {field: info.get(source_key) for field, source_key in field_map.items()}
39
+
40
@lru_cache()
def get_company_news_yahoo(ticker):
    """Return whatever news items yfinance currently exposes for *ticker*."""
    return yf.Ticker(ticker).news
45
+
46
@lru_cache()
def get_price_change_percent(symbol, days_ago):
    """Return the percentage change in *symbol*'s close over *days_ago* days.

    Computed as (last_close - first_close) / first_close * 100 over the
    window [today - days_ago, today], rounded to 2 decimal places.

    Raises:
        ValueError: If yfinance returns no rows for the window (previously
            this surfaced as an opaque IndexError on .iloc[0]).
    """
    ticker = yf.Ticker(symbol)
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days_ago)

    # Convert dates to string format that yfinance can accept
    start_date = start_date.strftime('%Y-%m-%d')
    end_date = end_date.strftime('%Y-%m-%d')

    historical_data = ticker.history(start=start_date, end=end_date)
    if historical_data.empty:
        raise ValueError(
            f"No historical data for {symbol!r} between {start_date} and {end_date}"
        )

    old_price = historical_data['Close'].iloc[0]
    new_price = historical_data['Close'].iloc[-1]

    percent_change = ((new_price - old_price) / old_price) * 100
    return round(percent_change, 2)
63
+
64
@lru_cache()
def calculate_performance(symbol, days_ago):
    """Return the % change in *symbol*'s close over *days_ago* days.

    Kept for backward compatibility: its body was a line-for-line duplicate
    of ``get_price_change_percent``, so it now delegates instead of
    re-implementing the same computation.
    """
    return get_price_change_percent(symbol, days_ago)
76
+
77
def get_best_performing(stocks, days_ago):
    """Return (best_ticker, best_percent_change) among *stocks* over *days_ago* days.

    Bug fix: the previous ``@lru_cache()`` decorator made every call fail —
    ``StockGetBestPerformingTool`` passes *stocks* as a list, which is
    unhashable, so the cache wrapper raised TypeError before the body ran.
    Per-symbol results are still cached via ``calculate_performance``'s own
    lru_cache.

    Symbols whose performance cannot be computed are skipped (best-effort);
    returns (None, None) if no symbol succeeds.
    """
    best_stock = None
    best_performance = None
    for stock in stocks:
        try:
            performance = calculate_performance(stock, days_ago)
            if best_performance is None or performance > best_performance:
                best_stock = stock
                best_performance = performance
        except Exception as e:
            # Deliberate best-effort: a bad ticker shouldn't abort the scan.
            print(f"Could not calculate performance for {stock}: {e}")
    return best_stock, best_performance
90
+
91
class StockPriceCheckInput(BaseModel):
    """Input for Stock price check."""

    # Ticker as understood by the yfinance API (e.g. "AAPL", "^GSPC").
    stockticker: str = Field(..., description="Ticker symbol for stock or index")
95
+
96
class StockPriceTool(BaseTool):
    """Agent tool: fetch today's closing price for one ticker via yfinance."""

    name = "get_stock_ticker_price"
    description = "Useful for when you need to find out the price of the stock today. You should input the stock ticker used on the yfinance API"

    def _run(self, stockticker: str):
        # Delegates to the lru_cached module-level helper; returns a
        # human-readable "<currency> <price>" string for the agent.
        # print("i'm running")
        price_response, currency = get_stock_price(stockticker)

        return f"{currency} {price_response}"

    def _arun(self, stockticker: str):
        # Async variant intentionally unsupported.
        raise NotImplementedError("This tool does not support async")

    # Pydantic schema the agent framework uses to validate tool arguments.
    args_schema: Optional[Type[BaseModel]] = StockPriceCheckInput
110
+
111
class PrevYearStockTool(BaseTool):
    """Agent tool: return the past year's daily price history for a ticker."""

    name = "get_past_year_stock_data"
    description = "Useful for when you need to find out the past 1 year performance of a stock. You should input the stock ticker used on the yfinance API"

    def _run(self, stockticker: str):
        # Returns the raw yfinance history frame for the agent to summarize.
        price_response = get_stock_data_yahoo(stockticker)
        return price_response

    def _arun(self, stockticker: str):
        # Async variant intentionally unsupported.
        raise NotImplementedError("This tool does not support async")

    # Pydantic schema the agent framework uses to validate tool arguments.
    args_schema: Optional[Type[BaseModel]] = StockPriceCheckInput
123
+
124
class StockNewsTool(BaseTool):
    """Agent tool: return recent news items for a ticker via yfinance."""

    name = "get_news_about_stock"
    description = "Useful for when you need recent news related to a stock. You should input the stock ticker used on the yfinance API"

    def _run(self, stockticker: str):
        # Raw news payload from yfinance, passed through unmodified.
        price_response = get_company_news_yahoo(stockticker)
        return price_response

    def _arun(self, stockticker: str):
        # Async variant intentionally unsupported.
        raise NotImplementedError("This tool does not support async")

    # Pydantic schema the agent framework uses to validate tool arguments.
    args_schema: Optional[Type[BaseModel]] = StockPriceCheckInput
136
+
137
class StockProfileTool(BaseTool):
    """Agent tool: return a compact company profile for a ticker."""

    name = "get_profile_of_stock"
    description = "Useful for when you need details or profile of a stock. You should input the stock ticker used on the yfinance API"

    def _run(self, stockticker: str):
        # Profile dict (name/sector/industry/marketCap/website/description).
        price_response = get_company_profile_yahoo(stockticker)
        return price_response

    def _arun(self, stockticker: str):
        # Async variant intentionally unsupported.
        raise NotImplementedError("This tool does not support async")

    # Pydantic schema the agent framework uses to validate tool arguments.
    args_schema: Optional[Type[BaseModel]] = StockPriceCheckInput
149
+
150
class StockChangePercentageCheckInput(BaseModel):
    """Input for Stock ticker check. for percentage check"""

    # Ticker as understood by the yfinance API (e.g. "AAPL", "^GSPC").
    stockticker: str = Field(..., description="Ticker symbol for stock or index")
    # Lookback window in days for the percentage-change computation.
    days_ago: int = Field(..., description="Int number of days to look back")
155
+
156
class StockPercentageChangeTool(BaseTool):
    """Agent tool: percentage change of a ticker's close over N days."""

    name = "get_price_change_percent"
    description = "Useful for when you need to find out the performance or percentage change in a stock's value. You should input the stock ticker used on the yfinance API and also input the number of days to check the change over"

    def _run(self, stockticker: str, days_ago: int):
        # Rounded percentage (float) from the lru_cached helper.
        price_change_response = get_price_change_percent(stockticker, days_ago)

        return price_change_response

    def _arun(self, stockticker: str, days_ago: int):
        # Async variant intentionally unsupported.
        raise NotImplementedError("This tool does not support async")

    # Pydantic schema the agent framework uses to validate tool arguments.
    args_schema: Optional[Type[BaseModel]] = StockChangePercentageCheckInput
169
+
170
class StockBestPerformingInput(BaseModel):
    """Input for Stock ticker check. for percentage check"""

    # Multiple tickers to compare, each as understood by the yfinance API.
    stocktickers: List[str] = Field(..., description="Ticker symbols for stocks or indices")
    # Lookback window in days for the comparison.
    days_ago: int = Field(..., description="Int number of days to look back")
175
+
176
class StockGetBestPerformingTool(BaseTool):
    """Agent tool: find the best-performing ticker in a list over N days."""

    name = "get_best_performing"
    # NOTE(review): description has a typo ("need to the performance") —
    # it is a runtime prompt string, so it is left byte-identical here.
    description = "Useful for when you need to the performance of multiple stocks over a period. You should input a list of stock tickers used on the yfinance API and also input the number of days to check the change over"

    def _run(self, stocktickers: List[str], days_ago: int):
        # NOTE(review): get_best_performing is decorated with @lru_cache(),
        # and a list argument is unhashable — this call raises TypeError as
        # written; confirm and fix at the helper.
        price_change_response = get_best_performing(stocktickers, days_ago)

        return price_change_response

    def _arun(self, stockticker: List[str], days_ago: int):
        # Async variant intentionally unsupported.
        raise NotImplementedError("This tool does not support async")

    # Pydantic schema the agent framework uses to validate tool arguments.
    args_schema: Optional[Type[BaseModel]] = StockBestPerformingInput