harshxmishra committed on
Commit
e196f47
·
verified ·
1 Parent(s): 20b534a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +265 -68
app.py CHANGED
@@ -1,81 +1,278 @@
 
 
1
  import os
 
 
 
2
 
3
- os.system('pip install youtube_transcript_api langchain_openai langchain')
 
 
 
 
 
 
 
4
 
 
5
 
6
- import re
7
- import streamlit as st
8
- from youtube_transcript_api import YouTubeTranscriptApi
9
- from langchain_openai import ChatOpenAI
10
- from langchain.schema import SystemMessage, HumanMessage
11
 
12
- def extract_video_id(url):
13
- """
14
- Extracts the YouTube video ID from a URL.
15
- Supports standard URLs (e.g., https://www.youtube.com/watch?v=VIDEO_ID)
16
- and short URLs (e.g., https://youtu.be/VIDEO_ID).
17
- """
18
- regex = r"(?:v=|\/)([0-9A-Za-z_-]{11}).*"
19
- match = re.search(regex, url)
20
- if match:
21
- return match.group(1)
22
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- def get_transcript(video_id):
 
 
 
25
  """
26
- Retrieves and concatenates the transcript for the given YouTube video ID.
 
 
27
  """
28
  try:
29
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
30
- return " ".join([entry["text"] for entry in transcript])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  except Exception as e:
32
- return f"Error retrieving transcript: {str(e)}"
 
33
 
34
- def summarize_text(text):
35
- """
36
- Summarizes the provided text using a LangChain agent.
37
- Uses GPT-4 model from OpenAI.
38
- Only the first 4000 characters are used to avoid token limit issues.
39
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  try:
41
- llm = ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key=OPENAI_KEY)
42
- truncated_text = text[:4000]
43
- messages = [
44
- SystemMessage(content="Summarize the given YouTube transcript concisely."),
45
- HumanMessage(content=truncated_text)
46
- ]
47
- summary = llm(messages).content
48
- return summary
 
49
  except Exception as e:
50
- return f"Error during summarization: {str(e)}"
51
-
52
- def main():
53
- st.set_page_config(page_title="YouTube Summarizer", layout="wide")
54
- st.title("🎥 YouTube Video Summarizer")
55
- video_url = st.text_input("Enter YouTube Video URL")
56
-
57
- if video_url:
58
- video_id = extract_video_id(video_url)
59
- if not video_id:
60
- st.error("Could not extract video ID from the provided URL.")
61
- return
62
-
63
- st.info(f"Extracted Video ID: {video_id}")
64
- transcript = get_transcript(video_id)
65
-
66
- if transcript.startswith("Error"):
67
- st.error(transcript)
68
- return
69
-
70
- st.subheader("Transcript:")
71
- if len(transcript) > 1000:
72
- st.write(transcript[:1000] + "...")
73
- else:
74
- st.write(transcript)
75
-
76
- st.subheader("AI-Generated Summary:")
77
- summary = summarize_text(transcript)
78
- st.write(summary)
79
-
80
- if __name__ == "__main__":
81
- main()
 
1
+ # buffett_bot_single_file.py
2
+ import streamlit as st
3
  import os
4
+ import json
5
+ import yfinance as yf
6
+ from dotenv import load_dotenv
7
 
8
+ # LangChain components
9
+ from langchain_openai import ChatOpenAI
10
+ from langchain.agents import AgentExecutor, create_openai_functions_agent
11
+ from langchain.memory import ConversationBufferMemory
12
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
13
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
14
+ from langchain.tools import Tool
15
+ from langchain_community.utilities import SerpAPIWrapper
16
 
17
+ # --- Configuration & Setup ---
18
 
19
+ # Load environment variables (API Keys)
20
+ load_dotenv()
 
 
 
21
 
22
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
23
+ SERPAPI_API_KEY = os.getenv("SERPAPI_API_KEY")
24
+
25
+ # Agent Configuration
26
+ MODEL_NAME = "gpt-4o" # Or "gpt-3.5-turbo", "gpt-4-turbo"
27
+ TEMPERATURE = 0.5
28
+ MEMORY_KEY = "chat_history"
29
+
30
+ # --- Buffett Persona Prompt ---
31
+
32
+ BUFFETT_SYSTEM_PROMPT = """
33
+ You are a conversational AI assistant modeled after Warren Buffett, the legendary value investor. Embody his persona accurately.
34
+
35
+ **Your Core Principles:**
36
+ * **Value Investing:** Focus on finding undervalued companies with solid fundamentals (earnings, low debt, strong management). Judge businesses, not stock tickers.
37
+ * **Long-Term Horizon:** Think in terms of decades, not days or months. Discourage short-term speculation and market timing.
38
+ * **Margin of Safety:** Only invest when the market price is significantly below your estimate of intrinsic value. Be conservative.
39
+ * **Business Moats:** Favor companies with durable competitive advantages (strong brands, network effects, low-cost production, regulatory advantages).
40
+ * **Understand the Business:** Only invest in companies you understand. "Risk comes from not knowing what you're doing."
41
+ * **Management Quality:** Assess the integrity and competence of the company's leadership.
42
+ * **Patience and Discipline:** Wait for the right opportunities ("fat pitches"). Avoid unnecessary activity. Be rational and unemotional.
43
+ * **Circle of Competence:** Stick to industries and businesses you can reasonably understand. Acknowledge what you don't know.
44
+
45
+ **Your Communication Style:**
46
+ * **Wise and Folksy:** Use simple language, analogies, and occasional humor, much like Buffett does in his letters and interviews.
47
+ * **Patient and Calm:** Respond thoughtfully, avoiding hype or panic.
48
+ * **Educational:** Explain your reasoning clearly, referencing your core principles.
49
+ * **Prudent:** Be cautious about making specific buy/sell recommendations without thorough analysis based on your principles. Often, you might explain *how* you would analyze it rather than giving a direct 'yes' or 'no'.
50
+ * **Quote Yourself:** Occasionally weave in famous Buffett quotes where appropriate (e.g., "Price is what you pay; value is what you get.", "Be fearful when others are greedy and greedy when others are fearful.").
51
+ * **Acknowledge Limitations:** If asked about something outside your expertise (e.g., complex tech you wouldn't invest in, short-term trading), politely state it's not your area.
52
+
53
+ **Interaction Guidelines:**
54
+ * When asked for stock recommendations, first use your tools to gather fundamental data (P/E, earnings, debt if possible) and recent news.
55
+ * Analyze the gathered information through the lens of your core principles (moat, management, valuation, long-term prospects).
56
+ * Explain your thought process clearly.
57
+ * If a company seems to fit your criteria, express cautious optimism, emphasizing the need for further due diligence by the investor.
58
+ * If a company doesn't fit (e.g., too speculative, high P/E without justification, outside circle of competence), explain why based on your principles.
59
+ * If asked for general advice, draw upon your well-known philosophies.
60
+ * Maintain conversational context using the provided chat history. Refer back to previous points if relevant.
61
+
62
+ Remember: You are simulating Warren Buffett. Your goal is to provide insights consistent with his philosophy and communication style, leveraging the tools for data when needed. Do not give definitive financial advice, but rather educate and explain the *Buffett way* of thinking about investments.
63
+ """
64
 
65
+ # --- Tool Definitions ---
66
+
67
+ # 1. Stock Data Tool (Yahoo Finance)
68
def _first_truthy(info: dict, *keys: str, default="N/A"):
    """Return the first truthy ``info[key]`` among *keys*, or *default*."""
    for key in keys:
        value = info.get(key)
        if value:
            return value
    return default


def _value_or_na(info: dict, key: str):
    """Return ``info[key]``, substituting "N/A" when it is missing or None."""
    value = info.get(key)
    return "N/A" if value is None else value


def get_stock_info(symbol: str) -> str:
    """
    Fetch key financial data for a stock symbol using Yahoo Finance.

    The result includes current price, P/E ratio, EPS, market cap, dividend
    yield, P/B ratio, sector, industry, and a business summary truncated to
    500 characters. Fields that are missing or None are reported as "N/A".

    Args:
        symbol: Ticker symbol to look up (e.g. 'AAPL', 'MSFT', 'BRK-B').

    Returns:
        A JSON string with the collected data, or a plain-text message
        starting with "Error" when nothing usable could be retrieved. This
        function never raises; any exception is converted to such a message.
    """
    try:
        ticker = yf.Ticker(symbol)
        # Treat a falsy ``.info`` (None or empty) as an empty dict so that the
        # ``.get()`` calls below are always safe.
        info = ticker.info or {}

        current_price = _first_truthy(
            info, "currentPrice", "regularMarketPrice", "previousClose"
        )
        if current_price == "N/A":
            # No price in ``info``: fall back to the most recent close from
            # the last few trading days.
            hist = ticker.history(period="5d")
            if hist.empty:
                return f"Error: Could not retrieve any data for symbol {symbol}. It might be delisted, invalid, or lack recent trading data."
            # Coerce to a built-in float so json.dumps can always serialize it.
            current_price = float(hist["Close"].iloc[-1])

        # Keep the summary concise; a None summary must not break slicing.
        summary = info.get("longBusinessSummary") or "N/A"
        if len(summary) > 500:
            summary = summary[:500] + "..."

        data = {
            "symbol": symbol,
            "companyName": _value_or_na(info, "longName"),
            "currentPrice": current_price,
            "peRatio": _first_truthy(info, "trailingPE", "forwardPE"),
            "earningsPerShare": _value_or_na(info, "trailingEps"),
            "marketCap": _value_or_na(info, "marketCap"),
            "dividendYield": _value_or_na(info, "dividendYield"),
            "priceToBook": _value_or_na(info, "priceToBook"),
            "sector": _value_or_na(info, "sector"),
            "industry": _value_or_na(info, "industry"),
            "summary": summary,
        }

        return json.dumps(data)

    except Exception as e:
        # Top-level boundary for the agent tool: report the failure as text so
        # the LLM can react to it instead of the whole agent run crashing.
        return f"Error fetching data for {symbol} using yfinance: {str(e)}. Symbol might be invalid or API issue."
113
 
114
+ stock_data_tool = Tool(
115
+ name="get_stock_financial_data",
116
+ func=get_stock_info,
117
+ description="""
118
+ Useful for fetching fundamental financial data for a specific stock symbol (ticker).
119
+ Input should be a single stock symbol (e.g., 'AAPL', 'MSFT', 'BRK-B').
120
+ Returns a JSON string containing key metrics like company name, current price, P/E ratio, EPS, market cap, dividend yield, price-to-book, sector, industry, and a business summary.
121
+ Use this to get the necessary financial context before forming an opinion based on Warren Buffett's principles.
122
+ """,
123
+ )
124
+
125
+ # 2. News Search Tool (SerpAPI)
126
+ try:
127
+ if not SERPAPI_API_KEY:
128
+ raise ValueError("SERPAPI_API_KEY environment variable not set.")
129
+ params = {
130
+ "engine": "google_news",
131
+ "gl": "us",
132
+ "hl": "en",
133
+ "num": 5 # Fetch top 5 news results
134
+ }
135
+ search_wrapper = SerpAPIWrapper(params=params, serpapi_api_key=SERPAPI_API_KEY)
136
+
137
+ news_search_tool = Tool(
138
+ name="search_stock_news",
139
+ func=search_wrapper.run,
140
+ description="""
141
+ Useful for searching recent news articles about a specific company or stock symbol.
142
+ Input should be the company name or stock symbol (e.g., 'Apple Inc. news', 'MSFT latest developments', 'Berkshire Hathaway earnings').
143
+ Returns a summary of recent news headlines and snippets.
144
+ Use this to understand recent events, sentiment, or developments related to a company before forming an opinion.
145
+ """,
146
+ )
147
+ serpapi_available = True
148
+ except Exception as e: # Catch broader exceptions during init
149
+ print(f"SerpAPI News Tool Warning: {e}")
150
+ # Provide a dummy tool if the key is missing or setup fails
151
+ news_search_tool = Tool(
152
+ name="search_stock_news",
153
+ func=lambda x: "News search unavailable (SerpAPI key missing or configuration error).",
154
+ description="News search tool (currently unavailable).",
155
+ # return_direct=True # Optional: returns message directly without LLM processing
156
+ )
157
+ serpapi_available = False
158
+
159
+
160
+ tools = [stock_data_tool, news_search_tool]
161
+
162
+ # --- LangChain Agent Setup ---
163
+
164
+ # Check for OpenAI Key
165
+ if not OPENAI_API_KEY:
166
+ st.error("Error: OPENAI_API_KEY environment variable not set. Cannot initialize the chatbot.", icon="❌")
167
+ st.stop() # Stop execution if key is missing
168
+
169
+ # LLM
170
+ llm = ChatOpenAI(
171
+ model=MODEL_NAME,
172
+ temperature=TEMPERATURE,
173
+ openai_api_key=OPENAI_API_KEY,
174
+ )
175
+
176
+ # Prompt Template
177
+ prompt_template = ChatPromptTemplate.from_messages(
178
+ [
179
+ SystemMessage(content=BUFFETT_SYSTEM_PROMPT),
180
+ MessagesPlaceholder(variable_name=MEMORY_KEY),
181
+ ("human", "{input}"),
182
+ MessagesPlaceholder(variable_name="agent_scratchpad"),
183
+ ]
184
+ )
185
+
186
+ # Memory (initialized fresh for each session in Streamlit context below)
187
+ # The agent executor itself needs the memory factory/object,
188
+ # but the actual state lives in st.session_state['memory'] for persistence across reruns.
189
+
190
+ # Agent
191
+ agent = create_openai_functions_agent(llm, tools, prompt_template)
192
+
193
+ # Agent Executor (initialized in Streamlit session state)
194
+
195
+
196
+ # --- Streamlit Frontend ---
197
+
198
+ st.set_page_config(page_title="Warren Buffett Bot", layout="wide")
199
+ st.title("Warren Buffett Investment Chatbot 📈")
200
+ st.caption("Ask me about investing, stocks, or market wisdom - in the style of Warren Buffett.")
201
+
202
+ # Display API Key status
203
+ st.sidebar.header("API Status")
204
+ if OPENAI_API_KEY: # Use a standard if/else block
205
+ st.sidebar.success("OpenAI API Key Loaded", icon="✅")
206
+ else:
207
+ st.sidebar.error("OpenAI API Key Missing", icon="❌")
208
+
209
+ # Keep the SerpAPI check as it is (already uses standard if/else)
210
+ if serpapi_available:
211
+ st.sidebar.success("SerpAPI Key Loaded (News Enabled)", icon="✅")
212
+ else:
213
+ st.sidebar.warning("SerpAPI Key Missing (News Disabled)", icon="⚠️")
214
+
215
+ # Initialize chat history and memory in Streamlit session state
216
+ if "messages" not in st.session_state:
217
+ st.session_state["messages"] = [
218
+ {"role": "assistant", "content": "Greetings! I'm here to chat about investing with the prudence and long-term view of Warren Buffett. How can I help you today?"}
219
+ ]
220
+ # Initialize memory object in session state
221
+ if 'memory' not in st.session_state:
222
+ st.session_state['memory'] = ConversationBufferMemory(memory_key=MEMORY_KEY, return_messages=True)
223
+
224
+ # Initialize AgentExecutor in session state if it doesn't exist
225
+ if 'agent_executor' not in st.session_state:
226
+ # The AgentExecutor needs the memory object from session state
227
+ st.session_state['agent_executor'] = AgentExecutor(
228
+ agent=agent,
229
+ tools=tools,
230
+ memory=st.session_state['memory'], # Use memory from session state
231
+ verbose=True, # Set to False for cleaner production output
232
+ handle_parsing_errors=True,
233
+ max_iterations=5,
234
+ )
235
+
236
+ # Display chat messages from history
237
+ for msg in st.session_state.messages:
238
+ st.chat_message(msg["role"]).write(msg["content"])
239
+
240
+ # Accept user input
241
+ if prompt := st.chat_input("Ask Buffett Bot..."):
242
+ # Add user message to chat history
243
+ st.session_state.messages.append({"role": "user", "content": prompt})
244
+ st.chat_message("user").write(prompt)
245
+
246
+ # Prepare agent input
247
+ # The agent executor uses the memory object linked during its initialization
248
+ agent_input = {"input": prompt}
249
+
250
+ # Invoke the agent using the executor stored in session state
251
  try:
252
+ with st.spinner("Buffett is pondering..."):
253
+ agent_executor_instance = st.session_state['agent_executor']
254
+ response = agent_executor_instance.invoke(agent_input)
255
+
256
+ # Extract and display response
257
+ output = response.get('output', "Sorry, I encountered an issue and couldn't formulate a response.")
258
+ st.session_state.messages.append({"role": "assistant", "content": output})
259
+ st.chat_message("assistant").write(output)
260
+
261
  except Exception as e:
262
+ error_message = f"An error occurred: {str(e)}"
263
+ st.error(error_message, icon="🔥")
264
+ # Add error message to chat
265
+ st.session_state.messages.append({"role": "assistant", "content": f"Sorry, I ran into a technical difficulty: {e}"})
266
+ st.chat_message("assistant").write(f"Sorry, I ran into a technical difficulty: {e}")
267
+
268
+
269
+ # Optional: Add a way to clear history/memory for a new session
270
+ if st.sidebar.button("Clear Chat History"):
271
+ st.session_state.messages = [
272
+ {"role": "assistant", "content": "Chat history cleared. How can I help you start anew?"}
273
+ ]
274
+ st.session_state.memory.clear() # Clear the LangChain memory object
275
+ # Optionally reinstantiate executor if needed, though clearing memory might suffice
276
+ # if 'agent_executor' in st.session_state:
277
+ # del st.session_state['agent_executor']
278
+ st.rerun()