Spaces:
Runtime error
Runtime error
Sigrid De los Santos
committed on
Commit
·
97063b2
1
Parent(s):
9c57dcd
Remove remaining binary file for Hugging Face
Browse files- src/news_analysis.py +177 -27
src/news_analysis.py
CHANGED
|
@@ -1,3 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import csv
|
| 3 |
from datetime import datetime
|
|
@@ -7,15 +175,11 @@ from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
|
|
| 7 |
import requests
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
from fin_interpreter import analyze_article
|
| 10 |
-
from tavily import TavilyClient
|
| 11 |
|
| 12 |
-
# === Load environment or passed keys ===
|
| 13 |
load_dotenv()
|
| 14 |
OPENAI_KEY = os.environ.get("OPENAI_API_KEY") or os.getenv("OPENAI_KEY")
|
| 15 |
-
TAVILY_KEY =
|
| 16 |
-
|
| 17 |
-
# === Initialize Tavily Client ===
|
| 18 |
-
tavily_client = TavilyClient(api_key=TAVILY_KEY)
|
| 19 |
|
| 20 |
# === Get OpenAI client when needed ===
|
| 21 |
def get_llm():
|
|
@@ -31,8 +195,9 @@ def get_related_terms(topic):
|
|
| 31 |
response = llm.invoke(prompt)
|
| 32 |
return response.content.split(",")
|
| 33 |
|
|
|
|
| 34 |
def tavily_search(query, days, max_results=10):
|
| 35 |
-
api_key = os.
|
| 36 |
url = "https://api.tavily.com/search"
|
| 37 |
headers = {"Authorization": f"Bearer {api_key}"}
|
| 38 |
payload = {
|
|
@@ -77,28 +242,13 @@ def fetch_deep_news(topic, days):
|
|
| 77 |
for query in all_queries:
|
| 78 |
try:
|
| 79 |
print(f"🔍 Tavily query: {query}")
|
| 80 |
-
response =
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
"Content-Type": "application/json"
|
| 85 |
-
},
|
| 86 |
-
json={
|
| 87 |
-
"query": query,
|
| 88 |
-
"search_depth": "advanced",
|
| 89 |
-
"topic": "news",
|
| 90 |
-
"days": int(days),
|
| 91 |
-
"max_results": 10,
|
| 92 |
-
"include_answer": False,
|
| 93 |
-
"include_raw_content": False
|
| 94 |
-
}
|
| 95 |
-
)
|
| 96 |
-
|
| 97 |
-
if response.status_code != 200:
|
| 98 |
-
print(f"⚠️ Tavily API error: {response.status_code} - {response.text}")
|
| 99 |
continue
|
| 100 |
|
| 101 |
-
for item in response.
|
| 102 |
url = item.get("url")
|
| 103 |
content = item.get("content", "") or item.get("summary", "") or item.get("title", "")
|
| 104 |
if url and url not in seen_urls and len(content) > 150:
|
|
|
|
| 1 |
+
# import os
|
| 2 |
+
# import csv
|
| 3 |
+
# from datetime import datetime
|
| 4 |
+
# from langchain_openai import ChatOpenAI
|
| 5 |
+
# from langchain_core.prompts import PromptTemplate
|
| 6 |
+
# from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate
|
| 7 |
+
# import requests
|
| 8 |
+
# from dotenv import load_dotenv
|
| 9 |
+
# from fin_interpreter import analyze_article
|
| 10 |
+
# from tavily import TavilyClient
|
| 11 |
+
|
| 12 |
+
# # === Load environment or passed keys ===
|
| 13 |
+
# load_dotenv()
|
| 14 |
+
# OPENAI_KEY = os.environ.get("OPENAI_API_KEY") or os.getenv("OPENAI_KEY")
|
| 15 |
+
# TAVILY_KEY = os.environ.get("TAVILY_API_KEY") or os.getenv("TAVILY_KEY")
|
| 16 |
+
|
| 17 |
+
# # === Initialize Tavily Client ===
|
| 18 |
+
# tavily_client = TavilyClient(api_key=TAVILY_KEY)
|
| 19 |
+
|
| 20 |
+
# # === Get OpenAI client when needed ===
|
| 21 |
+
# def get_llm():
|
| 22 |
+
# openai_key = os.environ.get("OPENAI_API_KEY")
|
| 23 |
+
# if not openai_key:
|
| 24 |
+
# raise ValueError("OPENAI_API_KEY not found.")
|
| 25 |
+
# return ChatOpenAI(model_name="gpt-4.1", openai_api_key=openai_key)
|
| 26 |
+
|
| 27 |
+
# # === Related Terms ===
|
| 28 |
+
# def get_related_terms(topic):
|
| 29 |
+
# llm = get_llm()
|
| 30 |
+
# prompt = f"What are 5 closely related financial or industry terms to '{topic}'?"
|
| 31 |
+
# response = llm.invoke(prompt)
|
| 32 |
+
# return response.content.split(",")
|
| 33 |
+
|
| 34 |
+
# def tavily_search(query, days, max_results=10):
|
| 35 |
+
# api_key = os.getenv("TAVILY_KEY")
|
| 36 |
+
# url = "https://api.tavily.com/search"
|
| 37 |
+
# headers = {"Authorization": f"Bearer {api_key}"}
|
| 38 |
+
# payload = {
|
| 39 |
+
# "query": query,
|
| 40 |
+
# "search_depth": "advanced",
|
| 41 |
+
# "topic": "news",
|
| 42 |
+
# "days": int(days),
|
| 43 |
+
# "max_results": max_results,
|
| 44 |
+
# "include_answer": False,
|
| 45 |
+
# "include_raw_content": False
|
| 46 |
+
# }
|
| 47 |
+
# response = requests.post(url, json=payload, headers=headers)
|
| 48 |
+
# return response.json()
|
| 49 |
+
|
| 50 |
+
# # === Smart News Search ===
|
| 51 |
+
# def fetch_deep_news(topic, days):
|
| 52 |
+
# all_results = []
|
| 53 |
+
# seen_urls = set()
|
| 54 |
+
|
| 55 |
+
# base_queries = [
|
| 56 |
+
# topic,
|
| 57 |
+
# f"{topic} AND startup",
|
| 58 |
+
# f"{topic} AND acquisition OR merger OR funding",
|
| 59 |
+
# f"{topic} AND CEO OR executive OR leadership",
|
| 60 |
+
# f"{topic} AND venture capital OR Series A OR Series B",
|
| 61 |
+
# f"{topic} AND government grant OR approval OR contract",
|
| 62 |
+
# f"{topic} AND underrated OR small-cap OR micro-cap"
|
| 63 |
+
# ]
|
| 64 |
+
|
| 65 |
+
# investor_queries = [
|
| 66 |
+
# f"{topic} AND BlackRock OR Vanguard OR SoftBank",
|
| 67 |
+
# f"{topic} AND Elon Musk OR Sam Altman OR Peter Thiel",
|
| 68 |
+
# f"{topic} AND Berkshire Hathaway OR Warren Buffett",
|
| 69 |
+
# f"{topic} AND institutional investor OR hedge fund",
|
| 70 |
+
# ]
|
| 71 |
+
|
| 72 |
+
# related_terms = get_related_terms(topic)
|
| 73 |
+
# synonym_queries = [f"{term} AND {kw}" for term in related_terms for kw in ["startup", "funding", "merger", "acquisition"]]
|
| 74 |
+
|
| 75 |
+
# all_queries = base_queries + investor_queries + synonym_queries
|
| 76 |
+
|
| 77 |
+
# for query in all_queries:
|
| 78 |
+
# try:
|
| 79 |
+
# print(f"🔍 Tavily query: {query}")
|
| 80 |
+
# response = requests.post(
|
| 81 |
+
# url="https://api.tavily.com/search",
|
| 82 |
+
# headers={
|
| 83 |
+
# "Authorization": f"Bearer {TAVILY_KEY}",
|
| 84 |
+
# "Content-Type": "application/json"
|
| 85 |
+
# },
|
| 86 |
+
# json={
|
| 87 |
+
# "query": query,
|
| 88 |
+
# "search_depth": "advanced",
|
| 89 |
+
# "topic": "news",
|
| 90 |
+
# "days": int(days),
|
| 91 |
+
# "max_results": 10,
|
| 92 |
+
# "include_answer": False,
|
| 93 |
+
# "include_raw_content": False
|
| 94 |
+
# }
|
| 95 |
+
# )
|
| 96 |
+
|
| 97 |
+
# if response.status_code != 200:
|
| 98 |
+
# print(f"⚠️ Tavily API error: {response.status_code} - {response.text}")
|
| 99 |
+
# continue
|
| 100 |
+
|
| 101 |
+
# for item in response.json().get("results", []):
|
| 102 |
+
# url = item.get("url")
|
| 103 |
+
# content = item.get("content", "") or item.get("summary", "") or item.get("title", "")
|
| 104 |
+
# if url and url not in seen_urls and len(content) > 150:
|
| 105 |
+
# all_results.append({
|
| 106 |
+
# "title": item.get("title"),
|
| 107 |
+
# "url": url,
|
| 108 |
+
# "content": content
|
| 109 |
+
# })
|
| 110 |
+
# seen_urls.add(url)
|
| 111 |
+
|
| 112 |
+
# except Exception as e:
|
| 113 |
+
# print(f"⚠️ Tavily request failed for query '{query}': {e}")
|
| 114 |
+
|
| 115 |
+
# print(f"📰 Total articles collected: {len(all_results)}")
|
| 116 |
+
# return all_results
|
| 117 |
+
|
| 118 |
+
# # === Generate Markdown Report ===
|
| 119 |
+
# def generate_value_investor_report(topic, news_results, max_articles=20, max_chars_per_article=400):
|
| 120 |
+
# news_results = news_results[:max_articles]
|
| 121 |
+
|
| 122 |
+
# for item in news_results:
|
| 123 |
+
# result = analyze_article(item["content"])
|
| 124 |
+
# item["fin_sentiment"] = result.get("sentiment", "neutral")
|
| 125 |
+
# item["fin_confidence"] = result.get("confidence", 0.0)
|
| 126 |
+
# item["investment_decision"] = result.get("investment_decision", "Watch")
|
| 127 |
+
|
| 128 |
+
# article_summary = "".join(
|
| 129 |
+
# f"- **{item['title']}**: {item['content'][:max_chars_per_article]}... "
|
| 130 |
+
# f"(Sentiment: {item['fin_sentiment'].title()}, Confidence: {item['fin_confidence']:.2f}, "
|
| 131 |
+
# f"Decision: {item['investment_decision']}) [link]({item['url']})\n"
|
| 132 |
+
# for item in news_results
|
| 133 |
+
# )
|
| 134 |
+
|
| 135 |
+
# prompt = PromptTemplate.from_template("""
|
| 136 |
+
# You're a highly focused value investor. Analyze this week's news on "{Topic}".
|
| 137 |
+
|
| 138 |
+
# Your goal is to uncover:
|
| 139 |
+
# - Meaningful events (e.g., CEO joining a startup, insider buys, big-name partnerships)
|
| 140 |
+
# - Startups or small caps that may signal undervalued opportunity
|
| 141 |
+
# - Connections to key individuals or institutions (e.g., Elon Musk investing, Sam Altman joining)
|
| 142 |
+
# - Companies with strong fundamentals: low P/E, low P/B, high ROE, recent IPOs, moats, or high free cash flow
|
| 143 |
+
|
| 144 |
+
# ### News
|
| 145 |
+
# {ArticleSummaries}
|
| 146 |
+
|
| 147 |
+
# Write a markdown memo with:
|
| 148 |
+
# 1. **Key Value Signals**
|
| 149 |
+
# 2. **Stocks or Startups to Watch**
|
| 150 |
+
# 3. **What Smart Money Might Be Acting On**
|
| 151 |
+
# 4. **References**
|
| 152 |
+
# 5. **Investment Hypothesis**
|
| 153 |
+
|
| 154 |
+
# Include context and macroeconomic/regulatory angles. Add an intro on sentiment and market trends for the week.
|
| 155 |
+
# """)
|
| 156 |
+
|
| 157 |
+
# chat_prompt = ChatPromptTemplate.from_messages([
|
| 158 |
+
# SystemMessagePromptTemplate(prompt=prompt)
|
| 159 |
+
# ])
|
| 160 |
+
# prompt_value = chat_prompt.format_prompt(
|
| 161 |
+
# Topic=topic,
|
| 162 |
+
# ArticleSummaries=article_summary
|
| 163 |
+
# ).to_messages()
|
| 164 |
+
|
| 165 |
+
# llm = get_llm()
|
| 166 |
+
# result = llm.invoke(prompt_value)
|
| 167 |
+
# return result.content
|
| 168 |
+
|
| 169 |
import os
|
| 170 |
import csv
|
| 171 |
from datetime import datetime
|
|
|
|
| 175 |
import requests
|
| 176 |
from dotenv import load_dotenv
|
| 177 |
from fin_interpreter import analyze_article
|
|
|
|
| 178 |
|
| 179 |
+
# === Load environment ===
|
| 180 |
load_dotenv()
|
| 181 |
OPENAI_KEY = os.environ.get("OPENAI_API_KEY") or os.getenv("OPENAI_KEY")
|
| 182 |
+
TAVILY_KEY = None # Will be accessed dynamically at runtime
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
# === Get OpenAI client when needed ===
|
| 185 |
def get_llm():
|
|
|
|
| 195 |
response = llm.invoke(prompt)
|
| 196 |
return response.content.split(",")
|
| 197 |
|
| 198 |
+
# === Tavily Search ===
|
| 199 |
def tavily_search(query, days, max_results=10):
|
| 200 |
+
api_key = os.environ.get("TAVILY_API_KEY") or TAVILY_KEY
|
| 201 |
url = "https://api.tavily.com/search"
|
| 202 |
headers = {"Authorization": f"Bearer {api_key}"}
|
| 203 |
payload = {
|
|
|
|
| 242 |
for query in all_queries:
|
| 243 |
try:
|
| 244 |
print(f"🔍 Tavily query: {query}")
|
| 245 |
+
response = tavily_search(query, days)
|
| 246 |
+
|
| 247 |
+
if not isinstance(response, dict) or "results" not in response:
|
| 248 |
+
print(f"⚠️ Tavily API response issue: {response}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
continue
|
| 250 |
|
| 251 |
+
for item in response.get("results", []):
|
| 252 |
url = item.get("url")
|
| 253 |
content = item.get("content", "") or item.get("summary", "") or item.get("title", "")
|
| 254 |
if url and url not in seen_urls and len(content) > 150:
|