ofermend committed on
Commit
e01b95d
1 Parent(s): 5c70cac

added tools

Browse files
Files changed (2) hide show
  1. README.md +2 -2
  2. app.py +91 -96
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Finance chat
3
  emoji: 🐨
4
  colorFrom: indigo
5
  colorTo: indigo
@@ -8,7 +8,7 @@ app_port: 8501
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
- short_description: Finance chatbot using vectara-agent
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Hacker News chat
3
  emoji: 🐨
4
  colorFrom: indigo
5
  colorTo: indigo
 
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ short_description: chatbot with HN data using vectara-agent
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -3,128 +3,127 @@ from omegaconf import OmegaConf
3
  import streamlit as st
4
  import os
5
  from PIL import Image
6
- import re
7
  import sys
8
  import datetime
9
- import pandas as pd
10
  import requests
11
  from dotenv import load_dotenv
 
12
 
13
  from pydantic import Field, BaseModel
14
  from vectara_agent.agent import Agent, AgentStatusType
15
  from vectara_agent.tools import ToolsFactory
16
 
17
-
18
- tickers = {
19
- "AAPL": "Apple Computer",
20
- "GOOG": "Google",
21
- "AMZN": "Amazon",
22
- "SNOW": "Snowflake",
23
- "TEAM": "Atlassian",
24
- "TSLA": "Tesla",
25
- "NVDA": "Nvidia",
26
- "MSFT": "Microsoft",
27
- "AMD": "Advanced Micro Devices",
28
- "INTC": "Intel",
29
- "NFLX": "Netflix",
30
- }
31
- years = [2020, 2021, 2022, 2023, 2024]
32
  initial_prompt = "How can I help you today?"
33
 
34
  load_dotenv(override=True)
35
 
36
  def create_tools(cfg):
37
-
38
- def get_company_info() -> list[str]:
39
- """
40
- Returns a dictionary of companies you can query about. Always check this before using any other tool.
41
- The output is a dictionary of valid ticker symbols mapped to company names.
42
- You can use this to identify the companies you can query about, and their ticker information.
43
- """
44
- return tickers
45
-
46
- def get_valid_years() -> list[str]:
47
- """
48
- Returns a list of the years for which financial reports are available.
49
- Always check this before using any other tool.
50
- """
51
- return years
52
 
53
- # Tool to get the income statement for a given company and year using the FMP API
54
- def get_income_statement(
55
- ticker=Field(description="the ticker symbol of the company."),
56
- year=Field(description="the year for which to get the income statement."),
57
- ) -> str:
58
- """
59
- Get the income statement for a given company and year using the FMP (https://financialmodelingprep.com) API.
60
- Returns a dictionary with the income statement data. All data is in USD, but you can convert it to more compact form like K, M, B.
61
- """
62
- fmp_api_key = os.environ.get("FMP_API_KEY", None)
63
- if fmp_api_key is None:
64
- return "FMP_API_KEY environment variable not set. This tool does not work."
65
- url = f"https://financialmodelingprep.com/api/v3/income-statement/{ticker}?apikey={fmp_api_key}"
66
- response = requests.get(url)
67
- if response.status_code == 200:
68
- data = response.json()
69
- income_statement = pd.DataFrame(data)
70
- income_statement["date"] = pd.to_datetime(income_statement["date"])
71
- income_statement_specific_year = income_statement[
72
- income_statement["date"].dt.year == int(year)
73
- ]
74
- values_dict = income_statement_specific_year.to_dict(orient="records")[0]
75
- return f"Financial results: {', '.join([f'{key}: {value}' for key, value in values_dict.items() if key not in ['date', 'cik', 'link', 'finalLink']])}"
76
- else:
77
- return "FMP API returned error. This tool does not work."
78
-
79
- class QueryTranscriptsArgs(BaseModel):
80
  query: str = Field(..., description="The user query.")
81
- year: int = Field(..., description=f"The year. an integer between {min(years)} and {max(years)}.")
82
- ticker: str = Field(..., description=f"The company ticker. Must be a valid ticket symbol from the list {tickers.keys()}.")
83
 
84
  tools_factory = ToolsFactory(vectara_api_key=cfg.api_key,
85
  vectara_customer_id=cfg.customer_id,
86
  vectara_corpus_id=cfg.corpus_id)
87
- ask_transcripts = tools_factory.create_rag_tool(
88
- tool_name = "ask_transcripts",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  tool_description = """
90
- Given a company name and year,
91
- returns a response (str) to a user question about a company, based on analyst call transcripts about the company's financial reports for that year.
92
- You can ask this tool any question about the compaany including risks, opportunities, financial performance, competitors and more.
93
- make sure to provide the a valid company ticker and year.
94
  """,
95
- tool_args_schema = QueryTranscriptsArgs,
96
- tool_filter_template = "doc.year = {year} and doc.ticker = '{ticker}'",
97
  reranker = "multilingual_reranker_v1", rerank_k = 100,
98
- n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.01,
99
  summary_num_results = 10,
100
  vectara_summarizer = 'vectara-summary-ext-24-05-med-omni',
 
101
  )
102
 
103
- return (tools_factory.get_tools(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  [
105
- get_company_info,
106
- get_valid_years,
107
- get_income_statement,
 
108
  ]
109
  ) +
110
- tools_factory.standard_tools() +
111
- tools_factory.financial_tools() +
112
- tools_factory.guardrail_tools() +
113
- [ask_transcripts]
114
  )
115
 
116
  def initialize_agent(_cfg):
117
  date = datetime.datetime.now().strftime("%Y-%m-%d")
118
- financial_bot_instructions = f"""
119
- - You are a helpful financial assistant, with expertise in finanal reporting, in conversation with a user.
120
  - Today's date is {date}.
121
- - Guardrails: never discuss politics, and always respond politely.
122
- - Respond in a compact format by using appropriate units of measure (e.g., K for thousands, M for millions, B for billions).
123
- Do not report the same number twice (e.g. $100K and 100,000 USD).
124
- - Use tools when available instead of depending on your own knowledge.
125
  - If a tool cannot respond properly, retry with a rephrased question or ask the user for more information.
126
- - When querying a tool for a numeric value or KPI, use a concise and non-ambiguous description of what you are looking for.
127
- - If you calculate a metric, make sure you have all the necessary information to complete the calculation. Don't guess.
128
  - Be very careful not to report results you are not confident about.
129
  """
130
 
@@ -134,8 +133,8 @@ def initialize_agent(_cfg):
134
 
135
  agent = Agent(
136
  tools=create_tools(_cfg),
137
- topic="10-K annual financial reports",
138
- custom_instructions=financial_bot_instructions,
139
  update_func=update_func
140
  )
141
  return agent
@@ -149,7 +148,7 @@ def launch_bot():
149
  st.session_state.log_messages = []
150
  st.session_state.show_logs = False
151
 
152
- st.set_page_config(page_title="Financial Assistant", layout="wide")
153
  if 'cfg' not in st.session_state:
154
  cfg = OmegaConf.create({
155
  'customer_id': str(os.environ['VECTARA_CUSTOMER_ID']),
@@ -164,11 +163,7 @@ def launch_bot():
164
  with st.sidebar:
165
  image = Image.open('Vectara-logo.png')
166
  st.image(image, width=250)
167
- st.markdown("## Welcome to the financial assistant demo.\n\n\n")
168
- companies = ", ".join(tickers.values())
169
- st.markdown(
170
- f"This assistant can help you with any questions about the financials of several companies:\n\n **{companies}**.\n"
171
- )
172
 
173
  st.markdown("\n\n")
174
  bc1, bc2 = st.columns([1, 1])
@@ -208,8 +203,8 @@ def launch_bot():
208
  with st.chat_message("assistant", avatar='🤖'):
209
  with st.spinner(st.session_state.thinking_message):
210
  res = st.session_state.agent.chat(prompt)
211
- cleaned = re.sub(r'\[\d+\]', '', res).replace('$', '\\$')
212
- message = {"role": "assistant", "content": cleaned, "avatar": '🤖'}
213
  st.session_state.messages.append(message)
214
  st.rerun()
215
 
 
3
  import streamlit as st
4
  import os
5
  from PIL import Image
 
6
  import sys
7
  import datetime
 
8
  import requests
9
  from dotenv import load_dotenv
10
+ from typing import Tuple
11
 
12
  from pydantic import Field, BaseModel
13
  from vectara_agent.agent import Agent, AgentStatusType
14
  from vectara_agent.tools import ToolsFactory
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  initial_prompt = "How can I help you today?"
17
 
18
  load_dotenv(override=True)
19
 
20
  def create_tools(cfg):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ class QueryHackerNews(BaseModel):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  query: str = Field(..., description="The user query.")
 
 
24
 
25
  tools_factory = ToolsFactory(vectara_api_key=cfg.api_key,
26
  vectara_customer_id=cfg.customer_id,
27
  vectara_corpus_id=cfg.corpus_id)
28
+ ask_hackernews_semantic = tools_factory.create_rag_tool(
29
+ tool_name = "ask_hackernews_semantic",
30
+ tool_description = """
31
+ Responds to query based on information in hacker news from the last 6 months.
32
+ Performs a semantic search to find relevant information.
33
+ Use this tool to perform pure semantic search.
34
+ """,
35
+ tool_args_schema = QueryHackerNews,
36
+ reranker = "multilingual_reranker_v1", rerank_k = 100,
37
+ n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.0,
38
+ summary_num_results = 10,
39
+ vectara_summarizer = 'vectara-summary-ext-24-05-med-omni',
40
+ include_citations = True,
41
+ )
42
+
43
+ ask_hackernews_hybrid = tools_factory.create_rag_tool(
44
+ tool_name = "ask_hackernews_keyword",
45
  tool_description = """
46
+ Responds to query based on information in hacker news from the last 6 months
47
+ performs a hybrid search (both semantic and keyword) to find relevant information.
48
+ Use this tool when some amount of keyword search is expected to work better than semantic search,
49
+ For example, when you are looking for specific keywords or use rare words in the query.
50
  """,
51
+ tool_args_schema = QueryHackerNews,
 
52
  reranker = "multilingual_reranker_v1", rerank_k = 100,
53
+ n_sentences_before = 2, n_sentences_after = 2, lambda_val = 0.1,
54
  summary_num_results = 10,
55
  vectara_summarizer = 'vectara-summary-ext-24-05-med-omni',
56
+ include_citations = True,
57
  )
58
 
59
def _fetch_story_ids(listing: str, n_stories: int, timeout: float = 10.0) -> list:
    """
    Fetch one hacker news story listing and return its first n_stories IDs.

    listing is the endpoint name without the ".json" suffix
    ("topstories", "showstories" or "askstories").
    Raises requests.HTTPError when the API answers with an error status.
    """
    db_url = 'https://hacker-news.firebaseio.com/v0/'
    # Bound the wait so a stalled connection cannot block the caller forever.
    response = requests.get(f"{db_url}{listing}.json", timeout=timeout)
    # Fail loudly on HTTP errors instead of slicing an error payload.
    response.raise_for_status()
    # Treat an empty/null payload as "no stories" rather than crashing on the slice.
    story_ids = response.json() or []
    return story_ids[:n_stories]

# NOTE(review): the three tools below are annotated `-> list[str]`, but the IDs are
# returned exactly as the API delivers them (the HN API documents them as integers).
# The annotation is left untouched because it is part of the tool signature — confirm
# which one is intended.

def get_top_stories(
    n_stories: int = Field(default=10, description="The number of top stories to return.")
) -> list[str]:
    """
    Get the top stories from hacker news.
    Returns a list of story IDs for the top stories right now.
    """
    return _fetch_story_ids("topstories", n_stories)

def get_show_stories(
    n_stories: int = Field(default=10, description="The number of top SHOW HN stories to return.")
) -> list[str]:
    """
    Get the top SHOW HN stories from hacker news.
    Returns a list of story IDs for the top SHOW HN stories right now.
    """
    return _fetch_story_ids("showstories", n_stories)

def get_ask_stories(
    n_stories: int = Field(default=10, description="The number of top ASK HN stories to return.")
) -> list[str]:
    """
    Get the top ASK HN stories from hacker news.
    Returns a list of story IDs for the top ASK HN stories right now.
    """
    return _fetch_story_ids("askstories", n_stories)
91
+
92
def get_story_details(
    story_id: str = Field(..., description="The story ID.")
) -> Tuple[str, str]:
    """
    Get the title of a story from hacker news.
    Returns the title of the story, and the URL associated with it.
    When the story has no external link (for example an ASK HN post),
    the URL of its hacker news discussion page is returned instead.
    Raises ValueError when no item exists for the given story ID.
    """
    db_url = 'https://hacker-news.firebaseio.com/v0/'
    # Bound the wait so a stalled connection cannot block the caller forever.
    response = requests.get(f"{db_url}item/{story_id}.json", timeout=10)
    # Fail loudly on HTTP errors instead of indexing into an error payload.
    response.raise_for_status()
    story = response.json()
    if not story:
        # An empty/null payload means there is nothing to describe; report that
        # clearly instead of failing with an opaque TypeError on the lookup below.
        raise ValueError(f"No hacker news item found for story ID {story_id}.")
    # 'title' and 'url' are optional item fields; the original indexing raised
    # KeyError for link-less stories.
    title = story.get('title', '')
    url = story.get('url') or f"https://news.ycombinator.com/item?id={story_id}"
    return title, url
102
+
103
+ return (
104
+ tools_factory.get_tools(
105
  [
106
+ get_top_stories,
107
+ get_show_stories,
108
+ get_ask_stories,
109
+ get_story_details,
110
  ]
111
  ) +
112
+ tools_factory.standard_tools() +
113
+ tools_factory.guardrail_tools() +
114
+ [ask_hackernews_semantic, ask_hackernews_hybrid]
 
115
  )
116
 
117
  def initialize_agent(_cfg):
118
  date = datetime.datetime.now().strftime("%Y-%m-%d")
119
+ bot_instructions = f"""
120
+ - You are a helpful assistant, answering user questions about content from hacker news.
121
  - Today's date is {date}.
122
+ - Never discuss politics, and always respond politely.
123
+ - Use tools when available instead of depending on your own knowledge.
124
+ - For RAG tools, if the tool returns an 'fcs' score, consider that as a confidence score for the response not being a hallucination.
125
+ 0 = high hallucination, 1 = low or no hallucination. Values below 0.5 might mean the text is hallucination.
126
  - If a tool cannot respond properly, retry with a rephrased question or ask the user for more information.
 
 
127
  - Be very careful not to report results you are not confident about.
128
  """
129
 
 
133
 
134
  agent = Agent(
135
  tools=create_tools(_cfg),
136
+ topic="hacker news",
137
+ custom_instructions=bot_instructions,
138
  update_func=update_func
139
  )
140
  return agent
 
148
  st.session_state.log_messages = []
149
  st.session_state.show_logs = False
150
 
151
+ st.set_page_config(page_title="Hacker News Bot", layout="wide")
152
  if 'cfg' not in st.session_state:
153
  cfg = OmegaConf.create({
154
  'customer_id': str(os.environ['VECTARA_CUSTOMER_ID']),
 
163
  with st.sidebar:
164
  image = Image.open('Vectara-logo.png')
165
  st.image(image, width=250)
166
+ st.markdown("## Welcome to the hacker news assistant demo.\n\n\n")
 
 
 
 
167
 
168
  st.markdown("\n\n")
169
  bc1, bc2 = st.columns([1, 1])
 
203
  with st.chat_message("assistant", avatar='🤖'):
204
  with st.spinner(st.session_state.thinking_message):
205
  res = st.session_state.agent.chat(prompt)
206
+ res = res.replace('$', '\\$') # escape dollar sign for markdown
207
+ message = {"role": "assistant", "content": res, "avatar": '🤖'}
208
  st.session_state.messages.append(message)
209
  st.rerun()
210