pvanand committed on
Commit
26e0ddc
·
verified ·
1 Parent(s): 7544f89

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +148 -138
main.py CHANGED
@@ -1,172 +1,182 @@
1
- from fastapi import FastAPI, HTTPException, Request, Query
2
- from fastapi.middleware.cors import CORSMiddleware
3
- from pydantic import BaseModel
4
- from typing import List, Dict, Any
5
- from helper_functions_api import has_tables, extract_data_from_tag, openrouter_response,md_to_html, search_brave, fetch_and_extract_content, limit_tokens, together_response, insert_data
6
  import os
7
- from dotenv import load_dotenv, find_dotenv
8
  from datetime import datetime, timedelta
 
 
 
 
 
 
9
  from fastapi_cache import FastAPICache
10
  from fastapi_cache.backends.inmemory import InMemoryBackend
11
  from fastapi_cache.decorator import cache
12
- import asyncio
13
- import re
14
- # Load environment variables from .env file
15
- #load_dotenv("keys.env")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
 
17
  app = FastAPI()
18
 
19
  @app.on_event("startup")
20
  async def startup():
21
  FastAPICache.init(InMemoryBackend(), prefix="fastapi-cache")
22
 
23
- # Groq model names
24
- llm_default_small = "llama3-8b-8192"
25
- llm_default_medium = "llama3-70b-8192"
26
-
27
- # Together Model names (fallback)
28
- llm_fallback_small = "meta-llama/Llama-3-8b-chat-hf"
29
- llm_fallback_medium = "meta-llama/Llama-3-70b-chat-hf"
30
-
31
- SysPromptJson = "You are now in the role of an expert AI who can extract structured information from user request. Both key and value pairs must be in double quotes. You must respond ONLY with a valid JSON file. Do not add any additional comments."
32
- SysPromptList = "You are now in the role of an expert AI who can extract structured information from user request. All elements must be in double quotes. You must respond ONLY with a valid python List. Do not add any additional comments."
33
- SysPromptDefault = "You are an expert AI, complete the given task. Do not add any additional comments."
34
- SysPromptMd = "You are an expert AI who can create a structured report using information provided in the context from user request.The report should be in markdown format consists of markdown tables structured into subtopics. Do not add any additional comments."
35
-
36
- prompt_user = {}
37
- prompt_system = {}
38
- prompt_user["online"] = {}
39
- prompt_user["offline"] = {}
40
- prompt_user["online"]["chat"] = "Write a well thought out, detailed and structured answer to the query:: {description} #### , refer the provided internet search results reference:{reference}"
41
- prompt_user["online"]["report"] = "Write a well thought out, detailed and structured Report to the query:: {description} #### , refer the provided internet search results reference:{reference}, The report should be well formatted using markdown format structured into subtopics as necessory"
42
- prompt_user["online"]["report_table"] = "Write a well thought out Report to the query:: {description},#### , refer the provided internet search results reference:{reference}. The report should be well formatted using markdown format, structured into subtopics, include tables or lists as needed to make it well readable"
43
-
44
- prompt_user["offline"]["chat"] = "Write a well thought out, detailed and structured answer to the query:: {description}"
45
- prompt_user["offline"]["report"] = "Write a well thought out, detailed and structured Report to the query:: {description}. The report should be well formatted using markdown format, structured into subtopics"
46
- prompt_user["offline"]["report_table"] = "Write a detailed and structured Report to the query:: {description}, The report should be well formatted using markdown format, structured into subtopics, include tables or lists as needed to make it well readable"
47
-
48
- prompt_system["online"] = """You are an expert AI who can create a detailed structured report using internet search results.
49
-
50
- 1 filter and summarize relevant information, if there are conflicting information, use the latest source.
51
- 2. use it to construct a clear and factual answer.
52
- Your response should be structured and properly formatted using markdown headings, subheadings, tables, use as necessory. Ignore Links and references"""
53
-
54
- prompt_system["offline"] = """You are an expert AI who can create detailed answers. Your response should be properly formatted and well readable using markdown formatting."""
55
-
56
- TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
57
- BRAVE_API_KEY = os.getenv('BRAVE_API_KEY')
58
- GROQ_API_KEY = os.getenv("GROQ_API_KEY")
59
- HELICON_API_KEY = os.getenv("HELICON_API_KEY")
60
- SUPABASE_USER = os.environ['SUPABASE_USER']
61
- SUPABASE_PASSWORD = os.environ['SUPABASE_PASSWORD']
62
- OPENROUTER_API_KEY = "sk-or-v1-"+os.environ['OPENROUTER_API_KEY']
63
-
64
- # sys_prompts = {
65
- # "offline": {
66
- # "Chat": "You are an expert AI, complete the given task. Do not add any additional comments.",
67
- # "Full Text Report": "You are an expert AI who can create a detailed report from user request. The report should be in markdown format. Do not add any additional comments.",
68
- # "Tabular Report": "You are an expert AI who can create a structured report from user request.The report should be in markdown format structured into subtopics/tables/lists. Do not add any additional comments.",
69
- # "Tables only": "You are an expert AI who can create a structured tabular report from user request.The report should be in markdown format consists of only markdown tables. Do not add any additional comments.",
70
- # },
71
- # "online": {
72
- # "Chat": "You are an expert AI, complete the given task using the provided context. Do not add any additional comments.",
73
- # "Full Text Report": "You are an expert AI who can create a detailed report using information scraped from the internet. You should decide which information is relevant to the given task and use it to create a report. The report should be in markdown format. Do not add any additional comments.",
74
- # "Tabular Report": """You are an expert AI who can provide answers using internet search results.
75
- # 1 filter and summarize relevant information, if there are conflicting information, use the latest source.
76
- # 2. use it to construct a clear and factual answer.
77
- # Your response should be properly formatted and well readable using markdown formatting. """,
78
- # "Tables only": "You are an expert AI who can create a structured tabular report using information scraped from the internet. You should decide which information is relevant to the given task. The report should be in markdown format consists of only markdown tables. Do not add any additional comments.",
79
- # },
80
- # }
81
-
82
-
83
  class QueryModel(BaseModel):
84
- user_query: str = Query(default="", description="Initial user query")
85
- topic: str = Query(default="", description="Topic name to generate Report")
86
- description: str = Query(description="Description/prompt for report (REQUIRED)")
87
- user_id: str = Query(default="", description="unique user id")
88
- user_name: str = Query(default="", description="user name")
89
- internet: bool = Query(default=True, description="Enable Internet search")
90
- output_format: str = Query(default="report_table", description="Output format for the report",
91
- enum=["chat", "report", "report_table"])
92
- data_format: str = Query(default="Structured data", description="Type of data to extract from the internet",
93
- enum=["No presets", "Structured data", "Quantitative data"])
94
- generate_charts: bool = Query(default=False, description="Include generated charts")
95
- output_as_md: bool = Query(default=False, description="Output report in markdown (default output in HTML)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  @cache(expire=604800)
98
- async def generate_report(query: QueryModel):
99
- query_str = query.topic
100
- description = query.description
101
- user_id = query.user_id
102
- internet = "online" if query.internet else "offline"
103
- user_prompt_final = prompt_user[internet][query.output_format]
104
- system_prompt_final = prompt_system[internet]
105
- data_format = query.data_format
106
  optimized_search_query = ""
107
- all_text_with_urls = [("", "")]
108
  full_search_object = {}
109
- generate_charts = query.generate_charts
110
- output_as_md = query.output_as_md
111
-
112
  if query.internet:
113
- search_query = re.sub(r'[^\w\s]', '', description).strip()
114
  try:
115
- urls, optimized_search_query, full_search_object = search_brave(search_query, num_results=8)
116
- all_text_with_urls = fetch_and_extract_content(data_format, urls, optimized_search_query)
117
- reference = limit_tokens(str(all_text_with_urls),token_limit=5000)
118
- user_prompt_final = user_prompt_final.format(description=description, reference=reference)
119
  except Exception as e:
120
- print(e)
121
- query.internet = False
122
- print("failed to search/scrape results, falling back to LLM response")
123
-
124
- if not query.internet:
125
- user_prompt_final = prompt_user["offline"][query.output_format].format(description=description)
126
- system_prompt_final = prompt_system["offline"]
127
-
128
- md_report = together_response(user_prompt_final, model=llm_default_medium, SysPrompt=system_prompt_final)
129
- html_report = md_to_html(md_report)
130
-
131
- # Render Charts
132
- if generate_charts and has_tables(html_report):
133
- print("tables found, creating charts")
134
  try:
135
-
136
- prompt = "convert the numerical data tables in the given content to embedded html plotly.js charts if appropriate, use appropriate colors, \
137
- output format:\
138
- <report>output the full content without any other changes in md format enclosed in tags like this</report> using the following:" + str(md_report)
139
-
140
- messages = [{"role": 'user', "content": prompt}]
141
- md_report = extract_data_from_tag(openrouter_response(messages, model="anthropic/claude-3.5-sonnet"),"report")
142
- print(md_report)
143
-
144
  except Exception as e:
145
- print(e)
146
- print("failed to generate charts, falling back to original report")
147
-
148
- if user_id != "test":
149
- insert_data(user_id, query_str, description, str(all_text_with_urls), md_report)
150
- references_html = {}
151
- for text, url in all_text_with_urls:
152
- references_html[url] = str(md_to_html(text))
153
-
154
- final_report = md_report if output_as_md else md_to_html(md_report)
155
 
156
  return {
157
  "report": final_report,
158
  "references": references_html,
159
  "search_query": optimized_search_query,
160
- "search_data_full":full_search_object
161
  }
162
 
163
- @app.post("/generate_report")
164
- async def api_generate_report(request: Request, query: QueryModel):
165
- return await generate_report(query)
166
-
 
 
 
 
167
  app.add_middleware(
168
  CORSMiddleware,
169
  allow_origins=["*"],
170
  allow_credentials=True,
171
  allow_methods=["*"],
172
- allow_headers=["*"],)
 
 
 
 
 
 
 
1
  import os
2
+ from typing import List, Dict, Any
3
  from datetime import datetime, timedelta
4
+ import re
5
+ from functools import lru_cache
6
+
7
+ from fastapi import FastAPI, HTTPException, Request, Query, Depends
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from pydantic import BaseModel, Field
10
  from fastapi_cache import FastAPICache
11
  from fastapi_cache.backends.inmemory import InMemoryBackend
12
  from fastapi_cache.decorator import cache
13
+ from dotenv import load_dotenv
14
+
15
+ from helper_functions_api import (
16
+ has_tables, extract_data_from_tag, openrouter_response, md_to_html,
17
+ search_brave, fetch_and_extract_content, limit_tokens, together_response, insert_data
18
+ )
19
+
20
+ # Load environment variables
21
+ load_dotenv()
22
+
23
+ # Constants
24
# Model identifiers, keyed first by tier ("default" / "fallback") and then by
# size. The previous revision of this file labelled the default names as Groq
# models and the fallback names as Together models.
LLM_MODELS = {
    "default": {
        "small": "llama3-8b-8192",
        "medium": "llama3-70b-8192",
    },
    "fallback": {
        "small": "meta-llama/Llama-3-8b-chat-hf",
        "medium": "meta-llama/Llama-3-70b-chat-hf",
    },
}

# System prompts. The "online" and "offline" entries are looked up by the
# internet mode chosen for a request; the remaining entries are general-purpose.
# NOTE(review): the continuation lines of the triple-quoted "online" prompt are
# kept flush-left, as rendered in the diff view -- confirm against the real file.
SYSTEM_PROMPTS = {
    "json": "You are now in the role of an expert AI who can extract structured information from user request. Both key and value pairs must be in double quotes. You must respond ONLY with a valid JSON file. Do not add any additional comments.",
    "list": "You are now in the role of an expert AI who can extract structured information from user request. All elements must be in double quotes. You must respond ONLY with a valid python List. Do not add any additional comments.",
    "default": "You are an expert AI, complete the given task. Do not add any additional comments.",
    "md": "You are an expert AI who can create a structured report using information provided in the context from user request. The report should be in markdown format consists of markdown tables structured into subtopics. Do not add any additional comments.",
    "online": """You are an expert AI who can create a detailed structured report using internet search results.
1. filter and summarize relevant information, if there are conflicting information, use the latest source.
2. use it to construct a clear and factual answer.
Your response should be structured and properly formatted using markdown headings, subheadings, tables, use as necessary. Ignore Links and references""",
    "offline": "You are an expert AI who can create detailed answers. Your response should be properly formatted and well readable using markdown formatting.",
}

# User-prompt templates, keyed by internet mode and then by output format.
# "online" templates take {description} and {reference}; "offline" templates
# take {description} only.
PROMPT_TEMPLATES = {
    "online": {
        "chat": "Write a well thought out, detailed and structured answer to the query:: {description} #### , refer the provided internet search results reference:{reference}",
        "report": "Write a well thought out, detailed and structured Report to the query:: {description} #### , refer the provided internet search results reference:{reference}, The report should be well formatted using markdown format structured into subtopics as necessary",
        "report_table": "Write a well thought out Report to the query:: {description},#### , refer the provided internet search results reference:{reference}. The report should be well formatted using markdown format, structured into subtopics, include tables or lists as needed to make it well readable",
    },
    "offline": {
        "chat": "Write a well thought out, detailed and structured answer to the query:: {description}",
        "report": "Write a well thought out, detailed and structured Report to the query:: {description}. The report should be well formatted using markdown format, structured into subtopics",
        "report_table": "Write a detailed and structured Report to the query:: {description}, The report should be well formatted using markdown format, structured into subtopics, include tables or lists as needed to make it well readable",
    },
}
60
 
61
+ # FastAPI app setup
62
  app = FastAPI()
63
 
64
@app.on_event("startup")
async def startup():
    """Initialise fastapi-cache with an in-memory backend at application startup."""
    backend = InMemoryBackend()
    FastAPICache.init(backend, prefix="fastapi-cache")
67
 
68
+ # Pydantic model for query parameters
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
class QueryModel(BaseModel):
    # Request body for report generation. Only `description` is required;
    # every other field falls back to the default given here.
    # (Plain comments are used instead of a class docstring so the generated
    # schema is left exactly as it was.)

    # Free-text inputs and caller identity.
    user_query: str = Field("", description="Initial user query")
    topic: str = Field("", description="Topic name to generate Report")
    description: str = Field(..., description="Description/prompt for report (REQUIRED)")
    user_id: str = Field("", description="unique user id")
    user_name: str = Field("", description="user name")

    # Generation switches.
    internet: bool = Field(True, description="Enable Internet search")
    output_format: str = Field("report_table", description="Output format for the report")
    data_format: str = Field("Structured data", description="Type of data to extract from the internet")
    generate_charts: bool = Field(False, description="Include generated charts")
    output_as_md: bool = Field(False, description="Output report in markdown (default output in HTML)")

    class Config:
        # Sample payload attached to the model's schema.
        schema_extra = {
            "example": dict(
                user_query="How does climate change affect biodiversity?",
                topic="Climate Change and Biodiversity",
                description="Provide a detailed report on the impacts of climate change on global biodiversity",
                user_id="user123",
                user_name="John Doe",
                internet=True,
                output_format="report_table",
                data_format="Structured data",
                generate_charts=True,
                output_as_md=False,
            )
        }
96
+
97
@lru_cache()
def get_api_keys():
    """Collect the service credentials from the process environment.

    The result is memoised by ``lru_cache``, so the environment is read on
    the first call only.

    Returns:
        A dict keyed by variable name. The TOGETHER, BRAVE, GROQ and HELICON
        keys are ``None`` when unset. ``OPENROUTER_API_KEY`` is returned with
        the ``sk-or-v1-`` prefix prepended to the stored value.

    Raises:
        KeyError: if ``SUPABASE_USER``, ``SUPABASE_PASSWORD`` or
            ``OPENROUTER_API_KEY`` is missing from the environment.
    """
    env = os.environ

    # Optional variables: a missing entry yields None.
    optional_names = ("TOGETHER_API_KEY", "BRAVE_API_KEY", "GROQ_API_KEY", "HELICON_API_KEY")
    api_keys = {name: env.get(name) for name in optional_names}

    # Required variables: a missing entry raises KeyError.
    for name in ("SUPABASE_USER", "SUPABASE_PASSWORD"):
        api_keys[name] = env[name]
    api_keys["OPENROUTER_API_KEY"] = "sk-or-v1-" + env["OPENROUTER_API_KEY"]

    return api_keys
108
+
109
def get_internet_data(description: str, data_format: str):
    """Search the web for ``description`` and gather reference text.

    Args:
        description: Free-text prompt; every character that is neither a word
            character nor whitespace is removed before searching.
        data_format: Passed through unchanged to ``fetch_and_extract_content``.

    Returns:
        A 4-tuple ``(all_text_with_urls, optimized_search_query,
        full_search_object, reference)``, where ``reference`` is the
        stringified extraction result trimmed by ``limit_tokens`` with
        ``token_limit=5000``.
    """
    cleaned_query = re.sub(r'[^\w\s]', '', description).strip()

    # search_brave yields the result URLs, the query it actually used, and the
    # full search payload.
    search_result = search_brave(cleaned_query, num_results=8)
    urls, optimized_search_query, full_search_object = search_result

    all_text_with_urls = fetch_and_extract_content(data_format, urls, optimized_search_query)
    reference = limit_tokens(str(all_text_with_urls), token_limit=5000)

    return all_text_with_urls, optimized_search_query, full_search_object, reference
115
+
116
def generate_charts(md_report: str):
    """Ask the chart model to turn the report's numeric tables into plotly.js charts.

    Args:
        md_report: The markdown report to send to the model.

    Returns:
        Whatever ``extract_data_from_tag`` extracts from the ``report`` tag of
        the model's response.
    """
    instructions = (
        "Convert the numerical data tables in the given content to embedded html plotly.js charts if appropriate, "
        "use appropriate colors. Output format: <report>output the full content without any other changes in md "
        "format enclosed in tags like this</report> using the following: "
    )
    chart_prompt = f"{instructions}{md_report}"

    messages = [{"role": 'user', "content": chart_prompt}]
    response = openrouter_response(messages, model="anthropic/claude-3.5-sonnet")
    return extract_data_from_tag(response, "report")
124
 
125
@cache(expire=604800)
async def generate_report(query: QueryModel, api_keys: Dict[str, str] = Depends(get_api_keys)):
    """Generate a report for ``query``, optionally grounded in web search results.

    Results are cached by the ``cache`` decorator with ``expire=604800``.

    Args:
        query: The validated request. ``internet`` selects online/offline
            prompts, ``output_format`` selects the prompt template,
            ``generate_charts`` enables the chart step and ``output_as_md``
            selects markdown vs. HTML output.
        api_keys: Not read by this function; the route handler passes it
            through.

    Returns:
        A dict with keys ``report``, ``references`` (URL -> HTML of the
        extracted text), ``search_query`` and ``search_data_full``.

    Raises:
        KeyError: if ``query.output_format`` is not a key of the selected
            ``PROMPT_TEMPLATES`` entry.
    """
    internet_mode = "online" if query.internet else "offline"
    user_prompt = PROMPT_TEMPLATES[internet_mode][query.output_format]
    system_prompt = SYSTEM_PROMPTS[internet_mode]

    # Defaults used when no search is performed or the search fails.
    all_text_with_urls = []
    optimized_search_query = ""
    full_search_object = {}

    if query.internet:
        try:
            all_text_with_urls, optimized_search_query, full_search_object, reference = get_internet_data(
                query.description, query.data_format
            )
            user_prompt = user_prompt.format(description=query.description, reference=reference)
        except Exception as e:
            # Deliberate best-effort: any search/scrape failure degrades to an
            # offline answer instead of failing the request.
            print(f"Failed to search/scrape results: {e}")
            internet_mode = "offline"
            user_prompt = PROMPT_TEMPLATES[internet_mode][query.output_format].format(description=query.description)
            system_prompt = SYSTEM_PROMPTS[internet_mode]
    else:
        user_prompt = user_prompt.format(description=query.description)

    md_report = together_response(user_prompt, model=LLM_MODELS["default"]["medium"], SysPrompt=system_prompt)

    if query.generate_charts and has_tables(md_to_html(md_report)):
        try:
            charted_report = generate_charts(md_report)
        except Exception as e:
            print(f"Failed to generate charts: {e}")
        else:
            # Only accept a non-empty result, so a response without a usable
            # <report> section cannot wipe out the report already generated.
            # NOTE(review): assumes extract_data_from_tag yields a falsy value
            # when the tag is absent -- confirm against the helper.
            if charted_report:
                md_report = charted_report
            else:
                print("Failed to generate charts: empty chart response, keeping original report")

    # Requests from the "test" user are not persisted.
    if query.user_id != "test":
        insert_data(query.user_id, query.topic, query.description, str(all_text_with_urls), md_report)

    references_html = {url: str(md_to_html(text)) for text, url in all_text_with_urls}
    final_report = md_report if query.output_as_md else md_to_html(md_report)

    return {
        "report": final_report,
        "references": references_html,
        "search_query": optimized_search_query,
        "search_data_full": full_search_object,
    }
167
 
168
@app.post("/generate_report", response_model=Dict[str, Any])
async def api_generate_report(query: QueryModel, api_keys: Dict[str, str] = Depends(get_api_keys)):
    """POST /generate_report: build a report for the request body.

    Args:
        query: The request body.
        api_keys: Credentials resolved through the ``get_api_keys`` dependency
            and forwarded to ``generate_report``.

    Returns:
        The dict produced by ``generate_report``.

    Raises:
        HTTPException: re-raised unchanged when one is raised downstream;
            otherwise a 500 whose detail is the text of the underlying error.
    """
    try:
        return await generate_report(query, api_keys)
    except HTTPException:
        # Already carries its own status code; do not mask it as a 500.
        raise
    except Exception as e:
        # Chain the cause so the original traceback survives in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e
174
+
175
+ # CORS middleware setup
176
  app.add_middleware(
177
  CORSMiddleware,
178
  allow_origins=["*"],
179
  allow_credentials=True,
180
  allow_methods=["*"],
181
+ allow_headers=["*"],
182
+ )