sainathBelagavi committed 260b91a (parent: c10a439): Update app.py
app.py CHANGED
@@ -5,11 +5,9 @@ import re
 import requests
 from bs4 import BeautifulSoup
 import os
-import pickle
-import requests
+import pickle
 from requests.exceptions import HTTPError
 
-
 base_url = "https://api-inference.huggingface.co/models/"
 API_KEY = os.environ.get('HUGGINGFACE_API_KEY')
 
@@ -24,7 +22,7 @@ model_info = {
     },
 }
 
-def …
+def format_prompt(message, conversation_history, startup_details, custom_instructions=None):
     prompt = ""
     if custom_instructions:
         prompt += f"[INST] {custom_instructions} [/INST]\n"
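
Only the signature of format_prompt changes in this hunk; the rest of the body falls outside the diff. As a rough sketch of how a helper with this signature could fold the scraped startup details and prior turns into a Mistral-style prompt (the continuation below is an assumption for illustration, not the code in app.py):

def format_prompt(message, conversation_history, startup_details, custom_instructions=None):
    prompt = ""
    if custom_instructions:
        prompt += f"[INST] {custom_instructions} [/INST]\n"
    # Hypothetical continuation: the real body in app.py is outside this diff.
    if startup_details:
        prompt += f"[INST] Known startup details: {startup_details} [/INST]\n"
    for role, content in conversation_history:
        if role == "user":
            prompt += f"[INST] {content} [/INST]\n"
        else:
            prompt += f"{content}\n"
    prompt += f"[INST] {message} [/INST]"
    return prompt
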
@@ -78,101 +76,79 @@ def save_conversation_history(conversation_history):
 
 def scrape_startup_info(startup_name):
     startup_details = {}
-
+
     # Scrape from Wikipedia
     try:
-
-        startup_summary = wikipedia.summary(startup_name, auto_suggest_advice=False)
+        startup_summary = wikipedia.summary(startup_name, auto_suggest=False)
         startup_details['name'] = startup_name
        startup_details['summary'] = startup_summary
-
-        # Extract additional details from Wikipedia summary
-        startup_details['additional_details'] = {}
-        for key, value in startup_summary.items():
-            startup_details['additional_details'][key] = value
-
-    except (ValueError, HTTPError) as e:
+    except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError, ValueError, HTTPError):
         pass
 
-    # …
-    if '…
-        # Scrape …
-        startup_name = startup_details['additional_details']
-        del startup_details['additional_details']
-        startup_details = {key: value for key, value in startup_details.items() if key != 'name'}
-
-        # Extract additional details from the search results
+    # If no details from Wikipedia, scrape from Crunchbase and AngelList
+    if 'summary' not in startup_details:
+        # Scrape from Crunchbase
         try:
-            …
-            startup_details["…
-
-
-            funding_rounds_elem = soup.select("div.funding-rounds-list > div.card")
-            funding_rounds = []
-            for round_elem in funding_rounds_elem:
-                round_details = {}
-                round_type = round_elem.select_one("span.type")
-                if round_type:
-                    round_details["type"] = round_type.text.strip()
-                round_amount = round_elem.select_one("span.amount")
-                if round_amount:
-                    round_details["amount"] = round_amount.text.strip()
-                funding_rounds.append(round_details)
-            startup_details["funding_rounds"] = funding_rounds
+            crunchbase_url = f"https://www.crunchbase.com/organization/{startup_name.replace(' ', '-')}"
+            response = requests.get(crunchbase_url)
+            if response.status_code == 200:
+                soup = BeautifulSoup(response.content, "html.parser")
+                startup_details["name"] = startup_name
+
+                # Extract founded year
+                founded_year_elem = soup.select_one("div[data-field='founded_year'] span.component--field-formatter")
+                if founded_year_elem:
+                    startup_details["founded_year"] = int(founded_year_elem.text.strip())
+
+                # Extract industry
+                industry_elem = soup.select_one("div[data-field='industries'] span.component--field-formatter")
+                if industry_elem:
+                    startup_details["industry"] = industry_elem.text.strip()
+
+                # Extract funding rounds
+                funding_rounds_elem = soup.select("div[data-field='funding_rounds'] ul li")
+                funding_rounds = []
+                for round_elem in funding_rounds_elem:
+                    round_details = {}
+                    round_type = round_elem.select_one("span.component--field-formatter")
+                    if round_type:
+                        round_details["type"] = round_type.text.strip()
+                    round_amount = round_elem.select_one("span.component--field-formatter + span")
+                    if round_amount:
+                        round_details["amount"] = round_amount.text.strip()
+                    funding_rounds.append(round_details)
+                startup_details["funding_rounds"] = funding_rounds
+        except Exception as e:
+            st.error(f"Error scraping Crunchbase: {e}")
 
         # Scrape from AngelList
-            …
+        try:
+            angellist_url = f"https://angel.co/company/{startup_name.replace(' ', '-')}"
+            response = requests.get(angellist_url)
+            if response.status_code == 200:
+                soup = BeautifulSoup(response.content, "html.parser")
+
+                # Extract team members
+                team_members_elem = soup.select("div.team-member")
+                team_members = []
+                for member_elem in team_members_elem:
+                    member_name = member_elem.select_one("div.name")
+                    if member_name:
+                        team_members.append(member_name.text.strip())
+                startup_details["team_members"] = team_members
+
+                # Extract user growth (if available)
+                user_growth_elem = soup.select_one("div.profile-content-section div.section-tagline")
+                if user_growth_elem:
+                    startup_details["user_growth"] = user_growth_elem.text.strip()
+        except Exception as e:
+            st.error(f"Error scraping AngelList: {e}")
 
     return startup_details
 
 models = [key for key in model_links.keys()]
 selected_model = st.sidebar.selectbox("Select Model", models)
-temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, …
+temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
 st.sidebar.button('Reset Chat', on_click=reset_conversation) # Reset button
 
 st.sidebar.write(f"You're now chatting with **{selected_model}**")
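
A practical caveat for the Crunchbase and AngelList scraping added above: both sites commonly reject plain requests.get calls (403s, bot checks), so the broad except blocks will often report errors or fall through to an empty dict. A small hardening sketch, assuming an illustrative User-Agent and timeout that are not part of this commit:

def fetch_page(url, timeout=10):
    # Illustrative only: these sites may still block or rate-limit scripted requests.
    headers = {"User-Agent": "Mozilla/5.0 (compatible; startup-info-bot/0.1)"}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response
    except requests.RequestException:
        return None
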
@@ -180,6 +156,7 @@ st.sidebar.markdown(model_info[selected_model]['description'])
 st.sidebar.image(model_info[selected_model]['logo'])
 
 st.sidebar.markdown("*Generating the code might go slow if you are using low power resources*")
+
 if "prev_option" not in st.session_state:
     st.session_state.prev_option = selected_model
 
@@ -207,7 +184,7 @@ if st.session_state.chat_state == "normal":
         # Extract the startup name from the prompt
         startup_name_match = re.search(r'predict success of (.*?)\?', prompt, re.IGNORECASE)
         if startup_name_match:
-            startup_name = startup_name_match.group(1)
+            startup_name = startup_name_match.group(1).strip()
             startup_details = scrape_startup_info(startup_name)
             if startup_details:
                 with st.chat_message("user"):
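
The added .strip() matters because the capture group keeps any whitespace the user typed around the name. A quick illustration with a made-up prompt:

import re

prompt = "Can you predict success of  Acme Robotics ?"
match = re.search(r'predict success of (.*?)\?', prompt, re.IGNORECASE)
if match:
    print(repr(match.group(1)))          # ' Acme Robotics '
    print(repr(match.group(1).strip()))  # 'Acme Robotics'
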
@@ -215,16 +192,16 @@ if st.session_state.chat_state == "normal":
 
                 st.session_state.messages.append({"role": "user", "content": prompt})
                 conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
-                custom_instruction = f"Based on the provided startup details and your knowledge of the industry, provide a comprehensive analysis of the startup's potential for success. Discuss the industry outlook, future scope, and any other relevant factors that could contribute to the startup's success or failure. Provide a clear recommendation on whether the startup is likely to be successful or not."
+                custom_instruction = f"Based on the provided startup details or information and your knowledge of the industry, provide a comprehensive analysis of the startup's potential for success. Discuss the industry outlook, future scope, and any other relevant factors that could contribute to the startup's success or failure. Provide a clear recommendation on whether the startup is likely to be successful or not."
 
-                …
+                formatted_text = format_prompt(prompt, conversation_history, startup_details, custom_instruction)
 
                 with st.chat_message("assistant"):
                     client = InferenceClient(model=model_links[selected_model])
                     max_new_tokens = 2048  # Adjust this value as needed
                     try:
                         output = client.text_generation(
-                            …
+                            formatted_text,
                             temperature=temp_values,
                             max_new_tokens=max_new_tokens,
                             stream=True
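
For context on the streaming call: with stream=True (and the default details=False), InferenceClient.text_generation yields plain text chunks, so the usual pattern is to accumulate them while updating a placeholder. A minimal sketch of that consumer, assuming a Streamlit placeholder; the commit's actual handling of output sits outside these hunks:

placeholder = st.empty()
response_text = ""
for chunk in output:  # each chunk is a piece of generated text when stream=True
    response_text += chunk
    placeholder.markdown(response_text)
st.session_state.messages.append({"role": "assistant", "content": response_text})
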
@@ -255,14 +232,14 @@ if st.session_state.chat_state == "normal":
         st.session_state.messages.append({"role": "user", "content": prompt})
         conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
 
-        …
+        formatted_text = format_prompt(prompt, conversation_history, {})
 
         with st.chat_message("assistant"):
             client = InferenceClient(model=model_links[selected_model])
             max_new_tokens = 3000  # Adjust this value as needed
             try:
                 output = client.text_generation(
-                    …
+                    formatted_text,
                     temperature=temp_values,
                     max_new_tokens=max_new_tokens,
                     stream=True
@@ -287,4 +264,4 @@ if st.session_state.chat_state == "normal":
 
 elif st.session_state.chat_state == "reset":
     st.session_state.chat_state = "normal"
-    st.experimental_rerun()
+    st.experimental_rerun()
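
One version note on the line above: st.experimental_rerun is deprecated in recent Streamlit releases. If the Space runs Streamlit 1.27 or newer, the equivalent call would be:

st.rerun()  # replaces st.experimental_rerun() on newer Streamlit versions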