sainathBelagavi committed on
Commit
260b91a
1 Parent(s): c10a439

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -92
app.py CHANGED
@@ -5,11 +5,9 @@ import re
5
  import requests
6
  from bs4 import BeautifulSoup
7
  import os
8
- import pickle
9
- import requests
10
  from requests.exceptions import HTTPError
11
 
12
-
13
  base_url = "https://api-inference.huggingface.co/models/"
14
  API_KEY = os.environ.get('HUGGINGFACE_API_KEY')
15
 
@@ -24,7 +22,7 @@ model_info = {
24
  },
25
  }
26
 
27
- def format_promt(message, conversation_history, startup_details, custom_instructions=None):
28
  prompt = ""
29
  if custom_instructions:
30
  prompt += f"[INST] {custom_instructions} [/INST]\n"
@@ -78,101 +76,79 @@ def save_conversation_history(conversation_history):
78
 
79
  def scrape_startup_info(startup_name):
80
  startup_details = {}
81
-
82
  # Scrape from Wikipedia
83
  try:
84
- startup_details = {}
85
- startup_summary = wikipedia.summary(startup_name, auto_suggest_advice=False)
86
  startup_details['name'] = startup_name
87
  startup_details['summary'] = startup_summary
88
-
89
- # Extract additional details from Wikipedia summary
90
- startup_details['additional_details'] = {}
91
- for key, value in startup_summary.items():
92
- startup_details['additional_details'][key] = value
93
-
94
- except (ValueError, HTTPError) as e:
95
  pass
96
 
97
- # Check if the startup details were scraped successfully
98
- if 'additional_details' in startup_details:
99
- # Scrape additional details from Wikipedia
100
- startup_name = startup_details['additional_details']
101
- del startup_details['additional_details']
102
- startup_details = {key: value for key, value in startup_details.items() if key != 'name'}
103
-
104
- # Extract additional details from the search results
105
  try:
106
- startup_summary = startup_details['name']
107
- startup_details['additional_details'] = {}
108
- search_results = re.search(r'<(/?[a-zA-Z]+[a-zA-Z_-]*?)>.*', startup_summary, re.DOTALL)
109
- if search_results:
110
- tag_name = search_results.group(1)
111
- startup_details['additional_details'][tag_name] = search_results.group(0)
112
-
113
- except ValueError as e:
114
- print(e)
115
-
116
- # If startup details were not scraped successfully, scrape from CrunchBase and AngelList
117
- if 'additional_details' not in startup_details:
118
- # Scrape from CrunchBase
119
- crunchbase_url = f"https://www.crunchbase.com/organization/{startup_name.title()}"
120
- response = requests.get(crunchbase_url)
121
-
122
- if response.status_code == 200:
123
- soup = BeautifulSoup(response.content, "html.parser")
124
- startup_details["name"] = startup_name.title()
125
-
126
- # Extract founded year
127
- founded_year_elem = soup.select_one("div.field-label:contains('Founded') + div.field-value")
128
- if founded_year_elem:
129
- startup_details["founded_year"] = int(founded_year_elem.text.strip())
130
-
131
- # Extract industry
132
- industry_elem = soup.select_one("div.field-label:contains('Industries') + div.field-value")
133
- if industry_elem:
134
- startup_details["industry"] = industry_elem.text.strip()
135
-
136
- # Extract funding rounds
137
- funding_rounds_elem = soup.select("div.funding-rounds-list > div.card")
138
- funding_rounds = []
139
- for round_elem in funding_rounds_elem:
140
- round_details = {}
141
- round_type = round_elem.select_one("span.type")
142
- if round_type:
143
- round_details["type"] = round_type.text.strip()
144
- round_amount = round_elem.select_one("span.amount")
145
- if round_amount:
146
- round_details["amount"] = round_amount.text.strip()
147
- funding_rounds.append(round_details)
148
- startup_details["funding_rounds"] = funding_rounds
149
 
150
  # Scrape from AngelList
151
- angellist_url = f"https://angel.co/{startup_name.title()}"
152
- response = requests.get(angellist_url)
153
-
154
- if response.status_code == 200:
155
- soup = BeautifulSoup(response.content, "html.parser")
156
-
157
- # Extract team members
158
- team_members_elem = soup.select("div.team-members > div.team-member")
159
- team_members = []
160
- for member_elem in team_members_elem:
161
- member_name = member_elem.select_one("div.name")
162
- if member_name:
163
- team_members.append(member_name.text.strip())
164
- startup_details["team_members"] = team_members
165
-
166
- # Extract user growth (if available)
167
- user_growth_elem = soup.select_one("div.profile-content-section > div.section-content > div.section-tagline")
168
- if user_growth_elem:
169
- startup_details["user_growth"] = user_growth_elem.text.strip()
 
 
170
 
171
  return startup_details
172
 
173
  models = [key for key in model_links.keys()]
174
  selected_model = st.sidebar.selectbox("Select Model", models)
175
- temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, (0.5))
176
  st.sidebar.button('Reset Chat', on_click=reset_conversation) # Reset button
177
 
178
  st.sidebar.write(f"You're now chatting with **{selected_model}**")
@@ -180,6 +156,7 @@ st.sidebar.markdown(model_info[selected_model]['description'])
180
  st.sidebar.image(model_info[selected_model]['logo'])
181
 
182
  st.sidebar.markdown("*Generating the code might go slow if you are using low power resources*")
 
183
  if "prev_option" not in st.session_state:
184
  st.session_state.prev_option = selected_model
185
 
@@ -207,7 +184,7 @@ if st.session_state.chat_state == "normal":
207
  # Extract the startup name from the prompt
208
  startup_name_match = re.search(r'predict success of (.*?)\?', prompt, re.IGNORECASE)
209
  if startup_name_match:
210
- startup_name = startup_name_match.group(1)
211
  startup_details = scrape_startup_info(startup_name)
212
  if startup_details:
213
  with st.chat_message("user"):
@@ -215,16 +192,16 @@ if st.session_state.chat_state == "normal":
215
 
216
  st.session_state.messages.append({"role": "user", "content": prompt})
217
  conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
218
- custom_instruction = f"Based on the provided startup details and your knowledge of the industry, provide a comprehensive analysis of the startup's potential for success. Discuss the industry outlook, future scope, and any other relevant factors that could contribute to the startup's success or failure. Provide a clear recommendation on whether the startup is likely to be successful or not."
219
 
220
- formated_text = format_promt(prompt, conversation_history, startup_details, custom_instruction)
221
 
222
  with st.chat_message("assistant"):
223
  client = InferenceClient(model=model_links[selected_model])
224
  max_new_tokens = 2048 # Adjust this value as needed
225
  try:
226
  output = client.text_generation(
227
- formated_text,
228
  temperature=temp_values,
229
  max_new_tokens=max_new_tokens,
230
  stream=True
@@ -255,14 +232,14 @@ if st.session_state.chat_state == "normal":
255
  st.session_state.messages.append({"role": "user", "content": prompt})
256
  conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
257
 
258
- formated_text = format_promt(prompt, conversation_history, {})
259
 
260
  with st.chat_message("assistant"):
261
  client = InferenceClient(model=model_links[selected_model])
262
  max_new_tokens = 3000 # Adjust this value as needed
263
  try:
264
  output = client.text_generation(
265
- formated_text,
266
  temperature=temp_values,
267
  max_new_tokens=max_new_tokens,
268
  stream=True
@@ -287,4 +264,4 @@ if st.session_state.chat_state == "normal":
287
 
288
  elif st.session_state.chat_state == "reset":
289
  st.session_state.chat_state = "normal"
290
- st.experimental_rerun()
 
5
  import requests
6
  from bs4 import BeautifulSoup
7
  import os
8
+ import pickle
 
9
  from requests.exceptions import HTTPError
10
 
 
11
  base_url = "https://api-inference.huggingface.co/models/"
12
  API_KEY = os.environ.get('HUGGINGFACE_API_KEY')
13
 
 
22
  },
23
  }
24
 
25
+ def format_prompt(message, conversation_history, startup_details, custom_instructions=None):
26
  prompt = ""
27
  if custom_instructions:
28
  prompt += f"[INST] {custom_instructions} [/INST]\n"
 
76
 
77
  def scrape_startup_info(startup_name):
78
  startup_details = {}
79
+
80
  # Scrape from Wikipedia
81
  try:
82
+ startup_summary = wikipedia.summary(startup_name, auto_suggest=False)
 
83
  startup_details['name'] = startup_name
84
  startup_details['summary'] = startup_summary
85
+ except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError, ValueError, HTTPError):
 
 
 
 
 
 
86
  pass
87
 
88
+ # If no details from Wikipedia, scrape from Crunchbase and AngelList
89
+ if 'summary' not in startup_details:
90
+ # Scrape from Crunchbase
 
 
 
 
 
91
  try:
92
+ crunchbase_url = f"https://www.crunchbase.com/organization/{startup_name.replace(' ', '-')}"
93
+ response = requests.get(crunchbase_url)
94
+ if response.status_code == 200:
95
+ soup = BeautifulSoup(response.content, "html.parser")
96
+ startup_details["name"] = startup_name
97
+
98
+ # Extract founded year
99
+ founded_year_elem = soup.select_one("div[data-field='founded_year'] span.component--field-formatter")
100
+ if founded_year_elem:
101
+ startup_details["founded_year"] = int(founded_year_elem.text.strip())
102
+
103
+ # Extract industry
104
+ industry_elem = soup.select_one("div[data-field='industries'] span.component--field-formatter")
105
+ if industry_elem:
106
+ startup_details["industry"] = industry_elem.text.strip()
107
+
108
+ # Extract funding rounds
109
+ funding_rounds_elem = soup.select("div[data-field='funding_rounds'] ul li")
110
+ funding_rounds = []
111
+ for round_elem in funding_rounds_elem:
112
+ round_details = {}
113
+ round_type = round_elem.select_one("span.component--field-formatter")
114
+ if round_type:
115
+ round_details["type"] = round_type.text.strip()
116
+ round_amount = round_elem.select_one("span.component--field-formatter + span")
117
+ if round_amount:
118
+ round_details["amount"] = round_amount.text.strip()
119
+ funding_rounds.append(round_details)
120
+ startup_details["funding_rounds"] = funding_rounds
121
+ except Exception as e:
122
+ st.error(f"Error scraping Crunchbase: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
  # Scrape from AngelList
125
+ try:
126
+ angellist_url = f"https://angel.co/company/{startup_name.replace(' ', '-')}"
127
+ response = requests.get(angellist_url)
128
+ if response.status_code == 200:
129
+ soup = BeautifulSoup(response.content, "html.parser")
130
+
131
+ # Extract team members
132
+ team_members_elem = soup.select("div.team-member")
133
+ team_members = []
134
+ for member_elem in team_members_elem:
135
+ member_name = member_elem.select_one("div.name")
136
+ if member_name:
137
+ team_members.append(member_name.text.strip())
138
+ startup_details["team_members"] = team_members
139
+
140
+ # Extract user growth (if available)
141
+ user_growth_elem = soup.select_one("div.profile-content-section div.section-tagline")
142
+ if user_growth_elem:
143
+ startup_details["user_growth"] = user_growth_elem.text.strip()
144
+ except Exception as e:
145
+ st.error(f"Error scraping AngelList: {e}")
146
 
147
  return startup_details
148
 
149
  models = [key for key in model_links.keys()]
150
  selected_model = st.sidebar.selectbox("Select Model", models)
151
+ temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
152
  st.sidebar.button('Reset Chat', on_click=reset_conversation) # Reset button
153
 
154
  st.sidebar.write(f"You're now chatting with **{selected_model}**")
 
156
  st.sidebar.image(model_info[selected_model]['logo'])
157
 
158
  st.sidebar.markdown("*Generating the code might go slow if you are using low power resources*")
159
+
160
  if "prev_option" not in st.session_state:
161
  st.session_state.prev_option = selected_model
162
 
 
184
  # Extract the startup name from the prompt
185
  startup_name_match = re.search(r'predict success of (.*?)\?', prompt, re.IGNORECASE)
186
  if startup_name_match:
187
+ startup_name = startup_name_match.group(1).strip()
188
  startup_details = scrape_startup_info(startup_name)
189
  if startup_details:
190
  with st.chat_message("user"):
 
192
 
193
  st.session_state.messages.append({"role": "user", "content": prompt})
194
  conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
195
+ custom_instruction = f"Based on the provided startup details or information and your knowledge of the industry, provide a comprehensive analysis of the startup's potential for success. Discuss the industry outlook, future scope, and any other relevant factors that could contribute to the startup's success or failure. Provide a clear recommendation on whether the startup is likely to be successful or not."
196
 
197
+ formatted_text = format_prompt(prompt, conversation_history, startup_details, custom_instruction)
198
 
199
  with st.chat_message("assistant"):
200
  client = InferenceClient(model=model_links[selected_model])
201
  max_new_tokens = 2048 # Adjust this value as needed
202
  try:
203
  output = client.text_generation(
204
+ formatted_text,
205
  temperature=temp_values,
206
  max_new_tokens=max_new_tokens,
207
  stream=True
 
232
  st.session_state.messages.append({"role": "user", "content": prompt})
233
  conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
234
 
235
+ formatted_text = format_prompt(prompt, conversation_history, {})
236
 
237
  with st.chat_message("assistant"):
238
  client = InferenceClient(model=model_links[selected_model])
239
  max_new_tokens = 3000 # Adjust this value as needed
240
  try:
241
  output = client.text_generation(
242
+ formatted_text,
243
  temperature=temp_values,
244
  max_new_tokens=max_new_tokens,
245
  stream=True
 
264
 
265
  elif st.session_state.chat_state == "reset":
266
  st.session_state.chat_state = "normal"
267
+ st.experimental_rerun()