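"""Streamlit chat app that predicts the success potential of Indian startups.

Wraps a Hugging Face Inference API model (Mistral-7B-Instruct) with light
scraping of Wikipedia, Crunchbase, and AngelList, and persists the
conversation history to a local pickle file.
"""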
import streamlit as st
from huggingface_hub import InferenceClient
import wikipedia
import re
import requests
from bs4 import BeautifulSoup
import os
import pickle
from requests.exceptions import HTTPError
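# Hugging Face Inference API configuration; HUGGINGFACE_API_KEY must be set
# in the environment for authenticated requests.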
base_url = "https://api-inference.huggingface.co/models/"
API_KEY = os.environ.get('HUGGINGFACE_API_KEY')
model_links = {
"StartupSuccessPredictor🔮": base_url + "mistralai/Mistral-7B-Instruct-v0.2",
}
model_info = {
"StartupSuccessPredictor🔮": {
'description': """The StartupSuccessPredictor model is a **Large Language Model (LLM)** that's able to predict the success potential of Indian startups based on various factors.\n \n\nThis model can analyze startup data, including funding rounds, team experience, industry, market size, user growth, and more to provide insights into the startup's likelihood of success.\n""",
'logo': './startup_predictor.jpg'
},
}
def format_prompt(message, conversation_history, startup_details, custom_instructions=None):
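    """Build a Mistral-style prompt string for the model.

    Wraps optional custom instructions in [INST] tags, then embeds the
    running conversation and any scraped startup details in
    [CONV_HISTORY] and [STARTUP_DETAILS] blocks before the current message.
    """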
prompt = ""
if custom_instructions:
prompt += f"[INST] {custom_instructions} [/INST]\n"
# Add conversation history to the prompt
prompt += "[CONV_HISTORY]\n"
for role, content in conversation_history:
prompt += f"{role.upper()}: {content}\n"
prompt += "[/CONV_HISTORY]\n"
# Add the startup details to the prompt
prompt += "[STARTUP_DETAILS]\n"
for key, value in startup_details.items():
if key == "funding_rounds":
prompt += f"{key.capitalize()}:\n"
for round_details in value:
prompt += f"- Type: {round_details.get('type', 'N/A')}, Amount: {round_details.get('amount', 'N/A')}\n"
else:
prompt += f"{key.capitalize()}: {value}\n"
prompt += "[/STARTUP_DETAILS]\n"
# Add the current message
prompt += f"[INST] {message} [/INST]\n"
# Add the response format
prompt += "[RESPONSE]\n"
return prompt
def reset_conversation():
    '''
    Resets the conversation: clears the chat history and messages from
    session state and flags the chat UI for a rerun.
    '''
st.session_state.conversation = []
st.session_state.messages = []
st.session_state.chat_state = "reset"
def load_conversation_history():
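    """Load past chat messages from a local pickle file, if one exists."""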
history_file = "conversation_history.pickle"
if os.path.exists(history_file):
with open(history_file, "rb") as f:
conversation_history = pickle.load(f)
else:
conversation_history = []
return conversation_history
def save_conversation_history(conversation_history):
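    """Persist the chat messages to a local pickle file."""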
history_file = "conversation_history.pickle"
with open(history_file, "wb") as f:
pickle.dump(conversation_history, f)
def scrape_startup_info(startup_name):
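    """Best-effort scrape of public details about a startup.

    Tries Wikipedia first; if no summary is found there, falls back to
    Crunchbase (founded year, industry, funding rounds) and AngelList
    (team members, user growth). Returns whatever could be collected,
    which may be an empty dict.
    """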
startup_details = {}
# Scrape from Wikipedia
try:
startup_summary = wikipedia.summary(startup_name, auto_suggest=False)
startup_details['name'] = startup_name
startup_details['summary'] = startup_summary
except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError, ValueError, HTTPError):
pass
# If no details from Wikipedia, scrape from Crunchbase and AngelList
if 'summary' not in startup_details:
# Scrape from Crunchbase
try:
crunchbase_url = f"https://www.crunchbase.com/organization/{startup_name.replace(' ', '-')}"
            # A timeout keeps the UI from hanging on a slow or blocked request.
            response = requests.get(crunchbase_url, timeout=10)
if response.status_code == 200:
soup = BeautifulSoup(response.content, "html.parser")
startup_details["name"] = startup_name
# Extract founded year
founded_year_elem = soup.select_one("div[data-field='founded_year'] span.component--field-formatter")
if founded_year_elem:
startup_details["founded_year"] = int(founded_year_elem.text.strip())
# Extract industry
industry_elem = soup.select_one("div[data-field='industries'] span.component--field-formatter")
if industry_elem:
startup_details["industry"] = industry_elem.text.strip()
# Extract funding rounds
funding_rounds_elem = soup.select("div[data-field='funding_rounds'] ul li")
funding_rounds = []
for round_elem in funding_rounds_elem:
round_details = {}
round_type = round_elem.select_one("span.component--field-formatter")
if round_type:
round_details["type"] = round_type.text.strip()
round_amount = round_elem.select_one("span.component--field-formatter + span")
if round_amount:
round_details["amount"] = round_amount.text.strip()
funding_rounds.append(round_details)
startup_details["funding_rounds"] = funding_rounds
except Exception as e:
st.error(f"Error scraping Crunchbase: {e}")
# Scrape from AngelList
try:
angellist_url = f"https://angel.co/company/{startup_name.replace(' ', '-')}"
            response = requests.get(angellist_url, timeout=10)
if response.status_code == 200:
soup = BeautifulSoup(response.content, "html.parser")
# Extract team members
team_members_elem = soup.select("div.team-member")
team_members = []
for member_elem in team_members_elem:
member_name = member_elem.select_one("div.name")
if member_name:
team_members.append(member_name.text.strip())
startup_details["team_members"] = team_members
# Extract user growth (if available)
user_growth_elem = soup.select_one("div.profile-content-section div.section-tagline")
if user_growth_elem:
startup_details["user_growth"] = user_growth_elem.text.strip()
except Exception as e:
st.error(f"Error scraping AngelList: {e}")
return startup_details
models = list(model_links.keys())
selected_model = st.sidebar.selectbox("Select Model", models)
temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
st.sidebar.button('Reset Chat', on_click=reset_conversation) # Reset button
st.sidebar.write(f"You're now chatting with **{selected_model}**")
st.sidebar.markdown(model_info[selected_model]['description'])
st.sidebar.image(model_info[selected_model]['logo'])
st.sidebar.markdown("*Generating the code might go slow if you are using low power resources*")
if "prev_option" not in st.session_state:
st.session_state.prev_option = selected_model
if st.session_state.prev_option != selected_model:
st.session_state.messages = []
st.session_state.prev_option = selected_model
if "chat_state" not in st.session_state:
st.session_state.chat_state = "normal"
# Load the conversation history from the file
if "messages" not in st.session_state:
st.session_state.messages = load_conversation_history()
repo_id = model_links[selected_model]
st.subheader(selected_model)
if st.session_state.chat_state == "normal":
for message in st.session_state.messages:
with st.chat_message(message["role"]):
st.markdown(message["content"])
    if prompt := st.chat_input(f"Hi, I'm {selected_model}. How can I help you today?"):
if "predict success of" in prompt.lower():
            # Extract the startup name; the trailing "?" is optional so the
            # pattern matches prompts with or without a question mark.
            startup_name_match = re.search(r'predict success of (.+?)\??$', prompt, re.IGNORECASE)
if startup_name_match:
startup_name = startup_name_match.group(1).strip()
startup_details = scrape_startup_info(startup_name)
if startup_details:
with st.chat_message("user"):
st.markdown(prompt)
st.session_state.messages.append({"role": "user", "content": prompt})
conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
custom_instruction = f"Based on the provided startup details or information and your knowledge of the industry, provide a comprehensive analysis of the startup's potential for success. Discuss the industry outlook, future scope, and any other relevant factors that could contribute to the startup's success or failure. Provide a clear recommendation on whether the startup is likely to be successful or not."
formatted_text = format_prompt(prompt, conversation_history, startup_details, custom_instruction)
with st.chat_message("assistant"):
                        # Pass the API token loaded above; unauthenticated requests may be rate-limited.
                        client = InferenceClient(model=model_links[selected_model], token=API_KEY)
max_new_tokens = 2048 # Adjust this value as needed
try:
output = client.text_generation(
formatted_text,
temperature=temp_values,
max_new_tokens=max_new_tokens,
stream=True
)
response = ""
                            for output_chunk in output:
                                # With stream=True and no details, text_generation yields plain string tokens.
                                response += output_chunk
st.markdown(f"**Success Analysis for {startup_details['name']}**\n\n{response}")
except ValueError as e:
if "Input validation error" in str(e):
st.error("Error: The input prompt is too long. Please try a shorter prompt.")
else:
st.error(f"An error occurred: {e}")
except Exception as e:
st.error(f"An unexpected error occurred: {e}")
else:
st.session_state.messages.append({"role": "assistant", "content": response})
save_conversation_history(st.session_state.messages)
else:
st.write(f"No information found for the startup '{startup_name}'. Please try another startup name or provide additional details.")
else:
with st.chat_message("user"):
st.markdown(prompt)
st.session_state.messages.append({"role": "user", "content": prompt})
conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
formatted_text = format_prompt(prompt, conversation_history, {})
with st.chat_message("assistant"):
                # Pass the API token loaded above; unauthenticated requests may be rate-limited.
                client = InferenceClient(model=model_links[selected_model], token=API_KEY)
max_new_tokens = 3000 # Adjust this value as needed
try:
output = client.text_generation(
formatted_text,
temperature=temp_values,
max_new_tokens=max_new_tokens,
stream=True
)
response = ""
                    for output_chunk in output:
                        # With stream=True and no details, text_generation yields plain string tokens.
                        response += output_chunk
st.markdown(response)
except ValueError as e:
if "Input validation error" in str(e):
st.error("Error: The input prompt is too long. Please try a shorter prompt.")
else:
st.error(f"An error occurred: {e}")
except Exception as e:
st.error(f"An unexpected error occurred: {e}")
else:
st.session_state.messages.append({"role": "assistant", "content": response})
save_conversation_history(st.session_state.messages)
elif st.session_state.chat_state == "reset":
st.session_state.chat_state = "normal"
st.experimental_rerun()