# Streamlit chat app: StartupSuccessPredictor — chats with Mistral-7B-Instruct
# via the Hugging Face Inference API, augmenting prompts with scraped startup
# details (Wikipedia, Crunchbase, AngelList).
# Standard library
import os
import pickle
import re

# Third-party
import requests
import streamlit as st
import wikipedia
from bs4 import BeautifulSoup
from huggingface_hub import InferenceClient
from requests.exceptions import HTTPError
# Hugging Face Inference API configuration.
base_url = "https://api-inference.huggingface.co/models/"
# Token read from the environment; None when unset (anonymous, rate-limited access).
API_KEY = os.environ.get('HUGGINGFACE_API_KEY')

# Display name -> fully-qualified Inference API endpoint for each selectable model.
model_links = {
    "StartupSuccessPredictor🔮": base_url + "mistralai/Mistral-7B-Instruct-v0.2",
}

# Sidebar metadata (description markdown, logo path) keyed by display name.
model_info = {
    "StartupSuccessPredictor🔮": {
        'description': """The StartupSuccessPredictor model is a **Large Language Model (LLM)** that's able to predict the success potential of Indian startups based on various factors.\n \n\nThis model can analyze startup data, including funding rounds, team experience, industry, market size, user growth, and more to provide insights into the startup's likelihood of success.\n""",
        'logo': './startup_predictor.jpg'
    },
}
def format_prompt(message, conversation_history, startup_details, custom_instructions=None):
    """Assemble the Mistral-style prompt string for one generation call.

    Concatenates, in order: an optional [INST] custom-instruction block, the
    conversation history, the startup details (funding rounds expanded as a
    bullet list), the current user message, and the opening [RESPONSE] tag.
    """
    parts = []
    if custom_instructions:
        parts.append(f"[INST] {custom_instructions} [/INST]\n")
    # Conversation history section.
    parts.append("[CONV_HISTORY]\n")
    parts.extend(f"{role.upper()}: {content}\n" for role, content in conversation_history)
    parts.append("[/CONV_HISTORY]\n")
    # Startup details section.
    parts.append("[STARTUP_DETAILS]\n")
    for key, value in startup_details.items():
        if key == "funding_rounds":
            parts.append(f"{key.capitalize()}:\n")
            for fr in value:
                parts.append(f"- Type: {fr.get('type', 'N/A')}, Amount: {fr.get('amount', 'N/A')}\n")
        else:
            parts.append(f"{key.capitalize()}: {value}\n")
    parts.append("[/STARTUP_DETAILS]\n")
    # Current message and response marker.
    parts.append(f"[INST] {message} [/INST]\n")
    parts.append("[RESPONSE]\n")
    return "".join(parts)
def reset_conversation():
    """Wipe the stored transcript and flag the UI for a rerun.

    Setting chat_state to "reset" makes the main script body trigger a
    rerun on its next pass instead of rendering the chat.
    """
    st.session_state.messages = []
    st.session_state.conversation = []
    st.session_state.chat_state = "reset"
def load_conversation_history():
    """Load persisted chat messages; return [] when no history file exists."""
    history_file = "conversation_history.pickle"
    if not os.path.exists(history_file):
        return []
    # NOTE(review): pickle is only safe here because the app itself wrote the
    # file; never point this at untrusted data.
    with open(history_file, "rb") as fh:
        return pickle.load(fh)
def save_conversation_history(conversation_history):
    """Persist the chat messages list to the local pickle file."""
    with open("conversation_history.pickle", "wb") as fh:
        pickle.dump(conversation_history, fh)
def scrape_startup_info(startup_name):
    """Best-effort scrape of public information about *startup_name*.

    Tries Wikipedia first; when no summary is found, falls back to Crunchbase
    (founded year, industry, funding rounds) and AngelList (team members,
    user growth). Returns a dict of whatever was found — possibly empty.
    Scraping failures are reported via st.error but never raised.
    """
    startup_details = {}
    # Scrape from Wikipedia
    try:
        startup_summary = wikipedia.summary(startup_name, auto_suggest=False)
        startup_details['name'] = startup_name
        startup_details['summary'] = startup_summary
    except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError, ValueError, HTTPError):
        pass  # deliberate best-effort: fall through to the other sources
    # If no details from Wikipedia, scrape from Crunchbase and AngelList
    if 'summary' not in startup_details:
        # Scrape from Crunchbase
        try:
            crunchbase_url = f"https://www.crunchbase.com/organization/{startup_name.replace(' ', '-')}"
            # timeout prevents a stalled remote host from hanging the Streamlit script
            response = requests.get(crunchbase_url, timeout=10)
            if response.status_code == 200:
                soup = BeautifulSoup(response.content, "html.parser")
                startup_details["name"] = startup_name
                # Extract founded year
                founded_year_elem = soup.select_one("div[data-field='founded_year'] span.component--field-formatter")
                if founded_year_elem:
                    startup_details["founded_year"] = int(founded_year_elem.text.strip())
                # Extract industry
                industry_elem = soup.select_one("div[data-field='industries'] span.component--field-formatter")
                if industry_elem:
                    startup_details["industry"] = industry_elem.text.strip()
                # Extract funding rounds
                funding_rounds_elem = soup.select("div[data-field='funding_rounds'] ul li")
                funding_rounds = []
                for round_elem in funding_rounds_elem:
                    round_details = {}
                    round_type = round_elem.select_one("span.component--field-formatter")
                    if round_type:
                        round_details["type"] = round_type.text.strip()
                    round_amount = round_elem.select_one("span.component--field-formatter + span")
                    if round_amount:
                        round_details["amount"] = round_amount.text.strip()
                    funding_rounds.append(round_details)
                startup_details["funding_rounds"] = funding_rounds
        except Exception as e:
            st.error(f"Error scraping Crunchbase: {e}")
        # Scrape from AngelList
        try:
            angellist_url = f"https://angel.co/company/{startup_name.replace(' ', '-')}"
            # timeout prevents a stalled remote host from hanging the Streamlit script
            response = requests.get(angellist_url, timeout=10)
            if response.status_code == 200:
                soup = BeautifulSoup(response.content, "html.parser")
                # Extract team members
                team_members_elem = soup.select("div.team-member")
                team_members = []
                for member_elem in team_members_elem:
                    member_name = member_elem.select_one("div.name")
                    if member_name:
                        team_members.append(member_name.text.strip())
                startup_details["team_members"] = team_members
                # Extract user growth (if available)
                user_growth_elem = soup.select_one("div.profile-content-section div.section-tagline")
                if user_growth_elem:
                    startup_details["user_growth"] = user_growth_elem.text.strip()
        except Exception as e:
            st.error(f"Error scraping AngelList: {e}")
    return startup_details
# ----- Sidebar: model selection and generation controls -----
models = list(model_links)
selected_model = st.sidebar.selectbox("Select Model", models)
temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
st.sidebar.button('Reset Chat', on_click=reset_conversation)  # Reset button
st.sidebar.write(f"You're now chatting with **{selected_model}**")
st.sidebar.markdown(model_info[selected_model]['description'])
st.sidebar.image(model_info[selected_model]['logo'])
st.sidebar.markdown("*Generating the code might go slow if you are using low power resources*")

# Wipe the transcript whenever the user switches models.
if "prev_option" not in st.session_state:
    st.session_state.prev_option = selected_model
if st.session_state.prev_option != selected_model:
    st.session_state.messages = []
    st.session_state.prev_option = selected_model

if "chat_state" not in st.session_state:
    st.session_state.chat_state = "normal"
# Load the conversation history from the file on first run.
if "messages" not in st.session_state:
    st.session_state.messages = load_conversation_history()

repo_id = model_links[selected_model]
st.subheader(f'{selected_model}')
def _stream_model_response(formatted_text, max_new_tokens):
    """Stream a completion from the Inference API into one string.

    Returns the accumulated response text, or None when generation failed
    (an error message is shown to the user in that case).
    """
    client = InferenceClient(model=model_links[selected_model])
    try:
        output = client.text_generation(
            formatted_text,
            temperature=temp_values,
            max_new_tokens=max_new_tokens,
            stream=True,
        )
        response = ""
        for output_chunk in output:
            # Chunks may arrive as dicts or plain strings depending on the client version.
            if isinstance(output_chunk, dict) and "text" in output_chunk:
                response += output_chunk["text"]
            else:
                response += output_chunk
        return response
    except ValueError as e:
        if "Input validation error" in str(e):
            st.error("Error: The input prompt is too long. Please try a shorter prompt.")
        else:
            st.error(f"An error occurred: {e}")
    except Exception as e:
        st.error(f"An unexpected error occurred: {e}")
    return None


if st.session_state.chat_state == "normal":
    # Replay the stored transcript.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input(f"Hi I'm {selected_model}, How can I help you today?"):
        if "predict success of" in prompt.lower():
            # Extract the startup name from the prompt
            startup_name_match = re.search(r'predict success of (.*?)\?', prompt, re.IGNORECASE)
            if startup_name_match:
                startup_name = startup_name_match.group(1).strip()
                startup_details = scrape_startup_info(startup_name)
                if startup_details:
                    with st.chat_message("user"):
                        st.markdown(prompt)
                    st.session_state.messages.append({"role": "user", "content": prompt})
                    conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
                    custom_instruction = "Based on the provided startup details or information and your knowledge of the industry, provide a comprehensive analysis of the startup's potential for success. Discuss the industry outlook, future scope, and any other relevant factors that could contribute to the startup's success or failure. Provide a clear recommendation on whether the startup is likely to be successful or not."
                    formatted_text = format_prompt(prompt, conversation_history, startup_details, custom_instruction)
                    with st.chat_message("assistant"):
                        response = _stream_model_response(formatted_text, max_new_tokens=2048)
                        if response is not None:
                            st.markdown(f"**Success Analysis for {startup_details['name']}**\n\n{response}")
                            st.session_state.messages.append({"role": "assistant", "content": response})
                            save_conversation_history(st.session_state.messages)
                else:
                    st.write(f"No information found for the startup '{startup_name}'. Please try another startup name or provide additional details.")
        else:
            # Plain chat turn without startup scraping.
            with st.chat_message("user"):
                st.markdown(prompt)
            st.session_state.messages.append({"role": "user", "content": prompt})
            conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
            formatted_text = format_prompt(prompt, conversation_history, {})
            with st.chat_message("assistant"):
                response = _stream_model_response(formatted_text, max_new_tokens=3000)
                if response is not None:
                    st.markdown(response)
                    st.session_state.messages.append({"role": "assistant", "content": response})
                    save_conversation_history(st.session_state.messages)
elif st.session_state.chat_state == "reset":
    # One rerun after reset restores the normal chat view.
    st.session_state.chat_state = "normal"
    st.experimental_rerun()