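"""Streamlit chat app that predicts the success potential of Indian startups.

Wraps a Hugging Face Inference API model (Mistral-7B-Instruct) with light
scraping of Wikipedia, Crunchbase, and AngelList, and persists the
conversation history to a local pickle file.
"""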
import streamlit as st
from huggingface_hub import InferenceClient
import wikipedia
import re
import requests
from bs4 import BeautifulSoup
import os
import pickle
from requests.exceptions import HTTPError
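# Hugging Face Inference API configuration; HUGGINGFACE_API_KEY must be set
# in the environment for authenticated requests.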
base_url = "https://api-inference.huggingface.co/models/"
API_KEY = os.environ.get('HUGGINGFACE_API_KEY')
model_links = {
"StartupSuccessPredictor🔮": base_url + "mistralai/Mistral-7B-Instruct-v0.2",
}
model_info = {
"StartupSuccessPredictor🔮": {
'description': """The StartupSuccessPredictor model is a **Large Language Model (LLM)** that's able to predict the success potential of Indian startups based on various factors.\n \n\nThis model can analyze startup data, including funding rounds, team experience, industry, market size, user growth, and more to provide insights into the startup's likelihood of success.\n""",
'logo': './startup_predictor.jpg'
},
}
def format_prompt(message, conversation_history, startup_details, custom_instructions=None):
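    """Build a Mistral-style prompt string for the model.

    Wraps optional custom instructions in [INST] tags, then embeds the
    running conversation and any scraped startup details in
    [CONV_HISTORY] and [STARTUP_DETAILS] blocks before the current message.
    """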
prompt = ""
if custom_instructions:
prompt += f"[INST] {custom_instructions} [/INST]\n"
# Add conversation history to the prompt
prompt += "[CONV_HISTORY]\n"
for role, content in conversation_history:
prompt += f"{role.upper()}: {content}\n"
prompt += "[/CONV_HISTORY]\n"
# Add the startup details to the prompt
prompt += "[STARTUP_DETAILS]\n"
for key, value in startup_details.items():
if key == "funding_rounds":
prompt += f"{key.capitalize()}:\n"
for round_details in value:
prompt += f"- Type: {round_details.get('type', 'N/A')}, Amount: {round_details.get('amount', 'N/A')}\n"
else:
prompt += f"{key.capitalize()}: {value}\n"
prompt += "[/STARTUP_DETAILS]\n"
# Add the current message
prompt += f"[INST] {message} [/INST]\n"
# Add the response format
prompt += "[RESPONSE]\n"
return prompt
def reset_conversation():
    '''
    Resets the conversation: clears the chat history and messages from
    session state and flags the chat UI for a rerun.
    '''
st.session_state.conversation = []
st.session_state.messages = []
st.session_state.chat_state = "reset"
def load_conversation_history():
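    """Load past chat messages from a local pickle file, if one exists."""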
history_file = "conversation_history.pickle"
if os.path.exists(history_file):
with open(history_file, "rb") as f:
conversation_history = pickle.load(f)
else:
conversation_history = []
return conversation_history
def save_conversation_history(conversation_history):
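    """Persist the chat messages to a local pickle file."""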
history_file = "conversation_history.pickle"
with open(history_file, "wb") as f:
pickle.dump(conversation_history, f)
def scrape_startup_info(startup_name):
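    """Best-effort scrape of public details about a startup.

    Tries Wikipedia first; if no summary is found there, falls back to
    Crunchbase (founded year, industry, funding rounds) and AngelList
    (team members, user growth). Returns whatever could be collected,
    which may be an empty dict.
    """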
startup_details = {}
# Scrape from Wikipedia
try:
startup_summary = wikipedia.summary(startup_name, auto_suggest=False)
startup_details['name'] = startup_name
startup_details['summary'] = startup_summary
except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError, ValueError, HTTPError):
pass
# If no details from Wikipedia, scrape from Crunchbase and AngelList
if 'summary' not in startup_details:
# Scrape from Crunchbase
try:
crunchbase_url = f"https://www.crunchbase.com/organization/{startup_name.replace(' ', '-')}"
            # A timeout keeps the UI from hanging on a slow or blocked request.
            response = requests.get(crunchbase_url, timeout=10)
if response.status_code == 200:
soup = BeautifulSoup(response.content, "html.parser")
startup_details["name"] = startup_name
# Extract founded year
founded_year_elem = soup.select_one("div[data-field='founded_year'] span.component--field-formatter")
if founded_year_elem:
startup_details["founded_year"] = int(founded_year_elem.text.strip())
# Extract industry
industry_elem = soup.select_one("div[data-field='industries'] span.component--field-formatter")
if industry_elem:
startup_details["industry"] = industry_elem.text.strip()
# Extract funding rounds
funding_rounds_elem = soup.select("div[data-field='funding_rounds'] ul li")
funding_rounds = []
for round_elem in funding_rounds_elem:
round_details = {}
round_type = round_elem.select_one("span.component--field-formatter")
if round_type:
round_details["type"] = round_type.text.strip()
round_amount = round_elem.select_one("span.component--field-formatter + span")
if round_amount:
round_details["amount"] = round_amount.text.strip()
funding_rounds.append(round_details)
startup_details["funding_rounds"] = funding_rounds
except Exception as e:
st.error(f"Error scraping Crunchbase: {e}")
# Scrape from AngelList
try:
angellist_url = f"https://angel.co/company/{startup_name.replace(' ', '-')}"
            response = requests.get(angellist_url, timeout=10)
if response.status_code == 200:
soup = BeautifulSoup(response.content, "html.parser")
# Extract team members
team_members_elem = soup.select("div.team-member")
team_members = []
for member_elem in team_members_elem:
member_name = member_elem.select_one("div.name")
if member_name:
team_members.append(member_name.text.strip())
startup_details["team_members"] = team_members
# Extract user growth (if available)
user_growth_elem = soup.select_one("div.profile-content-section div.section-tagline")
if user_growth_elem:
startup_details["user_growth"] = user_growth_elem.text.strip()
except Exception as e:
st.error(f"Error scraping AngelList: {e}")
return startup_details
models = list(model_links.keys())
selected_model = st.sidebar.selectbox("Select Model", models)
temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
st.sidebar.button('Reset Chat', on_click=reset_conversation) # Reset button
st.sidebar.write(f"You're now chatting with **{selected_model}**")
st.sidebar.markdown(model_info[selected_model]['description'])
st.sidebar.image(model_info[selected_model]['logo'])
st.sidebar.markdown("*Generating the code might go slow if you are using low power resources*")
if "prev_option" not in st.session_state:
st.session_state.prev_option = selected_model
if st.session_state.prev_option != selected_model:
st.session_state.messages = []
st.session_state.prev_option = selected_model
if "chat_state" not in st.session_state:
st.session_state.chat_state = "normal"
# Load the conversation history from the file
if "messages" not in st.session_state:
st.session_state.messages = load_conversation_history()
repo_id = model_links[selected_model]
st.subheader(selected_model)
if st.session_state.chat_state == "normal":
for message in st.session_state.messages:
with st.chat_message(message["role"]):
st.markdown(message["content"])
    if prompt := st.chat_input(f"Hi, I'm {selected_model}. How can I help you today?"):
if "predict success of" in prompt.lower():
            # Extract the startup name; the trailing "?" is optional so the
            # pattern matches prompts with or without a question mark.
            startup_name_match = re.search(r'predict success of (.+?)\??$', prompt, re.IGNORECASE)
if startup_name_match:
startup_name = startup_name_match.group(1).strip()
startup_details = scrape_startup_info(startup_name)
if startup_details:
with st.chat_message("user"):
st.markdown(prompt)
st.session_state.messages.append({"role": "user", "content": prompt})
conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
custom_instruction = f"Based on the provided startup details or information and your knowledge of the industry, provide a comprehensive analysis of the startup's potential for success. Discuss the industry outlook, future scope, and any other relevant factors that could contribute to the startup's success or failure. Provide a clear recommendation on whether the startup is likely to be successful or not."
formatted_text = format_prompt(prompt, conversation_history, startup_details, custom_instruction)
with st.chat_message("assistant"):
                        # Pass the API token loaded above; unauthenticated requests may be rate-limited.
                        client = InferenceClient(model=model_links[selected_model], token=API_KEY)
max_new_tokens = 2048 # Adjust this value as needed
try:
output = client.text_generation(
formatted_text,
temperature=temp_values,
max_new_tokens=max_new_tokens,
stream=True
)
response = ""
                            for output_chunk in output:
                                # With stream=True and no details, text_generation yields plain string tokens.
                                response += output_chunk
st.markdown(f"**Success Analysis for {startup_details['name']}**\n\n{response}")
except ValueError as e:
if "Input validation error" in str(e):
st.error("Error: The input prompt is too long. Please try a shorter prompt.")
else:
st.error(f"An error occurred: {e}")
except Exception as e:
st.error(f"An unexpected error occurred: {e}")
else:
st.session_state.messages.append({"role": "assistant", "content": response})
save_conversation_history(st.session_state.messages)
else:
st.write(f"No information found for the startup '{startup_name}'. Please try another startup name or provide additional details.")
else:
with st.chat_message("user"):
st.markdown(prompt)
st.session_state.messages.append({"role": "user", "content": prompt})
conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
formatted_text = format_prompt(prompt, conversation_history, {})
with st.chat_message("assistant"):
                # Pass the API token loaded above; unauthenticated requests may be rate-limited.
                client = InferenceClient(model=model_links[selected_model], token=API_KEY)
max_new_tokens = 3000 # Adjust this value as needed
try:
output = client.text_generation(
formatted_text,
temperature=temp_values,
max_new_tokens=max_new_tokens,
stream=True
)
response = ""
                    for output_chunk in output:
                        # With stream=True and no details, text_generation yields plain string tokens.
                        response += output_chunk
st.markdown(response)
except ValueError as e:
if "Input validation error" in str(e):
st.error("Error: The input prompt is too long. Please try a shorter prompt.")
else:
st.error(f"An error occurred: {e}")
except Exception as e:
st.error(f"An unexpected error occurred: {e}")
else:
st.session_state.messages.append({"role": "assistant", "content": response})
save_conversation_history(st.session_state.messages)
elif st.session_state.chat_state == "reset":
st.session_state.chat_state = "normal"
st.experimental_rerun()