sainathBelagavi committed 260b91a (parent: c10a439): Update app.py
app.py CHANGED
@@ -5,11 +5,9 @@ import re
 import requests
 from bs4 import BeautifulSoup
 import os
-import pickle
-import requests
+import pickle
 from requests.exceptions import HTTPError
 
-
 base_url = "https://api-inference.huggingface.co/models/"
 API_KEY = os.environ.get('HUGGINGFACE_API_KEY')
 
@@ -24,7 +22,7 @@ model_info = {
     },
 }
 
-def …
+def format_prompt(message, conversation_history, startup_details, custom_instructions=None):
     prompt = ""
     if custom_instructions:
         prompt += f"[INST] {custom_instructions} [/INST]\n"
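
Only the signature of format_prompt changes in this hunk; the rest of the body falls outside the diff. As a rough sketch of how a helper with this signature could fold the scraped startup details and prior turns into a Mistral-style prompt (the continuation below is an assumption for illustration, not the code in app.py):

def format_prompt(message, conversation_history, startup_details, custom_instructions=None):
    prompt = ""
    if custom_instructions:
        prompt += f"[INST] {custom_instructions} [/INST]\n"
    # Hypothetical continuation: the real body in app.py is outside this diff.
    if startup_details:
        prompt += f"[INST] Known startup details: {startup_details} [/INST]\n"
    for role, content in conversation_history:
        if role == "user":
            prompt += f"[INST] {content} [/INST]\n"
        else:
            prompt += f"{content}\n"
    prompt += f"[INST] {message} [/INST]"
    return prompt
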
@@ -78,101 +76,79 @@ def save_conversation_history(conversation_history):
 
 def scrape_startup_info(startup_name):
     startup_details = {}
-
+
     # Scrape from Wikipedia
     try:
-
-        startup_summary = wikipedia.summary(startup_name, auto_suggest_advice=False)
+        startup_summary = wikipedia.summary(startup_name, auto_suggest=False)
         startup_details['name'] = startup_name
        startup_details['summary'] = startup_summary
-
-        # Extract additional details from Wikipedia summary
-        startup_details['additional_details'] = {}
-        for key, value in startup_summary.items():
-            startup_details['additional_details'][key] = value
-
-    except (ValueError, HTTPError) as e:
+    except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError, ValueError, HTTPError):
         pass
 
-    # …
-    if '…
-        # Scrape …
-        startup_name = startup_details['additional_details']
-        del startup_details['additional_details']
-        startup_details = {key: value for key, value in startup_details.items() if key != 'name'}
-
-        # Extract additional details from the search results
+    # If no details from Wikipedia, scrape from Crunchbase and AngelList
+    if 'summary' not in startup_details:
+        # Scrape from Crunchbase
         try:
-            …
-            startup_details["…
-
-
-            funding_rounds_elem = soup.select("div.funding-rounds-list > div.card")
-            funding_rounds = []
-            for round_elem in funding_rounds_elem:
-                round_details = {}
-                round_type = round_elem.select_one("span.type")
-                if round_type:
-                    round_details["type"] = round_type.text.strip()
-                round_amount = round_elem.select_one("span.amount")
-                if round_amount:
-                    round_details["amount"] = round_amount.text.strip()
-                funding_rounds.append(round_details)
-            startup_details["funding_rounds"] = funding_rounds
+            crunchbase_url = f"https://www.crunchbase.com/organization/{startup_name.replace(' ', '-')}"
+            response = requests.get(crunchbase_url)
+            if response.status_code == 200:
+                soup = BeautifulSoup(response.content, "html.parser")
+                startup_details["name"] = startup_name
+
+                # Extract founded year
+                founded_year_elem = soup.select_one("div[data-field='founded_year'] span.component--field-formatter")
+                if founded_year_elem:
+                    startup_details["founded_year"] = int(founded_year_elem.text.strip())
+
+                # Extract industry
+                industry_elem = soup.select_one("div[data-field='industries'] span.component--field-formatter")
+                if industry_elem:
+                    startup_details["industry"] = industry_elem.text.strip()
+
+                # Extract funding rounds
+                funding_rounds_elem = soup.select("div[data-field='funding_rounds'] ul li")
+                funding_rounds = []
+                for round_elem in funding_rounds_elem:
+                    round_details = {}
+                    round_type = round_elem.select_one("span.component--field-formatter")
+                    if round_type:
+                        round_details["type"] = round_type.text.strip()
+                    round_amount = round_elem.select_one("span.component--field-formatter + span")
+                    if round_amount:
+                        round_details["amount"] = round_amount.text.strip()
+                    funding_rounds.append(round_details)
+                startup_details["funding_rounds"] = funding_rounds
+        except Exception as e:
+            st.error(f"Error scraping Crunchbase: {e}")
 
         # Scrape from AngelList
-            …
+        try:
+            angellist_url = f"https://angel.co/company/{startup_name.replace(' ', '-')}"
+            response = requests.get(angellist_url)
+            if response.status_code == 200:
+                soup = BeautifulSoup(response.content, "html.parser")
+
+                # Extract team members
+                team_members_elem = soup.select("div.team-member")
+                team_members = []
+                for member_elem in team_members_elem:
+                    member_name = member_elem.select_one("div.name")
+                    if member_name:
+                        team_members.append(member_name.text.strip())
+                startup_details["team_members"] = team_members
+
+                # Extract user growth (if available)
+                user_growth_elem = soup.select_one("div.profile-content-section div.section-tagline")
+                if user_growth_elem:
+                    startup_details["user_growth"] = user_growth_elem.text.strip()
+        except Exception as e:
+            st.error(f"Error scraping AngelList: {e}")
 
     return startup_details
 
 models = [key for key in model_links.keys()]
 selected_model = st.sidebar.selectbox("Select Model", models)
-temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, …
+temp_values = st.sidebar.slider('Select a temperature value', 0.0, 1.0, 0.5)
 st.sidebar.button('Reset Chat', on_click=reset_conversation) # Reset button
 
 st.sidebar.write(f"You're now chatting with **{selected_model}**")
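
A practical caveat for the Crunchbase and AngelList scraping added above: both sites commonly reject plain requests.get calls (403s, bot checks), so the broad except blocks will often report errors or fall through to an empty dict. A small hardening sketch, assuming an illustrative User-Agent and timeout that are not part of this commit:

def fetch_page(url, timeout=10):
    # Illustrative only: these sites may still block or rate-limit scripted requests.
    headers = {"User-Agent": "Mozilla/5.0 (compatible; startup-info-bot/0.1)"}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response
    except requests.RequestException:
        return None
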
@@ -180,6 +156,7 @@ st.sidebar.markdown(model_info[selected_model]['description'])
 st.sidebar.image(model_info[selected_model]['logo'])
 
 st.sidebar.markdown("*Generating the code might go slow if you are using low power resources*")
+
 if "prev_option" not in st.session_state:
     st.session_state.prev_option = selected_model
 
@@ -207,7 +184,7 @@ if st.session_state.chat_state == "normal":
         # Extract the startup name from the prompt
         startup_name_match = re.search(r'predict success of (.*?)\?', prompt, re.IGNORECASE)
         if startup_name_match:
-            startup_name = startup_name_match.group(1)
+            startup_name = startup_name_match.group(1).strip()
             startup_details = scrape_startup_info(startup_name)
             if startup_details:
                 with st.chat_message("user"):
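
The added .strip() matters because the capture group keeps any whitespace the user typed around the name. A quick illustration with a made-up prompt:

import re

prompt = "Can you predict success of  Acme Robotics ?"
match = re.search(r'predict success of (.*?)\?', prompt, re.IGNORECASE)
if match:
    print(repr(match.group(1)))          # ' Acme Robotics '
    print(repr(match.group(1).strip()))  # 'Acme Robotics'
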
@@ -215,16 +192,16 @@ if st.session_state.chat_state == "normal":
 
                 st.session_state.messages.append({"role": "user", "content": prompt})
                 conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
-                custom_instruction = f"Based on the provided startup details and your knowledge of the industry, provide a comprehensive analysis of the startup's potential for success. Discuss the industry outlook, future scope, and any other relevant factors that could contribute to the startup's success or failure. Provide a clear recommendation on whether the startup is likely to be successful or not."
+                custom_instruction = f"Based on the provided startup details or information and your knowledge of the industry, provide a comprehensive analysis of the startup's potential for success. Discuss the industry outlook, future scope, and any other relevant factors that could contribute to the startup's success or failure. Provide a clear recommendation on whether the startup is likely to be successful or not."
 
-                …
+                formatted_text = format_prompt(prompt, conversation_history, startup_details, custom_instruction)
 
                 with st.chat_message("assistant"):
                     client = InferenceClient(model=model_links[selected_model])
                     max_new_tokens = 2048  # Adjust this value as needed
                     try:
                         output = client.text_generation(
-                            …
+                            formatted_text,
                             temperature=temp_values,
                             max_new_tokens=max_new_tokens,
                             stream=True
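
For context on the streaming call: with stream=True (and the default details=False), InferenceClient.text_generation yields plain text chunks, so the usual pattern is to accumulate them while updating a placeholder. A minimal sketch of that consumer, assuming a Streamlit placeholder; the commit's actual handling of output sits outside these hunks:

placeholder = st.empty()
response_text = ""
for chunk in output:  # each chunk is a piece of generated text when stream=True
    response_text += chunk
    placeholder.markdown(response_text)
st.session_state.messages.append({"role": "assistant", "content": response_text})
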
@@ -255,14 +232,14 @@ if st.session_state.chat_state == "normal":
         st.session_state.messages.append({"role": "user", "content": prompt})
         conversation_history = [(message["role"], message["content"]) for message in st.session_state.messages]
 
-        …
+        formatted_text = format_prompt(prompt, conversation_history, {})
 
         with st.chat_message("assistant"):
             client = InferenceClient(model=model_links[selected_model])
             max_new_tokens = 3000  # Adjust this value as needed
             try:
                 output = client.text_generation(
-                    …
+                    formatted_text,
                     temperature=temp_values,
                     max_new_tokens=max_new_tokens,
                     stream=True
@@ -287,4 +264,4 @@ if st.session_state.chat_state == "normal":
 
 elif st.session_state.chat_state == "reset":
     st.session_state.chat_state = "normal"
-    st.experimental_rerun()
+    st.experimental_rerun()
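
One version note on the line above: st.experimental_rerun is deprecated in recent Streamlit releases. If the Space runs Streamlit 1.27 or newer, the equivalent call would be:

st.rerun()  # replaces st.experimental_rerun() on newer Streamlit versions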