Spaces:

VentureFlow
/

ai-deal-demo

Runtime error

App Files Files Community

0xleec committed on May 26, 2023

Commit

6a6f2fa

•

1 Parent(s): aea1676

update app

Browse files

Files changed (1) hide show

app.py +45 -12

app.py CHANGED Viewed

@@ -9,13 +9,15 @@ from langchain.vectorstores import FAISS
 from langchain.chains.question_answering import load_qa_chain
 from langchain.chat_models import ChatOpenAI
 from langchain.callbacks import get_openai_callback
-from requests.exceptions import Timeout
 import requests
 from bs4 import BeautifulSoup
 from urllib.parse import urlparse, urljoin
 import time
 import random
 import os
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
@@ -30,6 +32,18 @@ headers = {
 }
 def get_internal_links(url):
     print('start get internal links')
     internal_links = []
@@ -53,9 +67,10 @@ def get_internal_links(url):
 def get_page_content(url):
     response = requests.get(url, headers=headers, timeout=5)
     soup = BeautifulSoup(response.content, 'html.parser')
-    content = soup.get_text()
     time.sleep(random.randint(1, 3))
@@ -63,9 +78,8 @@ def get_page_content(url):
 def crawl_site(url):
-    links_to_visit = get_internal_links(url)
     content = ""
@@ -99,9 +113,27 @@ def get_pdf_response(file):
     if file is not None:
         text = decode_pdf(file)
-        return get_response(text)
 def get_website_response(url):
     content = crawl_site(url)
     result = get_response(content)
@@ -110,9 +142,6 @@ def get_website_response(url):
 def get_response(text):
-        print(text)
         # split into chunks
         text_splitter = CharacterTextSplitter(
             separator="\n",
@@ -134,8 +163,12 @@ def get_response(text):
 def ask_question(knowledge_base):
     user_question = """this content is a web3 project pitch deck. return result as JSON format. Please use the following JSON format to return data. if some fields are incomplete or missing, use 'N/A' to replace it.
-{{"project_name":"this project name","introduction":"project introduction, less than 200 words","slogan":"project slogan","features":"project features","description":"project description","roadmap":"g","fundraising":"fundraising target,round, valuation etc.",contact_email":"project contact email","website":"project official website","twitter":"official twitter","github":"official github","telegram":"official telegram","team_member":"team member list, include name, position, introduction, twitter, github, telegram, etc."}}"""
     print("Question:", user_question)
@@ -185,9 +218,9 @@ def upload_file(file):
 with gr.Blocks(title="Use AI boost your deal flow - Ventureflow") as demo:
     gr.Markdown("# Use AI boost your deal flow")
     with gr.Tab("Upload Deck"):
-        file_input = gr.File(file_types=[".pdf"])
         json_output = gr.JSON()
-        upload_button = gr.UploadButton("Click to Upload a Deck(.pdf))")
         upload_button.upload(upload_file, upload_button, json_output)
     with gr.Tab("Enter Project website"):
         text_input = gr.Textbox(label="Enter Project website")

 from langchain.chains.question_answering import load_qa_chain
 from langchain.chat_models import ChatOpenAI
 from langchain.callbacks import get_openai_callback
+# from requests.exceptions import Timeout
 import requests
 from bs4 import BeautifulSoup
 from urllib.parse import urlparse, urljoin
 import time
 import random
 import os
+import mimetypes
+from openai.error import Timeout
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 }
+def is_webpage(url):
+    """
+    判断一个链接是否为网页链接
+    """
+    content_type = requests.head(url, headers=headers).headers.get('Content-Type')
+    if content_type is not None:
+        mimetype, encoding = mimetypes.guess_type(url, strict=False)
+        if mimetype is not None and mimetype.startswith('text/html'):
+            return True
+    return False
 def get_internal_links(url):
     print('start get internal links')
     internal_links = []
 def get_page_content(url):
     response = requests.get(url, headers=headers, timeout=5)
     soup = BeautifulSoup(response.content, 'html.parser')
+    content = soup.get_text('\n')
     time.sleep(random.randint(1, 3))
 def crawl_site(url):
+    # links_to_visit = get_internal_links(url)
+    links_to_visit = [url]
     content = ""
     if file is not None:
         text = decode_pdf(file)
+        print('pdf text:', text)
+        if text:
+            return get_response(text)
+        else:
+            return {"error": "covert pdf to text failed"}
+def fix_url(url):
+    try:
+        response = requests.head(url)
+        if response.status_code != 405:
+            return url
+        else:
+            return "https://" + url
+    except requests.exceptions.MissingSchema:
+        return "https://" + url
 def get_website_response(url):
+    url = fix_url(url)
     content = crawl_site(url)
     result = get_response(content)
 def get_response(text):
         # split into chunks
         text_splitter = CharacterTextSplitter(
             separator="\n",
 def ask_question(knowledge_base):
+#     user_question = """this content is a web3 project pitch deck. return result as JSON format. Please use the following JSON format to return data. if some fields are incomplete or missing, use 'N/A' to replace it.
+# {{"project_name":"this project name","introduction":"project introduction, less than 200 words","slogan":"project slogan","features":"project features","description":"project description","roadmap":"g","fundraising":"fundraising target,round, valuation etc."}}"""
     user_question = """this content is a web3 project pitch deck. return result as JSON format. Please use the following JSON format to return data. if some fields are incomplete or missing, use 'N/A' to replace it.
+    {{"project_name":"this project name","introduction":"project introduction, less than 200 words","slogan":"project slogan","features":"project features","description":"project description","roadmap":"g","fundraising":"fundraising target,round, valuation etc.",contact_email":"project contact email","website":"project official website","twitter":"official twitter","github":"official github","telegram":"official telegram"}}"""
     print("Question:", user_question)
 with gr.Blocks(title="Use AI boost your deal flow - Ventureflow") as demo:
     gr.Markdown("# Use AI boost your deal flow")
     with gr.Tab("Upload Deck"):
+        # file_input = gr.File(file_types=[".pdf"])
+        upload_button = gr.UploadButton("Click to Upload a Deck(.pdf))", file_types=[".pdf"])
         json_output = gr.JSON()
         upload_button.upload(upload_file, upload_button, json_output)
     with gr.Tab("Enter Project website"):
         text_input = gr.Textbox(label="Enter Project website")