dromerosm committed on
Commit
497769a
1 Parent(s): 982a876

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -20,7 +20,7 @@ def text_prompt(request, page_url, contraseña, temp):
20
  page.parse()
21
 
22
  except Exception as e:
23
- return "", f"--- Ha ocurrido un error al procesar la URL: {e} ---", ""
24
 
25
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
26
  sentences = page.text.split('.')
@@ -31,12 +31,12 @@ def text_prompt(request, page_url, contraseña, temp):
31
  for sentence in sentences:
32
  tokens.extend(tokenizer.tokenize(sentence))
33
 
34
- # Recortar el texto a un máximo de 1800 tokens
35
  if len(tokens) > 1800:
36
  break
37
  page_text += sentence + ". "
38
 
39
- # Eliminar el ultimo espacio
40
  page_text = page_text.strip()
41
 
42
  num_tokens = len(tokens)
@@ -61,8 +61,8 @@ def text_prompt(request, page_url, contraseña, temp):
61
  response_text = response_text.strip()
62
  return page.text, response_text, total_tokens
63
  except Exception as e:
64
- return page.text, f"--- Ha ocurrido un error al procesar la solicitud: {e} ---", num_tokens
65
- return page.text, "--- Min number of tokens ---", num_tokens
66
 
67
  # define the gradio interface
68
  iface = gr.Interface(
@@ -77,11 +77,11 @@ iface = gr.Interface(
77
  ["Generate a summary of the following text. Give me an overview of main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html","",0.7],
78
  ["Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):","https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/","",0.3]
79
  ],
80
- title="ChatGPT info extraction with newspaper3k",
81
- description="This tool allows querying the text retrieved from the URL using OpenAI's [text-davinci-003] engine.\nThe URL text can be referenced in the prompt as \"following text\".\nA GPT2 tokenizer is included to ensure that the 2000 token limit for OpenAI queries is not exceeded. Provide a prompt with your request, the url for text retrieval, your api-key and temperature to process the text."
82
  )
83
 
84
- # captura de errores en la integración como componente
85
 
86
  error_message = ""
87
 
 
20
  page.parse()
21
 
22
  except Exception as e:
23
+ return "", f"--- An error occurred while processing the URL: {e} ---", ""
24
 
25
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
26
  sentences = page.text.split('.')
 
31
  for sentence in sentences:
32
  tokens.extend(tokenizer.tokenize(sentence))
33
 
34
+ # Trim text to a maximum of 1800 tokens
35
  if len(tokens) > 1800:
36
  break
37
  page_text += sentence + ". "
38
 
39
+ # Delete the last space
40
  page_text = page_text.strip()
41
 
42
  num_tokens = len(tokens)
 
61
  response_text = response_text.strip()
62
  return page.text, response_text, total_tokens
63
  except Exception as e:
64
+ return page.text, f"--- An error occurred while processing the request: {e} ---", num_tokens
65
+ return page.text, "--- Min number of tokens:", num_tokens
66
 
67
  # define the gradio interface
68
  iface = gr.Interface(
 
77
  ["Generate a summary of the following text. Give me an overview of main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html","",0.7],
78
  ["Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):","https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/","",0.3]
79
  ],
80
+ title="ChatGPT / GPT-3 info extraction from URL",
81
+ description="This tool allows querying the text retrieved from the URL with newspaper3k lib and using OpenAI's [text-davinci-003] engine.\nThe URL text can be referenced in the prompt as \"following text\".\nA GPT2 tokenizer is included to ensure that the 1.800 token limit for OpenAI queries is not exceeded. Provide a prompt with your request, the url for text retrieval, your api-key and temperature to process the text."
82
  )
83
 
84
+ # Capture errors when the interface is integrated as a component
85
 
86
  error_message = ""
87