Alberto Carmona committed on
Commit
5537833
1 Parent(s): 92bb964

Add the summarize function

Browse files
Files changed (3) hide show
  1. app.py +3 -2
  2. functions.py +42 -0
  3. requirements.txt +9 -1
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
- from functions import extract_text
 
3
 
4
 
5
  def update(name='default text'):
@@ -25,7 +26,7 @@ with gr.Blocks() as demo:
25
  gr.Markdown("Puede obtener un resumen del texto extraído.")
26
  btn_summarize = gr.Button("Elaborar resumen")
27
  out_summary = gr.Textbox(label="Resumen")
28
- btn_summarize.click(fn=update, inputs=out_url_text, outputs=out_summary)
29
 
30
  # Question generation section
31
  gr.Markdown("## Evalúa tu conocimiento")
 
1
  import gradio as gr
2
+ from functions import extract_text, summarize_text
3
+
4
 
5
 
6
  def update(name='default text'):
 
26
  gr.Markdown("Puede obtener un resumen del texto extraído.")
27
  btn_summarize = gr.Button("Elaborar resumen")
28
  out_summary = gr.Textbox(label="Resumen")
29
+ btn_summarize.click(fn=summarize_text, inputs=out_url_text, outputs=out_summary)
30
 
31
  # Question generation section
32
  gr.Markdown("## Evalúa tu conocimiento")
functions.py CHANGED
@@ -1,11 +1,53 @@
1
  import requests
2
  from bs4 import BeautifulSoup
 
 
 
 
 
 
 
 
3
 
4
 
5
  def extract_text(url: str):
 
6
  if url is None or url.strip() == '':
7
  return ''
8
  response = requests.get(url)
9
  soup = BeautifulSoup(response.text, "html.parser")
10
  text = '\n\n'.join(map(lambda p: p.text, soup.find_all('p')))
 
11
  return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import requests
2
  from bs4 import BeautifulSoup
3
+ import torch
4
+ from peft import PeftModel, PeftConfig
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
6
+
7
+
8
+ generation_config = GenerationConfig(temperature=.8,
9
+ top_p=0.75,
10
+ top_k=40)
11
 
12
 
13
def extract_text(url: str):
    """Fetch *url* and return the text of all its ``<p>`` elements.

    Parameters:
        url: Page to scrape. ``None`` or a blank/whitespace-only string
            short-circuits to an empty result.

    Returns:
        Paragraph texts joined by blank lines, or ``''`` for a missing URL.

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    print(['extract_text', 'start'])
    # Guard: the Gradio textbox can hand us None or an empty string.
    if url is None or url.strip() == '':
        return ''
    # Timeout added so a stalled server cannot hang the Gradio handler forever.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, "html.parser")
    text = '\n\n'.join(p.text for p in soup.find_all('p'))
    print(['extract_text', 'end'])
    return text
22
+
23
+
24
def summarize_text(text: str):
    """Generate a Spanish summary of *text* using the module-level LoRA model.

    Builds an instruction-style prompt, generates up to 256 new tokens under
    CUDA autocast, and returns the decoded string (the prompt is included,
    since the output is decoded from position 0). Relies on the module-level
    ``model``, ``tokenizer`` and ``generation_config`` globals.
    """
    print(['summarize_text', 'start'])
    # NOTE(review): 'resume' looks like a typo for the Spanish 'resumen', but
    # this prompt must match the format used during fine-tuning — confirm
    # against the training data before changing it.
    prompt = f'<s>Instruction: Elabora un resume del siguiente texto.\nInput: {text}\nOutput: '
    encoded = tokenizer(prompt, return_tensors='pt')
    print(['summarize_text', 'generating'])
    with torch.cuda.amp.autocast():
        generated = model.generate(
            **encoded,
            max_new_tokens=256,
            generation_config=generation_config,
        )
    summary = tokenizer.decode(generated[0], skip_special_tokens=True)
    print(['summarize_text', 'end'])
    return summary
37
+
38
+
39
def load_model(peft_model_id):
    """Load an 8-bit quantized base causal LM plus its PEFT/LoRA adapter.

    Parameters:
        peft_model_id: Hub id of the PEFT adapter; the base model name is
            read from the adapter's ``PeftConfig``.

    Returns:
        ``(model, tokenizer)`` — the adapter-wrapped model and the base
        model's tokenizer.
    """
    print(['load_model', 'start'])
    config = PeftConfig.from_pretrained(peft_model_id)
    print(['load_model', 'loading model'])
    base = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        return_dict=True,
        load_in_8bit=True,   # bitsandbytes 8-bit quantization
        device_map='auto',   # let accelerate place the layers
    )
    print(['load_model', 'loading tokenizer'])
    tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    # Wrap the quantized base model with the LoRA adapter weights.
    model = PeftModel.from_pretrained(base, peft_model_id)
    model.config.use_cache = True  # enable the KV cache for faster generation
    print(['load_model', 'end'])
    return model, tokenizer
51
+
52
+
53
+ model, tokenizer = load_model("milyiyo/opt-6.7b-lora-sag-t3000-v300-v2")
requirements.txt CHANGED
@@ -1,2 +1,10 @@
1
- requests
2
  beautifulsoup4
 
 
 
 
 
 
 
 
 
1
+ accelerate
2
  beautifulsoup4
3
+ bitsandbytes
4
+ datasets
5
+ loralib
6
+ peft
7
+ requests
8
+ sentencepiece
9
+ torch
10
+ transformers