eldavid committed on
Commit
09be9ae
1 Parent(s): 2a9f612

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -22
app.py CHANGED
@@ -1,26 +1,58 @@
1
- import gradio as gr
2
- from transformers import pipeline
3
-
4
- # Carregar o modelo pré-treinado diretamente do Hugging Face
5
- ner_model = pipeline(
6
- "ner",
7
- model="bert-base-cased",
8
- tokenizer="bert-base-cased"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  )
10
 
11
- # Definir a função para a interface do Gradio
12
- def analyze_ner(text):
13
- result = ner_model(text)
14
- return result
15
-
16
- # Criar a interface do Gradio
17
- iface = gr.Interface(
18
- fn=analyze_ner,
19
- inputs="text",
20
- outputs="json",
21
- title="Reconhecimento de Entidades Nomeadas",
22
- description="Digite um texto para identificar entidades nomeadas."
 
 
 
23
  )
24
 
25
- # Lançar a interface do Gradio com um link público
26
- iface.launch(share=True)
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
3
+ from datasets import load_dataset, load_metric
4
+
5
# Load the IMDB dataset by name.
dataset = load_dataset('imdb')

# The tokenizer and the sequence-classification model are both loaded from
# the same pretrained RoBERTa checkpoint.
_CHECKPOINT = 'roberta-base'
tokenizer = RobertaTokenizer.from_pretrained(_CHECKPOINT)
model = RobertaForSequenceClassification.from_pretrained(_CHECKPOINT)
11
+
12
+ # Tokenizar os dados
13
def preprocess_function(examples):
    """Tokenize the ``'text'`` field of a batch of examples.

    Args:
        examples: A batch as supplied by ``dataset.map(..., batched=True)``;
            only its ``'text'`` entry is read.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the batch.
    """
    # Only truncate here. Padding is deliberately left to the
    # DataCollatorWithPadding handed to the Trainer, which pads each batch to
    # its own longest sequence; padding every example to the model maximum at
    # map time would make that collator a no-op and inflate every batch.
    return tokenizer(examples['text'], truncation=True)
15
+
16
# Apply the tokenization function to the loaded dataset, one batch at a time.
tokenized_datasets = dataset.map(function=preprocess_function, batched=True)

# Build the data collator around the same tokenizer.
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer)
21
+
22
# Training configuration, gathered in one mapping and expanded into
# TrainingArguments.
_training_options = {
    'output_dir': './results',
    'evaluation_strategy': 'epoch',
    'per_device_train_batch_size': 8,
    'per_device_eval_batch_size': 8,
    'num_train_epochs': 3,
    'weight_decay': 0.01,
}
training_args = TrainingArguments(**_training_options)
31
 
32
+ # Definir a função de métricas
33
def compute_metrics(eval_pred):
    """Compute classification accuracy from an evaluation prediction.

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` holds one row of
            class scores per example and ``labels`` the matching class ids.
            Anything ``numpy.asarray`` accepts works (NumPy arrays, nested
            lists, CPU tensors).

    Returns:
        A dict ``{"accuracy": <float>}`` with the fraction of examples whose
        highest-scoring class equals the label.

    NOTE(review): accuracy is assumed to be the intended metric; no metric
    object is defined anywhere in the visible script. Confirm.
    """
    # Local import: the file does not import NumPy at module level.
    import numpy as np

    logits, labels = eval_pred
    # The scores arrive as array-likes, so take the arg-max with NumPy rather
    # than torch.argmax, which only accepts tensors.
    predictions = np.argmax(np.asarray(logits), axis=-1)
    references = np.asarray(labels)
    # Computed directly so the function does not rely on a module-level
    # metric object.
    return {"accuracy": float((predictions == references).mean())}
37
+
38
# Assemble the Trainer from the model, arguments, data splits, tokenizer,
# collator and metric function prepared earlier in the script.
_trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
trainer = Trainer(**_trainer_kwargs)
48
 
49
# Run training.
trainer.train()

# Evaluate the trained model and print the resulting metrics.
results = trainer.evaluate()
print(results)

# Save the model and then the tokenizer into the same directory.
_SAVE_DIR = './model'
for _artifact in (model, tokenizer):
    _artifact.save_pretrained(_SAVE_DIR)