Spaces:
Runtime error
Runtime error
Isaac Isaías
committed on
Commit
·
de6ed29
1
Parent(s):
b0d112d
Update app.py
Browse files
app.py
CHANGED
@@ -7,7 +7,8 @@ description = "Clasifica el texto de una tesis con la carrera a la que le perten
|
|
7 |
|
8 |
article = """
|
9 |
## Obtención de los datos
|
10 |
-
En un principio no se contaba con ningún dataset disponible, por lo que se optó realizar un scraper para conseguir la información. Se decidió usar la base de datos [TESIUNAM](https://tesiunam.dgb.unam.mx/F?func=find-b-0&local_base=TES01),
|
|
|
11 |
|
12 |
|
13 |
|
@@ -21,80 +22,12 @@ article = """
|
|
21 |
tokenizer = AutoTokenizer.from_pretrained('hiiamsid/BETO_es_binary_classification', use_fast=False)
|
22 |
model = AutoModelForSequenceClassification.from_pretrained(
|
23 |
'hackathon-pln-es/unam_tesis_BETO_finnetuning', num_labels=5, output_attentions=False, output_hidden_states=False)
|
24 |
-
|
25 |
|
26 |
-
|
27 |
|
28 |
|
29 |
def thesis_prediction(input):
|
30 |
-
tokenizer = AutoTokenizer.from_pretrained('hiiamsid/BETO_es_binary_classification', use_fast=False)
|
31 |
-
X_val_inputs, X_val_masks = preprocessingtext(_text,tokenizer)
|
32 |
-
t0 = time.time()
|
33 |
-
|
34 |
-
# Deserialization of the file
|
35 |
-
#file = open(path + os.path.sep + 'classIndexAssociation.pkl', 'rb')
|
36 |
-
#new_model = pickle.load(file)
|
37 |
-
|
38 |
-
#sizeOfClass = len(new_model)
|
39 |
-
|
40 |
-
model = AutoModelForSequenceClassification.from_pretrained(
|
41 |
-
'hackathon-pln-es/unam_tesis_BETO_finnetuning', num_labels=5, output_attentions=False, output_hidden_states=False)
|
42 |
-
#Bibliografy from:
|
43 |
-
#
|
44 |
-
# https://huggingface.co/docs/transformers/main_classes/output
|
45 |
-
#
|
46 |
-
inputs = tokenizer(_text, return_tensors="pt")
|
47 |
-
labels = torch.tensor([1]).unsqueeze(0) # Batch size 1
|
48 |
-
outputs = model(**inputs, labels=labels)
|
49 |
-
|
50 |
-
loss, logits = outputs[:2]
|
51 |
-
|
52 |
-
#Transform in array
|
53 |
-
logits = logits.detach().cpu().numpy()
|
54 |
-
|
55 |
-
#Get max element and position
|
56 |
-
result = logits.argmax()
|
57 |
-
return result
|
58 |
-
|
59 |
-
#Example from
|
60 |
-
#
|
61 |
-
#
|
62 |
-
#
|
63 |
-
# pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
|
64 |
-
# # Put the model in evaluation mode
|
65 |
-
# classificationResult = pipe(_text)
|
66 |
-
# if classificationResult[0] != None and len (classificationResult[0]) > 0:
|
67 |
-
# #Order the result with more close to 1
|
68 |
-
# classificationResult[0].sort(reverse=True, key=lambda x:x['score'])
|
69 |
-
# # Return the text clasification
|
70 |
-
# keyClass = classificationResult[0][0]['label']
|
71 |
-
# keyClass = keyClass.replace("LABEL_","").strip()
|
72 |
-
# if keyClass.isnumeric():
|
73 |
-
# return new_model[ int (keyClass)]
|
74 |
-
# else:
|
75 |
-
# raise Exception("Not exist class info")
|
76 |
-
# model.eval()
|
77 |
-
# outputs = model(X_val_inputs,
|
78 |
-
# token_type_ids=None,
|
79 |
-
# attention_mask=X_val_masks)
|
80 |
-
#
|
81 |
-
# # The "logits" are the output values
|
82 |
-
# # prior to applying an activation function
|
83 |
-
# logits = outputs[0]
|
84 |
-
#
|
85 |
-
# # Move logits and labels to CPU
|
86 |
-
# logits = logits.detach().cpu().numpy()
|
87 |
-
#
|
88 |
-
# sorted_tuples = sorted(logits.items(), key=lambda item: item[1])
|
89 |
-
# #Return the text clasification
|
90 |
-
# keyClass = sorted_tuples.keys()[0]
|
91 |
-
# return new_model[keyClass]
|
92 |
-
|
93 |
-
else:
|
94 |
-
raise Exception("Not exist model info")
|
95 |
-
else:
|
96 |
-
raise Exception("Not exist model info")
|
97 |
-
return "Text"
|
98 |
pass
|
99 |
|
100 |
|
|
|
7 |
|
8 |
article = """
|
9 |
## Obtención de los datos
|
10 |
+
En un principio no se contaba con ningún dataset disponible, por lo que se optó realizar un [scraper](https://github.com/IsaacIsaias/NLP-clasificador-tesis/blob/main/main.py) para conseguir la información. Se decidió usar la base de datos [TESIUNAM](https://tesiunam.dgb.unam.mx/F?func=find-b-0&local_base=TES01), la cual es un catálogo en donde se pueden visualizar las tesis de los sustentantes que obtuvieron un grado en la Universidad Nacional Autónoma de México (UNAM), así como de las tesis de licenciatura de escuelas incorporadas a ella.
|
11 |
+
Para ello, en primer lugar se consultó la [Oferta Académica](http://oferta.unam.mx/indice-alfabetico.html) de la Universidad, sitio de donde se extrajo cada una de las 131 licenciaturas en forma de lista.
|
12 |
|
13 |
|
14 |
|
|
|
22 |
tokenizer = AutoTokenizer.from_pretrained('hiiamsid/BETO_es_binary_classification', use_fast=False)
|
23 |
model = AutoModelForSequenceClassification.from_pretrained(
|
24 |
'hackathon-pln-es/unam_tesis_BETO_finnetuning', num_labels=5, output_attentions=False, output_hidden_states=False)
|
25 |
+
pipe = TextClassificationPipeline(model=model, tokenizer=tokenizer, return_all_scores=True)
|
26 |
|
27 |
+
classificationResult = pipe("El objetivo de esta tesis es elaborar un estudio de las condiciones asociadas al aprendizaje desde casa.")
|
28 |
|
29 |
|
30 |
def thesis_prediction(input):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
pass
|
32 |
|
33 |
|