Spaces:
Sleeping
Sleeping
bertugmirasyedi
committed on
Commit
•
76bf172
1
Parent(s):
19449a2
Added locally trained models
Browse files
app.py
CHANGED
@@ -38,11 +38,19 @@ classification_tokenizer_normal = AutoTokenizer.from_pretrained(
|
|
38 |
classification_model_normal = AutoModelForSequenceClassification.from_pretrained(
|
39 |
"sileod/deberta-v3-base-tasksource-nli"
|
40 |
)
|
41 |
-
|
42 |
-
|
|
|
43 |
)
|
44 |
-
|
45 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
)
|
47 |
|
48 |
# Define similarity model
|
@@ -365,37 +373,68 @@ async def classify(data: list, runtime: str = "normal"):
|
|
365 |
# Define the zero-shot classifier
|
366 |
tokenizer = classification_tokenizer_normal
|
367 |
model = classification_model_normal
|
368 |
-
elif runtime == "onnxruntime":
|
369 |
-
tokenizer = classification_tokenizer_onnx
|
370 |
-
model = classification_model_onnx
|
371 |
-
|
372 |
-
classifier_pipe = pipeline(
|
373 |
-
"zero-shot-classification",
|
374 |
-
model=model,
|
375 |
-
tokenizer=tokenizer,
|
376 |
-
hypothesis_template="This book is {}.",
|
377 |
-
batch_size=1,
|
378 |
-
device=-1,
|
379 |
-
multi_label=False,
|
380 |
-
)
|
381 |
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
|
|
|
|
|
|
|
|
387 |
|
388 |
-
|
|
|
|
|
|
|
|
|
389 |
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
399 |
|
400 |
return classes
|
401 |
|
|
|
38 |
classification_model_normal = AutoModelForSequenceClassification.from_pretrained(
|
39 |
"sileod/deberta-v3-base-tasksource-nli"
|
40 |
)
|
41 |
+
|
42 |
+
audience_classification_tokenizer = AutoTokenizer.from_pretrained(
|
43 |
+
"bertugmirasyedi/deberta-v3-base-book-classification"
|
44 |
)
|
45 |
+
audience_classification_model = AutoModelForSequenceClassification.from_pretrained(
|
46 |
+
"bertugmirasyedi/deberta-v3-base-level-classification"
|
47 |
+
)
|
48 |
+
|
49 |
+
level_classification_tokenizer = AutoTokenizer.from_pretrained(
|
50 |
+
"bertugmirasyedi/deberta-v3-base-level-classification"
|
51 |
+
)
|
52 |
+
level_classification_model = AutoModelForSequenceClassification.from_pretrained(
|
53 |
+
"bertugmirasyedi/deberta-v3-base-level-classification"
|
54 |
)
|
55 |
|
56 |
# Define similarity model
|
|
|
373 |
# Define the zero-shot classifier
|
374 |
tokenizer = classification_tokenizer_normal
|
375 |
model = classification_model_normal
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
376 |
|
377 |
+
classifier_pipe = pipeline(
|
378 |
+
"zero-shot-classification",
|
379 |
+
model=model,
|
380 |
+
tokenizer=tokenizer,
|
381 |
+
hypothesis_template="This book is {}.",
|
382 |
+
batch_size=1,
|
383 |
+
device=-1,
|
384 |
+
multi_label=False,
|
385 |
+
)
|
386 |
|
387 |
+
# Define the candidate labels
|
388 |
+
level = [
|
389 |
+
"Introductory",
|
390 |
+
"Advanced",
|
391 |
+
]
|
392 |
|
393 |
+
audience = ["Academic", "Not Academic", "Manual"]
|
394 |
+
|
395 |
+
classes = [
|
396 |
+
{
|
397 |
+
"audience": classifier_pipe(doc, audience)["labels"][0],
|
398 |
+
"audience_confidence": classifier_pipe(doc, audience)["scores"][0],
|
399 |
+
"level": classifier_pipe(doc, level)["labels"][0],
|
400 |
+
"level_confidence": classifier_pipe(doc, level)["scores"][0],
|
401 |
+
}
|
402 |
+
for doc in combined_data
|
403 |
+
]
|
404 |
+
elif runtime == "local":
|
405 |
+
### Define the classifier for audience prediction ###
|
406 |
+
audience_classifier = pipeline(
|
407 |
+
"text-classification",
|
408 |
+
model=audience_classification_model,
|
409 |
+
tokenizer=audience_classification_tokenizer,
|
410 |
+
device=-1,
|
411 |
+
)
|
412 |
+
|
413 |
+
### Define the classifier for level prediction ###
|
414 |
+
level_classifier = pipeline(
|
415 |
+
"text-classification",
|
416 |
+
model=level_classification_model,
|
417 |
+
tokenizer=level_classification_tokenizer,
|
418 |
+
device=-1,
|
419 |
+
)
|
420 |
+
|
421 |
+
classes = [
|
422 |
+
{
|
423 |
+
"audience": audience_classifier(doc, padding=True, truncation=True)[0][
|
424 |
+
"label"
|
425 |
+
],
|
426 |
+
"audience_confidence": audience_classifier(
|
427 |
+
doc, padding=True, truncation=True
|
428 |
+
)[0]["score"],
|
429 |
+
"level": level_classifier(doc, padding=True, truncation=True)[0][
|
430 |
+
"label"
|
431 |
+
],
|
432 |
+
"level_confidence": level_classifier(
|
433 |
+
doc, padding=True, truncation=True
|
434 |
+
)[0]["score"],
|
435 |
+
}
|
436 |
+
for doc in combined_data
|
437 |
+
]
|
438 |
|
439 |
return classes
|
440 |
|