bertugmirasyedi committed on
Commit
76bf172
1 Parent(s): 19449a2

Added locally trained models

Browse files
Files changed (1) hide show
  1. app.py +71 -32
app.py CHANGED
@@ -38,11 +38,19 @@ classification_tokenizer_normal = AutoTokenizer.from_pretrained(
38
  classification_model_normal = AutoModelForSequenceClassification.from_pretrained(
39
  "sileod/deberta-v3-base-tasksource-nli"
40
  )
41
- classification_tokenizer_onnx = AutoTokenizer.from_pretrained(
42
- "optimum/distilbert-base-uncased-mnli"
 
43
  )
44
- classification_model_onnx = ORTModelForSequenceClassification.from_pretrained(
45
- "optimum/distilbert-base-uncased-mnli"
 
 
 
 
 
 
 
46
  )
47
 
48
  # Define similarity model
@@ -365,37 +373,68 @@ async def classify(data: list, runtime: str = "normal"):
365
  # Define the zero-shot classifier
366
  tokenizer = classification_tokenizer_normal
367
  model = classification_model_normal
368
- elif runtime == "onnxruntime":
369
- tokenizer = classification_tokenizer_onnx
370
- model = classification_model_onnx
371
-
372
- classifier_pipe = pipeline(
373
- "zero-shot-classification",
374
- model=model,
375
- tokenizer=tokenizer,
376
- hypothesis_template="This book is {}.",
377
- batch_size=1,
378
- device=-1,
379
- multi_label=False,
380
- )
381
 
382
- # Define the candidate labels
383
- level = [
384
- "Introductory",
385
- "Advanced",
386
- ]
 
 
 
 
387
 
388
- audience = ["Academic", "Not Academic", "Manual"]
 
 
 
 
389
 
390
- classes = [
391
- {
392
- "audience": classifier_pipe(doc, audience)["labels"][0],
393
- "audience_confidence": classifier_pipe(doc, audience)["scores"][0],
394
- "level": classifier_pipe(doc, level)["labels"][0],
395
- "level_confidence": classifier_pipe(doc, level)["scores"][0],
396
- }
397
- for doc in combined_data
398
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
  return classes
401
 
 
38
  classification_model_normal = AutoModelForSequenceClassification.from_pretrained(
39
  "sileod/deberta-v3-base-tasksource-nli"
40
  )
41
+
42
+ audience_classification_tokenizer = AutoTokenizer.from_pretrained(
43
+ "bertugmirasyedi/deberta-v3-base-book-classification"
44
  )
45
+ audience_classification_model = AutoModelForSequenceClassification.from_pretrained(
46
+ "bertugmirasyedi/deberta-v3-base-level-classification"
47
+ )
48
+
49
+ level_classification_tokenizer = AutoTokenizer.from_pretrained(
50
+ "bertugmirasyedi/deberta-v3-base-level-classification"
51
+ )
52
+ level_classification_model = AutoModelForSequenceClassification.from_pretrained(
53
+ "bertugmirasyedi/deberta-v3-base-level-classification"
54
  )
55
 
56
  # Define similarity model
 
373
  # Define the zero-shot classifier
374
  tokenizer = classification_tokenizer_normal
375
  model = classification_model_normal
 
 
 
 
 
 
 
 
 
 
 
 
 
376
 
377
+ classifier_pipe = pipeline(
378
+ "zero-shot-classification",
379
+ model=model,
380
+ tokenizer=tokenizer,
381
+ hypothesis_template="This book is {}.",
382
+ batch_size=1,
383
+ device=-1,
384
+ multi_label=False,
385
+ )
386
 
387
+ # Define the candidate labels
388
+ level = [
389
+ "Introductory",
390
+ "Advanced",
391
+ ]
392
 
393
+ audience = ["Academic", "Not Academic", "Manual"]
394
+
395
+ classes = [
396
+ {
397
+ "audience": classifier_pipe(doc, audience)["labels"][0],
398
+ "audience_confidence": classifier_pipe(doc, audience)["scores"][0],
399
+ "level": classifier_pipe(doc, level)["labels"][0],
400
+ "level_confidence": classifier_pipe(doc, level)["scores"][0],
401
+ }
402
+ for doc in combined_data
403
+ ]
404
+ elif runtime == "local":
405
+ ### Define the classifier for audience prediction ###
406
+ audience_classifier = pipeline(
407
+ "text-classification",
408
+ model=audience_classification_model,
409
+ tokenizer=audience_classification_tokenizer,
410
+ device=-1,
411
+ )
412
+
413
+ ### Define the classifier for level prediction ###
414
+ level_classifier = pipeline(
415
+ "text-classification",
416
+ model=level_classification_model,
417
+ tokenizer=level_classification_tokenizer,
418
+ device=-1,
419
+ )
420
+
421
+ classes = [
422
+ {
423
+ "audience": audience_classifier(doc, padding=True, truncation=True)[0][
424
+ "label"
425
+ ],
426
+ "audience_confidence": audience_classifier(
427
+ doc, padding=True, truncation=True
428
+ )[0]["score"],
429
+ "level": level_classifier(doc, padding=True, truncation=True)[0][
430
+ "label"
431
+ ],
432
+ "level_confidence": level_classifier(
433
+ doc, padding=True, truncation=True
434
+ )[0]["score"],
435
+ }
436
+ for doc in combined_data
437
+ ]
438
 
439
  return classes
440