waddaheaven committed on
Commit
adfd51e
1 Parent(s): 4da0ffc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -7
app.py CHANGED
@@ -1,26 +1,45 @@
1
  import gradio as gr
2
  import onnxruntime as rt
3
  from transformers import AutoTokenizer
4
- import torch, json
 
5
 
 
6
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
7
 
8
  with open("genre_types_encoded.json", "r") as fp:
9
- encode_genre_types = json.load(fp)
10
 
11
  genres = list(encode_genre_types.keys())
12
 
 
13
  inf_session = rt.InferenceSession('movie-genre-classifier-quantized.onnx')
14
  input_name = inf_session.get_inputs()[0].name
15
  output_name = inf_session.get_outputs()[0].name
16
 
17
  def classify_movie_genre(summary):
18
- input_ids = tokenizer(summary)['input_ids'][:512]
19
- logits = inf_session.run([output_name], {input_name: [input_ids]})[0]
20
- logits = torch.FloatTensor(logits)
21
- probs = torch.sigmoid(logits)[0]
22
- return dict(zip(genres, map(float, probs)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
 
24
  label = gr.Label(num_top_classes=5)
25
  iface = gr.Interface(fn=classify_movie_genre, inputs="text", outputs=label)
26
  iface.launch(inline=False)
 
1
  import gradio as gr
2
  import onnxruntime as rt
3
  from transformers import AutoTokenizer
4
+ import torch
5
+ import json
6
 
7
+ # Load tokenizer and genre types
8
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
9
 
10
  with open("genre_types_encoded.json", "r") as fp:
11
+ encode_genre_types = json.load(fp)
12
 
13
  genres = list(encode_genre_types.keys())
14
 
15
+ # Load ONNX model
16
  inf_session = rt.InferenceSession('movie-genre-classifier-quantized.onnx')
17
  input_name = inf_session.get_inputs()[0].name
18
  output_name = inf_session.get_outputs()[0].name
19
 
20
  def classify_movie_genre(summary):
21
+ # Tokenize input
22
+ tokens = tokenizer(summary, padding='max_length', truncation=True, return_tensors="pt")
23
+ input_ids = tokens['input_ids'][0].tolist()[:512]
24
+
25
+ # Debug prints
26
+ print("Input summary:", summary)
27
+ print("Tokenized input:", input_ids)
28
+
29
+ # Run inference
30
+ logits = inf_session.run([output_name], {input_name: [input_ids]})[0]
31
+
32
+ # Convert logits to probabilities
33
+ logits = torch.FloatTensor(logits)
34
+ probs = torch.sigmoid(logits)[0]
35
+
36
+ # Debug prints
37
+ print("Logits:", logits)
38
+ print("Probabilities:", probs)
39
+
40
+ return dict(zip(genres, map(float, probs)))
41
 
42
+ # Setup Gradio interface
43
  label = gr.Label(num_top_classes=5)
44
  iface = gr.Interface(fn=classify_movie_genre, inputs="text", outputs=label)
45
  iface.launch(inline=False)