Spaces:

KonGor
/

movie_genre_classificator

Runtime error

App Files Files Community

Konstantin Gordeev committed on Mar 29, 2022

Commit

49e8106

•

1 Parent(s): b60d6e8

Update model

Browse files

Files changed (1) hide show

app.py +22 -30

app.py CHANGED Viewed

@@ -1,46 +1,37 @@
 import streamlit as st
-from transformers import DistilBertModel, DistilBertTokenizer
 import torch
-model_path = './models/pytorch_distilbert.bin'
-vocab_path = './models/vocab_distilbert.bin'
 device = torch.device('cpu')
-MAX_LEN = 512
-def get_labels(text, model, tokenizer, count_labels=8):
-    tokens = tokenizer(text, return_tensors='pt')
-    outputs = model(**tokens)
-    probs = torch.nn.Softmax()(outputs.logits)
-    labels = ['Computer_science', 'Economics',
-              'Electrical_Engineering_and_Systems_Science', 'Mathematics',
-              'Physics', 'Quantitative_Biology', 'Quantitative_Finance',
-              'Statistics']
-    sort_lst = sorted([(prob, label) for prob, label in zip(probs.detach().numpy()[0], labels)], key=lambda x: -x[0])
-    cumsum = 0
-    result_labels = []
-    for pair in sort_lst:
-        cumsum += pair[0]
-        if cumsum > 0.95 and len(result_labels) >= 1:
-            return result_labels
-        result_labels.append(pair[1])
 @st.cache(allow_output_mutation=True)
 def load_model():
-    tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-cased")
-    model = DistilBertModel.from_pretrained("distilbert-base-cased", num_labels=8)
-    model.load_state_dict(torch.load('weight_model'))
     return model, tokenizer
-tokenizer = DistilBertTokenizer.from_pretrained(vocab_path)
-model = torch.load(model_path, map_location=torch.device(device))
 st.markdown("### Movie genre classification")
 text = st.text_area("Write some movie description")
 if st.button('Predict'):
@@ -48,5 +39,6 @@ if st.button('Predict'):
         if not text:
             st.error("Write something.")
         else:
-            pred = predict(text, model.to(device))
-            st.success("\n\n".join(pred))

 import streamlit as st
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
+import numpy as np
+import pandas as pd
+model_path = 'model'
 device = torch.device('cpu')
+model_name = 'distilbert-base-cased'
+genres = np.array(['Animation', 'Comedy', 'Adult', 'Adventure', 'Musical', 'History', 'Reality-TV', 'Film-Noir',
+ 'Sport', 'Biography', 'Drama', 'Fantasy', 'Romance', 'Thriller', 'News', 'Documentary', 'Sci-Fi', 'Music',
+ 'Family', 'Mystery', 'Crime', 'Horror', 'War', 'Action', 'Western'])
 @st.cache(allow_output_mutation=True)
 def load_model():
+    """Build the tokenizer and the fine-tuned genre classifier.
+
+    The tokenizer and the base architecture come from ``model_name``; the
+    classification head is sized to ``len(genres)`` labels and the fine-tuned
+    weights are then loaded from the ``model_path`` checkpoint.
+
+    Returns:
+        A ``(model, tokenizer)`` tuple.
+    """
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(genres))
+    # map_location remaps tensors saved from a GPU onto the CPU `device`;
+    # without it torch.load fails on a host that has no CUDA.
+    model.load_state_dict(torch.load(model_path, map_location=device))
     return model, tokenizer
+def predict(text: str, tokenizer, model):
+    """Return the five most probable genres for a movie description.
+
+    Args:
+        text: Free-form movie description.
+        tokenizer: Tokenizer whose ``encode`` turns ``text`` into token ids.
+        model: Sequence-classification model whose first output is the
+            logits, one column per entry of ``genres``.
+
+    Returns:
+        A dict mapping genre name to softmax probability, holding the five
+        highest-probability genres in descending order of probability.
+    """
+    # Truncate to the tokenizer's maximum length: a longer description would
+    # exceed the model's position embeddings and raise during the forward pass.
+    tokens = tokenizer.encode(text, truncation=True)
+    # Inference only, so skip building the autograd graph.
+    with torch.no_grad():
+        logits = model(torch.as_tensor([tokens], device=device))[0]
+    probas = torch.nn.Softmax(dim=1)(logits).numpy()[0]
+    # argsort is ascending; the reversed tail slice picks the 5 largest first.
+    top_5_index = probas.argsort()[:-6:-1]
+    return dict(zip(genres[top_5_index], probas[top_5_index]))
 st.markdown("### Movie genre classification")
+model, tokenizer = load_model()
 text = st.text_area("Write some movie description")
 if st.button('Predict'):
         if not text:
             st.error("Write something.")
         else:
+            pred = predict(text, tokenizer, model)
+            result = pd.DataFrame(list(pred.values()), index=list(pred.keys()), columns=['Probability'])
+            st.write(result)