samanthakarungi committed on
Commit
00be1d6
1 Parent(s): 70f3746
Files changed (2) hide show
  1. app.py +49 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
3
+ import torch
4
+ import torch.nn.functional as F
5
+
6
MODEL_NAME = 'samanthakarungi/fine-tuned-bert'


@st.cache_resource
def load_model(model_name=MODEL_NAME):
    """Load the fine-tuned classifier and its tokenizer, cached across reruns.

    Streamlit re-executes the script on widget interactions; caching the
    loaded objects with ``st.cache_resource`` avoids re-loading the checkpoint
    every time the user types or clicks.

    Args:
        model_name: Hugging Face model id (or local path) to load both the
            model and the tokenizer from.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    classifier = DistilBertForSequenceClassification.from_pretrained(model_name)
    classifier_tokenizer = DistilBertTokenizer.from_pretrained(model_name)
    return classifier, classifier_tokenizer


# Same module-level names as before, so the rest of the script is unaffected.
model, tokenizer = load_model()
8
+
9
# Expense category names listed in label-id order: a name's position in this
# tuple is the integer id paired with it in ``categories``.
_CATEGORY_NAMES = (
    'INVENTORY, SUPPLIES AND EQUIPMENT',
    'PROFESSIONAL SERVICES',
    'TRANSPORTATION AND TRAVEL',
    'UTILITIES',
    'EMPLOYEE BENEFITS AND COMPENSATION',
    'MEALS AND ENTERTAINMENT',
    'TAX PAYMENTS',
    'LEGAL AND COMPLIANCE FEES',
    'BUSINESS DEVELOPMENT AND INVESTMENT',
)

# Maps each category name to its integer label id (0..8).
categories = {name: label_id for label_id, name in enumerate(_CATEGORY_NAMES)}
20
+
21
# Static page content: title, model description, category list and a link to
# the base model card, followed by the header of the interactive section.
st.title("Text Classification Model")

st.header("Model Description")
# Rendered in order with st.write: two explanatory paragraphs, then the
# category-name -> label-id mapping itself.
for description_item in (
    "This model is a fine-tuned version of the distilbert-base-uncased model on Hugging Face. DistilBERT is a transformers model, smaller and faster than BERT, which was pretrained on the same corpus in a self-supervised fashion, using the BERT base model as a teacher. This means it was pretrained on the raw texts only, with no humans labelling them in any way (which is why it can use lots of publicly available data) with an automatic process to generate inputs and labels from those texts using the BERT base model.",
    "The model is trained to classify an expenditure for BUSINESS OWNERS based on the reason attached to it in a mobile wallet. The classification is into one of the following categories:",
    categories,
):
    st.write(description_item)

st.markdown("[Read more about DistilBert base model here](https://huggingface.co/distilbert/distilbert-base-uncased)")


st.header("Try it out")
32
+
33
def predict(model, tokenizer, text):
    """Classify ``text`` and display the predicted category on the page.

    Args:
        model: Sequence-classification model. It is called with the tokenizer
            output as keyword arguments and must return an object exposing a
            ``logits`` attribute.
        tokenizer: Callable that converts ``text`` into PyTorch model inputs.
        text: The single sequence to classify.

    Returns:
        A ``(prbs, predicted_label)`` tuple: the softmax probabilities taken
        over the last dimension of the logits, and the integer index of the
        most probable class.
    """
    # Truncate / pad to a fixed length of 20 tokens and return PyTorch tensors.
    inputs = tokenizer(text, truncation=True, padding='max_length', max_length=20, return_tensors='pt')

    # Inference only: disable autograd so no graph is built for the forward pass.
    with torch.no_grad():
        logits = model(**inputs).logits

    prbs = F.softmax(logits, dim=-1)
    # .item() requires a single-element tensor, i.e. one input sequence.
    predicted_label = torch.argmax(prbs, dim=-1).item()

    # Invert the name -> id mapping once instead of scanning it for a match.
    label_names = {label_id: name for name, label_id in categories.items()}
    if predicted_label in label_names:
        st.write("The predicted label is:", label_names[predicted_label])

    return prbs, predicted_label
46
+
47
# Interactive section: read a sequence from the user and classify it on demand.
text = st.text_input("Enter sequence to classify")
if st.button("Classify"):
    if text.strip():
        predict(model=model, tokenizer=tokenizer, text=text)
    else:
        # Nothing meaningful to classify; tell the user instead of running the
        # model on an empty / whitespace-only string.
        st.warning("Please enter some text to classify.")
requirements.txt ADDED
Binary file (3.48 kB). View file