MKaan committed on
Commit
b7d25a0
1 Parent(s): 4bb6d49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -80
app.py CHANGED
@@ -1,81 +1,81 @@
1
- import streamlit as st
2
- from multiprocessing import Process
3
-
4
- from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
5
-
6
- import torch
7
- import pandas as pd
8
- import json
9
- import requests
10
-
11
- import time
12
- import os
13
-
14
- model_name_or_directory = "MKaan/multilingual-cpv-sector-classifier"
15
- tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
16
-
17
- config = AutoConfig.from_pretrained(model_name_or_directory)
18
- model = AutoModelForSequenceClassification.from_pretrained(model_name_or_directory, config=config)
19
-
20
- device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
21
-
22
- idx2cpv = pd.read_csv("idx2cpv.csv")
23
- idx2cpv = dict(zip(idx2cpv.indexes, idx2cpv.sectors))
24
-
25
- def get_result(input):
26
- input_ids = tokenizer(input, return_tensors="pt").input_ids
27
- output = model(input_ids)
28
- pred = output.logits.argmax(dim=-1)
29
- pred = pred.cpu().detach().numpy()[0]
30
- return idx2cpv[pred]
31
-
32
- if __name__ == "__main__":
33
- st.title('Multilingual Sector Classifier 📄') #📊💼
34
- st.subheader('Finds the correct sector for the given contract description')
35
- st.markdown("Built by Mustafa Kaan Görgün, [Linkedin](https://www.linkedin.com/in/mustafa-kaan-görgün-a2461288/), [Model Card](https://huggingface.co/MKaan/multilingual-cpv-sector-classifier) ", unsafe_allow_html=True)
36
-
37
- examples = pd.read_csv("examples.csv")
38
- lang2example = dict(zip(examples.lang, examples.descr))
39
-
40
- st.markdown(f'##### Try it now:')
41
-
42
- #st.markdown(f'Choose a language in any of 22 languages')
43
- input_lang = st.selectbox(
44
- label="Choose a language from the list of 22 languages",
45
- options=examples.lang,
46
- index=5
47
- )
48
-
49
- input_text_1 = st.text_area(
50
- label="Example description in choosen language",
51
- value=lang2example[input_lang],
52
- height=150,
53
- max_chars=500
54
- )
55
-
56
- button1 = st.button('Run the example')
57
-
58
- st.write("or")
59
-
60
- #st.markdown('Write your own contract description in any of 104 languages that MBERT supports.')
61
- input_text_2 = st.text_area(
62
- label="Write your own contract description in any of 104 languages that MBERT supports.",
63
- value="Your description comes here..",
64
- height=100,
65
- max_chars=500
66
- )
67
-
68
- button2 = st.button('Run your own')
69
-
70
- st.markdown(f'##### Classified Sector: ')
71
- if button1:
72
- with st.spinner('In progress.......'):
73
- sector_class = get_result(input_text_1)
74
- #sector_class = input_text_1
75
- st.success(sector_class)
76
-
77
- if button2:
78
- with st.spinner('In progress.......'):
79
- sector_class = get_result(input_text_2)
80
- #sector_class = input_text_2
81
  st.success(sector_class)
 
1
+ import streamlit as st
2
+ from multiprocessing import Process
3
+
4
+ from transformers import AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
5
+
6
+ import torch
7
+ import pandas as pd
8
+ import json
9
+ import requests
10
+
11
+ import time
12
+ import os
13
+
14
+ model_name_or_directory = "MKaan/multilingual-cpv-sector-classifier"
15
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
16
+
17
+ config = AutoConfig.from_pretrained(model_name_or_directory)
18
+ model = AutoModelForSequenceClassification.from_pretrained(model_name_or_directory, config=config)
19
+
20
+ device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
21
+
22
+ idx2cpv = pd.read_csv("idx2cpv.csv")
23
+ idx2cpv = dict(zip(idx2cpv.indexes, idx2cpv.sectors))
24
+
25
+ def get_result(input):
26
+ input_ids = tokenizer(input, return_tensors="pt").input_ids
27
+ output = model(input_ids)
28
+ pred = output.logits.argmax(dim=-1)
29
+ pred = pred.cpu().detach().numpy()[0]
30
+ return idx2cpv[pred]
31
+
32
+ if __name__ == "__main__":
33
+ st.title('Multilingual Sector Classifier 📄') #📊💼
34
+ st.subheader('Finds the correct sector for the given contract description')
35
+ st.markdown("Built by Mustafa Kaan Görgün, [Linkedin](https://www.linkedin.com/in/mustafa-kaan-görgün-a2461288/), [Model Card](https://huggingface.co/MKaan/multilingual-cpv-sector-classifier) ", unsafe_allow_html=True)
36
+
37
+ examples = pd.read_csv("examples.csv")
38
+ lang2example = dict(zip(examples.lang, examples.descr))
39
+
40
+ st.markdown(f'##### Try it now:')
41
+
42
+ #st.markdown(f'Choose a language in any of 22 languages')
43
+ input_lang = st.selectbox(
44
+ label="Choose a language from the list of 22 languages",
45
+ options=examples.lang,
46
+ index=5
47
+ )
48
+
49
+ input_text_1 = st.text_area(
50
+ label="Example description in choosen language",
51
+ value=lang2example[input_lang],
52
+ height=150,
53
+ max_chars=500
54
+ )
55
+
56
+ button1 = st.button('Run the example')
57
+
58
+ st.write("or")
59
+
60
+ #st.markdown('Write your own contract description in any of 104 languages that MBERT supports.')
61
+ input_text_2 = st.text_area(
62
+ label="Write your own contract description in any of 104 languages that MBERT supports.",
63
+ value="Your description comes here..",
64
+ height=100,
65
+ max_chars=500
66
+ )
67
+
68
+ button2 = st.button('Run your own')
69
+
70
+ st.markdown(f'##### Classified Sector: ')
71
+ if button1:
72
+ with st.spinner('In progress.......'):
73
+ sector_class = get_result(input_text_1)
74
+ #sector_class = input_text_1
75
+ st.success(sector_class)
76
+
77
+ if button2:
78
+ with st.spinner('In progress.......'):
79
+ sector_class = get_result(input_text_2)
80
+ #sector_class = input_text_2
81
  st.success(sector_class)