gefedya committed on
Commit
13c67de
1 Parent(s): 4fbc42a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -38
app.py CHANGED
@@ -1,53 +1,53 @@
1
  import streamlit as st
2
  from datasets import load_dataset
3
- import pandas as pd
4
- import numpy as np
5
- from transformers import pipeline
6
  from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel, Trainer, TrainingArguments, LineByLineTextDataset
7
  import json
8
 
9
- st.markdown("### Here is a sentiment model trained on a slice of a twitter dataset")
10
- st.markdown("<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>", unsafe_allow_html=True)
11
- # ^-- можно показывать пользователю текст, картинки, ограниченное подмножество html - всё как в jupyter
12
 
13
- text = st.text_area("Try typing something here! \n You will see how much better our model is compared to the base model. No kidding")
14
- # ^-- показать текстовое поле. В поле text лежит строка, которая находится там в данный момент
15
-
16
- ### Loading and tokenizing data
17
-
18
- data = load_dataset("carblacac/twitter-sentiment-analysis")
19
- tokenizer = AutoTokenizer.from_pretrained("siebert/sentiment-roberta-large-english")
20
- dataset = data.map(lambda xs: tokenizer(xs["text"], truncation=True, padding='max_length'))
21
- dataset = dataset.rename_column("feeling", "labels")
22
-
23
- ### Importing existing model
24
 
25
- model = AutoModelForSequenceClassification.from_pretrained("siebert/sentiment-roberta-large-english", num_labels=2)
26
- # model.to('cpu');
 
 
27
 
28
- ### Training model
 
 
 
29
 
30
- trainer = Trainer(
31
- model=model, train_dataset=dataset["train"].shuffle().select(range(10000)),
32
- eval_dataset = dataset['test'].select(range(5000)),
33
- args=TrainingArguments(
34
- output_dir="./my_saved_model", overwrite_output_dir=True,
35
- num_train_epochs=1, per_device_train_batch_size=4,
36
- save_steps=10_000, save_total_limit=2),
37
- )
38
 
39
- trainer.train()
 
 
 
 
40
 
 
 
41
 
42
- ### Using our new BEAST model to predict the sentiment of users' entries
43
-
44
- # TODO: add predictions
45
-
46
- model()
47
-
48
- #classifier = pipeline('sentiment-analysis', model="distilbert-base-uncased-finetuned-sst-2-english")
49
- #raw_predictions = classifier(text)
 
 
 
 
 
 
 
 
 
50
  # тут уже знакомый вам код с huggingface.transformers -- его можно заменить на что угодно от fairseq до catboost
51
 
52
- st.markdown(f"{raw_predictions}")
53
  # выводим результаты модели в текстовое поле, на потеху пользователю
 
1
  import streamlit as st
2
  from datasets import load_dataset
 
 
 
3
  from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel, Trainer, TrainingArguments, LineByLineTextDataset
4
  import json
5
 
 
 
 
6
 
7
+ @st.cache()
8
+ def get_model():
9
+ model = AutoModelForSequenceClassification.from_pretrained("siebert/sentiment-roberta-large-english", num_labels=2)
10
+ model.load_state_dict(torch.load('model'))
11
+ return model
 
 
 
 
 
 
12
 
13
+ @st.cache()
14
+ def get_tokenizer():
15
+ tokenizer = AutoTokenizer.from_pretrained("siebert/sentiment-roberta-large-english")
16
+ return tokenizer
17
 
18
+ def make_prediction():
19
+ model = get_model()
20
+ tokenizer = tokenizer()
21
+
22
 
 
 
 
 
 
 
 
 
23
 
24
+ st.header("Sentiment analysis on twitter datasets")
25
+ st.markdown("Here is a sentiment model further trained on a slice of a twitter dataset")
26
+ st.markdown("""
27
+ <img width=700px src='https://imagez.tmz.com/image/73/4by3/2020/10/05/735aaee2f6b9464ca220e62ef797dab0_md.jpg'>
28
+ """, unsafe_allow_html=True)
29
 
30
+ text = st.text_area("Try typing something here! \n You will see how much better our model is compared to the base model! No kidding")
31
+ # ^-- показать текстовое поле. В поле text лежит строка, которая находится там в данный момент
32
 
33
+ ### Loading and tokenizing data
34
+ # data = load_dataset("carblacac/twitter-sentiment-analysis")
35
+ # tokenizer = AutoTokenizer.from_pretrained("siebert/sentiment-roberta-large-english")
36
+ # dataset = data.map(lambda xs: tokenizer(xs["text"], truncation=True, padding='max_length'))
37
+ # dataset = dataset.rename_column("feeling", "labels")
38
+
39
+ with st.form(key='input_form'):
40
+ to_analyze = st.text_input(label='Input text to be analyzed')
41
+ button = st.form_submit_button(label='Classify')
42
+ if button:
43
+ if to_analyze:
44
+ make_prediction(to_analyze)
45
+ else:
46
+ st.markdown("Empty request. Please resubmit")
47
+
48
+ # classifier = pipeline('sentiment-analysis', model="distilbert-base-uncased-finetuned-sst-2-english")
49
+ # raw_predictions = classifier(text)
50
  # тут уже знакомый вам код с huggingface.transformers -- его можно заменить на что угодно от fairseq до catboost
51
 
52
+ # st.markdown(f"{raw_predictions}")
53
  # выводим результаты модели в текстовое поле, на потеху пользователю