oliverguhr committed on
Commit
c5c8dd0
1 Parent(s): d61f2ce

removed sample as it is more confusing than helpful

Browse files
Files changed (1) hide show
  1. README.md +0 -50
README.md CHANGED
@@ -48,56 +48,6 @@ The code above will output following list:
48
  ["negative","negative","positive","positive","neutral", "neutral"]
49
  ```
50
 
51
- ## A minimal working Sample
52
-
53
-
54
- ```python
55
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
56
- from typing import List
57
- import torch
58
- import re
59
-
60
- class SentimentModel():
61
- def __init__(self, model_name: str):
62
- self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
63
- self.tokenizer = AutoTokenizer.from_pretrained(model_name)
64
-
65
- self.clean_chars = re.compile(r'[^A-Za-züöäÖÜÄß ]', re.MULTILINE)
66
- self.clean_http_urls = re.compile(r'https*\\S+', re.MULTILINE)
67
- self.clean_at_mentions = re.compile(r'@\\S+', re.MULTILINE)
68
-
69
- def predict_sentiment(self, texts: List[str])-> List[str]:
70
- texts = [self.clean_text(text) for text in texts]
71
- # Add special tokens takes care of adding [CLS], [SEP], <s>... tokens in the right way for each model.
72
- encoded = self.tokenizer.batch_encode_plus(texts,padding=True, add_special_tokens=True,truncation=True, return_tensors="pt")
73
- encoded = encoded.to(self.device)
74
- with torch.no_grad():
75
- logits = self.model(**encoded)
76
-
77
- label_ids = torch.argmax(logits[0], axis=1)
78
- return [self.model.config.id2label[label_id.item()] for label_id in label_ids]
79
-
80
- def replace_numbers(self,text: str) -> str:
81
- return text.replace("0"," null").replace("1"," eins").replace("2"," zwei").replace("3"," drei").replace("4"," vier").replace("5"," fünf").replace("6"," sechs").replace("7"," sieben").replace("8"," acht").replace("9"," neun")
82
-
83
- def clean_text(self,text: str)-> str:
84
- text = text.replace("\n", " ")
85
- text = self.clean_http_urls.sub('',text)
86
- text = self.clean_at_mentions.sub('',text)
87
- text = self.replace_numbers(text)
88
- text = self.clean_chars.sub('', text) # use only text chars
89
- text = ' '.join(text.split()) # substitute multiple whitespace with single whitespace
90
- text = text.strip().lower()
91
- return text
92
-
93
- texts = ["Mit keinem guten Ergebniss","Das war unfair", "Das ist gar nicht mal so gut",
94
- "Total awesome!","nicht so schlecht wie erwartet", "Das ist gar nicht mal so schlecht",
95
- "Der Test verlief positiv.","Sie fährt ein grünes Auto.", "Der Fall wurde an die Polzei übergeben."]
96
-
97
- model = SentimentModel(model_name = "oliverguhr/german-sentiment-bert")
98
-
99
- print(model.predict_sentiment(texts))
100
- ```
101
 
102
  ## Model and Data
103
 
 
48
  ["negative","negative","positive","positive","neutral", "neutral"]
49
  ```
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  ## Model and Data
53