MR17u committed on
Commit
27ce743
1 Parent(s): 2950aec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -1
app.py CHANGED
@@ -26,6 +26,25 @@ scheduler = CommitScheduler(
26
 
27
  classifier = pipeline(model = CLS_MODEL_NAME, tokenizer = 'cardiffnlp/twitter-roberta-large-2022-154m')
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def save_json(entry: str, result) -> None:
30
  with scheduler.lock:
31
  with JSON_DATASET_PATH.open("a") as f:
@@ -34,7 +53,7 @@ def save_json(entry: str, result) -> None:
34
  f.write("\n")
35
 
36
  def classif(text: str):
37
- return classifier(text)
38
 
39
  with gr.Blocks() as demo:
40
  with gr.Row():
 
26
 
27
  classifier = pipeline(model = CLS_MODEL_NAME, tokenizer = 'cardiffnlp/twitter-roberta-large-2022-154m')
28
 
29
+ def clean_brackets(text):
30
+ return text.replace('{', '(').replace('}', ')')
31
+
32
+ def clean_emojis(text, type:str = ''):
33
+ if type=='rem':
34
+ return demoji.replace(text, '')
35
+ elif type!='keep':
36
+ return demoji.replace_with_desc(text, type)
37
+ else:
38
+ return text
39
+
40
+ def clean_hashtags(text, hashtags=['#irony', '#sarcasm','#not']):
41
+ for hashtag in hashtags:
42
+ text = re.sub(hashtag, '', text, flags=re.I)
43
+ return re.sub(r' +', r' ', text)
44
+
45
+ def clean_text(text):
46
+ return re.sub(' {2,}', ' ',clean_emojis(clean_hashtags(clean_brackets(text)))).strip()
47
+
48
  def save_json(entry: str, result) -> None:
49
  with scheduler.lock:
50
  with JSON_DATASET_PATH.open("a") as f:
 
53
  f.write("\n")
54
 
55
  def classif(text: str):
56
+ return classifier(clean_text(text))
57
 
58
  with gr.Blocks() as demo:
59
  with gr.Row():