AlGe committed on
Commit
6b0ab1a
·
verified ·
1 Parent(s): a8497b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -18
app.py CHANGED
@@ -12,49 +12,76 @@ import spaces
12
  import torch
13
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification, pipeline
14
  import os
15
-
16
  import colorsys
17
  import matplotlib.pyplot as plt
18
 
19
  def hex_to_rgb(hex_color: str) -> tuple[int, int, int]:
20
  hex_color = hex_color.lstrip('#')
21
  return tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
 
 
 
 
 
 
 
 
22
  return tuple(int(v * 255) for v in new_rgb)
23
 
24
  monochrome = Monochrome()
25
 
26
  auth_token = os.environ['HF_TOKEN']
27
 
28
-
29
  tokenizer_bin = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_token", token=auth_token)
30
  model_bin = AutoModelForTokenClassification.from_pretrained("AlGe/deberta-v3-large_token", token=auth_token)
31
  tokenizer_bin.model_max_length = 512
32
  pipe_bin = pipeline("ner", model=model_bin, tokenizer=tokenizer_bin)
33
 
34
-
35
  tokenizer_ext = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_AIS-token", token=auth_token)
36
  model_ext = AutoModelForTokenClassification.from_pretrained("AlGe/deberta-v3-large_AIS-token", token=auth_token)
37
  tokenizer_ext.model_max_length = 512
38
  pipe_ext = pipeline("ner", model=model_ext, tokenizer=tokenizer_ext)
39
 
40
-
41
  model1 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_Int_segment", num_labels=1, token=auth_token)
42
  tokenizer1 = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_Int_segment", token=auth_token)
43
 
44
  model2 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_seq_ext", num_labels=1, token=auth_token)
45
 
46
  def process_ner(text: str, pipeline) -> dict:
47
-
48
  output = pipeline(text)
49
  entities = []
50
  current_entity = None
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  return {"text": text, "entities": entities}
53
 
54
  def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str, str, str]:
55
  inputs1 = tokenizer1(text, max_length=512, return_tensors='pt', truncation=True, padding=True)
56
 
57
  with torch.no_grad():
 
 
 
 
58
  prediction2 = outputs2[0].item()
59
  score = prediction1 / (prediction2 + prediction1)
60
 
@@ -64,21 +91,18 @@ def generate_charts(ner_output_bin: dict, ner_output_ext: dict) -> Tuple[plt.Fig
64
  entities_bin = [entity['entity'] for entity in ner_output_bin['entities']]
65
  entities_ext = [entity['entity'] for entity in ner_output_ext['entities']]
66
 
67
- entity_counts_bin = {entity: entities_bin.count(entity) for entity in set(entities_bin)}
68
- entity_counts_ext = {entity: entities_ext.count(entity) for entity in set(entities_ext)}
69
 
70
-
71
- pie_labels_bin = list(entity_counts_bin.keys())
72
- pie_sizes_bin = list(entity_counts_bin.values())
73
- pie_labels_ext = list(entity_counts_ext.keys())
74
- pie_sizes_ext = list(entity_counts_ext.values())
75
 
76
  fig1, ax1 = plt.subplots()
77
- ax1.pie(pie_sizes_ext, labels=pie_labels_ext, autopct='%1.1f%%', startangle=90)
78
  ax1.axis('equal')
79
 
80
  fig2, ax2 = plt.subplots()
81
- ax2.bar(pie_labels_bin, pie_sizes_bin)
82
  ax2.set_ylabel('Count')
83
  ax2.set_xlabel('Entity Type')
84
  ax2.set_title('Entity Counts')
@@ -97,13 +121,10 @@ def all(text: str):
97
  classification_output[0], classification_output[1], classification_output[2],
98
  pie_chart, bar_chart)
99
 
100
-
101
-
102
  examples = [
103
  ['Bevor ich meinen Hund kaufte bin ich immer alleine durch den Park gelaufen. Gestern war ich aber mit dem Hund losgelaufen. Das Wetter war sehr schön, nicht wie sonst im Winter. Ich weiß nicht genau. Mir fällt sonst nichts dazu ein. Wir trafen auf mehrere Spaziergänger. Ein Mann mit seinem Kind. Das Kind hat ein Eis gegessen.'],
104
  ]
105
 
106
-
107
  iface = gr.Interface(
108
  fn=all,
109
  inputs=gr.Textbox(lines=5, label="Input Text", placeholder="Write about how your breakfast went or anything else that happened or might happen to you ..."),
@@ -138,4 +159,4 @@ iface = gr.Interface(
138
  theme=monochrome
139
  )
140
 
141
- iface.launch()
 
12
  import torch
13
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification, pipeline
14
  import os
 
15
  import colorsys
16
  import matplotlib.pyplot as plt
17
 
18
def hex_to_rgb(hex_color: str) -> tuple[int, int, int]:
    """Convert a hex color string such as '#aabbcc' to an (r, g, b) tuple of ints."""
    digits = hex_color.lstrip('#')
    return tuple(int(digits[pos:pos + 2], 16) for pos in (0, 2, 4))
21
+
22
def rgb_to_hex(rgb_color: tuple[int, int, int]) -> str:
    """Convert an (r, g, b) tuple of 0-255 ints to a '#rrggbb' hex string."""
    r, g, b = rgb_color
    return f"#{r:02x}{g:02x}{b:02x}"
24
+
25
def adjust_brightness(rgb_color: tuple[int, int, int], factor: float) -> tuple[int, int, int]:
    """Scale the HSV value (brightness) of an RGB color by *factor*.

    The scaled value is clamped to [0, 1] before converting back to RGB,
    so the result is always a valid 0-255 color tuple.
    """
    r, g, b = (channel / 255.0 for channel in rgb_color)
    hue, saturation, value = colorsys.rgb_to_hsv(r, g, b)
    clamped_value = min(max(value * factor, 0), 1)
    adjusted = colorsys.hsv_to_rgb(hue, saturation, clamped_value)
    return tuple(int(channel * 255) for channel in adjusted)
30
 
31
  monochrome = Monochrome()
32
 
33
  auth_token = os.environ['HF_TOKEN']
34
 
 
35
  tokenizer_bin = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_token", token=auth_token)
36
  model_bin = AutoModelForTokenClassification.from_pretrained("AlGe/deberta-v3-large_token", token=auth_token)
37
  tokenizer_bin.model_max_length = 512
38
  pipe_bin = pipeline("ner", model=model_bin, tokenizer=tokenizer_bin)
39
 
 
40
  tokenizer_ext = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_AIS-token", token=auth_token)
41
  model_ext = AutoModelForTokenClassification.from_pretrained("AlGe/deberta-v3-large_AIS-token", token=auth_token)
42
  tokenizer_ext.model_max_length = 512
43
  pipe_ext = pipeline("ner", model=model_ext, tokenizer=tokenizer_ext)
44
 
 
45
  model1 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_Int_segment", num_labels=1, token=auth_token)
46
  tokenizer1 = AutoTokenizer.from_pretrained("AlGe/deberta-v3-large_Int_segment", token=auth_token)
47
 
48
  model2 = AutoModelForSequenceClassification.from_pretrained("AlGe/deberta-v3-large_seq_ext", num_labels=1, token=auth_token)
49
 
50
def process_ner(text: str, pipeline) -> dict:
    """Run *pipeline* on *text* and merge per-token NER predictions into spans.

    Consecutive tokens sharing the same entity type are merged into a single
    span; a fresh 'B-' token always opens a new span even when its type matches
    the current one. Each merged span keeps the maximum token score.

    Returns a dict of the form {"text": ..., "entities": [...]} where each
    entity has "entity", "start", "end" and "score" keys.
    """
    # NOTE(review): the parameter name shadows the imported `transformers.pipeline`
    # factory inside this function; kept as-is to preserve the call interface.
    merged = []
    span = None

    for token in pipeline(text):
        label = token['entity']
        kind = label[2:]            # entity type without the 'B-'/'I-' prefix
        begins = label[:1] == 'B'

        # A new span starts on the first token, on a type change, or on any 'B-' token.
        if span is None or kind != span['entity'] or begins:
            if span is not None:
                merged.append(span)
            span = {
                "entity": kind,
                "start": token['start'],
                "end": token['end'],
                "score": token['score'],
            }
        else:
            # Continuation token: extend the span and keep the best score.
            span['end'] = token['end']
            span['score'] = max(span['score'], token['score'])

    if span is not None:
        merged.append(span)

    return {"text": text, "entities": merged}
76
 
77
  def process_classification(text: str, model1, model2, tokenizer1) -> Tuple[str, str, str]:
78
  inputs1 = tokenizer1(text, max_length=512, return_tensors='pt', truncation=True, padding=True)
79
 
80
  with torch.no_grad():
81
+ outputs1 = model1(**inputs1)
82
+ outputs2 = model2(**inputs1)
83
+
84
+ prediction1 = outputs1[0].item()
85
  prediction2 = outputs2[0].item()
86
  score = prediction1 / (prediction2 + prediction1)
87
 
 
91
  entities_bin = [entity['entity'] for entity in ner_output_bin['entities']]
92
  entities_ext = [entity['entity'] for entity in ner_output_ext['entities']]
93
 
94
+ all_entities = entities_bin + entities_ext
95
+ entity_counts = {entity: all_entities.count(entity) for entity in set(all_entities)}
96
 
97
+ pie_labels = list(entity_counts.keys())
98
+ pie_sizes = list(entity_counts.values())
 
 
 
99
 
100
  fig1, ax1 = plt.subplots()
101
+ ax1.pie(pie_sizes, labels=pie_labels, autopct='%1.1f%%', startangle=90)
102
  ax1.axis('equal')
103
 
104
  fig2, ax2 = plt.subplots()
105
+ ax2.bar(entity_counts.keys(), entity_counts.values())
106
  ax2.set_ylabel('Count')
107
  ax2.set_xlabel('Entity Type')
108
  ax2.set_title('Entity Counts')
 
121
  classification_output[0], classification_output[1], classification_output[2],
122
  pie_chart, bar_chart)
123
 
 
 
124
  examples = [
125
  ['Bevor ich meinen Hund kaufte bin ich immer alleine durch den Park gelaufen. Gestern war ich aber mit dem Hund losgelaufen. Das Wetter war sehr schön, nicht wie sonst im Winter. Ich weiß nicht genau. Mir fällt sonst nichts dazu ein. Wir trafen auf mehrere Spaziergänger. Ein Mann mit seinem Kind. Das Kind hat ein Eis gegessen.'],
126
  ]
127
 
 
128
  iface = gr.Interface(
129
  fn=all,
130
  inputs=gr.Textbox(lines=5, label="Input Text", placeholder="Write about how your breakfast went or anything else that happened or might happen to you ..."),
 
159
  theme=monochrome
160
  )
161
 
162
+ iface.launch()