proyecto-meis committed on
Commit
631b67d
verified
1 Parent(s): 7d6cf6f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -62
app.py CHANGED
@@ -1,19 +1,16 @@
1
- from keras.layers import Layer
2
- import keras.backend as K
3
- from transformers import TFAutoModel, AutoTokenizer
4
  from tensorflow.keras.layers import (
5
  Softmax, GlobalAveragePooling1D, GlobalMaxPooling1D, Activation, Concatenate,
6
  Conv1D, MultiHeadAttention, LayerNormalization, Input, LSTM, Embedding,
7
  Lambda, Dense, Dropout, concatenate, SpatialDropout1D, Bidirectional
8
  )
9
- from keras.models import Model
 
10
  from tcn import TCN
11
- import keras.ops as ops
12
- from keras import initializers
13
- import tensorflow as tf
14
  import re
15
  import os
16
- import gradio as gr
17
 
18
  bert_model_name = "dccuchile/bert-base-spanish-wwm-uncased"
19
  MAX_LEN = 274
@@ -21,48 +18,33 @@ WEIGHTS_PATH = os.getenv("WEIGHTS_PATH", "model.h5")
21
  THRESHOLD = float(os.getenv("THRESHOLD", "0.5"))
22
 
23
  tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
24
-
25
- bert_model = TFAutoModel.from_pretrained(
26
- bert_model_name,
27
- output_hidden_states=False,
28
- output_attentions=False,
29
- )
30
  bert_model.trainable = False
31
 
32
  def tcn_model_with_bert(bert_model_name="google-bert/bert-base-multilingual-uncased", max_length=512):
33
  input_ids = Input(shape=(max_length,), dtype=tf.int32, name='input_ids')
34
- attention_mask = Input(shape=(max_length,),
35
- dtype=tf.int32, name='attention_mask')
36
 
37
  def extract_bert_embeddings(inputs):
38
  return tf.cast(
39
- bert_model(
40
- {'input_ids': inputs[0], 'attention_mask': inputs[1]}).last_hidden_state,
41
  tf.float32
42
  )
43
 
44
- bert_output = Lambda(extract_bert_embeddings, output_shape=(
45
- max_length, 768))([input_ids, attention_mask])
46
-
47
  x = SpatialDropout1D(0.15)(bert_output)
48
- x = LSTM(128, activation='tanh', stateful=False,
49
- return_sequences=True, dropout=0.1)(x)
50
  x = LayerNormalization()(x)
51
- x = Bidirectional(TCN(128, dilations=[
52
- 1, 2, 4, 8], kernel_size=5, return_sequences=True, activation='gelu', name='tcn1'))(x)
53
-
54
  gap = GlobalAveragePooling1D()(x)
55
  gmp = GlobalMaxPooling1D()(x)
56
  head = Concatenate()([gap, gmp])
57
-
58
  head = Dense(64, activation="gelu")(head)
59
  head = Dropout(0.2)(head)
60
  outp = Dense(1, activation="sigmoid")(head)
61
-
62
  model = Model(inputs=[input_ids, attention_mask], outputs=outp)
63
  model.compile(
64
- optimizer=tf.keras.optimizers.AdamW(
65
- learning_rate=1e-4, weight_decay=0.01, clipnorm=1.0),
66
  loss="binary_crossentropy",
67
  metrics=['accuracy']
68
  )
@@ -75,25 +57,17 @@ def preprocessing(text):
75
  text = re.sub(r'\S*@\S*\s?', ' ', text).strip()
76
  text = re.sub(r'#\S*\s?', ' ', text).strip()
77
  text = re.sub(r'[.?!¡¿]+$', '', text)
78
- text = text.lower()
79
- text = text.strip()
80
  return text
81
 
82
- model = tcn_model_with_bert(
83
- bert_model_name=bert_model_name, max_length=MAX_LEN)
84
 
85
- _loaded = False
86
  if os.path.exists(WEIGHTS_PATH):
87
  try:
88
  model.load_weights(WEIGHTS_PATH)
89
- _loaded = True
90
  except Exception:
91
- try:
92
- from tensorflow.keras.models import load_model
93
- model = load_model(WEIGHTS_PATH, custom_objects={"TCN": TCN})
94
- _loaded = True
95
- except Exception:
96
- pass
97
 
98
  def predict_text(text: str, max_len: int = MAX_LEN, threshold: float = THRESHOLD):
99
  preprocessed_text = preprocessing(text)
@@ -105,33 +79,28 @@ def predict_text(text: str, max_len: int = MAX_LEN, threshold: float = THRESHOLD
105
  return_tensors='tf'
106
  )
107
  probs = model.predict(
108
- {'input_ids': enc['input_ids'],
109
- 'attention_mask': enc['attention_mask']},
110
  verbose=0
111
  )
112
  score = float(probs[0][0])
113
  label = int(score >= threshold)
114
  return {
115
- "text": text,
116
- "preprocessed": preprocessed_text,
117
- "score": score,
118
- "label": label
119
  }
120
 
121
- def predict_fn(texto):
122
- if not isinstance(texto, list):
123
- texto = [texto]
124
- details = []
125
- for t in texto:
126
- result = predict_text(t)
127
- details.append({
128
- "txt": t,
129
- "probability": round(float(result["score"]), 3),
130
- "risk": "ALTO" if result["label"] == 1 else "BAJO"
131
- })
132
- return details
133
 
134
- iface = gr.Interface(fn=predict_fn, inputs="text", outputs="json")
 
 
 
 
 
 
135
 
136
  if __name__ == "__main__":
137
- iface.launch(share=True, api_name="predict")
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ import tensorflow as tf
4
  from tensorflow.keras.layers import (
5
  Softmax, GlobalAveragePooling1D, GlobalMaxPooling1D, Activation, Concatenate,
6
  Conv1D, MultiHeadAttention, LayerNormalization, Input, LSTM, Embedding,
7
  Lambda, Dense, Dropout, concatenate, SpatialDropout1D, Bidirectional
8
  )
9
+ from tensorflow.keras.models import Model
10
+ from transformers import TFAutoModel, AutoTokenizer
11
  from tcn import TCN
 
 
 
12
  import re
13
  import os
 
14
 
15
  bert_model_name = "dccuchile/bert-base-spanish-wwm-uncased"
16
  MAX_LEN = 274
 
18
  THRESHOLD = float(os.getenv("THRESHOLD", "0.5"))
19
 
20
  tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
21
+ bert_model = TFAutoModel.from_pretrained(bert_model_name, output_hidden_states=False, output_attentions=False)
 
 
 
 
 
22
  bert_model.trainable = False
23
 
24
  def tcn_model_with_bert(bert_model_name="google-bert/bert-base-multilingual-uncased", max_length=512):
25
  input_ids = Input(shape=(max_length,), dtype=tf.int32, name='input_ids')
26
+ attention_mask = Input(shape=(max_length,), dtype=tf.int32, name='attention_mask')
 
27
 
28
  def extract_bert_embeddings(inputs):
29
  return tf.cast(
30
+ bert_model({'input_ids': inputs[0], 'attention_mask': inputs[1]}).last_hidden_state,
 
31
  tf.float32
32
  )
33
 
34
+ bert_output = Lambda(extract_bert_embeddings, output_shape=(max_length, 768))([input_ids, attention_mask])
 
 
35
  x = SpatialDropout1D(0.15)(bert_output)
36
+ x = LSTM(128, activation='tanh', stateful=False, return_sequences=True, dropout=0.1)(x)
 
37
  x = LayerNormalization()(x)
38
+ x = Bidirectional(TCN(128, dilations=[1, 2, 4, 8], kernel_size=5, return_sequences=True, activation='gelu', name='tcn1'))(x)
 
 
39
  gap = GlobalAveragePooling1D()(x)
40
  gmp = GlobalMaxPooling1D()(x)
41
  head = Concatenate()([gap, gmp])
 
42
  head = Dense(64, activation="gelu")(head)
43
  head = Dropout(0.2)(head)
44
  outp = Dense(1, activation="sigmoid")(head)
 
45
  model = Model(inputs=[input_ids, attention_mask], outputs=outp)
46
  model.compile(
47
+ optimizer=tf.keras.optimizers.AdamW(learning_rate=1e-4, weight_decay=0.01, clipnorm=1.0),
 
48
  loss="binary_crossentropy",
49
  metrics=['accuracy']
50
  )
 
57
  text = re.sub(r'\S*@\S*\s?', ' ', text).strip()
58
  text = re.sub(r'#\S*\s?', ' ', text).strip()
59
  text = re.sub(r'[.?!¡¿]+$', '', text)
60
+ text = text.lower().strip()
 
61
  return text
62
 
63
+ model = tcn_model_with_bert(bert_model_name=bert_model_name, max_length=MAX_LEN)
 
64
 
 
65
  if os.path.exists(WEIGHTS_PATH):
66
  try:
67
  model.load_weights(WEIGHTS_PATH)
 
68
  except Exception:
69
+ from tensorflow.keras.models import load_model
70
+ model = load_model(WEIGHTS_PATH, custom_objects={"TCN": TCN})
 
 
 
 
71
 
72
  def predict_text(text: str, max_len: int = MAX_LEN, threshold: float = THRESHOLD):
73
  preprocessed_text = preprocessing(text)
 
79
  return_tensors='tf'
80
  )
81
  probs = model.predict(
82
+ {'input_ids': enc['input_ids'], 'attention_mask': enc['attention_mask']},
 
83
  verbose=0
84
  )
85
  score = float(probs[0][0])
86
  label = int(score >= threshold)
87
  return {
88
+ "txt": text,
89
+ "probability": round(score, 3),
90
+ "risk": "ALTO" if label == 1 else "BAJO"
 
91
  }
92
 
93
+ app = FastAPI()
94
+ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 
 
 
 
 
 
 
 
 
 
95
 
96
+ @app.post("/predict")
97
+ async def predict(payload: dict):
98
+ textos = payload.get("texto", [])
99
+ if not isinstance(textos, list):
100
+ textos = [textos]
101
+ details = [predict_text(t) for t in textos]
102
+ return {"details": details}
103
 
104
  if __name__ == "__main__":
105
+ import uvicorn
106
+ uvicorn.run(app, host="0.0.0.0", port=7860)