Spaces:
Sleeping
Sleeping
Manejo de datos tipo Date archivos
Browse files
app.py
CHANGED
@@ -471,6 +471,24 @@ class ModeloDataset:
|
|
471 |
else:
|
472 |
x=x+1
|
473 |
return new_identificadores
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
474 |
###
|
475 |
### Funciones para generar diversos datos fake dependiendo de la categoría
|
476 |
###
|
@@ -492,8 +510,10 @@ class ModeloDataset:
|
|
492 |
return self.faker_.company()
|
493 |
def fake_city(self):
|
494 |
return self.faker_.city()
|
495 |
-
def reemplazo_fake(self,identificadores):
|
496 |
-
|
|
|
|
|
497 |
if self.idioma=='es':
|
498 |
self.faker_ = Faker('es_MX')
|
499 |
|
@@ -510,8 +530,40 @@ class ModeloDataset:
|
|
510 |
|
511 |
elif 'LOC' in id:
|
512 |
new_iden.append(self.fake_city())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
513 |
else:
|
514 |
new_iden.append(id)
|
|
|
515 |
return new_iden
|
516 |
###
|
517 |
### Función que aplica los modelos de acuerdo al idioma detectado
|
@@ -661,7 +713,7 @@ class ModeloDataset:
|
|
661 |
if etiquetas:
|
662 |
out.append(self.salida_texto( iden,np.array(tokens[i])))
|
663 |
else:
|
664 |
-
out.append(self.salida_texto(iden,self.reemplazo_fake(np.array(tokens[i]))))
|
665 |
i=i+1
|
666 |
|
667 |
return out
|
|
|
471 |
else:
|
472 |
x=x+1
|
473 |
return new_identificadores
|
474 |
+
def is_integer_string(self, value):
    """Return True when ``value`` can be parsed by ``int()``, else False.

    Args:
        value: Candidate token, normally a string such as ``"2024"``.

    Returns:
        bool: True if ``int(value)`` succeeds; False if it raises
        ``ValueError`` (non-numeric text) or ``TypeError`` (e.g. ``None``).
    """
    try:
        int(value)
    except (ValueError, TypeError):
        # TypeError covers None and other objects int() rejects outright, so
        # callers get a plain False instead of an unexpected exception.
        return False
    return True
|
480 |
+
def get_day_of(self, month_name, year=2024):
    """Return the number of days in a month, as a string.

    Args:
        month_name: Full month name in Spanish or English. Matching ignores
            letter case and surrounding whitespace.
        year: Year used to resolve the length of February. Defaults to 2024.

    Returns:
        str: The day count of the month (which is also its last day),
        e.g. ``"29"`` for ``"february"`` with the default year.

    Raises:
        KeyError: If ``month_name`` is not one of the recognised names.
    """
    # Imported locally so the method does not depend on a module-level
    # import that is not visible from this part of the file.
    import calendar

    months = {
        'enero': 1, 'febrero': 2, 'marzo': 3, 'abril': 4, 'mayo': 5, 'junio': 6,
        'julio': 7, 'agosto': 8, 'septiembre': 9, 'octubre': 10, 'noviembre': 11, 'diciembre': 12,
        'january': 1, 'february': 2, 'march': 3, 'april': 4, 'may': 5, 'june': 6,
        'july': 7, 'august': 8, 'september': 9, 'october': 10, 'november': 11, 'december': 12,
    }
    # Normalise before the lookup: the table is keyed in lowercase, while
    # month names found in text are usually capitalised ("Enero", "January").
    month = months[str(month_name).strip().lower()]
    _, num_days = calendar.monthrange(year, month)
    return str(num_days)
|
491 |
+
|
492 |
###
|
493 |
### Funciones para generar diversos datos fake dependiendo de la categoría
|
494 |
###
|
|
|
510 |
return self.faker_.company()
|
511 |
def fake_city(self):
    """Return the value produced by ``self.faker_.city()``."""
    return self.faker_.city()
|
513 |
+
def reemplazo_fake(self,identificadores,new_tokens):
|
514 |
+
a=['Enero','January', 'February','Febrero','Marzo','March','Abril','April','Mayo','May','Junio','June','Julio','July','Agosto','August','Septiembre','September','Octubre','October','Noviembre','November','Diciembre','December']
|
515 |
+
b=['Ene','Jan', 'Feb','Mar','Mar','Abr','Apr','May','May','Jun','Jun','Jul','Jul','Ago','Aug','Sep','Oct','Nov','Dic','Dec']
|
516 |
+
i=0
|
517 |
if self.idioma=='es':
|
518 |
self.faker_ = Faker('es_MX')
|
519 |
|
|
|
530 |
|
531 |
elif 'LOC' in id:
|
532 |
new_iden.append(self.fake_city())
|
533 |
+
elif 'DATE' in id:
|
534 |
+
|
535 |
+
if self.is_integer_string(new_tokens[i]):
|
536 |
+
|
537 |
+
match len(new_tokens[i]):
|
538 |
+
case 4:
|
539 |
+
new_iden.append(self.faker_.date()[:4])
|
540 |
+
case 10:
|
541 |
+
new_iden.append(self.faker_.date())
|
542 |
+
case 1:
|
543 |
+
new_iden.append(self.get_day_of('february'))
|
544 |
+
case 2:
|
545 |
+
new_iden.append(self.get_day_of('february'))
|
546 |
+
case _:
|
547 |
+
new_iden.append(id)
|
548 |
+
else:
|
549 |
+
match new_tokens[i]:
|
550 |
+
case w if w in a:
|
551 |
+
new_iden.append(self.faker_.month_name())
|
552 |
+
case w if w in b:
|
553 |
+
new_iden.append(self.faker_.month_name()[:3])
|
554 |
+
case "-":
|
555 |
+
new_iden.append("-")
|
556 |
+
case ".":
|
557 |
+
new_iden.append(".")
|
558 |
+
case ",":
|
559 |
+
new_iden.append(",")
|
560 |
+
case "/":
|
561 |
+
new_iden.append("/")
|
562 |
+
case _:
|
563 |
+
new_iden.append(id)
|
564 |
else:
|
565 |
new_iden.append(id)
|
566 |
+
i=i+1
|
567 |
return new_iden
|
568 |
###
|
569 |
### Función que aplica los modelos de acuerdo al idioma detectado
|
|
|
713 |
if etiquetas:
|
714 |
out.append(self.salida_texto( iden,np.array(tokens[i])))
|
715 |
else:
|
716 |
+
out.append(self.salida_texto(iden,self.reemplazo_fake(np.array(tokens[i]),labels[i])))
|
717 |
i=i+1
|
718 |
|
719 |
return out
|