jcmachicao committed on
Commit
ae6afbe
1 Parent(s): 3381eff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -23
app.py CHANGED
@@ -4,8 +4,10 @@ import streamlit as st
4
  import pandas as pd
5
  import base64
6
  from pyxlsb import open_workbook as open_xlsb
 
7
  from datetime import datetime
8
  from funcs import extrae_dato_web, extrae_web, extrae_alternate, convierte_excel
 
9
  import bs4 as BeautifulSoup
10
  import urllib.request
11
  from urllib.request import urlopen, Request
@@ -16,29 +18,30 @@ with c2:
16
  st.image('encopartslogo.jpg', width=300, caption='https://encoparts.com/')
17
 
18
  rutas_websearch = ['https://en.hespareparts.com/search/?search=', 'https://offroadeq.com/parts-search/']
19
- st.title('Generación de Tablas de Datos de Extracción')
20
- st.subheader('Carga de Datos')
21
- selec = st.radio('Seleccione: ', [None, 'Carga por Texto con Comas', 'Carga por Archivo Excel'])
22
  items = None
23
 
24
  if selec is None:
25
 
26
- st.write('Por favor seleccione una opción válida de carga.')
27
 
28
  else:
29
 
30
- if selec == 'Carga por Texto con Comas' and items is None:
31
  st.write(selec)
32
- codigos = st.text_input('Escriba o peque aqui texto separando los códigos por comas: ')
33
- if st.button('Proceder'):
34
  items = list(codigos.split(','))
35
 
36
  else:
37
  st.write(selec)
38
- file = st.file_uploader('Seleccione un archivo: ')
39
  if file is not None:
40
- codigosf = pd.read_excel(file)
41
- st.write('Filas, Columnas de Data de Prueba: ', codigosf.shape)
 
42
  namcol = codigosf.columns[0]
43
  items = pd.Series(codigosf[namcol]).astype(str)
44
 
@@ -47,35 +50,39 @@ if selec is not None and items is not None:
47
  st.write(items)
48
 
49
  datos_tot = []
50
- st.write('Por favor espere mientas se extrae datos...')
51
  for it in items:
52
- extrae_med = extrae_web(it)
53
  extrae_dat = extrae_dato_web(it)
54
  itxx = it[:-4]+'-'+it[-4:]
55
- datos = [it, itxx] + list(extrae_med) + list(extrae_dat)
 
56
  datos_tot.append(datos)
57
 
58
  dtdf = pd.DataFrame(datos_tot)
59
  dtdf.columns = ['part_no_', 'part_no',
60
- 'descrip_en', 'length_m', 'width_m', 'height_m', 'vol_m3', 'compatible',
61
- 'alternate', 'precio_bm_us', 'peso_lb', 'descr']
 
62
  now = datetime.now()
63
  date_time = now.strftime("%m/%d/%Y, %H:%M:%S").replace('/','_').replace(':','_').replace(', ', '_')
64
  dtdf['peso_kg'] = dtdf.peso_lb*0.453592
65
 
66
- dtdf2 = dtdf[['part_no_', 'part_no', 'descr', 'length_m', 'width_m', 'height_m', 'vol_m3', 'peso_kg', 'precio_bm_us', 'alternate', 'compatible']]
 
 
 
67
 
68
  df_xlsx = convierte_excel(dtdf2)
69
- st.download_button(label='📩 Descargar XLSX', data=df_xlsx,
70
- file_name = 'df_'+date_time+'.xlsx')
71
-
72
  csv = dtdf2.to_csv(index=False)
73
- st.download_button(label='📩 Descargar CSV', data=csv,
74
- file_name = 'df_'+date_time+'.csv')
75
 
76
  else:
77
- st.write('Cuando seleccione la opción, por favor cargue datos y proceda.')
78
 
79
  c1, c2, c3 = st.columns([4,4,4])
80
  with c3:
81
- st.image('gdmklogo.png', width=100, caption='Diseñado por GestioDinámica 2022')
 
4
  import pandas as pd
5
  import base64
6
  from pyxlsb import open_workbook as open_xlsb
7
+ from io import BytesIO
8
  from datetime import datetime
9
  from funcs import extrae_dato_web, extrae_web, extrae_alternate, convierte_excel
10
+ from funcs import encuentra_hoja
11
  import bs4 as BeautifulSoup
12
  import urllib.request
13
  from urllib.request import urlopen, Request
 
18
  st.image('encopartslogo.jpg', width=300, caption='https://encoparts.com/')
19
 
20
  rutas_websearch = ['https://en.hespareparts.com/search/?search=', 'https://offroadeq.com/parts-search/']
21
+ st.title('Data Extraction')
22
+ st.subheader('Part Number Loading A')
23
+ selec = st.radio('Select: ', [None, 'Comma Separated Text', 'Excel File Loading'])
24
  items = None
25
 
26
  if selec is None:
27
 
28
+ st.write('Please select data loading method.')
29
 
30
  else:
31
 
32
+ if selec == 'Comma Separated Text' and items is None:
33
  st.write(selec)
34
+ codigos = st.text_input('Paste or write here the text, separated by commas: ')
35
+ if st.button('Proceed'):
36
  items = list(codigos.split(','))
37
 
38
  else:
39
  st.write(selec)
40
+ file = st.file_uploader('Select an Excel File: ')
41
  if file is not None:
42
+ hojax = encuentra_hoja(file)
43
+ codigosf = pd.read_excel(file, sheet_name=hojax)
44
+ st.write('Rows, Columns: ', codigosf.shape)
45
  namcol = codigosf.columns[0]
46
  items = pd.Series(codigosf[namcol]).astype(str)
47
 
 
50
  st.write(items)
51
 
52
  datos_tot = []
53
+ st.write('Please wait while data is being processed ...')
54
  for it in items:
55
+ #extrae_med = extrae_web(it)
56
  extrae_dat = extrae_dato_web(it)
57
  itxx = it[:-4]+'-'+it[-4:]
58
+ datos = [it, itxx] + list(extrae_dat)
59
+ #list(extrae_med) +
60
  datos_tot.append(datos)
61
 
62
  dtdf = pd.DataFrame(datos_tot)
63
  dtdf.columns = ['part_no_', 'part_no',
64
+ #'descrip_en', 'length_m', 'width_m', 'height_m',
65
+ #'vol_m3',' compatible',
66
+ 'alternate', 'peso_lb', 'precio_bm_us', 'descr']
67
  now = datetime.now()
68
  date_time = now.strftime("%m/%d/%Y, %H:%M:%S").replace('/','_').replace(':','_').replace(', ', '_')
69
  dtdf['peso_kg'] = dtdf.peso_lb*0.453592
70
 
71
+ dtdf2 = dtdf[['part_no_', 'part_no', 'descr',
72
+ #'compatible',
73
+ #'length_m', 'width_m', 'height_m', 'vol_m3',
74
+ 'peso_kg', 'precio_bm_us', 'alternate']]
75
 
76
  df_xlsx = convierte_excel(dtdf2)
77
+ st.download_button(label='📩 Download XLSX', data=df_xlsx,
78
+ file_name = 'df_test'+date_time+'.xlsx')
79
+
80
  csv = dtdf2.to_csv(index=False)
81
+ st.download_button(label='📩 Download CSV', data=csv, file_name = 'extraccion_'+date_time+'.csv')
 
82
 
83
  else:
84
+ st.write('Please select loading option, load and proceed.')
85
 
86
  c1, c2, c3 = st.columns([4,4,4])
87
  with c3:
88
+ st.image('gdmklogo.png', width=100, caption='Updated by GestioDinámica in 2023')