File size: 1,628 Bytes
b86e5c3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import shutil
import tempfile
from io import BytesIO
from pathlib import Path

import streamlit as st

from ocr_and_translate_en2jp.genarate_tranrate_df import df_generator

st.title('OCR and Translation App')

uploaded_file = st.file_uploader('Choose a file')
if uploaded_file is not None:
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=Path(uploaded_file.name).suffix
    ) as tmpfile:
        shutil.copyfileobj(uploaded_file, tmpfile)
        uploaded_file_path = tmpfile.name

    max_words = st.number_input(
        'Maximum number of words to process', min_value=1, value=50
    )
    do_shuffle = st.checkbox('Shuffle output')
    seed = (
        st.number_input('Seed for shuffling', min_value=0, value=42)
        if do_shuffle
        else None
    )
    do_clean_noise_data = st.checkbox('Clean noise data', value=True)

    if st.button('Process'):
        df = df_generator(
            uploaded_file_path, max_words, do_shuffle, seed, do_clean_noise_data
        )
        st.dataframe(df)

        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label='Download data as CSV',
            data=csv,
            file_name='processed_data.csv',
            mime='text/csv',
        )

        # REF: https://qiita.com/nyakiri_0726/items/2ae8cfb926c48072b190
        df.to_excel(buf := BytesIO(), index=False)

        st.download_button(
            label="Download data as Excel",
            data=buf.getvalue(),
            file_name='processed_data.xlsx',
            mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
        )