Spaces: Sleeping
merging
Browse files
- backend/file_uploads.py +4 -4
- backend/parser.py +11 -5
- temp/file.txt +0 -0
backend/file_uploads.py
CHANGED
@@ -6,7 +6,7 @@ import os
|
|
6 |
|
7 |
app = Flask(__name__)
|
8 |
|
9 |
-
app.config['UPLOAD_FOLDER'] = '
|
10 |
app.config['ALLOWED_EXTENSIONS'] = {'pdf', 'png', 'jpg', 'jpeg', 'xlsx', 'csv'}
|
11 |
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
|
12 |
|
@@ -25,10 +25,10 @@ def upload_file():
|
|
25 |
if file and allowed_file(file.filename):
|
26 |
filename = secure_filename(file.filename)
|
27 |
|
28 |
-
if not os.path.exists(
|
29 |
-
os.makedirs(
|
30 |
|
31 |
-
filepath = os.path.join(
|
32 |
file.save(filepath)
|
33 |
|
34 |
if filename.endswith('.pdf'):
|
|
|
6 |
|
7 |
app = Flask(__name__)
|
8 |
|
9 |
+
app.config['UPLOAD_FOLDER'] = 'tmp'
|
10 |
app.config['ALLOWED_EXTENSIONS'] = {'pdf', 'png', 'jpg', 'jpeg', 'xlsx', 'csv'}
|
11 |
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
|
12 |
|
|
|
25 |
if file and allowed_file(file.filename):
|
26 |
filename = secure_filename(file.filename)
|
27 |
|
28 |
+
if not os.path.exists('/tmp'):
|
29 |
+
os.makedirs('/tmp', exist_ok=True)
|
30 |
|
31 |
+
filepath = os.path.join('/tmp', filename)
|
32 |
file.save(filepath)
|
33 |
|
34 |
if filename.endswith('.pdf'):
|
backend/parser.py
CHANGED
@@ -117,9 +117,9 @@ def parse_promotion_pdf(pdf_path):
|
|
117 |
docx_path = pathname + ".docx"
|
118 |
|
119 |
with open(pdf_path, 'rb') as f:
|
120 |
-
convert_pdf_to_word(f, os.path.join('
|
121 |
|
122 |
-
tables = extract_tables_from_docx(os.path.join('
|
123 |
tables_result = parse_table_data(tables)
|
124 |
del tables_result[0]
|
125 |
|
@@ -243,14 +243,20 @@ def parse_promotion_excel(excel_path, filename):
|
|
243 |
df = df.where(pd.notnull(df), None)
|
244 |
|
245 |
data = df.to_dict(orient="records")
|
246 |
-
|
|
|
|
|
|
|
|
|
247 |
if not filename.lower().endswith('.json'):
|
248 |
filename += '.json'
|
249 |
-
|
|
|
|
|
250 |
base_name, ext = os.path.splitext(filename)
|
251 |
copy_num = 1
|
252 |
while os.path.exists(filepath):
|
253 |
-
filepath = os.path.join('
|
254 |
copy_num += 1
|
255 |
with open(filepath, "w", encoding="utf-8") as f:
|
256 |
json.dump(data, f, ensure_ascii=False, indent=2)
|
|
|
117 |
docx_path = pathname + ".docx"
|
118 |
|
119 |
with open(pdf_path, 'rb') as f:
|
120 |
+
convert_pdf_to_word(f, os.path.join('/tmp', docx_path))
|
121 |
|
122 |
+
tables = extract_tables_from_docx(os.path.join('/tmp', docx_path))
|
123 |
tables_result = parse_table_data(tables)
|
124 |
del tables_result[0]
|
125 |
|
|
|
243 |
df = df.where(pd.notnull(df), None)
|
244 |
|
245 |
data = df.to_dict(orient="records")
|
246 |
+
|
247 |
+
# Buat folder temp jika belum ada
|
248 |
+
os.makedirs('/tmp', exist_ok=True)
|
249 |
+
|
250 |
+
# Tambah .json jika belum ada
|
251 |
if not filename.lower().endswith('.json'):
|
252 |
filename += '.json'
|
253 |
+
|
254 |
+
# Cegah overwrite file
|
255 |
+
filepath = os.path.join('/tmp', filename)
|
256 |
base_name, ext = os.path.splitext(filename)
|
257 |
copy_num = 1
|
258 |
while os.path.exists(filepath):
|
259 |
+
filepath = os.path.join('/tmp', f"{base_name} ({copy_num}){ext}")
|
260 |
copy_num += 1
|
261 |
with open(filepath, "w", encoding="utf-8") as f:
|
262 |
json.dump(data, f, ensure_ascii=False, indent=2)
|
temp/file.txt
ADDED
File without changes
|