blaxx14 committed on
Commit
fa6a5e5
·
2 Parent(s): ec46473 15a0b55
Files changed (3) hide show
  1. backend/file_uploads.py +4 -4
  2. backend/parser.py +11 -5
  3. temp/file.txt +0 -0
backend/file_uploads.py CHANGED
@@ -6,7 +6,7 @@ import os
6
 
7
  app = Flask(__name__)
8
 
9
- app.config['UPLOAD_FOLDER'] = 'temp'
10
  app.config['ALLOWED_EXTENSIONS'] = {'pdf', 'png', 'jpg', 'jpeg', 'xlsx', 'csv'}
11
  app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
12
 
@@ -25,10 +25,10 @@ def upload_file():
25
  if file and allowed_file(file.filename):
26
  filename = secure_filename(file.filename)
27
 
28
- if not os.path.exists(app.config['UPLOAD_FOLDER']):
29
- os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
30
 
31
- filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
32
  file.save(filepath)
33
 
34
  if filename.endswith('.pdf'):
 
6
 
7
  app = Flask(__name__)
8
 
9
+ app.config['UPLOAD_FOLDER'] = 'tmp'
10
  app.config['ALLOWED_EXTENSIONS'] = {'pdf', 'png', 'jpg', 'jpeg', 'xlsx', 'csv'}
11
  app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024
12
 
 
25
  if file and allowed_file(file.filename):
26
  filename = secure_filename(file.filename)
27
 
28
+ if not os.path.exists('/tmp'):
29
+ os.makedirs('/tmp', exist_ok=True)
30
 
31
+ filepath = os.path.join('/tmp', filename)
32
  file.save(filepath)
33
 
34
  if filename.endswith('.pdf'):
backend/parser.py CHANGED
@@ -117,9 +117,9 @@ def parse_promotion_pdf(pdf_path):
117
  docx_path = pathname + ".docx"
118
 
119
  with open(pdf_path, 'rb') as f:
120
- convert_pdf_to_word(f, os.path.join('temp', docx_path))
121
 
122
- tables = extract_tables_from_docx(os.path.join('temp', docx_path))
123
  tables_result = parse_table_data(tables)
124
  del tables_result[0]
125
 
@@ -243,14 +243,20 @@ def parse_promotion_excel(excel_path, filename):
243
  df = df.where(pd.notnull(df), None)
244
 
245
  data = df.to_dict(orient="records")
246
- os.makedirs('temp', exist_ok=True)
 
 
 
 
247
  if not filename.lower().endswith('.json'):
248
  filename += '.json'
249
- filepath = os.path.join('temp', filename)
 
 
250
  base_name, ext = os.path.splitext(filename)
251
  copy_num = 1
252
  while os.path.exists(filepath):
253
- filepath = os.path.join('temp', f"{base_name} ({copy_num}){ext}")
254
  copy_num += 1
255
  with open(filepath, "w", encoding="utf-8") as f:
256
  json.dump(data, f, ensure_ascii=False, indent=2)
 
117
  docx_path = pathname + ".docx"
118
 
119
  with open(pdf_path, 'rb') as f:
120
+ convert_pdf_to_word(f, os.path.join('/tmp', docx_path))
121
 
122
+ tables = extract_tables_from_docx(os.path.join('/tmp', docx_path))
123
  tables_result = parse_table_data(tables)
124
  del tables_result[0]
125
 
 
243
  df = df.where(pd.notnull(df), None)
244
 
245
  data = df.to_dict(orient="records")
246
+
247
+ # Create the temp folder if it does not exist yet
248
+ os.makedirs('/tmp', exist_ok=True)
249
+
250
+ # Append .json if the filename does not already end with it
251
  if not filename.lower().endswith('.json'):
252
  filename += '.json'
253
+
254
+ # Prevent overwriting an existing file
255
+ filepath = os.path.join('/tmp', filename)
256
  base_name, ext = os.path.splitext(filename)
257
  copy_num = 1
258
  while os.path.exists(filepath):
259
+ filepath = os.path.join('/tmp', f"{base_name} ({copy_num}){ext}")
260
  copy_num += 1
261
  with open(filepath, "w", encoding="utf-8") as f:
262
  json.dump(data, f, ensure_ascii=False, indent=2)
temp/file.txt ADDED
File without changes