Shami96 committed on
Commit
55e3c9a
·
verified ·
1 Parent(s): e8b46b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -23
app.py CHANGED
@@ -2,47 +2,60 @@ import gradio as gr
2
  import tempfile
3
  import os
4
  import subprocess
 
5
 
6
  def process_files(pdf_file, word_file):
7
- # 1. Save the uploaded files to temp
8
- temp_dir = tempfile.mkdtemp()
 
 
 
9
  pdf_path = os.path.join(temp_dir, "input.pdf")
10
  word_path = os.path.join(temp_dir, "input.docx")
11
- with open(pdf_path, "wb") as f:
12
- f.write(pdf_file.read())
13
- with open(word_path, "wb") as f:
14
- f.write(word_file.read())
15
 
16
- # 2. Step 1: Extract PDF data to txt
17
  pdf_txt_path = os.path.join(temp_dir, "pdf_data.txt")
18
- subprocess.run(["python", "extract_pdf_data.py", pdf_path, pdf_txt_path], check=True)
 
 
 
19
 
20
- # 3. Step 2: Extract red text from Word to JSON
21
  word_json_path = os.path.join(temp_dir, "word_data.json")
22
- subprocess.run(["python", "extract_red_text.py", word_path, word_json_path], check=True)
 
 
 
23
 
24
- # 4. Step 3: Update docx JSON with PDF txt, output updated JSON
25
  updated_json_path = os.path.join(temp_dir, "updated_word_data.json")
26
- subprocess.run([
27
- "python", "update_docx_with_pdf.py", word_json_path, pdf_txt_path, updated_json_path
28
- ], check=True)
 
29
 
30
- # 5. Step 4: Compare word file with updated JSON and update docx
31
  final_docx_path = os.path.join(temp_dir, "updated.docx")
32
- subprocess.run([
33
- "python", "updated_word.py", word_path, updated_json_path, final_docx_path
34
- ], check=True)
 
35
 
36
- # 6. Return final docx file
37
  return final_docx_path
38
 
39
- gr.Interface(
40
  fn=process_files,
41
  inputs=[
42
- gr.File(label="Upload PDF File"),
43
- gr.File(label="Upload Word File")
44
  ],
45
  outputs=gr.File(label="Download Updated Word File"),
46
  title="Red Text Replacer",
47
  description="Upload a PDF and Word document. Red-colored text in the Word doc will be replaced by matching content from the PDF."
48
- ).launch()
 
 
 
 
2
  import tempfile
3
  import os
4
  import subprocess
5
+ import uuid
6
 
def process_files(pdf_file, word_file):
    """Run the red-text replacement pipeline on one PDF / Word upload pair.

    The uploads are copied into a fresh, private temp directory under the
    fixed names ``input.pdf`` and ``input.docx``; four helper scripts are
    then run in order, each as a separate process:

    1. ``extract_pdf_data.py``     PDF            -> ``pdf_data.txt``
    2. ``extract_red_text.py``     Word           -> ``word_data.json``
    3. ``update_docx_with_pdf.py`` JSON + txt     -> ``updated_word_data.json``
    4. ``updated_word.py``         Word + JSON    -> ``updated.docx``

    The script names are relative, so they are resolved against the current
    working directory of the app process.

    Args:
        pdf_file: Filesystem path of the uploaded PDF (the inputs are
            declared with ``type="filepath"``).
        word_file: Filesystem path of the uploaded Word document.

    Returns:
        Path of ``updated.docx`` inside the per-run temp directory. The
        directory is intentionally left in place on success because the
        returned file lives in it.

    Raises:
        OSError: If an upload cannot be copied into the temp directory.
        subprocess.CalledProcessError: If any helper script exits non-zero.
    """
    # Function-scope imports keep this block self-contained.
    import shutil
    import sys

    # Run the helpers with the interpreter that is running this app, so they
    # see the same installed packages; fall back to PATH lookup if unknown.
    python = sys.executable or "python"

    # A unique directory per call keeps concurrent requests from colliding.
    temp_dir = tempfile.mkdtemp(prefix="hf_redtext_")
    try:
        pdf_path = os.path.join(temp_dir, "input.pdf")
        word_path = os.path.join(temp_dir, "input.docx")
        pdf_txt_path = os.path.join(temp_dir, "pdf_data.txt")
        word_json_path = os.path.join(temp_dir, "word_data.json")
        updated_json_path = os.path.join(temp_dir, "updated_word_data.json")
        final_docx_path = os.path.join(temp_dir, "updated.docx")

        # Copy rather than rename: os.rename can fail when the upload and the
        # temp directory are on different filesystems, and it would also
        # remove the upload from wherever the caller stored it.
        shutil.copyfile(pdf_file, pdf_path)
        shutil.copyfile(word_file, word_path)

        pipeline = [
            # Step 1: Extract PDF data to txt
            ["extract_pdf_data.py", pdf_path, pdf_txt_path],
            # Step 2: Extract red text from Word to JSON
            ["extract_red_text.py", word_path, word_json_path],
            # Step 3: Update docx JSON with PDF txt, output updated JSON
            ["update_docx_with_pdf.py", word_json_path, pdf_txt_path, updated_json_path],
            # Step 4: Compare word file with updated JSON and update docx
            ["updated_word.py", word_path, updated_json_path, final_docx_path],
        ]
        for step in pipeline:
            # check=True aborts the pipeline on the first failing step.
            subprocess.run([python, *step], check=True)
    except BaseException:
        # Nothing useful is left behind on failure; do not leak the directory.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    # Return final updated docx file
    return final_docx_path
48
 
# Gradio front end: two file uploads in, one downloadable Word file out.
# Both uploads use type="filepath", so process_files receives path strings.
iface = gr.Interface(
    title="Red Text Replacer",
    description=(
        "Upload a PDF and Word document. "
        "Red-colored text in the Word doc will be replaced "
        "by matching content from the PDF."
    ),
    fn=process_files,
    inputs=[
        gr.File(type="filepath", label="Upload PDF File"),
        gr.File(type="filepath", label="Upload Word File"),
    ],
    outputs=gr.File(label="Download Updated Word File"),
)

# Start the server only when executed as a script, so importing this module
# builds the interface without launching it.
if __name__ == "__main__":
    iface.launch()