BilalHasan committed on
Commit
aff689d
·
verified ·
1 Parent(s): 9709b1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -19
app.py CHANGED
@@ -6,7 +6,7 @@ def preprocess_image(image):
6
  bin_image = cv.copyMakeBorder(bin_image, int(0.10 * image.shape[0]), int(0.05 * image.shape[0]), int(0.05 * image.shape[1]), int(0.10 * image.shape[1]), cv.BORDER_CONSTANT, value=(255, 255, 255))
7
  return bin_image
8
 
9
- bin_image = preprocess_image(image)
10
 
11
  def split_image_into_lines(image):
12
  lines = []
@@ -42,7 +42,7 @@ def split_image_into_lines(image):
42
 
43
  return lines
44
 
45
- lines = split_image_into_lines(bin_image)
46
 
47
 
48
  def generate_text(line):
@@ -51,24 +51,28 @@ def generate_text(line):
51
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
52
  return generated_text
53
 
54
- with ProcessPoolExecutor() as executor:
55
- results = ' '.join(executor.map(generate_text, lines))
56
- #improve results with llm
57
 
58
- client = OpenAI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- completion = client.chat.completions.create(
61
- model="gpt-4o",
62
- messages=[
63
- {
64
- "role": "user",
65
-
66
- "content": f"I have a string that was extracted from an image of handwritten text. The extraction process introduced minor grammatical, spelling, and punctuation errors. Please carefully review the text below and make any necessary corrections to improve readability and accuracy while preserving the original meaning. Do not change the content or style beyond necessary corrections. Return the corrected text only without adding any headings, explanations, or extra formatting. Text: {results}"
67
- }
68
- ]
69
- )
70
-
71
- improved_text = completion.choices[0].message.content
72
 
73
 
74
 
@@ -105,4 +109,21 @@ max_width = 0.9
105
  out_image_width = 1500
106
  top_margin = 100
107
 
108
- out_image = put_text(improved_text, font, font_scale, color, thickness, max_width, out_image_width, top_margin)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  bin_image = cv.copyMakeBorder(bin_image, int(0.10 * image.shape[0]), int(0.05 * image.shape[0]), int(0.05 * image.shape[1]), int(0.10 * image.shape[1]), cv.BORDER_CONSTANT, value=(255, 255, 255))
7
  return bin_image
8
 
9
+ #bin_image = preprocess_image(image)
10
 
11
  def split_image_into_lines(image):
12
  lines = []
 
42
 
43
  return lines
44
 
45
+ #lines = split_image_into_lines(bin_image)
46
 
47
 
48
  def generate_text(line):
 
51
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
52
  return generated_text
53
 
 
 
 
54
 
55
def get_improved_result(lines):
    """Transcribe each line image in parallel, then polish the text with GPT-4o.

    Parameters
    ----------
    lines : list of line images (as produced by `split_image_into_lines`).

    Returns
    -------
    str: the LLM-corrected transcription.

    NOTE(review): assumes `generate_text` and the model/processor it closes over
    are picklable for ProcessPoolExecutor workers — confirm on the deployment host.
    """
    # Fan the per-line OCR out across processes; `map` preserves line order,
    # so joining with spaces reconstructs the page text left-to-right, top-down.
    with ProcessPoolExecutor() as pool:
        results = ' '.join(pool.map(generate_text, lines))

    #improve results with llm
    client = OpenAI()
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "user",
                "content": f"I have a string that was extracted from an image of handwritten text. The extraction process introduced minor grammatical, spelling, and punctuation errors. Please carefully review the text below and make any necessary corrections to improve readability and accuracy while preserving the original meaning. Do not change the content or style beyond necessary corrections. Return the corrected text only without adding any headings, explanations, or extra formatting. Text: {results}"
            }
        ]
    )

    # First (and only) choice holds the corrected text.
    return response.choices[0].message.content
 
 
 
 
 
 
 
 
 
 
 
76
 
77
 
78
 
 
109
  out_image_width = 1500
110
  top_margin = 100
111
 
112
+ #out_image = put_text(improved_text, font, font_scale, color, thickness, max_width, out_image_width, top_margin)
113
+
114
def predict(input_img):
    """Full handwriting-OCR pipeline for one uploaded image.

    Steps: binarize/pad the input, split it into text-line images, transcribe
    and LLM-correct the lines, then render the corrected text onto an output
    image for display.

    Parameters
    ----------
    input_img : image array from the Gradio `gr.Image` input.

    Returns
    -------
    The rendered output image produced by `put_text`.
    """
    bin_image = preprocess_image(input_img)
    lines = split_image_into_lines(bin_image)
    improved_text = get_improved_result(lines)
    out_image = put_text(improved_text, font, font_scale, color, thickness, max_width, out_image_width, top_margin)
    # BUG FIX: previously returned `out_img`, an undefined name, which raised
    # NameError on every request; the rendered image is `out_image`.
    return out_image
120
+
121
# Gradio UI wiring: single image in, single rendered image out.
gradio_app = gr.Interface(
    fn=predict,
    inputs=gr.Image(label="Image with handwritten text", sources=['upload']),
    outputs=[gr.Image(label="Output Image")],
    title="Extract Handwritten Text",
)

# Launch the web app only when executed as a script (not on import).
if __name__ == "__main__":
    gradio_app.launch()