stepfun-ai
/

GOT-OCR2_0

@@ -484,7 +484,7 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         setattr(torch.nn.Linear, "reset_parameters", lambda self: None)
         setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None)
-    def chat(self, tokenizer, image_file, ocr_type, ocr_box='', ocr_color='', render=False):
         self.disable_torch_init()
@@ -575,87 +575,86 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
                 )
-            # if render:
-            #     print('==============rendering===============')
-            #     outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
-            #     if outputs.endswith(stop_str):
-            #         outputs = outputs[:-len(stop_str)]
-            #     outputs = outputs.strip()
-            #     if '**kern' in outputs:
-            #         import verovio
-            #         from cairosvg import svg2png
-            #         import cv2
-            #         import numpy as np
-            #         tk = verovio.toolkit()
-            #         tk.loadData(outputs)
-            #         tk.setOptions({"pageWidth": 2100, "footer": 'none',
-            #     'barLineWidth': 0.5, 'beamMaxSlope': 15,
-            #     'staffLineWidth': 0.2, 'spacingStaff': 6})
-            #         tk.getPageCount()
-            #         svg = tk.renderToSVG()
-            #         svg = svg.replace("overflow=\"inherit\"", "overflow=\"visible\"")
-            #         svg_to_html(svg, "./results/demo.html")
-            #     if ocr_type == 'format' and '**kern' not in outputs:
-            #         if  '\\begin{tikzpicture}' not in outputs:
-            #             html_path = "./render_tools/" + "/content-mmd-to-html.html"
-            #             html_path_2 = "./results/demo.html"
-            #             right_num = outputs.count('\\right')
-            #             left_num = outputs.count('\left')
-            #             if right_num != left_num:
-            #                 outputs = outputs.replace('\left(', '(').replace('\\right)', ')').replace('\left[', '[').replace('\\right]', ']').replace('\left{', '{').replace('\\right}', '}').replace('\left|', '|').replace('\\right|', '|').replace('\left.', '.').replace('\\right.', '.')
-            #             outputs = outputs.replace('"', '``').replace('$', '')
-            #             outputs_list = outputs.split('\n')
-            #             gt= ''
-            #             for out in outputs_list:
-            #                 gt +=  '"' + out.replace('\\', '\\\\') + r'\n' + '"' + '+' + '\n'
-            #             gt = gt[:-2]
-            #             with open(html_path, 'r') as web_f:
-            #                 lines = web_f.read()
-            #                 lines = lines.split("const text =")
-            #                 new_web = lines[0] + 'const text ='  + gt  + lines[1]
-            #         else:
-            #             html_path = "./render_tools/" + "/tikz.html"
-            #             html_path_2 = "./results/demo.html"
-            #             outputs = outputs.translate(translation_table)
-            #             outputs_list = outputs.split('\n')
-            #             gt= ''
-            #             for out in outputs_list:
-            #                 if out:
-            #                     if '\\begin{tikzpicture}' not in out and '\\end{tikzpicture}' not in out:
-            #                         while out[-1] == ' ':
-            #                             out = out[:-1]
-            #                             if out is None:
-            #                                 break
-            #                         if out:
-            #                             if out[-1] != ';':
-            #                                 gt += out[:-1] + ';\n'
-            #                             else:
-            #                                 gt += out + '\n'
-            #                     else:
-            #                         gt += out + '\n'
-            #             with open(html_path, 'r') as web_f:
-            #                 lines = web_f.read()
-            #                 lines = lines.split("const text =")
-            #                 new_web = lines[0] + gt + lines[1]
-            #         with open(html_path_2, 'w') as web_f_new:
-            #             web_f_new.write(new_web)
     def dynamic_preprocess(self, image, min_num=1, max_num=6, image_size=1024, use_thumbnail=True):
@@ -807,13 +806,13 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
         if render:
             print('==============rendering===============')
             outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
             if outputs.endswith(stop_str):
                 outputs = outputs[:-len(stop_str)]
             outputs = outputs.strip()
-            html_path = "./render_tools/" + "content-mmd-to-html.html"
             html_path_2 = save_render_file
             right_num = outputs.count('\\right')
             left_num = outputs.count('\left')
@@ -831,10 +830,9 @@ class GOTQwenForCausalLM(Qwen2ForCausalLM):
             gt = gt[:-2]
-            with smart_open(html_path, 'r') as web_f:
-                lines = web_f.read()
-                lines = lines.split("const text =")
-                new_web = lines[0] + 'const text ='  + gt  + lines[1]
             with smart_open(html_path_2, 'w') as web_f_new:
                 web_f_new.write(new_web)

         setattr(torch.nn.Linear, "reset_parameters", lambda self: None)
         setattr(torch.nn.LayerNorm, "reset_parameters", lambda self: None)
+    def chat(self, tokenizer, image_file, ocr_type, ocr_box='', ocr_color='', render=False, save_render_file=None):
         self.disable_torch_init()
                 )
+            if render:
+                print('==============rendering===============')
+                from .render_tools import svg_to_html, content_mmd_to_html, tik_html, translation_table
+                outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
+                if outputs.endswith(stop_str):
+                    outputs = outputs[:-len(stop_str)]
+                outputs = outputs.strip()
+                if '**kern' in outputs:
+                    import verovio
+                    from cairosvg import svg2png
+                    import cv2
+                    import numpy as np
+                    tk = verovio.toolkit()
+                    tk.loadData(outputs)
+                    tk.setOptions({"pageWidth": 2100, "footer": 'none',
+                'barLineWidth': 0.5, 'beamMaxSlope': 15,
+                'staffLineWidth': 0.2, 'spacingStaff': 6})
+                    tk.getPageCount()
+                    svg = tk.renderToSVG()
+                    svg = svg.replace("overflow=\"inherit\"", "overflow=\"visible\"")
+                    svg_to_html(svg, save_render_file)
+                if ocr_type == 'format' and '**kern' not in outputs:
+                    if  '\\begin{tikzpicture}' not in outputs:
+                        html_path_2 = save_render_file
+                        right_num = outputs.count('\\right')
+                        left_num = outputs.count('\left')
+                        if right_num != left_num:
+                            outputs = outputs.replace('\left(', '(').replace('\\right)', ')').replace('\left[', '[').replace('\\right]', ']').replace('\left{', '{').replace('\\right}', '}').replace('\left|', '|').replace('\\right|', '|').replace('\left.', '.').replace('\\right.', '.')
+                        outputs = outputs.replace('"', '``').replace('$', '')
+                        outputs_list = outputs.split('\n')
+                        gt= ''
+                        for out in outputs_list:
+                            gt +=  '"' + out.replace('\\', '\\\\') + r'\n' + '"' + '+' + '\n'
+                        gt = gt[:-2]
+                        lines = content_mmd_to_html
+                        lines = lines.split("const text =")
+                        new_web = lines[0] + 'const text ='  + gt  + lines[1]
+                    else:
+                        html_path_2 = save_render_file
+                        outputs = outputs.translate(translation_table)
+                        outputs_list = outputs.split('\n')
+                        gt= ''
+                        for out in outputs_list:
+                            if out:
+                                if '\\begin{tikzpicture}' not in out and '\\end{tikzpicture}' not in out:
+                                    while out[-1] == ' ':
+                                        out = out[:-1]
+                                        if out is None:
+                                            break
+                                    if out:
+                                        if out[-1] != ';':
+                                            gt += out[:-1] + ';\n'
+                                        else:
+                                            gt += out + '\n'
+                                else:
+                                    gt += out + '\n'
+                        lines = tik_html
+                        lines = lines.split("const text =")
+                        new_web = lines[0] + gt + lines[1]
+                    with smart_open(html_path_2, 'w') as web_f_new:
+                        web_f_new.write(new_web)
     def dynamic_preprocess(self, image, min_num=1, max_num=6, image_size=1024, use_thumbnail=True):
         if render:
             print('==============rendering===============')
+            from .render_tools import content_mmd_to_html
             outputs = tokenizer.decode(output_ids[0, input_ids.shape[1]:]).strip()
             if outputs.endswith(stop_str):
                 outputs = outputs[:-len(stop_str)]
             outputs = outputs.strip()
             html_path_2 = save_render_file
             right_num = outputs.count('\\right')
             left_num = outputs.count('\left')
             gt = gt[:-2]
+            lines = content_mmd_to_html
+            lines = lines.split("const text =")
+            new_web = lines[0] + 'const text ='  + gt  + lines[1]
             with smart_open(html_path_2, 'w') as web_f_new:
                 web_f_new.write(new_web)