Upload folder using huggingface_hub
Browse files
- .gitattributes +2 -34
- .gitkeep +0 -0
- README.md +32 -0
- added_tokens.json +1021 -0
- chat_template.jinja +22 -0
- config.json +75 -0
- configuration_paddleocr_vl.py +191 -0
- generation_config.json +6 -0
- image_processing.py +569 -0
- inference.yml +2 -0
- model.safetensors +3 -0
- modeling_paddleocr_vl.py +0 -0
- preprocessor_config.json +33 -0
- processing_paddleocr_vl.py +293 -0
- processor_config.json +6 -0
- special_tokens_map.json +58 -0
- tokenizer.json +3 -0
- tokenizer.model +3 -0
- tokenizer_config.json +0 -0
    	
        .gitattributes
    CHANGED
    
    | @@ -1,35 +1,3 @@ | |
| 1 | 
            -
            *.7z filter=lfs diff=lfs merge=lfs -text
         | 
| 2 | 
            -
            *.arrow filter=lfs diff=lfs merge=lfs -text
         | 
| 3 | 
            -
            *.bin filter=lfs diff=lfs merge=lfs -text
         | 
| 4 | 
            -
            *.bz2 filter=lfs diff=lfs merge=lfs -text
         | 
| 5 | 
            -
            *.ckpt filter=lfs diff=lfs merge=lfs -text
         | 
| 6 | 
            -
            *.ftz filter=lfs diff=lfs merge=lfs -text
         | 
| 7 | 
            -
            *.gz filter=lfs diff=lfs merge=lfs -text
         | 
| 8 | 
            -
            *.h5 filter=lfs diff=lfs merge=lfs -text
         | 
| 9 | 
            -
            *.joblib filter=lfs diff=lfs merge=lfs -text
         | 
| 10 | 
            -
            *.lfs.* filter=lfs diff=lfs merge=lfs -text
         | 
| 11 | 
            -
            *.mlmodel filter=lfs diff=lfs merge=lfs -text
         | 
| 12 | 
            -
            *.model filter=lfs diff=lfs merge=lfs -text
         | 
| 13 | 
            -
            *.msgpack filter=lfs diff=lfs merge=lfs -text
         | 
| 14 | 
            -
            *.npy filter=lfs diff=lfs merge=lfs -text
         | 
| 15 | 
            -
            *.npz filter=lfs diff=lfs merge=lfs -text
         | 
| 16 | 
            -
            *.onnx filter=lfs diff=lfs merge=lfs -text
         | 
| 17 | 
            -
            *.ot filter=lfs diff=lfs merge=lfs -text
         | 
| 18 | 
            -
            *.parquet filter=lfs diff=lfs merge=lfs -text
         | 
| 19 | 
            -
            *.pb filter=lfs diff=lfs merge=lfs -text
         | 
| 20 | 
            -
            *.pickle filter=lfs diff=lfs merge=lfs -text
         | 
| 21 | 
            -
            *.pkl filter=lfs diff=lfs merge=lfs -text
         | 
| 22 | 
            -
            *.pt filter=lfs diff=lfs merge=lfs -text
         | 
| 23 | 
            -
            *.pth filter=lfs diff=lfs merge=lfs -text
         | 
| 24 | 
            -
            *.rar filter=lfs diff=lfs merge=lfs -text
         | 
| 25 | 
             
            *.safetensors filter=lfs diff=lfs merge=lfs -text
         | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 | 
            -
            *.tar filter=lfs diff=lfs merge=lfs -text
         | 
| 29 | 
            -
            *.tflite filter=lfs diff=lfs merge=lfs -text
         | 
| 30 | 
            -
            *.tgz filter=lfs diff=lfs merge=lfs -text
         | 
| 31 | 
            -
            *.wasm filter=lfs diff=lfs merge=lfs -text
         | 
| 32 | 
            -
            *.xz filter=lfs diff=lfs merge=lfs -text
         | 
| 33 | 
            -
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
            -
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
            -
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 1 | 
             
            *.safetensors filter=lfs diff=lfs merge=lfs -text
         | 
| 2 | 
            +
            tokenizer.json filter=lfs diff=lfs merge=lfs -text
         | 
| 3 | 
            +
            tokenizer.model filter=lfs diff=lfs merge=lfs -text
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
    	
        .gitkeep
    ADDED
    
    | 
            File without changes
         | 
    	
        README.md
    ADDED
    
    | @@ -0,0 +1,32 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # PaddleOCR-VL-0.9B
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            Duplicated from https://huggingface.co/PaddlePaddle/PaddleOCR-VL/tree/main/imgs.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            Example usage with `transformers`:
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            ```py
         | 
| 8 | 
            +
            from transformers import AutoModelForCausalLM, AutoProcessor
         | 
| 9 | 
            +
            import torch
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            DEVICE="cuda" if torch.cuda.is_available() else "mps" if torch.mps.is_available() else "cpu"
         | 
| 12 | 
            +
            model_id = "./PaddleOCR-VL-0.9B"
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            model = AutoModelForCausalLM.from_pretrained(
         | 
| 15 | 
            +
                model_id, trust_remote_code=True, dtype=torch.bfloat16
         | 
| 16 | 
            +
            ).to(DEVICE).eval()
         | 
| 17 | 
            +
            processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            from transformers.image_utils import load_image
         | 
| 20 | 
            +
            image_url = "https://fiverr-res.cloudinary.com/images/t_main1,q_auto,f_auto,q_auto,f_auto/gigs/154456946/original/41556aac80fc43dcb29ce656d786c0a6f9b4073f/do-handwritten-text-image-or-pdf-to-word-means-typing-form.jpg"
         | 
| 21 | 
            +
            image = load_image(image_url)
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            messages = [{"role": "user", "content": "OCR"}]
         | 
| 24 | 
            +
            text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         | 
| 25 | 
            +
            inputs = processor(text=[text], images=[image], return_tensors="pt").to(DEVICE)
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            generated = model.generate(**inputs, max_new_tokens=200, do_sample=False)
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            resp = processor.batch_decode(generated, skip_special_tokens=True)[0]
         | 
| 30 | 
            +
            answer = resp.split(text)[-1].strip()
         | 
| 31 | 
            +
            print(answer)
         | 
| 32 | 
            +
            ```
         | 
    	
        added_tokens.json
    ADDED
    
    | @@ -0,0 +1,1021 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "<ecel>": 101308,
         | 
| 3 | 
            +
              "<fcel>": 101309,
         | 
| 4 | 
            +
              "<lcel>": 101311,
         | 
| 5 | 
            +
              "<nl>": 101313,
         | 
| 6 | 
            +
              "<ucel>": 101312,
         | 
| 7 | 
            +
              "<xcel>": 101310,
         | 
| 8 | 
            +
              "<|AUDIO_PLACEHOLDER|>": 100296,
         | 
| 9 | 
            +
              "<|CROP_COL_SEP|>": 101301,
         | 
| 10 | 
            +
              "<|CROP_ROW_SEP|>": 101302,
         | 
| 11 | 
            +
              "<|IMAGE_END|>": 101306,
         | 
| 12 | 
            +
              "<|IMAGE_PLACEHOLDER|>": 100295,
         | 
| 13 | 
            +
              "<|IMAGE_SEP|>": 101303,
         | 
| 14 | 
            +
              "<|IMAGE_START|>": 101305,
         | 
| 15 | 
            +
              "<|LOC_0|>": 100297,
         | 
| 16 | 
            +
              "<|LOC_1000|>": 101297,
         | 
| 17 | 
            +
              "<|LOC_100|>": 100397,
         | 
| 18 | 
            +
              "<|LOC_101|>": 100398,
         | 
| 19 | 
            +
              "<|LOC_102|>": 100399,
         | 
| 20 | 
            +
              "<|LOC_103|>": 100400,
         | 
| 21 | 
            +
              "<|LOC_104|>": 100401,
         | 
| 22 | 
            +
              "<|LOC_105|>": 100402,
         | 
| 23 | 
            +
              "<|LOC_106|>": 100403,
         | 
| 24 | 
            +
              "<|LOC_107|>": 100404,
         | 
| 25 | 
            +
              "<|LOC_108|>": 100405,
         | 
| 26 | 
            +
              "<|LOC_109|>": 100406,
         | 
| 27 | 
            +
              "<|LOC_10|>": 100307,
         | 
| 28 | 
            +
              "<|LOC_110|>": 100407,
         | 
| 29 | 
            +
              "<|LOC_111|>": 100408,
         | 
| 30 | 
            +
              "<|LOC_112|>": 100409,
         | 
| 31 | 
            +
              "<|LOC_113|>": 100410,
         | 
| 32 | 
            +
              "<|LOC_114|>": 100411,
         | 
| 33 | 
            +
              "<|LOC_115|>": 100412,
         | 
| 34 | 
            +
              "<|LOC_116|>": 100413,
         | 
| 35 | 
            +
              "<|LOC_117|>": 100414,
         | 
| 36 | 
            +
              "<|LOC_118|>": 100415,
         | 
| 37 | 
            +
              "<|LOC_119|>": 100416,
         | 
| 38 | 
            +
              "<|LOC_11|>": 100308,
         | 
| 39 | 
            +
              "<|LOC_120|>": 100417,
         | 
| 40 | 
            +
              "<|LOC_121|>": 100418,
         | 
| 41 | 
            +
              "<|LOC_122|>": 100419,
         | 
| 42 | 
            +
              "<|LOC_123|>": 100420,
         | 
| 43 | 
            +
              "<|LOC_124|>": 100421,
         | 
| 44 | 
            +
              "<|LOC_125|>": 100422,
         | 
| 45 | 
            +
              "<|LOC_126|>": 100423,
         | 
| 46 | 
            +
              "<|LOC_127|>": 100424,
         | 
| 47 | 
            +
              "<|LOC_128|>": 100425,
         | 
| 48 | 
            +
              "<|LOC_129|>": 100426,
         | 
| 49 | 
            +
              "<|LOC_12|>": 100309,
         | 
| 50 | 
            +
              "<|LOC_130|>": 100427,
         | 
| 51 | 
            +
              "<|LOC_131|>": 100428,
         | 
| 52 | 
            +
              "<|LOC_132|>": 100429,
         | 
| 53 | 
            +
              "<|LOC_133|>": 100430,
         | 
| 54 | 
            +
              "<|LOC_134|>": 100431,
         | 
| 55 | 
            +
              "<|LOC_135|>": 100432,
         | 
| 56 | 
            +
              "<|LOC_136|>": 100433,
         | 
| 57 | 
            +
              "<|LOC_137|>": 100434,
         | 
| 58 | 
            +
              "<|LOC_138|>": 100435,
         | 
| 59 | 
            +
              "<|LOC_139|>": 100436,
         | 
| 60 | 
            +
              "<|LOC_13|>": 100310,
         | 
| 61 | 
            +
              "<|LOC_140|>": 100437,
         | 
| 62 | 
            +
              "<|LOC_141|>": 100438,
         | 
| 63 | 
            +
              "<|LOC_142|>": 100439,
         | 
| 64 | 
            +
              "<|LOC_143|>": 100440,
         | 
| 65 | 
            +
              "<|LOC_144|>": 100441,
         | 
| 66 | 
            +
              "<|LOC_145|>": 100442,
         | 
| 67 | 
            +
              "<|LOC_146|>": 100443,
         | 
| 68 | 
            +
              "<|LOC_147|>": 100444,
         | 
| 69 | 
            +
              "<|LOC_148|>": 100445,
         | 
| 70 | 
            +
              "<|LOC_149|>": 100446,
         | 
| 71 | 
            +
              "<|LOC_14|>": 100311,
         | 
| 72 | 
            +
              "<|LOC_150|>": 100447,
         | 
| 73 | 
            +
              "<|LOC_151|>": 100448,
         | 
| 74 | 
            +
              "<|LOC_152|>": 100449,
         | 
| 75 | 
            +
              "<|LOC_153|>": 100450,
         | 
| 76 | 
            +
              "<|LOC_154|>": 100451,
         | 
| 77 | 
            +
              "<|LOC_155|>": 100452,
         | 
| 78 | 
            +
              "<|LOC_156|>": 100453,
         | 
| 79 | 
            +
              "<|LOC_157|>": 100454,
         | 
| 80 | 
            +
              "<|LOC_158|>": 100455,
         | 
| 81 | 
            +
              "<|LOC_159|>": 100456,
         | 
| 82 | 
            +
              "<|LOC_15|>": 100312,
         | 
| 83 | 
            +
              "<|LOC_160|>": 100457,
         | 
| 84 | 
            +
              "<|LOC_161|>": 100458,
         | 
| 85 | 
            +
              "<|LOC_162|>": 100459,
         | 
| 86 | 
            +
              "<|LOC_163|>": 100460,
         | 
| 87 | 
            +
              "<|LOC_164|>": 100461,
         | 
| 88 | 
            +
              "<|LOC_165|>": 100462,
         | 
| 89 | 
            +
              "<|LOC_166|>": 100463,
         | 
| 90 | 
            +
              "<|LOC_167|>": 100464,
         | 
| 91 | 
            +
              "<|LOC_168|>": 100465,
         | 
| 92 | 
            +
              "<|LOC_169|>": 100466,
         | 
| 93 | 
            +
              "<|LOC_16|>": 100313,
         | 
| 94 | 
            +
              "<|LOC_170|>": 100467,
         | 
| 95 | 
            +
              "<|LOC_171|>": 100468,
         | 
| 96 | 
            +
              "<|LOC_172|>": 100469,
         | 
| 97 | 
            +
              "<|LOC_173|>": 100470,
         | 
| 98 | 
            +
              "<|LOC_174|>": 100471,
         | 
| 99 | 
            +
              "<|LOC_175|>": 100472,
         | 
| 100 | 
            +
              "<|LOC_176|>": 100473,
         | 
| 101 | 
            +
              "<|LOC_177|>": 100474,
         | 
| 102 | 
            +
              "<|LOC_178|>": 100475,
         | 
| 103 | 
            +
              "<|LOC_179|>": 100476,
         | 
| 104 | 
            +
              "<|LOC_17|>": 100314,
         | 
| 105 | 
            +
              "<|LOC_180|>": 100477,
         | 
| 106 | 
            +
              "<|LOC_181|>": 100478,
         | 
| 107 | 
            +
              "<|LOC_182|>": 100479,
         | 
| 108 | 
            +
              "<|LOC_183|>": 100480,
         | 
| 109 | 
            +
              "<|LOC_184|>": 100481,
         | 
| 110 | 
            +
              "<|LOC_185|>": 100482,
         | 
| 111 | 
            +
              "<|LOC_186|>": 100483,
         | 
| 112 | 
            +
              "<|LOC_187|>": 100484,
         | 
| 113 | 
            +
              "<|LOC_188|>": 100485,
         | 
| 114 | 
            +
              "<|LOC_189|>": 100486,
         | 
| 115 | 
            +
              "<|LOC_18|>": 100315,
         | 
| 116 | 
            +
              "<|LOC_190|>": 100487,
         | 
| 117 | 
            +
              "<|LOC_191|>": 100488,
         | 
| 118 | 
            +
              "<|LOC_192|>": 100489,
         | 
| 119 | 
            +
              "<|LOC_193|>": 100490,
         | 
| 120 | 
            +
              "<|LOC_194|>": 100491,
         | 
| 121 | 
            +
              "<|LOC_195|>": 100492,
         | 
| 122 | 
            +
              "<|LOC_196|>": 100493,
         | 
| 123 | 
            +
              "<|LOC_197|>": 100494,
         | 
| 124 | 
            +
              "<|LOC_198|>": 100495,
         | 
| 125 | 
            +
              "<|LOC_199|>": 100496,
         | 
| 126 | 
            +
              "<|LOC_19|>": 100316,
         | 
| 127 | 
            +
              "<|LOC_1|>": 100298,
         | 
| 128 | 
            +
              "<|LOC_200|>": 100497,
         | 
| 129 | 
            +
              "<|LOC_201|>": 100498,
         | 
| 130 | 
            +
              "<|LOC_202|>": 100499,
         | 
| 131 | 
            +
              "<|LOC_203|>": 100500,
         | 
| 132 | 
            +
              "<|LOC_204|>": 100501,
         | 
| 133 | 
            +
              "<|LOC_205|>": 100502,
         | 
| 134 | 
            +
              "<|LOC_206|>": 100503,
         | 
| 135 | 
            +
              "<|LOC_207|>": 100504,
         | 
| 136 | 
            +
              "<|LOC_208|>": 100505,
         | 
| 137 | 
            +
              "<|LOC_209|>": 100506,
         | 
| 138 | 
            +
              "<|LOC_20|>": 100317,
         | 
| 139 | 
            +
              "<|LOC_210|>": 100507,
         | 
| 140 | 
            +
              "<|LOC_211|>": 100508,
         | 
| 141 | 
            +
              "<|LOC_212|>": 100509,
         | 
| 142 | 
            +
              "<|LOC_213|>": 100510,
         | 
| 143 | 
            +
              "<|LOC_214|>": 100511,
         | 
| 144 | 
            +
              "<|LOC_215|>": 100512,
         | 
| 145 | 
            +
              "<|LOC_216|>": 100513,
         | 
| 146 | 
            +
              "<|LOC_217|>": 100514,
         | 
| 147 | 
            +
              "<|LOC_218|>": 100515,
         | 
| 148 | 
            +
              "<|LOC_219|>": 100516,
         | 
| 149 | 
            +
              "<|LOC_21|>": 100318,
         | 
| 150 | 
            +
              "<|LOC_220|>": 100517,
         | 
| 151 | 
            +
              "<|LOC_221|>": 100518,
         | 
| 152 | 
            +
              "<|LOC_222|>": 100519,
         | 
| 153 | 
            +
              "<|LOC_223|>": 100520,
         | 
| 154 | 
            +
              "<|LOC_224|>": 100521,
         | 
| 155 | 
            +
              "<|LOC_225|>": 100522,
         | 
| 156 | 
            +
              "<|LOC_226|>": 100523,
         | 
| 157 | 
            +
              "<|LOC_227|>": 100524,
         | 
| 158 | 
            +
              "<|LOC_228|>": 100525,
         | 
| 159 | 
            +
              "<|LOC_229|>": 100526,
         | 
| 160 | 
            +
              "<|LOC_22|>": 100319,
         | 
| 161 | 
            +
              "<|LOC_230|>": 100527,
         | 
| 162 | 
            +
              "<|LOC_231|>": 100528,
         | 
| 163 | 
            +
              "<|LOC_232|>": 100529,
         | 
| 164 | 
            +
              "<|LOC_233|>": 100530,
         | 
| 165 | 
            +
              "<|LOC_234|>": 100531,
         | 
| 166 | 
            +
              "<|LOC_235|>": 100532,
         | 
| 167 | 
            +
              "<|LOC_236|>": 100533,
         | 
| 168 | 
            +
              "<|LOC_237|>": 100534,
         | 
| 169 | 
            +
              "<|LOC_238|>": 100535,
         | 
| 170 | 
            +
              "<|LOC_239|>": 100536,
         | 
| 171 | 
            +
              "<|LOC_23|>": 100320,
         | 
| 172 | 
            +
              "<|LOC_240|>": 100537,
         | 
| 173 | 
            +
              "<|LOC_241|>": 100538,
         | 
| 174 | 
            +
              "<|LOC_242|>": 100539,
         | 
| 175 | 
            +
              "<|LOC_243|>": 100540,
         | 
| 176 | 
            +
              "<|LOC_244|>": 100541,
         | 
| 177 | 
            +
              "<|LOC_245|>": 100542,
         | 
| 178 | 
            +
              "<|LOC_246|>": 100543,
         | 
| 179 | 
            +
              "<|LOC_247|>": 100544,
         | 
| 180 | 
            +
              "<|LOC_248|>": 100545,
         | 
| 181 | 
            +
              "<|LOC_249|>": 100546,
         | 
| 182 | 
            +
              "<|LOC_24|>": 100321,
         | 
| 183 | 
            +
              "<|LOC_250|>": 100547,
         | 
| 184 | 
            +
              "<|LOC_251|>": 100548,
         | 
| 185 | 
            +
              "<|LOC_252|>": 100549,
         | 
| 186 | 
            +
              "<|LOC_253|>": 100550,
         | 
| 187 | 
            +
              "<|LOC_254|>": 100551,
         | 
| 188 | 
            +
              "<|LOC_255|>": 100552,
         | 
| 189 | 
            +
              "<|LOC_256|>": 100553,
         | 
| 190 | 
            +
              "<|LOC_257|>": 100554,
         | 
| 191 | 
            +
              "<|LOC_258|>": 100555,
         | 
| 192 | 
            +
              "<|LOC_259|>": 100556,
         | 
| 193 | 
            +
              "<|LOC_25|>": 100322,
         | 
| 194 | 
            +
              "<|LOC_260|>": 100557,
         | 
| 195 | 
            +
              "<|LOC_261|>": 100558,
         | 
| 196 | 
            +
              "<|LOC_262|>": 100559,
         | 
| 197 | 
            +
              "<|LOC_263|>": 100560,
         | 
| 198 | 
            +
              "<|LOC_264|>": 100561,
         | 
| 199 | 
            +
              "<|LOC_265|>": 100562,
         | 
| 200 | 
            +
              "<|LOC_266|>": 100563,
         | 
| 201 | 
            +
              "<|LOC_267|>": 100564,
         | 
| 202 | 
            +
              "<|LOC_268|>": 100565,
         | 
| 203 | 
            +
              "<|LOC_269|>": 100566,
         | 
| 204 | 
            +
              "<|LOC_26|>": 100323,
         | 
| 205 | 
            +
              "<|LOC_270|>": 100567,
         | 
| 206 | 
            +
              "<|LOC_271|>": 100568,
         | 
| 207 | 
            +
              "<|LOC_272|>": 100569,
         | 
| 208 | 
            +
              "<|LOC_273|>": 100570,
         | 
| 209 | 
            +
              "<|LOC_274|>": 100571,
         | 
| 210 | 
            +
              "<|LOC_275|>": 100572,
         | 
| 211 | 
            +
              "<|LOC_276|>": 100573,
         | 
| 212 | 
            +
              "<|LOC_277|>": 100574,
         | 
| 213 | 
            +
              "<|LOC_278|>": 100575,
         | 
| 214 | 
            +
              "<|LOC_279|>": 100576,
         | 
| 215 | 
            +
              "<|LOC_27|>": 100324,
         | 
| 216 | 
            +
              "<|LOC_280|>": 100577,
         | 
| 217 | 
            +
              "<|LOC_281|>": 100578,
         | 
| 218 | 
            +
              "<|LOC_282|>": 100579,
         | 
| 219 | 
            +
              "<|LOC_283|>": 100580,
         | 
| 220 | 
            +
              "<|LOC_284|>": 100581,
         | 
| 221 | 
            +
              "<|LOC_285|>": 100582,
         | 
| 222 | 
            +
              "<|LOC_286|>": 100583,
         | 
| 223 | 
            +
              "<|LOC_287|>": 100584,
         | 
| 224 | 
            +
              "<|LOC_288|>": 100585,
         | 
| 225 | 
            +
              "<|LOC_289|>": 100586,
         | 
| 226 | 
            +
              "<|LOC_28|>": 100325,
         | 
| 227 | 
            +
              "<|LOC_290|>": 100587,
         | 
| 228 | 
            +
              "<|LOC_291|>": 100588,
         | 
| 229 | 
            +
              "<|LOC_292|>": 100589,
         | 
| 230 | 
            +
              "<|LOC_293|>": 100590,
         | 
| 231 | 
            +
              "<|LOC_294|>": 100591,
         | 
| 232 | 
            +
              "<|LOC_295|>": 100592,
         | 
| 233 | 
            +
              "<|LOC_296|>": 100593,
         | 
| 234 | 
            +
              "<|LOC_297|>": 100594,
         | 
| 235 | 
            +
              "<|LOC_298|>": 100595,
         | 
| 236 | 
            +
              "<|LOC_299|>": 100596,
         | 
| 237 | 
            +
              "<|LOC_29|>": 100326,
         | 
| 238 | 
            +
              "<|LOC_2|>": 100299,
         | 
| 239 | 
            +
              "<|LOC_300|>": 100597,
         | 
| 240 | 
            +
              "<|LOC_301|>": 100598,
         | 
| 241 | 
            +
              "<|LOC_302|>": 100599,
         | 
| 242 | 
            +
              "<|LOC_303|>": 100600,
         | 
| 243 | 
            +
              "<|LOC_304|>": 100601,
         | 
| 244 | 
            +
              "<|LOC_305|>": 100602,
         | 
| 245 | 
            +
              "<|LOC_306|>": 100603,
         | 
| 246 | 
            +
              "<|LOC_307|>": 100604,
         | 
| 247 | 
            +
              "<|LOC_308|>": 100605,
         | 
| 248 | 
            +
              "<|LOC_309|>": 100606,
         | 
| 249 | 
            +
              "<|LOC_30|>": 100327,
         | 
| 250 | 
            +
              "<|LOC_310|>": 100607,
         | 
| 251 | 
            +
              "<|LOC_311|>": 100608,
         | 
| 252 | 
            +
              "<|LOC_312|>": 100609,
         | 
| 253 | 
            +
              "<|LOC_313|>": 100610,
         | 
| 254 | 
            +
              "<|LOC_314|>": 100611,
         | 
| 255 | 
            +
              "<|LOC_315|>": 100612,
         | 
| 256 | 
            +
              "<|LOC_316|>": 100613,
         | 
| 257 | 
            +
              "<|LOC_317|>": 100614,
         | 
| 258 | 
            +
              "<|LOC_318|>": 100615,
         | 
| 259 | 
            +
              "<|LOC_319|>": 100616,
         | 
| 260 | 
            +
              "<|LOC_31|>": 100328,
         | 
| 261 | 
            +
              "<|LOC_320|>": 100617,
         | 
| 262 | 
            +
              "<|LOC_321|>": 100618,
         | 
| 263 | 
            +
              "<|LOC_322|>": 100619,
         | 
| 264 | 
            +
              "<|LOC_323|>": 100620,
         | 
| 265 | 
            +
              "<|LOC_324|>": 100621,
         | 
| 266 | 
            +
              "<|LOC_325|>": 100622,
         | 
| 267 | 
            +
              "<|LOC_326|>": 100623,
         | 
| 268 | 
            +
              "<|LOC_327|>": 100624,
         | 
| 269 | 
            +
              "<|LOC_328|>": 100625,
         | 
| 270 | 
            +
              "<|LOC_329|>": 100626,
         | 
| 271 | 
            +
              "<|LOC_32|>": 100329,
         | 
| 272 | 
            +
              "<|LOC_330|>": 100627,
         | 
| 273 | 
            +
              "<|LOC_331|>": 100628,
         | 
| 274 | 
            +
              "<|LOC_332|>": 100629,
         | 
| 275 | 
            +
              "<|LOC_333|>": 100630,
         | 
| 276 | 
            +
              "<|LOC_334|>": 100631,
         | 
| 277 | 
            +
              "<|LOC_335|>": 100632,
         | 
| 278 | 
            +
              "<|LOC_336|>": 100633,
         | 
| 279 | 
            +
              "<|LOC_337|>": 100634,
         | 
| 280 | 
            +
              "<|LOC_338|>": 100635,
         | 
| 281 | 
            +
              "<|LOC_339|>": 100636,
         | 
| 282 | 
            +
              "<|LOC_33|>": 100330,
         | 
| 283 | 
            +
              "<|LOC_340|>": 100637,
         | 
| 284 | 
            +
              "<|LOC_341|>": 100638,
         | 
| 285 | 
            +
              "<|LOC_342|>": 100639,
         | 
| 286 | 
            +
              "<|LOC_343|>": 100640,
         | 
| 287 | 
            +
              "<|LOC_344|>": 100641,
         | 
| 288 | 
            +
              "<|LOC_345|>": 100642,
         | 
| 289 | 
            +
              "<|LOC_346|>": 100643,
         | 
| 290 | 
            +
              "<|LOC_347|>": 100644,
         | 
| 291 | 
            +
              "<|LOC_348|>": 100645,
         | 
| 292 | 
            +
              "<|LOC_349|>": 100646,
         | 
| 293 | 
            +
              "<|LOC_34|>": 100331,
         | 
| 294 | 
            +
              "<|LOC_350|>": 100647,
         | 
| 295 | 
            +
              "<|LOC_351|>": 100648,
         | 
| 296 | 
            +
              "<|LOC_352|>": 100649,
         | 
| 297 | 
            +
              "<|LOC_353|>": 100650,
         | 
| 298 | 
            +
              "<|LOC_354|>": 100651,
         | 
| 299 | 
            +
              "<|LOC_355|>": 100652,
         | 
| 300 | 
            +
              "<|LOC_356|>": 100653,
         | 
| 301 | 
            +
              "<|LOC_357|>": 100654,
         | 
| 302 | 
            +
              "<|LOC_358|>": 100655,
         | 
| 303 | 
            +
              "<|LOC_359|>": 100656,
         | 
| 304 | 
            +
              "<|LOC_35|>": 100332,
         | 
| 305 | 
            +
              "<|LOC_360|>": 100657,
         | 
| 306 | 
            +
              "<|LOC_361|>": 100658,
         | 
| 307 | 
            +
              "<|LOC_362|>": 100659,
         | 
| 308 | 
            +
              "<|LOC_363|>": 100660,
         | 
| 309 | 
            +
              "<|LOC_364|>": 100661,
         | 
| 310 | 
            +
              "<|LOC_365|>": 100662,
         | 
| 311 | 
            +
              "<|LOC_366|>": 100663,
         | 
| 312 | 
            +
              "<|LOC_367|>": 100664,
         | 
| 313 | 
            +
              "<|LOC_368|>": 100665,
         | 
| 314 | 
            +
              "<|LOC_369|>": 100666,
         | 
| 315 | 
            +
              "<|LOC_36|>": 100333,
         | 
| 316 | 
            +
              "<|LOC_370|>": 100667,
         | 
| 317 | 
            +
              "<|LOC_371|>": 100668,
         | 
| 318 | 
            +
              "<|LOC_372|>": 100669,
         | 
| 319 | 
            +
              "<|LOC_373|>": 100670,
         | 
| 320 | 
            +
              "<|LOC_374|>": 100671,
         | 
| 321 | 
            +
              "<|LOC_375|>": 100672,
         | 
| 322 | 
            +
              "<|LOC_376|>": 100673,
         | 
| 323 | 
            +
              "<|LOC_377|>": 100674,
         | 
| 324 | 
            +
              "<|LOC_378|>": 100675,
         | 
| 325 | 
            +
              "<|LOC_379|>": 100676,
         | 
| 326 | 
            +
              "<|LOC_37|>": 100334,
         | 
| 327 | 
            +
              "<|LOC_380|>": 100677,
         | 
| 328 | 
            +
              "<|LOC_381|>": 100678,
         | 
| 329 | 
            +
              "<|LOC_382|>": 100679,
         | 
| 330 | 
            +
              "<|LOC_383|>": 100680,
         | 
| 331 | 
            +
              "<|LOC_384|>": 100681,
         | 
| 332 | 
            +
              "<|LOC_385|>": 100682,
         | 
| 333 | 
            +
              "<|LOC_386|>": 100683,
         | 
| 334 | 
            +
              "<|LOC_387|>": 100684,
         | 
| 335 | 
            +
              "<|LOC_388|>": 100685,
         | 
| 336 | 
            +
              "<|LOC_389|>": 100686,
         | 
| 337 | 
            +
              "<|LOC_38|>": 100335,
         | 
| 338 | 
            +
              "<|LOC_390|>": 100687,
         | 
| 339 | 
            +
              "<|LOC_391|>": 100688,
         | 
| 340 | 
            +
              "<|LOC_392|>": 100689,
         | 
| 341 | 
            +
              "<|LOC_393|>": 100690,
         | 
| 342 | 
            +
              "<|LOC_394|>": 100691,
         | 
| 343 | 
            +
              "<|LOC_395|>": 100692,
         | 
| 344 | 
            +
              "<|LOC_396|>": 100693,
         | 
| 345 | 
            +
              "<|LOC_397|>": 100694,
         | 
| 346 | 
            +
              "<|LOC_398|>": 100695,
         | 
| 347 | 
            +
              "<|LOC_399|>": 100696,
         | 
| 348 | 
            +
              "<|LOC_39|>": 100336,
         | 
| 349 | 
            +
              "<|LOC_3|>": 100300,
         | 
| 350 | 
            +
              "<|LOC_400|>": 100697,
         | 
| 351 | 
            +
              "<|LOC_401|>": 100698,
         | 
| 352 | 
            +
              "<|LOC_402|>": 100699,
         | 
| 353 | 
            +
              "<|LOC_403|>": 100700,
         | 
| 354 | 
            +
              "<|LOC_404|>": 100701,
         | 
| 355 | 
            +
              "<|LOC_405|>": 100702,
         | 
| 356 | 
            +
              "<|LOC_406|>": 100703,
         | 
| 357 | 
            +
              "<|LOC_407|>": 100704,
         | 
| 358 | 
            +
              "<|LOC_408|>": 100705,
         | 
| 359 | 
            +
              "<|LOC_409|>": 100706,
         | 
| 360 | 
            +
              "<|LOC_40|>": 100337,
         | 
| 361 | 
            +
              "<|LOC_410|>": 100707,
         | 
| 362 | 
            +
              "<|LOC_411|>": 100708,
         | 
| 363 | 
            +
              "<|LOC_412|>": 100709,
         | 
| 364 | 
            +
              "<|LOC_413|>": 100710,
         | 
| 365 | 
            +
              "<|LOC_414|>": 100711,
         | 
| 366 | 
            +
              "<|LOC_415|>": 100712,
         | 
| 367 | 
            +
              "<|LOC_416|>": 100713,
         | 
| 368 | 
            +
              "<|LOC_417|>": 100714,
         | 
| 369 | 
            +
              "<|LOC_418|>": 100715,
         | 
| 370 | 
            +
              "<|LOC_419|>": 100716,
         | 
| 371 | 
            +
              "<|LOC_41|>": 100338,
         | 
| 372 | 
            +
              "<|LOC_420|>": 100717,
         | 
| 373 | 
            +
              "<|LOC_421|>": 100718,
         | 
| 374 | 
            +
              "<|LOC_422|>": 100719,
         | 
| 375 | 
            +
              "<|LOC_423|>": 100720,
         | 
| 376 | 
            +
              "<|LOC_424|>": 100721,
         | 
| 377 | 
            +
              "<|LOC_425|>": 100722,
         | 
| 378 | 
            +
              "<|LOC_426|>": 100723,
         | 
| 379 | 
            +
              "<|LOC_427|>": 100724,
         | 
| 380 | 
            +
              "<|LOC_428|>": 100725,
         | 
| 381 | 
            +
              "<|LOC_429|>": 100726,
         | 
| 382 | 
            +
              "<|LOC_42|>": 100339,
         | 
| 383 | 
            +
              "<|LOC_430|>": 100727,
         | 
| 384 | 
            +
              "<|LOC_431|>": 100728,
         | 
| 385 | 
            +
              "<|LOC_432|>": 100729,
         | 
| 386 | 
            +
              "<|LOC_433|>": 100730,
         | 
| 387 | 
            +
              "<|LOC_434|>": 100731,
         | 
| 388 | 
            +
              "<|LOC_435|>": 100732,
         | 
| 389 | 
            +
              "<|LOC_436|>": 100733,
         | 
| 390 | 
            +
              "<|LOC_437|>": 100734,
         | 
| 391 | 
            +
              "<|LOC_438|>": 100735,
         | 
| 392 | 
            +
              "<|LOC_439|>": 100736,
         | 
| 393 | 
            +
              "<|LOC_43|>": 100340,
         | 
| 394 | 
            +
              "<|LOC_440|>": 100737,
         | 
| 395 | 
            +
              "<|LOC_441|>": 100738,
         | 
| 396 | 
            +
              "<|LOC_442|>": 100739,
         | 
| 397 | 
            +
              "<|LOC_443|>": 100740,
         | 
| 398 | 
            +
              "<|LOC_444|>": 100741,
         | 
| 399 | 
            +
              "<|LOC_445|>": 100742,
         | 
| 400 | 
            +
              "<|LOC_446|>": 100743,
         | 
| 401 | 
            +
              "<|LOC_447|>": 100744,
         | 
| 402 | 
            +
              "<|LOC_448|>": 100745,
         | 
| 403 | 
            +
              "<|LOC_449|>": 100746,
         | 
| 404 | 
            +
              "<|LOC_44|>": 100341,
         | 
| 405 | 
            +
              "<|LOC_450|>": 100747,
         | 
| 406 | 
            +
              "<|LOC_451|>": 100748,
         | 
| 407 | 
            +
              "<|LOC_452|>": 100749,
         | 
| 408 | 
            +
              "<|LOC_453|>": 100750,
         | 
| 409 | 
            +
              "<|LOC_454|>": 100751,
         | 
| 410 | 
            +
              "<|LOC_455|>": 100752,
         | 
| 411 | 
            +
              "<|LOC_456|>": 100753,
         | 
| 412 | 
            +
              "<|LOC_457|>": 100754,
         | 
| 413 | 
            +
              "<|LOC_458|>": 100755,
         | 
| 414 | 
            +
              "<|LOC_459|>": 100756,
         | 
| 415 | 
            +
              "<|LOC_45|>": 100342,
         | 
| 416 | 
            +
              "<|LOC_460|>": 100757,
         | 
| 417 | 
            +
              "<|LOC_461|>": 100758,
         | 
| 418 | 
            +
              "<|LOC_462|>": 100759,
         | 
| 419 | 
            +
              "<|LOC_463|>": 100760,
         | 
| 420 | 
            +
              "<|LOC_464|>": 100761,
         | 
| 421 | 
            +
              "<|LOC_465|>": 100762,
         | 
| 422 | 
            +
              "<|LOC_466|>": 100763,
         | 
| 423 | 
            +
              "<|LOC_467|>": 100764,
         | 
| 424 | 
            +
              "<|LOC_468|>": 100765,
         | 
| 425 | 
            +
              "<|LOC_469|>": 100766,
         | 
| 426 | 
            +
              "<|LOC_46|>": 100343,
         | 
| 427 | 
            +
              "<|LOC_470|>": 100767,
         | 
| 428 | 
            +
              "<|LOC_471|>": 100768,
         | 
| 429 | 
            +
              "<|LOC_472|>": 100769,
         | 
| 430 | 
            +
              "<|LOC_473|>": 100770,
         | 
| 431 | 
            +
              "<|LOC_474|>": 100771,
         | 
| 432 | 
            +
              "<|LOC_475|>": 100772,
         | 
| 433 | 
            +
              "<|LOC_476|>": 100773,
         | 
| 434 | 
            +
              "<|LOC_477|>": 100774,
         | 
| 435 | 
            +
              "<|LOC_478|>": 100775,
         | 
| 436 | 
            +
              "<|LOC_479|>": 100776,
         | 
| 437 | 
            +
              "<|LOC_47|>": 100344,
         | 
| 438 | 
            +
              "<|LOC_480|>": 100777,
         | 
| 439 | 
            +
              "<|LOC_481|>": 100778,
         | 
| 440 | 
            +
              "<|LOC_482|>": 100779,
         | 
| 441 | 
            +
              "<|LOC_483|>": 100780,
         | 
| 442 | 
            +
              "<|LOC_484|>": 100781,
         | 
| 443 | 
            +
              "<|LOC_485|>": 100782,
         | 
| 444 | 
            +
              "<|LOC_486|>": 100783,
         | 
| 445 | 
            +
              "<|LOC_487|>": 100784,
         | 
| 446 | 
            +
              "<|LOC_488|>": 100785,
         | 
| 447 | 
            +
              "<|LOC_489|>": 100786,
         | 
| 448 | 
            +
              "<|LOC_48|>": 100345,
         | 
| 449 | 
            +
              "<|LOC_490|>": 100787,
         | 
| 450 | 
            +
              "<|LOC_491|>": 100788,
         | 
| 451 | 
            +
              "<|LOC_492|>": 100789,
         | 
| 452 | 
            +
              "<|LOC_493|>": 100790,
         | 
| 453 | 
            +
              "<|LOC_494|>": 100791,
         | 
| 454 | 
            +
              "<|LOC_495|>": 100792,
         | 
| 455 | 
            +
              "<|LOC_496|>": 100793,
         | 
| 456 | 
            +
              "<|LOC_497|>": 100794,
         | 
| 457 | 
            +
              "<|LOC_498|>": 100795,
         | 
| 458 | 
            +
              "<|LOC_499|>": 100796,
         | 
| 459 | 
            +
              "<|LOC_49|>": 100346,
         | 
| 460 | 
            +
              "<|LOC_4|>": 100301,
         | 
| 461 | 
            +
              "<|LOC_500|>": 100797,
         | 
| 462 | 
            +
              "<|LOC_501|>": 100798,
         | 
| 463 | 
            +
              "<|LOC_502|>": 100799,
         | 
| 464 | 
            +
              "<|LOC_503|>": 100800,
         | 
| 465 | 
            +
              "<|LOC_504|>": 100801,
         | 
| 466 | 
            +
              "<|LOC_505|>": 100802,
         | 
| 467 | 
            +
              "<|LOC_506|>": 100803,
         | 
| 468 | 
            +
              "<|LOC_507|>": 100804,
         | 
| 469 | 
            +
              "<|LOC_508|>": 100805,
         | 
| 470 | 
            +
              "<|LOC_509|>": 100806,
         | 
| 471 | 
            +
              "<|LOC_50|>": 100347,
         | 
| 472 | 
            +
              "<|LOC_510|>": 100807,
         | 
| 473 | 
            +
              "<|LOC_511|>": 100808,
         | 
| 474 | 
            +
              "<|LOC_512|>": 100809,
         | 
| 475 | 
            +
              "<|LOC_513|>": 100810,
         | 
| 476 | 
            +
              "<|LOC_514|>": 100811,
         | 
| 477 | 
            +
              "<|LOC_515|>": 100812,
         | 
| 478 | 
            +
              "<|LOC_516|>": 100813,
         | 
| 479 | 
            +
              "<|LOC_517|>": 100814,
         | 
| 480 | 
            +
              "<|LOC_518|>": 100815,
         | 
| 481 | 
            +
              "<|LOC_519|>": 100816,
         | 
| 482 | 
            +
              "<|LOC_51|>": 100348,
         | 
| 483 | 
            +
              "<|LOC_520|>": 100817,
         | 
| 484 | 
            +
              "<|LOC_521|>": 100818,
         | 
| 485 | 
            +
              "<|LOC_522|>": 100819,
         | 
| 486 | 
            +
              "<|LOC_523|>": 100820,
         | 
| 487 | 
            +
              "<|LOC_524|>": 100821,
         | 
| 488 | 
            +
              "<|LOC_525|>": 100822,
         | 
| 489 | 
            +
              "<|LOC_526|>": 100823,
         | 
| 490 | 
            +
              "<|LOC_527|>": 100824,
         | 
| 491 | 
            +
              "<|LOC_528|>": 100825,
         | 
| 492 | 
            +
              "<|LOC_529|>": 100826,
         | 
| 493 | 
            +
              "<|LOC_52|>": 100349,
         | 
| 494 | 
            +
              "<|LOC_530|>": 100827,
         | 
| 495 | 
            +
              "<|LOC_531|>": 100828,
         | 
| 496 | 
            +
              "<|LOC_532|>": 100829,
         | 
| 497 | 
            +
              "<|LOC_533|>": 100830,
         | 
| 498 | 
            +
              "<|LOC_534|>": 100831,
         | 
| 499 | 
            +
              "<|LOC_535|>": 100832,
         | 
| 500 | 
            +
              "<|LOC_536|>": 100833,
         | 
| 501 | 
            +
              "<|LOC_537|>": 100834,
         | 
| 502 | 
            +
              "<|LOC_538|>": 100835,
         | 
| 503 | 
            +
              "<|LOC_539|>": 100836,
         | 
| 504 | 
            +
              "<|LOC_53|>": 100350,
         | 
| 505 | 
            +
              "<|LOC_540|>": 100837,
         | 
| 506 | 
            +
              "<|LOC_541|>": 100838,
         | 
| 507 | 
            +
              "<|LOC_542|>": 100839,
         | 
| 508 | 
            +
              "<|LOC_543|>": 100840,
         | 
| 509 | 
            +
              "<|LOC_544|>": 100841,
         | 
| 510 | 
            +
              "<|LOC_545|>": 100842,
         | 
| 511 | 
            +
              "<|LOC_546|>": 100843,
         | 
| 512 | 
            +
              "<|LOC_547|>": 100844,
         | 
| 513 | 
            +
              "<|LOC_548|>": 100845,
         | 
| 514 | 
            +
              "<|LOC_549|>": 100846,
         | 
| 515 | 
            +
              "<|LOC_54|>": 100351,
         | 
| 516 | 
            +
              "<|LOC_550|>": 100847,
         | 
| 517 | 
            +
              "<|LOC_551|>": 100848,
         | 
| 518 | 
            +
              "<|LOC_552|>": 100849,
         | 
| 519 | 
            +
              "<|LOC_553|>": 100850,
         | 
| 520 | 
            +
              "<|LOC_554|>": 100851,
         | 
| 521 | 
            +
              "<|LOC_555|>": 100852,
         | 
| 522 | 
            +
              "<|LOC_556|>": 100853,
         | 
| 523 | 
            +
              "<|LOC_557|>": 100854,
         | 
| 524 | 
            +
              "<|LOC_558|>": 100855,
         | 
| 525 | 
            +
              "<|LOC_559|>": 100856,
         | 
| 526 | 
            +
              "<|LOC_55|>": 100352,
         | 
| 527 | 
            +
              "<|LOC_560|>": 100857,
         | 
| 528 | 
            +
              "<|LOC_561|>": 100858,
         | 
| 529 | 
            +
              "<|LOC_562|>": 100859,
         | 
| 530 | 
            +
              "<|LOC_563|>": 100860,
         | 
| 531 | 
            +
              "<|LOC_564|>": 100861,
         | 
| 532 | 
            +
              "<|LOC_565|>": 100862,
         | 
| 533 | 
            +
              "<|LOC_566|>": 100863,
         | 
| 534 | 
            +
              "<|LOC_567|>": 100864,
         | 
| 535 | 
            +
              "<|LOC_568|>": 100865,
         | 
| 536 | 
            +
              "<|LOC_569|>": 100866,
         | 
| 537 | 
            +
              "<|LOC_56|>": 100353,
         | 
| 538 | 
            +
              "<|LOC_570|>": 100867,
         | 
| 539 | 
            +
              "<|LOC_571|>": 100868,
         | 
| 540 | 
            +
              "<|LOC_572|>": 100869,
         | 
| 541 | 
            +
              "<|LOC_573|>": 100870,
         | 
| 542 | 
            +
              "<|LOC_574|>": 100871,
         | 
| 543 | 
            +
              "<|LOC_575|>": 100872,
         | 
| 544 | 
            +
              "<|LOC_576|>": 100873,
         | 
| 545 | 
            +
              "<|LOC_577|>": 100874,
         | 
| 546 | 
            +
              "<|LOC_578|>": 100875,
         | 
| 547 | 
            +
              "<|LOC_579|>": 100876,
         | 
| 548 | 
            +
              "<|LOC_57|>": 100354,
         | 
| 549 | 
            +
              "<|LOC_580|>": 100877,
         | 
| 550 | 
            +
              "<|LOC_581|>": 100878,
         | 
| 551 | 
            +
              "<|LOC_582|>": 100879,
         | 
| 552 | 
            +
              "<|LOC_583|>": 100880,
         | 
| 553 | 
            +
              "<|LOC_584|>": 100881,
         | 
| 554 | 
            +
              "<|LOC_585|>": 100882,
         | 
| 555 | 
            +
              "<|LOC_586|>": 100883,
         | 
| 556 | 
            +
              "<|LOC_587|>": 100884,
         | 
| 557 | 
            +
              "<|LOC_588|>": 100885,
         | 
| 558 | 
            +
              "<|LOC_589|>": 100886,
         | 
| 559 | 
            +
              "<|LOC_58|>": 100355,
         | 
| 560 | 
            +
              "<|LOC_590|>": 100887,
         | 
| 561 | 
            +
              "<|LOC_591|>": 100888,
         | 
| 562 | 
            +
              "<|LOC_592|>": 100889,
         | 
| 563 | 
            +
              "<|LOC_593|>": 100890,
         | 
| 564 | 
            +
              "<|LOC_594|>": 100891,
         | 
| 565 | 
            +
              "<|LOC_595|>": 100892,
         | 
| 566 | 
            +
              "<|LOC_596|>": 100893,
         | 
| 567 | 
            +
              "<|LOC_597|>": 100894,
         | 
| 568 | 
            +
              "<|LOC_598|>": 100895,
         | 
| 569 | 
            +
              "<|LOC_599|>": 100896,
         | 
| 570 | 
            +
              "<|LOC_59|>": 100356,
         | 
| 571 | 
            +
              "<|LOC_5|>": 100302,
         | 
| 572 | 
            +
              "<|LOC_600|>": 100897,
         | 
| 573 | 
            +
              "<|LOC_601|>": 100898,
         | 
| 574 | 
            +
              "<|LOC_602|>": 100899,
         | 
| 575 | 
            +
              "<|LOC_603|>": 100900,
         | 
| 576 | 
            +
              "<|LOC_604|>": 100901,
         | 
| 577 | 
            +
              "<|LOC_605|>": 100902,
         | 
| 578 | 
            +
              "<|LOC_606|>": 100903,
         | 
| 579 | 
            +
              "<|LOC_607|>": 100904,
         | 
| 580 | 
            +
              "<|LOC_608|>": 100905,
         | 
| 581 | 
            +
              "<|LOC_609|>": 100906,
         | 
| 582 | 
            +
              "<|LOC_60|>": 100357,
         | 
| 583 | 
            +
              "<|LOC_610|>": 100907,
         | 
| 584 | 
            +
              "<|LOC_611|>": 100908,
         | 
| 585 | 
            +
              "<|LOC_612|>": 100909,
         | 
| 586 | 
            +
              "<|LOC_613|>": 100910,
         | 
| 587 | 
            +
              "<|LOC_614|>": 100911,
         | 
| 588 | 
            +
              "<|LOC_615|>": 100912,
         | 
| 589 | 
            +
              "<|LOC_616|>": 100913,
         | 
| 590 | 
            +
              "<|LOC_617|>": 100914,
         | 
| 591 | 
            +
              "<|LOC_618|>": 100915,
         | 
| 592 | 
            +
              "<|LOC_619|>": 100916,
         | 
| 593 | 
            +
              "<|LOC_61|>": 100358,
         | 
| 594 | 
            +
              "<|LOC_620|>": 100917,
         | 
| 595 | 
            +
              "<|LOC_621|>": 100918,
         | 
| 596 | 
            +
              "<|LOC_622|>": 100919,
         | 
| 597 | 
            +
              "<|LOC_623|>": 100920,
         | 
| 598 | 
            +
              "<|LOC_624|>": 100921,
         | 
| 599 | 
            +
              "<|LOC_625|>": 100922,
         | 
| 600 | 
            +
              "<|LOC_626|>": 100923,
         | 
| 601 | 
            +
              "<|LOC_627|>": 100924,
         | 
| 602 | 
            +
              "<|LOC_628|>": 100925,
         | 
| 603 | 
            +
              "<|LOC_629|>": 100926,
         | 
| 604 | 
            +
              "<|LOC_62|>": 100359,
         | 
| 605 | 
            +
              "<|LOC_630|>": 100927,
         | 
| 606 | 
            +
              "<|LOC_631|>": 100928,
         | 
| 607 | 
            +
              "<|LOC_632|>": 100929,
         | 
| 608 | 
            +
              "<|LOC_633|>": 100930,
         | 
| 609 | 
            +
              "<|LOC_634|>": 100931,
         | 
| 610 | 
            +
              "<|LOC_635|>": 100932,
         | 
| 611 | 
            +
              "<|LOC_636|>": 100933,
         | 
| 612 | 
            +
              "<|LOC_637|>": 100934,
         | 
| 613 | 
            +
              "<|LOC_638|>": 100935,
         | 
| 614 | 
            +
              "<|LOC_639|>": 100936,
         | 
| 615 | 
            +
              "<|LOC_63|>": 100360,
         | 
| 616 | 
            +
              "<|LOC_640|>": 100937,
         | 
| 617 | 
            +
              "<|LOC_641|>": 100938,
         | 
| 618 | 
            +
              "<|LOC_642|>": 100939,
         | 
| 619 | 
            +
              "<|LOC_643|>": 100940,
         | 
| 620 | 
            +
              "<|LOC_644|>": 100941,
         | 
| 621 | 
            +
              "<|LOC_645|>": 100942,
         | 
| 622 | 
            +
              "<|LOC_646|>": 100943,
         | 
| 623 | 
            +
              "<|LOC_647|>": 100944,
         | 
| 624 | 
            +
              "<|LOC_648|>": 100945,
         | 
| 625 | 
            +
              "<|LOC_649|>": 100946,
         | 
| 626 | 
            +
              "<|LOC_64|>": 100361,
         | 
| 627 | 
            +
              "<|LOC_650|>": 100947,
         | 
| 628 | 
            +
              "<|LOC_651|>": 100948,
         | 
| 629 | 
            +
              "<|LOC_652|>": 100949,
         | 
| 630 | 
            +
              "<|LOC_653|>": 100950,
         | 
| 631 | 
            +
              "<|LOC_654|>": 100951,
         | 
| 632 | 
            +
              "<|LOC_655|>": 100952,
         | 
| 633 | 
            +
              "<|LOC_656|>": 100953,
         | 
| 634 | 
            +
              "<|LOC_657|>": 100954,
         | 
| 635 | 
            +
              "<|LOC_658|>": 100955,
         | 
| 636 | 
            +
              "<|LOC_659|>": 100956,
         | 
| 637 | 
            +
              "<|LOC_65|>": 100362,
         | 
| 638 | 
            +
              "<|LOC_660|>": 100957,
         | 
| 639 | 
            +
              "<|LOC_661|>": 100958,
         | 
| 640 | 
            +
              "<|LOC_662|>": 100959,
         | 
| 641 | 
            +
              "<|LOC_663|>": 100960,
         | 
| 642 | 
            +
              "<|LOC_664|>": 100961,
         | 
| 643 | 
            +
              "<|LOC_665|>": 100962,
         | 
| 644 | 
            +
              "<|LOC_666|>": 100963,
         | 
| 645 | 
            +
              "<|LOC_667|>": 100964,
         | 
| 646 | 
            +
              "<|LOC_668|>": 100965,
         | 
| 647 | 
            +
              "<|LOC_669|>": 100966,
         | 
| 648 | 
            +
              "<|LOC_66|>": 100363,
         | 
| 649 | 
            +
              "<|LOC_670|>": 100967,
         | 
| 650 | 
            +
              "<|LOC_671|>": 100968,
         | 
| 651 | 
            +
              "<|LOC_672|>": 100969,
         | 
| 652 | 
            +
              "<|LOC_673|>": 100970,
         | 
| 653 | 
            +
              "<|LOC_674|>": 100971,
         | 
| 654 | 
            +
              "<|LOC_675|>": 100972,
         | 
| 655 | 
            +
              "<|LOC_676|>": 100973,
         | 
| 656 | 
            +
              "<|LOC_677|>": 100974,
         | 
| 657 | 
            +
              "<|LOC_678|>": 100975,
         | 
| 658 | 
            +
              "<|LOC_679|>": 100976,
         | 
| 659 | 
            +
              "<|LOC_67|>": 100364,
         | 
| 660 | 
            +
              "<|LOC_680|>": 100977,
         | 
| 661 | 
            +
              "<|LOC_681|>": 100978,
         | 
| 662 | 
            +
              "<|LOC_682|>": 100979,
         | 
| 663 | 
            +
              "<|LOC_683|>": 100980,
         | 
| 664 | 
            +
              "<|LOC_684|>": 100981,
         | 
| 665 | 
            +
              "<|LOC_685|>": 100982,
         | 
| 666 | 
            +
              "<|LOC_686|>": 100983,
         | 
| 667 | 
            +
              "<|LOC_687|>": 100984,
         | 
| 668 | 
            +
              "<|LOC_688|>": 100985,
         | 
| 669 | 
            +
              "<|LOC_689|>": 100986,
         | 
| 670 | 
            +
              "<|LOC_68|>": 100365,
         | 
| 671 | 
            +
              "<|LOC_690|>": 100987,
         | 
| 672 | 
            +
              "<|LOC_691|>": 100988,
         | 
| 673 | 
            +
              "<|LOC_692|>": 100989,
         | 
| 674 | 
            +
              "<|LOC_693|>": 100990,
         | 
| 675 | 
            +
              "<|LOC_694|>": 100991,
         | 
| 676 | 
            +
              "<|LOC_695|>": 100992,
         | 
| 677 | 
            +
              "<|LOC_696|>": 100993,
         | 
| 678 | 
            +
              "<|LOC_697|>": 100994,
         | 
| 679 | 
            +
              "<|LOC_698|>": 100995,
         | 
| 680 | 
            +
              "<|LOC_699|>": 100996,
         | 
| 681 | 
            +
              "<|LOC_69|>": 100366,
         | 
| 682 | 
            +
              "<|LOC_6|>": 100303,
         | 
| 683 | 
            +
              "<|LOC_700|>": 100997,
         | 
| 684 | 
            +
              "<|LOC_701|>": 100998,
         | 
| 685 | 
            +
              "<|LOC_702|>": 100999,
         | 
| 686 | 
            +
              "<|LOC_703|>": 101000,
         | 
| 687 | 
            +
              "<|LOC_704|>": 101001,
         | 
| 688 | 
            +
              "<|LOC_705|>": 101002,
         | 
| 689 | 
            +
              "<|LOC_706|>": 101003,
         | 
| 690 | 
            +
              "<|LOC_707|>": 101004,
         | 
| 691 | 
            +
              "<|LOC_708|>": 101005,
         | 
| 692 | 
            +
              "<|LOC_709|>": 101006,
         | 
| 693 | 
            +
              "<|LOC_70|>": 100367,
         | 
| 694 | 
            +
              "<|LOC_710|>": 101007,
         | 
| 695 | 
            +
              "<|LOC_711|>": 101008,
         | 
| 696 | 
            +
              "<|LOC_712|>": 101009,
         | 
| 697 | 
            +
              "<|LOC_713|>": 101010,
         | 
| 698 | 
            +
              "<|LOC_714|>": 101011,
         | 
| 699 | 
            +
              "<|LOC_715|>": 101012,
         | 
| 700 | 
            +
              "<|LOC_716|>": 101013,
         | 
| 701 | 
            +
              "<|LOC_717|>": 101014,
         | 
| 702 | 
            +
              "<|LOC_718|>": 101015,
         | 
| 703 | 
            +
              "<|LOC_719|>": 101016,
         | 
| 704 | 
            +
              "<|LOC_71|>": 100368,
         | 
| 705 | 
            +
              "<|LOC_720|>": 101017,
         | 
| 706 | 
            +
              "<|LOC_721|>": 101018,
         | 
| 707 | 
            +
              "<|LOC_722|>": 101019,
         | 
| 708 | 
            +
              "<|LOC_723|>": 101020,
         | 
| 709 | 
            +
              "<|LOC_724|>": 101021,
         | 
| 710 | 
            +
              "<|LOC_725|>": 101022,
         | 
| 711 | 
            +
              "<|LOC_726|>": 101023,
         | 
| 712 | 
            +
              "<|LOC_727|>": 101024,
         | 
| 713 | 
            +
              "<|LOC_728|>": 101025,
         | 
| 714 | 
            +
              "<|LOC_729|>": 101026,
         | 
| 715 | 
            +
              "<|LOC_72|>": 100369,
         | 
| 716 | 
            +
              "<|LOC_730|>": 101027,
         | 
| 717 | 
            +
              "<|LOC_731|>": 101028,
         | 
| 718 | 
            +
              "<|LOC_732|>": 101029,
         | 
| 719 | 
            +
              "<|LOC_733|>": 101030,
         | 
| 720 | 
            +
              "<|LOC_734|>": 101031,
         | 
| 721 | 
            +
              "<|LOC_735|>": 101032,
         | 
| 722 | 
            +
              "<|LOC_736|>": 101033,
         | 
| 723 | 
            +
              "<|LOC_737|>": 101034,
         | 
| 724 | 
            +
              "<|LOC_738|>": 101035,
         | 
| 725 | 
            +
              "<|LOC_739|>": 101036,
         | 
| 726 | 
            +
              "<|LOC_73|>": 100370,
         | 
| 727 | 
            +
              "<|LOC_740|>": 101037,
         | 
| 728 | 
            +
              "<|LOC_741|>": 101038,
         | 
| 729 | 
            +
              "<|LOC_742|>": 101039,
         | 
| 730 | 
            +
              "<|LOC_743|>": 101040,
         | 
| 731 | 
            +
              "<|LOC_744|>": 101041,
         | 
| 732 | 
            +
              "<|LOC_745|>": 101042,
         | 
| 733 | 
            +
              "<|LOC_746|>": 101043,
         | 
| 734 | 
            +
              "<|LOC_747|>": 101044,
         | 
| 735 | 
            +
              "<|LOC_748|>": 101045,
         | 
| 736 | 
            +
              "<|LOC_749|>": 101046,
         | 
| 737 | 
            +
              "<|LOC_74|>": 100371,
         | 
| 738 | 
            +
              "<|LOC_750|>": 101047,
         | 
| 739 | 
            +
              "<|LOC_751|>": 101048,
         | 
| 740 | 
            +
              "<|LOC_752|>": 101049,
         | 
| 741 | 
            +
              "<|LOC_753|>": 101050,
         | 
| 742 | 
            +
              "<|LOC_754|>": 101051,
         | 
| 743 | 
            +
              "<|LOC_755|>": 101052,
         | 
| 744 | 
            +
              "<|LOC_756|>": 101053,
         | 
| 745 | 
            +
              "<|LOC_757|>": 101054,
         | 
| 746 | 
            +
              "<|LOC_758|>": 101055,
         | 
| 747 | 
            +
              "<|LOC_759|>": 101056,
         | 
| 748 | 
            +
              "<|LOC_75|>": 100372,
         | 
| 749 | 
            +
              "<|LOC_760|>": 101057,
         | 
| 750 | 
            +
              "<|LOC_761|>": 101058,
         | 
| 751 | 
            +
              "<|LOC_762|>": 101059,
         | 
| 752 | 
            +
              "<|LOC_763|>": 101060,
         | 
| 753 | 
            +
              "<|LOC_764|>": 101061,
         | 
| 754 | 
            +
              "<|LOC_765|>": 101062,
         | 
| 755 | 
            +
              "<|LOC_766|>": 101063,
         | 
| 756 | 
            +
              "<|LOC_767|>": 101064,
         | 
| 757 | 
            +
              "<|LOC_768|>": 101065,
         | 
| 758 | 
            +
              "<|LOC_769|>": 101066,
         | 
| 759 | 
            +
              "<|LOC_76|>": 100373,
         | 
| 760 | 
            +
              "<|LOC_770|>": 101067,
         | 
| 761 | 
            +
              "<|LOC_771|>": 101068,
         | 
| 762 | 
            +
              "<|LOC_772|>": 101069,
         | 
| 763 | 
            +
              "<|LOC_773|>": 101070,
         | 
| 764 | 
            +
              "<|LOC_774|>": 101071,
         | 
| 765 | 
            +
              "<|LOC_775|>": 101072,
         | 
| 766 | 
            +
              "<|LOC_776|>": 101073,
         | 
| 767 | 
            +
              "<|LOC_777|>": 101074,
         | 
| 768 | 
            +
              "<|LOC_778|>": 101075,
         | 
| 769 | 
            +
              "<|LOC_779|>": 101076,
         | 
| 770 | 
            +
              "<|LOC_77|>": 100374,
         | 
| 771 | 
            +
              "<|LOC_780|>": 101077,
         | 
| 772 | 
            +
              "<|LOC_781|>": 101078,
         | 
| 773 | 
            +
              "<|LOC_782|>": 101079,
         | 
| 774 | 
            +
              "<|LOC_783|>": 101080,
         | 
| 775 | 
            +
              "<|LOC_784|>": 101081,
         | 
| 776 | 
            +
              "<|LOC_785|>": 101082,
         | 
| 777 | 
            +
              "<|LOC_786|>": 101083,
         | 
| 778 | 
            +
              "<|LOC_787|>": 101084,
         | 
| 779 | 
            +
              "<|LOC_788|>": 101085,
         | 
| 780 | 
            +
              "<|LOC_789|>": 101086,
         | 
| 781 | 
            +
              "<|LOC_78|>": 100375,
         | 
| 782 | 
            +
              "<|LOC_790|>": 101087,
         | 
| 783 | 
            +
              "<|LOC_791|>": 101088,
         | 
| 784 | 
            +
              "<|LOC_792|>": 101089,
         | 
| 785 | 
            +
              "<|LOC_793|>": 101090,
         | 
| 786 | 
            +
              "<|LOC_794|>": 101091,
         | 
| 787 | 
            +
              "<|LOC_795|>": 101092,
         | 
| 788 | 
            +
              "<|LOC_796|>": 101093,
         | 
| 789 | 
            +
              "<|LOC_797|>": 101094,
         | 
| 790 | 
            +
              "<|LOC_798|>": 101095,
         | 
| 791 | 
            +
              "<|LOC_799|>": 101096,
         | 
| 792 | 
            +
              "<|LOC_79|>": 100376,
         | 
| 793 | 
            +
              "<|LOC_7|>": 100304,
         | 
| 794 | 
            +
              "<|LOC_800|>": 101097,
         | 
| 795 | 
            +
              "<|LOC_801|>": 101098,
         | 
| 796 | 
            +
              "<|LOC_802|>": 101099,
         | 
| 797 | 
            +
              "<|LOC_803|>": 101100,
         | 
| 798 | 
            +
              "<|LOC_804|>": 101101,
         | 
| 799 | 
            +
              "<|LOC_805|>": 101102,
         | 
| 800 | 
            +
              "<|LOC_806|>": 101103,
         | 
| 801 | 
            +
              "<|LOC_807|>": 101104,
         | 
| 802 | 
            +
              "<|LOC_808|>": 101105,
         | 
| 803 | 
            +
              "<|LOC_809|>": 101106,
         | 
| 804 | 
            +
              "<|LOC_80|>": 100377,
         | 
| 805 | 
            +
              "<|LOC_810|>": 101107,
         | 
| 806 | 
            +
              "<|LOC_811|>": 101108,
         | 
| 807 | 
            +
              "<|LOC_812|>": 101109,
         | 
| 808 | 
            +
              "<|LOC_813|>": 101110,
         | 
| 809 | 
            +
              "<|LOC_814|>": 101111,
         | 
| 810 | 
            +
              "<|LOC_815|>": 101112,
         | 
| 811 | 
            +
              "<|LOC_816|>": 101113,
         | 
| 812 | 
            +
              "<|LOC_817|>": 101114,
         | 
| 813 | 
            +
              "<|LOC_818|>": 101115,
         | 
| 814 | 
            +
              "<|LOC_819|>": 101116,
         | 
| 815 | 
            +
              "<|LOC_81|>": 100378,
         | 
| 816 | 
            +
              "<|LOC_820|>": 101117,
         | 
| 817 | 
            +
              "<|LOC_821|>": 101118,
         | 
| 818 | 
            +
              "<|LOC_822|>": 101119,
         | 
| 819 | 
            +
              "<|LOC_823|>": 101120,
         | 
| 820 | 
            +
              "<|LOC_824|>": 101121,
         | 
| 821 | 
            +
              "<|LOC_825|>": 101122,
         | 
| 822 | 
            +
              "<|LOC_826|>": 101123,
         | 
| 823 | 
            +
              "<|LOC_827|>": 101124,
         | 
| 824 | 
            +
              "<|LOC_828|>": 101125,
         | 
| 825 | 
            +
              "<|LOC_829|>": 101126,
         | 
| 826 | 
            +
              "<|LOC_82|>": 100379,
         | 
| 827 | 
            +
              "<|LOC_830|>": 101127,
         | 
| 828 | 
            +
              "<|LOC_831|>": 101128,
         | 
| 829 | 
            +
              "<|LOC_832|>": 101129,
         | 
| 830 | 
            +
              "<|LOC_833|>": 101130,
         | 
| 831 | 
            +
              "<|LOC_834|>": 101131,
         | 
| 832 | 
            +
              "<|LOC_835|>": 101132,
         | 
| 833 | 
            +
              "<|LOC_836|>": 101133,
         | 
| 834 | 
            +
              "<|LOC_837|>": 101134,
         | 
| 835 | 
            +
              "<|LOC_838|>": 101135,
         | 
| 836 | 
            +
              "<|LOC_839|>": 101136,
         | 
| 837 | 
            +
              "<|LOC_83|>": 100380,
         | 
| 838 | 
            +
              "<|LOC_840|>": 101137,
         | 
| 839 | 
            +
              "<|LOC_841|>": 101138,
         | 
| 840 | 
            +
              "<|LOC_842|>": 101139,
         | 
| 841 | 
            +
              "<|LOC_843|>": 101140,
         | 
| 842 | 
            +
              "<|LOC_844|>": 101141,
         | 
| 843 | 
            +
              "<|LOC_845|>": 101142,
         | 
| 844 | 
            +
              "<|LOC_846|>": 101143,
         | 
| 845 | 
            +
              "<|LOC_847|>": 101144,
         | 
| 846 | 
            +
              "<|LOC_848|>": 101145,
         | 
| 847 | 
            +
              "<|LOC_849|>": 101146,
         | 
| 848 | 
            +
              "<|LOC_84|>": 100381,
         | 
| 849 | 
            +
              "<|LOC_850|>": 101147,
         | 
| 850 | 
            +
              "<|LOC_851|>": 101148,
         | 
| 851 | 
            +
              "<|LOC_852|>": 101149,
         | 
| 852 | 
            +
              "<|LOC_853|>": 101150,
         | 
| 853 | 
            +
              "<|LOC_854|>": 101151,
         | 
| 854 | 
            +
              "<|LOC_855|>": 101152,
         | 
| 855 | 
            +
              "<|LOC_856|>": 101153,
         | 
| 856 | 
            +
              "<|LOC_857|>": 101154,
         | 
| 857 | 
            +
              "<|LOC_858|>": 101155,
         | 
| 858 | 
            +
              "<|LOC_859|>": 101156,
         | 
| 859 | 
            +
              "<|LOC_85|>": 100382,
         | 
| 860 | 
            +
              "<|LOC_860|>": 101157,
         | 
| 861 | 
            +
              "<|LOC_861|>": 101158,
         | 
| 862 | 
            +
              "<|LOC_862|>": 101159,
         | 
| 863 | 
            +
              "<|LOC_863|>": 101160,
         | 
| 864 | 
            +
              "<|LOC_864|>": 101161,
         | 
| 865 | 
            +
              "<|LOC_865|>": 101162,
         | 
| 866 | 
            +
              "<|LOC_866|>": 101163,
         | 
| 867 | 
            +
              "<|LOC_867|>": 101164,
         | 
| 868 | 
            +
              "<|LOC_868|>": 101165,
         | 
| 869 | 
            +
              "<|LOC_869|>": 101166,
         | 
| 870 | 
            +
              "<|LOC_86|>": 100383,
         | 
| 871 | 
            +
              "<|LOC_870|>": 101167,
         | 
| 872 | 
            +
              "<|LOC_871|>": 101168,
         | 
| 873 | 
            +
              "<|LOC_872|>": 101169,
         | 
| 874 | 
            +
              "<|LOC_873|>": 101170,
         | 
| 875 | 
            +
              "<|LOC_874|>": 101171,
         | 
| 876 | 
            +
              "<|LOC_875|>": 101172,
         | 
| 877 | 
            +
              "<|LOC_876|>": 101173,
         | 
| 878 | 
            +
              "<|LOC_877|>": 101174,
         | 
| 879 | 
            +
              "<|LOC_878|>": 101175,
         | 
| 880 | 
            +
              "<|LOC_879|>": 101176,
         | 
| 881 | 
            +
              "<|LOC_87|>": 100384,
         | 
| 882 | 
            +
              "<|LOC_880|>": 101177,
         | 
| 883 | 
            +
              "<|LOC_881|>": 101178,
         | 
| 884 | 
            +
              "<|LOC_882|>": 101179,
         | 
| 885 | 
            +
              "<|LOC_883|>": 101180,
         | 
| 886 | 
            +
              "<|LOC_884|>": 101181,
         | 
| 887 | 
            +
              "<|LOC_885|>": 101182,
         | 
| 888 | 
            +
              "<|LOC_886|>": 101183,
         | 
| 889 | 
            +
              "<|LOC_887|>": 101184,
         | 
| 890 | 
            +
              "<|LOC_888|>": 101185,
         | 
| 891 | 
            +
              "<|LOC_889|>": 101186,
         | 
| 892 | 
            +
              "<|LOC_88|>": 100385,
         | 
| 893 | 
            +
              "<|LOC_890|>": 101187,
         | 
| 894 | 
            +
              "<|LOC_891|>": 101188,
         | 
| 895 | 
            +
              "<|LOC_892|>": 101189,
         | 
| 896 | 
            +
              "<|LOC_893|>": 101190,
         | 
| 897 | 
            +
              "<|LOC_894|>": 101191,
         | 
| 898 | 
            +
              "<|LOC_895|>": 101192,
         | 
| 899 | 
            +
              "<|LOC_896|>": 101193,
         | 
| 900 | 
            +
              "<|LOC_897|>": 101194,
         | 
| 901 | 
            +
              "<|LOC_898|>": 101195,
         | 
| 902 | 
            +
              "<|LOC_899|>": 101196,
         | 
| 903 | 
            +
              "<|LOC_89|>": 100386,
         | 
| 904 | 
            +
              "<|LOC_8|>": 100305,
         | 
| 905 | 
            +
              "<|LOC_900|>": 101197,
         | 
| 906 | 
            +
              "<|LOC_901|>": 101198,
         | 
| 907 | 
            +
              "<|LOC_902|>": 101199,
         | 
| 908 | 
            +
              "<|LOC_903|>": 101200,
         | 
| 909 | 
            +
              "<|LOC_904|>": 101201,
         | 
| 910 | 
            +
              "<|LOC_905|>": 101202,
         | 
| 911 | 
            +
              "<|LOC_906|>": 101203,
         | 
| 912 | 
            +
              "<|LOC_907|>": 101204,
         | 
| 913 | 
            +
              "<|LOC_908|>": 101205,
         | 
| 914 | 
            +
              "<|LOC_909|>": 101206,
         | 
| 915 | 
            +
              "<|LOC_90|>": 100387,
         | 
| 916 | 
            +
              "<|LOC_910|>": 101207,
         | 
| 917 | 
            +
              "<|LOC_911|>": 101208,
         | 
| 918 | 
            +
              "<|LOC_912|>": 101209,
         | 
| 919 | 
            +
              "<|LOC_913|>": 101210,
         | 
| 920 | 
            +
              "<|LOC_914|>": 101211,
         | 
| 921 | 
            +
              "<|LOC_915|>": 101212,
         | 
| 922 | 
            +
              "<|LOC_916|>": 101213,
         | 
| 923 | 
            +
              "<|LOC_917|>": 101214,
         | 
| 924 | 
            +
              "<|LOC_918|>": 101215,
         | 
| 925 | 
            +
              "<|LOC_919|>": 101216,
         | 
| 926 | 
            +
              "<|LOC_91|>": 100388,
         | 
| 927 | 
            +
              "<|LOC_920|>": 101217,
         | 
| 928 | 
            +
              "<|LOC_921|>": 101218,
         | 
| 929 | 
            +
              "<|LOC_922|>": 101219,
         | 
| 930 | 
            +
              "<|LOC_923|>": 101220,
         | 
| 931 | 
            +
              "<|LOC_924|>": 101221,
         | 
| 932 | 
            +
              "<|LOC_925|>": 101222,
         | 
| 933 | 
            +
              "<|LOC_926|>": 101223,
         | 
| 934 | 
            +
              "<|LOC_927|>": 101224,
         | 
| 935 | 
            +
              "<|LOC_928|>": 101225,
         | 
| 936 | 
            +
              "<|LOC_929|>": 101226,
         | 
| 937 | 
            +
              "<|LOC_92|>": 100389,
         | 
| 938 | 
            +
              "<|LOC_930|>": 101227,
         | 
| 939 | 
            +
              "<|LOC_931|>": 101228,
         | 
| 940 | 
            +
              "<|LOC_932|>": 101229,
         | 
| 941 | 
            +
              "<|LOC_933|>": 101230,
         | 
| 942 | 
            +
              "<|LOC_934|>": 101231,
         | 
| 943 | 
            +
              "<|LOC_935|>": 101232,
         | 
| 944 | 
            +
              "<|LOC_936|>": 101233,
         | 
| 945 | 
            +
              "<|LOC_937|>": 101234,
         | 
| 946 | 
            +
              "<|LOC_938|>": 101235,
         | 
| 947 | 
            +
              "<|LOC_939|>": 101236,
         | 
| 948 | 
            +
              "<|LOC_93|>": 100390,
         | 
| 949 | 
            +
              "<|LOC_940|>": 101237,
         | 
| 950 | 
            +
              "<|LOC_941|>": 101238,
         | 
| 951 | 
            +
              "<|LOC_942|>": 101239,
         | 
| 952 | 
            +
              "<|LOC_943|>": 101240,
         | 
| 953 | 
            +
              "<|LOC_944|>": 101241,
         | 
| 954 | 
            +
              "<|LOC_945|>": 101242,
         | 
| 955 | 
            +
              "<|LOC_946|>": 101243,
         | 
| 956 | 
            +
              "<|LOC_947|>": 101244,
         | 
| 957 | 
            +
              "<|LOC_948|>": 101245,
         | 
| 958 | 
            +
              "<|LOC_949|>": 101246,
         | 
| 959 | 
            +
              "<|LOC_94|>": 100391,
         | 
| 960 | 
            +
              "<|LOC_950|>": 101247,
         | 
| 961 | 
            +
              "<|LOC_951|>": 101248,
         | 
| 962 | 
            +
              "<|LOC_952|>": 101249,
         | 
| 963 | 
            +
              "<|LOC_953|>": 101250,
         | 
| 964 | 
            +
              "<|LOC_954|>": 101251,
         | 
| 965 | 
            +
              "<|LOC_955|>": 101252,
         | 
| 966 | 
            +
              "<|LOC_956|>": 101253,
         | 
| 967 | 
            +
              "<|LOC_957|>": 101254,
         | 
| 968 | 
            +
              "<|LOC_958|>": 101255,
         | 
| 969 | 
            +
              "<|LOC_959|>": 101256,
         | 
| 970 | 
            +
              "<|LOC_95|>": 100392,
         | 
| 971 | 
            +
              "<|LOC_960|>": 101257,
         | 
| 972 | 
            +
              "<|LOC_961|>": 101258,
         | 
| 973 | 
            +
              "<|LOC_962|>": 101259,
         | 
| 974 | 
            +
              "<|LOC_963|>": 101260,
         | 
| 975 | 
            +
              "<|LOC_964|>": 101261,
         | 
| 976 | 
            +
              "<|LOC_965|>": 101262,
         | 
| 977 | 
            +
              "<|LOC_966|>": 101263,
         | 
| 978 | 
            +
              "<|LOC_967|>": 101264,
         | 
| 979 | 
            +
              "<|LOC_968|>": 101265,
         | 
| 980 | 
            +
              "<|LOC_969|>": 101266,
         | 
| 981 | 
            +
              "<|LOC_96|>": 100393,
         | 
| 982 | 
            +
              "<|LOC_970|>": 101267,
         | 
| 983 | 
            +
              "<|LOC_971|>": 101268,
         | 
| 984 | 
            +
              "<|LOC_972|>": 101269,
         | 
| 985 | 
            +
              "<|LOC_973|>": 101270,
         | 
| 986 | 
            +
              "<|LOC_974|>": 101271,
         | 
| 987 | 
            +
              "<|LOC_975|>": 101272,
         | 
| 988 | 
            +
              "<|LOC_976|>": 101273,
         | 
| 989 | 
            +
              "<|LOC_977|>": 101274,
         | 
| 990 | 
            +
              "<|LOC_978|>": 101275,
         | 
| 991 | 
            +
              "<|LOC_979|>": 101276,
         | 
| 992 | 
            +
              "<|LOC_97|>": 100394,
         | 
| 993 | 
            +
              "<|LOC_980|>": 101277,
         | 
| 994 | 
            +
              "<|LOC_981|>": 101278,
         | 
| 995 | 
            +
              "<|LOC_982|>": 101279,
         | 
| 996 | 
            +
              "<|LOC_983|>": 101280,
         | 
| 997 | 
            +
              "<|LOC_984|>": 101281,
         | 
| 998 | 
            +
              "<|LOC_985|>": 101282,
         | 
| 999 | 
            +
              "<|LOC_986|>": 101283,
         | 
| 1000 | 
            +
              "<|LOC_987|>": 101284,
         | 
| 1001 | 
            +
              "<|LOC_988|>": 101285,
         | 
| 1002 | 
            +
              "<|LOC_989|>": 101286,
         | 
| 1003 | 
            +
              "<|LOC_98|>": 100395,
         | 
| 1004 | 
            +
              "<|LOC_990|>": 101287,
         | 
| 1005 | 
            +
              "<|LOC_991|>": 101288,
         | 
| 1006 | 
            +
              "<|LOC_992|>": 101289,
         | 
| 1007 | 
            +
              "<|LOC_993|>": 101290,
         | 
| 1008 | 
            +
              "<|LOC_994|>": 101291,
         | 
| 1009 | 
            +
              "<|LOC_995|>": 101292,
         | 
| 1010 | 
            +
              "<|LOC_996|>": 101293,
         | 
| 1011 | 
            +
              "<|LOC_997|>": 101294,
         | 
| 1012 | 
            +
              "<|LOC_998|>": 101295,
         | 
| 1013 | 
            +
              "<|LOC_999|>": 101296,
         | 
| 1014 | 
            +
              "<|LOC_99|>": 100396,
         | 
| 1015 | 
            +
              "<|LOC_9|>": 100306,
         | 
| 1016 | 
            +
              "<|LOC_BEGIN|>": 101298,
         | 
| 1017 | 
            +
              "<|LOC_END|>": 101299,
         | 
| 1018 | 
            +
              "<|LOC_SEP|>": 101300,
         | 
| 1019 | 
            +
              "<|image_pad|>": 101304,
         | 
| 1020 | 
            +
              "<|video_pad|>": 101307
         | 
| 1021 | 
            +
            }
         | 
    	
        chat_template.jinja
    ADDED
    
    | @@ -0,0 +1,22 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {%- if not add_generation_prompt is defined -%}
         | 
| 2 | 
            +
                {%- set add_generation_prompt = true -%}
         | 
| 3 | 
            +
            {%- endif -%}
         | 
| 4 | 
            +
            {%- if not cls_token is defined -%}
         | 
| 5 | 
            +
                {%- set cls_token = "<|begin_of_sentence|>" -%}
         | 
| 6 | 
            +
            {%- endif -%}
         | 
| 7 | 
            +
            {%- if not sep_token is defined -%}
         | 
| 8 | 
            +
                {%- set sep_token = "<|end_of_sentence|>" -%}
         | 
| 9 | 
            +
            {%- endif -%}
         | 
| 10 | 
            +
            {{- cls_token -}}
         | 
| 11 | 
            +
            {%- for message in messages -%}
         | 
| 12 | 
            +
                {%- if message["role"] == "user" -%}
         | 
| 13 | 
            +
                    {{- "User: <|IMAGE_START|><|IMAGE_PLACEHOLDER|><|IMAGE_END|>" + message["content"] + "\n" -}}
         | 
| 14 | 
            +
                {%- elif message["role"] == "assistant" -%}
         | 
| 15 | 
            +
                    {{- "Assistant: " + message["content"] + sep_token -}}
         | 
| 16 | 
            +
                {%- elif message["role"] == "system" -%}
         | 
| 17 | 
            +
                    {{- message["content"] -}}
         | 
| 18 | 
            +
                {%- endif -%}
         | 
| 19 | 
            +
            {%- endfor -%}
         | 
| 20 | 
            +
            {%- if add_generation_prompt -%}
         | 
| 21 | 
            +
                {{- "Assistant: " -}}
         | 
| 22 | 
            +
            {%- endif -%}
         | 
    	
        config.json
    ADDED
    
    | @@ -0,0 +1,75 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "architectures": [
         | 
| 3 | 
            +
                "PaddleOCRVLForConditionalGeneration"
         | 
| 4 | 
            +
              ],
         | 
| 5 | 
            +
              "attention_probs_dropout_prob": 0.0,
         | 
| 6 | 
            +
              "auto_map": {
         | 
| 7 | 
            +
                "AutoConfig": "configuration_paddleocr_vl.PaddleOCRVLConfig",
         | 
| 8 | 
            +
                "AutoModel": "modeling_paddleocr_vl.PaddleOCRVLForConditionalGeneration",
         | 
| 9 | 
            +
                "AutoModelForCausalLM": "modeling_paddleocr_vl.PaddleOCRVLForConditionalGeneration"
         | 
| 10 | 
            +
              },
         | 
| 11 | 
            +
              "compression_ratio": 1.0,
         | 
| 12 | 
            +
              "head_dim": 128,
         | 
| 13 | 
            +
              "hidden_act": "silu",
         | 
| 14 | 
            +
              "hidden_dropout_prob": 0.0,
         | 
| 15 | 
            +
              "hidden_size": 1024,
         | 
| 16 | 
            +
              "ignored_index": -100,
         | 
| 17 | 
            +
              "image_token_id": 100295,
         | 
| 18 | 
            +
              "intermediate_size": 3072,
         | 
| 19 | 
            +
              "max_position_embeddings": 131072,
         | 
| 20 | 
            +
              "max_sequence_length": null,
         | 
| 21 | 
            +
              "model_type": "paddleocr_vl",
         | 
| 22 | 
            +
              "num_attention_heads": 16,
         | 
| 23 | 
            +
              "num_hidden_layers": 18,
         | 
| 24 | 
            +
              "num_key_value_heads": 2,
         | 
| 25 | 
            +
              "pad_token_id": 0,
         | 
| 26 | 
            +
              "rms_norm_eps": 1e-05,
         | 
| 27 | 
            +
              "rope_scaling": {
         | 
| 28 | 
            +
                "mrope_section": [
         | 
| 29 | 
            +
                  16,
         | 
| 30 | 
            +
                  24,
         | 
| 31 | 
            +
                  24
         | 
| 32 | 
            +
                ],
         | 
| 33 | 
            +
                "rope_type": "default",
         | 
| 34 | 
            +
                "type": "default"
         | 
| 35 | 
            +
              },
         | 
| 36 | 
            +
              "rope_theta": 500000,
         | 
| 37 | 
            +
              "sliding_window": null,
         | 
| 38 | 
            +
              "tie_word_embeddings": false,
         | 
| 39 | 
            +
              "torch_dtype": "bfloat16",
         | 
| 40 | 
            +
              "transformers_version": "4.55.0",
         | 
| 41 | 
            +
              "use_bias": false,
         | 
| 42 | 
            +
              "use_cache": false,
         | 
| 43 | 
            +
              "use_flash_attention": false,
         | 
| 44 | 
            +
              "video_token_id": 101307,
         | 
| 45 | 
            +
              "vision_config": {
         | 
| 46 | 
            +
                "architectures": [
         | 
| 47 | 
            +
                  "SiglipVisionModel"
         | 
| 48 | 
            +
                ],
         | 
| 49 | 
            +
                "attention_dropout": 0.0,
         | 
| 50 | 
            +
                "auto_map": {
         | 
| 51 | 
            +
                  "AutoConfig": "configuration_paddleocr_vl.PaddleOCRVLConfig",
         | 
| 52 | 
            +
                  "AutoModel": "modeling_paddleocr_vl.SiglipVisionModel"
         | 
| 53 | 
            +
                },
         | 
| 54 | 
            +
                "hidden_act": "gelu_pytorch_tanh",
         | 
| 55 | 
            +
                "hidden_size": 1152,
         | 
| 56 | 
            +
                "image_size": 384,
         | 
| 57 | 
            +
                "intermediate_size": 4304,
         | 
| 58 | 
            +
                "layer_norm_eps": 1e-06,
         | 
| 59 | 
            +
                "model_type": "paddleocr_vl",
         | 
| 60 | 
            +
                "num_attention_heads": 16,
         | 
| 61 | 
            +
                "num_channels": 3,
         | 
| 62 | 
            +
                "num_hidden_layers": 27,
         | 
| 63 | 
            +
                "pad_token_id": 0,
         | 
| 64 | 
            +
                "patch_size": 14,
         | 
| 65 | 
            +
                "spatial_merge_size": 2,
         | 
| 66 | 
            +
                "temporal_patch_size": 2,
         | 
| 67 | 
            +
                "tokens_per_second": 2,
         | 
| 68 | 
            +
                "torch_dtype": "bfloat16"
         | 
| 69 | 
            +
              },
         | 
| 70 | 
            +
              "vision_start_token_id": 101305,
         | 
| 71 | 
            +
              "vocab_size": 103424,
         | 
| 72 | 
            +
              "weight_share_add_bias": true,
         | 
| 73 | 
            +
              "use_3d_rope": true,
         | 
| 74 | 
            +
              "rope_is_neox_style": true
         | 
| 75 | 
            +
            }
         | 
    	
        configuration_paddleocr_vl.py
    ADDED
    
    | @@ -0,0 +1,191 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
         | 
| 2 | 
            +
            #
         | 
| 3 | 
            +
            # Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 4 | 
            +
            # you may not use this file except in compliance with the License.
         | 
| 5 | 
            +
            # You may obtain a copy of the License at
         | 
| 6 | 
            +
            #
         | 
| 7 | 
            +
            #     http://www.apache.org/licenses/LICENSE-2.0
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            # Unless required by applicable law or agreed to in writing, software
         | 
| 10 | 
            +
            # distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 11 | 
            +
            # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 12 | 
            +
            # See the License for the specific language governing permissions and
         | 
| 13 | 
            +
            # limitations under the License.
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            from transformers.configuration_utils import PretrainedConfig
         | 
| 16 | 
            +
            from transformers.modeling_rope_utils import rope_config_validation
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            class PaddleOCRVisionConfig(PretrainedConfig):
                """Hyper-parameter container for the PaddleOCR-VL vision encoder.

                Every constructor argument is stored unchanged on the instance under
                the same name; any extra keyword arguments are forwarded to
                ``PretrainedConfig``. ``base_config_key`` is ``"vision_config"``, the key
                used for this sub-configuration.

                The parameter meanings below follow the usual vision-transformer naming;
                the model code that consumes them is not part of this file.

                Args:
                    hidden_size: Width of the encoder hidden states.
                    intermediate_size: Width of the feed-forward layer.
                    num_hidden_layers: Number of encoder layers.
                    num_attention_heads: Number of attention heads per layer.
                    num_channels: Number of input image channels.
                    image_size: Nominal input image resolution.
                    patch_size: Side length of one image patch.
                    hidden_act: Name of the activation function.
                    layer_norm_eps: Epsilon used by the layer-norm layers.
                    attention_dropout: Dropout rate applied to attention weights.
                    spatial_merge_size: Spatial merge factor (presumably the number of
                        patches merged per axis; confirm against the model code).
                    temporal_patch_size: Temporal patch size (presumably frames per
                        patch; confirm against the model code).
                    tokens_per_second: Stored as-is; semantics defined by the model code.
                    **kwargs: Forwarded to ``PretrainedConfig.__init__``.
                """

                model_type = "paddleocr_vl"
                base_config_key = "vision_config"

                def __init__(
                    self,
                    hidden_size=768,
                    intermediate_size=3072,
                    num_hidden_layers=12,
                    num_attention_heads=12,
                    num_channels=3,
                    image_size=224,
                    patch_size=14,
                    hidden_act="gelu_pytorch_tanh",
                    layer_norm_eps=1e-6,
                    attention_dropout=0.0,
                    spatial_merge_size=2,
                    temporal_patch_size=2,
                    tokens_per_second=2,
                    **kwargs,
                ):
                    super().__init__(**kwargs)

                    # Transformer encoder dimensions.
                    self.hidden_size = hidden_size
                    self.intermediate_size = intermediate_size
                    self.num_hidden_layers = num_hidden_layers
                    self.num_attention_heads = num_attention_heads

                    # Input image geometry.
                    self.num_channels = num_channels
                    self.image_size = image_size
                    self.patch_size = patch_size

                    # Activation, normalisation and regularisation settings.
                    self.hidden_act = hidden_act
                    self.layer_norm_eps = layer_norm_eps
                    self.attention_dropout = attention_dropout

                    # Patch merging / temporal settings.
                    self.spatial_merge_size = spatial_merge_size
                    self.temporal_patch_size = temporal_patch_size
                    self.tokens_per_second = tokens_per_second
         | 
| 54 | 
            +
             | 
| 55 | 
            +
             | 
| 56 | 
            +
             | 
| 57 | 
            +
            class PaddleOCRVLConfig(PretrainedConfig):
                """
                Configuration class for the PaddleOCR-VL model.

                Stores the language-model hyper-parameters together with a nested
                ``vision_config`` (a ``PaddleOCRVisionConfig``). It inherits from
                ``PretrainedConfig`` and can be used to control model outputs.
                """

                model_type = "paddleocr_vl"
                keys_to_ignore_at_inference = ["past_key_values"]
                sub_configs = {"vision_config": PaddleOCRVisionConfig}

                # Default tensor-parallel plan for the base model's attention and MLP
                # projection layers.
                base_model_tp_plan = {
                    "layers.*.self_attn.q_proj": "colwise",
                    "layers.*.self_attn.k_proj": "colwise",
                    "layers.*.self_attn.v_proj": "colwise",
                    "layers.*.self_attn.o_proj": "rowwise",
                    "layers.*.mlp.gate_proj": "colwise",
                    "layers.*.mlp.up_proj": "colwise",
                    "layers.*.mlp.down_proj": "rowwise",
                }
                # Default pipeline-parallel plan for the base model.
                base_model_pp_plan = {
                    "embed_tokens": (["input_ids"], ["inputs_embeds"]),
                    "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
                    "norm": (["hidden_states"], ["hidden_states"]),
                }

                def __init__(
                    self,
                    vocab_size=32000,
                    hidden_size=768,
                    intermediate_size=11008,
                    max_position_embeddings=32768,
                    num_hidden_layers=2,
                    num_attention_heads=2,
                    image_token_id=101304,
                    video_token_id=101305,
                    vision_start_token_id=101306,
                    rms_norm_eps=1e-6,
                    use_cache=False,
                    use_flash_attention=False,
                    pad_token_id=0,
                    bos_token_id=1,
                    eos_token_id=2,
                    head_dim=128,
                    hidden_act="silu",
                    use_bias=False,
                    rope_theta=10000,
                    weight_share_add_bias=True,
                    ignored_index=-100,
                    attention_probs_dropout_prob=0.0,
                    hidden_dropout_prob=0.0,
                    compression_ratio: float = 1.0,
                    num_key_value_heads=None,
                    max_sequence_length=None,
                    tie_word_embeddings=False,
                    vision_config=None,
                    rope_scaling=None,
                    **kwargs,
                ):
                    """
                    Initialize configuration with default or specified parameters.

                    Args:
                        vocab_size (int): Size of the vocabulary (number of unique tokens)
                        hidden_size (int): Dimensionality of the encoder layers and the pooler layer
                        intermediate_size (int): Dimensionality of the "intermediate" (feed-forward) layer
                        max_position_embeddings (int): Maximum sequence length the model can handle
                        num_hidden_layers (int): Number of hidden layers in the Transformer encoder
                        num_attention_heads (int): Number of attention heads for each attention layer
                        image_token_id (int): Token ID stored as `image_token_id`
                            (presumably the image placeholder token; confirm against the tokenizer)
                        video_token_id (int): Token ID stored as `video_token_id`
                            (presumably the video placeholder token; confirm against the tokenizer)
                        vision_start_token_id (int): Token ID stored as `vision_start_token_id`
                        rms_norm_eps (float): The epsilon used by the RMS normalization layers
                        use_cache (bool): Whether to use caching for faster generation (decoding)
                        use_flash_attention (bool): Whether to use FlashAttention for optimized attention computation
                        pad_token_id (int): Token ID used for padding sequences
                        bos_token_id (int): Token ID used for beginning-of-sequence
                        eos_token_id (int): Token ID used for end-of-sequence
                        head_dim (int): Per-head dimension stored as `head_dim`
                        hidden_act (str): Name of the activation function
                        use_bias (bool): Whether to use bias terms in linear layers
                        rope_theta (float): The base period of the RoPE embeddings
                        weight_share_add_bias (bool): Whether to share bias weights in certain layers
                        ignored_index (int): Target value that is ignored during loss computation
                        attention_probs_dropout_prob (float): Dropout probability for attention weights
                        hidden_dropout_prob (float): Dropout probability for hidden layers
                        compression_ratio (float): Ratio for KV cache compression (1.0 = no compression)
                        num_key_value_heads (int): Number of key/value heads (for Grouped Query Attention)
                        max_sequence_length (int): Maximum sequence length for positional embeddings
                        tie_word_embeddings (bool): Forwarded to `PretrainedConfig`
                        vision_config (dict | PaddleOCRVisionConfig | None): Vision settings. A dict is
                            expanded into a `PaddleOCRVisionConfig`, `None` yields a default one, and an
                            already-built config object is stored as-is.
                        rope_scaling (dict | None): RoPE scaling settings. If it has a `"type"` key, that
                            value is mirrored into `"rope_type"`, with `"mrope"` rewritten to `"default"`.
                            The caller's dict is copied, not modified.
                        **kwargs: Additional keyword arguments passed to parent class
                    """
                    vision_config_cls = self.sub_configs["vision_config"]
                    if isinstance(vision_config, dict):
                        self.vision_config = vision_config_cls(**vision_config)
                    elif vision_config is None:
                        self.vision_config = vision_config_cls()
                    else:
                        # An already-instantiated config used to be dropped silently,
                        # leaving `self.vision_config` undefined.
                        self.vision_config = vision_config

                    self.vocab_size = vocab_size
                    self.hidden_size = hidden_size
                    self.intermediate_size = intermediate_size
                    self.max_position_embeddings = max_position_embeddings
                    self.num_hidden_layers = num_hidden_layers
                    self.num_attention_heads = num_attention_heads
                    self.rms_norm_eps = rms_norm_eps
                    self.use_cache = use_cache
                    self.use_flash_attention = use_flash_attention
                    self.image_token_id = image_token_id
                    self.video_token_id = video_token_id
                    self.vision_start_token_id = vision_start_token_id
                    self.head_dim = head_dim
                    self.hidden_act = hidden_act
                    self.sliding_window = None
                    self.use_bias = use_bias
                    self.weight_share_add_bias = weight_share_add_bias
                    self.rope_theta = rope_theta
                    self.ignored_index = ignored_index
                    self.attention_probs_dropout_prob = attention_probs_dropout_prob
                    self.hidden_dropout_prob = hidden_dropout_prob
                    self.compression_ratio = compression_ratio
                    self.num_key_value_heads = num_key_value_heads
                    self.max_sequence_length = max_sequence_length

                    # Work on a shallow copy so the caller's dict is not modified by the
                    # "type" -> "rope_type" normalisation below.
                    self.rope_scaling = dict(rope_scaling) if rope_scaling is not None else None
                    if self.rope_scaling is not None and "type" in self.rope_scaling:
                        if self.rope_scaling["type"] == "mrope":
                            self.rope_scaling["type"] = "default"
                        self.rope_scaling["rope_type"] = self.rope_scaling["type"]
                    rope_config_validation(self, ignore_keys={"mrope_section"})

                    # Call the parent initializer exactly once, with the special token
                    # ids included. The previous second call omitted them, so the
                    # parent reset pad/bos/eos_token_id to None after they were set.
                    super().__init__(
                        pad_token_id=pad_token_id,
                        bos_token_id=bos_token_id,
                        eos_token_id=eos_token_id,
                        tie_word_embeddings=tie_word_embeddings,
                        **kwargs,
                    )
         | 
    	
        generation_config.json
    ADDED
    
    | @@ -0,0 +1,6 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "_from_model_config": true,
         | 
| 3 | 
            +
              "eos_token_id": 2,
         | 
| 4 | 
            +
              "transformers_version": "4.55.0",
         | 
| 5 | 
            +
              "use_cache": false
         | 
| 6 | 
            +
            }
         | 
    	
        image_processing.py
    ADDED
    
    | @@ -0,0 +1,569 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
         | 
| 2 | 
            +
            #
         | 
| 3 | 
            +
            # Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 4 | 
            +
            # you may not use this file except in compliance with the License.
         | 
| 5 | 
            +
            # You may obtain a copy of the License at
         | 
| 6 | 
            +
            #
         | 
| 7 | 
            +
            #     http://www.apache.org/licenses/LICENSE-2.0
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            # Unless required by applicable law or agreed to in writing, software
         | 
| 10 | 
            +
            # distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 11 | 
            +
            # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 12 | 
            +
            # See the License for the specific language governing permissions and
         | 
| 13 | 
            +
            # limitations under the License.
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            """Image processor class for PaddleOCR-VL."""
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            import math
         | 
| 18 | 
            +
            from typing import Dict, List, Optional, Union
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            import numpy as np
         | 
| 21 | 
            +
            import torch
         | 
| 22 | 
            +
            from transformers.image_processing_utils import BaseImageProcessor, BatchFeature
         | 
| 23 | 
            +
            from torchvision.transforms import functional as TF
         | 
| 24 | 
            +
            from transformers.image_transforms import (
         | 
| 25 | 
            +
                convert_to_rgb,
         | 
| 26 | 
            +
                resize,
         | 
| 27 | 
            +
                to_channel_dimension_format,
         | 
| 28 | 
            +
            )
         | 
| 29 | 
            +
            from transformers.image_utils import (
         | 
| 30 | 
            +
                OPENAI_CLIP_MEAN,
         | 
| 31 | 
            +
                OPENAI_CLIP_STD,
         | 
| 32 | 
            +
                ChannelDimension,
         | 
| 33 | 
            +
                PILImageResampling,
         | 
| 34 | 
            +
                get_image_size,
         | 
| 35 | 
            +
                infer_channel_dimension_format,
         | 
| 36 | 
            +
                is_scaled_image,
         | 
| 37 | 
            +
                is_valid_image,
         | 
| 38 | 
            +
                make_list_of_images,
         | 
| 39 | 
            +
                to_numpy_array,
         | 
| 40 | 
            +
                valid_images,
         | 
| 41 | 
            +
                validate_preprocess_arguments,
         | 
| 42 | 
            +
            )
         | 
| 43 | 
            +
            from transformers.utils import TensorType, is_vision_available, logging
         | 
| 44 | 
            +
             | 
| 45 | 
            +
             | 
| 46 | 
            +
            logger = logging.get_logger(__name__)
         | 
| 47 | 
            +
             | 
| 48 | 
            +
             | 
| 49 | 
            +
            if is_vision_available():
         | 
| 50 | 
            +
                from PIL import Image
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            # Accepted image inputs: a single image (PIL image, NumPy array or torch
            # tensor) or a flat list of such images. PIL and torch types are given as
            # string forward references.
            ImageInput = Union[
                "PIL.Image.Image",
                np.ndarray,
                "torch.Tensor",
                List["PIL.Image.Image"],
                List[np.ndarray],
                List["torch.Tensor"],
            ]  # noqa


            # Accepted video inputs: an array/tensor, a list of frames, or a list of
            # frame lists.
            VideoInput = Union[
                List["PIL.Image.Image"],
                "np.ndarray",
                "torch.Tensor",
                List["np.ndarray"],
                List["torch.Tensor"],
                List[List["PIL.Image.Image"]],
                List[List["np.ndarray"]],  # was misspelled "np.ndarrray"
                List[List["torch.Tensor"]],
            ]  # noqa
         | 
| 72 | 
            +
             | 
| 73 | 
            +
             | 
| 74 | 
            +
            def make_batched_images(images) -> List[ImageInput]:
                """
                Accepts images in list or nested list format, and makes a flat list of images for preprocessing.

                Args:
                    images (`Union[List[List[ImageInput]], List[ImageInput], ImageInput]`):
                        A single image, a list/tuple of images, or a list/tuple of
                        lists/tuples of images.

                Returns:
                    list: A flat list of images. A nested input is flattened, a flat
                    list/tuple is returned unchanged, and a single image is wrapped
                    in a one-element list.

                Raises:
                    ValueError: If `images` is empty or is not one of the supported layouts.
                """
                # Guard against empty sequences so they end in the ValueError below
                # instead of an IndexError from `images[0]` / `images[0][0]`.
                is_nonempty_seq = isinstance(images, (list, tuple)) and bool(images)
                first = images[0] if is_nonempty_seq else None

                if (
                    is_nonempty_seq
                    and isinstance(first, (list, tuple))
                    and first
                    and is_valid_image(first[0])
                ):
                    return [img for img_list in images for img in img_list]

                elif is_nonempty_seq and is_valid_image(first):
                    return images

                elif is_valid_image(images):
                    return [images]

                raise ValueError(f"Could not make batched images from {images}")
         | 
| 99 | 
            +
             | 
| 100 | 
            +
             | 
| 101 | 
            +
            def adjust_size(size, patch_size):
                """Round `size` down to a multiple of `patch_size` with an even patch count.

                The number of whole patches that fit in `size` is computed with floor
                division; if that count is odd, one patch is dropped.
                """
                patch_count = size // patch_size
                # Subtract the parity bit: removes one patch only when the count is odd.
                patch_count -= patch_count % 2
                return patch_count * patch_size
         | 
| 106 | 
            +
             | 
| 107 | 
            +
             | 
| 108 | 
            +
            def make_batched_videos(videos) -> List[VideoInput]:
                """
                Normalizes the supported video input layouts into a list of videos.

                Args:
                    videos: One of
                        - a list/tuple of lists/tuples of frames (returned unchanged),
                        - a list/tuple of PIL images (treated as one video and wrapped in a list),
                        - a list/tuple of arrays/tensors whose `shape` has 4 dimensions
                          (each one is split into a list along its first axis),
                        - a single array/tensor whose `shape` has 4 dimensions
                          (split into a list along its first axis and wrapped in a list).

                Returns:
                    list: A list with one entry per video.

                Raises:
                    ValueError: If `videos` is empty or is not one of the supported layouts.
                """
                # Guard against empty sequences so they end in the ValueError below
                # instead of an IndexError from `videos[0]` / `videos[0][0]`.
                is_nonempty_seq = isinstance(videos, (list, tuple)) and bool(videos)
                first = videos[0] if is_nonempty_seq else None

                if (
                    is_nonempty_seq
                    and isinstance(first, (list, tuple))
                    and first
                    and is_valid_image(first[0])
                ):
                    return videos

                elif is_nonempty_seq and is_valid_image(first):
                    if isinstance(first, Image.Image):
                        return [videos]
                    elif len(first.shape) == 4:
                        return [list(video) for video in videos]
                    # Any other element kind falls through to the error below.

                elif is_valid_image(videos) and len(videos.shape) == 4:
                    return [list(videos)]

                raise ValueError(f"Could not make batched video from {videos}")
         | 
| 126 | 
            +
             | 
| 127 | 
            +
             | 
| 128 | 
            +
            def smart_resize(
                height: int,
                width: int,
                factor: int = 28,
                min_pixels: int = 28 * 28 * 130,
                max_pixels: int = 28 * 28 * 1280,
            ):
                """Compute a target (height, width) for resizing an image.

                The result satisfies, as far as possible:

                1. Both dimensions are multiples of `factor` and at least `factor`.
                2. The total number of pixels lies within [`min_pixels`, `max_pixels`].
                3. The aspect ratio of the input is maintained as closely as possible.

                Args:
                    height: Input height in pixels.
                    width: Input width in pixels.
                    factor: Value both output dimensions must be divisible by.
                    min_pixels: Lower bound on `h_bar * w_bar`.
                    max_pixels: Upper bound on `h_bar * w_bar`.

                Returns:
                    Tuple `(h_bar, w_bar)` with the target height and width.

                Raises:
                    ValueError: If the aspect ratio (after the small-side adjustment)
                        exceeds 200.
                """
                # A side shorter than `factor` is scaled up to `factor` (the other side is
                # scaled proportionally) instead of being rejected.
                if height < factor:
                    print(f"smart_resize: height={height} < factor={factor}, reset height=factor")
                    width = round((width * factor) / height)
                    height = factor

                if width < factor:
                    print(f"smart_resize: width={width} < factor={factor}, reset width=factor")
                    height = round((height * factor) / width)
                    width = factor

                aspect_ratio = max(height, width) / min(height, width)
                if aspect_ratio > 200:
                    raise ValueError(
                        f"absolute aspect ratio must be smaller than 200, got {aspect_ratio}"
                    )

                h_bar = round(height / factor) * factor
                w_bar = round(width / factor) * factor
                if h_bar * w_bar > max_pixels:
                    beta = math.sqrt((height * width) / max_pixels)
                    # Flooring can reach 0 for very elongated inputs with a small
                    # `max_pixels`; clamp so neither side ever collapses below one factor.
                    h_bar = max(factor, math.floor(height / beta / factor) * factor)
                    w_bar = max(factor, math.floor(width / beta / factor) * factor)
                elif h_bar * w_bar < min_pixels:
                    beta = math.sqrt(min_pixels / (height * width))
                    h_bar = math.ceil(height * beta / factor) * factor
                    w_bar = math.ceil(width * beta / factor) * factor
                return h_bar, w_bar
         | 
| 174 | 
            +
             | 
| 175 | 
            +
             | 
| 176 | 
            +
            class SiglipImageProcessor(BaseImageProcessor):
         | 
| 177 | 
            +
                r"""
         | 
| 178 | 
            +
                Constructs a Siglip image processor that dynamically resizes images based on the original images.
         | 
| 179 | 
            +
             | 
| 180 | 
            +
                Args:
         | 
| 181 | 
            +
                    do_resize (`bool`, *optional*, defaults to `True`):
         | 
| 182 | 
            +
                        Whether to resize the image's (height, width) dimensions.
         | 
| 183 | 
            +
                    resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
         | 
| 184 | 
            +
                        Resampling filter to use when resizing the image.
         | 
| 185 | 
            +
                    do_rescale (`bool`, *optional*, defaults to `True`):
         | 
| 186 | 
            +
                        Whether to rescale the image by the specified scale `rescale_factor`.
         | 
| 187 | 
            +
                    rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
         | 
| 188 | 
            +
                        Scale factor to use if rescaling the image.
         | 
| 189 | 
            +
                    do_normalize (`bool`, *optional*, defaults to `True`):
         | 
| 190 | 
            +
                        Whether to normalize the image.
         | 
| 191 | 
            +
                    image_mean (`float` or `List[float]`, *optional*, defaults to `[0.48145466, 0.4578275, 0.40821073]`):
         | 
| 192 | 
            +
                        Mean to use if normalizing the image. This is a float or list of floats for each channel in the image.
         | 
| 193 | 
            +
                    image_std (`float` or `List[float]`, *optional*, defaults to `[0.26862954, 0.26130258, 0.27577711]`):
         | 
| 194 | 
            +
                        Standard deviation to use if normalizing the image. This is a float or list of floats for each channel in the image.
         | 
| 195 | 
            +
                    do_convert_rgb (`bool`, *optional*, defaults to `True`):
         | 
| 196 | 
            +
                        Whether to convert the image to RGB.
         | 
| 197 | 
            +
                    min_pixels (`int`, *optional*, defaults to `28 * 28 * 130`):
         | 
| 198 | 
            +
                        The min pixels of the image to resize the image.
         | 
| 199 | 
            +
                    max_pixels (`int`, *optional*, defaults to `28 * 28 * 1280`):
         | 
| 200 | 
            +
                        The max pixels of the image to resize the image.
         | 
| 201 | 
            +
                    patch_size (`int`, *optional*, defaults to 14):
         | 
| 202 | 
            +
                        The spatial patch size of the vision encoder.
         | 
| 203 | 
            +
                    temporal_patch_size (`int`, *optional*, defaults to 1):
         | 
| 204 | 
            +
                        The temporal patch size of the vision encoder.
         | 
| 205 | 
            +
                    merge_size (`int`, *optional*, defaults to 2):
         | 
| 206 | 
            +
                        The merge size of the vision encoder to llm encoder.
         | 
| 207 | 
            +
                """
         | 
| 208 | 
            +
             | 
| 209 | 
            +
                model_input_names = [
         | 
| 210 | 
            +
                    "pixel_values",
         | 
| 211 | 
            +
                    "image_grid_thw",
         | 
| 212 | 
            +
                    "pixel_values_videos",
         | 
| 213 | 
            +
                    "video_grid_thw",
         | 
| 214 | 
            +
                ]
         | 
| 215 | 
            +
             | 
| 216 | 
            +
                def __init__(
                    self,
                    do_resize: bool = True,
                    resample: PILImageResampling = PILImageResampling.BICUBIC,
                    do_rescale: bool = True,
                    rescale_factor: Union[int, float] = 1 / 255,
                    do_normalize: bool = True,
                    image_mean: Optional[Union[float, List[float]]] = None,
                    image_std: Optional[Union[float, List[float]]] = None,
                    do_convert_rgb: bool = True,
                    min_pixels: int = 28 * 28 * 130,
                    max_pixels: int = 28 * 28 * 1280,
                    patch_size: int = 14,
                    temporal_patch_size: int = 1,
                    merge_size: int = 2,
                    **kwargs,
                ) -> None:
                    """Store the preprocessing configuration on the instance.

                    Every argument is saved under an attribute of the same name.
                    `image_mean` / `image_std` fall back to `OPENAI_CLIP_MEAN` /
                    `OPENAI_CLIP_STD` when left as `None`. Remaining keyword arguments are
                    forwarded to the parent constructor.

                    NOTE(review): `mvit_rescale` reads `self.in_token_limit` and
                    `self.pad_input`, which are not assigned here; presumably they are
                    expected to arrive through `**kwargs`. Confirm.
                    """
                    super().__init__(**kwargs)

                    # Resolve the normalization defaults before storing them.
                    if image_mean is None:
                        image_mean = OPENAI_CLIP_MEAN
                    if image_std is None:
                        image_std = OPENAI_CLIP_STD

                    # Colour conversion and resizing.
                    self.do_convert_rgb = do_convert_rgb
                    self.do_resize = do_resize
                    self.resample = resample
                    self.min_pixels = min_pixels
                    self.max_pixels = max_pixels
                    self.size = dict(min_pixels=min_pixels, max_pixels=max_pixels)  # not used

                    # Rescaling and normalization.
                    self.do_rescale = do_rescale
                    self.rescale_factor = rescale_factor
                    self.do_normalize = do_normalize
                    self.image_mean = image_mean
                    self.image_std = image_std

                    # Patch layout.
                    self.patch_size = patch_size
                    self.temporal_patch_size = temporal_patch_size
                    self.merge_size = merge_size
         | 
| 248 | 
            +
             | 
| 249 | 
            +
                def mvit_rescale(self, image: Image.Image, merge_size: int = 2) -> Image.Image:
                    """Rescale, then pad or crop, a PIL image to fit the patch grid.

                    Steps:
                      1. If the image holds more than `self.in_token_limit` whole patches,
                         shrink it (bicubic) so the patch count is about that limit.
                      2. If `self.pad_input` is truthy, pad so both sides are multiples of
                         `merge_size * patch_size`; otherwise trim both sides to multiples
                         of `patch_size`, pass them through `adjust_size`, and center-crop.
                      3. If either side still spans 512 or more patches, center-crop it to
                         at most 510 patches.

                    Args:
                        image: Image exposing a `(width, height)` `.size` attribute.
                        merge_size: Patch merge factor used for the padding granularity.

                    Returns:
                        The processed image.

                    Raises:
                        ValueError: If `image.size` cannot be unpacked into two values.
                    """
                    try:
                        w, h = image.size
                    except Exception as err:
                        # Catch `Exception` rather than using a bare `except`, so that
                        # KeyboardInterrupt/SystemExit propagate; keep the original cause.
                        raise ValueError(str((type(image), image))) from err
                    patch_size = self.patch_size

                    num_patches = (w // patch_size) * (h // patch_size)
                    if num_patches > self.in_token_limit:
                        # Scale both sides by the same factor so the area shrinks to the limit.
                        scale = math.sqrt(self.in_token_limit / num_patches)
                        new_w, new_h = int(w * scale), int(h * scale)

                        image = image.resize((new_w, new_h), Image.Resampling.BICUBIC)
                    if self.pad_input:
                        new_w, new_h = image.size
                        pad_size_h = merge_size * patch_size
                        pad_size_w = merge_size * patch_size

                        # Amount needed to reach the next multiple (0 when already aligned).
                        pad_h = (pad_size_h - new_h % pad_size_h) % pad_size_h
                        pad_w = (pad_size_w - new_w % pad_size_w) % pad_size_w

                        # NOTE(review): `TF` is presumably torchvision.transforms.functional,
                        # where a 4-tuple means (left, top, right, bottom). Confirm.
                        image = TF.pad(image, (0, 0, pad_w, pad_h))
                    else:
                        new_w, new_h = image.size
                        new_w = new_w - new_w % patch_size
                        new_h = new_h - new_h % patch_size

                        new_w = adjust_size(new_w, patch_size)
                        new_h = adjust_size(new_h, patch_size)

                        image = TF.center_crop(image, (new_h, new_w))

                    w, h = image.size
                    if w // patch_size >= 512 or h // patch_size >= 512:
                        # Crop instead of raising; presumably 512 patches per side is the
                        # positional-embedding limit. TODO confirm.
                        new_h = min(patch_size * 510, h)
                        new_w = min(patch_size * 510, w)
                        image = TF.center_crop(image, (new_h, new_w))
                    return image
         | 
| 289 | 
            +
             | 
| 290 | 
            +
                def _preprocess(
                    self,
                    images: Union[ImageInput, VideoInput],
                    do_resize: Optional[bool] = None,
                    resample: Optional[PILImageResampling] = None,
                    do_rescale: Optional[bool] = None,
                    rescale_factor: Optional[float] = None,
                    do_normalize: Optional[bool] = None,
                    image_mean: Optional[Union[float, List[float]]] = None,
                    image_std: Optional[Union[float, List[float]]] = None,
                    do_convert_rgb: Optional[bool] = None,
                    data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
                    input_data_format: Optional[Union[str, ChannelDimension]] = None,
                ):
                    """
                    Preprocess an image or batch of images and cut the result into flat patches.

                    Args:
                        images (`ImageInput`):
                            Image or batch of images to preprocess. Expects pixel values ranging from 0 to 255. If pixel values range from 0 to 1, set `do_rescale=False`.
                        do_resize (`bool`, *optional*):
                            Whether to resize the image. A falsy value (including `None`) skips resizing.
                        resample (`PILImageResampling`, *optional*):
                            Resampling filter to use if resizing the image. This can be one of the `PILImageResampling` enums.
                        do_rescale (`bool`, *optional*):
                            Whether to rescale the image.
                        rescale_factor (`float`, *optional*):
                            Scale factor to use if rescaling the image.
                        do_normalize (`bool`, *optional*):
                            Whether to normalize the image.
                        image_mean (`float` or `List[float]`, *optional*):
                            Mean to use if normalizing the image. Can be a float or a list of floats corresponding to the number of channels in the image.
                        image_std (`float` or `List[float]`, *optional*):
                            Standard deviation to use if normalizing the image. Can be a float or a list of floats corresponding to the number of channels in the image.
                        do_convert_rgb (`bool`, *optional*):
                            Whether to convert the image to RGB.
                        data_format (`ChannelDimension`, *optional*, defaults to `ChannelDimension.FIRST`):
                            The channel dimension format for the output image. Can be one of:
                            - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                            - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                        input_data_format (`ChannelDimension` or `str`, *optional*):
                            The channel dimension format for the input image. If unset, it is inferred from the first image. Can be one of:
                            - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                            - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                            - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.

                    Returns:
                        A tuple `(flatten_patches, (grid_t, grid_h, grid_w))` where `flatten_patches` is an array of
                        shape `(grid_t * grid_h * grid_w, channel, patch_size, patch_size)`.

                    Raises:
                        ValueError: If `self.temporal_patch_size` is not 1.
                    """
                    # The final flattening only works for a temporal patch size of 1. Check
                    # this up front with a real exception instead of an `assert`, which is
                    # stripped under `python -O`.
                    if self.temporal_patch_size != 1:
                        raise ValueError(
                            f"temporal_patch_size must be 1, got {self.temporal_patch_size}"
                        )

                    images = make_list_of_images(images)

                    if do_convert_rgb:
                        images = [convert_to_rgb(image) for image in images]

                    # All transformations expect numpy arrays.
                    images = [to_numpy_array(image) for image in images]

                    if is_scaled_image(images[0]) and do_rescale:
                        logger.warning_once(
                            "It looks like you are trying to rescale already rescaled images. If the input"
                            " images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again."
                        )
                    if input_data_format is None:
                        # We assume that all images have the same channel dimension format.
                        input_data_format = infer_channel_dimension_format(images[0])

                    # The size of the first image is used for every image in the batch.
                    height, width = get_image_size(images[0], channel_dim=input_data_format)
                    resized_height, resized_width = height, width
                    processed_images = []

                    for image in images:
                        if do_resize:
                            resized_height, resized_width = smart_resize(
                                height,
                                width,
                                factor=self.patch_size * self.merge_size,
                                min_pixels=self.min_pixels,
                                max_pixels=self.max_pixels,
                            )
                            image = resize(
                                image,
                                size=(resized_height, resized_width),
                                resample=resample,
                                input_data_format=input_data_format,
                            )

                        if do_rescale:
                            image = self.rescale(
                                image, scale=rescale_factor, input_data_format=input_data_format
                            )

                        if do_normalize:
                            image = self.normalize(
                                image=image,
                                mean=image_mean,
                                std=image_std,
                                input_data_format=input_data_format,
                            )
                        image = to_channel_dimension_format(
                            image, data_format, input_channel_dim=input_data_format
                        )
                        processed_images.append(image)

                    patches = np.array(processed_images)
                    if data_format == ChannelDimension.LAST:
                        # Patch extraction below works on a channels-first layout.
                        patches = patches.transpose(0, 3, 1, 2)
                    if patches.shape[0] == 1:
                        patches = np.tile(patches, (self.temporal_patch_size, 1, 1, 1))
                    channel = patches.shape[1]
                    grid_t = patches.shape[0] // self.temporal_patch_size
                    grid_h, grid_w = (
                        resized_height // self.patch_size,
                        resized_width // self.patch_size,
                    )
                    # Split height and width into (grid, patch) pairs ...
                    patches = patches.reshape(
                        grid_t,
                        self.temporal_patch_size,
                        channel,
                        grid_h,
                        self.patch_size,
                        grid_w,
                        self.patch_size,
                    )
                    # ... then bring the grid axes to the front so each patch is contiguous.
                    patches = patches.transpose(0, 3, 5, 2, 1, 4, 6)
                    flatten_patches = patches.reshape(
                        grid_t * grid_h * grid_w, channel, self.patch_size, self.patch_size
                    )
                    return flatten_patches, (grid_t, grid_h, grid_w)
         | 
| 420 | 
            +
             | 
| 421 | 
            +
                def preprocess(
         | 
| 422 | 
            +
                    self,
         | 
| 423 | 
            +
                    images: ImageInput,
         | 
| 424 | 
            +
                    videos: VideoInput = None,
         | 
| 425 | 
            +
                    do_resize: bool = None,
         | 
| 426 | 
            +
                    size: Dict[str, int] = None,
         | 
| 427 | 
            +
                    resample: PILImageResampling = None,
         | 
| 428 | 
            +
                    do_rescale: bool = None,
         | 
| 429 | 
            +
                    rescale_factor: float = None,
         | 
| 430 | 
            +
                    do_normalize: bool = None,
         | 
| 431 | 
            +
                    image_mean: Optional[Union[float, List[float]]] = None,
         | 
| 432 | 
            +
                    image_std: Optional[Union[float, List[float]]] = None,
         | 
| 433 | 
            +
                    do_convert_rgb: bool = None,
         | 
| 434 | 
            +
                    return_tensors: Optional[Union[str, TensorType]] = None,
         | 
| 435 | 
            +
                    data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
         | 
| 436 | 
            +
                    input_data_format: Optional[Union[str, ChannelDimension]] = None,
         | 
| 437 | 
            +
                ):
         | 
| 438 | 
            +
                    """
         | 
| 439 | 
            +
                    Args:
         | 
| 440 | 
            +
                        images (`ImageInput`):
         | 
| 441 | 
            +
                            Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
         | 
| 442 | 
            +
                            passing in images with pixel values between 0 and 1, set `do_rescale=False`.
         | 
| 443 | 
            +
                        videos (`VideoInput`):
         | 
| 444 | 
            +
                            Video to preprocess. Expects a single or batch of videos with pixel values ranging from 0 to 255. If
         | 
| 445 | 
            +
                            passing in videos with pixel values between 0 and 1, set `do_rescale=False`.
         | 
| 446 | 
            +
                        do_resize (`bool`, *optional*, defaults to `self.do_resize`):
         | 
| 447 | 
            +
                            Whether to resize the image.
         | 
| 448 | 
            +
                        size (`Dict[str, int]`, *optional*, defaults to `self.size`):
         | 
| 449 | 
            +
                            Size of the image after resizing. Shortest edge of the image is resized to size["shortest_edge"], with
         | 
| 450 | 
            +
                            the longest edge resized to keep the input aspect ratio.
         | 
| 451 | 
            +
                        resample (`int`, *optional*, defaults to `self.resample`):
         | 
| 452 | 
            +
                            Resampling filter to use if resizing the image. This can be one of the enum `PILImageResampling`. Only
         | 
| 453 | 
            +
                            has an effect if `do_resize` is set to `True`.
         | 
| 454 | 
            +
                        do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
         | 
| 455 | 
            +
                            Whether to rescale the image.
         | 
| 456 | 
            +
                        rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
         | 
| 457 | 
            +
                            Rescale factor to rescale the image by if `do_rescale` is set to `True`.
         | 
| 458 | 
            +
                        do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
         | 
| 459 | 
            +
                            Whether to normalize the image.
         | 
| 460 | 
            +
                        image_mean (`float` or `List[float]`, *optional*, defaults to `self.image_mean`):
         | 
| 461 | 
            +
                            Image mean to use for normalization. Only has an effect if `do_normalize` is set to `True`.
         | 
| 462 | 
            +
                        image_std (`float` or `List[float]`, *optional*, defaults to `self.image_std`):
         | 
| 463 | 
            +
                            Image standard deviation to use for normalization. Only has an effect if `do_normalize` is set to
         | 
| 464 | 
            +
                            `True`.
         | 
| 465 | 
            +
                        do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`):
         | 
| 466 | 
            +
                            Whether to convert the image to RGB.
         | 
| 467 | 
            +
                        return_tensors (`str` or `TensorType`, *optional*):
         | 
| 468 | 
            +
                            The type of tensors to return. Can be one of:
         | 
| 469 | 
            +
                            - Unset: Return a list of `np.ndarray`.
         | 
| 470 | 
            +
                            - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
         | 
| 471 | 
            +
                            - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
         | 
| 472 | 
            +
                            - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
         | 
| 473 | 
            +
                            - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
         | 
| 474 | 
            +
                        data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
         | 
| 475 | 
            +
                            The channel dimension format for the output image. Can be one of:
         | 
| 476 | 
            +
                            - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
         | 
| 477 | 
            +
                            - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
         | 
| 478 | 
            +
                            - Unset: Use the channel dimension format of the input image.
         | 
| 479 | 
            +
                        input_data_format (`ChannelDimension` or `str`, *optional*):
         | 
| 480 | 
            +
                            The channel dimension format for the input image. If unset, the channel dimension format is inferred
         | 
| 481 | 
            +
                            from the input image. Can be one of:
         | 
| 482 | 
            +
                            - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
         | 
| 483 | 
            +
                            - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
         | 
| 484 | 
            +
                            - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
         | 
| 485 | 
            +
             | 
| 486 | 
            +
                    """
         | 
| 487 | 
            +
                    do_resize = do_resize if do_resize is not None else self.do_resize
         | 
| 488 | 
            +
                    size = size if size is not None else self.size
         | 
| 489 | 
            +
                    resample = resample if resample is not None else self.resample
         | 
| 490 | 
            +
                    do_rescale = do_rescale if do_rescale is not None else self.do_rescale
         | 
| 491 | 
            +
                    rescale_factor = (
         | 
| 492 | 
            +
                        rescale_factor if rescale_factor is not None else self.rescale_factor
         | 
| 493 | 
            +
                    )
         | 
| 494 | 
            +
                    do_normalize = do_normalize if do_normalize is not None else self.do_normalize
         | 
| 495 | 
            +
                    image_mean = image_mean if image_mean is not None else self.image_mean
         | 
| 496 | 
            +
                    image_std = image_std if image_std is not None else self.image_std
         | 
| 497 | 
            +
                    do_convert_rgb = (
         | 
| 498 | 
            +
                        do_convert_rgb if do_convert_rgb is not None else self.do_convert_rgb
         | 
| 499 | 
            +
                    )
         | 
| 500 | 
            +
             | 
| 501 | 
            +
                    if images is not None:
         | 
| 502 | 
            +
                        images = make_batched_images(images)
         | 
| 503 | 
            +
                    if videos is not None:
         | 
| 504 | 
            +
                        videos = make_batched_videos(videos)
         | 
| 505 | 
            +
             | 
| 506 | 
            +
                    if images is not None and not valid_images(images):
         | 
| 507 | 
            +
                        raise ValueError(
         | 
| 508 | 
            +
                            "Invalid image type. Must be of type PIL.Image.Image, numpy.ndarray, "
         | 
| 509 | 
            +
                            "torch.Tensor, tf.Tensor or jax.ndarray."
         | 
| 510 | 
            +
                        )
         | 
| 511 | 
            +
             | 
| 512 | 
            +
                    validate_preprocess_arguments(
         | 
| 513 | 
            +
                        rescale_factor=rescale_factor,
         | 
| 514 | 
            +
                        do_normalize=do_normalize,
         | 
| 515 | 
            +
                        image_mean=image_mean,
         | 
| 516 | 
            +
                        image_std=image_std,
         | 
| 517 | 
            +
                        do_resize=do_resize,
         | 
| 518 | 
            +
                        size=size,
         | 
| 519 | 
            +
                        resample=resample,
         | 
| 520 | 
            +
                    )
         | 
| 521 | 
            +
             | 
| 522 | 
            +
                    if images is not None:
         | 
| 523 | 
            +
                        pixel_values, vision_grid_thws = [], []
         | 
| 524 | 
            +
                        for image in images:
         | 
| 525 | 
            +
                            patches, image_grid_thw = self._preprocess(
         | 
| 526 | 
            +
                                image,
         | 
| 527 | 
            +
                                do_resize=do_resize,
         | 
| 528 | 
            +
                                resample=resample,
         | 
| 529 | 
            +
                                do_rescale=do_rescale,
         | 
| 530 | 
            +
                                rescale_factor=rescale_factor,
         | 
| 531 | 
            +
                                do_normalize=do_normalize,
         | 
| 532 | 
            +
                                image_mean=image_mean,
         | 
| 533 | 
            +
                                image_std=image_std,
         | 
| 534 | 
            +
                                data_format=data_format,
         | 
| 535 | 
            +
                                do_convert_rgb=do_convert_rgb,
         | 
| 536 | 
            +
                                input_data_format=input_data_format,
         | 
| 537 | 
            +
                            )
         | 
| 538 | 
            +
                            pixel_values.extend(patches)
         | 
| 539 | 
            +
                            vision_grid_thws.append(image_grid_thw)
         | 
| 540 | 
            +
                        pixel_values = np.array(pixel_values)
         | 
| 541 | 
            +
                        vision_grid_thws = np.array(vision_grid_thws)
         | 
| 542 | 
            +
                        data = {"pixel_values": pixel_values, "image_grid_thw": vision_grid_thws}
         | 
| 543 | 
            +
             | 
| 544 | 
            +
                    if videos is not None:
         | 
| 545 | 
            +
                        pixel_values, vision_grid_thws = [], []
         | 
| 546 | 
            +
                        for images in videos:
         | 
| 547 | 
            +
                            patches, video_grid_thw = self._preprocess(
         | 
| 548 | 
            +
                                images,
         | 
| 549 | 
            +
                                do_resize=do_resize,
         | 
| 550 | 
            +
                                resample=resample,
         | 
| 551 | 
            +
                                do_rescale=do_rescale,
         | 
| 552 | 
            +
                                rescale_factor=rescale_factor,
         | 
| 553 | 
            +
                                do_normalize=do_normalize,
         | 
| 554 | 
            +
                                image_mean=image_mean,
         | 
| 555 | 
            +
                                image_std=image_std,
         | 
| 556 | 
            +
                                data_format=data_format,
         | 
| 557 | 
            +
                                do_convert_rgb=do_convert_rgb,
         | 
| 558 | 
            +
                                input_data_format=input_data_format,
         | 
| 559 | 
            +
                            )
         | 
| 560 | 
            +
                            pixel_values.extend(patches)
         | 
| 561 | 
            +
                            vision_grid_thws.append(video_grid_thw)
         | 
| 562 | 
            +
                        pixel_values = np.array(pixel_values)
         | 
| 563 | 
            +
                        vision_grid_thws = np.array(vision_grid_thws)
         | 
| 564 | 
            +
                        data = {
         | 
| 565 | 
            +
                            "pixel_values_videos": pixel_values,
         | 
| 566 | 
            +
                            "video_grid_thw": vision_grid_thws,
         | 
| 567 | 
            +
                        }
         | 
| 568 | 
            +
             | 
| 569 | 
            +
                    return BatchFeature(data=data, tensor_type=return_tensors)
         | 
    	
        inference.yml
    ADDED
    
    | @@ -0,0 +1,2 @@ | |
|  | |
|  | 
|  | |
| 1 | 
            +
            Global:
         | 
| 2 | 
            +
              model_name: PaddleOCR-VL-0.9B
         | 
    	
        model.safetensors
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:3085f1042e184f68f8a412aa0f64f2c4b8562989598bbfba326aaa11fc685de8
         | 
| 3 | 
            +
            size 1917255968
         | 
    	
        modeling_paddleocr_vl.py
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        preprocessor_config.json
    ADDED
    
    | @@ -0,0 +1,33 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "auto_map": {
         | 
| 3 | 
            +
                "AutoImageProcessor": "image_processing.SiglipImageProcessor",
         | 
| 4 | 
            +
                "AutoProcessor": "processing_paddleocr_vl.PaddleOCRVLProcessor"
         | 
| 5 | 
            +
              },
         | 
| 6 | 
            +
              "do_convert_rgb": true,
         | 
| 7 | 
            +
              "do_normalize": true,
         | 
| 8 | 
            +
              "do_rescale": true,
         | 
| 9 | 
            +
              "do_resize": true,
         | 
| 10 | 
            +
              "image_mean": [
         | 
| 11 | 
            +
                0.5,
         | 
| 12 | 
            +
                0.5,
         | 
| 13 | 
            +
                0.5
         | 
| 14 | 
            +
              ],
         | 
| 15 | 
            +
              "image_processor_type": "SiglipImageProcessor",
         | 
| 16 | 
            +
              "image_std": [
         | 
| 17 | 
            +
                0.5,
         | 
| 18 | 
            +
                0.5,
         | 
| 19 | 
            +
                0.5
         | 
| 20 | 
            +
              ],
         | 
| 21 | 
            +
              "max_pixels": 2822400,
         | 
| 22 | 
            +
              "merge_size": 2,
         | 
| 23 | 
            +
              "min_pixels": 147384,
         | 
| 24 | 
            +
              "patch_size": 14,
         | 
| 25 | 
            +
              "processor_class": "PaddleOCRVLProcessor",
         | 
| 26 | 
            +
              "resample": 3,
         | 
| 27 | 
            +
              "rescale_factor": 0.00392156862745098,
         | 
| 28 | 
            +
              "size": {
         | 
| 29 | 
            +
                "max_pixels": 2822400,
         | 
| 30 | 
            +
                "min_pixels": 147384
         | 
| 31 | 
            +
              },
         | 
| 32 | 
            +
              "temporal_patch_size": 1
         | 
| 33 | 
            +
            }
         | 
    	
        processing_paddleocr_vl.py
    ADDED
    
    | @@ -0,0 +1,293 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
         | 
| 2 | 
            +
            #
         | 
| 3 | 
            +
            # Licensed under the Apache License, Version 2.0 (the "License");
         | 
| 4 | 
            +
            # you may not use this file except in compliance with the License.
         | 
| 5 | 
            +
            # You may obtain a copy of the License at
         | 
| 6 | 
            +
            #
         | 
| 7 | 
            +
            #     http://www.apache.org/licenses/LICENSE-2.0
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            # Unless required by applicable law or agreed to in writing, software
         | 
| 10 | 
            +
            # distributed under the License is distributed on an "AS IS" BASIS,
         | 
| 11 | 
            +
            # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
         | 
| 12 | 
            +
            # See the License for the specific language governing permissions and
         | 
| 13 | 
            +
            # limitations under the License.
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            from typing import List, Union
         | 
| 16 | 
            +
            import numpy as np
         | 
| 17 | 
            +
            import torch
         | 
| 18 | 
            +
            from transformers.feature_extraction_utils import BatchFeature
         | 
| 19 | 
            +
            from transformers.processing_utils import (
         | 
| 20 | 
            +
                ProcessingKwargs,
         | 
| 21 | 
            +
                ProcessorMixin,
         | 
| 22 | 
            +
                Unpack,
         | 
| 23 | 
            +
                VideosKwargs,
         | 
| 24 | 
            +
            )
         | 
| 25 | 
            +
            from transformers.tokenization_utils_base import PreTokenizedInput, TextInput
         | 
| 26 | 
            +
             | 
| 27 | 
            +
             | 
| 28 | 
            +
            # Accepted image inputs: a single image (PIL image, NumPy array or torch
            # tensor) or a flat list of images of one of those kinds. The PIL and torch
            # names are string forward references.
            ImageInput = Union[
                "PIL.Image.Image",
                np.ndarray,
                "torch.Tensor",
                List["PIL.Image.Image"],
                List[np.ndarray],
                List["torch.Tensor"],
            ]  # noqa


            # Accepted video inputs: a single array/tensor, a list of frames or
            # arrays/tensors, or a nested list (one inner list per video). All member
            # types are string forward references.
            VideoInput = Union[
                List["PIL.Image.Image"],
                "np.ndarray",
                "torch.Tensor",
                List["np.ndarray"],
                List["torch.Tensor"],
                List[List["PIL.Image.Image"]],
                # Fixed: this forward reference was misspelled "np.ndarrray", which can
                # never resolve to a real type.
                List[List["np.ndarray"]],
                List[List["torch.Tensor"]],
            ]  # noqa
         | 
| 48 | 
            +
             | 
| 49 | 
            +
             | 
| 50 | 
            +
            # Video-specific processor kwargs: extends `VideosKwargs` with an `fps` entry.
            # `total=False` makes every key optional.
            class PaddleOCRVLVideosProcessorKwargs(VideosKwargs, total=False):
         | 
| 51 | 
            +
                # Frame rate: either a single number or a list of numbers.
                fps: Union[List[float], float]
         | 
| 52 | 
            +
             | 
| 53 | 
            +
             | 
| 54 | 
            +
            class PaddleOCRVLProcessorKwargs(ProcessingKwargs, total=False):
                """Keyword-argument schema for `PaddleOCRVLProcessor`, with its default values.

                `videos_kwargs` is typed with the PaddleOCR-VL specific video kwargs, and
                `_defaults` provides the values used when the caller does not override
                them: no text padding and a video frame rate of 2.0.
                """

                videos_kwargs: PaddleOCRVLVideosProcessorKwargs
                _defaults = {
                    "text_kwargs": {"padding": False},
                    "videos_kwargs": {"fps": 2.0},
                }
         | 
| 62 | 
            +
             | 
| 63 | 
            +
             | 
| 64 | 
            +
            class PaddleOCRVLProcessor(ProcessorMixin):
         | 
| 65 | 
            +
                r"""
         | 
| 66 | 
            +
                [`PaddleOCRVLProcessor`] offers all the functionalities of [`SiglipImageProcessor`] and [`Qwen2TokenizerFast`]. See the
         | 
| 67 | 
            +
                [`~PaddleOCRVLProcessor.__call__`] and [`~PaddleOCRVLProcessor.decode`] for more information.
         | 
| 68 | 
            +
                Args:
         | 
| 69 | 
            +
                    image_processor ([`SiglipImageProcessor`], *optional*):
         | 
| 70 | 
            +
                        The image processor is a required input.
         | 
| 71 | 
            +
                    tokenizer ([`Qwen2TokenizerFast`], *optional*):
         | 
| 72 | 
            +
                        The tokenizer is a required input.
         | 
| 73 | 
            +
                    chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
         | 
| 74 | 
            +
                        in a chat into a tokenizable string.
         | 
| 75 | 
            +
                """
         | 
| 76 | 
            +
             | 
| 77 | 
            +
                attributes = ["image_processor", "tokenizer"]
         | 
| 78 | 
            +
                valid_kwargs = [
         | 
| 79 | 
            +
                    "chat_template",
         | 
| 80 | 
            +
                    "image_std",
         | 
| 81 | 
            +
                    "min_pixels",
         | 
| 82 | 
            +
                    "image_mean",
         | 
| 83 | 
            +
                    "merge_size",
         | 
| 84 | 
            +
                    "image_processor_type",
         | 
| 85 | 
            +
                    "temporal_patch_size",
         | 
| 86 | 
            +
                    "patch_size",
         | 
| 87 | 
            +
                    "max_pixels",
         | 
| 88 | 
            +
                ]
         | 
| 89 | 
            +
             | 
| 90 | 
            +
                image_processor_class = "AutoImageProcessor"
         | 
| 91 | 
            +
                tokenizer_class = "AutoTokenizer"
         | 
| 92 | 
            +
             | 
| 93 | 
            +
                def __init__(
                    self, image_processor=None, tokenizer=None, chat_template=None, **kwargs
                ):
                    """Resolve the image/video placeholder tokens, then initialise the base processor.

                    Args:
                        image_processor: Image processor forwarded to the parent constructor.
                        tokenizer: Tokenizer forwarded to the parent constructor. Its
                            `image_token` / `video_token` attributes are used when present.
                        chat_template: Chat template forwarded to the parent constructor.
                        **kwargs: Accepted but not forwarded to the parent constructor.
                    """
                    # Fall back to the literal placeholder only when the tokenizer does not
                    # expose the corresponding attribute.
                    self.image_token = getattr(tokenizer, "image_token", "<|IMAGE_PLACEHOLDER|>")
                    self.video_token = getattr(tokenizer, "video_token", "<|video_pad|>")
                    super().__init__(image_processor, tokenizer, chat_template=chat_template)
         | 
| 107 | 
            +
             | 
| 108 | 
            +
                def __call__(
         | 
| 109 | 
            +
                    self,
         | 
| 110 | 
            +
                    images: ImageInput = None,
         | 
| 111 | 
            +
                    text: Union[
         | 
| 112 | 
            +
                        TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]
         | 
| 113 | 
            +
                    ] = None,
         | 
| 114 | 
            +
                    videos: VideoInput = None,
         | 
| 115 | 
            +
                    **kwargs: Unpack[PaddleOCRVLProcessorKwargs],
         | 
| 116 | 
            +
                ) -> BatchFeature:
         | 
| 117 | 
            +
                    """
         | 
| 118 | 
            +
                    Main method to prepare for the model one or several sequence(s) and image(s). This method forwards the `text`
         | 
| 119 | 
            +
                    and `kwargs` arguments to Qwen2TokenizerFast's [`~Qwen2TokenizerFast.__call__`] if `text` is not `None` to encode
         | 
| 120 | 
            +
                    the text. To prepare the vision inputs, this method forwards the `vision_infos` and `kwargs` arguments to
         | 
| 121 | 
            +
                    SiglipImageProcessor's [`~SiglipImageProcessor.__call__`] if `vision_infos` is not `None`.
         | 
| 122 | 
            +
             | 
| 123 | 
            +
                    Args:
         | 
| 124 | 
            +
                        images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
         | 
| 125 | 
            +
                            The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
         | 
| 126 | 
            +
                            tensor. Both channels-first and channels-last formats are supported.
         | 
| 127 | 
            +
                        text (`str`, `List[str]`, `List[List[str]]`):
         | 
| 128 | 
            +
                            The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
         | 
| 129 | 
            +
                            (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
         | 
| 130 | 
            +
                            `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
         | 
| 131 | 
            +
                        videos (`np.ndarray`, `torch.Tensor`, `List[np.ndarray]`, `List[torch.Tensor]`):
         | 
| 132 | 
            +
                            The video or batch of videos to be prepared. Each video can be a 4D NumPy array or PyTorch
         | 
| 133 | 
            +
                            tensor, or a nested list of 3D frames. Both channels-first and channels-last formats are supported.
         | 
| 134 | 
            +
                        return_tensors (`str` or [`~utils.TensorType`], *optional*):
         | 
| 135 | 
            +
                            If set, will return tensors of a particular framework. Acceptable values are:
         | 
| 136 | 
            +
                            - `'tf'`: Return TensorFlow `tf.constant` objects.
         | 
| 137 | 
            +
                            - `'pt'`: Return PyTorch `torch.Tensor` objects.
         | 
| 138 | 
            +
                            - `'np'`: Return NumPy `np.ndarray` objects.
         | 
| 139 | 
            +
                            - `'jax'`: Return JAX `jnp.ndarray` objects.
         | 
| 140 | 
            +
             | 
| 141 | 
            +
                    Returns:
         | 
| 142 | 
            +
                        [`BatchFeature`]: A [`BatchFeature`] with the following fields:
         | 
| 143 | 
            +
             | 
| 144 | 
            +
                        - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
         | 
| 145 | 
            +
                        - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
         | 
| 146 | 
            +
                          `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
         | 
| 147 | 
            +
                          `None`).
         | 
| 148 | 
            +
                        - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
         | 
| 149 | 
            +
                        - **pixel_values_videos** -- Pixel values of videos to be fed to a model. Returned when `videos` is not `None`.
         | 
| 150 | 
            +
                        - **image_grid_thw** -- List of image 3D grid in LLM. Returned when `images` is not `None`.
         | 
| 151 | 
            +
                        - **video_grid_thw** -- List of video 3D grid in LLM. Returned when `videos` is not `None`.
         | 
| 152 | 
            +
                        - **second_per_grid_ts** -- List of video seconds per time grid. Returned when `videos` is not `None`.
         | 
| 153 | 
            +
                    """
         | 
| 154 | 
            +
                    output_kwargs = self._merge_kwargs(
         | 
| 155 | 
            +
                        PaddleOCRVLProcessorKwargs,
         | 
| 156 | 
            +
                        tokenizer_init_kwargs=self.tokenizer.init_kwargs,
         | 
| 157 | 
            +
                        **kwargs,
         | 
| 158 | 
            +
                    )
         | 
| 159 | 
            +
             | 
| 160 | 
            +
                    if images is not None:
         | 
| 161 | 
            +
                        image_inputs = self.image_processor(images=images, return_tensors="pt")
         | 
| 162 | 
            +
                        image_inputs["pixel_values"] = image_inputs["pixel_values"]
         | 
| 163 | 
            +
                        image_grid_thw = image_inputs["image_grid_thw"]
         | 
| 164 | 
            +
             | 
| 165 | 
            +
                    else:
         | 
| 166 | 
            +
                        image_inputs = {}
         | 
| 167 | 
            +
                        image_grid_thw = None
         | 
| 168 | 
            +
             | 
| 169 | 
            +
                    if videos is not None:
         | 
| 170 | 
            +
                        # TODO: add video processing
         | 
| 171 | 
            +
                        videos_inputs = self.image_processor(
         | 
| 172 | 
            +
                            images=None, videos=videos, **output_kwargs["images_kwargs"]
         | 
| 173 | 
            +
                        )
         | 
| 174 | 
            +
                        video_grid_thw = videos_inputs["video_grid_thw"]
         | 
| 175 | 
            +
             | 
| 176 | 
            +
                        fps = output_kwargs["videos_kwargs"].pop("fps", 2.0)
         | 
| 177 | 
            +
                        if isinstance(fps, (int, float)):
         | 
| 178 | 
            +
                            second_per_grid_ts = [
         | 
| 179 | 
            +
                                self.image_processor.temporal_patch_size / fps
         | 
| 180 | 
            +
                            ] * len(video_grid_thw)
         | 
| 181 | 
            +
                        elif hasattr(fps, "__len__") and len(fps) == len(video_grid_thw):
         | 
| 182 | 
            +
                            second_per_grid_ts = [
         | 
| 183 | 
            +
                                self.image_processor.temporal_patch_size / tmp for tmp in fps
         | 
| 184 | 
            +
                            ]
         | 
| 185 | 
            +
                        else:
         | 
| 186 | 
            +
                            raise ValueError(
         | 
| 187 | 
            +
                                f"The length of fps ({len(fps) if hasattr(fps, '__len__') else fps}) must be equal to the length of video_grid_thw ({len(video_grid_thw)}) or fps should be a single number."
         | 
| 188 | 
            +
                            )
         | 
| 189 | 
            +
                        videos_inputs.update(
         | 
| 190 | 
            +
                            {"second_per_grid_ts": torch.tensor(second_per_grid_ts)}
         | 
| 191 | 
            +
                        )
         | 
| 192 | 
            +
             | 
| 193 | 
            +
                    else:
         | 
| 194 | 
            +
                        videos_inputs = {}
         | 
| 195 | 
            +
                        video_grid_thw = None
         | 
| 196 | 
            +
             | 
| 197 | 
            +
                    if not isinstance(text, list):
         | 
| 198 | 
            +
                        text = [text]
         | 
| 199 | 
            +
             | 
| 200 | 
            +
                    if image_grid_thw is not None:
         | 
| 201 | 
            +
                        index = 0
         | 
| 202 | 
            +
                        for i in range(len(text)):
         | 
| 203 | 
            +
                            while self.image_token in text[i]:
         | 
| 204 | 
            +
                                text[i] = text[i].replace(
         | 
| 205 | 
            +
                                    self.image_token,
         | 
| 206 | 
            +
                                    "<|placeholder|>"
         | 
| 207 | 
            +
                                    * (
         | 
| 208 | 
            +
                                        image_grid_thw[index].prod()
         | 
| 209 | 
            +
                                        // self.image_processor.merge_size
         | 
| 210 | 
            +
                                        // self.image_processor.merge_size
         | 
| 211 | 
            +
                                    ),
         | 
| 212 | 
            +
                                    1,
         | 
| 213 | 
            +
                                )
         | 
| 214 | 
            +
                                index += 1
         | 
| 215 | 
            +
                            text[i] = text[i].replace("<|placeholder|>", self.image_token)
         | 
| 216 | 
            +
             | 
| 217 | 
            +
                    if video_grid_thw is not None:
         | 
| 218 | 
            +
                        index = 0
         | 
| 219 | 
            +
                        for i in range(len(text)):
         | 
| 220 | 
            +
                            while self.video_token in text[i]:
         | 
| 221 | 
            +
                                text[i] = text[i].replace(
         | 
| 222 | 
            +
                                    self.video_token,
         | 
| 223 | 
            +
                                    "<|placeholder|>"
         | 
| 224 | 
            +
                                    * (
         | 
| 225 | 
            +
                                        video_grid_thw[index].prod()
         | 
| 226 | 
            +
                                        // self.image_processor.merge_size
         | 
| 227 | 
            +
                                        // self.image_processor.merge_size
         | 
| 228 | 
            +
                                    ),
         | 
| 229 | 
            +
                                    1,
         | 
| 230 | 
            +
                                )
         | 
| 231 | 
            +
                                index += 1
         | 
| 232 | 
            +
                            text[i] = text[i].replace("<|placeholder|>", self.video_token)
         | 
| 233 | 
            +
             | 
| 234 | 
            +
                    text_inputs = self.tokenizer(text, **output_kwargs["text_kwargs"])
         | 
| 235 | 
            +
             | 
| 236 | 
            +
                    return BatchFeature(data={**text_inputs, **image_inputs, **videos_inputs})
         | 
| 237 | 
            +
             | 
| 238 | 
            +
                def batch_decode(self, *args, **kwargs):
         | 
| 239 | 
            +
                    """
         | 
| 240 | 
            +
                    This method forwards all its arguments to Qwen2TokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
         | 
| 241 | 
            +
                    refer to the docstring of this method for more information.
         | 
| 242 | 
            +
                    """
         | 
| 243 | 
            +
                    return self.tokenizer.batch_decode(*args, **kwargs)
         | 
| 244 | 
            +
             | 
| 245 | 
            +
                def decode(self, *args, **kwargs):
         | 
| 246 | 
            +
                    """
         | 
| 247 | 
            +
                    This method forwards all its arguments to Qwen2TokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
         | 
| 248 | 
            +
                    the docstring of this method for more information.
         | 
| 249 | 
            +
                    """
         | 
| 250 | 
            +
                    return self.tokenizer.decode(*args, **kwargs)
         | 
| 251 | 
            +
             | 
| 252 | 
            +
                def post_process_image_text_to_text(
         | 
| 253 | 
            +
                    self,
         | 
| 254 | 
            +
                    generated_outputs,
         | 
| 255 | 
            +
                    skip_special_tokens=True,
         | 
| 256 | 
            +
                    clean_up_tokenization_spaces=False,
         | 
| 257 | 
            +
                    **kwargs,
         | 
| 258 | 
            +
                ):
         | 
| 259 | 
            +
                    """
         | 
| 260 | 
            +
                    Post-process the output of the model to decode the text.
         | 
| 261 | 
            +
             | 
| 262 | 
            +
                    Args:
         | 
| 263 | 
            +
                        generated_outputs (`torch.Tensor` or `np.ndarray`):
         | 
| 264 | 
            +
                            The output of the model `generate` function. The output is expected to be a tensor of shape `(batch_size, sequence_length)`
         | 
| 265 | 
            +
                            or `(sequence_length,)`.
         | 
| 266 | 
            +
                        skip_special_tokens (`bool`, *optional*, defaults to `True`):
         | 
| 267 | 
            +
                            Whether or not to remove special tokens in the output. Argument passed to the tokenizer's `batch_decode` method.
         | 
| 268 | 
            +
                        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
         | 
| 269 | 
            +
                            Whether or not to clean up the tokenization spaces. Argument passed to the tokenizer's `batch_decode` method.
         | 
| 270 | 
            +
                        **kwargs:
         | 
| 271 | 
            +
                            Additional arguments to be passed to the tokenizer's `batch_decode` method.
         | 
| 272 | 
            +
             | 
| 273 | 
            +
                    Returns:
         | 
| 274 | 
            +
                        `List[str]`: The decoded text.
         | 
| 275 | 
            +
                    """
         | 
| 276 | 
            +
                    return self.tokenizer.batch_decode(
         | 
| 277 | 
            +
                        generated_outputs,
         | 
| 278 | 
            +
                        skip_special_tokens=skip_special_tokens,
         | 
| 279 | 
            +
                        clean_up_tokenization_spaces=clean_up_tokenization_spaces,
         | 
| 280 | 
            +
                        **kwargs,
         | 
| 281 | 
            +
                    )
         | 
| 282 | 
            +
             | 
| 283 | 
            +
                @property
         | 
| 284 | 
            +
                def model_input_names(self):
         | 
| 285 | 
            +
                    tokenizer_input_names = self.tokenizer.model_input_names
         | 
| 286 | 
            +
                    image_processor_input_names = self.image_processor.model_input_names
         | 
| 287 | 
            +
                    names_from_processor = list(
         | 
| 288 | 
            +
                        dict.fromkeys(tokenizer_input_names + image_processor_input_names)
         | 
| 289 | 
            +
                    )
         | 
| 290 | 
            +
                    return names_from_processor + ["second_per_grid_ts"]
         | 
| 291 | 
            +
             | 
| 292 | 
            +
             | 
| 293 | 
            +
            __all__ = ["PaddleOCRVLProcessor"]  # public API of this module; each exported name is listed once
         | 
    	
        processor_config.json
    ADDED
    
    | @@ -0,0 +1,6 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "auto_map": {
         | 
| 3 | 
            +
                "AutoProcessor": "processing_paddleocr_vl.PaddleOCRVLProcessor"
         | 
| 4 | 
            +
              },
         | 
| 5 | 
            +
              "processor_class": "PaddleOCRVLProcessor"
         | 
| 6 | 
            +
            }
         | 
    	
        special_tokens_map.json
    ADDED
    
    | @@ -0,0 +1,58 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
              "additional_special_tokens": [
         | 
| 3 | 
            +
                "<|IMAGE_PLACEHOLDER|>",
         | 
| 4 | 
            +
                "<|image_pad|>",
         | 
| 5 | 
            +
                "<|IMAGE_START|>",
         | 
| 6 | 
            +
                "<|IMAGE_END|>",
         | 
| 7 | 
            +
                "<|video_pad|>"
         | 
| 8 | 
            +
              ],
         | 
| 9 | 
            +
              "bos_token": {
         | 
| 10 | 
            +
                "content": "<s>",
         | 
| 11 | 
            +
                "lstrip": false,
         | 
| 12 | 
            +
                "normalized": false,
         | 
| 13 | 
            +
                "rstrip": false,
         | 
| 14 | 
            +
                "single_word": false
         | 
| 15 | 
            +
              },
         | 
| 16 | 
            +
              "cls_token": {
         | 
| 17 | 
            +
                "content": "<|begin_of_sentence|>",
         | 
| 18 | 
            +
                "lstrip": false,
         | 
| 19 | 
            +
                "normalized": false,
         | 
| 20 | 
            +
                "rstrip": false,
         | 
| 21 | 
            +
                "single_word": false
         | 
| 22 | 
            +
              },
         | 
| 23 | 
            +
              "eos_token": {
         | 
| 24 | 
            +
                "content": "</s>",
         | 
| 25 | 
            +
                "lstrip": false,
         | 
| 26 | 
            +
                "normalized": false,
         | 
| 27 | 
            +
                "rstrip": false,
         | 
| 28 | 
            +
                "single_word": false
         | 
| 29 | 
            +
              },
         | 
| 30 | 
            +
              "mask_token": {
         | 
| 31 | 
            +
                "content": "<mask:1>",
         | 
| 32 | 
            +
                "lstrip": false,
         | 
| 33 | 
            +
                "normalized": false,
         | 
| 34 | 
            +
                "rstrip": false,
         | 
| 35 | 
            +
                "single_word": false
         | 
| 36 | 
            +
              },
         | 
| 37 | 
            +
              "pad_token": {
         | 
| 38 | 
            +
                "content": "<unk>",
         | 
| 39 | 
            +
                "lstrip": false,
         | 
| 40 | 
            +
                "normalized": false,
         | 
| 41 | 
            +
                "rstrip": false,
         | 
| 42 | 
            +
                "single_word": false
         | 
| 43 | 
            +
              },
         | 
| 44 | 
            +
              "sep_token": {
         | 
| 45 | 
            +
                "content": "<|end_of_sentence|>",
         | 
| 46 | 
            +
                "lstrip": false,
         | 
| 47 | 
            +
                "normalized": false,
         | 
| 48 | 
            +
                "rstrip": false,
         | 
| 49 | 
            +
                "single_word": false
         | 
| 50 | 
            +
              },
         | 
| 51 | 
            +
              "unk_token": {
         | 
| 52 | 
            +
                "content": "<unk>",
         | 
| 53 | 
            +
                "lstrip": false,
         | 
| 54 | 
            +
                "normalized": false,
         | 
| 55 | 
            +
                "rstrip": false,
         | 
| 56 | 
            +
                "single_word": false
         | 
| 57 | 
            +
              }
         | 
| 58 | 
            +
            }
         | 
    	
        tokenizer.json
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:f90f04fd8e5eb6dfa380f37d10c87392de8438dccb6768a2486b5a96ee76dba6
         | 
| 3 | 
            +
            size 11187679
         | 
    	
        tokenizer.model
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:34ef7db83df785924fb83d7b887b6e822a031c56e15cff40aaf9b982988180df
         | 
| 3 | 
            +
            size 1614363
         | 
    	
        tokenizer_config.json
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
