{
"_valid_processor_keys": [
"images",
"do_resize",
"size",
"resample",
"do_center_crop",
"crop_size",
"do_rescale",
"rescale_factor",
"do_normalize",
"image_mean",
"image_std",
"do_convert_rgb",
"return_tensors",
"data_format",
"input_data_format"
],
"auto_map": {
"AutoProcessor": "processing_florence2.Florence2Processor"
},
"crop_size": {
"height": 768,
"width": 768
},
"do_center_crop": false,
"do_convert_rgb": null,
"do_normalize": true,
"do_rescale": true,
"do_resize": true,
"image_mean": [
0.485,
0.456,
0.406
],
"image_processor_type": "CLIPImageProcessor",
"image_seq_length": 577,
"image_std": [
0.229,
0.224,
0.225
],
"processor_class": "Florence2Processor",
"resample": 3,
"rescale_factor": 0.00392156862745098,
"size": {
"height": 768,
"width": 768
},
"tasks_answer_post_processing_type": {
"<OCR>": "pure_text",
"<OCR_WITH_REGION>": "ocr",
"<CAPTION>": "pure_text",
"<DETAILED_CAPTION>": "pure_text",
"<MORE_DETAILED_CAPTION>": "pure_text",
"<OD>": "description_with_bboxes",
"<DENSE_REGION_CAPTION>": "description_with_bboxes",
"<CAPTION_TO_PHRASE_GROUNDING>": "phrase_grounding",
"<REFERRING_EXPRESSION_SEGMENTATION>": "polygons",
"<REGION_TO_SEGMENTATION>": "polygons",
"<OPEN_VOCABULARY_DETECTION>": "description_with_bboxes_or_polygons",
"<REGION_TO_CATEGORY>": "pure_text",
"<REGION_TO_DESCRIPTION>": "pure_text",
"<REGION_TO_OCR>": "pure_text",
"<REGION_PROPOSAL>": "bboxes"
},
"task_prompts_without_inputs": {
"<OCR>": "What is the text in the image?",
"<OCR_WITH_REGION>": "What is the text in the image, with regions?",
"<CAPTION>": "What does the image describe?",
"<DETAILED_CAPTION>": "Describe in detail what is shown in the image.",
"<MORE_DETAILED_CAPTION>": "Describe with a paragraph what is shown in the image.",
"<OD>": "Locate the objects with category name in the image.",
"<DENSE_REGION_CAPTION>": "Locate the objects in the image, with their descriptions.",
"<REGION_PROPOSAL>": "Locate the region proposals in the image."
},
"task_prompts_with_input": {
"<CAPTION_TO_PHRASE_GROUNDING>": "Locate the phrases in the caption: {input}",
"<REFERRING_EXPRESSION_SEGMENTATION>": "Locate {input} in the image with mask",
"<REGION_TO_SEGMENTATION>": "What is the polygon mask of region {input}",
"<OPEN_VOCABULARY_DETECTION>": "Locate {input} in the image.",
"<REGION_TO_CATEGORY>": "What is the region {input}?",
"<REGION_TO_DESCRIPTION>": "What does the region {input} describe?",
"<REGION_TO_OCR>": "What text is in the region {input}?"
}
}