import gradio as gr
import re
import os
import cv2
import torch
from PIL import Image
from pathlib import Path
from pdf2image import convert_from_path
from transformers import DonutProcessor, VisionEncoderDecoderModel

from models.experimental import attempt_load
from utils.datasets import LoadImage
from utils.general import check_img_size, non_max_suppression, scale_coords, set_logging
from utils.torch_utils import select_device

# sudo apt-get install poppler-utils  (required by pdf2image)

key = str(os.environ.get('key'))


def check_image(image):
    # If the upload is a PDF, convert each page to a JPEG; otherwise open it as an image.
    try:
        images = convert_from_path(Path(image.name), fmt="jpeg", size=(960, 1280))
        return images
    except Exception:
        return [Image.open(image)]


def crop(files='',             # input images
         weights='yolov7.pt',  # model.pt path(s)
         classes=None,         # filter by class: --class 0, or --class 0 2 3
         imgsz=640,            # inference size (pixels)
         device='',            # cuda device, i.e. 0 or 0,1,2,3 or cpu
         conf_thres=0.25,      # object confidence threshold
         iou_thres=0.45,       # IOU threshold for NMS
         augment=False,        # augmented inference
         agnostic_nms=False):  # class-agnostic NMS
    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size
    if half:
        model.half()  # to FP16

    # Set Dataloader
    dataset = LoadImage(files=files, img_size=imgsz, stride=stride)

    # Get class names
    names = model.module.names if hasattr(model, 'module') else model.names

    # Run inference
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once

    list_cropobj = []
    for img, img0s in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        with torch.no_grad():  # calculating gradients would cause a GPU memory leak
            pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if len(det):
                # Rescale boxes from img_size to img0s size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0s.shape).round()

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    # Crop the image based on the detection's box coordinates
                    object_coordinates = [int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])]
                    cropobj_bgr = img0s[object_coordinates[1]:object_coordinates[3],
                                        object_coordinates[0]:object_coordinates[2]]
                    cropobj_rgb = cv2.cvtColor(cropobj_bgr, cv2.COLOR_BGR2RGB)
                    clase = names[int(cls)]  # human-readable class name (currently unused)
                    list_cropobj.append([Image.fromarray(cropobj_rgb), int(cls)])

    return list_cropobj


def get_attributes(input_img):
    # access_token = str(os.environ.get('key'))
    access_token = key
    processor = DonutProcessor.from_pretrained("ClipAI/license-demo", use_auth_token=access_token)
    model = VisionEncoderDecoderModel.from_pretrained("ClipAI/license-demo", use_auth_token=access_token)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.eval()
    model.to(device)

    images = check_image(input_img)
    images = crop(weights="best.pt", files=images)
    # Class label 1 selects driver's licenses; replacing it with 0 selects ID cards (cédulas) instead.
    image_cedula = [img[0] for img in images if img[1] == 1][0]

    pixel_values = processor(image_cedula, return_tensors="pt").pixel_values
    pixel_values = pixel_values.to(device)
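    # Sanity check: the processor returns pixel_values as a (batch, channels, height, width) tensor.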
    print(pixel_values.size())

    # Prepare decoder inputs
    task_prompt = ""
    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
    decoder_input_ids = decoder_input_ids.to(device)

    # Autoregressively generate the output sequence
    outputs = model.generate(
        pixel_values,
        decoder_input_ids=decoder_input_ids,
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    # Turn the decoded sequence into JSON
    seq = processor.batch_decode(outputs.sequences)[0]
    seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    seq = re.sub(r"<.*?>", "", seq, count=1).strip()  # remove first task start token
    seq = processor.token2json(seq)
    return str(seq)


# demo = gr.Interface(get_attributes, "file", "label")
# demo.launch()
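
# A minimal entry point mirroring the commented-out demo above. This sketch assumes `best.pt`
# sits alongside this script, the `ClipAI/license-demo` checkpoint is reachable, and the `key`
# environment variable holds a valid Hugging Face access token:
if __name__ == "__main__":
    demo = gr.Interface(fn=get_attributes, inputs="file", outputs="label")
    demo.launch()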