from clip_component import get_token_from_clip
from grounding_component import run_grounding


def detect(image):
    """Describe *image* via the CLIP component, then run grounding with it.

    The value returned by ``get_token_from_clip(image)`` is echoed to
    stdout (prefixed with ``describe:``) and forwarded, together with the
    original image, to ``run_grounding``.

    Args:
        image: Input handed unchanged to both components.
            NOTE(review): the expected type/format (path, PIL image,
            array, ...) is not visible here -- confirm against the
            component modules.

    Returns:
        Whatever ``run_grounding(image, <description>)`` returns.
        NOTE(review): presumably an annotated/predicted image, judging
        by the original variable name -- confirm against
        ``grounding_component``.
    """
    clip_description = get_token_from_clip(image)
    # Surface the intermediate description so a run can be inspected.
    print('describe:', clip_description)
    return run_grounding(image, clip_description)