# Model settings
device = "cuda"
model_name = "openbmb/MiniCPM-o-2_6"

# Decoding settings
# NOTE: TOML booleans must be lowercase (`true` / `false`).
sampling = false
stream = false
repetition_penalty = 1.05