# Model settings
device = "cuda"
model_name = "openbmb/MiniCPM-V-2_6"

# Decoding settings
# Note: TOML booleans must be lowercase (`true` / `false`); `True` does not parse.
sampling = true
stream = true
repetition_penalty = 1.05