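# Transcribe all of the text in an image with LLaVA and ask the model to
# return the result as a JSON dictionary. Built around the eval_model
# helper from the LLaVA repo (https://github.com/haotian-liu/LLaVA).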
from LLaVA.llava.model.builder import load_pretrained_model
from LLaVA.llava.mm_utils import get_model_name_from_path
from LLaVA.llava.eval.run_llava import eval_model
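
# Optional: load the model components directly when you need the tokenizer,
# model, or image processor outside of eval_model.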
# model_path = "liuhaotian/llava-v1.5-7b"
# tokenizer, model, image_processor, context_len = load_pretrained_model(
# model_path=model_path,
# model_base=None,
# model_name=get_model_name_from_path(model_path)
# )
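
# Checkpoint selection: the 7B variants above are left commented out; the
# 34B model is the most capable but also needs far more GPU memory.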
# model_path = "liuhaotian/llava-v1.5-7b"
# model_path = "liuhaotian/llava-v1.6-mistral-7b"
model_path = "liuhaotian/llava-v1.6-34b"
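
# Ask for the transcription wrapped in a JSON dictionary so the model's
# output is easy to parse downstream.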
prompt = """I need you to transcribe all of the text in this image. Place the transcribed text into a JSON dictionary with this form {"Transcription": "text"}"""
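
# Image to transcribe: eval_model accepts a local path or an http(s) URL.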
# image_file = "https://llava-vl.github.io/static/images/view.jpg"
image_file = "/home/brlab/Dropbox/VoucherVision/demo/demo_images/MICH_16205594_Poaceae_Jouvea_pilosa.jpg"
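
# eval_model expects an argparse-style namespace; build one on the fly.
# temperature=0 selects greedy (deterministic) decoding.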
args = type('Args', (), {
    "model_path": model_path,
    "model_base": None,
    "model_name": get_model_name_from_path(model_path),
    "query": prompt,
    "conv_mode": None,
    "image_file": image_file,
    "sep": ",",
    "temperature": 0,
    "top_p": None,
    "num_beams": 1,
    "max_new_tokens": 512,
# "load_8_bit": True,
})()
eval_model(args)
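
# Note: the 34B checkpoint needs roughly 70 GB for fp16 weights alone
# (34B params x 2 bytes), so multi-GPU inference or quantized loading is
# usually required.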