---
library_name: transformers
language:
- en
---

### Usage

```python
from transformers import AutoConfig, AutoTokenizer
from optimum.intel.openvino import OVModelForCausalLM

ov_config = {"PERFORMANCE_HINT": "LATENCY", "NUM_STREAMS": "1", "CACHE_DIR": "", "INFERENCE_PRECISION_HINT": "f16"}

tok = AutoTokenizer.from_pretrained("xriminact/llama-3-8b-instruct-openvino-int4", trust_remote_code=True)
ov_model = OVModelForCausalLM.from_pretrained(
    "xriminact/llama-3-8b-instruct-openvino-int4",
    device="GPU",
    ov_config=ov_config,
    config=AutoConfig.from_pretrained("xriminact/llama-3-8b-instruct-openvino-int4", trust_remote_code=True),
    trust_remote_code=True,
)

test_string = "What is OpenVino?"
input_tokens = tok(test_string, return_tensors="pt")
answer = ov_model.generate(**input_tokens, max_new_tokens=200)
print(tok.batch_decode(answer, skip_special_tokens=True)[0])
```