---
license: mit
datasets:
- yueqingyou/BioQwen
language:
- en
- zh
tags:
- BioQwen
- 0.5B
- Biomedical
- Multi-Tasks
---

# BioQwen: A Small-Parameter, High-Performance Bilingual Model for Biomedical Multi-Tasks

For model inference, please refer to the following example code:

```python
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM

transformers.logging.set_verbosity_error()

# Upper bound on the number of *newly generated* tokens: this value is passed
# to generate() as max_new_tokens, not as a limit on the total sequence length.
max_length = 512
model_path = 'yueqingyou/BioQwen-0.5B'

tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
# NOTE: attn_implementation='flash_attention_2' requires the flash-attn package
# and a GPU it supports; remove the argument to use the default attention.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map='auto',
    torch_dtype=torch.bfloat16,
    attn_implementation='flash_attention_2',
).eval()


def predict(prompt):
    """Generate the model's answer to a single user prompt.

    The system message is picked by comparing how many characters of
    ``prompt`` fall in the CJK range U+4E00-U+9FFF against how many are
    ASCII letters: the Chinese system message is used only when the CJK
    count is strictly larger, otherwise the English one is used.

    Args:
        prompt: The user's question as a string.

    Returns:
        The decoded completion with the prompt tokens and special tokens
        removed and surrounding whitespace stripped.
    """
    zh_system = "你是千问生物智能助手,一个专注于生物领域的先进人工智能。"
    en_system = "You are BioQwen, an advanced AI specializing in the field of biology."

    # The two character classes are disjoint, so they can be counted independently.
    chinese_count = sum('\u4e00' <= char <= '\u9fff' for char in prompt)
    english_count = sum('a' <= char.lower() <= 'z' for char in prompt)
    lang = 'zh' if chinese_count > english_count else 'en'

    messages = [
        {"role": "system", "content": zh_system if lang == 'zh' else en_system},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Follow the device the model was actually placed on (device_map='auto')
    # instead of hard-coding 'cuda'.
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    with torch.no_grad():
        generated_ids = model.generate(
            input_ids=model_inputs.input_ids,
            # Pass the mask explicitly so generate() does not have to infer it
            # from pad_token_id.
            attention_mask=model_inputs.attention_mask,
            max_new_tokens=max_length,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
            do_sample=True,
            top_p=0.9,
            temperature=0.3,
            repetition_penalty=1.1,
        )

    # generate() returns prompt + completion; keep only the completion tokens.
    prompt_len = model_inputs.input_ids.shape[1]
    response = tokenizer.decode(generated_ids[0][prompt_len:], skip_special_tokens=True)
    return response.strip()


# Adjacent literals are concatenated at compile time; a quoted string may not
# contain a raw line break, so the long prompt is split this way.
prompt = (
    'I am suffering from irregular periods. I am currently taking medication '
    'Levothyroxine 50. My T3 is 0.87 ng/mL, T4 is 8.30 ug/dL, TSH is 2.43 uIU/mL. '
    'I am 34 years old, weigh 75 kg, and 5 feet tall. Please advice.'
)
print(f'Question:\t{prompt}\n\nAnswer:\t{predict(prompt)}')
```

For more detailed information and code, please refer to [GitHub](https://github.com/yueqingyou/BioQwen).