from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Select the base model and the LoRA adapter
base_model = "beomi/KoAlpaca-Polyglot-5.8B"
lora_weights = "KimSHine/Scenario_Koalpaca_5.8B-lora"
load_8bit = True

# Base model tokenizer
tokenizer1 = AutoTokenizer.from_pretrained(base_model)
## set the special token ids
tokenizer1.pad_token_id = 0
tokenizer1.eos_token_id = 2

"""### Loading the base model"""

# KoAlpaca-Polyglot-5.8B
model1 = AutoModelForCausalLM.from_pretrained(
    base_model,
    load_in_8bit=load_8bit,
    torch_dtype=torch.float16,
    device_map="auto",
)
model1.config.pad_token_id = 0
model1.config.eos_token_id = 2

"""### Loading the LoRA model

The fine-tuned adapter weights
"""

model1 = PeftModel.from_pretrained(
    model1,
    lora_weights,
    torch_dtype=torch.float16,
)
model1.config.pad_token_id = 0  # unk
model1.config.bos_token_id = 0
model1.config.eos_token_id = 2


def yeollm_text(instruction, summary):
    prompt = f"""아래는 작업을 설명하는 지시문과 대본을 생성하는데 참고할 줄거리와 짝을 이루는 예제입니다. 요청을 적절히 만족하는 대본을 작성하세요.

### 지시문:
{instruction}

### 줄거리:
{summary}

### 대본:
"""
    temperature = 0.3
    top_p = 0.95
    top_k = 40
    max_new_tokens = 512  # 2048
    no_repeat_ngram_size = 5  # zero out the probability of any 5-token n-gram repeating
    do_sample = True  # True: sampling; False (default): greedy search
    num_beams = 5  # used for beam search when do_sample is False
    # Note: no_repeat_ngram_size does not change how greedy/beam search select
    # intermediate candidates, but it can change the final output.

    inputs = tokenizer1(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(DEVICE)

    generation_config = GenerationConfig(
        do_sample=do_sample,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        pad_token_id=0,  # set the pad token
        no_repeat_ngram_size=no_repeat_ngram_size,
        # num_beams=num_beams,
    )

    # Generate text
    with torch.no_grad():
        generation_output = model1.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )
    s = generation_output.sequences[0]
    output = tokenizer1.decode(s)
    output = output.split("### 대본:")[1]
    if output.endswith("<|endoftext|>"):
        output = output[: -len("<|endoftext|>")]
    return output.lstrip()

"""## Loading text-davinci-003"""

import os
import openai

# Never hard-code real API keys; read the key from the environment instead.
openai.api_key = os.environ["OPENAI_API_KEY"]

model2 = "text-davinci-003"  # or 'gpt-3.5-turbo'
max_tokens = 2048
temperature = 0.3
top_p = 1


def davinci_text(instruction, summary):
    prompt = f"""
아래의 줄거리를 보고 {instruction}

### 줄거리:
{summary}

### 대본:
"""
    response = openai.Completion.create(
        engine=model2,
        prompt=prompt,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        n=1,
    )
    return response.choices[0].text.strip()

"""## Loading gpt-3.5-turbo"""

model4 = "gpt-3.5-turbo"
max_tokens = 2048
temperature = 0.3
top_p = 1


def gpt_text(instruction, summary):
    prompt = f"""
### 지시문:
{instruction}

### 줄거리:
{summary}

### 대본:
"""
    response = openai.ChatCompletion.create(
        model=model4,
        messages=[
            {"role": "system", "content": "아래는 작업을 설명하는 지시문과 대본을 생성하는데 참고할 줄거리와 짝을 이루는 예제입니다. 요청을 적절히 만족하는 대본을 작성하세요."},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        n=1,
    )
    # n=1, so the first choice is the only one
    return response["choices"][0]["message"]["content"]

"""# Gradio"""

import gradio as gr

generator1 = gr.Interface(
    fn=yeollm_text,
    inputs=[
        gr.Textbox(label="Instruction"),
        gr.Textbox(label="Summary"),
    ],
    outputs=gr.Textbox(label="Yeollm Scenario"),
    title="Yeollm Scenario Generation",
    description="Generate scenarios using the Yeollm model.",
    theme="huggingface",
)

generator2 = gr.Interface(
    fn=davinci_text,
    inputs=[
        gr.Textbox(label="Instruction"),
        gr.Textbox(label="Summary"),
    ],
    outputs=gr.Textbox(label="Davinci Scenario"),
    title="Davinci Generation",
    description="Generate scenarios using the Davinci model.",
    theme="huggingface",
)

generator3 = gr.Interface(
    fn=gpt_text,
    inputs=[
        gr.Textbox(label="Instruction"),
        gr.Textbox(label="Summary"),
    ],
    outputs=gr.Textbox(label="GPT Scenario"),
    title="GPT Generation",
    description="Generate scenarios using the GPT model.",
    theme="huggingface",
)

gr.Parallel(generator1, generator2, generator3).launch(share=True, debug=True)
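
"""## Direct invocation (optional)

A minimal usage sketch for comparing the three generators without the Gradio UI;
comment out the `launch()` call above before running the script this way. The
instruction and summary strings below are hypothetical placeholders, not project data.
"""

if __name__ == "__main__":
    example_instruction = "다음 줄거리를 바탕으로 드라마 대본을 작성하세요."  # hypothetical example
    example_summary = "오랜만에 고향으로 돌아온 주인공이 어릴 적 친구와 재회하며 벌어지는 이야기."  # hypothetical example
    print("Yeollm:\n", yeollm_text(example_instruction, example_summary))
    print("Davinci:\n", davinci_text(example_instruction, example_summary))
    print("GPT-3.5:\n", gpt_text(example_instruction, example_summary))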