"""Gradio demo that sends the user's text to GPT-JT-Moderation-6B."""

import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): an empty CURL_CA_BUNDLE is a common workaround that makes some
# versions of `requests` skip TLS certificate verification when downloading
# from the Hub. That is a security trade-off -- confirm it is still needed.
os.environ["CURL_CA_BUNDLE"] = ""

MODEL_NAME = "togethercomputer/GPT-JT-Moderation-6B"

# NOTE(review): force_download=True asks for the tokenizer files to be fetched
# again even when cached; presumably a workaround for a bad cache -- confirm.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    force_download=True,
    local_files_only=False,
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
)


def gpt(prompt: str) -> str:
    """Generate a short sampled continuation of *prompt*.

    Args:
        prompt: Text entered by the user in the Gradio textbox.

    Returns:
        The decoded output sequence returned by ``model.generate`` (up to 10
        newly sampled tokens), or an empty string when *prompt* is empty or
        whitespace-only.
    """
    # Nothing to condition on: skip the model call instead of handing
    # `generate` an empty input.
    if not prompt or not prompt.strip():
        return ""

    # Tokenize the caller's text. The previous version always tokenized a
    # hard-coded greeting here and ignored `prompt` entirely.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=10,
        do_sample=True,
        temperature=0.8,
    )
    output_str = tokenizer.decode(outputs[0])
    print(output_str)  # echo to the server log, as the original did
    return output_str


# Launched at module level, exactly as before, so running the file starts the UI.
gr.Interface(fn=gpt, inputs="text", outputs="text").launch()