"""Gradio demo script.

At import time this module loads the ML stack, prints the versions of the
key libraries, allocates a one-element tensor via ``.cuda()``, and launches
a Gradio interface that wraps :func:`greet`.
"""

# NOTE(review): import order is kept exactly as in the original. `spaces` is
# imported before `torch` -- presumably intentional; confirm before reordering.
import gradio as gr
import spaces

import os
import gc
import random
import warnings

# Silence all warnings; this runs before the imports below are executed.
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd

# Widen pandas' console output limits.
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)

from tqdm.auto import tqdm

import torch
import torch.nn as nn
import tokenizers
import transformers

# Log library versions at startup.
print(f"tokenizers.__version__: {tokenizers.__version__}")
print(f"transformers.__version__: {transformers.__version__}")
print(f"torch.__version__: {torch.__version__}")
print(f"torch cuda version: {torch.version.cuda}")

from transformers import AutoTokenizer, AutoConfig
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, MistralForCausalLM
from peft import LoraConfig, get_peft_model

# NOTE(review): `title` is not referenced anywhere in the visible source.
title = "H2O AI Predict the LLM"

zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' 🤔 (original author's note on the expected output here)


@spaces.GPU
def greet(n):
    """Return a greeting string that embeds ``zero + n``.

    Args:
        n: Value added to the module-level ``zero`` tensor. It is supplied by
            the ``gr.Number()`` input of the interface below.

    Returns:
        The string ``f"Hello {zero + n} Tensor"``.
    """
    print(zero.device)  # <-- 'cuda:0' 🤗 (original author's note on the expected output here)
    return f"Hello {zero + n} Tensor"


# Build the interface around `greet` and start serving it.
gr.Interface(fn=greet, inputs=gr.Number(), outputs=gr.Text()).launch()