dfalbel committed on
Commit
173d645
1 Parent(s): 1952c1a

add support for GPU

Browse files
Files changed (2) hide show
  1. app.R +1 -1
  2. model-session.R +12 -5
app.R CHANGED
@@ -4,7 +4,7 @@ library(minhub)
4
  library(magrittr)
5
  source("model-session.R")
6
 
7
- repo <- "EleutherAI/pythia-70m"
8
  repo <- Sys.getenv("MODEL_REPO", unset = repo)
9
  sess <- model_session$new()
10
 
 
4
  library(magrittr)
5
  source("model-session.R")
6
 
7
+ repo <- "stabilityai/stablelm-tuned-alpha-3b"
8
  repo <- Sys.getenv("MODEL_REPO", unset = repo)
9
  sess <- model_session$new()
10
 
model-session.R CHANGED
@@ -18,9 +18,15 @@ model_session <- R6::R6Class(
18
  library(torch)
19
  library(zeallot)
20
  library(minhub)
 
21
  model <<- minhub::gptneox_from_pretrained(repo)
22
  model$eval()
23
- model$to(dtype = torch_float())
 
 
 
 
 
24
  tok <<- tok::tokenizer$from_pretrained(repo)
25
  "done"
26
  })
@@ -36,15 +42,16 @@ model_session <- R6::R6Class(
36
  top_k = self$top_k
37
  )
38
  self$task_q$push(args = args, function(prompt, temperature, top_k) {
39
- idx <- torch_tensor(tok$encode(prompt)$ids)$view(c(1, -1))
 
40
  with_no_grad({
41
- logits <- model(idx + 1L)
42
  })
43
  logits <- logits[,-1,]/temperature
44
  c(prob, ind) %<-% logits$topk(top_k)
45
- logits <- torch_full_like(logits, -Inf)$scatter_(-1, ind, prob)
46
  logits <- nnf_softmax(logits, dim = -1)
47
- id_next <- torch::torch_multinomial(logits, num_samples = 1) - 1L
48
  tok$decode(as.integer(id_next))
49
  })
50
  }
 
18
  library(torch)
19
  library(zeallot)
20
  library(minhub)
21
+ device <- if (cuda_is_available()) "cuda" else "cpu"
22
  model <<- minhub::gptneox_from_pretrained(repo)
23
  model$eval()
24
+ if (device == "cuda") {
25
+ model$to(device=device)
26
+ #model$to(dtype=torch_float())
27
+ } else {
28
+ model$to(dtype = torch_float())
29
+ }
30
  tok <<- tok::tokenizer$from_pretrained(repo)
31
  "done"
32
  })
 
42
  top_k = self$top_k
43
  )
44
  self$task_q$push(args = args, function(prompt, temperature, top_k) {
45
+ device <- if (cuda_is_available()) "cuda" else "cpu"
46
+ idx <- torch_tensor(tok$encode(prompt)$ids, device=device)$view(c(1, -1))
47
  with_no_grad({
48
+ logits <- model(idx + 1L)$to(dtype="float", device="cpu")
49
  })
50
  logits <- logits[,-1,]/temperature
51
  c(prob, ind) %<-% logits$topk(top_k)
52
+ logits <- torch_full_like(logits, -1e7)$scatter_(-1, ind, prob)
53
  logits <- nnf_softmax(logits, dim = -1)
54
+ id_next <- torch::torch_multinomial(logits, num_samples = 1)$cpu() - 1L
55
  tok$decode(as.integer(id_next))
56
  })
57
  }