Spaces:

dorati
/

opioid-use-peds

Sleeping

App Files Files Community

doratiass committed on Jan 15, 2024

Commit

d9940f8

1 Parent(s): 5c8ee5d

update dummy

Browse files

Files changed (4) hide show

.gitignore +3 -1
app.R +8 -1
dummy_df.csv +0 -0
funcs_data.R +1 -109

.gitignore CHANGED Viewed

@@ -3,4 +3,6 @@
 .RData
 .Ruserdata
 cred_hf.R
-old_code

 .RData
 .Ruserdata
 cred_hf.R
+old_code
+dummy_df.R
+.RData 2

app.R CHANGED Viewed

@@ -18,11 +18,18 @@ light <- bs_theme(bootswatch = "flatly")
 dark <- bs_theme(bootswatch = "darkly")
 thematic_shiny()
-# load scripts ####
 source("side_bar.R")
 source("about.R")
 source("funcs_data.R")
 # load git data ####
 git_token <- Sys.getenv("GITHUB_PAT")
 git_url <- Sys.getenv("GIT_URL")

 dark <- bs_theme(bootswatch = "darkly")
 thematic_shiny()
+# load scripts & data ####
 source("side_bar.R")
 source("about.R")
 source("funcs_data.R")
+prob_df <- read_csv("new_prob_df.csv",show_col_types = FALSE) %>%
+  mutate(model_prob = round(model_prob, 3)) %>%
+  group_by(model_prob) %>%
+  summarise(cal_prob = mean(cal_prob))
+dummy_df <- read_csv("dummy_df.csv",show_col_types = FALSE)
 # load git data ####
 git_token <- Sys.getenv("GITHUB_PAT")
 git_url <- Sys.getenv("GIT_URL")

dummy_df.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

funcs_data.R CHANGED Viewed

@@ -2,15 +2,10 @@ library(tidyverse)
 library(sn)
 library(readr)
-prob_df <- read_csv("new_prob_df.csv",show_col_types = FALSE) %>%
-  mutate(model_prob = round(model_prob, 3)) %>%
-  group_by(model_prob) %>%
-  summarise(cal_prob = mean(cal_prob))
 # functions ---------------------------------------------------------------
 generate_model <- function(df, model){
   model_prob <- predict(model, as.matrix(df),
-                      type = "response")
   prob <- cal_prob(model_prob)
   return(prob)
 }
@@ -21,25 +16,9 @@ cal_prob <- function(model_prob, probs = prob_df) {
   return(prob)
 }
-skewed_dist <- function(N, min_val, max_val, peak_val, scale, shape) {
-  skewed_numbers <- rsn(N, xi = peak_val, omega = scale, alpha = shape)
-  skewed_numbers <- pmin(pmax(skewed_numbers, min_val), max_val)
-  return(skewed_numbers)
-}
 find_closest_n <- function(df, model, prob = 10, column_name = "prob", n = 2) {
-  syn_prob <- predict(model, as.matrix(df),
-                      type = "response")
-  df$prob <- sapply(syn_prob, function(x) cal_prob(x))
-  # Calculate absolute differences with the given number
   differences <- abs(df[[column_name]] - prob)
-  # Get the indices of 10 rows with the smallest differences (closest numbers)
   closest_indices <- order(differences)[1:10]
-  # Return N random closest numbers from the dataframe
   return(df[sample(closest_indices,n),])
 }
@@ -109,93 +88,6 @@ vars_label <- function(x) {sapply(str_split_i(x, " =", 1),
 var_get <- function(x) {ifelse(x %in% vars_dict$name,
                                vars_dict[vars_dict$name == x,"var",drop = TRUE],x)}
-# create dummy data -----------------------------------------------------------
-dummy_n <- 1000000
-set.seed(3772)
-dummy_df <- tibble(
-  RE_age = round(skewed_dist(dummy_n,1,18,15,4,0),2),
-  bmi = skewed_dist(dummy_n,7,49,21,5,10),
-  n_visits = round(skewed_dist(dummy_n,0,141,1,10,10)),
-  diff_profession = round(skewed_dist(dummy_n,0,14,2,2,0)),
-  proffesion_primary = skewed_dist(dummy_n,0,1,0.8,0.05, -0.1),
-  n_diagnosis = round(skewed_dist(dummy_n,0,721,1,12,100)),
-  pci = round(skewed_dist(dummy_n,0,19,0,1,10)),
-  sum_drug = round(skewed_dist(dummy_n,0,265,1,10,100)),
-  drug_nervous_frac = round(skewed_dist(dummy_n,0,86,1,5,100)),
-  drug_muscle_frac = round(skewed_dist(dummy_n,0,30,1,10,100)),
-  drug_antineoplastic_frac = round(skewed_dist(dummy_n,0,48,0,2,100)),
-  drug_sum_frac = drug_nervous_frac+ drug_muscle_frac + drug_antineoplastic_frac,
-  drug_nervous = round(sum_drug*drug_nervous_frac/drug_sum_frac),
-  drug_muscle = round(sum_drug*drug_muscle_frac/drug_sum_frac),
-  drug_antineoplastic = round(sum_drug*drug_antineoplastic_frac/drug_sum_frac),
-  lab_n = round(skewed_dist(dummy_n,0,376,0,1,10)),
-  img_n = round(skewed_dist(dummy_n,0,124,0,1,10)),
-  gender_Male = round(runif(dummy_n, min=0, max=1)),
-  SES = round(sample(c(1:5),
-                     prob = c(0.22, 0.42, 0.21, 0.12, 0.03),
-                     size = dummy_n, replace = TRUE)),
-  SES_1 = case_when(
-    SES == 1 ~ -0.6324555,
-    SES == 2 ~ -0.3162278,
-    SES == 3 ~ 0,
-    SES == 4 ~ 0.3162278,
-    SES == 5 ~ 0.6324555,
-  ),
-  SES_2 = case_when(
-    SES == 1 ~ 0.5345225,
-    SES == 2 ~ -0.2672612,
-    SES == 3 ~ -0.5345225,
-    SES == 4 ~ -0.2672612,
-    SES == 5 ~ 0.5345225,
-  ),
-  SES_3 = case_when(
-    SES == 1 ~ -0.3162278,
-    SES == 2 ~ 0.63245552,
-    SES == 3 ~ -4.095972e-16,
-    SES == 4 ~ -0.63245552,
-    SES == 5 ~ 0.3162278,
-  ),
-  SES_4 = case_when(
-    SES == 1 ~ 0.1195229,
-    SES == 2 ~ -0.4780914,
-    SES == 3 ~ 0.7171372,
-    SES == 4 ~ -0.4780914,
-    SES == 5 ~ 0.1195229,
-  ),
-  sector = factor(sample(c("General","Arab","Bedouin","Cherkess","Religious Jewish"),
-                         prob = c(0.35, 0.5, 0.13, 0.01,0.2),
-                         size = dummy_n, replace = TRUE)),
-  `sector_Arab...others` = ifelse(sector == "Arab",1,0),
-  sector_Bedouin = ifelse(sector == "Bedouin",1,0),
-  sector_Cherkess = ifelse(sector == "Cherkess",1,0),
-  sector_Religious.mixed = ifelse(sector == "Religious Jewish",1,0),
-  district = factor(sample(c("Center","Dan PT","Eilat","Haifa","Jerusalem",
-                             "North","Sharon Shomron","South","Tel-Aviv Jaffa"),
-                           prob = c(0.087, 0.048, 0.05, 0.2,
-                                    0.14,0.2,0.12,0.18,0.022),
-                           size = dummy_n, replace = TRUE)),
-  district_Dan.PT = ifelse(district == "Dan PT",1,0),
-  district_Eilat = ifelse(district == "Eilat",1,0),
-  district_Haifa = ifelse(district == "Haifa",1,0),
-  district_Jerusalem = ifelse(district == "Jerusalem",1,0),
-  district_North = ifelse(district == "North",1,0),
-  district_Sharon.Shomron = ifelse(district == "Sharon Shomron",1,0),
-  district_South = ifelse(district == "South",1,0),
-  district_Tel.Aviv.Jaffa = ifelse(district == "Tel-Aviv Jaffa",1,0),
-  generic = factor(sample(c("Codeine","Tramadol","Oxycodone","Other"),
-                          prob = c(0.78, 0.15, 0.6, 0.02),
-                          size = dummy_n, replace = TRUE)),
-  generic_Other = ifelse(generic == "Other",1,0),
-  generic_Oxycodone = ifelse(generic == "Oxycodone",1,0),
-  generic_Tramadol = ifelse(generic == "Tramadol",1,0),
-  `malignancy_TRUE.` = round(runif(dummy_n, min=0, max=1)),
-  `pain_TRUE.` = round(runif(dummy_n, min=0, max=1)),
-  `psychiatric_not_pci_TRUE.` = round(runif(dummy_n, min=0, max=1))
-) %>%
-  select(-c(SES, sector, district, generic,drug_sum_frac,
-            drug_nervous_frac, drug_antineoplastic_frac,drug_muscle_frac))
 # create dictionary data -----------------------------------------------------------
 vars_dict <- tibble(
   "RE_age" = "Age at\nAdmission (years)",

 library(sn)
 library(readr)
 # functions ---------------------------------------------------------------
 # Score `df` with `model` and return the calibrated result.
 #
 # Args:
 #   df: predictor data; converted with as.matrix() before being handed to
 #       predict().
 #   model: fitted model object accepted by predict().
 #       NOTE(review): the model class is not visible here - confirm that its
 #       predict() method supports type = "response".
 #
 # Returns: the value produced by cal_prob() for the raw model predictions.
 generate_model <- function(df, model){
   # Raw predictions on the response scale.
   model_prob <- predict(model, as.matrix(df),
+                        type = "response")
   # Map the raw predictions through the calibration helper.
   prob <- cal_prob(model_prob)
   return(prob)
 }
   return(prob)
 }
 # Pick `n` random rows from among the rows of `df` whose `column_name` value
 # lies closest to `prob`.
 #
 # Args:
 #   df: data frame holding a numeric column named by `column_name`.
 #   model: not used by this function; kept so existing callers that pass it
 #       keep working.
 #   prob: target value the column is compared against.
 #   column_name: name of the column compared with `prob`.
 #   n: number of rows to return; capped at the number of candidate rows.
 #   pool_size: how many of the closest rows form the candidate pool
 #       (default 10, the previously hard-coded value).
 #
 # Returns: a subset of `df` with at most `n` rows (zero rows when `df` is
 #   empty). Stops with an error when `column_name` is not a column of `df`.
 find_closest_n <- function(df, model, prob = 10, column_name = "prob", n = 2,
                            pool_size = 10) {
   if (!column_name %in% names(df)) {
     stop("Column '", column_name, "' not found in `df`.", call. = FALSE)
   }
   differences <- abs(df[[column_name]] - prob)
   # Limit the pool to the rows that exist: indexing order() with 1:10 on a
   # shorter vector pads with NA and would yield all-NA rows.
   pool_len <- min(pool_size, length(differences))
   closest_indices <- order(differences)[seq_len(pool_len)]
   n <- min(n, length(closest_indices))
   if (n < 1) {
     return(df[0, , drop = FALSE])
   }
   # Draw positions with sample.int() so that a single candidate index `k` is
   # not expanded to 1:k, as sample(k, n) would do.
   picked <- closest_indices[sample.int(length(closest_indices), n)]
   df[picked, , drop = FALSE]
 }
 # Translate `x` through the `vars_dict` lookup table: when `x` is found in
 # `vars_dict$name`, the `var` entry of the matching row is returned;
 # otherwise `x` itself is returned unchanged.
 # NOTE(review): `vars_dict` is defined elsewhere in this file and is assumed
 # to expose `name` and `var` columns by the time this is called - confirm.
 # NOTE(review): looks intended for a single name; ifelse() shapes its result
 # like the test, so vector input relies on recycling - verify against callers.
 var_get <- function(x) {ifelse(x %in% vars_dict$name,
                                vars_dict[vars_dict$name == x,"var",drop = TRUE],x)}
 # create dictionary data -----------------------------------------------------------
 vars_dict <- tibble(
   "RE_age" = "Age at\nAdmission (years)",