doratiass committed on
Commit
d9940f8
·
1 Parent(s): 5c8ee5d

update dummy

Browse files
Files changed (4) hide show
  1. .gitignore +3 -1
  2. app.R +8 -1
  3. dummy_df.csv +0 -0
  4. funcs_data.R +1 -109
.gitignore CHANGED
@@ -3,4 +3,6 @@
3
  .RData
4
  .Ruserdata
5
  cred_hf.R
6
- old_code
 
 
 
3
  .RData
4
  .Ruserdata
5
  cred_hf.R
6
+ old_code
7
+ dummy_df.R
8
+ .RData 2
app.R CHANGED
@@ -18,11 +18,18 @@ light <- bs_theme(bootswatch = "flatly")
18
  dark <- bs_theme(bootswatch = "darkly")
19
 
20
  thematic_shiny()
21
- # load scripts ####
22
  source("side_bar.R")
23
  source("about.R")
24
  source("funcs_data.R")
25
 
 
 
 
 
 
 
 
26
  # load git data ####
27
  git_token <- Sys.getenv("GITHUB_PAT")
28
  git_url <- Sys.getenv("GIT_URL")
 
18
  dark <- bs_theme(bootswatch = "darkly")
19
 
20
  thematic_shiny()
21
+ # load scripts & data ####
22
  source("side_bar.R")
23
  source("about.R")
24
  source("funcs_data.R")
25
 
26
+ prob_df <- read_csv("new_prob_df.csv",show_col_types = FALSE) %>%
27
+ mutate(model_prob = round(model_prob, 3)) %>%
28
+ group_by(model_prob) %>%
29
+ summarise(cal_prob = mean(cal_prob))
30
+
31
+ dummy_df <- read_csv("dummy_df.csv",show_col_types = FALSE)
32
+
33
  # load git data ####
34
  git_token <- Sys.getenv("GITHUB_PAT")
35
  git_url <- Sys.getenv("GIT_URL")
dummy_df.csv ADDED
The diff for this file is too large to render. See raw diff
 
funcs_data.R CHANGED
@@ -2,15 +2,10 @@ library(tidyverse)
2
  library(sn)
3
  library(readr)
4
 
5
- prob_df <- read_csv("new_prob_df.csv",show_col_types = FALSE) %>%
6
- mutate(model_prob = round(model_prob, 3)) %>%
7
- group_by(model_prob) %>%
8
- summarise(cal_prob = mean(cal_prob))
9
-
10
  # functions ---------------------------------------------------------------
11
  generate_model <- function(df, model){
12
  model_prob <- predict(model, as.matrix(df),
13
- type = "response")
14
  prob <- cal_prob(model_prob)
15
  return(prob)
16
  }
@@ -21,25 +16,9 @@ cal_prob <- function(model_prob, probs = prob_df) {
21
  return(prob)
22
  }
23
 
24
- skewed_dist <- function(N, min_val, max_val, peak_val, scale, shape) {
25
- skewed_numbers <- rsn(N, xi = peak_val, omega = scale, alpha = shape)
26
- skewed_numbers <- pmin(pmax(skewed_numbers, min_val), max_val)
27
- return(skewed_numbers)
28
- }
29
-
30
  find_closest_n <- function(df, model, prob = 10, column_name = "prob", n = 2) {
31
- syn_prob <- predict(model, as.matrix(df),
32
- type = "response")
33
-
34
- df$prob <- sapply(syn_prob, function(x) cal_prob(x))
35
-
36
- # Calculate absolute differences with the given number
37
  differences <- abs(df[[column_name]] - prob)
38
-
39
- # Get the indices of 10 rows with the smallest differences (closest numbers)
40
  closest_indices <- order(differences)[1:10]
41
-
42
- # Return N random closest numbers from the dataframe
43
  return(df[sample(closest_indices,n),])
44
  }
45
 
@@ -109,93 +88,6 @@ vars_label <- function(x) {sapply(str_split_i(x, " =", 1),
109
  var_get <- function(x) {ifelse(x %in% vars_dict$name,
110
  vars_dict[vars_dict$name == x,"var",drop = TRUE],x)}
111
 
112
-
113
- # create dummy data -----------------------------------------------------------
114
- dummy_n <- 1000000
115
- set.seed(3772)
116
- dummy_df <- tibble(
117
- RE_age = round(skewed_dist(dummy_n,1,18,15,4,0),2),
118
- bmi = skewed_dist(dummy_n,7,49,21,5,10),
119
- n_visits = round(skewed_dist(dummy_n,0,141,1,10,10)),
120
- diff_profession = round(skewed_dist(dummy_n,0,14,2,2,0)),
121
- proffesion_primary = skewed_dist(dummy_n,0,1,0.8,0.05, -0.1),
122
- n_diagnosis = round(skewed_dist(dummy_n,0,721,1,12,100)),
123
- pci = round(skewed_dist(dummy_n,0,19,0,1,10)),
124
- sum_drug = round(skewed_dist(dummy_n,0,265,1,10,100)),
125
- drug_nervous_frac = round(skewed_dist(dummy_n,0,86,1,5,100)),
126
- drug_muscle_frac = round(skewed_dist(dummy_n,0,30,1,10,100)),
127
- drug_antineoplastic_frac = round(skewed_dist(dummy_n,0,48,0,2,100)),
128
- drug_sum_frac = drug_nervous_frac+ drug_muscle_frac + drug_antineoplastic_frac,
129
- drug_nervous = round(sum_drug*drug_nervous_frac/drug_sum_frac),
130
- drug_muscle = round(sum_drug*drug_muscle_frac/drug_sum_frac),
131
- drug_antineoplastic = round(sum_drug*drug_antineoplastic_frac/drug_sum_frac),
132
- lab_n = round(skewed_dist(dummy_n,0,376,0,1,10)),
133
- img_n = round(skewed_dist(dummy_n,0,124,0,1,10)),
134
- gender_Male = round(runif(dummy_n, min=0, max=1)),
135
- SES = round(sample(c(1:5),
136
- prob = c(0.22, 0.42, 0.21, 0.12, 0.03),
137
- size = dummy_n, replace = TRUE)),
138
- SES_1 = case_when(
139
- SES == 1 ~ -0.6324555,
140
- SES == 2 ~ -0.3162278,
141
- SES == 3 ~ 0,
142
- SES == 4 ~ 0.3162278,
143
- SES == 5 ~ 0.6324555,
144
- ),
145
- SES_2 = case_when(
146
- SES == 1 ~ 0.5345225,
147
- SES == 2 ~ -0.2672612,
148
- SES == 3 ~ -0.5345225,
149
- SES == 4 ~ -0.2672612,
150
- SES == 5 ~ 0.5345225,
151
- ),
152
- SES_3 = case_when(
153
- SES == 1 ~ -0.3162278,
154
- SES == 2 ~ 0.63245552,
155
- SES == 3 ~ -4.095972e-16,
156
- SES == 4 ~ -0.63245552,
157
- SES == 5 ~ 0.3162278,
158
- ),
159
- SES_4 = case_when(
160
- SES == 1 ~ 0.1195229,
161
- SES == 2 ~ -0.4780914,
162
- SES == 3 ~ 0.7171372,
163
- SES == 4 ~ -0.4780914,
164
- SES == 5 ~ 0.1195229,
165
- ),
166
- sector = factor(sample(c("General","Arab","Bedouin","Cherkess","Religious Jewish"),
167
- prob = c(0.35, 0.5, 0.13, 0.01,0.2),
168
- size = dummy_n, replace = TRUE)),
169
- `sector_Arab...others` = ifelse(sector == "Arab",1,0),
170
- sector_Bedouin = ifelse(sector == "Bedouin",1,0),
171
- sector_Cherkess = ifelse(sector == "Cherkess",1,0),
172
- sector_Religious.mixed = ifelse(sector == "Religious Jewish",1,0),
173
- district = factor(sample(c("Center","Dan PT","Eilat","Haifa","Jerusalem",
174
- "North","Sharon Shomron","South","Tel-Aviv Jaffa"),
175
- prob = c(0.087, 0.048, 0.05, 0.2,
176
- 0.14,0.2,0.12,0.18,0.022),
177
- size = dummy_n, replace = TRUE)),
178
- district_Dan.PT = ifelse(district == "Dan PT",1,0),
179
- district_Eilat = ifelse(district == "Eilat",1,0),
180
- district_Haifa = ifelse(district == "Haifa",1,0),
181
- district_Jerusalem = ifelse(district == "Jerusalem",1,0),
182
- district_North = ifelse(district == "North",1,0),
183
- district_Sharon.Shomron = ifelse(district == "Sharon Shomron",1,0),
184
- district_South = ifelse(district == "South",1,0),
185
- district_Tel.Aviv.Jaffa = ifelse(district == "Tel-Aviv Jaffa",1,0),
186
- generic = factor(sample(c("Codeine","Tramadol","Oxycodone","Other"),
187
- prob = c(0.78, 0.15, 0.6, 0.02),
188
- size = dummy_n, replace = TRUE)),
189
- generic_Other = ifelse(generic == "Other",1,0),
190
- generic_Oxycodone = ifelse(generic == "Oxycodone",1,0),
191
- generic_Tramadol = ifelse(generic == "Tramadol",1,0),
192
- `malignancy_TRUE.` = round(runif(dummy_n, min=0, max=1)),
193
- `pain_TRUE.` = round(runif(dummy_n, min=0, max=1)),
194
- `psychiatric_not_pci_TRUE.` = round(runif(dummy_n, min=0, max=1))
195
- ) %>%
196
- select(-c(SES, sector, district, generic,drug_sum_frac,
197
- drug_nervous_frac, drug_antineoplastic_frac,drug_muscle_frac))
198
-
199
  # create dictionary data -----------------------------------------------------------
200
  vars_dict <- tibble(
201
  "RE_age" = "Age at\nAdmission (years)",
 
2
  library(sn)
3
  library(readr)
4
 
 
 
 
 
 
5
  # functions ---------------------------------------------------------------
6
  generate_model <- function(df, model){
7
  model_prob <- predict(model, as.matrix(df),
8
+ type = "response")
9
  prob <- cal_prob(model_prob)
10
  return(prob)
11
  }
 
16
  return(prob)
17
  }
18
 
 
 
 
 
 
 
19
  find_closest_n <- function(df, model, prob = 10, column_name = "prob", n = 2) {
 
 
 
 
 
 
20
  differences <- abs(df[[column_name]] - prob)
 
 
21
  closest_indices <- order(differences)[1:10]
 
 
22
  return(df[sample(closest_indices,n),])
23
  }
24
 
 
88
  var_get <- function(x) {ifelse(x %in% vars_dict$name,
89
  vars_dict[vars_dict$name == x,"var",drop = TRUE],x)}
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  # create dictionary data -----------------------------------------------------------
92
  vars_dict <- tibble(
93
  "RE_age" = "Age at\nAdmission (years)",