Spaces:
Sleeping
Sleeping
doratiass
committed on
Commit
·
d9940f8
1
Parent(s):
5c8ee5d
update dummy
Browse files
- .gitignore +3 -1
- app.R +8 -1
- dummy_df.csv +0 -0
- funcs_data.R +1 -109
.gitignore
CHANGED
@@ -3,4 +3,6 @@
|
|
3 |
.RData
|
4 |
.Ruserdata
|
5 |
cred_hf.R
|
6 |
-
old_code
|
|
|
|
|
|
3 |
.RData
|
4 |
.Ruserdata
|
5 |
cred_hf.R
|
6 |
+
old_code
|
7 |
+
dummy_df.R
|
8 |
+
.RData 2
|
app.R
CHANGED
@@ -18,11 +18,18 @@ light <- bs_theme(bootswatch = "flatly")
|
|
18 |
dark <- bs_theme(bootswatch = "darkly")
|
19 |
|
20 |
thematic_shiny()
|
21 |
-
# load scripts ####
|
22 |
source("side_bar.R")
|
23 |
source("about.R")
|
24 |
source("funcs_data.R")
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
# load git data ####
|
27 |
git_token <- Sys.getenv("GITHUB_PAT")
|
28 |
git_url <- Sys.getenv("GIT_URL")
|
|
|
18 |
dark <- bs_theme(bootswatch = "darkly")
|
19 |
|
20 |
thematic_shiny()
|
21 |
+
# load scripts & data ####
|
22 |
source("side_bar.R")
|
23 |
source("about.R")
|
24 |
source("funcs_data.R")
|
25 |
|
26 |
+
prob_df <- read_csv("new_prob_df.csv",show_col_types = FALSE) %>%
|
27 |
+
mutate(model_prob = round(model_prob, 3)) %>%
|
28 |
+
group_by(model_prob) %>%
|
29 |
+
summarise(cal_prob = mean(cal_prob))
|
30 |
+
|
31 |
+
dummy_df <- read_csv("dummy_df.csv",show_col_types = FALSE)
|
32 |
+
|
33 |
# load git data ####
|
34 |
git_token <- Sys.getenv("GITHUB_PAT")
|
35 |
git_url <- Sys.getenv("GIT_URL")
|
dummy_df.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
funcs_data.R
CHANGED
@@ -2,15 +2,10 @@ library(tidyverse)
|
|
2 |
library(sn)
|
3 |
library(readr)
|
4 |
|
5 |
-
prob_df <- read_csv("new_prob_df.csv",show_col_types = FALSE) %>%
|
6 |
-
mutate(model_prob = round(model_prob, 3)) %>%
|
7 |
-
group_by(model_prob) %>%
|
8 |
-
summarise(cal_prob = mean(cal_prob))
|
9 |
-
|
10 |
# functions ---------------------------------------------------------------
|
11 |
generate_model <- function(df, model){
|
12 |
model_prob <- predict(model, as.matrix(df),
|
13 |
-
|
14 |
prob <- cal_prob(model_prob)
|
15 |
return(prob)
|
16 |
}
|
@@ -21,25 +16,9 @@ cal_prob <- function(model_prob, probs = prob_df) {
|
|
21 |
return(prob)
|
22 |
}
|
23 |
|
24 |
-
skewed_dist <- function(N, min_val, max_val, peak_val, scale, shape) {
|
25 |
-
skewed_numbers <- rsn(N, xi = peak_val, omega = scale, alpha = shape)
|
26 |
-
skewed_numbers <- pmin(pmax(skewed_numbers, min_val), max_val)
|
27 |
-
return(skewed_numbers)
|
28 |
-
}
|
29 |
-
|
30 |
find_closest_n <- function(df, model, prob = 10, column_name = "prob", n = 2) {
|
31 |
-
syn_prob <- predict(model, as.matrix(df),
|
32 |
-
type = "response")
|
33 |
-
|
34 |
-
df$prob <- sapply(syn_prob, function(x) cal_prob(x))
|
35 |
-
|
36 |
-
# Calculate absolute differences with the given number
|
37 |
differences <- abs(df[[column_name]] - prob)
|
38 |
-
|
39 |
-
# Get the indices of 10 rows with the smallest differences (closest numbers)
|
40 |
closest_indices <- order(differences)[1:10]
|
41 |
-
|
42 |
-
# Return N random closest numbers from the dataframe
|
43 |
return(df[sample(closest_indices,n),])
|
44 |
}
|
45 |
|
@@ -109,93 +88,6 @@ vars_label <- function(x) {sapply(str_split_i(x, " =", 1),
|
|
109 |
var_get <- function(x) {ifelse(x %in% vars_dict$name,
|
110 |
vars_dict[vars_dict$name == x,"var",drop = TRUE],x)}
|
111 |
|
112 |
-
|
113 |
-
# create dummy data -----------------------------------------------------------
|
114 |
-
dummy_n <- 1000000
|
115 |
-
set.seed(3772)
|
116 |
-
dummy_df <- tibble(
|
117 |
-
RE_age = round(skewed_dist(dummy_n,1,18,15,4,0),2),
|
118 |
-
bmi = skewed_dist(dummy_n,7,49,21,5,10),
|
119 |
-
n_visits = round(skewed_dist(dummy_n,0,141,1,10,10)),
|
120 |
-
diff_profession = round(skewed_dist(dummy_n,0,14,2,2,0)),
|
121 |
-
proffesion_primary = skewed_dist(dummy_n,0,1,0.8,0.05, -0.1),
|
122 |
-
n_diagnosis = round(skewed_dist(dummy_n,0,721,1,12,100)),
|
123 |
-
pci = round(skewed_dist(dummy_n,0,19,0,1,10)),
|
124 |
-
sum_drug = round(skewed_dist(dummy_n,0,265,1,10,100)),
|
125 |
-
drug_nervous_frac = round(skewed_dist(dummy_n,0,86,1,5,100)),
|
126 |
-
drug_muscle_frac = round(skewed_dist(dummy_n,0,30,1,10,100)),
|
127 |
-
drug_antineoplastic_frac = round(skewed_dist(dummy_n,0,48,0,2,100)),
|
128 |
-
drug_sum_frac = drug_nervous_frac+ drug_muscle_frac + drug_antineoplastic_frac,
|
129 |
-
drug_nervous = round(sum_drug*drug_nervous_frac/drug_sum_frac),
|
130 |
-
drug_muscle = round(sum_drug*drug_muscle_frac/drug_sum_frac),
|
131 |
-
drug_antineoplastic = round(sum_drug*drug_antineoplastic_frac/drug_sum_frac),
|
132 |
-
lab_n = round(skewed_dist(dummy_n,0,376,0,1,10)),
|
133 |
-
img_n = round(skewed_dist(dummy_n,0,124,0,1,10)),
|
134 |
-
gender_Male = round(runif(dummy_n, min=0, max=1)),
|
135 |
-
SES = round(sample(c(1:5),
|
136 |
-
prob = c(0.22, 0.42, 0.21, 0.12, 0.03),
|
137 |
-
size = dummy_n, replace = TRUE)),
|
138 |
-
SES_1 = case_when(
|
139 |
-
SES == 1 ~ -0.6324555,
|
140 |
-
SES == 2 ~ -0.3162278,
|
141 |
-
SES == 3 ~ 0,
|
142 |
-
SES == 4 ~ 0.3162278,
|
143 |
-
SES == 5 ~ 0.6324555,
|
144 |
-
),
|
145 |
-
SES_2 = case_when(
|
146 |
-
SES == 1 ~ 0.5345225,
|
147 |
-
SES == 2 ~ -0.2672612,
|
148 |
-
SES == 3 ~ -0.5345225,
|
149 |
-
SES == 4 ~ -0.2672612,
|
150 |
-
SES == 5 ~ 0.5345225,
|
151 |
-
),
|
152 |
-
SES_3 = case_when(
|
153 |
-
SES == 1 ~ -0.3162278,
|
154 |
-
SES == 2 ~ 0.63245552,
|
155 |
-
SES == 3 ~ -4.095972e-16,
|
156 |
-
SES == 4 ~ -0.63245552,
|
157 |
-
SES == 5 ~ 0.3162278,
|
158 |
-
),
|
159 |
-
SES_4 = case_when(
|
160 |
-
SES == 1 ~ 0.1195229,
|
161 |
-
SES == 2 ~ -0.4780914,
|
162 |
-
SES == 3 ~ 0.7171372,
|
163 |
-
SES == 4 ~ -0.4780914,
|
164 |
-
SES == 5 ~ 0.1195229,
|
165 |
-
),
|
166 |
-
sector = factor(sample(c("General","Arab","Bedouin","Cherkess","Religious Jewish"),
|
167 |
-
prob = c(0.35, 0.5, 0.13, 0.01,0.2),
|
168 |
-
size = dummy_n, replace = TRUE)),
|
169 |
-
`sector_Arab...others` = ifelse(sector == "Arab",1,0),
|
170 |
-
sector_Bedouin = ifelse(sector == "Bedouin",1,0),
|
171 |
-
sector_Cherkess = ifelse(sector == "Cherkess",1,0),
|
172 |
-
sector_Religious.mixed = ifelse(sector == "Religious Jewish",1,0),
|
173 |
-
district = factor(sample(c("Center","Dan PT","Eilat","Haifa","Jerusalem",
|
174 |
-
"North","Sharon Shomron","South","Tel-Aviv Jaffa"),
|
175 |
-
prob = c(0.087, 0.048, 0.05, 0.2,
|
176 |
-
0.14,0.2,0.12,0.18,0.022),
|
177 |
-
size = dummy_n, replace = TRUE)),
|
178 |
-
district_Dan.PT = ifelse(district == "Dan PT",1,0),
|
179 |
-
district_Eilat = ifelse(district == "Eilat",1,0),
|
180 |
-
district_Haifa = ifelse(district == "Haifa",1,0),
|
181 |
-
district_Jerusalem = ifelse(district == "Jerusalem",1,0),
|
182 |
-
district_North = ifelse(district == "North",1,0),
|
183 |
-
district_Sharon.Shomron = ifelse(district == "Sharon Shomron",1,0),
|
184 |
-
district_South = ifelse(district == "South",1,0),
|
185 |
-
district_Tel.Aviv.Jaffa = ifelse(district == "Tel-Aviv Jaffa",1,0),
|
186 |
-
generic = factor(sample(c("Codeine","Tramadol","Oxycodone","Other"),
|
187 |
-
prob = c(0.78, 0.15, 0.6, 0.02),
|
188 |
-
size = dummy_n, replace = TRUE)),
|
189 |
-
generic_Other = ifelse(generic == "Other",1,0),
|
190 |
-
generic_Oxycodone = ifelse(generic == "Oxycodone",1,0),
|
191 |
-
generic_Tramadol = ifelse(generic == "Tramadol",1,0),
|
192 |
-
`malignancy_TRUE.` = round(runif(dummy_n, min=0, max=1)),
|
193 |
-
`pain_TRUE.` = round(runif(dummy_n, min=0, max=1)),
|
194 |
-
`psychiatric_not_pci_TRUE.` = round(runif(dummy_n, min=0, max=1))
|
195 |
-
) %>%
|
196 |
-
select(-c(SES, sector, district, generic,drug_sum_frac,
|
197 |
-
drug_nervous_frac, drug_antineoplastic_frac,drug_muscle_frac))
|
198 |
-
|
199 |
# create dictionary data -----------------------------------------------------------
|
200 |
vars_dict <- tibble(
|
201 |
"RE_age" = "Age at\nAdmission (years)",
|
|
|
2 |
library(sn)
|
3 |
library(readr)
|
4 |
|
|
|
|
|
|
|
|
|
|
|
5 |
# functions ---------------------------------------------------------------
|
6 |
generate_model <- function(df, model){
|
7 |
model_prob <- predict(model, as.matrix(df),
|
8 |
+
type = "response")
|
9 |
prob <- cal_prob(model_prob)
|
10 |
return(prob)
|
11 |
}
|
|
|
16 |
return(prob)
|
17 |
}
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
find_closest_n <- function(df, model, prob = 10, column_name = "prob", n = 2) {
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
differences <- abs(df[[column_name]] - prob)
|
|
|
|
|
21 |
closest_indices <- order(differences)[1:10]
|
|
|
|
|
22 |
return(df[sample(closest_indices,n),])
|
23 |
}
|
24 |
|
|
|
88 |
var_get <- function(x) {ifelse(x %in% vars_dict$name,
|
89 |
vars_dict[vars_dict$name == x,"var",drop = TRUE],x)}
|
90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
# create dictionary data -----------------------------------------------------------
|
92 |
vars_dict <- tibble(
|
93 |
"RE_age" = "Age at\nAdmission (years)",
|