liuzan committed on
Commit
7491e03
·
1 Parent(s): ba9ea8f

Upload 73 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +10 -0
  2. Dockerfile +14 -0
  3. README.md +12 -0
  4. backend/FUNCTIONS.R +1275 -0
  5. backend/analysis.R +0 -0
  6. backend/fill_missing_values.R +41 -0
  7. backend/get_aligned_seq_for_mea02.R +60 -0
  8. backend/get_normalized_data_of_psites3.R +149 -0
  9. backend/get_normalized_data_of_psites4.R +192 -0
  10. backend/import_extract.R +23 -0
  11. backend/preprocess.R +27 -0
  12. backend/visualization_deps_with_scatter02.R +117 -0
  13. examplefile/Clinicaltest.csv +40 -0
  14. examplefile/analysistools/Clinical_for_Demo.csv +40 -0
  15. examplefile/analysistools/Clinical_for_Pre.csv +10 -0
  16. examplefile/analysistools/Clinicaltest.csv +6 -0
  17. examplefile/analysistools/PreNormBasedProSummary.csv +0 -0
  18. examplefile/analysistools/phosphorylation_exp_design_info.txt +40 -0
  19. examplefile/data_frame_normalization_with_control_no_pair.csv +0 -0
  20. examplefile/download/anaysis_demo.zip +3 -0
  21. examplefile/download/mascot_xml.zip +3 -0
  22. examplefile/download/motif_kinase_relation.xlsx +0 -0
  23. examplefile/download/phosphorylation_peptide_txt.zip +3 -0
  24. examplefile/download/profiling_gene_txt.zip +3 -0
  25. examplefile/mascot/phosphorylation_exp_design_info.txt +10 -0
  26. examplefile/mascot/phosphorylation_peptide_txt/Exp027015_peptide.txt +0 -0
  27. examplefile/mascot/phosphorylation_peptide_txt/Exp027016_peptide.txt +0 -0
  28. examplefile/mascot/phosphorylation_peptide_txt/Exp027017_peptide.txt +0 -0
  29. examplefile/mascot/phosphorylation_peptide_txt/Exp027031_peptide.txt +0 -0
  30. examplefile/mascot/phosphorylation_peptide_txt/Exp027032_peptide.txt +0 -0
  31. examplefile/mascot/phosphorylation_peptide_txt/Exp027033_peptide.txt +0 -0
  32. examplefile/mascot/phosphorylation_peptide_txt/Exp027046_peptide.txt +0 -0
  33. examplefile/mascot/phosphorylation_peptide_txt/Exp027047_peptide.txt +0 -0
  34. examplefile/mascot/phosphorylation_peptide_txt/Exp027048_peptide.txt +0 -0
  35. examplefile/mascot/profiling_exp_design_info.txt +7 -0
  36. examplefile/mascot/profiling_gene_txt/Exp026982_gene.txt +0 -0
  37. examplefile/mascot/profiling_gene_txt/Exp026983_gene.txt +0 -0
  38. examplefile/mascot/profiling_gene_txt/Exp026995_gene.txt +0 -0
  39. examplefile/mascot/profiling_gene_txt/Exp026996_gene.txt +0 -0
  40. examplefile/mascot/profiling_gene_txt/Exp027008_gene.txt +0 -0
  41. examplefile/mascot/profiling_gene_txt/Exp027009_gene.txt +0 -0
  42. examplefile/maxquant/Phospho (STY)Sites.txt +0 -0
  43. examplefile/maxquant/phosphorylation_exp_design_info.txt +10 -0
  44. examplefile/maxquant/profiling_exp_design_info.txt +7 -0
  45. examplefile/maxquant/proteinGroups.txt +0 -0
  46. examplefile/motifanalysis.csv +0 -0
  47. examplefile/phosphorylation_exp_design_info.txt +40 -0
  48. examplefile/root/mascot/mascot_xml/Exp027015/Exp027015_F1_R1.txt +3 -0
  49. examplefile/root/mascot/mascot_xml/Exp027016/Exp027016_F1_R1.txt +3 -0
  50. examplefile/root/mascot/mascot_xml/Exp027017/Exp027017_F1_R1.txt +3 -0
.gitattributes CHANGED
@@ -42,3 +42,13 @@ PhosMap_datasets/motif_library/refseq/rattus/STY_background_of_refseq_rattus_for
42
  PhosMap_datasets/motif_library/uniprot/human/STY_background_of_uniprot_human_for_motif_enrichment.txt filter=lfs diff=lfs merge=lfs -text
43
  PhosMap_datasets/motif_library/uniprot/mouse/STY_background_of_uniprot_mouse_for_motif_enrichment.txt filter=lfs diff=lfs merge=lfs -text
44
  PhosMap_datasets/motif_library/uniprot/rattus/STY_background_of_uniprot_rattus_for_motif_enrichment.txt filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
42
  PhosMap_datasets/motif_library/uniprot/human/STY_background_of_uniprot_human_for_motif_enrichment.txt filter=lfs diff=lfs merge=lfs -text
43
  PhosMap_datasets/motif_library/uniprot/mouse/STY_background_of_uniprot_mouse_for_motif_enrichment.txt filter=lfs diff=lfs merge=lfs -text
44
  PhosMap_datasets/motif_library/uniprot/rattus/STY_background_of_uniprot_rattus_for_motif_enrichment.txt filter=lfs diff=lfs merge=lfs -text
45
+ examplefile/root/mascot/mascot_xml/Exp027015/Exp027015_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
46
+ examplefile/root/mascot/mascot_xml/Exp027016/Exp027016_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
47
+ examplefile/root/mascot/mascot_xml/Exp027017/Exp027017_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
48
+ examplefile/root/mascot/mascot_xml/Exp027031/Exp027031_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
49
+ examplefile/root/mascot/mascot_xml/Exp027032/Exp027032_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
50
+ examplefile/root/mascot/mascot_xml/Exp027033/Exp027033_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
51
+ examplefile/root/mascot/mascot_xml/Exp027046/Exp027046_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
52
+ examplefile/root/mascot/mascot_xml/Exp027047/Exp027047_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
53
+ examplefile/root/mascot/mascot_xml/Exp027048/Exp027048_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
54
+ www/manual.pdf filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM liuzandh/phosmap:1.0.0
2
+
3
+ RUN useradd -m -u 1000 user
4
+
5
+ USER user
6
+
7
+ ENV HOME=/home/user \
8
+ PATH=/home/user/.local/bin:$PATH
9
+
10
+ WORKDIR $HOME/app
11
+
12
+ COPY --chown=user . $HOME/app
13
+
14
+ CMD ["R", "--quiet", "-e", "shiny::runApp(host='0.0.0.0', port=7860)"]
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: PhosMap
3
+ emoji: 📚
4
+ colorFrom: blue
5
+ colorTo: yellow
6
+ sdk: docker
7
+ pinned: false
8
+ duplicated_from: posit/shiny-for-r-template
9
+ license: mit
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
backend/FUNCTIONS.R ADDED
@@ -0,0 +1,1275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Run the PhosMap python parser (XMLParser_mascot_dat.py) once per experiment
# listed in the experiment design file and write a per-experiment status log.
#
# Must be called from inside a Shiny session: progress is reported through
# withProgress()/incProgress().
#
# Args:
#   phosphorylation_exp_design_info_file_path: tab-separated file with a header
#     row and an `Experiment_Code` column.
#   mascot_xml_dir: directory whose entries are matched (by name) against the
#     experiment codes; every code must be present.
#   mascot_txt_dir: directory passed to the parser as its third argument;
#     created when it does not exist.
#
# Side effects:
#   Writes '<timestamp> log_of_extract_psites_score.txt' into a 'log' directory
#   next to the experiment design file (status per experiment: success,
#   warning or error).
#
# Raises:
#   An error when the design file or the XML directory is missing, when the
#   design file lists no experiments, or when a listed experiment has no entry
#   in mascot_xml_dir.
extract_psites_score <- function(
  phosphorylation_exp_design_info_file_path,
  mascot_xml_dir,
  mascot_txt_dir
){
  requireNamespace('utils')
  withProgress(message = "Start extracting the confidence of Psites from mascot.xml", detail = "This may take a while...", value = 0, {
    # mustWork = FALSE: existence is checked explicitly below so that the
    # caller gets an error with a message instead of a bare warning.
    phosphorylation_exp_design_info_file_path <- normalizePath(phosphorylation_exp_design_info_file_path, mustWork = FALSE)
    if (!file.exists(phosphorylation_exp_design_info_file_path)) {
      stop('Experiment design file does not exist: ', phosphorylation_exp_design_info_file_path)
    }
    mascot_xml_dir <- normalizePath(mascot_xml_dir, mustWork = FALSE)
    if (!file.exists(mascot_xml_dir)) {
      stop('Mascot XML directory does not exist: ', mascot_xml_dir)
    }
    mascot_xml_dir_files <- list.files(mascot_xml_dir)

    mascot_txt_dir <- normalizePath(mascot_txt_dir, mustWork = FALSE)
    if (!file.exists(mascot_txt_dir)) {
      cat('\n', mascot_txt_dir, ' -> ', 'No the directory, create it.')
      dir.create(mascot_txt_dir, recursive = TRUE)
    }

    command <- "python"
    # Location of the python script shipped with the PhosMap package.
    path2script <- system.file("src", "XMLParser_mascot_dat.py", package = "PhosMap")
    path2script <- normalizePath(path2script, mustWork = FALSE)

    # Experiment codes come from the design file.
    experiment_code <- utils::read.table(phosphorylation_exp_design_info_file_path,
                                         sep = '\t',
                                         header = TRUE)
    experiment_code <- as.vector(unlist(experiment_code$Experiment_Code))

    # Check for an empty design before anything else depends on the codes.
    experiment_code_count <- length(experiment_code)
    if (experiment_code_count < 1) {
      stop('No experiments found in ', phosphorylation_exp_design_info_file_path)
    }

    # Every experiment code must have an entry of the same name in mascot_xml_dir.
    na_experiments <- experiment_code[is.na(match(experiment_code, mascot_xml_dir_files))]
    if (length(na_experiments) > 0) {
      stop('The following experiments do not exist in ', mascot_xml_dir, ': ',
           paste(na_experiments, collapse = ', '))
    }

    cat('\n Start extracting the confidence of Psites from mascot.xml.')
    cat('\n Total ', experiment_code_count, ' experiment(s).')
    cat('\n It will take a little while.')

    parent_dir <- normalizePath(dirname(phosphorylation_exp_design_info_file_path))
    log_dir <- normalizePath(file.path(parent_dir, 'log'), mustWork = FALSE)
    if (!file.exists(log_dir)) {
      cat('\n', log_dir, ' -> ', 'No the directory, create it.')
      dir.create(log_dir)
    }

    log_rows <- vector('list', experiment_code_count)
    for (i in seq_len(experiment_code_count)) {
      experiment_code_i <- experiment_code[i]
      # Script path first, then the three parser arguments. system2() does not
      # quote its arguments, so quote them to survive paths containing spaces.
      allArgs <- shQuote(c(path2script, experiment_code_i, mascot_xml_dir, mascot_txt_dir))
      log_rows[[i]] <- tryCatch(
        {
          system2(command, args = allArgs, stdout = TRUE)
          cat('\n', i, '->', experiment_code_i, '->', 'success', '\n')
          c(experiment_code_i, 'success')
        },
        warning = function(w){
          cat('\n', i, '->', experiment_code_i, '->', 'warning', '\n')
          print(w)
          c(experiment_code_i, 'warning')
        },
        error = function(e){
          cat('\n', i, '->', experiment_code_i, '->', 'error', '\n')
          print(e)
          c(experiment_code_i, 'error')
        }
      )
      # The increment must be a single number: one equal share per experiment.
      incProgress(1/experiment_code_count, detail = paste0('\n Completed file: ', i, '/', experiment_code_count))
    }

    log_df <- do.call(rbind, log_rows)
    colnames(log_df) <- c('Exp_no', 'Status')
    # Explicit format keeps the file name free of ':' and of fractional seconds.
    now_time <- format(Sys.time(), '%Y-%m-%d %H-%M-%S')
    log_df_file_name <- paste(now_time, 'log_of_extract_psites_score.txt')
    log_df_file_path <- normalizePath(file.path(log_dir, log_df_file_name), mustWork = FALSE)
    utils::write.table(log_df, log_df_file_path, sep = '\t', row.names = FALSE, quote = FALSE)

    cat('\n Program finish, please see result log to check status.', '->', log_df_file_path)
  })
}
108
+
109
+
110
# Read, in the order given by `experiment_ID`, one data file per experiment
# from `specific_dir`.
#
# Must be called from inside a Shiny session (uses withProgress/incProgress).
# The reader is chosen from the suffix of the FIRST file in the directory:
# 'txt' -> tab-separated via read.table, anything else -> read.csv.
#
# Args:
#   specific_dir: directory containing the data files.
#   experiment_ID: experiment codes; each is matched against the part of a file
#     name that precedes the first '_'.
#
# Returns:
#   list(file_data_list = <list of character/numeric matrices named by file ID>,
#        file_ID        = <file names without their suffix, matched order>)
#
# Raises:
#   An error when the directory is empty or when an experiment has no
#   matching file.
get_file_info_from_dir <- function(specific_dir, experiment_ID){
  requireNamespace('utils')
  withProgress(message = 'Reading peptide identification files', style = "notification", detail = "processing...", value = 0,{
    all_files <- list.files(specific_dir)
    if (length(all_files) == 0) {
      stop('The directory of ', specific_dir, ' has no files.')
    }

    file_suffix <- get_file_suffix(all_files[1])
    if (file_suffix == 'txt') {
      read_file_function <- utils::read.table
      sep <- '\t'
    } else {
      read_file_function <- utils::read.csv
      sep <- ','
    }

    # Text before the first occurrence of `delimiter`. fixed = TRUE matters:
    # as a regular expression the '.' of '.txt' would match any character.
    text_before <- function(x, delimiter){
      vapply(strsplit(x, split = delimiter, fixed = TRUE), function(pieces) pieces[1], character(1))
    }
    all_files_ID <- text_before(all_files, paste('.', file_suffix, sep = ''))
    all_files_ID_code <- text_before(all_files_ID, '_')
    all_files_paths <- normalizePath(file.path(specific_dir, all_files))

    index_of_match <- match(experiment_ID, all_files_ID_code)
    # Fail early and by name; otherwise an NA path reaches the reader and
    # produces an error that does not mention the experiment.
    unmatched_experiments <- experiment_ID[is.na(index_of_match)]
    if (length(unmatched_experiments) > 0) {
      stop('No file found in ', specific_dir, ' for experiment(s): ',
           paste(unmatched_experiments, collapse = ', '))
    }
    matched_all_files_paths <- all_files_paths[index_of_match]
    matched_all_files_ID <- all_files_ID[index_of_match]

    matched_all_files_count <- length(matched_all_files_paths)
    file_data_list <- vector('list', matched_all_files_count)
    cat('\n Total file: ', matched_all_files_count)
    for (i in seq_len(matched_all_files_count)) {
      cat('\n completed: ', i, '/', matched_all_files_count)
      file_data_list[[i]] <- as.matrix(read_file_function(matched_all_files_paths[i], header = TRUE, sep = sep))
      incProgress(1/matched_all_files_count, detail = paste0('\n completed: ', i, '/', matched_all_files_count))
    }
    names(file_data_list) <- matched_all_files_ID

    list(file_data_list = file_data_list, file_ID = matched_all_files_ID)
  })
}
160
+
161
+
162
# Filter the peptide table of every experiment and collect, per experiment,
# the quantification columns and a composite peptide identifier.
#
# Must be called from inside a Shiny session (uses withProgress/incProgress).
#
# Columns read from each peptide table: Ion.Score, FDR, Area, PSMs, Sequence,
# Protein.Groups.Accessions, Modification. Column read from each site-score
# table (only when `qc` is TRUE): pep_var_mod_conf; its first column is matched
# against the first column of the peptide table.
#
# Args:
#   peptide_id: one identifier per experiment; used as column-name prefix.
#   files: list of peptide tables, parallel to `peptide_id`.
#   files_site_score: list of site-score tables, parallel to `peptide_id`.
#   qc: when TRUE keep only peptides that also appear in the site-score table
#     with a '%' in pep_var_mod_conf.
#   min_score: rows are kept when Ion.Score >= min_score ...
#   min_FDR:   ... and FDR < min_FDR.
#
# Returns:
#   list(peptide_df_with_area_psm_list, ID_of_seq_gi_site_list, ID_DF_list),
#   each a list with one element per experiment.
get_list_with_filtered_sites <- function(peptide_id, files, files_site_score, qc, min_score, min_FDR){
  withProgress(message = 'Reading psites QC files', style = "notification", detail = "processing...", value = 0,{
    area_psm_frames <- list()   # data.frame(area, psm) per experiment
    composite_ids <- list()     # 'sequence||accession||modification' per experiment
    id_value_frames <- list()   # composite id + area + psm per experiment
    n_experiments <- length(peptide_id)

    cat('\n Total file: ', n_experiments)
    for (k in seq_len(n_experiments)) {
      cat('\n completed: ',k,'/',n_experiments)

      peptides <- data.frame(files[[k]])
      # Score / FDR thresholds; which() silently drops rows where either is NA.
      ion_score <- as.numeric(as.vector(peptides$Ion.Score))
      fdr <- as.numeric(as.vector(peptides$FDR))
      peptides <- peptides[which(ion_score >= min_score & fdr < min_FDR), ]

      if (qc) {
        # Site-score rows that carry a confidence percentage.
        site_scores <- as.data.frame(files_site_score[[k]])
        site_scores <- site_scores[which(grepl('%', site_scores$pep_var_mod_conf)),]

        # Positions of those peptides inside the filtered peptide table;
        # peptides absent from the table are dropped.
        rows_with_score <- match(as.vector(site_scores[,1]), as.vector(peptides[,1]))
        rows_with_score <- rows_with_score[!is.na(rows_with_score)]
        kept <- peptides[rows_with_score,]
      } else {
        kept <- peptides
      }

      value_colnames <- paste(peptide_id[k], c('Area', 'PSMs'), sep = '_')
      values <- data.frame(as.numeric(as.vector(kept$Area)),
                           as.numeric(as.vector(kept$PSMs)))
      colnames(values) <- value_colnames

      composite_id <- paste(as.vector(kept$Sequence),
                            as.vector(kept$Protein.Groups.Accessions),
                            as.vector(kept$Modification),
                            sep = '||')

      id_and_values <- data.frame(composite_id, values)
      colnames(id_and_values) <- c("ID", value_colnames)

      area_psm_frames[[k]] <- values
      composite_ids[[k]] <- composite_id
      id_value_frames[[k]] <- id_and_values

      incProgress(1/n_experiments, detail = paste0('\n completed: ',k,'/',n_experiments))
    }

    return(list(
      peptide_df_with_area_psm_list = area_psm_frames,
      ID_of_seq_gi_site_list = composite_ids,
      ID_DF_list = id_value_frames
    ))
  })
}
228
+
229
+
230
# Read the peptide and phosphosite-score files of every experiment in the
# design file, filter the peptides, merge all experiments on the unique
# peptide ID and return one area column per experiment (design-file order).
#
# Must be called from inside a Shiny session (uses withProgress/incProgress).
# Relies on get_file_suffix(), get_list_with_filtered_sites() and
# get_merged_phospho_df(), which are defined elsewhere.
#
# Args:
#   firmiana_peptide_dir: directory with one peptide file per experiment.
#   psites_score_dir: directory with one site-score file per experiment.
#   phospho_experiment_design_file_path: tab-separated file with a header row
#     and an `Experiment_Code` column.
#   qc: passed to get_list_with_filtered_sites().
#   min_score, min_FDR: filter thresholds passed to
#     get_list_with_filtered_sites().
#
# Returns:
#   data.frame whose first column is the peptide ID (name taken from the merged
#   table) followed by one column per experiment code.
#
# Raises:
#   An error when a directory or the design file is missing, when a directory
#   is empty, or when an experiment has no matching file.
pre_process_filter_psites <- function(firmiana_peptide_dir, psites_score_dir,
                                      phospho_experiment_design_file_path, qc,
                                      min_score = 20, min_FDR = 0.01) {
  requireNamespace('utils')

  # Read one file per experiment from `specific_dir`, in `experiment_ID` order.
  # The reader is chosen from the suffix of the first file in the directory.
  read_experiment_files <- function(specific_dir, experiment_ID, progress_message){
    withProgress(message = progress_message, style = "notification", detail = "processing...", value = 0,{
      all_files <- list.files(specific_dir)
      if (length(all_files) == 0) {
        stop('The directory of ', specific_dir, ' has no files.')
      }
      file_suffix <- get_file_suffix(all_files[1])
      if (file_suffix == 'txt') {
        read_file_function <- utils::read.table
        sep <- '\t'
      } else {
        read_file_function <- utils::read.csv
        sep <- ','
      }
      # fixed = TRUE: the '.' of the suffix must not act as a regex wildcard.
      text_before <- function(x, delimiter){
        vapply(strsplit(x, split = delimiter, fixed = TRUE), function(pieces) pieces[1], character(1))
      }
      all_files_ID <- text_before(all_files, paste('.', file_suffix, sep = ''))
      all_files_ID_code <- text_before(all_files_ID, '_')
      all_files_paths <- normalizePath(file.path(specific_dir, all_files))

      index_of_match <- match(experiment_ID, all_files_ID_code)
      unmatched_experiments <- experiment_ID[is.na(index_of_match)]
      if (length(unmatched_experiments) > 0) {
        stop('No file found in ', specific_dir, ' for experiment(s): ',
             paste(unmatched_experiments, collapse = ', '))
      }
      matched_paths <- all_files_paths[index_of_match]
      matched_ID <- all_files_ID[index_of_match]

      matched_count <- length(matched_paths)
      file_data_list <- vector('list', matched_count)
      cat('\n Total file: ', matched_count)
      for (i in seq_len(matched_count)) {
        cat('\n completed: ', i, '/', matched_count)
        file_data_list[[i]] <- as.matrix(read_file_function(matched_paths[i], header = TRUE, sep = sep))
        incProgress(1/matched_count, detail = paste0('\n completed: ', i, '/', matched_count))
      }
      names(file_data_list) <- matched_ID
      list(file_data_list = file_data_list, file_ID = matched_ID)
    })
  }

  withProgress(message = 'Step2:QC and Merging', style = "notification", detail = "processing...", value = 0, max = 4,{
    PEPTIDE_DIR <- normalizePath(firmiana_peptide_dir, mustWork = FALSE)
    if (!file.exists(firmiana_peptide_dir)) {
      stop('Peptide directory does not exist: ', firmiana_peptide_dir)
    }

    PSITES_WITH_SCORE_DIR <- normalizePath(psites_score_dir, mustWork = FALSE)
    if (!file.exists(psites_score_dir)) {
      stop('Psites score directory does not exist: ', psites_score_dir)
    }

    phospho_experiment_design_file_path <- normalizePath(phospho_experiment_design_file_path, mustWork = FALSE)
    if (!file.exists(phospho_experiment_design_file_path)) {
      stop('Experiment design file does not exist: ', phospho_experiment_design_file_path)
    }

    # The design file fixes the order of the experiments in the result.
    # stringsAsFactors must be TRUE or FALSE; the codes are used as plain values.
    phospho_experiment_design_file <- utils::read.table(phospho_experiment_design_file_path, sep = '\t',
                                                        header = TRUE, stringsAsFactors = FALSE)
    phospho_experiment_ID <- as.vector(unlist(phospho_experiment_design_file$Experiment_Code))

    # Step 1: peptide identification files.
    result_list_from_PEPTIDE_DIR <- read_experiment_files(PEPTIDE_DIR, phospho_experiment_ID,
                                                          'Reading peptide identification files')
    files <- result_list_from_PEPTIDE_DIR$file_data_list
    peptide.id <- result_list_from_PEPTIDE_DIR$file_ID
    incProgress(1, detail = '')

    # Step 2: phosphosite score (QC) files.
    cat('\n The 2nd step: read psites QC files.')
    result_list_from_PSITES_WITH_SCORE_DIR <- read_experiment_files(PSITES_WITH_SCORE_DIR, phospho_experiment_ID,
                                                                    'Reading psites QC files')
    files_site_score <- result_list_from_PSITES_WITH_SCORE_DIR$file_data_list
    incProgress(1, detail = '')

    # Step 3: filter peptides.
    cat('\n The 3rd step: filter peptides based on site quality.')
    result_list_with_filtered_sites <- get_list_with_filtered_sites(peptide.id, files,
                                                                    files_site_score, qc,
                                                                    min_score, min_FDR)
    peptide_df_with_area_psm_list <- result_list_with_filtered_sites$peptide_df_with_area_psm_list # area, psm
    ID_of_seq_gi_site_list <- result_list_with_filtered_sites$ID_of_seq_gi_site_list               # seq_gi_psite
    ID_DF_list <- result_list_with_filtered_sites$ID_DF_list                                       # seq_gi_psite, area, psm
    incProgress(1, detail = '')

    # Step 4: merge all experiments on the unique peptide ID.
    cat('\n The 4th step: merge data based on peptides (unique ID).')
    withProgress(message = 'Merging data based on peptides (unique ID)', style = "notification", detail = "processing...", value = 0,{
      merge_df_with_phospho_peptides <- get_merged_phospho_df(peptide.id,
                                                              peptide_df_with_area_psm_list,
                                                              ID_of_seq_gi_site_list, ID_DF_list)

      # Drop the PSM columns. Guard the empty case: negative indexing with an
      # empty index would remove every column.
      index_of_PSMs <- grep('_PSMs', colnames(merge_df_with_phospho_peptides))
      if (length(index_of_PSMs) > 0) {
        merge_df_with_phospho_peptides <- merge_df_with_phospho_peptides[, -index_of_PSMs, drop = FALSE]
      }

      merge_df_with_phospho_peptides_colnames <- colnames(merge_df_with_phospho_peptides)
      ID <- as.vector(merge_df_with_phospho_peptides[, 1])
      # drop = FALSE keeps a table (and its column names) with one experiment.
      Value <- merge_df_with_phospho_peptides[, -1, drop = FALSE]

      # Reorder the value columns to follow the design file; a column belongs
      # to the experiment named before its first '_'.
      Value_colnames_ID <- vapply(strsplit(colnames(Value), split = '_'),
                                  function(pieces) pieces[1], character(1))
      index_of_match <- match(phospho_experiment_ID, Value_colnames_ID)
      Value <- Value[, index_of_match, drop = FALSE]

      merge_df_with_phospho_peptides <- data.frame(ID, Value)
      colnames(merge_df_with_phospho_peptides) <- c(merge_df_with_phospho_peptides_colnames[1], phospho_experiment_ID)
      incProgress(1, detail = 'finishing...')
    })

    merge_df_with_phospho_peptides
  })
}
380
+
381
+
382
# Split the composite peptide ID ('sequence||accession||modification') into
# separate columns and add the gene symbol each accession maps to.
#
# Must be called from inside a Shiny session (uses withProgress/incProgress).
# Reads './PhosMap_datasets/id_coversion_table/<species>_ID.txt' relative to
# the working directory; that table must have a `GeneSymbol` column and a
# column named by `id_type`, whose cells may hold several IDs separated
# by '; '.
#
# Args:
#   merge_df_with_phospho_peptides: data.frame whose first column holds the
#     composite ID, followed by the quantification columns.
#   species: prefix of the conversion table file name.
#   id_type: conversion-table column the accessions are looked up in. For
#     'RefSeq_Protein_GI' the 'gi|' prefix is removed before the lookup.
#
# Returns:
#   data.frame with columns Sequence, ID, Modification, GeneSymbol and the
#   quantification columns; rows without a gene symbol are removed. When the
#   accessions of a row map to more than one symbol, GeneSymbol holds the raw
#   accessions joined by ';'.
get_combined_data_frame02 <- function(merge_df_with_phospho_peptides, species = 'human', id_type = 'RefSeq_Protein_GI'
){
  requireNamespace('utils')

  cat('\n The 5th step: write the data frame with symbols mapping to genes.')

  df_of_combination <- withProgress(message = 'Writing the data frame with symbols mapping to genes', style = "notification", detail = "This may take a while...", value = 0,{
    id_coversion_table_dir <- "./PhosMap_datasets/id_coversion_table/"
    id_coversion_table <- utils::read.table(paste0(id_coversion_table_dir, species, "_ID.txt"), sep = '\t', header = TRUE)

    cat('\n The 5th step is running.')
    # Composite IDs. The first column is dropped further down as "the ID
    # column", so fall back to it when the named column is absent.
    seq_gi_site_vector <- merge_df_with_phospho_peptides$ID_of_seq_gi_site
    if (is.null(seq_gi_site_vector)) {
      seq_gi_site_vector <- merge_df_with_phospho_peptides[[1]]
    }
    id_fields <- strsplit(as.character(as.vector(seq_gi_site_vector)), split = "||", fixed = TRUE)
    nth_field <- function(k) vapply(id_fields, function(pieces) pieces[k], character(1))
    Sequence <- nth_field(1)
    ID <- nth_field(2)
    Modification <- nth_field(3)

    # Dictionary: ID (of the requested type) -> GeneSymbol.
    MappingDf <- id_coversion_table[, c('GeneSymbol', id_type)]
    id_cells <- as.character(as.vector(unlist(MappingDf[,2])))
    invalid_index <- which(id_cells == '' | id_cells == '-')
    if (length(invalid_index) > 0) {
      MappingDf <- MappingDf[-invalid_index,]
    }
    MappingDf_row <- nrow(MappingDf)
    cat('\n', 'Construct dictionary based on GeneSymbol and specific ID.')
    cat('\n', 'The total:', MappingDf_row)
    # Built in one pass: growing a vector row by row is quadratic, and the ID
    # column is coerced to character so numeric ID types can be split as well.
    symbols <- as.character(as.vector(MappingDf[,1]))
    ids_per_symbol <- strsplit(as.character(as.vector(unlist(MappingDf[,2]))), split = '; ')
    mapping_dict <- rep(symbols, lengths(ids_per_symbol))
    names(mapping_dict) <- unlist(ids_per_symbol)
    cat('\n', 'Completed:', MappingDf_row, '/', MappingDf_row)
    incProgress(0.5, detail = paste0('\n', 'Completed:', MappingDf_row, '/', MappingDf_row))

    # Look up every accession of every row with a single match() call.
    accessions_per_row <- strsplit(as.character(ID), split = ";", fixed = TRUE)
    accession_keys <- unlist(accessions_per_row)
    if (id_type == 'RefSeq_Protein_GI') {
      accession_keys <- gsub('gi|', '', accession_keys, fixed = TRUE)
    }
    symbol_per_accession <- unname(mapping_dict)[match(accession_keys, names(mapping_dict))]
    row_of_accession <- factor(rep(seq_along(accessions_per_row), lengths(accessions_per_row)),
                               levels = seq_along(accessions_per_row))
    symbols_per_row <- split(symbol_per_accession, row_of_accession)

    GeneSymbol <- vapply(seq_along(accessions_per_row), function(k){
      row_symbols <- symbols_per_row[[k]]
      row_symbols <- unique(row_symbols[!is.na(row_symbols)])
      if (length(row_symbols) == 0) {
        NA_character_                                   # nothing mapped: row is removed below
      } else if (length(row_symbols) == 1) {
        row_symbols
      } else {
        paste(accessions_per_row[[k]], collapse = ';')  # ambiguous: keep the raw accessions
      }
    }, character(1))

    # sequence, accession, modification, symbol, quantification values.
    # drop = FALSE keeps the experiment column name with a single experiment.
    combined <- data.frame(Sequence, ID, Modification, GeneSymbol,
                           merge_df_with_phospho_peptides[, -1, drop = FALSE])
    combined <- combined[which(!is.na(GeneSymbol)),]
    cat('\n The 5th step is over ^_^.')
    cat('\n The 5th step: write the data frame with symbols mapping to genes.')
    incProgress(0.5, detail = 'Please wait a moment')
    combined
  })
  return(df_of_combination)
}
471
+
472
+
473
+ get_summary_with_unique_sites02 <- function(combined_df_with_mapped_gene_symbol, species = 'human', fasta_type = 'refseq'
474
+ ){
475
+ requireNamespace('utils')
476
+ requireNamespace('stringr')
477
+ # unique phosphorylation sites
478
+ withProgress(message = 'Constructing the data frame with unique phosphorylation site for each protein sequence', style = "notification", detail = "This may take a while...", value = 0,{
479
+ cat('\n The 6th step: construct the data frame with unique phosphorylation site for each protein sequence.')
480
+
481
+ path <- "./PhosMap_datasets/fasta_library/"
482
+ fasta_data <- utils::read.table(paste0(path, fasta_type, "/", species, "/", species, "_", fasta_type, "_fasta.txt"), header=TRUE, sep="\t")
483
+
484
+ id_data <- combined_df_with_mapped_gene_symbol
485
+
486
+ # Keep peptides assigned to unique protein
487
+ id_data_only_peptide2gi <- id_data[which(!grepl(';', as.vector(id_data$ID))),]
488
+
489
+ for(j in 1:2){
490
+ if(j == 1){
491
+ withProgress(message = 'Getting modification index in protein sequence. ', style = "notification", detail = "This may take a while...", value = 0,{
492
+ get_modification_index <- function(id_data_only_peptide2gi, fasta_data){
493
+ # 1
494
+ # Get modification index in protein sequence.
495
+ cat('\n', 'Get modification index in protein sequence.')
496
+ id_data_only_peptide2gi_row <- nrow(id_data_only_peptide2gi)
497
+ modification_index_in_protein_seq_list <- list()
498
+ for(i in seq_len(id_data_only_peptide2gi_row)){
499
+ peptide_seq <- as.vector(id_data_only_peptide2gi$Sequence[i])
500
+ peptide_id <- as.vector(id_data_only_peptide2gi$ID[i])
501
+ modification_index_in_peptide_seq <- unlist(gregexpr("[a-z]", peptide_seq))
502
+ protein_seq <- as.vector(fasta_data$Sequence[which(fasta_data$ID==peptide_id)])
503
+ first_index_of_peptide2protein <- unlist(gregexpr(toupper(peptide_seq), protein_seq))
504
+ modification_index_in_protein_seq <- NULL
505
+ for(elemt in first_index_of_peptide2protein){
506
+ tmp_modification_index_in_protein_seq <- elemt + modification_index_in_peptide_seq -1
507
+ modification_index_in_protein_seq <- c(modification_index_in_protein_seq,
508
+ tmp_modification_index_in_protein_seq)
509
+ }
510
+ modification_index_in_protein_seq_list[[i]] <- modification_index_in_protein_seq
511
+ if(i%%500==0 | i==id_data_only_peptide2gi_row ){
512
+ cat('\n completed: ', i, '/', id_data_only_peptide2gi_row)
513
+ }
514
+ incProgress(1/id_data_only_peptide2gi_row, detail = paste0('\n', 'Completed:', i, '/', id_data_only_peptide2gi_row))
515
+ }
516
+ return(modification_index_in_protein_seq_list)
517
+ }
518
+
519
+
520
+ # Determine locations of the psites each peptide mapped to protein squence.
521
+ modification_index_in_protein_seq_list <- get_modification_index(id_data_only_peptide2gi,
522
+ fasta_data)
523
+
524
+ proteins_in_id_data_only_peptide2gi <- as.vector(id_data_only_peptide2gi$ID)
525
+ sequences_in_id_data_only_peptide2gi <- as.vector(id_data_only_peptide2gi$Sequence)
526
+ value_in_id_data_only_peptide2gi <- id_data_only_peptide2gi[, -c(seq_len(4))]
527
+
528
+ unique_proteins <- unique(proteins_in_id_data_only_peptide2gi)
529
+ unique_protein_count <- length(unique_proteins)
530
+ })
531
+ }
532
+
533
+ if(j == 2){
534
+ # Show psites and modifications of one protein, merge the values with the same modification type.
535
+ cat('\n', 'Map phosphorylation sites to protein sequence and eliminate redundancy.')
536
+ withProgress(message = 'Mapping phosphorylation sites to protein sequence and eliminate redundancy. ', style = "notification", detail = "This may take a while...", value = 0,{
537
+ system.time({
538
+ summary_df_of_unique_proteins_with_sites <- c()
539
+ for(i in seq_len(unique_protein_count)){
540
+
541
+ df_with_AAs_i <- get_df_with_AAs_i(unique_proteins,
542
+ i,
543
+ id_data_only_peptide2gi,
544
+ proteins_in_id_data_only_peptide2gi,
545
+ sequences_in_id_data_only_peptide2gi,
546
+ modification_index_in_protein_seq_list)
547
+
548
+ summary_df_of_unique_protein_with_sites <- get_unique_AAs_i_df(df_with_AAs_i)
549
+
550
+ summary_df_of_unique_proteins_with_sites <- rbind(
551
+ summary_df_of_unique_proteins_with_sites,
552
+ summary_df_of_unique_protein_with_sites
553
+ )
554
+
555
+ if(i%%500==0 | i == unique_protein_count){
556
+ cat('\n completed: ', i, '/', unique_protein_count)
557
+ }
558
+ incProgress(1/unique_protein_count, detail = paste0('\n', 'Completed:', i, '/', unique_protein_count))
559
+
560
+ summary_df_of_unique_proteins_with_sites_rownames <- paste(as.vector(summary_df_of_unique_proteins_with_sites$ID),
561
+ as.vector(summary_df_of_unique_proteins_with_sites$AA_in_protein),
562
+ sep = '_')
563
+ rownames(summary_df_of_unique_proteins_with_sites) <- summary_df_of_unique_proteins_with_sites_rownames
564
+ summary_df_of_unique_proteins_with_sites_colnames <- colnames(summary_df_of_unique_proteins_with_sites)
565
+ index_of_PSMs <- which(grepl('_PSMs', summary_df_of_unique_proteins_with_sites_colnames))
566
+ if(length(index_of_PSMs)>0){
567
+ summary_df_of_unique_proteins_with_sites <- summary_df_of_unique_proteins_with_sites[,-index_of_PSMs]
568
+ }
569
+ summary_df_of_unique_proteins_with_sites$GeneSymbol <- apply(data.frame(summary_df_of_unique_proteins_with_sites$GeneSymbol),
570
+ 1,
571
+ function(x){
572
+ if(grepl('||', x)){
573
+ x <- as.vector(x)
574
+ x <- strsplit(x, split = '||', fixed = TRUE)
575
+ x[[1]][1]
576
+ }
577
+ })
578
+ }
579
+ })
580
+ })
581
+ }
582
+ incProgress(1/2, detail = paste0('\n '))
583
+ }
584
+ cat('\n The 6th step: construct over.')
585
+
586
+ })
587
+ return(summary_df_of_unique_proteins_with_sites)
588
+ }
589
+
590
+
591
# Merge the per-experiment gene profiling tables exported from Firmiana into a
# single Symbol x experiment iBAQ table.
#
# Arguments:
#   firmiana_gene_dir         directory holding one '<ExperimentCode>_*.txt'
#                             profiling file per experiment.
#   US_cutoff                 rows are kept when USPeptide_Num >= US_cutoff.
#   experiment_gene_file_path tab-separated file with an 'Experiment_Code'
#                             column; it defines which files are used and the
#                             column order of the result.
#
# Returns a data.frame whose first column is 'Symbol' and whose remaining
# columns (one per experiment code) hold the iBAQ values; genes missing from an
# experiment are filled with 0.
#
# Must be called inside a Shiny session: progress is reported through
# withProgress()/incProgress().
merge_profiling_file_from_Firmiana <- function(firmiana_gene_dir, US_cutoff = 1, experiment_gene_file_path){
  requireNamespace('utils')

  withProgress(message = 'Step5 : Normalization [Normalizing phosphoproteomics data based on proteomics data.] ', style = "notification", detail = "processing...", value = 0,{
    # ---- Stage 1: locate, read and filter the individual profiling files ----
    DATA_DIR <- normalizePath(firmiana_gene_dir, mustWork = FALSE)
    if(!file.exists(DATA_DIR)){
      stop(DATA_DIR, ' -> No the file')
    }

    # Anchor the pattern: the original '.txt' was an unanchored regex and also
    # matched names such as 'a_txt_backup.csv'.
    file_names <- list.files(path = DATA_DIR, pattern = '\\.txt$')
    file_names_count <- length(file_names)
    # The original tested length(file_names_count), which is always 1.
    if(file_names_count < 1){
      stop('The directory of ', DATA_DIR, ' has no files.')
    }

    # The experiment code is the part of the file name before the first '_'.
    exp_names <- vapply(
      strsplit(file_names, split = '_'),
      function(parts){ parts[1] },
      character(1)
    )

    experiment_code <- utils::read.table(experiment_gene_file_path, header = TRUE, sep = '\t', stringsAsFactors = FALSE)
    experiment_code <- as.vector(unlist(experiment_code$Experiment_Code))

    index_of_match <- match(experiment_code, exp_names)
    na_index <- which(is.na(index_of_match))
    if(length(na_index) > 0){
      stop(paste(experiment_code[na_index], collapse = ' '), ' not in ', DATA_DIR)
    }

    # Keep only the requested experiments, in the order of the design file.
    exp_names <- exp_names[index_of_match]
    file_names <- file_names[index_of_match]
    file_names_count <- length(file_names)

    # Table headers of input data
    # "Gene.ID" "Symbol" "Annotation" "Modification" "Description"
    # "Protein.GI" "Protein.Num" "Area" "FoT.1e.6." "iBAQ"
    # "Peptide.Num" "Unique.Peptide.Num" "Strict.Peptide.Num" "US.Peptide.Num" "Identified.Proteins.Num"
    # "Unique.Proteins.Num"

    # New table headers of input data
    file_data_colnames <- c(
      "Gene_ID", "Symbol", "Annotation", "Modification", "Description",
      "Protein_GI", "Protein_Num", "Area", "FoT5", "iBAQ",
      "Peptide_Num", "UPeptide_Num", "SPeptide_Num", "USPeptide_Num", "Identified_Proteins_Num", "Unique_Proteins_Num"
    )
    kept_colnames <- c("Symbol", "iBAQ", "USPeptide_Num")
    kept_colnames_index <- match(kept_colnames, file_data_colnames)

    cat('\n Merge profiling files downloaded from Firmiana.')
    cat('\n Total files: ', file_names_count)
    data_list <- list()
    for(i in seq_len(file_names_count)){
      file_path <- normalizePath(file.path(DATA_DIR, file_names[i]))
      file_data <- utils::read.delim(file_path, header = TRUE, stringsAsFactors = FALSE, sep = '\t')
      colnames(file_data) <- file_data_colnames
      file_data <- file_data[, kept_colnames_index]

      # Filter on the unique-strict peptide count, then keep Symbol + iBAQ.
      index_of_US <- which(file_data$USPeptide_Num >= US_cutoff)
      file_data <- file_data[index_of_US, c(1,2)]
      colnames(file_data) <- c('Symbol', paste(exp_names[i], 'iBAQ', sep = '_'))
      data_list[[i]] <- file_data

      cat('\n Read and filter: ', i, '/', file_names_count)
      # incProgress() needs a scalar; the original passed 1/seq_len(n).
      incProgress(1/file_names_count, detail = paste0('\n Read and filter: ', i, '/', file_names_count))
    }
    names(data_list) <- exp_names

    # ---- Stage 2: outer-join all experiments on Symbol ----------------------
    data_list_count <- length(data_list)
    merge_df <- data_list[[1]]
    cat('\n merge_complete: ', 1, '/', data_list_count)
    if(data_list_count > 1){
      for(i in 2:data_list_count){
        merge_df <- merge(merge_df, data_list[[i]], by = 'Symbol', all = TRUE)
        cat('\n merge_complete: ', i, '/', data_list_count)
        incProgress(1/data_list_count, detail = paste0('\n merge_complete: ', i, '/', data_list_count))
      }
    }

    Symbol <- as.vector(merge_df[,1])
    # drop = FALSE keeps a one-experiment result as a one-column table.
    Value <- as.matrix(merge_df[, -1, drop = FALSE])
    # A gene absent from an experiment is reported as 0.
    Value[is.na(Value)] <- 0
    colnames(Value) <- exp_names
    merge_df_no_NA <- data.frame(Symbol, Value)

    incProgress(1/2, detail = '')
  })
  return(merge_df_no_NA)
}
702
+
703
+
704
# Normalize each sample column to a fraction of total (x 1e5).
#
# Arguments:
#   data_frame                first column = identifier, remaining columns =
#                             one numeric column per experiment.
#   experiment_code_file_path tab-separated file with an 'Experiment_Code'
#                             column; its values become the output column names.
#
# Returns a data.frame with the identifier column (original name kept) followed
# by the normalized values. Only strictly positive entries are rescaled
# (value / sum of positive values * 1e5); all other entries are left as they are.
get_normalized_data_FOT5 <- function(data_frame, experiment_code_file_path
){
  requireNamespace('utils')
  # cat('\n The 7th step: Normalize data and filter data only including phosphorylation site.')
  cat('Normalize proteomics data based on the total sum (x 1e5).')
  experiment_code <- utils::read.table(experiment_code_file_path, header = TRUE, sep = '\t', stringsAsFactors = FALSE)
  experiment_code <- as.vector(unlist(experiment_code$Experiment_Code))
  data_frame_colnames <- colnames(data_frame)
  ID <- as.vector(data_frame[,1])
  # drop = FALSE: with a single sample the original collapsed to a vector and
  # the column loop below failed.
  Value_raw <- data_frame[, -1, drop = FALSE]
  Value_FOT5 <- Value_raw
  for(i in seq_len(ncol(Value_FOT5))){
    x <- Value_raw[,i]
    valid_index <- which(x > 0)
    valid_x <- x[valid_index]
    Value_FOT5[valid_index, i] <- valid_x/sum(valid_x)*1e5
  }
  data_frame_normaliation <- data.frame(ID, Value_FOT5)
  colnames(data_frame_normaliation) <- c(data_frame_colnames[1], experiment_code)
  return(data_frame_normaliation)
}
729
+
730
+
731
# Keep the phosphosites whose row maximum ranks in the top fraction.
#
# Arguments:
#   phospho_data          data.frame whose first three columns are annotation
#                         and whose remaining columns are quantitative values.
#   percent_of_kept_sites fraction (0-1) of rows to keep; the count is
#                         round(nrow * fraction).
#
# Returns the kept rows of phospho_data, ordered by decreasing row maximum.
keep_psites_with_max_in_topX2 <- function(phospho_data, percent_of_kept_sites = 3/4){
  percent_of_kept_sites_str <- paste('top', percent_of_kept_sites*100, '%', sep = '')
  cat('\n The 8th step: filter psites with row maximum in', percent_of_kept_sites_str, '.')
  # drop = FALSE: with a single value column the original collapsed to a
  # vector and apply() failed.
  Value <- phospho_data[, -c(1,2,3), drop = FALSE]
  Value_rowmax <- apply(Value, 1, function(x){
    max(as.vector(unlist(x)))
  })
  index_of_Value_rowmax_desc <- order(Value_rowmax, decreasing = TRUE)
  count_of_kept_sites <- round(nrow(Value)*percent_of_kept_sites)
  index_of_Value_rowmax_desc_kept <- index_of_Value_rowmax_desc[seq_len(count_of_kept_sites)]
  phospho_data_meet_percent <- phospho_data[index_of_Value_rowmax_desc_kept,]
  cat('\n The 8th step: filter over with ', percent_of_kept_sites_str, ' cutoff.')
  return(phospho_data_meet_percent)
}
747
+
748
+
749
+
750
+
751
+
752
+
753
+
754
+
755
# Differential analysis with limma.
#
# Arguments:
#   expr_data_frame   first column = feature ID, remaining columns = samples.
#   group             group label of every sample column (design ~ 0 + group).
#   comparison_factor group names; every pair (later - earlier) becomes a
#                     contrast.
#   log2_label        TRUE when the values are already log2-transformed.
#   adjust_method     multiple-testing adjustment passed to limma::topTable.
#
# Returns a data.frame with columns ID, logFC and pvalue for the FIRST
# contrast only (coef = 1); 'pvalue' holds the adjusted p-value.
analysis_deps_limma2 <- function(expr_data_frame, group, comparison_factor,
                                 log2_label = FALSE, adjust_method = 'BH'){
  requireNamespace('limma')
  requireNamespace('stats')

  feature_ids <- as.vector(expr_data_frame[,1])
  expr_values <- expr_data_frame[,-1]
  if(!log2_label){
    expr_values <- log2(expr_data_frame[,-1]) # have to log
  }

  # Rows with zero variance carry no information for the linear model
  # ("Zero sample variances detected, have been offset away from zero").
  row_variance <- apply(expr_values, 1, function(row_values){
    stats::var(row_values)
  })
  constant_rows <- which(row_variance == 0)
  if(length(constant_rows) > 0){
    feature_ids <- feature_ids[-constant_rows]
    expr_values <- expr_values[-constant_rows,]
  }

  design <- stats::model.matrix(~ 0 + group)
  cat('\n', 'The matrix of experiment design.')
  print(design)
  colnames(design) <- levels(factor(group))
  rownames(design) <- colnames(expr_values)

  # Build every pairwise contrast "<treat>-<ctrl>" where ctrl precedes treat
  # in comparison_factor.
  level_count <- length(comparison_factor)
  if(level_count < 2){
    cat('\n', 'Do not construct pairwise comparison pattern.')
    stop('')
  }
  comparison_statement <- NULL
  for(ctrl_pos in seq_len(level_count - 1)){
    treat_pos <- (ctrl_pos + 1):level_count
    comparison_statement <- c(
      comparison_statement,
      paste(comparison_factor[treat_pos], '-', comparison_factor[ctrl_pos], sep = '')
    )
  }
  cat('\n', 'The combination of pairwise comparison(s).')
  cat('\n', comparison_statement, '\n')

  contrast.matrix <- limma::makeContrasts(contrasts = comparison_statement, levels = design)
  cat('\n', 'The matrix of comparison statement, compare other groups with control.')
  print(contrast.matrix)

  # Fit the linear model, apply the contrasts, then moderate with eBayes
  # (default settings, no trend).
  linear_fit <- limma::lmFit(expr_values, design)
  contrast_fit <- limma::eBayes(limma::contrasts.fit(linear_fit, contrast.matrix))

  # logFC = log(a/b) = log(a) - log(b)
  top_all <- limma::topTable(contrast_fit, coef = 1, adjust.method = adjust_method, p.value = 1, number = Inf)
  top_all <- stats::na.omit(top_all)

  result_df <- data.frame(
    ID = rownames(top_all),
    logFC = top_all$logFC,      # log2 scale
    pvalue = top_all$adj.P.Val
  )
  return(result_df)
}
842
+
843
# Differential analysis with SAM (samr).
#
# Arguments:
#   expr_data_frame first column = feature ID, remaining columns = samples.
#   group           group label of every sample column (factor or any vector
#                   coercible to a factor).
#   log2_label      TRUE when the values are already log2-transformed.
#   nperms          number of permutations passed to samr::samr.
#   rand            random seed passed to samr::samr.
#   minFDR          the first delta whose FDR (column 5 of the delta table) is
#                   below this value is used as the cut-off.
#   samr_plot       draw the SAM plot when TRUE.
#
# Returns list(genes_up_df, genes_down_df). Each element is NULL when no
# significant gene was found in that direction, otherwise a data.frame whose
# first column is 'ID' and whose last column is 'qvalue' (as a fraction).
# Stops when fewer than two groups are supplied or no delta meets minFDR.
analysis_deps_sam2 <- function(expr_data_frame, group, log2_label = FALSE,
                               nperms = 100, rand = NULL, minFDR = 0.05,
                               samr_plot = TRUE){
  requireNamespace('samr')
  requireNamespace('stats')
  expr_ID <- as.vector(expr_data_frame[,1])
  # (Li Jia'ao) assignment added so the already-logged case has a value matrix.
  expr_Valule <- expr_data_frame[,-1]
  if(!log2_label){
    expr_Valule <- log2(expr_Valule) # have to log
  }

  # Drop rows with zero variance.
  expr_Valule_row_var <- apply(expr_Valule, 1, function(x){
    stats::var(x)
  })
  constant_row_index <- which(expr_Valule_row_var == 0)
  if(length(constant_row_index) > 0){
    expr_ID <- expr_ID[-constant_row_index]
    expr_Valule <- expr_Valule[-constant_row_index,]
  }

  # Coerce once: nlevels() on a plain character/numeric vector is 0, which
  # made the original abort for any non-factor 'group'.
  group <- as.factor(group)
  group_nlevels <- nlevels(group)
  if(group_nlevels < 2){
    stop('SAM needs at least two groups, found ', group_nlevels, '.')
  }

  # construct the samr data
  sam_data <- list(x = as.matrix(expr_Valule), y = as.numeric(group),
                   geneid = expr_ID, genenames = expr_ID, logged2 = TRUE)

  if(group_nlevels == 2){
    resp_type <- "Two class unpaired"
  }else{
    resp_type <- "Multiclass"
  }
  cat('\n', resp_type, '\n')
  samr_obj <- samr::samr(sam_data, resp.type = resp_type, nperms = nperms, random.seed = rand)

  # Compute the delta values
  delta_table <- samr::samr.compute.delta.table(samr_obj)

  # Determine a FDR cut-off: first delta whose FDR falls below minFDR.
  index_less_than_min_FDR <- which(delta_table[,5] < minFDR)
  if(length(index_less_than_min_FDR) < 1){
    stop('Not found appropiate cutoff less than specific minimum FDR.')
  }
  delta <- delta_table[index_less_than_min_FDR[1], 1]

  if(samr_plot){
    cat('\n', 'Plot samr plot to view DEPs (or DEGs) distribution.')
    samr::samr.plot(samr_obj, delta)
  }

  # Turn one direction of the siggenes table into a tidy data.frame:
  # keep column 3 (renamed 'ID') and columns 7..end, and convert the last
  # column from percent to a fraction named 'qvalue'.
  tidy_siggenes <- function(gene_count, gene_table){
    if(!(gene_count > 0)){
      return(NULL)
    }
    genes_df <- data.frame(gene_table)
    genes_df <- genes_df[, c(3, 7:ncol(genes_df))]
    last_col <- ncol(genes_df)
    # as.character() first so a factor column is not converted to level codes.
    genes_df[, last_col] <- as.numeric(as.character(genes_df[, last_col]))/100
    colnames(genes_df) <- c('ID', colnames(genes_df)[-c(1, last_col)], 'qvalue')
    genes_df
  }

  # Extract significant genes at the cut-off delta
  siggenes_table <- samr::samr.compute.siggenes.table(samr_obj, delta, sam_data, delta_table, all.genes = FALSE)

  # '=' (not '<-') so the elements are actually named; positional access
  # ([[1]], [[2]]) used by existing callers keeps working.
  sam_result_list <- list(
    genes_up_df = tidy_siggenes(siggenes_table$ngenes.up, siggenes_table$genes.up),
    genes_down_df = tidy_siggenes(siggenes_table$ngenes.lo, siggenes_table$genes.lo)
  )

  return(sam_result_list)
}
939
+
940
+
941
# Run KSEA on every sample column and collect the per-kinase summaries.
#
# Arguments:
#   ptypes_data  first column = site ID, remaining columns = one ratio column
#                per experiment.
#   species      selects the kinase-substrate relationship table on disk.
#   log2_label   TRUE when the ratios are already log2-transformed.
#   ratio_cutoff forwarded to get_substrate_expr_df().
#
# Returns a list with four data frames: regulation direction, p-value,
# activity and the kinase-site-substrate ratio matrix.
# Must be called inside a Shiny session (withProgress/incProgress).
get_summary_from_ksea2 <- function(
  ptypes_data,
  species = 'human',
  log2_label = TRUE,
  ratio_cutoff = 3
){
  requireNamespace('utils')
  withProgress(message = "Running KSEA", style = "notification", detail = "processing...",{
    # KSRR: kinase substrate regulation relationship, shipped with PhosMap.
    # Columns: "kinase", "substrate", "site", "sequence" and "predicted".
    ksrr_path <- paste0("./PhosMap_datasets/kinase_substrate_regulation_relationship_table/", species, "/", species, "_ksrr.csv")
    ksrr_table <- utils::read.csv(ksrr_path, header = TRUE, sep= ",", stringsAsFactors = NA)

    site_ids <- as.vector(ptypes_data[,1])
    ratio_df <- ptypes_data[,-1]
    if(!log2_label){
      ratio_df <- log2(ratio_df)
    }
    sample_names <- colnames(ratio_df)
    sample_count <- ncol(ratio_df)

    es_by_sample <- list()
    pvalue_by_sample <- list()
    regulons_by_sample <- list()
    activity_by_sample <- list()
    trans_by_sample <- list()

    cat('\n Starting KSEA')
    for(k in seq_len(sample_count)){
      cat('\n completing: ', k, '/', sample_count)
      sample_ratio <- as.numeric(unlist(ratio_df[,k]))
      ksea_k <- get_ksea_result_list(
        sample_ratio, site_ids,
        ksrr_table,
        ksea_activity_i_pvalue = 0.05
      )
      es_by_sample[[k]] <- ksea_k$ksea_es_i_non_NA
      pvalue_by_sample[[k]] <- ksea_k$ksea_pvalue_i_non_NA
      regulons_by_sample[[k]] <- ksea_k$ksea_regulons_i_non_NA
      activity_by_sample[[k]] <- ksea_k$ksea_activity_i
      trans_by_sample[[k]] <- ksea_k$ksea_trans_i
      cat('\n completed: ', k, '/', sample_count)
      incProgress(1/sample_count, detail = paste0("\n completed: ", k, "/",sample_count))
    }
    cat('\n Ending KSEA')

    cat('\n Extracting information data frame derived from KSEA')
    cat('\n ********** Regulation direction from KSEA **********')
    cat('\n ********** Pvalue from KSEA **********')
    cat('\n ********** Activity from KSEA **********')
    cat('\n ********** Kinase_site_substrate quantification matrix after KSEA **********')
    cat('\n')

    # Union of all kinases (regulons) seen in any sample.
    all_regulons <- unique(unlist(regulons_by_sample))

    # regulation direction: 1 = activate, 0 = no work, -1 = supress
    direction_df <- get_ksea_regulons_info(all_regulons, trans_by_sample, trans_by_sample,
                                           sample_names)
    pvalue_df <- get_ksea_regulons_info(all_regulons, trans_by_sample, pvalue_by_sample,
                                        sample_names)
    # kinase activity based on pvalue and enrichment score computed by ksea
    activity_df <- get_ksea_regulons_info(all_regulons, trans_by_sample, activity_by_sample,
                                          sample_names)

    substrate_ratio_df <- get_substrate_expr_df(site_ids,
                                                ksrr_table,
                                                all_regulons,
                                                ratio_df,
                                                ratio_cutoff)

    summary_df_list_from_ksea <- list(
      ksea_regulons_regulation_direction_df = direction_df,
      ksea_regulons_pvalue_df = pvalue_df,
      ksea_regulons_activity_df = activity_df,
      ksea_kinase_site_substrate_original_ratio_df = substrate_ratio_df
    )

    cat('\n KSEA OK! ^_^')

    # return() inside the progress expression exits the enclosing function.
    return(summary_df_list_from_ksea)
  })
}
1028
+
1029
+
1030
# Motif enrichment of foreground sequences against a background set.
#
# Arguments:
#   foreground    aligned foreground sequences.
#   AA_in_protein site labels of the foreground; used only to decide which of
#                 S/T/Y occur and therefore which centres are analysed.
#   background    aligned background sequences.
#   motifx_pvalue p-value cut-off forwarded to get_motifs_list().
#
# Returns whatever get_motifs_list() returns (a named list, or NULL).
mea_based_on_background <- function(foreground, AA_in_protein, background, motifx_pvalue){
  candidate_centers <- c('S', 'T', 'Y')
  center_vector <- NULL
  for(pos in seq_along(candidate_centers)){
    cat(pos)
    residue <- candidate_centers[pos]
    # Keep a residue as a centre only when some site label contains it.
    if(any(grepl(residue, AA_in_protein))){
      center_vector <- c(center_vector, residue)
    }
  }

  cat('Start executing motifx and find motif pattern. \n')
  cat('Foreground sequences: ', length(foreground), '.\n', sep = '')
  cat('Background sequences: ', length(background), '.\n', sep = '')
  cat('Phosphorylation: [', center_vector, '] exists in foreground.\n', sep = '')
  cat('Motifx pvalue cutoff: ', motifx_pvalue, '.\n', sep = '')

  motifs_list <- get_motifs_list(foreground, background, center_vector, motifx_pvalue)

  cat('Motifx analysis OK! ^_^', '\n')
  print(motifs_list)
  cat('\n')
  return(motifs_list)
}
1054
+
1055
+
1056
# Run the motif search once per centre residue and gather non-empty results.
#
# Returns a list named by centre residue (only centres that produced motifs),
# or NULL when no centre produced any motif.
get_motifs_list <- function(foreground, background, center_vector, motifx_pvalue){
  found_motifs <- list()
  found_centers <- NULL
  center_count <- length(center_vector)
  for(pos in seq_len(center_count)){
    cat(center_count)
    cat(pos)
    residue <- center_vector[pos]
    motifs <- get_motif_analysis_summary(foreground, background, center = residue, min_sequence_count = 1, min_pvalue = motifx_pvalue)
    if(is.null(motifs)){
      next
    }
    found_motifs[[length(found_motifs) + 1]] <- motifs
    found_centers <- c(found_centers, residue)
  }

  if(length(found_motifs) == 0){
    return(NULL)
  }
  names(found_motifs) <- found_centers
  return(found_motifs)
}
1079
+
1080
+
1081
# Iteratively extract motifs for one centre residue.
#
# Each round searches the remaining foreground/background for the best motif,
# records it, then removes every sequence matching that motif from both sets.
# The loop ends when the foreground has fewer than min_sequence_count
# sequences or the search returns NULL.
#
# Returns a data.frame (motif, score, foreground/background matches and sizes,
# fold_increase), or NULL when no motif was found.
get_motif_analysis_summary <- function(
  foreground,
  background,
  center='S',
  min_sequence_count = 1,
  min_pvalue = 0.01
){
  checked <- check_mea_input(foreground, background, center)
  remaining_fg <- checked$foreground
  remaining_bg <- checked$background

  hits <- list()
  repeat{
    if(length(remaining_fg) < min_sequence_count){
      break
    }
    hit <- seach_motif_pattern(
      remaining_fg,
      remaining_bg,
      min_sequence_count = min_sequence_count,
      min_pvalue = min_pvalue,
      center = center,
      width = checked$width
    )
    if(is.null(hit)){
      break
    }
    hits[[length(hits) + 1]] <- hit
    # Sequences explained by this motif are excluded from later rounds.
    remaining_fg <- remaining_fg[!grepl(hit$motif_pattern, remaining_fg)]
    remaining_bg <- remaining_bg[!grepl(hit$motif_pattern, remaining_bg)]
  }

  if(length(hits) == 0){
    return(NULL)
  }

  summary_df <- data.frame(
    motif = vapply(hits, function(h){h$motif_pattern}, character(1)),
    score = vapply(hits, function(h){h$motif_pattern_score}, numeric(1)),
    foreground_matches = vapply(hits, function(h){h$foreground_matches}, numeric(1)),
    foreground_size = vapply(hits, function(h){h$foreground_size}, numeric(1)),
    background_matches = vapply(hits, function(h){h$background_matches}, numeric(1)),
    background_size = vapply(hits, function(h){h$background_size}, numeric(1))
  )

  # Ratio of the match rate in the foreground to the match rate in the background.
  fg_rate <- summary_df$foreground_matches/summary_df$foreground_size
  bg_rate <- summary_df$background_matches/summary_df$background_size
  summary_df$fold_increase <- fg_rate/bg_rate

  return(summary_df)
}
1129
+
1130
+
1131
# Filter, normalize and impute phosphosite quantification data.
#
# Arguments:
#   data_frame                first six columns are annotation (must include
#                             'AA_in_protein'); remaining columns are one
#                             numeric column per experiment, 0 = missing.
#   experiment_code_file_path tab-separated file with an 'Experiment_Code'
#                             column (only its length is used here).
#   nathreshold               converted to the maximum number of zero entries
#                             allowed per row:
#                             max(number of experiment codes - nathreshold, 0).
#   normmethod                "global" (divide by column sum) or "median"
#                             (divide by the median of the positive values);
#                             both are scaled by 1e5. Any other value leaves
#                             the data un-normalized.
#   imputemethod              "0", "minimum" or "minimum/10": how zeros are
#                             replaced after normalization.
#   topN                      when not NA, only the topN largest values of
#                             each column are kept (others set to 0) before
#                             normalization.
#   mod_types                 residues to keep; a row is kept when its
#                             upper-cased AA_in_protein contains any of them.
#
# Returns list(ptypes_area_df_with_id, ptypes_fot5_df_with_id): annotation plus
# raw values, and annotation plus normalized/imputed values.
# Stops when no row carries one of the requested modifications.
get_normalized_data_of_psites2 <- function(data_frame, experiment_code_file_path, nathreshold, normmethod = "global", imputemethod = "minimum/10", topN = NA, mod_types = c('S', 'T', 'Y')){
  requireNamespace('utils')
  requireNamespace('stats')
  experiment_code <- utils::read.table(experiment_code_file_path, header = TRUE, sep = '\t', stringsAsFactors = FALSE)

  # ---- Missing-value filter -------------------------------------------------
  max_zero_count <- max(length(experiment_code$Experiment_Code) - nathreshold, 0)
  # Vectorised replacement of the original per-row loop (which also broke on a
  # zero-row input because of 1:nrow()).
  zero_count <- rowSums(data_frame[, -seq_len(6), drop = FALSE] == 0)
  data_frame <- data_frame[zero_count <= max_zero_count, , drop = FALSE]

  cat('\n The 7th step is running.')
  summary_df_ID_Info <- data_frame[, seq_len(6)]
  summary_df_ID_Info$AA_in_protein <- toupper(summary_df_ID_Info$AA_in_protein)
  summary_df_Value <- data_frame[, -(seq_len(6)), drop = FALSE]

  # ---- Modification-type filter --------------------------------------------
  cat('\n Filtering data only including S/T/Y modifications.')
  # Honour mod_types: the original hard-coded S/T/Y and ignored the argument.
  has_requested_mod <- Reduce(
    `|`,
    lapply(toupper(mod_types), function(residue){
      grepl(residue, summary_df_ID_Info$AA_in_protein, fixed = TRUE)
    }),
    rep(FALSE, nrow(summary_df_ID_Info))
  )
  index_of_ptypes <- which(has_requested_mod)
  if(length(index_of_ptypes) < 1){
    stop('No data with modifications taking place on ', paste(mod_types, collapse = '|'))
  }
  ptypes_id_df <- summary_df_ID_Info[index_of_ptypes,]
  ptypes_value <- summary_df_Value[index_of_ptypes, , drop = FALSE]

  # ---- Normalization --------------------------------------------------------
  Value_FOT5 <- ptypes_value
  if(normmethod %in% c("global", "median")){
    for(i in seq_len(ncol(Value_FOT5))){
      x <- as.vector(unlist(ptypes_value[,i]))
      if(!is.na(topN)){
        # head() caps topN at the column length; the original indexed past the
        # end and produced NA subscripts.
        top_index <- utils::head(order(x, decreasing = TRUE), topN)
        x[setdiff(seq_along(x), top_index)] <- 0
      }
      if(normmethod == "global"){
        scale_value <- sum(x)
      }else{
        # Median of the detected (positive) values only, as in
        # get_normalized_data_FOT52; including zeros can make the divisor 0.
        scale_value <- stats::median(x[x > 0])
      }
      Value_FOT5[,i] <- x/scale_value*1e5
    }
  }
  ptypes_value_FOT5 <- as.matrix(Value_FOT5)

  # ---- Imputation of zeros --------------------------------------------------
  index_of_zero <- which(ptypes_value_FOT5 == 0)
  # Only impute when there is something to fill and something to derive the
  # fill value from; otherwise min() of an empty vector warns and yields Inf.
  can_impute <- length(index_of_zero) > 0 && length(index_of_zero) < length(ptypes_value_FOT5)
  if(can_impute && imputemethod %in% c("minimum", "minimum/10")){
    min_value_of_non_zero <- min(ptypes_value_FOT5[-index_of_zero])
    if(imputemethod == "minimum"){
      ptypes_value_FOT5[index_of_zero] <- min_value_of_non_zero
    }else{
      ptypes_value_FOT5[index_of_zero] <- min_value_of_non_zero*0.1
    }
  }
  # imputemethod == "0" (or anything else): zeros stay zeros.

  ptypes_df_list <- list(
    ptypes_area_df_with_id = data.frame(ptypes_id_df, ptypes_value),
    ptypes_fot5_df_with_id = data.frame(ptypes_id_df, ptypes_value_FOT5)
  )

  cat('\n The 7th step is over ^_^.')
  return(ptypes_df_list)
}
1226
+
1227
+
1228
# Normalize each sample column and impute zeros.
#
# Arguments:
#   data_frame                first column = identifier, remaining columns =
#                             one numeric column per experiment, 0 = missing.
#   experiment_code_file_path tab-separated file with an 'Experiment_Code'
#                             column; its values become the output column names.
#   normmethod                "global": positive values / their sum * 1e5;
#                             "median": positive values / their median * 1e5.
#                             Any other value leaves the data un-normalized.
#   imputemethod              "0", "minimum" or "minimum/10": how zeros are
#                             replaced after normalization.
#
# Returns a data.frame with the identifier column (original name kept) followed
# by one normalized column per experiment code.
get_normalized_data_FOT52 <- function(data_frame, experiment_code_file_path, normmethod = "global", imputemethod = "minimum/10"){
  requireNamespace('utils')
  requireNamespace('stats')
  cat('\n The 7th step: Normalize data and filter data only including phosphorylation site.')
  experiment_code <- utils::read.table(experiment_code_file_path, header = TRUE, sep = '\t', stringsAsFactors = FALSE)
  experiment_code <- as.vector(unlist(experiment_code$Experiment_Code))
  data_frame_colnames <- colnames(data_frame)
  ID <- as.vector(data_frame[,1])
  # drop = FALSE: with a single sample the original collapsed to a vector and
  # the column loop below failed.
  Value_raw <- data_frame[, -1, drop = FALSE]
  Value_FOT5 <- Value_raw

  if(normmethod %in% c("global", "median")){
    for(i in seq_len(ncol(Value_FOT5))){
      x <- Value_raw[,i]
      valid_index <- which(x > 0)
      valid_x <- x[valid_index]
      if(normmethod == "global"){
        scale_value <- sum(valid_x)
      }else{
        scale_value <- stats::median(valid_x)
      }
      # Only positive entries are rescaled; zeros are handled by the
      # imputation step below.
      Value_FOT5[valid_index, i] <- valid_x/scale_value*1e5
    }
  }
  Value_FOT5 <- as.matrix(Value_FOT5)

  index_of_zero <- which(Value_FOT5 == 0)
  # Only impute when there is something to fill and something to derive the
  # fill value from; otherwise min() of an empty vector warns and yields Inf.
  can_impute <- length(index_of_zero) > 0 && length(index_of_zero) < length(Value_FOT5)
  if(can_impute && imputemethod %in% c("minimum", "minimum/10")){
    min_value_of_non_zero <- min(Value_FOT5[-index_of_zero])
    if(imputemethod == "minimum"){
      Value_FOT5[index_of_zero] <- min_value_of_non_zero
    }else{
      Value_FOT5[index_of_zero] <- min_value_of_non_zero*0.1
    }
  }
  # imputemethod == "0" (or anything else): zeros stay zeros.

  data_frame_normaliation <- data.frame(ID, Value_FOT5)
  colnames(data_frame_normaliation) <- c(data_frame_colnames[1], experiment_code)
  return(data_frame_normaliation)
}
1275
+
backend/analysis.R ADDED
File without changes
backend/fill_missing_values.R ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Replace missing values (NA) in a numeric table.
#
# Arguments:
#   nadata  numeric matrix or data.frame that may contain NA.
#   method  one of:
#             "none"       NA -> 0
#             "minimum"    NA -> smallest observed value
#             "minimum/10" NA -> smallest observed value / 10
#             "bpca"       Bayesian PCA (pcaMethods), medium run time
#             "lls"        local least squares (pcaMethods), long run time;
#                          only applied when the data actually contains NA
#             "impseq"     sequential imputation (rrcovNA)
#             "impseqrob"  robust sequential imputation (rrcovNA)
#             "knnmethod"  k-nearest-neighbour imputation (impute)
#             "colmedian"  column medians (e1071)
#             "rowmedian"  row medians (e1071)
#
# Returns the completed data. Any other 'method' (or "lls" on complete data)
# returns the input unchanged.
#
# Package functions are called with an explicit namespace, so the function no
# longer depends on the packages being attached by the caller and the
# ambiguous name 'impute' always resolves to e1071::impute.
fill_missing_values <- function(nadata, method) {
  filled <- nadata
  if (method == "none") {
    filled[is.na(filled)] <- 0
  } else if (method == "minimum") {
    filled[is.na(filled)] <- min(nadata, na.rm = TRUE)
  } else if (method == "minimum/10") {
    filled[is.na(filled)] <- min(nadata, na.rm = TRUE) / 10
  } else if (method == "bpca") {
    # take medium time
    bpca_fit <- pcaMethods::pca(as.matrix(nadata), nPcs = ncol(nadata)-1, method = "bpca", maxSteps =100)
    filled <- pcaMethods::completeObs(bpca_fit)
  } else if (method == "lls" && anyNA(nadata)) {
    # take long time; llsImpute is run on the transposed table
    lls_fit <- pcaMethods::llsImpute(t(nadata), k = 10, allVariables = TRUE)
    filled <- t(pcaMethods::completeObs(lls_fit))
  } else if (method == "impseq") {
    filled <- rrcovNA::impSeq(nadata)
  } else if(method == "impseqrob"){
    filled <- rrcovNA::impSeqRob(nadata, alpha = 0.9)$x
  } else if(method == "knnmethod"){
    filled <- impute::impute.knn(as.matrix(nadata), k = 10, rowmax = 1, colmax = 1)$data
  } else if(method == "colmedian"){
    filled <- e1071::impute(nadata, what = "median")
  } else if(method == "rowmedian"){
    # e1071::impute works column-wise, so transpose, impute, transpose back.
    filled <- t(e1071::impute(t(nadata), what = "median"))
  }
  return(filled)
}
backend/get_aligned_seq_for_mea02.R ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#' Extract a fixed-width sequence window centred on each phosphosite.
#'
#' Reads the protein sequence library
#' `./PhosMap_datasets/fasta_library/<fasta_type>/<species>/<species>_<fasta_type>_fasta.txt`
#' (tab separated, with header; column 1 is matched against `ID`, column 2 is
#' used as the sequence) and cuts a window around the position given in
#' `AA_in_protein`.
#'
#' @param ID vector of protein identifiers, one per site.
#' @param Sequence vector of peptide sequences; copied to the output only.
#' @param AA_in_protein vector of site labels such as "S123". The number after
#'   the first S/T/Y is used as the 1-based position in the protein.
#' @param fixed_length requested window width. The window always spans
#'   `floor(fixed_length / 2)` residues on each side of the site, i.e.
#'   `2 * floor(fixed_length / 2) + 1` characters.
#' @param species sub-directory / file-name component of the library.
#' @param fasta_type sub-directory / file-name component of the library.
#'
#' @return data frame with columns `ID`, `Sequence`, `AA_in_protein` and
#'   `aligned_seq`. `aligned_seq` is NA when the ID is not in the library, the
#'   position cannot be parsed, or the position lies outside the sequence.
#'   Where the window runs past either end of the protein it is padded with
#'   '_' so the site stays in the central position.
get_aligned_seq_for_mea02 <- function(ID, Sequence, AA_in_protein, fixed_length, species = 'human', fasta_type = 'refseq'){
  requireNamespace('utils')
  cat('Aligned sequence based on fasta library for motif enrichment anlysis.\n')

  fasta_library_dir = "./PhosMap_datasets/fasta_library/"
  fasta_data <- utils::read.table((paste0(fasta_library_dir, fasta_type, "/", species, "/", species, "_", fasta_type, "_fasta.txt")), sep = '\t', header = TRUE)
  # Coerce once so the lookup is independent of stringsAsFactors defaults.
  library_ids <- as.character(fasta_data[, 1])
  library_seqs <- as.character(fasta_data[, 2])

  border_limit <- floor(fixed_length/2)
  flank <- strrep('_', border_limit)
  GI_nrow <- length(ID)
  # Preallocate instead of growing the vector inside the loop.
  aligned_seq <- rep(NA_character_, GI_nrow)
  cat('Pre-align:', GI_nrow, 'phos-pepitdes.\n')
  cat('Fixed sequence length is ', fixed_length, '.\n', sep = '')
  cat('It needs few time.\n')
  for(i in seq_len(GI_nrow)){
    # "S123" splits into c("", "123"); the second piece is the position.
    loc_index <- as.numeric(strsplit(as.character(AA_in_protein[i]), "[STY]")[[1]][2])
    # match() returns the first hit only, so a duplicated library ID can no
    # longer produce a multi-element sequence.
    hit <- match(as.character(ID[i]), library_ids, incomparables = NA)
    if(!is.na(hit) && !is.na(loc_index)){
      refseq <- library_seqs[hit]
      if(loc_index >= 1 && loc_index <= nchar(refseq)){
        # Pad both ends by the flank width, then cut: protein position p sits
        # at p + border_limit in the padded string, so the window starts at p.
        # This keeps the site centred even when both ends are truncated.
        padded <- paste0(flank, refseq, flank)
        aligned_seq[i] <- substr(padded, loc_index, loc_index + 2 * border_limit)
      }
    }
    if(i %% 5000 == 0){
      cat('Aligned:', i, 'phos-pepitdes.\n')
    }
    if(i == GI_nrow){
      cat('Aligned:', i, 'phos-pepitdes.\n')
      cat('Finish OK! ^_^\n')
    }
  }
  cat('\n')
  aligned_sequence_df_based_on_fasta_library <- data.frame(ID, Sequence, AA_in_protein, aligned_seq)
  return(aligned_sequence_df_based_on_fasta_library)
}
backend/get_normalized_data_of_psites3.R ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#' Filter, normalise and impute a phosphosite quantification table.
#'
#' The first six columns of `data_frame` are treated as identifier columns
#' (one of them must be named `AA_in_protein`); every remaining column is a
#' sample. Zeros are treated as missing values.
#'
#' @param data_frame site-level table: six ID columns followed by one numeric
#'   column per sample.
#' @param experiment_code_file_path tab-separated design file with a header
#'   and an `Experiment_Code` column; only the number of entries is used.
#' @param nathreshold minimum number of samples in which a site must be
#'   quantified (non-zero) to be kept.
#' @param normmethod "global" (divide by the column sum) or "median" (divide
#'   by the column median), both scaled by 1e5. Any other value leaves the
#'   values un-normalised.
#' @param imputemethod imputation strategy applied to the whole table:
#'   "none", "minimum", "minimum/10", "bpca", "lls", "impseq", "impseqrob",
#'   "knnmethod", "colmedian" or "rowmedian".
#' @param topN if not NA, only the `topN` largest values of each sample are
#'   kept before normalisation; the rest are set to zero.
#' @param mod_types residues a site label must contain to be kept.
#'
#' @return list with `ptypes_area_df_with_id` (filtered raw values) and
#'   `ptypes_fot5_df_with_id` (normalised and imputed values), both prefixed
#'   by the six ID columns.
get_normalized_data_of_psites3 <- function(data_frame, experiment_code_file_path, nathreshold, normmethod = "global", imputemethod = "minimum/10", topN = NA, mod_types = c('S', 'T', 'Y')){
  requireNamespace('utils')
  experiment_code <- utils::read.table(experiment_code_file_path, header = TRUE, sep = '\t', stringsAsFactors = FALSE)

  # Convert "minimum quantified samples" into "maximum zeros allowed".
  nathreshold <- max(length(experiment_code$Experiment_Code) - nathreshold, 0)
  # Vectorised zero count per row; also safe for a table with no rows.
  zeros_per_row <- rowSums(data_frame[, -seq_len(6), drop = FALSE] == 0)
  data_frame <- data_frame[zeros_per_row <= nathreshold, ]

  cat('\n The 7th step is running.')
  summary_df_ID_Info <- data_frame[, seq_len(6)]
  summary_df_ID_Info$AA_in_protein <- toupper(summary_df_ID_Info$AA_in_protein)
  # drop = FALSE keeps a data frame even when there is a single sample column.
  summary_df_Value <- data_frame[, -(seq_len(6)), drop = FALSE]

  cat('\n Filtering data only including S/T/Y modifications.')
  # Honour mod_types (previously the S/T/Y filter was hard-coded). Labels are
  # upper-cased above, so the residues are upper-cased to match.
  has_mod_type <- Reduce(`|`, lapply(toupper(mod_types), function(residue) {
    grepl(residue, summary_df_ID_Info$AA_in_protein, fixed = TRUE)
  }))
  index_of_ptypes <- which(has_mod_type)
  if(length(index_of_ptypes) == 0){
    stop('No data with modifications taking place on ', paste(mod_types, collapse = '|'))
  }
  ptypes_id_df <- summary_df_ID_Info[index_of_ptypes,]
  ptypes_value <- summary_df_Value[index_of_ptypes, , drop = FALSE]

  Value_FOT5 <- ptypes_value
  for(i in seq_len(ncol(Value_FOT5))){
    x <- as.vector(unlist(ptypes_value[,i]))
    if(!is.na(topN)){
      # Keep only the topN largest values. Capping at length(x) avoids NA
      # subscripts when topN exceeds the number of sites.
      keep <- rep(FALSE, length(x))
      keep[order(x, decreasing = TRUE)[seq_len(min(topN, length(x)))]] <- TRUE
      x[!keep] <- 0
    }
    if(normmethod == "global") {
      Value_FOT5[,i] <- x/sum(x)*1e5
    } else if(normmethod == "median") {
      Value_FOT5[,i] <- x/stats::median(x)*1e5
    }
  }

  # Zeros are the missing-value marker; turn them into NA for imputation.
  ptypes_value_FOT5 <- as.data.frame(as.matrix(Value_FOT5))
  ptypes_value_FOT5[ptypes_value_FOT5 == 0] <- NA

  # Local imputation helper. Package functions are namespace-qualified so the
  # branches do not rely on the packages having been attached elsewhere.
  fill_missing_values01 <- function(nadata, method) {
    df <- df1 <- nadata
    if (method == "none") {
      df[is.na(df)] <- 0
    } else if (method == "minimum") {
      df[is.na(df)] <- min(df1, na.rm = TRUE)
    } else if (method == "minimum/10") {
      df[is.na(df)] <- min(df1, na.rm = TRUE) / 10
    } else if (method == "bpca") {
      # take medium time
      # library() kept so the package stays attached as before.
      library(pcaMethods)
      data_zero1 <- pcaMethods::pca(as.matrix(df1), nPcs = ncol(df1)-1, method = "bpca", maxSteps = 100)
      df <- pcaMethods::completeObs(data_zero1)
    } else if (method == "lls") {
      # take long time
      data_zero1 <- pcaMethods::llsImpute(t(df1), k = 10, allVariables = TRUE)
      df <- t(pcaMethods::completeObs(data_zero1))
    } else if (method == "impseq") {
      df <- rrcovNA::impSeq(df1)
    } else if (method == "impseqrob") {
      data_zero1 <- rrcovNA::impSeqRob(df1, alpha = 0.9)
      df <- data_zero1$x
    } else if (method == "knnmethod") {
      data_zero1 <- impute::impute.knn(as.matrix(df1), k = 10, rowmax = 1, colmax = 1)
      df <- data_zero1$data
    } else if (method == "colmedian") {
      df <- e1071::impute(df1, what = "median")
    } else if (method == "rowmedian") {
      df <- t(e1071::impute(t(df1), what = "median"))
    }
    return(df)
  }
  ptypes_value_FOT5 = fill_missing_values01(ptypes_value_FOT5, imputemethod)

  ptypes_df_list <- list(
    ptypes_area_df_with_id = data.frame(ptypes_id_df, ptypes_value),
    ptypes_fot5_df_with_id = data.frame(ptypes_id_df, ptypes_value_FOT5)
  )

  cat('\n The 7th step is over ^_^.')
  return(ptypes_df_list)
}
backend/get_normalized_data_of_psites4.R ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#' Filter and normalise a phosphosite table, then impute within each group.
#'
#' Same filtering and normalisation as the whole-table variant, but missing
#' values are imputed separately for the samples of each experimental group
#' listed in `design_file`, using the file-level `fill_missing_values()`.
#'
#' @param data_frame site-level table: six ID columns (one named
#'   `AA_in_protein`) followed by one numeric column per sample.
#' @param experiment_code_file_path tab-separated design file with a header
#'   and an `Experiment_Code` column; only the number of entries is used.
#' @param nathreshold minimum number of samples in which a site must be
#'   quantified (non-zero) to be kept.
#' @param normmethod "global" (column sum) or "median" (column median), both
#'   scaled by 1e5. Any other value leaves the values un-normalised.
#' @param imputemethod method name passed to `fill_missing_values()`.
#' @param topN if not NA, only the `topN` largest values of each sample are
#'   kept before normalisation; the rest are set to zero.
#' @param mod_types residues a site label must contain to be kept.
#' @param design_file data frame whose first column holds the sample (column)
#'   names and which has a `Group` column.
#'
#' @return list with `ptypes_area_df_with_id` and `ptypes_fot5_df_with_id`.
#'   Sample columns of the latter are ordered group by group. An empty list
#'   is returned when `imputemethod` is "bpca", "rowmedian", "lls" or
#'   "knnmethod" and any group still contains a missing value.
get_normalized_data_of_psites4 <- function(data_frame, experiment_code_file_path, nathreshold, normmethod = "global", imputemethod = "minimum/10", topN = NA, mod_types = c('S', 'T', 'Y'), design_file){
  requireNamespace('utils')
  experiment_code <- utils::read.table(experiment_code_file_path, header = TRUE, sep = '\t', stringsAsFactors = FALSE)

  # Convert "minimum quantified samples" into "maximum zeros allowed".
  nathreshold <- max(length(experiment_code$Experiment_Code) - nathreshold, 0)
  # Vectorised zero count per row; also safe for a table with no rows.
  zeros_per_row <- rowSums(data_frame[, -seq_len(6), drop = FALSE] == 0)
  data_frame <- data_frame[zeros_per_row <= nathreshold, ]

  cat('\n The 7th step is running.')
  summary_df_ID_Info <- data_frame[, seq_len(6)]
  summary_df_ID_Info$AA_in_protein <- toupper(summary_df_ID_Info$AA_in_protein)
  # drop = FALSE keeps a data frame even when there is a single sample column.
  summary_df_Value <- data_frame[, -(seq_len(6)), drop = FALSE]

  cat('\n Filtering data only including S/T/Y modifications.')
  # Honour mod_types (previously the S/T/Y filter was hard-coded). Labels are
  # upper-cased above, so the residues are upper-cased to match.
  has_mod_type <- Reduce(`|`, lapply(toupper(mod_types), function(residue) {
    grepl(residue, summary_df_ID_Info$AA_in_protein, fixed = TRUE)
  }))
  index_of_ptypes <- which(has_mod_type)
  if(length(index_of_ptypes) == 0){
    stop('No data with modifications taking place on ', paste(mod_types, collapse = '|'))
  }
  ptypes_id_df <- summary_df_ID_Info[index_of_ptypes,]
  ptypes_value <- summary_df_Value[index_of_ptypes, , drop = FALSE]

  Value_FOT5 <- ptypes_value
  for(i in seq_len(ncol(Value_FOT5))){
    x <- as.vector(unlist(ptypes_value[,i]))
    if(!is.na(topN)){
      # Keep only the topN largest values. Capping at length(x) avoids NA
      # subscripts when topN exceeds the number of sites.
      keep <- rep(FALSE, length(x))
      keep[order(x, decreasing = TRUE)[seq_len(min(topN, length(x)))]] <- TRUE
      x[!keep] <- 0
    }
    if(normmethod == "global") {
      Value_FOT5[,i] <- x/sum(x)*1e5
    } else if(normmethod == "median") {
      Value_FOT5[,i] <- x/stats::median(x)*1e5
    }
  }

  # Zeros are the missing-value marker; turn them into NA for imputation.
  ptypes_value_FOT5 <- as.data.frame(as.matrix(Value_FOT5))
  ptypes_value_FOT5[ptypes_value_FOT5 == 0] <- NA

  # Sample columns belonging to one group. as.character() guards against a
  # factor column, which would otherwise index columns by integer code;
  # drop = FALSE keeps a single-sample group as a named one-column table.
  group_table <- function(group) {
    samples <- as.character(design_file[[1]][design_file$Group == group])
    ptypes_value_FOT5[, samples, drop = FALSE]
  }
  group_labels <- unique(design_file$Group)

  # These methods are refused as soon as any group contains a missing value.
  # NOTE(review): this only lets them run when there is nothing left to
  # impute; the intent may have been to reject rows that are entirely missing
  # within a group. Behaviour kept as is -- confirm with the caller.
  if (imputemethod %in% c('bpca', 'rowmedian', 'lls', 'knnmethod')) {
    for (group in group_labels) {
      if (anyNA(group_table(group))) {
        return(list())
      }
    }
  }

  # Impute each group separately. The accumulator is a local list, so a
  # variable named `result_list` in an enclosing or global environment can no
  # longer leak into the result.
  filled_groups <- list()
  for (group in group_labels) {
    filled_groups[[length(filled_groups) + 1]] <- fill_missing_values(group_table(group), method = imputemethod)
  }

  # Combine the imputed groups column-wise into one table.
  ptypes_value_FOT5 <- Reduce(cbind, filled_groups)

  ptypes_df_list <- list(
    ptypes_area_df_with_id = data.frame(ptypes_id_df, ptypes_value),
    ptypes_fot5_df_with_id = data.frame(ptypes_id_df, ptypes_value_FOT5)
  )

  cat('\n The 7th step is over ^_^.')
  return(ptypes_df_list)
}
backend/import_extract.R ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Unzip an uploaded archive and return the path of the extracted folder.
#
# inputfile: upload record with `datapath` (location of the archive on disk)
#            and `name` (original file name of the archive).
# pathname:  directory the archive is extracted into.
#
# Returns the normalised path `<pathname>/<archive name without extension>`.
# This assumes the archive contains a top-level folder named after the
# archive itself -- verify against the uploads the app accepts.
get_file_path <- function(inputfile, pathname) {
  zip::unzip(
    inputfile$datapath,
    exdir = pathname
  )
  # Strip the extension by name rather than by a fixed count of 4 characters.
  folder_name <- tools::file_path_sans_ext(inputfile$name)
  normalizePath(paste0(pathname, "/", folder_name))
}
14
+
15
# List the file names under a path with their extensions removed.
#
# path:  directory to list.
# depth: when 2, the entries of `path` are themselves treated as directories
#        and the files inside them are listed; any other value lists `path`
#        directly.
#
# Returns a character vector of names without extension (empty when nothing
# is found).
get_target_name <- function(path, depth) {
  if (depth == 2) {
    path <- normalizePath(list.files(path, full.names = TRUE))
  }
  # file_path_sans_ext handles any extension length; the previous fixed
  # "drop the last 4 characters" mangled names such as "x.xlsx" or "README".
  tools::file_path_sans_ext(list.files(path))
}
23
+
backend/preprocess.R ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # imageMap <- function(inputId, imgsrc, opts) {
2
+ # areas <- lapply(names(opts), function(n)
3
+ # shiny::tags$area(title=n, coords=opts[[n]],
4
+ # href="#", shape="poly"))
5
+ # js <- paste0("$(document).on('click', 'map area', function(evt) {
6
+ # evt.preventDefault();
7
+ # var val = evt.target.title;
8
+ # Shiny.onInputChange('", inputId, "', val);})")
9
+ # list(
10
+ # shiny::tags$img(src=imgsrc, usemap=paste0("#", inputId),
11
+ # shiny::tags$head(tags$script(shiny::HTML(js)))),
12
+ # shiny::tags$map(name=inputId, areas))
13
+ # }
14
+
15
# Build a clickable HTML image map whose clicked area is reported to Shiny.
#
# inputId: name of the <map> element and of the Shiny input that receives the
#          title of the clicked area.
# imgsrc:  source of the image the map is attached to.
# opts:    named list; each name becomes an area title and each value the
#          `coords` string of a polygon area.
#
# Returns a list of two tags: the <img> (carrying the click-handler script)
# and the <map> with one <area> per entry of `opts`.
imageMap <- function(inputId, imgsrc, opts) {
  areas <- lapply(names(opts), function(area_title) {
    shiny::tags$area(title = area_title, coords = opts[[area_title]],
                     href = "#", shape = "poly")
  })
  # The handler previously called print('hello'), which in a browser opens
  # the print dialog and never told Shiny which area was clicked. It now
  # publishes the area title as input[[inputId]]. The selector is limited to
  # this map so several image maps on one page do not trigger each other.
  js <- paste0("$(document).on('click', 'map[name=\"", inputId, "\"] area', function(evt) {
      evt.preventDefault();
      var val = evt.target.title;
      Shiny.onInputChange('", inputId, "', val);})")
  list(
    shiny::tags$img(src = imgsrc, usemap = paste0("#", inputId),
                    shiny::tags$head(shiny::tags$script(shiny::HTML(js)))),
    shiny::tags$map(name = inputId, areas))
}
backend/visualization_deps_with_scatter02.R ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#' Visualize differentially expressed results with a volcano-style scatter
#'
#' Plots log fold change against -log10(p-value) with ggplot2 and draws the
#' fold-change and significance cutoffs as dashed guide lines.
#'
#' @param deps_data a data frame containing the columns logFC and pvalue.
#' @param minFC a numeric for the minimum fold change; vertical guide lines
#'   are drawn at -log2(minFC) and log2(minFC).
#' @param minPvalue a numeric for the significance cutoff; a horizontal guide
#'   line is drawn at -log10(minPvalue).
#' @param main an overall title for the plot.
#' @param show_text currently unused; kept for backward compatibility.
#' @param min_up_text currently unused; kept for backward compatibility.
#' @param min_down_text currently unused; kept for backward compatibility.
#'
#' @author Dongdong Zhan and Mengsha Tong
#' @export
#'
#' @return A ggplot object showing the differentially expressed results.
#'
#' @examples
#' \dontrun{
#' demo <- data.frame(logFC = c(-2.5, 0.1, 3), pvalue = c(0.001, 0.5, 0.0001))
#' visualization_deps_with_scatter02(demo, minFC = 2, minPvalue = 0.05,
#'   main = 'Differentially expressed proteins \n with limma')
#' }
#'

visualization_deps_with_scatter02 <- function(
  deps_data,
  minFC = 2,
  minPvalue = 0.05,
  main = 'Differentially expressed proteins',
  show_text = FALSE,
  min_up_text = 15,
  min_down_text = 15
){
  # Cutoffs now come from the arguments; they were hard-coded to +/-1 and
  # -log10(0.01), which ignored minFC/minPvalue and contradicted the
  # documented default of 0.05.
  fc_cutoff <- log2(minFC)
  p_cutoff <- -log10(minPvalue)

  p <- ggplot2::ggplot(
    # data and aesthetic mapping
    deps_data, ggplot2::aes(x = logFC, y = -log10(pvalue))) +
    ggplot2::geom_point(alpha = 0.4, size = 3.5) +
    # Has no visible effect while no colour aesthetic is mapped; retained
    # from the original.
    ggplot2::scale_color_manual(values = c("#546de5", "#d2dae2", "#ff4757")) +
    # guide lines
    ggplot2::geom_vline(xintercept = c(-fc_cutoff, fc_cutoff), lty = 4, col = "black", lwd = 0.8) +
    ggplot2::geom_hline(yintercept = p_cutoff, lty = 4, col = "black", lwd = 0.8) +
    # title and axis labels
    ggplot2::labs(title = main,
                  x = "log2(fold change)",
                  y = "-log10 (p-value)") +
    ggplot2::theme_bw() +
    # title and legend layout
    ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5),
                   legend.position = "right",
                   legend.title = ggplot2::element_blank())
  p
}
examplefile/Clinicaltest.csv ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PatientID,status,time
2
+ Exp027012,0,1290
3
+ Exp027013,0,1187
4
+ Exp027014,1,1106
5
+ Exp027015,1,1264
6
+ Exp027016,1,948
7
+ Exp027017,0,1401
8
+ Exp027018,1,961
9
+ Exp027019,0,1867
10
+ Exp027020,1,986
11
+ Exp027021,0,1593
12
+ Exp027022,1,566
13
+ Exp027023,1,1353
14
+ Exp027024,0,1592
15
+ Exp027025,0,1468
16
+ Exp027026,1,120
17
+ Exp027027,1,145
18
+ Exp027028,0,1471
19
+ Exp027029,1,507
20
+ Exp027030,1,1294
21
+ Exp027031,1,317
22
+ Exp027032,1,235
23
+ Exp027033,0,1186
24
+ Exp027034,1,1204
25
+ Exp027035,0,1253
26
+ Exp027036,1,659
27
+ Exp027037,0,1177
28
+ Exp027038,1,807
29
+ Exp027039,1,238
30
+ Exp027040,1,498
31
+ Exp027041,0,781
32
+ Exp027042,1,497
33
+ Exp027043,1,424
34
+ Exp027044,1,407
35
+ Exp027045,1,1421
36
+ Exp027046,0,1386
37
+ Exp027047,0,1390
38
+ Exp027048,0,1348
39
+ Exp027049,0,716
40
+ Exp027050,0,1250
examplefile/analysistools/Clinical_for_Demo.csv ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PatientID,status,time
2
+ Exp027012,0,1290
3
+ Exp027013,0,1187
4
+ Exp027014,1,1106
5
+ Exp027015,1,1264
6
+ Exp027016,1,948
7
+ Exp027017,0,1401
8
+ Exp027018,1,961
9
+ Exp027019,0,1867
10
+ Exp027020,1,986
11
+ Exp027021,0,1593
12
+ Exp027022,1,566
13
+ Exp027023,1,1353
14
+ Exp027024,0,1592
15
+ Exp027025,0,1468
16
+ Exp027026,1,120
17
+ Exp027027,1,145
18
+ Exp027028,0,1471
19
+ Exp027029,1,507
20
+ Exp027030,1,1294
21
+ Exp027031,1,317
22
+ Exp027032,1,235
23
+ Exp027033,0,1186
24
+ Exp027034,1,1204
25
+ Exp027035,0,1253
26
+ Exp027036,1,659
27
+ Exp027037,0,1177
28
+ Exp027038,1,807
29
+ Exp027039,1,238
30
+ Exp027040,1,498
31
+ Exp027041,0,781
32
+ Exp027042,1,497
33
+ Exp027043,1,424
34
+ Exp027044,1,407
35
+ Exp027045,1,1421
36
+ Exp027046,0,1386
37
+ Exp027047,0,1390
38
+ Exp027048,0,1348
39
+ Exp027049,0,716
40
+ Exp027050,0,1250
examplefile/analysistools/Clinical_for_Pre.csv ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ PatientID,status,time
2
+ Exp027015,1,1264
3
+ Exp027016,1,948
4
+ Exp027017,0,1401
5
+ Exp027031,1,317
6
+ Exp027032,1,235
7
+ Exp027033,0,1186
8
+ Exp027046,0,1386
9
+ Exp027047,0,1390
10
+ Exp027048,0,1348
examplefile/analysistools/Clinicaltest.csv ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ PatientID,status,time
2
+ Exp027012,0,1290
3
+ Exp027020,1,986
4
+ Exp027028,0,1471
5
+ Exp027036,1,659
6
+ Exp027044,1,407
examplefile/analysistools/PreNormBasedProSummary.csv ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/analysistools/phosphorylation_exp_design_info.txt ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Experiment_Code Group Description
2
+ Exp027012 0 ctr_0h_R1_IMAC_1.raw
3
+ Exp027013 0 ctr_0h_R1_IMAC_2.raw
4
+ Exp027014 0 ctr_0h_R1_IMAC_3.raw
5
+ Exp027015 0 ctr_0h_R2_IMAC_1.raw
6
+ Exp027016 0 ctr_0h_R2_IMAC_2.raw
7
+ Exp027017 0 ctr_0h_R2_IMAC_3.raw
8
+ Exp027018 0 ctr_0h_R3_IMAC_1.raw
9
+ Exp027019 0 ctr_0h_R3_IMAC_2.raw
10
+ Exp027020 2 PLX_2h_R1_IMAC_1.raw
11
+ Exp027021 2 PLX_2h_R1_IMAC_2.raw
12
+ Exp027022 2 PLX_2h_R1_IMAC_3.raw
13
+ Exp027023 2 PLX_2h_R2_IMAC_2.raw
14
+ Exp027024 2 PLX_2h_R2_IMAC_3.raw
15
+ Exp027025 2 PLX_2h_R3_IMAC_1.raw
16
+ Exp027026 2 PLX_2h_R3_IMAC_2.raw
17
+ Exp027027 2 PLX_2h_R3_IMAC_3.raw
18
+ Exp027028 6 PLX_6h_R1_IMAC_1.raw
19
+ Exp027029 6 PLX_6h_R1_IMAC_2.raw
20
+ Exp027030 6 PLX_6h_R1_IMAC_3.raw
21
+ Exp027031 6 PLX_6h_R2_IMAC_1.raw
22
+ Exp027032 6 PLX_6h_R2_IMAC_2.raw
23
+ Exp027033 6 PLX_6h_R2_IMAC_3.raw
24
+ Exp027034 6 PLX_6h_R3_IMAC_1.raw
25
+ Exp027035 6 PLX_6h_R3_IMAC_2.raw
26
+ Exp027036 24 PLX_24h_R1_IMAC_1.raw
27
+ Exp027037 24 PLX_24h_R1_IMAC_2.raw
28
+ Exp027038 24 PLX_24h_R1_IMAC_3.raw
29
+ Exp027039 24 PLX_24h_R2_IMAC_1.raw
30
+ Exp027040 24 PLX_24h_R2_IMAC_2.raw
31
+ Exp027041 24 PLX_24h_R2_IMAC_3.raw
32
+ Exp027042 24 PLX_24h_R3_IMAC_1.raw
33
+ Exp027043 24 PLX_24h_R3_IMAC_3.raw
34
+ Exp027044 48 PLX_48h_R1_IMAC_2.raw
35
+ Exp027045 48 PLX_48h_R1_IMAC_3.raw
36
+ Exp027046 48 PLX_48h_R2_IMAC_1.raw
37
+ Exp027047 48 PLX_48h_R2_IMAC_2.raw
38
+ Exp027048 48 PLX_48h_R2_IMAC_3.raw
39
+ Exp027049 48 PLX_48h_R3_IMAC_1.raw
40
+ Exp027050 48 PLX_48h_R3_IMAC_2.raw
examplefile/data_frame_normalization_with_control_no_pair.csv ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/download/anaysis_demo.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23bba0455c82f9d839711c3239ef883b8e86a45d20dd5ab18758ab4e19bc8b02
3
+ size 407517
examplefile/download/mascot_xml.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e52baf8a2cb215e2c42e8470f29382a0ca5ce7ed5f19c4104d2d16c557b87c9
3
+ size 21818471
examplefile/download/motif_kinase_relation.xlsx ADDED
Binary file (40.8 kB). View file
 
examplefile/download/phosphorylation_peptide_txt.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aebf94fc6d5de4b116fbeb89856cbed50a34bf37d81e92dbce12d61cb60a53ee
3
+ size 3944372
examplefile/download/profiling_gene_txt.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e35cbdc0f2bbb77e85a421ec390f3a7e105ba86ab66e498561b7e802c86908c5
3
+ size 1384071
examplefile/mascot/phosphorylation_exp_design_info.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Experiment_Code Group Description
2
+ Exp027015 0 ctr_0h_R2_IMAC_1.raw
3
+ Exp027016 0 ctr_0h_R2_IMAC_2.raw
4
+ Exp027017 0 ctr_0h_R2_IMAC_3.raw
5
+ Exp027031 6 PLX_6h_R2_IMAC_1.raw
6
+ Exp027032 6 PLX_6h_R2_IMAC_2.raw
7
+ Exp027033 6 PLX_6h_R2_IMAC_3.raw
8
+ Exp027046 48 PLX_48h_R2_IMAC_1.raw
9
+ Exp027047 48 PLX_48h_R2_IMAC_2.raw
10
+ Exp027048 48 PLX_48h_R2_IMAC_3.raw
examplefile/mascot/phosphorylation_peptide_txt/Exp027015_peptide.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/phosphorylation_peptide_txt/Exp027016_peptide.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/phosphorylation_peptide_txt/Exp027017_peptide.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/phosphorylation_peptide_txt/Exp027031_peptide.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/phosphorylation_peptide_txt/Exp027032_peptide.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/phosphorylation_peptide_txt/Exp027033_peptide.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/phosphorylation_peptide_txt/Exp027046_peptide.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/phosphorylation_peptide_txt/Exp027047_peptide.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/phosphorylation_peptide_txt/Exp027048_peptide.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/profiling_exp_design_info.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Experiment_Code Group Description
2
+ Exp026982 0 ctr_0h_R2_injection_1.raw
3
+ Exp026983 0 ctr_0h_R2_injection_2.raw
4
+ Exp026995 6 PLX_6h_R2_injection_1.raw
5
+ Exp026996 6 PLX_6h_R2_injection_2.raw
6
+ Exp027008 48 PLX_48h_R2_injection_1.raw
7
+ Exp027009 48 PLX_48h_R2_injection_4.raw
examplefile/mascot/profiling_gene_txt/Exp026982_gene.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/profiling_gene_txt/Exp026983_gene.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/profiling_gene_txt/Exp026995_gene.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/profiling_gene_txt/Exp026996_gene.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/profiling_gene_txt/Exp027008_gene.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/mascot/profiling_gene_txt/Exp027009_gene.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/maxquant/Phospho (STY)Sites.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/maxquant/phosphorylation_exp_design_info.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Experiment_Code Group Description
2
+ Exp027015 0 ctr_0h_R2_IMAC_1.raw
3
+ Exp027016 0 ctr_0h_R2_IMAC_2.raw
4
+ Exp027017 0 ctr_0h_R2_IMAC_3.raw
5
+ Exp027031 6 PLX_6h_R2_IMAC_1.raw
6
+ Exp027032 6 PLX_6h_R2_IMAC_2.raw
7
+ Exp027033 6 PLX_6h_R2_IMAC_3.raw
8
+ Exp027046 48 PLX_48h_R2_IMAC_1.raw
9
+ Exp027047 48 PLX_48h_R2_IMAC_2.raw
10
+ Exp027048 48 PLX_48h_R2_IMAC_3.raw
examplefile/maxquant/profiling_exp_design_info.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ Experiment_Code Group Description
2
+ Exp026982 0 ctr_0h_R2_injection_1.raw
3
+ Exp026983 0 ctr_0h_R2_injection_2.raw
4
+ Exp026995 6 PLX_6h_R2_injection_1.raw
5
+ Exp026996 6 PLX_6h_R2_injection_2.raw
6
+ Exp027008 48 PLX_48h_R2_injection_1.raw
7
+ Exp027009 48 PLX_48h_R2_injection_4.raw
examplefile/maxquant/proteinGroups.txt ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/motifanalysis.csv ADDED
The diff for this file is too large to render. See raw diff
 
examplefile/phosphorylation_exp_design_info.txt ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Experiment_Code Group Description
2
+ Exp027012 0 ctr_0h_R1_IMAC_1.raw
3
+ Exp027013 0 ctr_0h_R1_IMAC_2.raw
4
+ Exp027014 0 ctr_0h_R1_IMAC_3.raw
5
+ Exp027015 0 ctr_0h_R2_IMAC_1.raw
6
+ Exp027016 0 ctr_0h_R2_IMAC_2.raw
7
+ Exp027017 0 ctr_0h_R2_IMAC_3.raw
8
+ Exp027018 0 ctr_0h_R3_IMAC_1.raw
9
+ Exp027019 0 ctr_0h_R3_IMAC_2.raw
10
+ Exp027020 2 PLX_2h_R1_IMAC_1.raw
11
+ Exp027021 2 PLX_2h_R1_IMAC_2.raw
12
+ Exp027022 2 PLX_2h_R1_IMAC_3.raw
13
+ Exp027023 2 PLX_2h_R2_IMAC_2.raw
14
+ Exp027024 2 PLX_2h_R2_IMAC_3.raw
15
+ Exp027025 2 PLX_2h_R3_IMAC_1.raw
16
+ Exp027026 2 PLX_2h_R3_IMAC_2.raw
17
+ Exp027027 2 PLX_2h_R3_IMAC_3.raw
18
+ Exp027028 6 PLX_6h_R1_IMAC_1.raw
19
+ Exp027029 6 PLX_6h_R1_IMAC_2.raw
20
+ Exp027030 6 PLX_6h_R1_IMAC_3.raw
21
+ Exp027031 6 PLX_6h_R2_IMAC_1.raw
22
+ Exp027032 6 PLX_6h_R2_IMAC_2.raw
23
+ Exp027033 6 PLX_6h_R2_IMAC_3.raw
24
+ Exp027034 6 PLX_6h_R3_IMAC_1.raw
25
+ Exp027035 6 PLX_6h_R3_IMAC_2.raw
26
+ Exp027036 24 PLX_24h_R1_IMAC_1.raw
27
+ Exp027037 24 PLX_24h_R1_IMAC_2.raw
28
+ Exp027038 24 PLX_24h_R1_IMAC_3.raw
29
+ Exp027039 24 PLX_24h_R2_IMAC_1.raw
30
+ Exp027040 24 PLX_24h_R2_IMAC_2.raw
31
+ Exp027041 24 PLX_24h_R2_IMAC_3.raw
32
+ Exp027042 24 PLX_24h_R3_IMAC_1.raw
33
+ Exp027043 24 PLX_24h_R3_IMAC_3.raw
34
+ Exp027044 48 PLX_48h_R1_IMAC_2.raw
35
+ Exp027045 48 PLX_48h_R1_IMAC_3.raw
36
+ Exp027046 48 PLX_48h_R2_IMAC_1.raw
37
+ Exp027047 48 PLX_48h_R2_IMAC_2.raw
38
+ Exp027048 48 PLX_48h_R2_IMAC_3.raw
39
+ Exp027049 48 PLX_48h_R3_IMAC_1.raw
40
+ Exp027050 48 PLX_48h_R3_IMAC_2.raw
examplefile/root/mascot/mascot_xml/Exp027015/Exp027015_F1_R1.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbe17096e89e287b9984b93499f376f814bb9b263d4ae3128fe3c13c40d4df26
3
+ size 42212493
examplefile/root/mascot/mascot_xml/Exp027016/Exp027016_F1_R1.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b81689b2f7d4e3e41f1a4edd9ed80b01f7a29901b7d745601be70aa15611a825
3
+ size 38644883
examplefile/root/mascot/mascot_xml/Exp027017/Exp027017_F1_R1.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5cbcc19156bbb4f0e2a92c5dca4fc2d5d01d4ac6c0d210ee28639b2d559b252
3
+ size 39234041