Upload 73 files
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +10 -0
- Dockerfile +14 -0
- README.md +12 -0
- backend/FUNCTIONS.R +1275 -0
- backend/analysis.R +0 -0
- backend/fill_missing_values.R +41 -0
- backend/get_aligned_seq_for_mea02.R +60 -0
- backend/get_normalized_data_of_psites3.R +149 -0
- backend/get_normalized_data_of_psites4.R +192 -0
- backend/import_extract.R +23 -0
- backend/preprocess.R +27 -0
- backend/visualization_deps_with_scatter02.R +117 -0
- examplefile/Clinicaltest.csv +40 -0
- examplefile/analysistools/Clinical_for_Demo.csv +40 -0
- examplefile/analysistools/Clinical_for_Pre.csv +10 -0
- examplefile/analysistools/Clinicaltest.csv +6 -0
- examplefile/analysistools/PreNormBasedProSummary.csv +0 -0
- examplefile/analysistools/phosphorylation_exp_design_info.txt +40 -0
- examplefile/data_frame_normalization_with_control_no_pair.csv +0 -0
- examplefile/download/anaysis_demo.zip +3 -0
- examplefile/download/mascot_xml.zip +3 -0
- examplefile/download/motif_kinase_relation.xlsx +0 -0
- examplefile/download/phosphorylation_peptide_txt.zip +3 -0
- examplefile/download/profiling_gene_txt.zip +3 -0
- examplefile/mascot/phosphorylation_exp_design_info.txt +10 -0
- examplefile/mascot/phosphorylation_peptide_txt/Exp027015_peptide.txt +0 -0
- examplefile/mascot/phosphorylation_peptide_txt/Exp027016_peptide.txt +0 -0
- examplefile/mascot/phosphorylation_peptide_txt/Exp027017_peptide.txt +0 -0
- examplefile/mascot/phosphorylation_peptide_txt/Exp027031_peptide.txt +0 -0
- examplefile/mascot/phosphorylation_peptide_txt/Exp027032_peptide.txt +0 -0
- examplefile/mascot/phosphorylation_peptide_txt/Exp027033_peptide.txt +0 -0
- examplefile/mascot/phosphorylation_peptide_txt/Exp027046_peptide.txt +0 -0
- examplefile/mascot/phosphorylation_peptide_txt/Exp027047_peptide.txt +0 -0
- examplefile/mascot/phosphorylation_peptide_txt/Exp027048_peptide.txt +0 -0
- examplefile/mascot/profiling_exp_design_info.txt +7 -0
- examplefile/mascot/profiling_gene_txt/Exp026982_gene.txt +0 -0
- examplefile/mascot/profiling_gene_txt/Exp026983_gene.txt +0 -0
- examplefile/mascot/profiling_gene_txt/Exp026995_gene.txt +0 -0
- examplefile/mascot/profiling_gene_txt/Exp026996_gene.txt +0 -0
- examplefile/mascot/profiling_gene_txt/Exp027008_gene.txt +0 -0
- examplefile/mascot/profiling_gene_txt/Exp027009_gene.txt +0 -0
- examplefile/maxquant/Phospho (STY)Sites.txt +0 -0
- examplefile/maxquant/phosphorylation_exp_design_info.txt +10 -0
- examplefile/maxquant/profiling_exp_design_info.txt +7 -0
- examplefile/maxquant/proteinGroups.txt +0 -0
- examplefile/motifanalysis.csv +0 -0
- examplefile/phosphorylation_exp_design_info.txt +40 -0
- examplefile/root/mascot/mascot_xml/Exp027015/Exp027015_F1_R1.txt +3 -0
- examplefile/root/mascot/mascot_xml/Exp027016/Exp027016_F1_R1.txt +3 -0
- examplefile/root/mascot/mascot_xml/Exp027017/Exp027017_F1_R1.txt +3 -0
.gitattributes
CHANGED
|
@@ -42,3 +42,13 @@ PhosMap_datasets/motif_library/refseq/rattus/STY_background_of_refseq_rattus_for
|
|
| 42 |
PhosMap_datasets/motif_library/uniprot/human/STY_background_of_uniprot_human_for_motif_enrichment.txt filter=lfs diff=lfs merge=lfs -text
|
| 43 |
PhosMap_datasets/motif_library/uniprot/mouse/STY_background_of_uniprot_mouse_for_motif_enrichment.txt filter=lfs diff=lfs merge=lfs -text
|
| 44 |
PhosMap_datasets/motif_library/uniprot/rattus/STY_background_of_uniprot_rattus_for_motif_enrichment.txt filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
PhosMap_datasets/motif_library/uniprot/human/STY_background_of_uniprot_human_for_motif_enrichment.txt filter=lfs diff=lfs merge=lfs -text
|
| 43 |
PhosMap_datasets/motif_library/uniprot/mouse/STY_background_of_uniprot_mouse_for_motif_enrichment.txt filter=lfs diff=lfs merge=lfs -text
|
| 44 |
PhosMap_datasets/motif_library/uniprot/rattus/STY_background_of_uniprot_rattus_for_motif_enrichment.txt filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
examplefile/root/mascot/mascot_xml/Exp027015/Exp027015_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
examplefile/root/mascot/mascot_xml/Exp027016/Exp027016_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
examplefile/root/mascot/mascot_xml/Exp027017/Exp027017_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
examplefile/root/mascot/mascot_xml/Exp027031/Exp027031_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
examplefile/root/mascot/mascot_xml/Exp027032/Exp027032_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
examplefile/root/mascot/mascot_xml/Exp027033/Exp027033_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
examplefile/root/mascot/mascot_xml/Exp027046/Exp027046_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
examplefile/root/mascot/mascot_xml/Exp027047/Exp027047_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
examplefile/root/mascot/mascot_xml/Exp027048/Exp027048_F1_R1.txt filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
www/manual.pdf filter=lfs diff=lfs merge=lfs -text
|
Dockerfile
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base image; it is expected to provide R and the shiny package, because the
# CMD at the bottom of this file relies on both.
FROM liuzandh/phosmap:1.0.0

# Create an unprivileged account with a fixed UID (1000) and a home directory,
# so the application does not run as root.
RUN useradd -m -u 1000 user

USER user

# HOME is referenced by WORKDIR/COPY below; the user's local bin directory is
# put in front of the existing PATH.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Copy the build context into the app directory, owned by the unprivileged user.
COPY --chown=user . $HOME/app

# Document the port the Shiny server listens on (see CMD). EXPOSE is metadata
# only: it does not publish the port.
EXPOSE 7860

# Exec form: R itself is the container's main process and serves the app in the
# working directory on all interfaces.
CMD ["R", "--quiet", "-e", "shiny::runApp(host='0.0.0.0', port=7860)"]
|
README.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: PhosMap
|
| 3 |
+
emoji: 📚
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: yellow
|
| 6 |
+
sdk: docker
|
| 7 |
+
pinned: false
|
| 8 |
+
duplicated_from: posit/shiny-for-r-template
|
| 9 |
+
license: mit
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
backend/FUNCTIONS.R
ADDED
|
@@ -0,0 +1,1275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#' Extract phosphosite confidence scores from Mascot XML exports.
#'
#' For every experiment code listed in the design file, runs the PhosMap Python
#' script `XMLParser_mascot_dat.py` through `system2()` and records whether the
#' call finished normally, raised a warning, or raised an error. A tab-separated
#' status table is written to a `log` directory next to the design file.
#'
#' Must be called where shiny's `withProgress()`/`incProgress()` are usable.
#'
#' @param phosphorylation_exp_design_info_file_path Path to a tab-separated
#'   design file with a header row and an `Experiment_Code` column.
#' @param mascot_xml_dir Existing directory; every experiment code must match
#'   the name of one of its entries.
#' @param mascot_txt_dir Output directory passed to the Python script; created
#'   when it does not exist.
#' @return No meaningful value; called for its side effects (progress updates,
#'   console output, generated files and the status log).
extract_psites_score <- function(
  phosphorylation_exp_design_info_file_path,
  mascot_xml_dir,
  mascot_txt_dir
){
  requireNamespace('utils')
  withProgress(message = "Start extracting the confidence of Psites from mascot.xml", detail = "This may take a while...", value = 0, {
    # mustWork = FALSE: existence is checked explicitly right below, with a
    # message that names the offending path.
    phosphorylation_exp_design_info_file_path <- normalizePath(phosphorylation_exp_design_info_file_path, mustWork = FALSE)
    if (!file.exists(phosphorylation_exp_design_info_file_path)) {
      stop(phosphorylation_exp_design_info_file_path, ' -> ', 'No the file.')
    }
    mascot_xml_dir <- normalizePath(mascot_xml_dir, mustWork = FALSE)
    if (!file.exists(mascot_xml_dir)) {
      stop(mascot_xml_dir, ' -> ', 'No the directory.')
    }
    mascot_xml_dir_files <- list.files(mascot_xml_dir)

    mascot_txt_dir <- normalizePath(mascot_txt_dir, mustWork = FALSE)
    if (!file.exists(mascot_txt_dir)) {
      cat('\n', mascot_txt_dir, ' -> ', 'No the directory, create it.')
      dir.create(mascot_txt_dir, recursive = TRUE)
      # Re-normalise now that the directory exists, so the script receives the
      # resolved path.
      mascot_txt_dir <- normalizePath(mascot_txt_dir, mustWork = FALSE)
    }

    command <- "python"
    # Location of the Python script shipped with the PhosMap package
    # (system.file() yields "" when the package or file is not installed).
    path2script <- system.file("src", "XMLParser_mascot_dat.py", package = "PhosMap")
    path2script <- normalizePath(path2script, mustWork = FALSE)

    # Experiment codes come from the design file.
    experiment_code <- utils::read.table(phosphorylation_exp_design_info_file_path,
                                         sep = '\t',
                                         header = TRUE)
    experiment_code <- as.vector(unlist(experiment_code$Experiment_Code))

    # Every experiment code must be present as an entry of mascot_xml_dir.
    na_experiments <- experiment_code[is.na(match(experiment_code, mascot_xml_dir_files))]
    if (length(na_experiments) > 0) {
      stop('The following experiments do not exist in ', mascot_xml_dir, ': ',
           paste(na_experiments, collapse = ', '))
    }

    experiment_code_count <- length(experiment_code)
    if (experiment_code_count < 1) {
      stop('No experiments in ', phosphorylation_exp_design_info_file_path)
    }

    cat('\n Start extracting the confidence of Psites from mascot.xml.')
    cat('\n Total ', experiment_code_count, ' experiment(s).')
    cat('\n It will take a little while.')

    parent_dir <- normalizePath(dirname(phosphorylation_exp_design_info_file_path))
    log_dir <- normalizePath(file.path(parent_dir, 'log'), mustWork = FALSE)
    if (!file.exists(log_dir)) {
      cat('\n', log_dir, ' -> ', 'No the directory, create it.')
      dir.create(log_dir)
    }

    log_df <- NULL
    for (i in seq_len(experiment_code_count)) {
      experiment_code_i <- experiment_code[i]
      # system2() pastes its arguments into one command line without quoting
      # them, so quote each argument to keep paths containing spaces intact.
      allArgs <- shQuote(c(path2script, experiment_code_i, mascot_xml_dir, mascot_txt_dir))
      log_out <- tryCatch(
        {
          # With stdout = TRUE a non-zero exit status is signalled as a
          # warning, which the handler below records.
          output <- system2(command, args = allArgs, stdout = TRUE)
          cat('\n', i, '->', experiment_code_i, '->', 'success', '\n')
          c(experiment_code_i, 'success')
        },
        warning = function(w){
          cat('\n', i, '->', experiment_code_i, '->', 'warning', '\n')
          print(w)
          c(experiment_code_i, 'warning')
        },
        error = function(e){
          cat('\n', i, '->', experiment_code_i, '->', 'error', '\n')
          print(e)
          c(experiment_code_i, 'error')
        }
      )
      log_df <- rbind(log_df, log_out)
      # Each experiment advances the bar by an equal scalar share.
      incProgress(1/experiment_code_count, detail = paste0('\n Completed file: ', i, '/', experiment_code_count))
    }

    colnames(log_df) <- c('Exp_no', 'Status')
    # ':' is replaced because it is not allowed in file names on some systems.
    now_time <- Sys.time()
    now_time <- gsub(':', '-', now_time)
    log_df_file_name <- paste(now_time, 'log_of_extract_psites_score.txt')
    log_df_file_path <- normalizePath(file.path(log_dir, log_df_file_name), mustWork = FALSE)
    utils::write.table(log_df, log_df_file_path, sep = '\t', row.names = FALSE, quote = FALSE)

    cat('\n Program finish, please see result log to check status.', '->', log_df_file_path)
  })
}
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
#' Read the data files of the requested experiments from a directory.
#'
#' The reader is chosen from the suffix of the first file in the directory
#' (as reported by `get_file_suffix()`): tab-separated `read.table` for `txt`,
#' `read.csv` otherwise. A file's ID is its name without the suffix; its
#' experiment code is the part of the ID before the first `_`.
#'
#' Must be called where shiny's `withProgress()`/`incProgress()` are usable.
#'
#' @param specific_dir Directory containing the data files.
#' @param experiment_ID Experiment codes to load; the result keeps this order.
#' @return A list with `file_data_list` (one character/numeric matrix per
#'   experiment, named by file ID) and `file_ID` (the matched file IDs).
#' @details Stops when the directory is empty or when an experiment code has no
#'   matching file.
get_file_info_from_dir <- function(specific_dir, experiment_ID){
  requireNamespace('utils')
  withProgress(message = 'Reading peptide identification files', style = "notification", detail = "processing...", value = 0,{
    all_files <- list.files(specific_dir)
    if (length(all_files) == 0) {
      stop('The directory of ', specific_dir, ' has no files.')
    }

    file_suffix <- get_file_suffix(all_files[1])
    if (file_suffix == 'txt') {
      read_file_function <- utils::read.table
      sep <- '\t'
    } else {
      read_file_function <- utils::read.csv
      sep <- ','
    }

    # Text before the first occurrence of `split`. fixed = TRUE matters: the
    # suffix separator starts with '.', which would match any character if it
    # were interpreted as a regular expression.
    text_before <- function(x, split) strsplit(x, split = split, fixed = TRUE)[[1]][1]

    sep_symbol <- paste('.', file_suffix, sep = '')
    all_files_ID <- vapply(all_files, text_before, character(1),
                           split = sep_symbol, USE.NAMES = FALSE)
    all_files_ID_code <- vapply(all_files_ID, text_before, character(1),
                                split = '_', USE.NAMES = FALSE)
    all_files_paths <- normalizePath(file.path(specific_dir, all_files))

    index_of_match <- match(experiment_ID, all_files_ID_code)
    # Fail early with a readable message instead of trying to open an NA path.
    missing_ID <- experiment_ID[is.na(index_of_match)]
    if (length(missing_ID) > 0) {
      stop('No file found in ', specific_dir, ' for experiment(s): ',
           paste(missing_ID, collapse = ', '))
    }
    matched_all_files_paths <- all_files_paths[index_of_match]
    matched_all_files_ID <- all_files_ID[index_of_match]

    file_data_list <- list()
    matched_all_files_count <- length(matched_all_files_paths)
    cat('\n Total file: ', matched_all_files_count)
    for (i in seq_len(matched_all_files_count)) {
      cat('\n completed: ', i, '/', matched_all_files_count)
      file_data <- as.matrix(read_file_function(matched_all_files_paths[i], header = TRUE, sep = sep))
      file_data_list[[i]] <- file_data
      incProgress(1/matched_all_files_count, detail = paste0('\n completed: ', i, '/', matched_all_files_count))
    }
    attr(file_data_list, 'names') <- matched_all_files_ID
    result_list <- list(file_data_list = file_data_list, file_ID = matched_all_files_ID)
    return(result_list)
  })
}
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
#' Filter each experiment's peptide table and collect Area/PSMs per peptide.
#'
#' For every entry of `files`, rows are kept when `Ion.Score >= min_score` and
#' `FDR < min_FDR`. When `qc` is true, the kept rows are further restricted to
#' those whose first column matches the first column of the corresponding
#' site-score table, after that table has been limited to rows whose
#' `pep_var_mod_conf` contains '%'.
#'
#' Columns read from the peptide tables: Ion.Score, FDR, Area, PSMs, Sequence,
#' Protein.Groups.Accessions, Modification. Column read from the site-score
#' tables: pep_var_mod_conf.
#'
#' @param peptide_id File IDs, one per entry of `files`; used as column prefix.
#' @param files List of peptide tables.
#' @param files_site_score List of site-score tables (only used when `qc`).
#' @param qc Whether to apply the site-score restriction.
#' @param min_score Minimum ion score (inclusive).
#' @param min_FDR FDR upper bound (exclusive).
#' @return A list with `peptide_df_with_area_psm_list`, `ID_of_seq_gi_site_list`
#'   and `ID_DF_list`, each holding one element per experiment.
get_list_with_filtered_sites <- function(peptide_id, files, files_site_score, qc, min_score, min_FDR){
  withProgress(message = 'Reading psites QC files', style = "notification", detail = "processing...", value = 0,{
    area_psm_frames <- list()  # per experiment: data.frame(area, psm)
    site_keys <- list()        # per experiment: sequence||accession||modification
    keyed_frames <- list()     # per experiment: key + data.frame(area, psm)
    n_files <- length(peptide_id)

    cat('\n Total file: ', n_files)
    for (k in seq_len(n_files)) {
      cat('\n completed: ',k,'/',n_files)

      # Keep peptides that pass the ion-score and FDR thresholds.
      peptides <- data.frame(files[[k]])
      ion_score <- as.numeric(as.vector(peptides$Ion.Score))
      fdr <- as.numeric(as.vector(peptides$FDR))
      peptides <- peptides[which(ion_score >= min_score & fdr < min_FDR), ]

      if (!qc) {
        kept <- peptides
      } else {
        # Only site-score rows that carry a confidence percentage are used.
        site_scores <- as.data.frame(files_site_score[[k]])
        site_scores <- site_scores[which(grepl('%', site_scores$pep_var_mod_conf)), ]

        # Positions in `peptides` of the scored peptides; unmatched ones are dropped.
        hits <- match(as.vector(site_scores[, 1]), as.vector(peptides[, 1]))
        kept <- peptides[hits[!is.na(hits)], ]
      }

      area <- as.numeric(as.vector(kept$Area))
      psms <- as.numeric(as.vector(kept$PSMs))
      area_psm <- data.frame(area, psms)
      value_names <- paste(peptide_id[k], c('Area', 'PSMs'), sep = '_')
      colnames(area_psm) <- value_names

      site_key <- paste(as.vector(kept$Sequence),
                        as.vector(kept$Protein.Groups.Accessions),
                        as.vector(kept$Modification),
                        sep = '||')

      keyed <- data.frame(site_key, area_psm)
      colnames(keyed) <- c("ID", value_names)

      area_psm_frames[[k]] <- area_psm
      site_keys[[k]] <- site_key
      keyed_frames[[k]] <- keyed

      incProgress(1/n_files, detail = paste0('\n completed: ',k,'/',n_files))
    }

    return(list(
      peptide_df_with_area_psm_list = area_psm_frames,
      ID_of_seq_gi_site_list = site_keys,
      ID_DF_list = keyed_frames
    ))
  })
}
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
#' Quality-filter phosphopeptide tables and merge them across experiments.
#'
#' Runs four steps under one progress bar:
#'   1. read the peptide identification files (`get_file_info_from_dir`),
#'   2. read the site-score (QC) files,
#'   3. filter peptides (`get_list_with_filtered_sites`),
#'   4. merge all experiments on the peptide ID (`get_merged_phospho_df`),
#'      drop the `_PSMs` columns and order the value columns like the
#'      experiment design file.
#'
#' Must be called where shiny's `withProgress()`/`incProgress()` are usable.
#'
#' @param firmiana_peptide_dir Directory with the peptide identification files.
#' @param psites_score_dir Directory with the site-score files.
#' @param phospho_experiment_design_file_path Tab-separated design file with a
#'   header row and an `Experiment_Code` column.
#' @param qc Passed to `get_list_with_filtered_sites`; whether site scores are
#'   used for filtering.
#' @param min_score Minimum ion score (default 20).
#' @param min_FDR FDR upper bound (default 0.01).
#' @return A data frame whose first column is the peptide ID and whose other
#'   columns are named after the experiment codes, in design-file order.
pre_process_filter_psites <- function(firmiana_peptide_dir, psites_score_dir,
                                      phospho_experiment_design_file_path, qc,
                                      min_score = 20, min_FDR = 0.01) {
  requireNamespace('utils')

  # Local reader for the site-score files. Same procedure as the file-level
  # get_file_info_from_dir, but reporting progress as 'Reading psites QC files'.
  read_site_score_files <- function(specific_dir, experiment_ID){
    withProgress(message = 'Reading psites QC files', style = "notification", detail = "processing...", value = 0,{
      all_files <- list.files(specific_dir)
      if (length(all_files) == 0) {
        stop('The directory of ', specific_dir, ' has no files.')
      }
      file_suffix <- get_file_suffix(all_files[1])
      if (file_suffix == 'txt') {
        read_file_function <- utils::read.table
        sep <- '\t'
      } else {
        read_file_function <- utils::read.csv
        sep <- ','
      }
      # fixed = TRUE: the suffix separator starts with '.', which must not be
      # interpreted as a regular-expression wildcard.
      text_before <- function(x, split) strsplit(x, split = split, fixed = TRUE)[[1]][1]
      sep_symbol <- paste('.', file_suffix, sep = '')
      all_files_ID <- vapply(all_files, text_before, character(1),
                             split = sep_symbol, USE.NAMES = FALSE)
      all_files_ID_code <- vapply(all_files_ID, text_before, character(1),
                                  split = '_', USE.NAMES = FALSE)
      all_files_paths <- normalizePath(file.path(specific_dir, all_files))

      index_of_match <- match(experiment_ID, all_files_ID_code)
      missing_ID <- experiment_ID[is.na(index_of_match)]
      if (length(missing_ID) > 0) {
        stop('No file found in ', specific_dir, ' for experiment(s): ',
             paste(missing_ID, collapse = ', '))
      }
      matched_all_files_paths <- all_files_paths[index_of_match]
      matched_all_files_ID <- all_files_ID[index_of_match]

      file_data_list <- list()
      matched_all_files_count <- length(matched_all_files_paths)
      cat('\n Total file: ', matched_all_files_count)
      for (i in seq_len(matched_all_files_count)) {
        cat('\n completed: ', i, '/', matched_all_files_count)
        file_data <- as.matrix(read_file_function(matched_all_files_paths[i], header = TRUE, sep = sep))
        file_data_list[[i]] <- file_data
        incProgress(1/matched_all_files_count, detail = paste0('\n completed: ', i, '/', matched_all_files_count))
      }
      attr(file_data_list, 'names') <- matched_all_files_ID
      return(list(file_data_list = file_data_list, file_ID = matched_all_files_ID))
    })
  }

  withProgress(message = 'Step2:QC and Merging', style = "notification", detail = "processing...", value = 0, max = 4,{
    PEPTIDE_DIR <- normalizePath(firmiana_peptide_dir, mustWork = FALSE)
    if (!file.exists(firmiana_peptide_dir)) {
      stop(firmiana_peptide_dir, ' -> ', 'No the directory.')
    }

    PSITES_WITH_SCORE_DIR <- normalizePath(psites_score_dir, mustWork = FALSE)
    if (!file.exists(psites_score_dir)) {
      stop(psites_score_dir, ' -> ', 'No the directory.')
    }

    phospho_experiment_design_file_path <- normalizePath(phospho_experiment_design_file_path, mustWork = FALSE)
    if (!file.exists(phospho_experiment_design_file_path)) {
      stop(phospho_experiment_design_file_path, ' -> ', 'No the file')
    }

    # The design file fixes the order of the experiments in the merged result.
    phospho_experiment_design_file <- utils::read.table(phospho_experiment_design_file_path, sep = '\t',
                                                        header = TRUE, stringsAsFactors = FALSE)
    phospho_experiment_ID <- as.vector(unlist(phospho_experiment_design_file$Experiment_Code))

    # Step 1: peptide identification files.
    result_list_from_PEPTIDE_DIR <- get_file_info_from_dir(PEPTIDE_DIR, phospho_experiment_ID)
    files <- result_list_from_PEPTIDE_DIR$file_data_list
    peptide.id <- result_list_from_PEPTIDE_DIR$file_ID
    incProgress(1, detail = '')

    # Step 2: site-score (QC) files.
    cat('\n The 2nd step: read psites QC files.')
    result_list_from_PSITES_WITH_SCORE_DIR <- read_site_score_files(PSITES_WITH_SCORE_DIR,
                                                                    phospho_experiment_ID)
    files_site_score <- result_list_from_PSITES_WITH_SCORE_DIR$file_data_list
    site_score.id <- result_list_from_PSITES_WITH_SCORE_DIR$file_ID
    incProgress(1, detail = '')

    # Step 3: filter peptides.
    cat('\n The 3rd step: filter peptides based on site quality.')
    result_list_with_filtered_sites <- get_list_with_filtered_sites(peptide.id, files,
                                                                    files_site_score, qc,
                                                                    min_score, min_FDR)
    peptide_df_with_area_psm_list <- result_list_with_filtered_sites$peptide_df_with_area_psm_list # area, psm
    ID_of_seq_gi_site_list <- result_list_with_filtered_sites$ID_of_seq_gi_site_list # seq_gi_psite
    ID_DF_list <- result_list_with_filtered_sites$ID_DF_list # seq_gi_psite, area, psm
    incProgress(1, detail = '')

    # Step 4: merge all experiments on the unique peptide ID.
    cat('\n The 4th step: merge data based on peptides (unique ID).')
    withProgress(message = 'Merging data based on peptides (unique ID)', style = "notification", detail = "processing...", value = 0,{
      merge_df_with_phospho_peptides <- get_merged_phospho_df(peptide.id,
                                                              peptide_df_with_area_psm_list,
                                                              ID_of_seq_gi_site_list, ID_DF_list)

      # Drop the PSMs columns. Guarded: indexing with -integer(0) would select
      # no columns at all instead of all of them.
      index_of_PSMs <- grep('_PSMs', colnames(merge_df_with_phospho_peptides))
      if (length(index_of_PSMs) > 0) {
        merge_df_with_phospho_peptides <- merge_df_with_phospho_peptides[, -index_of_PSMs, drop = FALSE]
      }

      merge_df_with_phospho_peptides_colnames <- colnames(merge_df_with_phospho_peptides)
      ID <- as.vector(merge_df_with_phospho_peptides[, 1])
      # drop = FALSE keeps a table (with column names) even when there is a
      # single experiment.
      Value <- merge_df_with_phospho_peptides[, -1, drop = FALSE]
      # The experiment code is the part of each column name before the first '_'.
      Value_colnames_ID <- vapply(colnames(Value), function(x){
        strsplit(x, split = '_')[[1]][1]
      }, character(1), USE.NAMES = FALSE)
      # Reorder the value columns to follow the experiment design file.
      index_of_match <- match(phospho_experiment_ID, Value_colnames_ID)
      Value <- Value[, index_of_match, drop = FALSE]
      merge_df_with_phospho_peptides <- data.frame(ID, Value)
      colnames(merge_df_with_phospho_peptides) <- c(merge_df_with_phospho_peptides_colnames[1], phospho_experiment_ID)
      incProgress(1, detail = 'finishing...')
    })

    return(merge_df_with_phospho_peptides)
  })
}
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
# Annotate merged phospho-peptide quantification with gene symbols.
#
# Args:
#   merge_df_with_phospho_peptides: data frame with a column `ID_of_seq_gi_site`
#     holding "<sequence>||<accession(s)>||<modification>" strings. All columns
#     except the first are appended to the result unchanged.
#   species: prefix of the conversion table file
#     "./PhosMap_datasets/id_coversion_table/<species>_ID.txt".
#   id_type: name of the conversion-table column holding the accession type
#     that appears in the IDs (accessions in a cell are separated by "; ").
#
# Returns:
#   data frame with columns Sequence, ID, Modification, GeneSymbol followed by
#   the value columns, restricted to rows whose GeneSymbol is not NA.
#
# Must be called inside a Shiny session: progress is reported through
# withProgress()/incProgress().
get_combined_data_frame02 <- function(merge_df_with_phospho_peptides, species = 'human', id_type = 'RefSeq_Protein_GI'
){
  # Read library file, map GI to Gene Symbol
  requireNamespace('utils')
  requireNamespace('stringr')

  cat('\n The 5th step: write the data frame with symbols mapping to genes.')

  withProgress(message = 'Writing the data frame with symbols mapping to genes', style = "notification", detail = "This may take a while...", value = 0,{
    id_coversion_table_dir <- "./PhosMap_datasets/id_coversion_table/"
    id_coversion_table <- utils::read.table(paste0(id_coversion_table_dir, species, "_ID.txt"), sep = '\t', header = TRUE)

    # Fail early with a readable message instead of "undefined columns selected".
    if(!all(c('GeneSymbol', id_type) %in% colnames(id_coversion_table))){
      stop('The ID conversion table of ', species, ' needs the columns GeneSymbol and ', id_type, '.')
    }

    cat('\n The 5th step is running.')
    # Split every ID once into: sequence, accession, modification.
    seq_gi_site_vector <- as.character(merge_df_with_phospho_peptides$ID_of_seq_gi_site)
    seq_gi_site_parts <- strsplit(seq_gi_site_vector, split = "||", fixed = TRUE)
    pick_part <- function(k){
      # A missing part yields NA, exactly like `[[1]][k]` on a short split.
      vapply(seq_gi_site_parts, function(parts){ parts[k] }, character(1), USE.NAMES = FALSE)
    }
    Sequence <- pick_part(1)
    ID <- pick_part(2)
    Modification <- pick_part(3)

    ##########################################################################################################
    # id_types <- c('GeneID', 'RefSeq_Protein_GI', 'RefSeq_Protein_Accession', 'Uniprot_Protein_Accession')
    # Build a lookup vector: names are accessions, values are gene symbols.
    MappingDf <- id_coversion_table[, c('GeneSymbol', id_type)]
    raw_accessions <- as.vector(unlist(MappingDf[,2]))
    invalid_index <- which(raw_accessions == '' | raw_accessions == '-')
    if(length(invalid_index)>0){
      MappingDf <- MappingDf[-invalid_index,]
    }
    MappingDf_row <- nrow(MappingDf)
    cat('\n', 'Construct dictionary based on GeneSymbol and specific ID.')
    cat('\n', 'The total:', MappingDf_row)

    mapping_symbols <- as.character(MappingDf[,1])
    mapping_accessions <- as.character(unlist(MappingDf[,2]))
    # Collect per-row pieces in a pre-sized list and flatten once at the end;
    # growing a vector with c() inside the loop is quadratic in the table size.
    mapping_chunks <- vector('list', MappingDf_row)
    # seq_len() keeps the loop from running when the table is empty (1:0 would run twice).
    for(i in seq_len(MappingDf_row)){
      accessions_i <- strsplit(mapping_accessions[i], split = '; ')[[1]]
      symbols_i <- rep(mapping_symbols[i], length(accessions_i))
      names(symbols_i) <- accessions_i
      mapping_chunks[[i]] <- symbols_i
      if(i%%5000==0 | i == MappingDf_row){
        cat('\n', 'Completed:', i, '/', MappingDf_row)
      }
      incProgress(1/MappingDf_row, detail = paste0('\n', 'Completed:', i, '/', MappingDf_row))
    }
    mapping_dict <- unlist(mapping_chunks)
    if(is.null(mapping_dict)){
      # An empty dictionary must still be indexable by name (every lookup gives NA).
      mapping_dict <- character(0)
    }
    ##########################################################################################################

    strip_gi_prefix <- id_type == 'RefSeq_Protein_GI'
    GeneSymbol <- vapply(ID, function(x){
      gi_all <- strsplit(x, split=";", fixed = TRUE)[[1]]
      gi_keys <- gi_all
      if(strip_gi_prefix){
        # Accessions in the IDs carry a "gi|" prefix that the dictionary keys lack.
        gi_keys <- stringr::str_replace_all(gi_keys, 'gi[|]', '')
      }
      gi_mapping_symbol <- mapping_dict[gi_keys]
      gi_mapping_symbol_unique <- unique(gi_mapping_symbol[!is.na(gi_mapping_symbol)])
      gi_mapping_symbol_unique_count <- length(gi_mapping_symbol_unique)

      if(gi_mapping_symbol_unique_count == 0){
        return(NA_character_)
      }else if(gi_mapping_symbol_unique_count == 1){
        return(as.character(gi_mapping_symbol_unique))
      }else{
        # NOTE(review): when the accessions map to several symbols the original code
        # returns the joined accessions, not the joined symbols - kept as is; confirm intent.
        return(paste(gi_all, collapse = ';'))
      }
    }, character(1), USE.NAMES = FALSE)

    # sequenceID, accession, modification, symbol, quantification_value_in_experiment
    df_of_combination <- data.frame(Sequence, ID, Modification, GeneSymbol, merge_df_with_phospho_peptides[,-1]) # delete first column
    index_of_NonNA <- which(!is.na(GeneSymbol))
    df_of_combination <- df_of_combination[index_of_NonNA,]
    cat('\n The 5th step is over ^_^.')
    cat('\n The 5th step: write the data frame with symbols mapping to genes.')
    incProgress(1, detail = 'Please wait a moment')
  })
  return(df_of_combination)
}
|
| 471 |
+
|
| 472 |
+
|
| 473 |
+
# Build a per-protein summary of unique phosphorylation sites.
#
# Args:
#   combined_df_with_mapped_gene_symbol: data frame with at least the columns
#     Sequence and ID; rows whose ID contains ';' (peptide assigned to several
#     proteins) are discarded.
#   species, fasta_type: select the FASTA table
#     "./PhosMap_datasets/fasta_library/<fasta_type>/<species>/<species>_<fasta_type>_fasta.txt",
#     which must provide the columns ID and Sequence.
#
# Returns:
#   the row-bound results of get_unique_AAs_i_df() for every unique protein, with
#   row names "<ID>_<AA_in_protein>", any "_PSMs" columns removed and GeneSymbol
#   cut at the first "||"; NULL when no protein is left after filtering.
#
# Must be called inside a Shiny session (withProgress()/incProgress()); relies on
# get_df_with_AAs_i() and get_unique_AAs_i_df() defined elsewhere.
get_summary_with_unique_sites02 <- function(combined_df_with_mapped_gene_symbol, species = 'human', fasta_type = 'refseq'
){
  requireNamespace('utils')
  requireNamespace('stringr')

  # For every peptide, translate the positions of its lower-case residues
  # (the modified ones) into positions within the parent protein sequence.
  get_modification_index <- function(id_data_only_peptide2gi, fasta_data){
    cat('\n', 'Get modification index in protein sequence.')
    id_data_only_peptide2gi_row <- nrow(id_data_only_peptide2gi)
    modification_index_in_protein_seq_list <- vector('list', id_data_only_peptide2gi_row)
    for(i in seq_len(id_data_only_peptide2gi_row)){
      peptide_seq <- as.vector(id_data_only_peptide2gi$Sequence[i])
      peptide_id <- as.vector(id_data_only_peptide2gi$ID[i])
      modification_index_in_peptide_seq <- unlist(gregexpr("[a-z]", peptide_seq))
      protein_seq <- as.vector(fasta_data$Sequence[which(fasta_data$ID==peptide_id)])
      first_index_of_peptide2protein <- unlist(gregexpr(toupper(peptide_seq), protein_seq))
      modification_index_in_protein_seq <- NULL
      for(elemt in first_index_of_peptide2protein){
        tmp_modification_index_in_protein_seq <- elemt + modification_index_in_peptide_seq -1
        modification_index_in_protein_seq <- c(modification_index_in_protein_seq,
                                               tmp_modification_index_in_protein_seq)
      }
      # `[i] <- list(x)` stores a NULL result in slot i; `[[i]] <- NULL` would delete the slot.
      modification_index_in_protein_seq_list[i] <- list(modification_index_in_protein_seq)
      if(i%%500==0 | i==id_data_only_peptide2gi_row ){
        cat('\n completed: ', i, '/', id_data_only_peptide2gi_row)
      }
      incProgress(1/id_data_only_peptide2gi_row, detail = paste0('\n', 'Completed:', i, '/', id_data_only_peptide2gi_row))
    }
    return(modification_index_in_protein_seq_list)
  }

  # unique phosphorylation sites
  withProgress(message = 'Constructing the data frame with unique phosphorylation site for each protein sequence', style = "notification", detail = "This may take a while...", value = 0,{
    cat('\n The 6th step: construct the data frame with unique phosphorylation site for each protein sequence.')

    path <- "./PhosMap_datasets/fasta_library/"
    fasta_data <- utils::read.table(paste0(path, fasta_type, "/", species, "/", species, "_", fasta_type, "_fasta.txt"), header=TRUE, sep="\t")

    id_data <- combined_df_with_mapped_gene_symbol

    # Keep peptides assigned to unique protein
    id_data_only_peptide2gi <- id_data[which(!grepl(';', as.vector(id_data$ID))),]

    # ---- Stage 1: locate the modified residues on the protein sequences ----
    withProgress(message = 'Getting modification index in protein sequence. ', style = "notification", detail = "This may take a while...", value = 0,{
      modification_index_in_protein_seq_list <- get_modification_index(id_data_only_peptide2gi,
                                                                       fasta_data)

      proteins_in_id_data_only_peptide2gi <- as.vector(id_data_only_peptide2gi$ID)
      sequences_in_id_data_only_peptide2gi <- as.vector(id_data_only_peptide2gi$Sequence)

      unique_proteins <- unique(proteins_in_id_data_only_peptide2gi)
      unique_protein_count <- length(unique_proteins)
    })
    incProgress(1/2, detail = paste0('\n '))

    # ---- Stage 2: summarise the sites of each protein and drop redundancy ----
    cat('\n', 'Map phosphorylation sites to protein sequence and eliminate redundancy.')
    withProgress(message = 'Mapping phosphorylation sites to protein sequence and eliminate redundancy. ', style = "notification", detail = "This may take a while...", value = 0,{
      # Collect the per-protein frames and bind them once after the loop.
      per_protein_summaries <- vector('list', unique_protein_count)
      for(i in seq_len(unique_protein_count)){
        df_with_AAs_i <- get_df_with_AAs_i(unique_proteins,
                                           i,
                                           id_data_only_peptide2gi,
                                           proteins_in_id_data_only_peptide2gi,
                                           sequences_in_id_data_only_peptide2gi,
                                           modification_index_in_protein_seq_list)

        per_protein_summaries[i] <- list(get_unique_AAs_i_df(df_with_AAs_i))

        if(i%%500==0 | i == unique_protein_count){
          cat('\n completed: ', i, '/', unique_protein_count)
        }
        incProgress(1/unique_protein_count, detail = paste0('\n', 'Completed:', i, '/', unique_protein_count))
      }
      summary_df_of_unique_proteins_with_sites <- do.call(rbind, per_protein_summaries)

      # Post-processing runs once on the complete table. Inside the loop it was
      # repeated for every protein and, once "_PSMs" columns had been removed,
      # left the accumulated table with fewer columns than the next frame.
      if(!is.null(summary_df_of_unique_proteins_with_sites) && nrow(summary_df_of_unique_proteins_with_sites) > 0){
        summary_df_of_unique_proteins_with_sites_rownames <- paste(as.vector(summary_df_of_unique_proteins_with_sites$ID),
                                                                   as.vector(summary_df_of_unique_proteins_with_sites$AA_in_protein),
                                                                   sep = '_')
        rownames(summary_df_of_unique_proteins_with_sites) <- summary_df_of_unique_proteins_with_sites_rownames
        summary_df_of_unique_proteins_with_sites_colnames <- colnames(summary_df_of_unique_proteins_with_sites)
        index_of_PSMs <- which(grepl('_PSMs', summary_df_of_unique_proteins_with_sites_colnames))
        if(length(index_of_PSMs)>0){
          summary_df_of_unique_proteins_with_sites <- summary_df_of_unique_proteins_with_sites[,-index_of_PSMs]
        }
        # Keep the text before the first literal "||". The former guard grepl('||', x)
        # was a regex that matches everything, so the split was always applied;
        # strsplit() with fixed = TRUE returns the whole string when "||" is absent.
        summary_df_of_unique_proteins_with_sites$GeneSymbol <- vapply(
          as.character(summary_df_of_unique_proteins_with_sites$GeneSymbol),
          function(x){
            strsplit(x, split = '||', fixed = TRUE)[[1]][1]
          },
          character(1),
          USE.NAMES = FALSE
        )
      }
    })
    incProgress(1/2, detail = paste0('\n '))
    cat('\n The 6th step: construct over.')

  })
  return(summary_df_of_unique_proteins_with_sites)
}
|
| 589 |
+
|
| 590 |
+
|
| 591 |
+
# Merge the per-experiment proteomics (profiling) gene tables into one matrix of iBAQ values.
#
# Args:
#   firmiana_gene_dir: directory holding one tab-separated "<experiment>_*.txt"
#     file per experiment; each file must have the 16 columns listed in
#     `file_data_colnames` below, in that order.
#   US_cutoff: rows are kept when their USPeptide_Num is >= this value.
#   experiment_gene_file_path: tab-separated file with a header and an
#     `Experiment_Code` column; defines which experiments are merged and in which order.
#
# Returns:
#   data frame whose first column is Symbol and whose remaining columns (named
#   by experiment code) hold iBAQ values; values missing for an experiment are 0.
#
# Raises an error when the directory is missing or empty, when no experiment
# code is given, or when an experiment code has no matching file.
# Must be called inside a Shiny session (withProgress()/incProgress()).
merge_profiling_file_from_Firmiana <- function(firmiana_gene_dir, US_cutoff = 1, experiment_gene_file_path){
  requireNamespace('utils')

  withProgress(message = 'Step5 : Normalization [Normalizing phosphoproteomics data based on proteomics data.] ', style = "notification", detail = "processing...", value = 0,{
    # ---- Stage 1: read and filter every experiment file ----
    DATA_DIR <- normalizePath(firmiana_gene_dir, mustWork = FALSE)
    if(!file.exists(DATA_DIR)){
      stop('The directory of ', DATA_DIR, ' does not exist.')
    }
    # Anchored pattern: only names ending in ".txt" ('.txt' also matched e.g. "a_txt").
    file_names <- list.files(path = DATA_DIR, pattern = '\\.txt$')
    if(length(file_names) < 1){
      stop('The directory of ', DATA_DIR, ' has no files.')
    }

    # The experiment code is the part of the file name before the first '_'.
    exp_names <- vapply(strsplit(file_names, split = '_'), function(parts){ parts[1] }, character(1), USE.NAMES = FALSE)

    experiment_code <- utils::read.table(experiment_gene_file_path, header = TRUE, sep = '\t', stringsAsFactors = FALSE)
    experiment_code <- as.vector(unlist(experiment_code$Experiment_Code))
    if(length(experiment_code) < 1){
      stop('No experiment code found in ', experiment_gene_file_path, '.')
    }

    index_of_match <- match(experiment_code, exp_names)
    na_index <- which(is.na(index_of_match))
    if(length(na_index) > 0){
      stop('Experiment code(s) not in ', DATA_DIR, ': ', paste(experiment_code[na_index], collapse = ', '))
    }

    # Reorder files so they follow the experiment design.
    exp_names <- exp_names[index_of_match]
    file_names <- file_names[index_of_match]
    file_names_count <- length(file_names)

    # Table headers of input data
    # "Gene.ID" "Symbol" "Annotation" "Modification" "Description"
    # "Protein.GI" "Protein.Num" "Area" "FoT.1e.6." "iBAQ"
    # "Peptide.Num" "Unique.Peptide.Num" "Strict.Peptide.Num" "US.Peptide.Num" "Identified.Proteins.Num"
    # "Unique.Proteins.Num"

    # New table headers of input data
    file_data_colnames <- c(
      "Gene_ID", "Symbol", "Annotation", "Modification", "Description",
      "Protein_GI", "Protein_Num", "Area", "FoT5", "iBAQ",
      "Peptide_Num", "UPeptide_Num", "SPeptide_Num", "USPeptide_Num", "Identified_Proteins_Num", "Unique_Proteins_Num"
    )
    kept_colnames <- c(
      "Symbol", "iBAQ", "USPeptide_Num"
    )
    kept_colnames_index <- match(kept_colnames, file_data_colnames)
    cat('\n Merge profiling files downloaded from Firmiana.')
    cat('\n Total files: ', file_names_count)
    data_list <- vector('list', file_names_count)
    for(i in seq_len(file_names_count)){
      file_path <- normalizePath(file.path(DATA_DIR, file_names[i]))
      file_data <- utils::read.delim(file_path, header = TRUE, stringsAsFactors = FALSE, sep = '\t')
      colnames(file_data) <- file_data_colnames
      file_data <- file_data[, kept_colnames_index]

      # Keep Symbol and iBAQ of the rows that pass the peptide-count cutoff.
      index_of_US <- which(file_data$USPeptide_Num >= US_cutoff)
      file_data <- file_data[index_of_US, c(1,2)]
      colnames(file_data) <- c('Symbol', paste(exp_names[i], 'iBAQ', sep = '_'))
      data_list[[i]] <- file_data
      cat('\n Read and filter: ', i, '/', file_names_count)
      # The amount must be a single number (it used to be the vector 1/seq_len(n)).
      incProgress(1/file_names_count, detail = paste0('\n Read and filter: ', i, '/', file_names_count))
    }
    names(data_list) <- exp_names

    # ---- Stage 2: outer-join all experiments on Symbol ----
    data_list_count <- length(data_list)
    merge_df <- data_list[[1]]
    cat('\n merge_complete: ', 1, '/', data_list_count)
    if(data_list_count>1){
      for(i in 2:data_list_count){
        merge_df <- merge(merge_df, data_list[[i]], by = 'Symbol', all = TRUE)
        cat('\n merge_complete: ', i, '/', data_list_count)
        incProgress(1/data_list_count, detail = paste0('\n merge_complete: ', i, '/', data_list_count))
      }
    }
    Symbol <- as.vector(merge_df[,1])
    Value <- as.matrix(merge_df[,-1])
    # A symbol absent from an experiment is treated as not quantified (0).
    Value[is.na(Value)] <- 0
    colnames(Value) <- exp_names
    merge_df_no_NA <- data.frame(Symbol, Value)

    incProgress(1/2, detail = '')
  })
  return(merge_df_no_NA)
}
|
| 702 |
+
|
| 703 |
+
|
| 704 |
+
# Normalize each experiment column to a fraction of its total, scaled by 1e5.
#
# Args:
#   data_frame: first column holds identifiers, every other column holds the
#     values of one experiment.
#   experiment_code_file_path: tab-separated file with a header and an
#     `Experiment_Code` column; its entries become the names of the value
#     columns, so there must be one entry per value column.
#
# Returns:
#   data frame with the same shape as `data_frame`; within each value column the
#   entries > 0 are replaced by value / sum(positive values) * 1e5, all other
#   entries (zeros, negatives, NA) are left untouched.
get_normalized_data_FOT5 <- function(data_frame, experiment_code_file_path
){
  requireNamespace('utils')
  cat('Normalize proteomics data based on the total sum (x 1e5).')
  experiment_code <- utils::read.table(experiment_code_file_path, header = TRUE, sep = '\t', stringsAsFactors = FALSE)
  experiment_code <- as.vector(unlist(experiment_code$Experiment_Code))
  data_frame_colnames <- colnames(data_frame)
  ID <- as.vector(data_frame[,1])
  # drop = FALSE keeps a data frame even when there is only one experiment column;
  # otherwise ncol() is NULL and the loop below fails.
  Value_FOT5 <- data_frame[, -1, drop = FALSE]
  for(i in seq_len(ncol(Value_FOT5))){
    x <- Value_FOT5[,i]
    valid_index <- which(x>0)
    valid_x <- x[valid_index]
    Value_FOT5[valid_index,i] <- valid_x/sum(valid_x)*1e5
  }
  data_frame_normaliation <- data.frame(ID, Value_FOT5)
  colnames(data_frame_normaliation) <- c(data_frame_colnames[1], experiment_code)
  return(data_frame_normaliation)
}
|
| 729 |
+
|
| 730 |
+
|
| 731 |
+
# Keep the rows whose maximum value across experiments ranks in the top fraction.
#
# Args:
#   phospho_data: data frame whose first three columns are annotation and whose
#     remaining columns are the values compared.
#   percent_of_kept_sites: fraction of rows to keep (e.g. 3/4 keeps the top 75%).
#
# Returns:
#   the selected rows of `phospho_data`, ordered by decreasing row maximum.
keep_psites_with_max_in_topX2 <- function(phospho_data, percent_of_kept_sites = 3/4){
  percent_of_kept_sites_str <- paste('top', percent_of_kept_sites*100, '%', sep = '')
  cat('\n The 8th step: filter psites with row maximum in', percent_of_kept_sites_str, '.')
  # drop = FALSE: with a single value column the subset must stay two-dimensional for apply().
  Value <- phospho_data[, -c(1,2,3), drop = FALSE]
  Value_rowmax <- apply(Value, 1, function(x){
    max(as.vector(unlist(x)))
  })
  index_of_Value_rowmax_desc <- order(Value_rowmax, decreasing = TRUE)
  # Clamp to [0, nrow]: a fraction above 1 would otherwise index past the end
  # and add all-NA rows to the result.
  count_of_kept_sites <- round(nrow(Value)*percent_of_kept_sites)
  count_of_kept_sites <- max(0, min(count_of_kept_sites, nrow(Value)))
  index_of_Value_rowmax_desc_kept <- index_of_Value_rowmax_desc[seq_len(count_of_kept_sites)]
  phospho_data_meet_percent <- phospho_data[index_of_Value_rowmax_desc_kept,]
  cat('\n The 8th step: filter over with ', percent_of_kept_sites_str, ' cutoff.')
  return(phospho_data_meet_percent)
}
|
| 747 |
+
|
| 748 |
+
|
| 749 |
+
|
| 750 |
+
|
| 751 |
+
|
| 752 |
+
|
| 753 |
+
|
| 754 |
+
|
| 755 |
+
# Differential expression analysis with limma for the first pairwise comparison.
#
# Args:
#   expr_data_frame: first column holds identifiers, the remaining columns hold
#     one sample each.
#   group: group label of every sample column, in column order.
#   comparison_factor: group labels to compare; every pair "later-earlier" (in
#     the given order) becomes a contrast.
#   log2_label: TRUE when the values are already log2-transformed; otherwise
#     log2() is applied first.
#   adjust_method: multiple-testing correction passed to limma::topTable().
#
# Returns:
#   data frame with columns ID, logFC and pvalue (the adjusted P value) for the
#   FIRST contrast only (coef = 1); rows containing NA are removed.
#
# Raises an error when fewer than two groups are given in `comparison_factor`.
analysis_deps_limma2 <- function(expr_data_frame, group, comparison_factor,
                                 log2_label = FALSE, adjust_method = 'BH'){
  requireNamespace('limma')
  requireNamespace('stats')

  expr_Valule <- expr_data_frame[,-1]
  if(!log2_label){
    expr_Valule <- log2(expr_Valule) # have to log
  }

  # Rows without any variation across samples carry no information for the
  # linear model and are removed before fitting.
  expr_Valule_row_variance <- apply(expr_Valule, 1, function(x){
    stats::var(x)
  })
  zero_variance_row_index <- which(expr_Valule_row_variance == 0)
  if(length(zero_variance_row_index)>0){
    expr_Valule <- expr_Valule[-zero_variance_row_index,]
  }
  # NOTE(review): the identifier column is not attached as row names (that line
  # was commented out in the original), so the ID column of the result is taken
  # from the row names of `expr_data_frame` - presumably what callers expect; confirm.

  design <- stats::model.matrix(~ 0 + group)
  cat('\n', 'The matrix of experiment design.')
  print(design)
  colnames(design) <- levels(factor(group))
  rownames(design) <- colnames(expr_Valule)

  group_levels <- comparison_factor
  group_levels_count <- length(group_levels)
  if(group_levels_count<2){
    # Put the reason into the error itself; an empty stop('') is all a caller
    # (e.g. the Shiny UI) would otherwise get to see.
    stop('Do not construct pairwise comparison pattern: comparison_factor needs at least two groups.')
  }
  # Build "treat-ctrl" for every pair, the earlier level acting as control.
  comparison_statement <- NULL
  for(i in seq_len(group_levels_count - 1)){
    ctrl <- group_levels[i]
    for(j in (i + 1):group_levels_count){
      treat <- group_levels[j]
      comparison_statement <- c(comparison_statement, paste(treat, '-', ctrl, sep = ''))
    }
  }
  cat('\n', 'The combination of pairwise comparison(s).')
  cat('\n', comparison_statement, '\n')

  contrast.matrix <- limma::makeContrasts(contrasts = comparison_statement, levels = design)
  cat('\n', 'The matrix of comparison statement, compare other groups with control.')
  print(contrast.matrix) # the matrix of comparison statement, compare other groups with control.

  # step1: fit the linear model per row
  fit <- limma::lmFit(expr_Valule, design)

  # step2: re-express the fit in terms of the contrasts, then moderate the statistics
  fit2 <- limma::contrasts.fit(fit, contrast.matrix) # An important step.
  fit2 <- limma::eBayes(fit2) # default no trend!

  # step3: full table of the first contrast
  alls <- limma::topTable(fit2, coef = 1, adjust.method = adjust_method, p.value = 1, number = Inf) # logFC = log(a/b) = log(a) - log(b) = A - B
  alls <- stats::na.omit(alls)

  ID <- rownames(alls)
  logFC <- alls$logFC # log2
  pvalue <- alls$adj.P.Val

  result_df <- data.frame(ID, logFC, pvalue)

  return(result_df)
}
|
| 842 |
+
|
| 843 |
+
# Find differentially expressed proteins/genes with SAM (package 'samr').
#
# Arguments:
#   expr_data_frame  data frame; column 1 holds the IDs, the remaining columns
#                    hold the expression values (one column per sample).
#   group            group label of every sample column; coerced to a factor.
#   log2_label       TRUE if the values are already log2-transformed; when
#                    FALSE they are log2-transformed here.
#   nperms, rand     forwarded to samr::samr() as 'nperms' and 'random.seed'.
#   minFDR           cutoff compared against column 5 of the samr delta table.
#   samr_plot        if TRUE, draw samr::samr.plot() for the chosen delta.
#
# Returns a list with the named elements 'genes_up_df' and 'genes_down_df';
# each is a data frame (first column 'ID', last column 'qvalue') or NULL when
# samr reports no genes in that direction.
#
# Stops with an error when fewer than two groups are supplied or when no row
# of the delta table falls below minFDR.
analysis_deps_sam2 <- function(expr_data_frame, group, log2_label = FALSE,
                               nperms = 100, rand = NULL, minFDR = 0.05,
                               samr_plot = TRUE){
  requireNamespace('samr')
  requireNamespace('stats')
  expr_ID <- as.vector(expr_data_frame[,1])
  # Always start from the supplied values so that expr_Valule is defined even
  # when the data are already on the log2 scale.
  expr_Valule <- expr_data_frame[,-1]
  if(!log2_label){
    expr_Valule <- log2(expr_Valule) # have to log
  }

  # Drop rows whose values do not vary across samples (variance exactly 0).
  row_variance <- apply(expr_Valule, 1, function(x){
    stats::var(x)
  })
  constant_row_index <- which(row_variance == 0)
  if(length(constant_row_index) > 0){
    expr_ID <- expr_ID[-constant_row_index]
    expr_Valule <- expr_Valule[-constant_row_index,]
  }

  # Coerce once so that character/numeric labels are counted correctly;
  # nlevels() of a non-factor is 0.
  group <- as.factor(group)
  group_nlevels <- nlevels(group)
  if(group_nlevels < 2){
    cat('\n', 'Groups are less than two.', '\n')
    stop('At least two groups are required for SAM analysis.')
  }

  # construct the samr data
  sam_data <- list(x = as.matrix(expr_Valule), y = as.numeric(group),
                   geneid = expr_ID, genenames = expr_ID, logged2 = TRUE)

  if(group_nlevels == 2){
    resp_type <- "Two class unpaired"
  }else{
    resp_type <- "Multiclass"
  }
  cat('\n', resp_type, '\n')
  samr_obj <- samr::samr(sam_data, resp.type = resp_type, nperms = nperms, random.seed = rand)

  # Compute the delta values
  delta_table <- samr::samr.compute.delta.table(samr_obj)

  # Determine a FDR cut-off: first row of the delta table whose 5th column is below minFDR
  index_less_than_min_FDR <- which(delta_table[,5] < minFDR)
  if(length(index_less_than_min_FDR) < 1){
    cat('\n', 'Not found appropiate cutoff less than specific minimum FDR.')
    stop('No delta cutoff with FDR below minFDR was found.')
  }
  delta <- delta_table[index_less_than_min_FDR[1], 1]

  if(samr_plot){
    cat('\n', 'Plot samr plot to view DEPs (or DEGs) distribution.')
    samr::samr.plot(samr_obj, delta)
  }

  # Keep column 3 (renamed 'ID') and columns 7..last of a samr significant-genes
  # table; the last column is divided by 100 and renamed 'qvalue'.
  tidy_siggenes <- function(genes_matrix){
    genes_df <- data.frame(genes_matrix)
    genes_df <- genes_df[, c(3, 7:ncol(genes_df))]
    last_col <- ncol(genes_df)
    # as.character() first so that a factor column is not converted to its level codes
    genes_df[, last_col] <- as.numeric(as.character(genes_df[, last_col]))/100
    kept_names <- colnames(genes_df)
    colnames(genes_df) <- c('ID', kept_names[-c(1, last_col)], 'qvalue')
    genes_df
  }

  # Extract significant genes at the cut-off delta
  siggenes_table <- samr::samr.compute.siggenes.table(samr_obj, delta, sam_data, delta_table, all.genes = FALSE)
  genes_up_df <- NULL
  if(siggenes_table$ngenes.up > 0){
    genes_up_df <- tidy_siggenes(siggenes_table$genes.up)
  }
  genes_lo_df <- NULL
  if(siggenes_table$ngenes.lo > 0){
    genes_lo_df <- tidy_siggenes(siggenes_table$genes.lo)
  }

  # '=' (not '<-') so that the list elements are actually named.
  sam_result_list <- list(
    genes_up_df = genes_up_df,
    genes_down_df = genes_lo_df
  )

  return(sam_result_list)
}
|
| 939 |
+
|
| 940 |
+
|
| 941 |
+
# Run KSEA on every sample column of 'ptypes_data' and gather the results.
#
# Arguments:
#   ptypes_data   data frame; column 1 holds the IDs, the other columns hold
#                 one ratio column per experiment.
#   species       used to build the path of the kinase-substrate relationship
#                 (KSRR) csv file below ./PhosMap_datasets.
#   log2_label    TRUE if the ratios are already log2; otherwise log2 is applied.
#   ratio_cutoff  forwarded to get_substrate_expr_df().
#
# Returns a list with the elements ksea_regulons_regulation_direction_df,
# ksea_regulons_pvalue_df, ksea_regulons_activity_df and
# ksea_kinase_site_substrate_original_ratio_df. Progress is reported through
# withProgress()/incProgress().
get_summary_from_ksea2 <- function(
  ptypes_data,
  species = 'human',
  log2_label = TRUE,
  ratio_cutoff = 3
){
  requireNamespace('utils')
  withProgress(message = "Running KSEA", style = "notification", detail = "processing...",{
    # KSRR: kinase substrate regulation relationship table provided by PhosMap
    ksrr_path <- paste0("./PhosMap_datasets/kinase_substrate_regulation_relationship_table/", species, "/", species, "_ksrr.csv")
    ksrr_table <- utils::read.csv(ksrr_path, header = TRUE, sep= ",", stringsAsFactors = NA)

    site_ids <- as.vector(ptypes_data[,1])
    ratio_df <- ptypes_data[,-1]
    if(!log2_label){
      ratio_df <- log2(ratio_df)
    }
    sample_names <- colnames(ratio_df)
    sample_count <- ncol(ratio_df)

    # Per-sample result holders, filled by position inside the loop
    es_per_sample <- list()
    pvalue_per_sample <- list()
    regulons_per_sample <- list()
    activity_per_sample <- list()
    trans_per_sample <- list()

    cat('\n Starting KSEA')
    for(k in seq_len(sample_count)){
      cat('\n completing: ', k, '/', sample_count)
      ratio_k <- as.numeric(unlist(ratio_df[,k]))
      result_k <- get_ksea_result_list(
        ratio_k, site_ids,
        ksrr_table,
        ksea_activity_i_pvalue = 0.05
      )
      es_per_sample[[k]] <- result_k$ksea_es_i_non_NA
      pvalue_per_sample[[k]] <- result_k$ksea_pvalue_i_non_NA
      regulons_per_sample[[k]] <- result_k$ksea_regulons_i_non_NA
      activity_per_sample[[k]] <- result_k$ksea_activity_i
      trans_per_sample[[k]] <- result_k$ksea_trans_i
      cat('\n completed: ', k, '/', sample_count)
      incProgress(1/sample_count, detail = paste0("\n completed: ", k, "/",sample_count))
    }
    cat('\n Ending KSEA')

    # Console banner listing the tables that are assembled next
    for(banner_line in c(
      '\n Extracting information data frame derived from KSEA',
      '\n ********** Regulation direction from KSEA **********',
      '\n ********** Pvalue from KSEA **********',
      '\n ********** Activity from KSEA **********',
      '\n ********** Kinase_site_substrate quantification matrix after KSEA **********',
      '\n'
    )){
      cat(banner_line)
    }

    # Union of the regulons (kinases) reported for any sample
    all_regulons <- unique(unlist(regulons_per_sample))

    # regulation direction: 1 = activate, 0 = no work, -1 = supress
    direction_df <- get_ksea_regulons_info(all_regulons, trans_per_sample, trans_per_sample,
                                           sample_names)
    # pvalue from ksea
    pvalue_df <- get_ksea_regulons_info(all_regulons, trans_per_sample, pvalue_per_sample,
                                        sample_names)
    # kinase activity based on pvalue and enrichment score computed by ksea
    activity_df <- get_ksea_regulons_info(all_regulons, trans_per_sample, activity_per_sample,
                                          sample_names)

    substrate_ratio_df <- get_substrate_expr_df(site_ids,
                                                ksrr_table,
                                                all_regulons,
                                                ratio_df,
                                                ratio_cutoff)

    summary_df_list_from_ksea <- list(
      ksea_regulons_regulation_direction_df = direction_df,
      ksea_regulons_pvalue_df = pvalue_df,
      ksea_regulons_activity_df = activity_df,
      ksea_kinase_site_substrate_original_ratio_df = substrate_ratio_df
    )

    cat('\n KSEA OK! ^_^')

    return(summary_df_list_from_ksea)
  })
}
|
| 1028 |
+
|
| 1029 |
+
|
| 1030 |
+
# Motif enrichment analysis of 'foreground' against 'background'.
#
# Determines which of the residues S, T and Y occur in 'AA_in_protein', logs
# the run parameters to the console and delegates the search to
# get_motifs_list(). Returns whatever get_motifs_list() returns (the value is
# also printed).
mea_based_on_background <- function(foreground, AA_in_protein, background, motifx_pvalue){
  candidate_residues <- c('S', 'T', 'Y')
  center_vector <- NULL
  for(k in seq_along(candidate_residues)){
    cat(k)
    residue <- candidate_residues[k]
    # keep the residue only if at least one entry of AA_in_protein contains it
    if(any(grepl(residue, AA_in_protein))){
      center_vector <- c(center_vector, residue)
    }
  }

  cat('Start executing motifx and find motif pattern. \n')
  cat('Foreground sequences: ', length(foreground), '.\n', sep = '')
  cat('Background sequences: ', length(background), '.\n', sep = '')
  cat('Phosphorylation: [', center_vector, '] exists in foreground.\n', sep = '')
  cat('Motifx pvalue cutoff: ', motifx_pvalue, '.\n', sep = '')

  motifs_list <- get_motifs_list(foreground, background, center_vector, motifx_pvalue)

  cat('Motifx analysis OK! ^_^', '\n')
  print(motifs_list)
  cat('\n')
  return(motifs_list)
}
|
| 1054 |
+
|
| 1055 |
+
|
| 1056 |
+
# Run get_motif_analysis_summary() once per central residue in 'center_vector'.
#
# Returns a list of the non-NULL summaries, named by the residue that produced
# each one, or NULL when no residue yields a summary (including an empty
# 'center_vector').
get_motifs_list <- function(foreground, background, center_vector, motifx_pvalue){
  summaries <- list()
  summary_names <- NULL
  n_centers <- length(center_vector)
  for(k in seq_len(n_centers)){
    cat(n_centers)
    cat(k)
    residue <- center_vector[k]
    summary_k <- get_motif_analysis_summary(foreground, background, center = residue, min_sequence_count = 1, min_pvalue = motifx_pvalue)
    if(is.null(summary_k)){
      next
    }
    summaries[[length(summaries) + 1]] <- summary_k
    summary_names <- c(summary_names, residue)
  }

  if(length(summaries) == 0){
    return(NULL)
  }
  names(summaries) <- summary_names
  return(summaries)
}
|
| 1079 |
+
|
| 1080 |
+
|
| 1081 |
+
# Repeatedly search motif patterns for one central residue and tabulate them.
#
# The inputs are first passed through check_mea_input(). Then
# seach_motif_pattern() is called in a loop; after every hit, the sequences
# matching the found pattern are removed from both the foreground and the
# background. The loop ends when the search returns NULL or fewer than
# 'min_sequence_count' foreground sequences remain.
#
# Returns a data frame with the columns motif, score, foreground_matches,
# foreground_size, background_matches, background_size and fold_increase
# (foreground match rate divided by background match rate), or NULL when no
# motif was found.
get_motif_analysis_summary <- function(
  foreground,
  background,
  center='S',
  min_sequence_count = 1,
  min_pvalue = 0.01
){
  checked <- check_mea_input(foreground, background, center)
  remaining_fg <- checked$foreground
  remaining_bg <- checked$background
  hits <- list()

  repeat{
    if(length(remaining_fg) < min_sequence_count){
      break
    }
    hit <- seach_motif_pattern(
      remaining_fg,
      remaining_bg,
      min_sequence_count = min_sequence_count,
      min_pvalue = min_pvalue,
      center = center,
      width = checked$width
    )
    if(is.null(hit)){
      break
    }
    hits[[length(hits) + 1]] <- hit
    # sequences explained by this motif are excluded from the next round
    remaining_fg <- remaining_fg[!grepl(hit$motif_pattern, remaining_fg)]
    remaining_bg <- remaining_bg[!grepl(hit$motif_pattern, remaining_bg)]
  }

  motif_col <- vapply(hits, function(h){h$motif_pattern}, c('character'))
  score_col <- vapply(hits, function(h){h$motif_pattern_score}, c(1))
  fg_match_col <- vapply(hits, function(h){h$foreground_matches}, 1)
  fg_size_col <- vapply(hits, function(h){h$foreground_size}, 1)
  bg_match_col <- vapply(hits, function(h){h$background_matches}, 1)
  bg_size_col <- vapply(hits, function(h){h$background_size}, 1)

  summary_df <- data.frame(
    motif = motif_col,
    score = score_col,
    foreground_matches = fg_match_col,
    foreground_size = fg_size_col,
    background_matches = bg_match_col,
    background_size = bg_size_col
  )

  fg_rate <- summary_df$foreground_matches/summary_df$foreground_size
  bg_rate <- summary_df$background_matches/summary_df$background_size
  summary_df$fold_increase <- fg_rate/bg_rate

  if(nrow(summary_df) == 0){
    return(NULL)
  }
  return(summary_df)
}
|
| 1129 |
+
|
| 1130 |
+
|
| 1131 |
+
# Filter, normalize and impute a phosphosite quantification table.
#
# Arguments:
#   data_frame                 data frame whose first 6 columns are ID columns
#                              (one of them named 'AA_in_protein'); all other
#                              columns are quantification values.
#   experiment_code_file_path  tab-separated file with a header and a column
#                              'Experiment_Code'; only its number of rows is used.
#   nathreshold                a row is kept when its number of zero values is
#                              at most (number of experiment codes - nathreshold),
#                              with that limit floored at 0.
#   normmethod                 "global" (divide by the column sum) or "median"
#                              (divide by the column median); both are scaled
#                              by 1e5. Any other value leaves the values unscaled.
#   imputemethod               "0", "minimum" or "minimum/10": replacement for
#                              zeros, based on the smallest non-zero normalized value.
#   topN                       NA, or the number of largest values per column
#                              that are kept before normalizing (others set to 0).
#   mod_types                  residues a site must contain in 'AA_in_protein'
#                              (compared in upper case) to be kept.
#
# Returns list(ptypes_area_df_with_id, ptypes_fot5_df_with_id): the ID columns
# combined with the filtered raw values and with the normalized/imputed values.
# Stops with an error if no row matches 'mod_types'.
get_normalized_data_of_psites2 <- function(data_frame, experiment_code_file_path, nathreshold, normmethod = "global", imputemethod = "minimum/10", topN = NA, mod_types = c('S', 'T', 'Y')){
  requireNamespace('utils')
  # Only the row count is used below, so the column type does not matter.
  experiment_code <- utils::read.table(experiment_code_file_path, header = TRUE, sep = '\t', stringsAsFactors = FALSE)

  # Largest number of zero values a row may contain and still be kept
  max_zero_count <- length(experiment_code$Experiment_Code) - nathreshold
  if(max_zero_count < 0) {
    max_zero_count <- 0
  }
  # Vectorised row filter; drop = FALSE keeps a single value column two-dimensional
  zero_count_per_row <- rowSums(data_frame[, -seq_len(6), drop = FALSE] == 0)
  data_frame <- data_frame[zero_count_per_row <= max_zero_count,]

  cat('\n The 7th step is running.')
  summary_df_ID_Info <- data_frame[, seq_len(6)]
  summary_df_ID_Info$AA_in_protein <- toupper(summary_df_ID_Info$AA_in_protein)
  summary_df_Value <- data_frame[, -(seq_len(6)), drop = FALSE]

  cat('\n Filtering data only including S/T/Y modifications.')
  # Honour 'mod_types' (with the default this is the S/T/Y filter)
  mod_pattern <- paste(toupper(mod_types), collapse = '|')
  index_of_ptypes <- which(grepl(mod_pattern, summary_df_ID_Info$AA_in_protein))
  if(length(index_of_ptypes) == 0){
    stop('No data with modifications taking place on ', paste(mod_types, collapse = '|'))
  }
  ptypes_id_df <- summary_df_ID_Info[index_of_ptypes,]
  ptypes_value <- summary_df_Value[index_of_ptypes, , drop = FALSE]

  Value_FOT5 <- ptypes_value
  for(i in seq_len(ncol(Value_FOT5))){
    x <- as.vector(unlist(ptypes_value[,i]))
    if(!is.na(topN)){
      # Keep the topN largest values; head() tolerates topN > length(x) and
      # setdiff() avoids the empty negative-index pitfall.
      top_index <- utils::head(order(x, decreasing = TRUE), topN)
      x[setdiff(seq_along(x), top_index)] <- 0
    }
    # NOTE(review): the median is taken over all values including zeros, so it
    # can be 0 (giving Inf/NaN) - confirm this is intended.
    if(normmethod == "global") {
      Value_FOT5[,i] <- x/sum(x)*1e5
    } else if(normmethod == "median") {
      Value_FOT5[,i] <- x/stats::median(x)*1e5
    }
  }
  ptypes_value_FOT5 <- as.matrix(Value_FOT5)

  # Impute zeros; skipped when there are none, so min() never sees an empty vector
  index_of_zero <- which(ptypes_value_FOT5==0)
  if(length(index_of_zero) > 0 && imputemethod %in% c("minimum", "minimum/10")){
    min_value_of_non_zero <- min(ptypes_value_FOT5[-index_of_zero])
    if(imputemethod == "minimum/10"){
      min_value_of_non_zero <- min_value_of_non_zero*0.1
    }
    ptypes_value_FOT5[index_of_zero] <- min_value_of_non_zero
  }

  ptypes_df_list <- list(
    ptypes_area_df_with_id = data.frame(ptypes_id_df, ptypes_value),
    ptypes_fot5_df_with_id = data.frame(ptypes_id_df, ptypes_value_FOT5)
  )

  cat('\n The 7th step is over ^_^.')
  return(ptypes_df_list)
}
|
| 1226 |
+
|
| 1227 |
+
|
| 1228 |
+
# Normalize a quantification table column by column and impute zeros.
#
# Arguments:
#   data_frame                 data frame; column 1 holds the IDs, every other
#                              column holds the values of one experiment.
#   experiment_code_file_path  tab-separated file with a header and a column
#                              'Experiment_Code'; its values become the names
#                              of the value columns in the result.
#   normmethod                 "global": positive values are divided by their
#                              column sum; "median": by their column median.
#                              Both are scaled by 1e5. Non-positive values and
#                              any other method leave the values untouched.
#   imputemethod               "0", "minimum" or "minimum/10": replacement for
#                              zeros, based on the smallest non-zero value of
#                              the normalized matrix.
#
# Returns a data frame: the ID column (original name) followed by the
# normalized columns named after the experiment codes.
get_normalized_data_FOT52 <- function(data_frame, experiment_code_file_path, normmethod = "global", imputemethod = "minimum/10"){
  requireNamespace('utils')
  cat('\n The 7th step: Normalize data and filter data only including phosphorylation site.')
  experiment_code <- utils::read.table(experiment_code_file_path, header = TRUE, sep = '\t', stringsAsFactors = FALSE)
  experiment_code <- as.vector(unlist(experiment_code$Experiment_Code))
  data_frame_colnames <- colnames(data_frame)
  ID <- as.vector(data_frame[,1])
  # drop = FALSE: a table with a single experiment must stay two-dimensional
  Value_raw <- data_frame[, -1, drop = FALSE]
  Value_FOT5 <- Value_raw

  for(i in seq_len(ncol(Value_FOT5))){
    x <- Value_raw[,i]
    valid_index <- which(x>0)
    valid_x <- x[valid_index]
    if(normmethod == "global") {
      scale_base <- sum(valid_x)
    } else if(normmethod == "median") {
      scale_base <- stats::median(valid_x)
    } else {
      next
    }
    Value_FOT5[valid_index,i] <- valid_x/scale_base*1e5
  }
  Value_FOT5 <- as.matrix(Value_FOT5)

  # Impute zeros; skipped when there are none, so min() never sees an empty vector
  index_of_zero <- which(Value_FOT5==0)
  if(length(index_of_zero) > 0 && imputemethod %in% c("minimum", "minimum/10")){
    min_value_of_non_zero <- min(Value_FOT5[-index_of_zero])
    if(imputemethod == "minimum/10"){
      min_value_of_non_zero <- min_value_of_non_zero*0.1
    }
    Value_FOT5[index_of_zero] <- min_value_of_non_zero
  }

  data_frame_normaliation <- data.frame(ID, Value_FOT5)
  colnames(data_frame_normaliation) <- c(data_frame_colnames[1], experiment_code)
  return(data_frame_normaliation)
}
|
| 1275 |
+
|
backend/analysis.R
ADDED
|
File without changes
|
backend/fill_missing_values.R
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Replace the missing values (NA) of a numeric table.
#
# Arguments:
#   nadata  data frame or matrix that may contain NA.
#   method  one of
#             "none"        NA -> 0
#             "minimum"     NA -> smallest non-missing value of the table
#             "minimum/10"  NA -> one tenth of that smallest value
#             "bpca"        pcaMethods::pca(method = "bpca")
#             "lls"         pcaMethods::llsImpute (only run when NA are present)
#             "impseq"      rrcovNA::impSeq
#             "impseqrob"   rrcovNA::impSeqRob(alpha = 0.9)
#             "knnmethod"   impute::impute.knn(k = 10)
#             "colmedian"   e1071::impute(what = "median") on the table
#             "rowmedian"   e1071::impute(what = "median") on the transposed table
#
# Returns the completed table. For any other 'method' (and for "lls" without
# NA) the input is returned unchanged.
#
# Package functions are called through '::' so the result does not depend on
# which packages happen to be attached by the caller.
fill_missing_values <- function(nadata, method) {
  df <- df1 <- nadata
  if (method == "none") {
    df[is.na(df)] <- 0
  } else if (method == "minimum") {
    fill_value <- min(df1, na.rm = TRUE)
    df[is.na(df)] <- fill_value
  } else if (method == "minimum/10") {
    fill_value <- min(df1, na.rm = TRUE) / 10
    df[is.na(df)] <- fill_value
  } else if (method == "bpca") {
    # take medium time
    data_zero1 <- pcaMethods::pca(as.matrix(df1), nPcs = ncol(df1)-1, method = "bpca", maxSteps =100)
    df <- pcaMethods::completeObs(data_zero1)
  } else if (method == "lls" && anyNA(df1)) {
    # take long time
    data_zero1 <- pcaMethods::llsImpute(t(df1), k = 10, allVariables = TRUE)
    df <- t(pcaMethods::completeObs(data_zero1))
  } else if (method == "impseq") {
    df <- rrcovNA::impSeq(df1)
  } else if(method == "impseqrob"){
    data_zero1 <- rrcovNA::impSeqRob(df1, alpha = 0.9)
    df <- data_zero1$x
  } else if(method == "knnmethod"){
    data_zero1 <- impute::impute.knn(as.matrix(df1), k = 10, rowmax = 1, colmax = 1)
    df <- data_zero1$data
  } else if(method == "colmedian"){
    df <- e1071::impute(df1, what = "median")
  } else if(method == "rowmedian"){
    dfx <- e1071::impute(t(df1), what = "median")
    df <- t(dfx)
  }
  return(df)
}
|
backend/get_aligned_seq_for_mea02.R
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Cut a fixed-width sequence window around every phosphosite.
#
# Arguments:
#   ID             protein identifiers, looked up in column 1 of the fasta table.
#   Sequence       peptide sequences; copied to the result unchanged.
#   AA_in_protein  site labels such as "S15"; the site position is the number
#                  that follows the first S/T/Y character.
#   fixed_length   window width; the window spans floor(fixed_length/2)
#                  residues on either side of the site.
#   species, fasta_type  select the fasta table
#                  ./PhosMap_datasets/fasta_library/<fasta_type>/<species>/<species>_<fasta_type>_fasta.txt
#                  (tab separated, with header; column 2 holds the sequences).
#
# Returns data.frame(ID, Sequence, AA_in_protein, aligned_seq). Window positions
# that fall outside the protein are written as '_', so the site always stays in
# the middle of the window. aligned_seq is NA when the ID is not in the fasta
# table or no site position can be parsed. When an ID occurs more than once in
# the fasta table, its first entry is used.
get_aligned_seq_for_mea02 <- function(ID, Sequence, AA_in_protein, fixed_length, species = 'human', fasta_type = 'refseq'){
  requireNamespace('utils')
  cat('Aligned sequence based on fasta library for motif enrichment anlysis.\n')

  fasta_library_dir = "./PhosMap_datasets/fasta_library/"
  fasta_data <- utils::read.table((paste0(fasta_library_dir, fasta_type, "/", species, "/", species, "_", fasta_type, "_fasta.txt")), sep = '\t', header = TRUE)
  fasta_ids <- as.character(fasta_data[,1])
  fasta_seqs <- as.character(fasta_data[,2])

  border_limit <- floor(fixed_length/2)
  GI_nrow <- length(ID)
  # Pre-allocated result; entries stay NA unless a window can be built
  aligned_seq <- rep(NA_character_, GI_nrow)
  cat('Pre-align:', GI_nrow, 'phos-pepitdes.\n')
  cat('Fixed sequence length is ', fixed_length, '.\n', sep = '')
  cat('It needs few time.\n')
  for(i in seq_len(GI_nrow)){
    # Second piece after splitting at S/T/Y is the site position, e.g. "S15" -> 15
    site_pieces <- strsplit(as.character(AA_in_protein[i]), "[STY]")[[1]]
    loc_index <- suppressWarnings(as.numeric(site_pieces[2]))
    # match() returns the first hit only, so duplicated fasta IDs cannot
    # produce more than one sequence per site
    index <- match(as.character(ID[i]), fasta_ids)
    if(!is.na(index) && !is.na(loc_index) && !is.na(fasta_seqs[index])){
      refseq <- fasta_seqs[index]
      window_pos <- seq(loc_index - border_limit, loc_index + border_limit)
      residues <- substring(refseq, window_pos, window_pos)
      # Positions before the first or after the last residue become padding
      residues[window_pos < 1 | window_pos > nchar(refseq)] <- '_'
      aligned_seq[i] <- paste(residues, collapse = '')
    }
    if(i %% 5000 == 0){
      cat('Aligned:', i, 'phos-pepitdes.\n')
    }
    if(i == GI_nrow){
      cat('Aligned:', i, 'phos-pepitdes.\n')
      cat('Finish OK! ^_^\n')
    }
  }
  cat('\n')
  aligned_sequence_df_based_on_fasta_library <- data.frame(ID, Sequence, AA_in_protein, aligned_seq)
  return(aligned_sequence_df_based_on_fasta_library)
}
|
backend/get_normalized_data_of_psites3.R
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Filter, normalize and impute a phosphosite quantification table.
#
# Arguments:
#   data_frame                 data frame whose first 6 columns are ID columns
#                              (one of them named 'AA_in_protein'); all other
#                              columns are quantification values.
#   experiment_code_file_path  tab-separated file with a header and a column
#                              'Experiment_Code'; only its number of rows is used.
#   nathreshold                a row is kept when its number of zero values is
#                              at most (number of experiment codes - nathreshold),
#                              with that limit floored at 0.
#   normmethod                 "global" (divide by the column sum) or "median"
#                              (divide by the column median); both are scaled
#                              by 1e5. Any other value leaves the values unscaled.
#   imputemethod               how zeros (treated as missing) are filled after
#                              normalization: "none", "minimum", "minimum/10",
#                              "bpca", "lls", "impseq", "impseqrob",
#                              "knnmethod", "colmedian" or "rowmedian". Any
#                              other value leaves them as NA.
#   topN                       NA, or the number of largest values per column
#                              that are kept before normalizing (others set to 0).
#   mod_types                  residues a site must contain in 'AA_in_protein'
#                              (compared in upper case) to be kept.
#
# Returns list(ptypes_area_df_with_id, ptypes_fot5_df_with_id): the ID columns
# combined with the filtered raw values and with the normalized/imputed values.
# Stops with an error if no row matches 'mod_types'.
get_normalized_data_of_psites3 <- function(data_frame, experiment_code_file_path, nathreshold, normmethod = "global", imputemethod = "minimum/10", topN = NA, mod_types = c('S', 'T', 'Y')){
  requireNamespace('utils')
  # Only the row count is used below, so the column type does not matter.
  experiment_code <- utils::read.table(experiment_code_file_path, header = TRUE, sep = '\t', stringsAsFactors = FALSE)

  # Largest number of zero values a row may contain and still be kept
  max_zero_count <- length(experiment_code$Experiment_Code) - nathreshold
  if(max_zero_count < 0) {
    max_zero_count <- 0
  }
  # Vectorised row filter; drop = FALSE keeps a single value column two-dimensional
  zero_count_per_row <- rowSums(data_frame[, -seq_len(6), drop = FALSE] == 0)
  data_frame <- data_frame[zero_count_per_row <= max_zero_count,]

  cat('\n The 7th step is running.')
  summary_df_ID_Info <- data_frame[, seq_len(6)]
  summary_df_ID_Info$AA_in_protein <- toupper(summary_df_ID_Info$AA_in_protein)
  summary_df_Value <- data_frame[, -(seq_len(6)), drop = FALSE]

  cat('\n Filtering data only including S/T/Y modifications.')
  # Honour 'mod_types' (with the default this is the S/T/Y filter)
  mod_pattern <- paste(toupper(mod_types), collapse = '|')
  index_of_ptypes <- which(grepl(mod_pattern, summary_df_ID_Info$AA_in_protein))
  if(length(index_of_ptypes) == 0){
    stop('No data with modifications taking place on ', paste(mod_types, collapse = '|'))
  }
  ptypes_id_df <- summary_df_ID_Info[index_of_ptypes,]
  ptypes_value <- summary_df_Value[index_of_ptypes, , drop = FALSE]

  Value_FOT5 <- ptypes_value
  for(i in seq_len(ncol(Value_FOT5))){
    x <- as.vector(unlist(ptypes_value[,i]))
    if(!is.na(topN)){
      # Keep the topN largest values; head() tolerates topN > length(x) and
      # setdiff() avoids the empty negative-index pitfall.
      top_index <- utils::head(order(x, decreasing = TRUE), topN)
      x[setdiff(seq_along(x), top_index)] <- 0
    }
    # NOTE(review): the median is taken over all values including zeros, so it
    # can be 0 (giving Inf/NaN) - confirm this is intended.
    if(normmethod == "global") {
      Value_FOT5[,i] <- x/sum(x)*1e5
    } else if(normmethod == "median") {
      Value_FOT5[,i] <- x/stats::median(x)*1e5
    }
  }

  # Zeros are treated as missing values from here on
  ptypes_value_FOT5 <- as.data.frame(as.matrix(Value_FOT5))
  ptypes_value_FOT5[ptypes_value_FOT5 == 0] <- NA

  # Local imputation helper. Package functions are called through '::' so the
  # result does not depend on which packages the caller has attached.
  fill_missing_values01 <- function(nadata, method) {
    df <- df1 <- nadata
    if (method == "none") {
      df[is.na(df)] <- 0
    } else if (method == "minimum") {
      fill_value <- min(df1, na.rm = TRUE)
      df[is.na(df)] <- fill_value
    } else if (method == "minimum/10") {
      fill_value <- min(df1, na.rm = TRUE) / 10
      df[is.na(df)] <- fill_value
    } else if (method == "bpca") {
      # take medium time
      data_zero1 <- pcaMethods::pca(as.matrix(df1), nPcs = ncol(df1)-1, method = "bpca", maxSteps =100)
      df <- pcaMethods::completeObs(data_zero1)
    } else if (method == "lls" && anyNA(df1)) {
      # take long time; only run when something is missing, matching the
      # stand-alone fill_missing_values() helper of this project
      data_zero1 <- pcaMethods::llsImpute(t(df1), k = 10, allVariables = TRUE)
      df <- t(pcaMethods::completeObs(data_zero1))
    } else if (method == "impseq") {
      df <- rrcovNA::impSeq(df1)
    } else if(method=="impseqrob"){
      data_zero1 <- rrcovNA::impSeqRob(df1, alpha=0.9)
      df <- data_zero1$x
    } else if(method=="knnmethod"){
      data_zero1 <- impute::impute.knn(as.matrix(df1), k = 10, rowmax = 1, colmax = 1)
      df <- data_zero1$data
    } else if(method=="colmedian"){
      df <- e1071::impute(df1, what ="median")
    } else if(method=="rowmedian"){
      dfx <- e1071::impute(t(df1), what ="median")
      df <- t(dfx)
    }
    return(df)
  }
  ptypes_value_FOT5 = fill_missing_values01(ptypes_value_FOT5, imputemethod)

  ptypes_df_list <- list(
    ptypes_area_df_with_id = data.frame(ptypes_id_df, ptypes_value),
    ptypes_fot5_df_with_id = data.frame(ptypes_id_df, ptypes_value_FOT5)
  )

  cat('\n The 7th step is over ^_^.')
  return(ptypes_df_list)
}
|
backend/get_normalized_data_of_psites4.R
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Filter, normalize and impute phosphosite intensities, imputing missing
# values separately within each experimental group.
#
# Arguments:
#   data_frame                 Data frame whose first 6 columns are ID columns
#                              (one of them named `AA_in_protein`) and whose
#                              remaining columns are per-sample intensities.
#                              A value of 0 is treated as "missing".
#   experiment_code_file_path  Tab-separated file with a header and an
#                              `Experiment_Code` column; only its row count is
#                              used.
#   nathreshold                A row is kept when its number of zero values is
#                              <= (number of experiment codes - nathreshold),
#                              with that difference floored at 0.
#   normmethod                 "global" divides each column by its sum,
#                              "median" by its median; both scale by 1e5. Any
#                              other value leaves the intensities unscaled.
#   imputemethod               Passed as `method` to fill_missing_values().
#   topN                       If not NA, only the topN largest values of each
#                              column are kept (the rest are set to 0) before
#                              normalization.
#   mod_types                  Residue letters to keep; a row is kept when its
#                              upper-cased `AA_in_protein` contains any of them.
#   design_file                Data frame whose first column holds sample names
#                              (column names of the intensity part of
#                              `data_frame`) and which has a `Group` column.
#
# Returns:
#   A list with `ptypes_area_df_with_id` (IDs + raw intensities) and
#   `ptypes_fot5_df_with_id` (IDs + normalized, imputed intensities). The
#   columns of the latter are ordered group by group, following
#   unique(design_file$Group). An empty list is returned when a group-wise
#   imputation method is requested but the data fail the missing-value gate
#   below.
#
# Relies on fill_missing_values(data, method = ...) being defined elsewhere
# (presumably backend/fill_missing_values.R -- verify against the app's
# source() calls).
get_normalized_data_of_psites4 <- function(data_frame, experiment_code_file_path, nathreshold, normmethod = "global", imputemethod = "minimum/10", topN = NA, mod_types = c('S', 'T', 'Y'), design_file){
  requireNamespace('utils')
  id_cols <- seq_len(6)

  experiment_code <- utils::read.table(experiment_code_file_path, header = TRUE, sep = '\t', stringsAsFactors = FALSE)

  # Convert "minimum number of detections" into "maximum number of zeros".
  nathreshold <- length(experiment_code$Experiment_Code) - nathreshold
  if(nathreshold < 0) {
    nathreshold <- 0
  }

  # Vectorized row filter; also well-defined for a 0-row input.
  zero_count <- rowSums(data_frame[, -id_cols, drop = FALSE] == 0)
  data_frame <- data_frame[zero_count <= nathreshold, ]

  cat('\n The 7th step is running.')
  summary_df_ID_Info <- data_frame[, id_cols]
  summary_df_ID_Info$AA_in_protein <- toupper(summary_df_ID_Info$AA_in_protein)
  summary_df_Value <- data_frame[, -id_cols, drop = FALSE]

  cat('\n Filtering data only including S/T/Y modifications.')
  # Honor `mod_types` (default S/T/Y) instead of a hard-coded residue list.
  residue_pattern <- paste(mod_types, collapse = '|')
  index_of_ptypes <- which(grepl(residue_pattern, summary_df_ID_Info$AA_in_protein))
  if(length(index_of_ptypes) > 0){
    ptypes_id_df <- summary_df_ID_Info[index_of_ptypes,]
    ptypes_value <- summary_df_Value[index_of_ptypes, , drop = FALSE]
  }else{
    message('No data with modifications taking place on ', paste(mod_types, collapse = '|'))
    stop('')
  }

  # Pick the per-column denominator; NULL means "leave values unscaled".
  denominator <- if(normmethod == "global") {
    sum
  } else if(normmethod == "median") {
    stats::median
  } else {
    NULL
  }

  Value_FOT5 <- ptypes_value
  if(!is.null(denominator)){
    for(i in seq_len(ncol(Value_FOT5))){
      x <- as.vector(unlist(ptypes_value[, i]))
      if(!is.na(topN)){
        # Cap at length(x) so an oversized topN cannot produce NA indices.
        top_idx <- order(x, decreasing = TRUE)[seq_len(min(topN, length(x)))]
        x[-top_idx] <- 0
        # NOTE(review): with normmethod = "median" and a small topN most
        # entries are 0 here, so the median can be 0 and the division below
        # yields Inf/NaN -- confirm this combination is intended.
      }
      Value_FOT5[, i] <- x / denominator(x) * 1e5
    }
  }

  # Zeros mark missing measurements; turn them into NA for the imputation.
  ptypes_value_FOT5 <- as.data.frame(as.matrix(Value_FOT5))
  ptypes_value_FOT5[ptypes_value_FOT5 == 0] <- NA

  # Sample names per group. as.character() guards against factor columns,
  # which would otherwise index columns by their integer codes.
  sample_names <- as.character(design_file[[1]])
  group_samples <- lapply(unique(design_file$Group), function(group) {
    sample_names[design_file$Group == group]
  })

  # Gate for the group-wise methods: refuse to continue when any group has a
  # row containing a missing value.
  # NOTE(review): as written this blocks these methods whenever any NA is
  # present at all; possibly the intent was "a row is missing in ALL samples
  # of a group". Behavior is kept unchanged -- confirm with the author.
  blocked <- FALSE
  if (imputemethod %in% c('bpca', 'rowmedian', 'lls', 'knnmethod')) {
    for (samples in group_samples) {
      group_data <- ptypes_value_FOT5[, samples, drop = FALSE]
      if (any(is.na(group_data))) {
        blocked <- TRUE
        break
      }
    }
  }
  if (blocked) {
    return(list())
  }

  # Impute each group on its own, then stitch the groups back together.
  # A local list is used so that no same-named object from an enclosing
  # environment can leak into the result.
  filled_groups <- lapply(group_samples, function(samples) {
    fill_missing_values(ptypes_value_FOT5[, samples, drop = FALSE], method = imputemethod)
  })
  ptypes_value_FOT5 <- Reduce(cbind, filled_groups)

  ptypes_df_list <- list(
    ptypes_area_df_with_id = data.frame(ptypes_id_df, ptypes_value),
    ptypes_fot5_df_with_id = data.frame(ptypes_id_df, ptypes_value_FOT5)
  )

  cat('\n The 7th step is over ^_^.')
  return(ptypes_df_list)
}
|
backend/import_extract.R
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Unzip an uploaded archive and return the path of the folder it contained.
#
# Arguments:
#   inputfile  List-like object with `datapath` (location of the zip file on
#              disk) and `name` (the archive's file name). Presumably the
#              value of a Shiny fileInput -- verify against the caller.
#   pathname   Directory to extract the archive into.
#
# Returns:
#   The normalized path <pathname>/<archive name without its extension>,
#   i.e. the archive is expected to contain a top-level folder named after
#   itself.
get_file_path <- function(inputfile, pathname) {
  zip::unzip(inputfile$datapath, exdir = pathname)
  # Strip the real extension rather than blindly cutting 4 characters, so
  # names such as "data.ZIP" or a name without an extension are handled.
  archive_stem <- tools::file_path_sans_ext(inputfile$name)
  normalizePath(file.path(pathname, archive_stem))
}
|
| 14 |
+
|
| 15 |
+
# List the entries found under `path`, with their file extensions removed.
#
# Arguments:
#   path   Directory to inspect.
#   depth  When equal to 2, the entries one level below `path` are listed:
#          every entry directly inside `path` is treated as a directory and
#          the names inside all of them are returned together. Any other
#          value lists `path` itself.
#
# Returns:
#   Character vector of names without extension (not full paths).
get_target_name <- function(path, depth) {
  if (depth == 2) {
    path <- normalizePath(list.files(path, full.names = TRUE))
  }
  # Remove the actual extension instead of assuming it is 4 characters long.
  tools::file_path_sans_ext(list.files(path))
}
|
| 23 |
+
|
backend/preprocess.R
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# imageMap <- function(inputId, imgsrc, opts) {
|
| 2 |
+
# areas <- lapply(names(opts), function(n)
|
| 3 |
+
# shiny::tags$area(title=n, coords=opts[[n]],
|
| 4 |
+
# href="#", shape="poly"))
|
| 5 |
+
# js <- paste0("$(document).on('click', 'map area', function(evt) {
|
| 6 |
+
# evt.preventDefault();
|
| 7 |
+
# var val = evt.target.title;
|
| 8 |
+
# Shiny.onInputChange('", inputId, "', val);})")
|
| 9 |
+
# list(
|
| 10 |
+
# shiny::tags$img(src=imgsrc, usemap=paste0("#", inputId),
|
| 11 |
+
# shiny::tags$head(tags$script(shiny::HTML(js)))),
|
| 12 |
+
# shiny::tags$map(name=inputId, areas))
|
| 13 |
+
# }
|
| 14 |
+
|
| 15 |
+
# Build a clickable HTML image map that reports the clicked area to Shiny.
#
# Arguments:
#   inputId  Name of the image map; it is used as the <map> name, referenced
#            by the image's `usemap` attribute, and as the Shiny input that
#            receives the title of the clicked area.
#   imgsrc   Value for the image's `src` attribute.
#   opts     Named list; each name becomes an area title and each value the
#            `coords` attribute of a polygon-shaped <area>.
#
# Returns:
#   A list of two tags: the <img> (carrying the click-handler script) and the
#   <map> holding one <area> per element of `opts`.
imageMap <- function(inputId, imgsrc, opts) {
  areas <- lapply(names(opts), function(n) {
    shiny::tags$area(title = n, coords = opts[[n]], href = "#", shape = "poly")
  })
  # Forward the clicked area's title to the server. A bare print(...) in
  # browser JavaScript would open the print dialog instead.
  js <- paste0("$(document).on('click', 'map area', function(evt) {
      evt.preventDefault();
      var val = evt.target.title;
      Shiny.onInputChange('", inputId, "', val);})")
  list(
    shiny::tags$img(src = imgsrc, usemap = paste0("#", inputId),
                    shiny::tags$head(shiny::tags$script(shiny::HTML(js)))),
    shiny::tags$map(name = inputId, areas))
}
|
backend/visualization_deps_with_scatter02.R
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#' Visualize differentially expressed results with a volcano-style scatter plot
#'
#' Plots logFC against -log10(pvalue) with ggplot2 and draws dashed guide
#' lines at the fold-change and p-value cutoffs.
#'
#' @param deps_data a data frame containing the columns \code{logFC} and \code{pvalue}.
#' @param minFC a numeric for the minimum fold change; vertical guide lines are
#'   drawn at \code{-log2(minFC)} and \code{log2(minFC)}.
#' @param minPvalue a numeric for the significance cutoff; a horizontal guide
#'   line is drawn at \code{-log10(minPvalue)}.
#' @param main an overall title for the plot.
#' @param show_text accepted for interface compatibility; text labels are
#'   currently not drawn by this ggplot2 implementation.
#' @param min_up_text accepted for interface compatibility; currently unused.
#' @param min_down_text accepted for interface compatibility; currently unused.
#'
#' @author Dongdong Zhan and Mengsha Tong
#' @export
#'
#' @return A ggplot object showing the differentially expressed results.
#'
#' @examples
#' deps <- data.frame(logFC = c(-2.5, 0.1, 3), pvalue = c(0.001, 0.5, 0.0001))
#' visualization_deps_with_scatter02(deps, minFC = 2, minPvalue = 0.05,
#'   main = 'Differentially expressed proteins \n with limma')
#'
visualization_deps_with_scatter02 <- function(
  deps_data,
  minFC = 2,
  minPvalue = 0.05,
  main = 'Differentially expressed proteins',
  show_text = FALSE,
  min_up_text = 15,
  min_down_text = 15
){
  # Cutoffs on the plotted (log) scales, derived from the arguments.
  fc_cutoff <- log2(minFC)
  p_cutoff <- -log10(minPvalue)

  p <- ggplot2::ggplot(
    # data and aesthetic mapping
    deps_data, ggplot2::aes(x = logFC, y = -log10(pvalue))) +
    ggplot2::geom_point(alpha = 0.4, size = 3.5) +
    # NOTE(review): no colour aesthetic is mapped, so this scale currently has
    # no visible effect; the three colours look intended for down / not
    # significant / up classes -- confirm with the author.
    ggplot2::scale_color_manual(values = c("#546de5", "#d2dae2", "#ff4757")) +
    # guide lines at the fold-change and significance cutoffs
    ggplot2::geom_vline(xintercept = c(-fc_cutoff, fc_cutoff), lty = 4, col = "black", lwd = 0.8) +
    ggplot2::geom_hline(yintercept = p_cutoff, lty = 4, col = "black", lwd = 0.8) +
    # title and axis labels
    ggplot2::labs(title = main,
                  x = "log2(fold change)",
                  y = "-log10 (p-value)") +
    ggplot2::theme_bw() +
    # centered title, legend on the right without a legend title
    ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5),
                   legend.position = "right",
                   legend.title = ggplot2::element_blank())
  p
}
|
examplefile/Clinicaltest.csv
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
PatientID,status,time
|
| 2 |
+
Exp027012,0,1290
|
| 3 |
+
Exp027013,0,1187
|
| 4 |
+
Exp027014,1,1106
|
| 5 |
+
Exp027015,1,1264
|
| 6 |
+
Exp027016,1,948
|
| 7 |
+
Exp027017,0,1401
|
| 8 |
+
Exp027018,1,961
|
| 9 |
+
Exp027019,0,1867
|
| 10 |
+
Exp027020,1,986
|
| 11 |
+
Exp027021,0,1593
|
| 12 |
+
Exp027022,1,566
|
| 13 |
+
Exp027023,1,1353
|
| 14 |
+
Exp027024,0,1592
|
| 15 |
+
Exp027025,0,1468
|
| 16 |
+
Exp027026,1,120
|
| 17 |
+
Exp027027,1,145
|
| 18 |
+
Exp027028,0,1471
|
| 19 |
+
Exp027029,1,507
|
| 20 |
+
Exp027030,1,1294
|
| 21 |
+
Exp027031,1,317
|
| 22 |
+
Exp027032,1,235
|
| 23 |
+
Exp027033,0,1186
|
| 24 |
+
Exp027034,1,1204
|
| 25 |
+
Exp027035,0,1253
|
| 26 |
+
Exp027036,1,659
|
| 27 |
+
Exp027037,0,1177
|
| 28 |
+
Exp027038,1,807
|
| 29 |
+
Exp027039,1,238
|
| 30 |
+
Exp027040,1,498
|
| 31 |
+
Exp027041,0,781
|
| 32 |
+
Exp027042,1,497
|
| 33 |
+
Exp027043,1,424
|
| 34 |
+
Exp027044,1,407
|
| 35 |
+
Exp027045,1,1421
|
| 36 |
+
Exp027046,0,1386
|
| 37 |
+
Exp027047,0,1390
|
| 38 |
+
Exp027048,0,1348
|
| 39 |
+
Exp027049,0,716
|
| 40 |
+
Exp027050,0,1250
|
examplefile/analysistools/Clinical_for_Demo.csv
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
PatientID,status,time
|
| 2 |
+
Exp027012,0,1290
|
| 3 |
+
Exp027013,0,1187
|
| 4 |
+
Exp027014,1,1106
|
| 5 |
+
Exp027015,1,1264
|
| 6 |
+
Exp027016,1,948
|
| 7 |
+
Exp027017,0,1401
|
| 8 |
+
Exp027018,1,961
|
| 9 |
+
Exp027019,0,1867
|
| 10 |
+
Exp027020,1,986
|
| 11 |
+
Exp027021,0,1593
|
| 12 |
+
Exp027022,1,566
|
| 13 |
+
Exp027023,1,1353
|
| 14 |
+
Exp027024,0,1592
|
| 15 |
+
Exp027025,0,1468
|
| 16 |
+
Exp027026,1,120
|
| 17 |
+
Exp027027,1,145
|
| 18 |
+
Exp027028,0,1471
|
| 19 |
+
Exp027029,1,507
|
| 20 |
+
Exp027030,1,1294
|
| 21 |
+
Exp027031,1,317
|
| 22 |
+
Exp027032,1,235
|
| 23 |
+
Exp027033,0,1186
|
| 24 |
+
Exp027034,1,1204
|
| 25 |
+
Exp027035,0,1253
|
| 26 |
+
Exp027036,1,659
|
| 27 |
+
Exp027037,0,1177
|
| 28 |
+
Exp027038,1,807
|
| 29 |
+
Exp027039,1,238
|
| 30 |
+
Exp027040,1,498
|
| 31 |
+
Exp027041,0,781
|
| 32 |
+
Exp027042,1,497
|
| 33 |
+
Exp027043,1,424
|
| 34 |
+
Exp027044,1,407
|
| 35 |
+
Exp027045,1,1421
|
| 36 |
+
Exp027046,0,1386
|
| 37 |
+
Exp027047,0,1390
|
| 38 |
+
Exp027048,0,1348
|
| 39 |
+
Exp027049,0,716
|
| 40 |
+
Exp027050,0,1250
|
examplefile/analysistools/Clinical_for_Pre.csv
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
PatientID,status,time
|
| 2 |
+
Exp027015,1,1264
|
| 3 |
+
Exp027016,1,948
|
| 4 |
+
Exp027017,0,1401
|
| 5 |
+
Exp027031,1,317
|
| 6 |
+
Exp027032,1,235
|
| 7 |
+
Exp027033,0,1186
|
| 8 |
+
Exp027046,0,1386
|
| 9 |
+
Exp027047,0,1390
|
| 10 |
+
Exp027048,0,1348
|
examplefile/analysistools/Clinicaltest.csv
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
PatientID,status,time
|
| 2 |
+
Exp027012,0,1290
|
| 3 |
+
Exp027020,1,986
|
| 4 |
+
Exp027028,0,1471
|
| 5 |
+
Exp027036,1,659
|
| 6 |
+
Exp027044,1,407
|
examplefile/analysistools/PreNormBasedProSummary.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/analysistools/phosphorylation_exp_design_info.txt
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Experiment_Code Group Description
|
| 2 |
+
Exp027012 0 ctr_0h_R1_IMAC_1.raw
|
| 3 |
+
Exp027013 0 ctr_0h_R1_IMAC_2.raw
|
| 4 |
+
Exp027014 0 ctr_0h_R1_IMAC_3.raw
|
| 5 |
+
Exp027015 0 ctr_0h_R2_IMAC_1.raw
|
| 6 |
+
Exp027016 0 ctr_0h_R2_IMAC_2.raw
|
| 7 |
+
Exp027017 0 ctr_0h_R2_IMAC_3.raw
|
| 8 |
+
Exp027018 0 ctr_0h_R3_IMAC_1.raw
|
| 9 |
+
Exp027019 0 ctr_0h_R3_IMAC_2.raw
|
| 10 |
+
Exp027020 2 PLX_2h_R1_IMAC_1.raw
|
| 11 |
+
Exp027021 2 PLX_2h_R1_IMAC_2.raw
|
| 12 |
+
Exp027022 2 PLX_2h_R1_IMAC_3.raw
|
| 13 |
+
Exp027023 2 PLX_2h_R2_IMAC_2.raw
|
| 14 |
+
Exp027024 2 PLX_2h_R2_IMAC_3.raw
|
| 15 |
+
Exp027025 2 PLX_2h_R3_IMAC_1.raw
|
| 16 |
+
Exp027026 2 PLX_2h_R3_IMAC_2.raw
|
| 17 |
+
Exp027027 2 PLX_2h_R3_IMAC_3.raw
|
| 18 |
+
Exp027028 6 PLX_6h_R1_IMAC_1.raw
|
| 19 |
+
Exp027029 6 PLX_6h_R1_IMAC_2.raw
|
| 20 |
+
Exp027030 6 PLX_6h_R1_IMAC_3.raw
|
| 21 |
+
Exp027031 6 PLX_6h_R2_IMAC_1.raw
|
| 22 |
+
Exp027032 6 PLX_6h_R2_IMAC_2.raw
|
| 23 |
+
Exp027033 6 PLX_6h_R2_IMAC_3.raw
|
| 24 |
+
Exp027034 6 PLX_6h_R3_IMAC_1.raw
|
| 25 |
+
Exp027035 6 PLX_6h_R3_IMAC_2.raw
|
| 26 |
+
Exp027036 24 PLX_24h_R1_IMAC_1.raw
|
| 27 |
+
Exp027037 24 PLX_24h_R1_IMAC_2.raw
|
| 28 |
+
Exp027038 24 PLX_24h_R1_IMAC_3.raw
|
| 29 |
+
Exp027039 24 PLX_24h_R2_IMAC_1.raw
|
| 30 |
+
Exp027040 24 PLX_24h_R2_IMAC_2.raw
|
| 31 |
+
Exp027041 24 PLX_24h_R2_IMAC_3.raw
|
| 32 |
+
Exp027042 24 PLX_24h_R3_IMAC_1.raw
|
| 33 |
+
Exp027043 24 PLX_24h_R3_IMAC_3.raw
|
| 34 |
+
Exp027044 48 PLX_48h_R1_IMAC_2.raw
|
| 35 |
+
Exp027045 48 PLX_48h_R1_IMAC_3.raw
|
| 36 |
+
Exp027046 48 PLX_48h_R2_IMAC_1.raw
|
| 37 |
+
Exp027047 48 PLX_48h_R2_IMAC_2.raw
|
| 38 |
+
Exp027048 48 PLX_48h_R2_IMAC_3.raw
|
| 39 |
+
Exp027049 48 PLX_48h_R3_IMAC_1.raw
|
| 40 |
+
Exp027050 48 PLX_48h_R3_IMAC_2.raw
|
examplefile/data_frame_normalization_with_control_no_pair.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/download/anaysis_demo.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:23bba0455c82f9d839711c3239ef883b8e86a45d20dd5ab18758ab4e19bc8b02
|
| 3 |
+
size 407517
|
examplefile/download/mascot_xml.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e52baf8a2cb215e2c42e8470f29382a0ca5ce7ed5f19c4104d2d16c557b87c9
|
| 3 |
+
size 21818471
|
examplefile/download/motif_kinase_relation.xlsx
ADDED
|
Binary file (40.8 kB). View file
|
|
|
examplefile/download/phosphorylation_peptide_txt.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aebf94fc6d5de4b116fbeb89856cbed50a34bf37d81e92dbce12d61cb60a53ee
|
| 3 |
+
size 3944372
|
examplefile/download/profiling_gene_txt.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e35cbdc0f2bbb77e85a421ec390f3a7e105ba86ab66e498561b7e802c86908c5
|
| 3 |
+
size 1384071
|
examplefile/mascot/phosphorylation_exp_design_info.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Experiment_Code Group Description
|
| 2 |
+
Exp027015 0 ctr_0h_R2_IMAC_1.raw
|
| 3 |
+
Exp027016 0 ctr_0h_R2_IMAC_2.raw
|
| 4 |
+
Exp027017 0 ctr_0h_R2_IMAC_3.raw
|
| 5 |
+
Exp027031 6 PLX_6h_R2_IMAC_1.raw
|
| 6 |
+
Exp027032 6 PLX_6h_R2_IMAC_2.raw
|
| 7 |
+
Exp027033 6 PLX_6h_R2_IMAC_3.raw
|
| 8 |
+
Exp027046 48 PLX_48h_R2_IMAC_1.raw
|
| 9 |
+
Exp027047 48 PLX_48h_R2_IMAC_2.raw
|
| 10 |
+
Exp027048 48 PLX_48h_R2_IMAC_3.raw
|
examplefile/mascot/phosphorylation_peptide_txt/Exp027015_peptide.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/phosphorylation_peptide_txt/Exp027016_peptide.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/phosphorylation_peptide_txt/Exp027017_peptide.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/phosphorylation_peptide_txt/Exp027031_peptide.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/phosphorylation_peptide_txt/Exp027032_peptide.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/phosphorylation_peptide_txt/Exp027033_peptide.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/phosphorylation_peptide_txt/Exp027046_peptide.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/phosphorylation_peptide_txt/Exp027047_peptide.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/phosphorylation_peptide_txt/Exp027048_peptide.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/profiling_exp_design_info.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Experiment_Code Group Description
|
| 2 |
+
Exp026982 0 ctr_0h_R2_injection_1.raw
|
| 3 |
+
Exp026983 0 ctr_0h_R2_injection_2.raw
|
| 4 |
+
Exp026995 6 PLX_6h_R2_injection_1.raw
|
| 5 |
+
Exp026996 6 PLX_6h_R2_injection_2.raw
|
| 6 |
+
Exp027008 48 PLX_48h_R2_injection_1.raw
|
| 7 |
+
Exp027009 48 PLX_48h_R2_injection_4.raw
|
examplefile/mascot/profiling_gene_txt/Exp026982_gene.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/profiling_gene_txt/Exp026983_gene.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/profiling_gene_txt/Exp026995_gene.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/profiling_gene_txt/Exp026996_gene.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/profiling_gene_txt/Exp027008_gene.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/mascot/profiling_gene_txt/Exp027009_gene.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/maxquant/Phospho (STY)Sites.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/maxquant/phosphorylation_exp_design_info.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Experiment_Code Group Description
|
| 2 |
+
Exp027015 0 ctr_0h_R2_IMAC_1.raw
|
| 3 |
+
Exp027016 0 ctr_0h_R2_IMAC_2.raw
|
| 4 |
+
Exp027017 0 ctr_0h_R2_IMAC_3.raw
|
| 5 |
+
Exp027031 6 PLX_6h_R2_IMAC_1.raw
|
| 6 |
+
Exp027032 6 PLX_6h_R2_IMAC_2.raw
|
| 7 |
+
Exp027033 6 PLX_6h_R2_IMAC_3.raw
|
| 8 |
+
Exp027046 48 PLX_48h_R2_IMAC_1.raw
|
| 9 |
+
Exp027047 48 PLX_48h_R2_IMAC_2.raw
|
| 10 |
+
Exp027048 48 PLX_48h_R2_IMAC_3.raw
|
examplefile/maxquant/profiling_exp_design_info.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Experiment_Code Group Description
|
| 2 |
+
Exp026982 0 ctr_0h_R2_injection_1.raw
|
| 3 |
+
Exp026983 0 ctr_0h_R2_injection_2.raw
|
| 4 |
+
Exp026995 6 PLX_6h_R2_injection_1.raw
|
| 5 |
+
Exp026996 6 PLX_6h_R2_injection_2.raw
|
| 6 |
+
Exp027008 48 PLX_48h_R2_injection_1.raw
|
| 7 |
+
Exp027009 48 PLX_48h_R2_injection_4.raw
|
examplefile/maxquant/proteinGroups.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/motifanalysis.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
examplefile/phosphorylation_exp_design_info.txt
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Experiment_Code Group Description
|
| 2 |
+
Exp027012 0 ctr_0h_R1_IMAC_1.raw
|
| 3 |
+
Exp027013 0 ctr_0h_R1_IMAC_2.raw
|
| 4 |
+
Exp027014 0 ctr_0h_R1_IMAC_3.raw
|
| 5 |
+
Exp027015 0 ctr_0h_R2_IMAC_1.raw
|
| 6 |
+
Exp027016 0 ctr_0h_R2_IMAC_2.raw
|
| 7 |
+
Exp027017 0 ctr_0h_R2_IMAC_3.raw
|
| 8 |
+
Exp027018 0 ctr_0h_R3_IMAC_1.raw
|
| 9 |
+
Exp027019 0 ctr_0h_R3_IMAC_2.raw
|
| 10 |
+
Exp027020 2 PLX_2h_R1_IMAC_1.raw
|
| 11 |
+
Exp027021 2 PLX_2h_R1_IMAC_2.raw
|
| 12 |
+
Exp027022 2 PLX_2h_R1_IMAC_3.raw
|
| 13 |
+
Exp027023 2 PLX_2h_R2_IMAC_2.raw
|
| 14 |
+
Exp027024 2 PLX_2h_R2_IMAC_3.raw
|
| 15 |
+
Exp027025 2 PLX_2h_R3_IMAC_1.raw
|
| 16 |
+
Exp027026 2 PLX_2h_R3_IMAC_2.raw
|
| 17 |
+
Exp027027 2 PLX_2h_R3_IMAC_3.raw
|
| 18 |
+
Exp027028 6 PLX_6h_R1_IMAC_1.raw
|
| 19 |
+
Exp027029 6 PLX_6h_R1_IMAC_2.raw
|
| 20 |
+
Exp027030 6 PLX_6h_R1_IMAC_3.raw
|
| 21 |
+
Exp027031 6 PLX_6h_R2_IMAC_1.raw
|
| 22 |
+
Exp027032 6 PLX_6h_R2_IMAC_2.raw
|
| 23 |
+
Exp027033 6 PLX_6h_R2_IMAC_3.raw
|
| 24 |
+
Exp027034 6 PLX_6h_R3_IMAC_1.raw
|
| 25 |
+
Exp027035 6 PLX_6h_R3_IMAC_2.raw
|
| 26 |
+
Exp027036 24 PLX_24h_R1_IMAC_1.raw
|
| 27 |
+
Exp027037 24 PLX_24h_R1_IMAC_2.raw
|
| 28 |
+
Exp027038 24 PLX_24h_R1_IMAC_3.raw
|
| 29 |
+
Exp027039 24 PLX_24h_R2_IMAC_1.raw
|
| 30 |
+
Exp027040 24 PLX_24h_R2_IMAC_2.raw
|
| 31 |
+
Exp027041 24 PLX_24h_R2_IMAC_3.raw
|
| 32 |
+
Exp027042 24 PLX_24h_R3_IMAC_1.raw
|
| 33 |
+
Exp027043 24 PLX_24h_R3_IMAC_3.raw
|
| 34 |
+
Exp027044 48 PLX_48h_R1_IMAC_2.raw
|
| 35 |
+
Exp027045 48 PLX_48h_R1_IMAC_3.raw
|
| 36 |
+
Exp027046 48 PLX_48h_R2_IMAC_1.raw
|
| 37 |
+
Exp027047 48 PLX_48h_R2_IMAC_2.raw
|
| 38 |
+
Exp027048 48 PLX_48h_R2_IMAC_3.raw
|
| 39 |
+
Exp027049 48 PLX_48h_R3_IMAC_1.raw
|
| 40 |
+
Exp027050 48 PLX_48h_R3_IMAC_2.raw
|
examplefile/root/mascot/mascot_xml/Exp027015/Exp027015_F1_R1.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bbe17096e89e287b9984b93499f376f814bb9b263d4ae3128fe3c13c40d4df26
|
| 3 |
+
size 42212493
|
examplefile/root/mascot/mascot_xml/Exp027016/Exp027016_F1_R1.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b81689b2f7d4e3e41f1a4edd9ed80b01f7a29901b7d745601be70aa15611a825
|
| 3 |
+
size 38644883
|
examplefile/root/mascot/mascot_xml/Exp027017/Exp027017_F1_R1.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5cbcc19156bbb4f0e2a92c5dca4fc2d5d01d4ac6c0d210ee28639b2d559b252
|
| 3 |
+
size 39234041
|