Spaces:
Runtime error
Runtime error
LC
Browse files- .gitignore +2 -1
- Analysis (R).R +0 -14
- Excel Analysis(Pablo).ipynb +0 -0
- app.py +240 -2
- dataset.png +0 -0
- functions.py +0 -213
- descarga.jfif → img/descarga.jfif +0 -0
.gitignore
CHANGED
@@ -127,4 +127,5 @@ dmypy.json
|
|
127 |
|
128 |
# Pyre type checker
|
129 |
.pyre/
|
130 |
-
datamdata
|
|
|
|
127 |
|
128 |
# Pyre type checker
|
129 |
.pyre/
|
130 |
+
datamdata/
|
131 |
+
Notebooks/
|
Analysis (R).R
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
install.packages("readxl")
|
2 |
-
library("readxl")
|
3 |
-
setwd("C:/Users/Pablo/Desktop/BioinfoProject")
|
4 |
-
|
5 |
-
sample_gene <- read_excel("Study Results.xlsx",sheet = 1)
|
6 |
-
factors <- read_excel("Study Results.xlsx",sheet = 2)
|
7 |
-
factors[names(factors)[1:4]]
|
8 |
-
|
9 |
-
|
10 |
-
merging = merge(x = sample_gene, y = factors, by = 'sampleID')
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
merging
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Excel Analysis(Pablo).ipynb
DELETED
The diff for this file is too large to render.
See raw diff
|
|
app.py
CHANGED
@@ -1,5 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
2 |
-
from functions import *
|
3 |
directory = os.path.abspath("")
|
4 |
# from EDA_IMDb_functions import *
|
5 |
|
@@ -10,7 +248,7 @@ directory = os.path.abspath("")
|
|
10 |
st.set_page_config(layout="wide")
|
11 |
st.set_option('deprecation.showPyplotGlobalUse', False)
|
12 |
dw,col1,wl = st.columns((1,0.5,1))
|
13 |
-
col1.image('descarga.jfif')
|
14 |
st.markdown("<h1 style='text-align:center;'>Somatic Mutations Analysis in skin</h1>",unsafe_allow_html=True)
|
15 |
|
16 |
st.sidebar.markdown("<h2 style='text-align:center;'>Index</h2>",unsafe_allow_html=True)
|
|
|
1 |
+
import numpy as np
|
2 |
+
import pickle
|
3 |
+
import pandas as pd
|
4 |
+
import requests
|
5 |
+
from selenium import webdriver
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
#Simple assignment
|
8 |
+
from selenium.webdriver import Firefox
|
9 |
+
from selenium.webdriver.common.keys import Keys
|
10 |
+
from selenium.common.exceptions import NoSuchElementException
|
11 |
+
import requests
|
12 |
+
import os
|
13 |
+
import seaborn as sns
|
14 |
+
from collections import Counter
|
15 |
+
import plotly.express as px
|
16 |
+
import streamlit as st
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
### Scrap the cosmic id information
|
21 |
+
# ### FRAMEWORKS NEEDED
|
22 |
+
|
23 |
+
def scrap():
|
24 |
+
#### Setting options to the driver
|
25 |
+
options = webdriver.FirefoxOptions()
|
26 |
+
options.add_argument('--headless')
|
27 |
+
options.add_argument('--no-sandbox')
|
28 |
+
options.add_argument('--disable-dev-shm-usage')
|
29 |
+
options.capabilities
|
30 |
+
### Setting options of webdriver
|
31 |
+
# a) Setting the chromedriver
|
32 |
+
browser = Firefox(options=options,executable_path=r"C:\Users\Pablo\OneDrive\Documents\Documentos\Escuela Politécnica Superior Leganés\4 AÑO\ASIGNATURAS\1 CUATRI\WEB ANALYTICS\PART 2\Milestone3\geckodriver.exe")
|
33 |
+
### Functions and execution to run the scrapping
|
34 |
+
|
35 |
+
|
36 |
+
def getinfofromtable(oddrows:list,score:float,headertable)->list:
|
37 |
+
rows = []
|
38 |
+
for row in oddrows:
|
39 |
+
cols = []
|
40 |
+
for (i,col) in enumerate(row.find_elements_by_css_selector("td")):
|
41 |
+
if i==headertable.index( 'Primary Tissue') or i==headertable.index('Primary Histology') or i==headertable.index('Zygosity'):
|
42 |
+
cols.append(col.text)
|
43 |
+
cols.append(score)
|
44 |
+
rows.append(cols)
|
45 |
+
return rows
|
46 |
+
def getinfocosmic(mutationid):
|
47 |
+
import time
|
48 |
+
search = browser.find_element_by_id('search-field')
|
49 |
+
search = search.find_element_by_class_name("text_def")
|
50 |
+
search.send_keys(mutationid)
|
51 |
+
search.send_keys(Keys.RETURN)
|
52 |
+
time.sleep(5)
|
53 |
+
try:
|
54 |
+
container = browser.find_element_by_id("section-list")
|
55 |
+
|
56 |
+
except NoSuchElementException:
|
57 |
+
return []
|
58 |
+
|
59 |
+
try:
|
60 |
+
|
61 |
+
subq1 = container.text[container.text.find("score")+len("score"):]
|
62 |
+
score = float(subq1[:subq1.find(")")].strip())
|
63 |
+
except ValueError:
|
64 |
+
score = 0
|
65 |
+
|
66 |
+
|
67 |
+
|
68 |
+
section = browser.find_element_by_id("DataTables_Table_0")
|
69 |
+
|
70 |
+
|
71 |
+
headertable = [header.text for header in section.find_element_by_tag_name("thead").find_elements_by_tag_name("th")]
|
72 |
+
|
73 |
+
oddrows = section.find_elements_by_class_name("odd")
|
74 |
+
evenrows = section.find_elements_by_class_name("even")
|
75 |
+
|
76 |
+
l1 = getinfofromtable(oddrows,score,headertable)
|
77 |
+
l1.extend(getinfofromtable(evenrows,score,headertable))
|
78 |
+
|
79 |
+
# browser.close()
|
80 |
+
return l1
|
81 |
+
## Looking for cosmic id info
|
82 |
+
cosl = []
|
83 |
+
browser.get("https://cancer.sanger.ac.uk/cosmic")
|
84 |
+
for cos in cosmicinfo.reset_index()["COSMIC_ID"].iloc[20:]:
|
85 |
+
if cos.find(",")!=-1:
|
86 |
+
cos = cos.split(",")[0]
|
87 |
+
|
88 |
+
cosl.append(getinfocosmic(cos))
|
89 |
+
browser.get("https://cancer.sanger.ac.uk/cosmic")
|
90 |
+
### Pieplots
|
91 |
+
def pieplot(merging,id=0):
|
92 |
+
genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
|
93 |
+
if id==0:
|
94 |
+
gtype = genecount[genecount.UV_exposure_tissue=="Intermittently-photoexposed"]
|
95 |
+
if id ==1 :
|
96 |
+
gtype = genecount[genecount.UV_exposure_tissue=="Chronically-photoexposed"]
|
97 |
+
else:
|
98 |
+
gtype = genecount
|
99 |
+
|
100 |
+
gtype = gtype.groupby("gene_name").count()["sampleID"].reset_index()
|
101 |
+
gtype.sort_values(by="sampleID",ascending=False,inplace=True)
|
102 |
+
#define Seaborn color palette to use
|
103 |
+
colors = sns.color_palette('pastel')[0:len(gtype)]
|
104 |
+
#create pie chart
|
105 |
+
# plt.suptitle("Gene Occuring for different genes")
|
106 |
+
plt.pie(gtype.sampleID, labels =gtype.gene_name, colors = colors, autopct='%.0f%%',radius=2,textprops={"fontsize":9})
|
107 |
+
plt.show()
|
108 |
+
|
109 |
+
### Depending on what result you want you return one or another
|
110 |
+
def filterp4(dfgenes,id=0):
|
111 |
+
if id==0 or id==1:
|
112 |
+
|
113 |
+
if id==0:
|
114 |
+
chexposed= dfgenes[dfgenes.UV_exposure_tissue=="Intermittently-photoexposed"].sort_values(by=["mean_mut"],ascending=False)
|
115 |
+
if id==1:
|
116 |
+
chexposed= dfgenes[dfgenes.UV_exposure_tissue=="Chronically-photoexposed"].sort_values(by=["mean_mut"],ascending=False)
|
117 |
+
return px.bar(chexposed,x="gene_name",y="mean_mut",error_y="std")
|
118 |
+
if id==2:
|
119 |
+
return px.bar(dfgenes,x="gene_name",y="mean_mut",color="UV_exposure_tissue",barmode='group',error_y="std")
|
120 |
+
|
121 |
+
### Read scrapping done with cosmic ids
|
122 |
+
def read_scrap()->list:
|
123 |
+
with open('my_pickle_file.pickle', 'rb') as f :
|
124 |
+
cosbase = pickle.load(f)
|
125 |
+
return cosbase
|
126 |
+
### GendfClean
|
127 |
+
def gendfclean(cosbase,cid)->pd.DataFrame:
|
128 |
+
dfd = {"tissue": None , "histology": None,"zygosity": None, "score": None }
|
129 |
+
for i,key in enumerate(list(dfd.keys())):
|
130 |
+
dfd[key] = list(map(lambda x : np.array(x)[:,i].tolist() if x!=[] else [] ,cosbase))
|
131 |
+
|
132 |
+
dfd["cosmic_id"] = cid.tolist()
|
133 |
+
cosmicdb = pd.DataFrame(dfd)
|
134 |
+
cosmicdb = cosmicdb[(cosmicdb['tissue'].map(lambda d: len(d)) > 0) & (cosmicdb['histology'].map(lambda d: len(d)) > 0) & (cosmicdb['zygosity'].map(lambda d: len(d)) > 0) & (cosmicdb['score'].map(lambda d: len(d)) > 0) ]
|
135 |
+
|
136 |
+
cosmicdb["score"] = cosmicdb.score.apply(lambda x: float(x[0]))
|
137 |
+
|
138 |
+
return cosmicdb
|
139 |
+
|
140 |
+
### Look for stats of a gene
|
141 |
+
def inputgene(lookforgene,merging,id =0)->dict:
|
142 |
+
### id = 0--> Intermittently exposed
|
143 |
+
### id = 1--> Continuously exposed
|
144 |
+
genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
|
145 |
+
tgene = genecount[genecount.gene_name==lookforgene]
|
146 |
+
if id==0:
|
147 |
+
ph_gene = tgene[tgene.UV_exposure_tissue=='Intermittently-photoexposed']
|
148 |
+
else:
|
149 |
+
ph_gene = tgene[tgene.UV_exposure_tissue=="Chronically-photoexposed"]
|
150 |
+
### Statistiacs about gene|samples
|
151 |
+
stats = ph_gene.chr.describe()
|
152 |
+
dc = dict(stats)
|
153 |
+
dc["gene_name"] = lookforgene
|
154 |
+
if id==0:
|
155 |
+
dc["UV_exposure_tissue"] = 'Intermittently-photoexposed'
|
156 |
+
else:
|
157 |
+
dc["UV_exposure_tissue"] = 'Chronically-photoexposed'
|
158 |
+
return dc
|
159 |
+
### Look for stats of all genes
|
160 |
+
def gene_exposed(merging,id=0):
|
161 |
+
return pd.DataFrame(list(map(lambda gene: inputgene(gene,merging,id),merging.gene_name.unique())))
|
162 |
+
### Merge stats for continuous and intermittently exposed
|
163 |
+
def mergecontintinfo(merging):
|
164 |
+
### Continuously Exposed
|
165 |
+
cont_exposed_info = gene_exposed(merging,1)
|
166 |
+
### Intermittently Exposed
|
167 |
+
int_exposed_info = gene_exposed(merging,0)
|
168 |
+
return pd.concat([cont_exposed_info,int_exposed_info],axis=0)
|
169 |
+
|
170 |
+
#### Common tissues, zygosities and histologies
|
171 |
+
def explodecommon(bd,N,col):
|
172 |
+
return Counter(bd[col].apply(lambda x: list(x.keys())).explode()).most_common(N)
|
173 |
+
def pdcommon(db,col,uv:str)->pd.DataFrame:
|
174 |
+
df = pd.DataFrame(db).rename(columns={0:col,1:"Times_{}".format(col)})
|
175 |
+
df["UV_exposure_tissue"] = uv
|
176 |
+
return df
|
177 |
+
def get_N_common(df,col,N=10)->pd.DataFrame:
|
178 |
+
cosm = df.copy(True)
|
179 |
+
cosm[col] = cosm[col].apply(lambda x: Counter(x))
|
180 |
+
intcosm = cosm[cosm.UV_exposure_tissue=="Intermittently-photoexposed"]
|
181 |
+
contcosm = cosm[cosm.UV_exposure_tissue=="Chronically-photoexposed"]
|
182 |
+
|
183 |
+
infotissues = explodecommon(cosm,N,col)
|
184 |
+
inttissues = explodecommon(intcosm,N,col)
|
185 |
+
contissues = explodecommon(contcosm,N,col)
|
186 |
+
|
187 |
+
df1 = pdcommon(infotissues,col,"Total")
|
188 |
+
df2 = pdcommon(inttissues,col,"Intermittently-photoexposed")
|
189 |
+
df3 = pdcommon(contissues,col,"Chronically-photoexposed")
|
190 |
+
return pd.concat([df1,df2,df3],axis=0)
|
191 |
+
|
192 |
+
### Deatiled information of mutation type
|
193 |
+
def mut_type(x):
|
194 |
+
if x.mut_type=="Indel":
|
195 |
+
|
196 |
+
if len(x.ref)>len(x.mut):
|
197 |
+
return "Del"
|
198 |
+
elif len(x.mut)>len(x.ref):
|
199 |
+
return "In"
|
200 |
+
# if len(x.ref)>1 and len(x.mut)>1:
|
201 |
+
|
202 |
+
return x.ref+">"+x.mut
|
203 |
+
return x.mut_type
|
204 |
+
|
205 |
+
|
206 |
+
def distribution_gene(df,hue):
|
207 |
+
|
208 |
+
|
209 |
+
plot4 = df.groupby([hue,"mut_type_cus"]).count().reset_index().iloc[:,:3]
|
210 |
+
plot4 = plot4.rename(columns={"sampleID":"n_mut"})
|
211 |
+
plot4 = plot4.sort_values(by="mut_type_cus",ascending=True)
|
212 |
+
fig = px.bar(plot4,x="mut_type_cus",y="n_mut",color=hue,barmode="group")
|
213 |
+
return fig
|
214 |
+
|
215 |
+
|
216 |
+
|
217 |
+
|
218 |
+
|
219 |
+
|
220 |
+
|
221 |
+
|
222 |
+
|
223 |
+
|
224 |
+
|
225 |
+
|
226 |
+
|
227 |
+
|
228 |
+
|
229 |
+
|
230 |
+
|
231 |
+
|
232 |
+
|
233 |
+
|
234 |
+
|
235 |
+
|
236 |
+
|
237 |
+
|
238 |
+
|
239 |
+
|
240 |
|
|
|
241 |
directory = os.path.abspath("")
|
242 |
# from EDA_IMDb_functions import *
|
243 |
|
|
|
248 |
st.set_page_config(layout="wide")
|
249 |
st.set_option('deprecation.showPyplotGlobalUse', False)
|
250 |
dw,col1,wl = st.columns((1,0.5,1))
|
251 |
+
col1.image('img/descarga.jfif')
|
252 |
st.markdown("<h1 style='text-align:center;'>Somatic Mutations Analysis in skin</h1>",unsafe_allow_html=True)
|
253 |
|
254 |
st.sidebar.markdown("<h2 style='text-align:center;'>Index</h2>",unsafe_allow_html=True)
|
dataset.png
DELETED
Binary file (52.6 kB)
|
|
functions.py
DELETED
@@ -1,213 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import pickle
|
3 |
-
import pandas as pd
|
4 |
-
import requests
|
5 |
-
from selenium import webdriver
|
6 |
-
import matplotlib.pyplot as plt
|
7 |
-
#Simple assignment
|
8 |
-
from selenium.webdriver import Firefox
|
9 |
-
from selenium.webdriver.common.keys import Keys
|
10 |
-
from selenium.common.exceptions import NoSuchElementException
|
11 |
-
import requests
|
12 |
-
import os
|
13 |
-
import seaborn as sns
|
14 |
-
from collections import Counter
|
15 |
-
import plotly.express as px
|
16 |
-
import streamlit as st
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
### Scrap the cosmic id information
|
21 |
-
# ### FRAMEWORKS NEEDED
|
22 |
-
|
23 |
-
def scrap():
|
24 |
-
#### Setting options to the driver
|
25 |
-
options = webdriver.FirefoxOptions()
|
26 |
-
options.add_argument('--headless')
|
27 |
-
options.add_argument('--no-sandbox')
|
28 |
-
options.add_argument('--disable-dev-shm-usage')
|
29 |
-
options.capabilities
|
30 |
-
### Setting options of webdriver
|
31 |
-
# a) Setting the chromedriver
|
32 |
-
browser = Firefox(options=options,executable_path=r"C:\Users\Pablo\OneDrive\Documents\Documentos\Escuela Politécnica Superior Leganés\4 AÑO\ASIGNATURAS\1 CUATRI\WEB ANALYTICS\PART 2\Milestone3\geckodriver.exe")
|
33 |
-
### Functions and execution to run the scrapping
|
34 |
-
|
35 |
-
|
36 |
-
def getinfofromtable(oddrows:list,score:float,headertable)->list:
|
37 |
-
rows = []
|
38 |
-
for row in oddrows:
|
39 |
-
cols = []
|
40 |
-
for (i,col) in enumerate(row.find_elements_by_css_selector("td")):
|
41 |
-
if i==headertable.index( 'Primary Tissue') or i==headertable.index('Primary Histology') or i==headertable.index('Zygosity'):
|
42 |
-
cols.append(col.text)
|
43 |
-
cols.append(score)
|
44 |
-
rows.append(cols)
|
45 |
-
return rows
|
46 |
-
def getinfocosmic(mutationid):
|
47 |
-
import time
|
48 |
-
search = browser.find_element_by_id('search-field')
|
49 |
-
search = search.find_element_by_class_name("text_def")
|
50 |
-
search.send_keys(mutationid)
|
51 |
-
search.send_keys(Keys.RETURN)
|
52 |
-
time.sleep(5)
|
53 |
-
try:
|
54 |
-
container = browser.find_element_by_id("section-list")
|
55 |
-
|
56 |
-
except NoSuchElementException:
|
57 |
-
return []
|
58 |
-
|
59 |
-
try:
|
60 |
-
|
61 |
-
subq1 = container.text[container.text.find("score")+len("score"):]
|
62 |
-
score = float(subq1[:subq1.find(")")].strip())
|
63 |
-
except ValueError:
|
64 |
-
score = 0
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
section = browser.find_element_by_id("DataTables_Table_0")
|
69 |
-
|
70 |
-
|
71 |
-
headertable = [header.text for header in section.find_element_by_tag_name("thead").find_elements_by_tag_name("th")]
|
72 |
-
|
73 |
-
oddrows = section.find_elements_by_class_name("odd")
|
74 |
-
evenrows = section.find_elements_by_class_name("even")
|
75 |
-
|
76 |
-
l1 = getinfofromtable(oddrows,score,headertable)
|
77 |
-
l1.extend(getinfofromtable(evenrows,score,headertable))
|
78 |
-
|
79 |
-
# browser.close()
|
80 |
-
return l1
|
81 |
-
## Looking for cosmic id info
|
82 |
-
cosl = []
|
83 |
-
browser.get("https://cancer.sanger.ac.uk/cosmic")
|
84 |
-
for cos in cosmicinfo.reset_index()["COSMIC_ID"].iloc[20:]:
|
85 |
-
if cos.find(",")!=-1:
|
86 |
-
cos = cos.split(",")[0]
|
87 |
-
|
88 |
-
cosl.append(getinfocosmic(cos))
|
89 |
-
browser.get("https://cancer.sanger.ac.uk/cosmic")
|
90 |
-
### Pieplots
|
91 |
-
def pieplot(merging,id=0):
|
92 |
-
genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
|
93 |
-
if id==0:
|
94 |
-
gtype = genecount[genecount.UV_exposure_tissue=="Intermittently-photoexposed"]
|
95 |
-
if id ==1 :
|
96 |
-
gtype = genecount[genecount.UV_exposure_tissue=="Chronically-photoexposed"]
|
97 |
-
else:
|
98 |
-
gtype = genecount
|
99 |
-
|
100 |
-
gtype = gtype.groupby("gene_name").count()["sampleID"].reset_index()
|
101 |
-
gtype.sort_values(by="sampleID",ascending=False,inplace=True)
|
102 |
-
#define Seaborn color palette to use
|
103 |
-
colors = sns.color_palette('pastel')[0:len(gtype)]
|
104 |
-
#create pie chart
|
105 |
-
# plt.suptitle("Gene Occuring for different genes")
|
106 |
-
plt.pie(gtype.sampleID, labels =gtype.gene_name, colors = colors, autopct='%.0f%%',radius=2,textprops={"fontsize":9})
|
107 |
-
plt.show()
|
108 |
-
|
109 |
-
### Depending on what result you want you return one or another
|
110 |
-
def filterp4(dfgenes,id=0):
|
111 |
-
if id==0 or id==1:
|
112 |
-
|
113 |
-
if id==0:
|
114 |
-
chexposed= dfgenes[dfgenes.UV_exposure_tissue=="Intermittently-photoexposed"].sort_values(by=["mean_mut"],ascending=False)
|
115 |
-
if id==1:
|
116 |
-
chexposed= dfgenes[dfgenes.UV_exposure_tissue=="Chronically-photoexposed"].sort_values(by=["mean_mut"],ascending=False)
|
117 |
-
return px.bar(chexposed,x="gene_name",y="mean_mut",error_y="std")
|
118 |
-
if id==2:
|
119 |
-
return px.bar(dfgenes,x="gene_name",y="mean_mut",color="UV_exposure_tissue",barmode='group',error_y="std")
|
120 |
-
|
121 |
-
### Read scrapping done with cosmic ids
|
122 |
-
def read_scrap()->list:
|
123 |
-
with open('my_pickle_file.pickle', 'rb') as f :
|
124 |
-
cosbase = pickle.load(f)
|
125 |
-
return cosbase
|
126 |
-
### GendfClean
|
127 |
-
def gendfclean(cosbase,cid)->pd.DataFrame:
|
128 |
-
dfd = {"tissue": None , "histology": None,"zygosity": None, "score": None }
|
129 |
-
for i,key in enumerate(list(dfd.keys())):
|
130 |
-
dfd[key] = list(map(lambda x : np.array(x)[:,i].tolist() if x!=[] else [] ,cosbase))
|
131 |
-
|
132 |
-
dfd["cosmic_id"] = cid.tolist()
|
133 |
-
cosmicdb = pd.DataFrame(dfd)
|
134 |
-
cosmicdb = cosmicdb[(cosmicdb['tissue'].map(lambda d: len(d)) > 0) & (cosmicdb['histology'].map(lambda d: len(d)) > 0) & (cosmicdb['zygosity'].map(lambda d: len(d)) > 0) & (cosmicdb['score'].map(lambda d: len(d)) > 0) ]
|
135 |
-
|
136 |
-
cosmicdb["score"] = cosmicdb.score.apply(lambda x: float(x[0]))
|
137 |
-
|
138 |
-
return cosmicdb
|
139 |
-
|
140 |
-
### Look for stats of a gene
|
141 |
-
def inputgene(lookforgene,merging,id =0)->dict:
|
142 |
-
### id = 0--> Intermittently exposed
|
143 |
-
### id = 1--> Continuously exposed
|
144 |
-
genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
|
145 |
-
tgene = genecount[genecount.gene_name==lookforgene]
|
146 |
-
if id==0:
|
147 |
-
ph_gene = tgene[tgene.UV_exposure_tissue=='Intermittently-photoexposed']
|
148 |
-
else:
|
149 |
-
ph_gene = tgene[tgene.UV_exposure_tissue=="Chronically-photoexposed"]
|
150 |
-
### Statistiacs about gene|samples
|
151 |
-
stats = ph_gene.chr.describe()
|
152 |
-
dc = dict(stats)
|
153 |
-
dc["gene_name"] = lookforgene
|
154 |
-
if id==0:
|
155 |
-
dc["UV_exposure_tissue"] = 'Intermittently-photoexposed'
|
156 |
-
else:
|
157 |
-
dc["UV_exposure_tissue"] = 'Chronically-photoexposed'
|
158 |
-
return dc
|
159 |
-
### Look for stats of all genes
|
160 |
-
def gene_exposed(merging,id=0):
|
161 |
-
return pd.DataFrame(list(map(lambda gene: inputgene(gene,merging,id),merging.gene_name.unique())))
|
162 |
-
### Merge stats for continuous and intermittently exposed
|
163 |
-
def mergecontintinfo(merging):
|
164 |
-
### Continuously Exposed
|
165 |
-
cont_exposed_info = gene_exposed(merging,1)
|
166 |
-
### Intermittently Exposed
|
167 |
-
int_exposed_info = gene_exposed(merging,0)
|
168 |
-
return pd.concat([cont_exposed_info,int_exposed_info],axis=0)
|
169 |
-
|
170 |
-
#### Common tissues, zygosities and histologies
|
171 |
-
def explodecommon(bd,N,col):
|
172 |
-
return Counter(bd[col].apply(lambda x: list(x.keys())).explode()).most_common(N)
|
173 |
-
def pdcommon(db,col,uv:str)->pd.DataFrame:
|
174 |
-
df = pd.DataFrame(db).rename(columns={0:col,1:"Times_{}".format(col)})
|
175 |
-
df["UV_exposure_tissue"] = uv
|
176 |
-
return df
|
177 |
-
def get_N_common(df,col,N=10)->pd.DataFrame:
|
178 |
-
cosm = df.copy(True)
|
179 |
-
cosm[col] = cosm[col].apply(lambda x: Counter(x))
|
180 |
-
intcosm = cosm[cosm.UV_exposure_tissue=="Intermittently-photoexposed"]
|
181 |
-
contcosm = cosm[cosm.UV_exposure_tissue=="Chronically-photoexposed"]
|
182 |
-
|
183 |
-
infotissues = explodecommon(cosm,N,col)
|
184 |
-
inttissues = explodecommon(intcosm,N,col)
|
185 |
-
contissues = explodecommon(contcosm,N,col)
|
186 |
-
|
187 |
-
df1 = pdcommon(infotissues,col,"Total")
|
188 |
-
df2 = pdcommon(inttissues,col,"Intermittently-photoexposed")
|
189 |
-
df3 = pdcommon(contissues,col,"Chronically-photoexposed")
|
190 |
-
return pd.concat([df1,df2,df3],axis=0)
|
191 |
-
|
192 |
-
### Deatiled information of mutation type
|
193 |
-
def mut_type(x):
|
194 |
-
if x.mut_type=="Indel":
|
195 |
-
|
196 |
-
if len(x.ref)>len(x.mut):
|
197 |
-
return "Del"
|
198 |
-
elif len(x.mut)>len(x.ref):
|
199 |
-
return "In"
|
200 |
-
# if len(x.ref)>1 and len(x.mut)>1:
|
201 |
-
|
202 |
-
return x.ref+">"+x.mut
|
203 |
-
return x.mut_type
|
204 |
-
|
205 |
-
|
206 |
-
def distribution_gene(df,hue):
|
207 |
-
|
208 |
-
|
209 |
-
plot4 = df.groupby([hue,"mut_type_cus"]).count().reset_index().iloc[:,:3]
|
210 |
-
plot4 = plot4.rename(columns={"sampleID":"n_mut"})
|
211 |
-
plot4 = plot4.sort_values(by="mut_type_cus",ascending=True)
|
212 |
-
fig = px.bar(plot4,x="mut_type_cus",y="n_mut",color=hue,barmode="group")
|
213 |
-
return fig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
descarga.jfif → img/descarga.jfif
RENAMED
File without changes
|