Spaces:

Rules99
/

Bioinformatics_Project

Runtime error

App Files Files Community

Rules99 committed on Dec 15, 2021

Commit

356b18e

•

1 Parent(s): 9f8cc36

LC

Browse files

Files changed (7) hide show

.gitignore +2 -1
Analysis (R).R +0 -14
Excel Analysis(Pablo).ipynb +0 -0
app.py +240 -2
dataset.png +0 -0
functions.py +0 -213
descarga.jfif → img/descarga.jfif +0 -0

.gitignore CHANGED Viewed

@@ -127,4 +127,5 @@ dmypy.json
 # Pyre type checker
 .pyre/
-datamdata

 # Pyre type checker
 .pyre/
+datamdata/
+Notebooks/

Analysis (R).R DELETED Viewed

@@ -1,14 +0,0 @@
-install.packages("readxl")
-library("readxl")
-setwd("C:/Users/Pablo/Desktop/BioinfoProject")
-sample_gene <- read_excel("Study Results.xlsx",sheet = 1)
-factors <- read_excel("Study Results.xlsx",sheet = 2)
-factors[names(factors)[1:4]]
-merging = merge(x = sample_gene, y = factors, by = 'sampleID')
-merging

Excel Analysis(Pablo).ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

app.py CHANGED Viewed

@@ -1,5 +1,243 @@
-from functions import *
 directory = os.path.abspath("")
 # from EDA_IMDb_functions import *
@@ -10,7 +248,7 @@ directory = os.path.abspath("")
 st.set_page_config(layout="wide")
 st.set_option('deprecation.showPyplotGlobalUse', False)
 dw,col1,wl = st.columns((1,0.5,1))
-col1.image('descarga.jfif')
 st.markdown("<h1 style='text-align:center;'>Somatic Mutations Analysis in skin</h1>",unsafe_allow_html=True)
 st.sidebar.markdown("<h2 style='text-align:center;'>Index</h2>",unsafe_allow_html=True)

+import numpy as np
+import pickle
+import pandas as pd
+import requests
+from selenium import webdriver
+import matplotlib.pyplot as plt
+#Simple assignment
+from selenium.webdriver import Firefox
+from selenium.webdriver.common.keys import Keys
+from selenium.common.exceptions import NoSuchElementException
+import requests
+import os
+import seaborn as sns
+from collections import Counter
+import plotly.express as px
+import streamlit as st
+### Scrape the COSMIC id information
+# ### FRAMEWORKS NEEDED
+def scrap():
+            """Start a headless Firefox driver and define the COSMIC scraping helpers.
+
+            NOTE(review): the loop over ``cosmicinfo`` near the end is indented
+            inside ``getinfocosmic`` after its ``return l1``, so it never runs;
+            as written, calling ``scrap()`` creates the browser, defines the two
+            helpers and returns ``None``. ``cosmicinfo`` is not defined in this
+            block -- presumably a DataFrame with a ``COSMIC_ID`` column; confirm
+            before moving the loop.
+
+            NOTE(review): the ``find_element(s)_by_*`` helpers and the
+            ``executable_path`` argument belong to the Selenium 3 API -- confirm
+            the Selenium version this project pins.
+            """
+            #### Setting options to the driver
+            options = webdriver.FirefoxOptions()
+            options.add_argument('--headless')
+            options.add_argument('--no-sandbox')
+            options.add_argument('--disable-dev-shm-usage')
+            # Bare attribute access: the value is not used.
+            options.capabilities
+            ### Setting options of webdriver
+            # a) Setting the geckodriver executable (hard-coded path on the author's machine)
+            browser = Firefox(options=options,executable_path=r"C:\Users\Pablo\OneDrive\Documents\Documentos\Escuela Politécnica Superior Leganés\4 AÑO\ASIGNATURAS\1 CUATRI\WEB ANALYTICS\PART 2\Milestone3\geckodriver.exe")
+            ### Functions and execution to run the scraping
+            def getinfofromtable(oddrows:list,score:float,headertable)->list:
+                    """Return one list per table row: the text of the 'Primary Tissue',
+                    'Primary Histology' and 'Zygosity' cells (in table column order)
+                    followed by ``score``.
+
+                    ``oddrows`` is a list of row elements (despite the name it is also
+                    called with the even rows); ``headertable`` is the list of header
+                    texts used to locate the wanted columns.
+                    """
+                    rows = []
+                    for row in oddrows:
+                        cols = []
+                        for (i,col) in enumerate(row.find_elements_by_css_selector("td")):
+                            # Keep only the cells that sit under one of the three wanted headers.
+                            if  i==headertable.index( 'Primary Tissue') or  i==headertable.index('Primary Histology') or i==headertable.index('Zygosity'):
+                                cols.append(col.text)
+                        # The same score is attached to every row of the table.
+                        cols.append(score)
+                        rows.append(cols)
+                    return rows
+            def getinfocosmic(mutationid):
+                    """Search ``mutationid`` on the page currently loaded in ``browser``
+                    and return the rows extracted from the result table.
+
+                    Returns ``[]`` when the "section-list" element is not present.
+                    """
+                    import time
+                    search = browser.find_element_by_id('search-field')
+                    search = search.find_element_by_class_name("text_def")
+                    search.send_keys(mutationid)
+                    search.send_keys(Keys.RETURN)
+                    # Fixed 5 second pause after submitting the search.
+                    time.sleep(5)
+                    try:
+                        container = browser.find_element_by_id("section-list")
+                    except NoSuchElementException:
+                        return []
+                    try:
+                        # The score is the text between the first "score" and the next ")".
+                        subq1 = container.text[container.text.find("score")+len("score"):]
+                        score = float(subq1[:subq1.find(")")].strip())
+                    except ValueError:
+                        # Text that does not parse as a float falls back to a score of 0.
+                        score = 0
+                    section = browser.find_element_by_id("DataTables_Table_0")
+                    headertable = [header.text for header in section.find_element_by_tag_name("thead").find_elements_by_tag_name("th")]
+                    # Rows are collected by their "odd" / "even" CSS classes, odd rows first.
+                    oddrows = section.find_elements_by_class_name("odd")
+                    evenrows = section.find_elements_by_class_name("even")
+                    l1 = getinfofromtable(oddrows,score,headertable)
+                    l1.extend(getinfofromtable(evenrows,score,headertable))
+                    # browser.close()
+                    return l1
+                    # NOTE(review): everything below is unreachable (it follows the return above).
+                    ## Looking for cosmic id info
+                    cosl = []
+                    browser.get("https://cancer.sanger.ac.uk/cosmic")
+                    # Starts at the 21st id (.iloc[20:]).
+                    for cos in cosmicinfo.reset_index()["COSMIC_ID"].iloc[20:]:
+                            # For comma-separated ids only the first one is searched.
+                            if cos.find(",")!=-1:
+                                    cos = cos.split(",")[0]
+                            cosl.append(getinfocosmic(cos))
+                            # Reload the landing page before the next search.
+                            browser.get("https://cancer.sanger.ac.uk/cosmic")
+### Pieplots
+def pieplot(merging,id=0):
+    genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
+    if id==0:
+        gtype = genecount[genecount.UV_exposure_tissue=="Intermittently-photoexposed"]
+    if id ==1 :
+        gtype = genecount[genecount.UV_exposure_tissue=="Chronically-photoexposed"]
+    else:
+        gtype = genecount
+    gtype = gtype.groupby("gene_name").count()["sampleID"].reset_index()
+    gtype.sort_values(by="sampleID",ascending=False,inplace=True)
+    #define Seaborn color palette to use
+    colors = sns.color_palette('pastel')[0:len(gtype)]
+    #create pie chart
+    # plt.suptitle("Gene Occuring for different genes")
+    plt.pie(gtype.sampleID, labels =gtype.gene_name, colors = colors, autopct='%.0f%%',radius=2,textprops={"fontsize":9})
+    plt.show()
+### Depending on what result you want you return one or another
+def filterp4(dfgenes,id=0):
+    if id==0 or id==1:
+        if id==0:
+            chexposed=  dfgenes[dfgenes.UV_exposure_tissue=="Intermittently-photoexposed"].sort_values(by=["mean_mut"],ascending=False)
+        if id==1:
+            chexposed=  dfgenes[dfgenes.UV_exposure_tissue=="Chronically-photoexposed"].sort_values(by=["mean_mut"],ascending=False)
+        return px.bar(chexposed,x="gene_name",y="mean_mut",error_y="std")
+    if id==2:
+        return px.bar(dfgenes,x="gene_name",y="mean_mut",color="UV_exposure_tissue",barmode='group',error_y="std")
+### Read the scraping results saved for the COSMIC ids
+def read_scrap()->list:
+    with open('my_pickle_file.pickle', 'rb') as f :
+        cosbase = pickle.load(f)
+    return cosbase
+### GendfClean
+def gendfclean(cosbase,cid)->pd.DataFrame:
+        dfd = {"tissue": None , "histology": None,"zygosity": None, "score": None }
+        for i,key in enumerate(list(dfd.keys())):
+            dfd[key] = list(map(lambda x : np.array(x)[:,i].tolist() if x!=[] else [] ,cosbase))
+        dfd["cosmic_id"] = cid.tolist()
+        cosmicdb = pd.DataFrame(dfd)
+        cosmicdb = cosmicdb[(cosmicdb['tissue'].map(lambda d: len(d)) > 0) & (cosmicdb['histology'].map(lambda d: len(d)) > 0) & (cosmicdb['zygosity'].map(lambda d: len(d)) > 0) & (cosmicdb['score'].map(lambda d: len(d)) > 0) ]
+        cosmicdb["score"] = cosmicdb.score.apply(lambda x: float(x[0]))
+        return cosmicdb
+### Look for stats of a gene
+def inputgene(lookforgene,merging,id =0)->dict:
+        ### id = 0--> Intermittently exposed
+        ### id = 1--> Continuously exposed
+        genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
+        tgene = genecount[genecount.gene_name==lookforgene]
+        if id==0:
+            ph_gene = tgene[tgene.UV_exposure_tissue=='Intermittently-photoexposed']
+        else:
+            ph_gene = tgene[tgene.UV_exposure_tissue=="Chronically-photoexposed"]
+        ### Statistiacs about gene|samples
+        stats = ph_gene.chr.describe()
+        dc = dict(stats)
+        dc["gene_name"] = lookforgene
+        if id==0:
+            dc["UV_exposure_tissue"] = 'Intermittently-photoexposed'
+        else:
+            dc["UV_exposure_tissue"] = 'Chronically-photoexposed'
+        return  dc
+### Look for stats of all genes
+def gene_exposed(merging,id=0):
+    return pd.DataFrame(list(map(lambda gene: inputgene(gene,merging,id),merging.gene_name.unique())))
+### Merge stats for continuous and intermittently exposed
+def mergecontintinfo(merging):
+        ### Continuously Exposed
+        cont_exposed_info = gene_exposed(merging,1)
+        ### Intermittently Exposed
+        int_exposed_info = gene_exposed(merging,0)
+        return pd.concat([cont_exposed_info,int_exposed_info],axis=0)
+#### Common tissues, zygosities and histologies
+def explodecommon(bd,N,col):
+        return  Counter(bd[col].apply(lambda x: list(x.keys())).explode()).most_common(N)
+def pdcommon(db,col,uv:str)->pd.DataFrame:
+        df = pd.DataFrame(db).rename(columns={0:col,1:"Times_{}".format(col)})
+        df["UV_exposure_tissue"] = uv
+        return df
+def get_N_common(df,col,N=10)->pd.DataFrame:
+        cosm = df.copy(True)
+        cosm[col] = cosm[col].apply(lambda x: Counter(x))
+        intcosm = cosm[cosm.UV_exposure_tissue=="Intermittently-photoexposed"]
+        contcosm = cosm[cosm.UV_exposure_tissue=="Chronically-photoexposed"]
+        infotissues = explodecommon(cosm,N,col)
+        inttissues = explodecommon(intcosm,N,col)
+        contissues = explodecommon(contcosm,N,col)
+        df1 = pdcommon(infotissues,col,"Total")
+        df2 = pdcommon(inttissues,col,"Intermittently-photoexposed")
+        df3 = pdcommon(contissues,col,"Chronically-photoexposed")
+        return pd.concat([df1,df2,df3],axis=0)
+### Detailed information of mutation type
+def mut_type(x):
+    if x.mut_type=="Indel":
+            if len(x.ref)>len(x.mut):
+                    return "Del"
+            elif len(x.mut)>len(x.ref):
+                    return "In"
+        #     if len(x.ref)>1 and len(x.mut)>1:
+            return x.ref+">"+x.mut
+    return x.mut_type
+def distribution_gene(df,hue):
+    plot4 = df.groupby([hue,"mut_type_cus"]).count().reset_index().iloc[:,:3]
+    plot4 = plot4.rename(columns={"sampleID":"n_mut"})
+    plot4 = plot4.sort_values(by="mut_type_cus",ascending=True)
+    fig = px.bar(plot4,x="mut_type_cus",y="n_mut",color=hue,barmode="group")
+    return fig
 directory = os.path.abspath("")
 # from EDA_IMDb_functions import *
 st.set_page_config(layout="wide")
 st.set_option('deprecation.showPyplotGlobalUse', False)
 dw,col1,wl = st.columns((1,0.5,1))
+col1.image('img/descarga.jfif')
 st.markdown("<h1 style='text-align:center;'>Somatic Mutations Analysis in skin</h1>",unsafe_allow_html=True)
 st.sidebar.markdown("<h2 style='text-align:center;'>Index</h2>",unsafe_allow_html=True)

dataset.png DELETED Viewed

Binary file (52.6 kB)

functions.py DELETED Viewed

@@ -1,213 +0,0 @@
-import numpy as np
-import pickle
-import pandas as pd
-import requests
-from selenium import webdriver
-import matplotlib.pyplot as plt
-#Simple assignment
-from selenium.webdriver import Firefox
-from selenium.webdriver.common.keys import Keys
-from selenium.common.exceptions import NoSuchElementException
-import requests
-import os
-import seaborn as sns
-from collections import Counter
-import plotly.express as px
-import streamlit as st
-### Scrap the cosmic id information
-# ### FRAMEWORKS NEEDED
-def scrap():
-            #### Setting options to the driver
-            options = webdriver.FirefoxOptions()
-            options.add_argument('--headless')
-            options.add_argument('--no-sandbox')
-            options.add_argument('--disable-dev-shm-usage')
-            options.capabilities
-            ### Setting options of webdriver
-            # a) Setting the chromedriver
-            browser = Firefox(options=options,executable_path=r"C:\Users\Pablo\OneDrive\Documents\Documentos\Escuela Politécnica Superior Leganés\4 AÑO\ASIGNATURAS\1 CUATRI\WEB ANALYTICS\PART 2\Milestone3\geckodriver.exe")
-            ### Functions and execution to run the scrapping
-            def getinfofromtable(oddrows:list,score:float,headertable)->list:
-                    rows = []
-                    for row in oddrows:
-                        cols = []
-                        for (i,col) in enumerate(row.find_elements_by_css_selector("td")):
-                            if  i==headertable.index( 'Primary Tissue') or  i==headertable.index('Primary Histology') or i==headertable.index('Zygosity'):
-                                cols.append(col.text)
-                        cols.append(score)
-                        rows.append(cols)
-                    return rows
-            def getinfocosmic(mutationid):
-                    import time
-                    search = browser.find_element_by_id('search-field')
-                    search = search.find_element_by_class_name("text_def")
-                    search.send_keys(mutationid)
-                    search.send_keys(Keys.RETURN)
-                    time.sleep(5)
-                    try:
-                        container = browser.find_element_by_id("section-list")
-                    except NoSuchElementException:
-                        return []
-                    try:
-                        subq1 = container.text[container.text.find("score")+len("score"):]
-                        score = float(subq1[:subq1.find(")")].strip())
-                    except ValueError:
-                        score = 0
-                    section = browser.find_element_by_id("DataTables_Table_0")
-                    headertable = [header.text for header in section.find_element_by_tag_name("thead").find_elements_by_tag_name("th")]
-                    oddrows = section.find_elements_by_class_name("odd")
-                    evenrows = section.find_elements_by_class_name("even")
-                    l1 = getinfofromtable(oddrows,score,headertable)
-                    l1.extend(getinfofromtable(evenrows,score,headertable))
-                    # browser.close()
-                    return l1
-                    ## Looking for cosmic id info
-                    cosl = []
-                    browser.get("https://cancer.sanger.ac.uk/cosmic")
-                    for cos in cosmicinfo.reset_index()["COSMIC_ID"].iloc[20:]:
-                            if cos.find(",")!=-1:
-                                    cos = cos.split(",")[0]
-                            cosl.append(getinfocosmic(cos))
-                            browser.get("https://cancer.sanger.ac.uk/cosmic")
-### Pieplots
-def pieplot(merging,id=0):
-    genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
-    if id==0:
-        gtype = genecount[genecount.UV_exposure_tissue=="Intermittently-photoexposed"]
-    if id ==1 :
-        gtype = genecount[genecount.UV_exposure_tissue=="Chronically-photoexposed"]
-    else:
-        gtype = genecount
-    gtype = gtype.groupby("gene_name").count()["sampleID"].reset_index()
-    gtype.sort_values(by="sampleID",ascending=False,inplace=True)
-    #define Seaborn color palette to use
-    colors = sns.color_palette('pastel')[0:len(gtype)]
-    #create pie chart
-    # plt.suptitle("Gene Occuring for different genes")
-    plt.pie(gtype.sampleID, labels =gtype.gene_name, colors = colors, autopct='%.0f%%',radius=2,textprops={"fontsize":9})
-    plt.show()
-### Depending on what result you want you return one or another
-def filterp4(dfgenes,id=0):
-    if id==0 or id==1:
-        if id==0:
-            chexposed=  dfgenes[dfgenes.UV_exposure_tissue=="Intermittently-photoexposed"].sort_values(by=["mean_mut"],ascending=False)
-        if id==1:
-            chexposed=  dfgenes[dfgenes.UV_exposure_tissue=="Chronically-photoexposed"].sort_values(by=["mean_mut"],ascending=False)
-        return px.bar(chexposed,x="gene_name",y="mean_mut",error_y="std")
-    if id==2:
-        return px.bar(dfgenes,x="gene_name",y="mean_mut",color="UV_exposure_tissue",barmode='group',error_y="std")
-### Read scrapping done with cosmic ids
-def read_scrap()->list:
-    with open('my_pickle_file.pickle', 'rb') as f :
-        cosbase = pickle.load(f)
-    return cosbase
-### GendfClean
-def gendfclean(cosbase,cid)->pd.DataFrame:
-        dfd = {"tissue": None , "histology": None,"zygosity": None, "score": None }
-        for i,key in enumerate(list(dfd.keys())):
-            dfd[key] = list(map(lambda x : np.array(x)[:,i].tolist() if x!=[] else [] ,cosbase))
-        dfd["cosmic_id"] = cid.tolist()
-        cosmicdb = pd.DataFrame(dfd)
-        cosmicdb = cosmicdb[(cosmicdb['tissue'].map(lambda d: len(d)) > 0) & (cosmicdb['histology'].map(lambda d: len(d)) > 0) & (cosmicdb['zygosity'].map(lambda d: len(d)) > 0) & (cosmicdb['score'].map(lambda d: len(d)) > 0) ]
-        cosmicdb["score"] = cosmicdb.score.apply(lambda x: float(x[0]))
-        return cosmicdb
-### Look for stats of a gene
-def inputgene(lookforgene,merging,id =0)->dict:
-        ### id = 0--> Intermittently exposed
-        ### id = 1--> Continuously exposed
-        genecount = merging.groupby(by=["gene_name","UV_exposure_tissue","sampleID"]).count().reset_index()
-        tgene = genecount[genecount.gene_name==lookforgene]
-        if id==0:
-            ph_gene = tgene[tgene.UV_exposure_tissue=='Intermittently-photoexposed']
-        else:
-            ph_gene = tgene[tgene.UV_exposure_tissue=="Chronically-photoexposed"]
-        ### Statistiacs about gene|samples
-        stats = ph_gene.chr.describe()
-        dc = dict(stats)
-        dc["gene_name"] = lookforgene
-        if id==0:
-            dc["UV_exposure_tissue"] = 'Intermittently-photoexposed'
-        else:
-            dc["UV_exposure_tissue"] = 'Chronically-photoexposed'
-        return  dc
-### Look for stats of all genes
-def gene_exposed(merging,id=0):
-    return pd.DataFrame(list(map(lambda gene: inputgene(gene,merging,id),merging.gene_name.unique())))
-### Merge stats for continuous and intermittently exposed
-def mergecontintinfo(merging):
-        ### Continuously Exposed
-        cont_exposed_info = gene_exposed(merging,1)
-        ### Intermittently Exposed
-        int_exposed_info = gene_exposed(merging,0)
-        return pd.concat([cont_exposed_info,int_exposed_info],axis=0)
-#### Common tissues, zygosities and histologies
-def explodecommon(bd,N,col):
-        return  Counter(bd[col].apply(lambda x: list(x.keys())).explode()).most_common(N)
-def pdcommon(db,col,uv:str)->pd.DataFrame:
-        df = pd.DataFrame(db).rename(columns={0:col,1:"Times_{}".format(col)})
-        df["UV_exposure_tissue"] = uv
-        return df
-def get_N_common(df,col,N=10)->pd.DataFrame:
-        cosm = df.copy(True)
-        cosm[col] = cosm[col].apply(lambda x: Counter(x))
-        intcosm = cosm[cosm.UV_exposure_tissue=="Intermittently-photoexposed"]
-        contcosm = cosm[cosm.UV_exposure_tissue=="Chronically-photoexposed"]
-        infotissues = explodecommon(cosm,N,col)
-        inttissues = explodecommon(intcosm,N,col)
-        contissues = explodecommon(contcosm,N,col)
-        df1 = pdcommon(infotissues,col,"Total")
-        df2 = pdcommon(inttissues,col,"Intermittently-photoexposed")
-        df3 = pdcommon(contissues,col,"Chronically-photoexposed")
-        return pd.concat([df1,df2,df3],axis=0)
-### Deatiled information of mutation type
-def mut_type(x):
-    if x.mut_type=="Indel":
-            if len(x.ref)>len(x.mut):
-                    return "Del"
-            elif len(x.mut)>len(x.ref):
-                    return "In"
-        #     if len(x.ref)>1 and len(x.mut)>1:
-            return x.ref+">"+x.mut
-    return x.mut_type
-def distribution_gene(df,hue):
-    plot4 = df.groupby([hue,"mut_type_cus"]).count().reset_index().iloc[:,:3]
-    plot4 = plot4.rename(columns={"sampleID":"n_mut"})
-    plot4 = plot4.sort_values(by="mut_type_cus",ascending=True)
-    fig = px.bar(plot4,x="mut_type_cus",y="n_mut",color=hue,barmode="group")
-    return fig

descarga.jfif → img/descarga.jfif RENAMED Viewed

File without changes