# GenMIND / app.py
import sklearn  # ensures scikit-learn is installed; the pickled KDE models and scalers below require it
import fnmatch
import numpy as np
import gradio as gr
import pandas as pd
description = """
# 🧠 Generative models of MRI-derived neuroimaging features and associated dataset of 18000 samples
GenMIND is a dataset of 18,000 synthetic neuroimaging samples covering a worldwide healthy population across the human lifespan.
## Dataset Details
The dataset has the following characteristics:
* Generative models were trained on 40,000 subjects from the iSTAGING consortium to synthesize 145 brain anatomical region-of-interest (ROI) volumes derived from structural T1-weighted magnetic resonance imaging (MRI).
* The dataset includes participants' demographic information (sex, age, and race), which is useful for research on mitigating algorithmic bias and promoting fairness.
* In addition to the 18,000 samples, we also share the pre-trained generative models so that users can run their own customized data synthesis (a minimal sampling sketch is shown after this list).
* Please check our paper for more details: https://arxiv.org/abs/2407.12897.
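For users who download the pre-trained models, the minimal sketch below mirrors the sampling code used by this Space; the file name `model/kde_asian_female.npz` is just one example of the per-sex/per-race model bundles:
```
import numpy as np

# Each bundle stores a fitted KernelDensity model, its StandardScaler, and the column names.
bundle = np.load("model/kde_asian_female.npz", allow_pickle=True)["model"].item()
kde, scaler, columns = bundle["model"], bundle["scaler"], bundle["columns"]

# Draw 100 synthetic subjects and map them back to the original feature scale.
samples = scaler.inverse_transform(kde.sample(100, random_state=0))
```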
## Dataset Download
* For Safari users, please right-click the "Download" button below and select "Download Linked File"
* Users of other browsers can click the "Download" button directly to save the data
* Download via command line:
```
wget https://rongguangw-genmind.hf.space/file=dataset/genmind_dataset.csv
```
* You can also customize the subjects' sex and race and the sample size by generating data online in the panels below. The synthesized data will be shown in the lower-right panel, and you can then save it by clicking the "Download" button.
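Whichever way you obtain it, the saved CSV can be loaded directly with pandas, for example:
```
import pandas as pd

df = pd.read_csv("genmind_dataset.csv")  # demographic columns (PTID, Sex, Race, Age) plus the 145 ROI volumes
print(df.shape)
```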
"""
citation = """
## Citation
```
@article{chintapalli2024neurosynth,
title={Generative models of MRI-derived neuroimaging features and associated dataset of 18000 samples},
author={Chintapalli, Sai Spandana and Wang, Rongguang and Yang, Zhijian and Tassopoulou, Vasiliki and Yu, Fanyang and Bashyam, Vishnu and Erus, Guray and Chaudhari, Pratik and Shou, Haochang and Davatzikos, Christos},
journal={arXiv preprint arXiv:2407.12897},
year={2024}
}
```
"""
file_name = "dataset/genmind_dataset.csv"    # pre-generated 18,000-sample dataset served by the "Download" button
save_name = "dataset/customized_genmind.csv" # output path for user-customized synthetic samples
example_df = pd.read_csv(file_name)
def infer(sex, race, num_sample):
    num_sample = int(num_sample)
    # Mapping from internal ROI column names (H_*) to human-readable ROI labels.
    col_dict = np.load("model/col_dict.npz", allow_pickle=True)['dict'].item()
    # Load the pre-trained KDE, its scaler, and the column names for the requested sex/race group.
    bundle = np.load("model/kde_{}_{}.npz".format(race.lower(), sex.lower()), allow_pickle=True)['model'].item()
    kde, scaler, cols_names = bundle['model'], bundle['scaler'], bundle['columns']
    # Draw synthetic samples and map them back to the original feature scale.
    sample = kde.sample(num_sample, random_state=0)
    sample = scaler.inverse_transform(sample)
    # Prepend synthetic subject IDs and the requested demographic covariates.
    cov_list = np.array([[f'Synth_{i+1}', sex[0], race] for i in range(num_sample)])
    new_data = np.concatenate([cov_list, sample], axis=1)
    cols = ['PTID', 'Sex', 'Race', 'Age']
    cols.extend([col_dict[i] for i in fnmatch.filter(cols_names, 'H_*')])
    df_kde_synth = pd.DataFrame(new_data, columns=cols)
    df_kde_synth['Age'] = round(df_kde_synth['Age'].astype('float'))
    df_kde_synth.to_csv(save_name, index=False)
    return gr.Dataframe(df_kde_synth.head(), label='Results (only showing the first few rows)', show_label=True), gr.Button("Download", link="/file=" + save_name)
with gr.Blocks() as demo:
    gr.Markdown(description)
    with gr.Group():
        # Preview of the pre-generated dataset plus a button to download the full CSV.
        example = gr.Dataframe(example_df.head(),
                               label='Example data (only showing the first five rows, download to check the full table)',
                               show_label=True)
        gr.Button("Download", link="/file=" + file_name)
    gr.Markdown("## Customized Dataset Generation")
    # Form for generating a customized synthetic dataset with the pre-trained models.
    gr.Interface(
        fn=infer,
        inputs=[
            gr.Radio(
                choices=['Female', 'Male'],
                value='Female',
                type='value',
                label='Gender',
                interactive=True,
            ),
            gr.Radio(
                choices=['White', 'Black', 'Asian'],
                value='Asian',
                type='value',
                label='Race',
                interactive=True,
            ),
            gr.Textbox(
                label='Number of subjects',
                value=10,
                show_label=True,
                placeholder='Enter sample size (an integer) ...',
            ),
        ],
        outputs=["dataframe", "button"],
        cache_examples=False,
    )
    gr.Markdown(citation)
if __name__ == "__main__":
    demo.launch(debug=True, share=True, allowed_paths=["dataset/"])