diff --git a/.gitattributes b/.gitattributes index 2528dea64cb7bd0bd9baecc22fd343c316cbfb12..11112df8a2694920381faa7506e01c1827b17e62 100644 --- a/.gitattributes +++ b/.gitattributes @@ -38,3 +38,10 @@ examples/caixukun.mp4 filter=lfs diff=lfs merge=lfs -text examples/flower.mp4 filter=lfs diff=lfs merge=lfs -text examples/food_menu.png filter=lfs diff=lfs merge=lfs -text examples/star_kun.mp4 filter=lfs diff=lfs merge=lfs -text +model/lib/pointnet2/build/lib.linux-x86_64-cpython-39/pointnet2_cuda.cpython-39-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/ball_query.o filter=lfs diff=lfs merge=lfs -text +model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/group_points.o filter=lfs diff=lfs merge=lfs -text +model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/interpolate.o filter=lfs diff=lfs merge=lfs -text +model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/pointnet2_api.o filter=lfs diff=lfs merge=lfs -text +model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/sampling.o filter=lfs diff=lfs merge=lfs -text +model/lib/pointnet2/dist/pointnet2-0.0.0-py3.9-linux-x86_64.egg filter=lfs diff=lfs merge=lfs -text diff --git a/app.py b/app.py index e7f193d938cd04ac8f33daa9c8b609b9acf32b9b..c9ecae2f56f481eb9f0796cebcd9ae47de7781a6 100644 --- a/app.py +++ b/app.py @@ -22,6 +22,10 @@ from data.fintune_dataset import make_audio_features from data import video_utils from dataclasses import dataclass from huggingface_hub import hf_hub_download +import plotly.graph_objects as go +from data.fintune_dataset import pc_norm +from functools import partial +import glob T_random_resized_crop = transforms.Compose([ transforms.RandomResizedCrop(size=(224, 224), scale=(0.9, 1.0), ratio=(0.75, 1.3333), interpolation=3, @@ -39,6 +43,17 @@ def load_video(video_path): video_feats = video_utils.load_and_transform_video_data(video_path, video_path, clip_duration=1, clips_per_video=5) return video_feats[:, :, 0] +def load_point(point_path): + point_feat = np.load(point_path) + point_feat = torch.tensor(point_feat) + point_feat = pc_norm(point_feat) + return point_feat + +def load_fmri(fmri_path): + data = np.load(fmri_path) + data = data.mean(axis=0) + data = torch.tensor(data[None]) + return data def model_worker( rank: int, args: argparse.Namespace, barrier: mp.Barrier, @@ -79,7 +94,8 @@ def model_worker( with default_tensor_type(dtype=target_dtype, device="cuda"): model = MetaModel(args.llama_type, args.llama_config, tokenizer_path=args.tokenizer_path) for ckpt_id in range(args.num_ckpts): - ckpt_path = hf_hub_download(repo_id=args.pretrained_path, filename=args.ckpt_format.format(str(ckpt_id))) + # ckpt_path = hf_hub_download(repo_id=args.pretrained_path, filename=args.ckpt_format.format(str(ckpt_id))) + ckpt_path = os.path.join(args.pretrained_path, args.ckpt_format.format(str(ckpt_id))) print(f"Loading pretrained weights {ckpt_path}") checkpoint = torch.load(ckpt_path, map_location='cpu') msg = model.load_state_dict(checkpoint, strict=False) @@ -91,7 +107,7 @@ def model_worker( barrier.wait() while True: - img_path, audio_path, video_path, chatbot, max_gen_len, temperature, top_p, modality = request_queue.get() + img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, temperature, top_p, modality = request_queue.get() if 'image' in modality and img_path is not None: image = Image.open(img_path).convert('RGB') inputs = T_random_resized_crop(image) @@ -99,6 +115,10 @@ def model_worker( inputs = 
load_video(video_path) elif 'audio' in modality and audio_path is not None: inputs = load_audio(audio_path) + elif 'point' in modality and point_path is not None: + inputs = load_point(point_path) + elif 'fmri' in modality and fmri_path is not None: + inputs = load_fmri(fmri_path) else: inputs = None @@ -164,9 +184,9 @@ def gradio_worker( def show_user_input(msg, chatbot): return "", chatbot + [[msg, None]] - def stream_model_output(img_path, audio_path, video_path, chatbot, max_gen_len, gen_t, top_p, modality): + def stream_model_output(img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality): for queue in request_queues: - queue.put((img_path, audio_path, video_path, chatbot, max_gen_len, gen_t, top_p, modality)) + queue.put((img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality)) while True: content_piece = response_queue.get() chatbot[-1][1] = content_piece["text"] @@ -184,12 +204,27 @@ def gradio_worker( msg = "" return chatbot, msg - def change_modality_image(): - return 'image' - def change_modality_video(): - return 'video' - def change_modality_audio(): - return 'audio' + def show_point_cloud(file): + point = load_point(file).numpy() + fig = go.Figure( + data=[ + go.Scatter3d( + x=point[:,0], y=point[:,1], z=point[:,2], + mode='markers', + marker=dict( + size=1.2, + color=['rgb({},{},{})'.format(r, g, b) for r,g,b in zip(point[:,3], point[:,4], point[:,5])] + ))], + layout=dict( + scene=dict( + xaxis=dict(visible=False), + yaxis=dict(visible=False), + zaxis=dict(visible=False) + )),) + return fig + + def change_modality(modal): + return modal CSS =""" .contain { display: flex; flex-direction: column; } @@ -235,11 +270,28 @@ def gradio_worker( inputs=[audio_path], ) with gr.Tab('Point Cloud') as point_tab: - gr.Markdown('Coming soon🤗') + point_path = gr.File(label='Point Cloud Input', elem_id="pointpath", elem_classes="") + point_vis = gr.Plot() + btn = gr.Button(value="Show Point Cloud") + btn.click(show_point_cloud, point_path, point_vis) + gr.Examples( + examples=glob.glob("examples/point/*.npy"), + inputs=[point_path], + examples_per_page=5, + ) with gr.Tab('IMU') as imu_tab: gr.Markdown('Coming soon🤗') with gr.Tab('fMRI') as fmri_tab: - gr.Markdown('Coming soon🤗') + fmri_path = gr.File(label='fMRI Input', elem_id="fmripath", elem_classes="") + fmri_image_path = gr.Image(interactive=False) + gr.Examples( + examples=[ + [file.replace('.jpg', '.npy'), file] + for file in glob.glob("examples/fmri/*.jpg") + ], + inputs=[fmri_path, fmri_image_path], + examples_per_page=3, + ) with gr.Tab('Depth Map') as depth_tab: gr.Markdown('Coming soon🤗') with gr.Tab('Normal Map') as normal_tab: @@ -252,7 +304,7 @@ def gradio_worker( with gr.Row(): submit_button = gr.Button("Submit", variant="primary") undo_button = gr.Button("Undo") - clear_button = gr.ClearButton([chatbot, msg, img_path, audio_path, video_path]) + clear_button = gr.ClearButton([chatbot, msg, img_path, audio_path, video_path, point_path, fmri_path, point_vis]) with gr.Row(): max_gen_len = gr.Slider( minimum=1, maximum=args.model_max_seq_len // 2, @@ -268,19 +320,21 @@ def gradio_worker( label="Top-p", ) - img_tab.select(change_modality_image, [], [modality]) - video_tab.select(change_modality_video, [], [modality]) - audio_tab.select(change_modality_audio, [], [modality]) + img_tab.select(partial(change_modality, 'image'), [], [modality]) + video_tab.select(partial(change_modality, 'video'), [], [modality]) + 
audio_tab.select(partial(change_modality, 'audio'), [], [modality]) + point_tab.select(partial(change_modality, 'point'), [], [modality]) + fmri_tab.select(partial(change_modality, 'fmri'), [], [modality]) msg.submit( show_user_input, [msg, chatbot], [msg, chatbot], ).then( - stream_model_output, [img_path, audio_path, video_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot, + stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot, ) submit_button.click( show_user_input, [msg, chatbot], [msg, chatbot], ).then( - stream_model_output, [img_path, audio_path, video_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot, + stream_model_output, [img_path, audio_path, video_path, point_path, fmri_path, chatbot, max_gen_len, gen_t, top_p, modality], chatbot, ) undo_button.click(undo, chatbot, chatbot) # img_path.change(clear, [], [chatbot, msg]) @@ -297,10 +351,11 @@ class DemoConfig: model_max_seq_len = 2048 # pretrained_path = "weights/7B_2048/consolidated.00-of-01.pth" # pretrained_path = hf_hub_download(repo_id="csuhan/OneLLM-7B", filename="consolidated.00-of-01.pth") - pretrained_path = "csuhan/OneLLM-7B-hf" + # pretrained_path = "csuhan/OneLLM-7B-hf" + pretrained_path = "/home/pgao/jiaming/weights/7B_v20_splits/" ckpt_format = "consolidated.00-of-01.s{}.pth" num_ckpts = 10 - master_port = 23861 + master_port = 23863 master_addr = "127.0.0.1" dtype = "fp16" diff --git a/examples/fmri/sample000000498.jpg b/examples/fmri/sample000000498.jpg new file mode 100644 index 0000000000000000000000000000000000000000..04638377ab900f6215333e579c2645b1358db07e Binary files /dev/null and b/examples/fmri/sample000000498.jpg differ diff --git a/examples/fmri/sample000000498.npy b/examples/fmri/sample000000498.npy new file mode 100644 index 0000000000000000000000000000000000000000..47fb8079be709e296992bbd1b065c61c684b1342 --- /dev/null +++ b/examples/fmri/sample000000498.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db81853dbaf9ed1869162d9f7218e6b53edf6ea8fac391ef181f59304c3918d4 +size 94472 diff --git a/examples/fmri/sample000000789.jpg b/examples/fmri/sample000000789.jpg new file mode 100644 index 0000000000000000000000000000000000000000..a264d1d70249cae4ab44348cb9e472ed0cd4bfc6 Binary files /dev/null and b/examples/fmri/sample000000789.jpg differ diff --git a/examples/fmri/sample000000789.npy b/examples/fmri/sample000000789.npy new file mode 100644 index 0000000000000000000000000000000000000000..8788bf494144b9f0b6b17df98860f9054c16a276 --- /dev/null +++ b/examples/fmri/sample000000789.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e659eefe264f472847e496b4990459300fbb73587f0f99f69f630b3bdcd86b53 +size 94472 diff --git a/examples/fmri/sample000001037.jpg b/examples/fmri/sample000001037.jpg new file mode 100644 index 0000000000000000000000000000000000000000..57ff3ba732878894e104c3e93622c165b3f752ad Binary files /dev/null and b/examples/fmri/sample000001037.jpg differ diff --git a/examples/fmri/sample000001037.npy b/examples/fmri/sample000001037.npy new file mode 100644 index 0000000000000000000000000000000000000000..3a5a2cf33676e168f3efa6eab2e141c4a85db3e0 --- /dev/null +++ b/examples/fmri/sample000001037.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e62dcb5ad478c8d50e3b60d558e4df56a03c1f3999883da2719c43464db48dc4 +size 94472 diff --git a/examples/fmri/sample000001554.jpg b/examples/fmri/sample000001554.jpg new file mode 100644 
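
# A minimal smoke-test sketch for the two new modality loaders that app.py gains
# above (not part of the PR itself): load_point() is np.load + pc_norm, and
# load_fmri() averages over axis 0 and adds a leading dimension. pc_norm comes
# from data.fintune_dataset, as in the diff. The (8192, 6) point shape is only
# inferred from the example filenames, so treat it as an assumption.
import glob
import numpy as np
import torch
from data.fintune_dataset import pc_norm

def _load_point(point_path):
    # mirrors the new app.load_point: np.load -> tensor -> pc_norm
    return pc_norm(torch.tensor(np.load(point_path)))

def _load_fmri(fmri_path):
    # mirrors the new app.load_fmri: mean over axis 0, then add a leading dim
    return torch.tensor(np.load(fmri_path).mean(axis=0)[None])

if __name__ == "__main__":
    point = _load_point(sorted(glob.glob("examples/point/*.npy"))[0])
    fmri = _load_fmri(sorted(glob.glob("examples/fmri/*.npy"))[0])
    print(point.shape, fmri.shape)  # expect roughly (8192, 6) and (1, D)
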
index 0000000000000000000000000000000000000000..0c44473e9eb8d947756355405f0143ce4b537f98 Binary files /dev/null and b/examples/fmri/sample000001554.jpg differ diff --git a/examples/fmri/sample000001554.npy b/examples/fmri/sample000001554.npy new file mode 100644 index 0000000000000000000000000000000000000000..cfe2f7905324d1dfeb39860d5eda5dee9ec252d8 --- /dev/null +++ b/examples/fmri/sample000001554.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54e647f3823c878647b2ef18966e033b6f9d865016afc30be2cd06e09b3511f8 +size 94472 diff --git a/examples/fmri/sample000002995.jpg b/examples/fmri/sample000002995.jpg new file mode 100644 index 0000000000000000000000000000000000000000..74ec7d71f26f658ae20488541e667e2a77d98d9b Binary files /dev/null and b/examples/fmri/sample000002995.jpg differ diff --git a/examples/fmri/sample000002995.npy b/examples/fmri/sample000002995.npy new file mode 100644 index 0000000000000000000000000000000000000000..4990a7d7123117c4c55766549c71c7e4531e923e --- /dev/null +++ b/examples/fmri/sample000002995.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9d9187274787d4f1d07292c9c25ea117f7810902b75cd9434aade4a7d659f9d +size 94472 diff --git a/examples/fmri/sample000004364.jpg b/examples/fmri/sample000004364.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fb86fd6d449a0165d220f0d2c414617a9e31e00d Binary files /dev/null and b/examples/fmri/sample000004364.jpg differ diff --git a/examples/fmri/sample000004364.npy b/examples/fmri/sample000004364.npy new file mode 100644 index 0000000000000000000000000000000000000000..75a7ff22e36a07bd28a1d86a02d1afbec9f524c5 --- /dev/null +++ b/examples/fmri/sample000004364.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4800075aa129fd3bbdd27c63c7597b375cac3552555ac1173db11542275125 +size 94472 diff --git a/examples/fmri/sample000004550.jpg b/examples/fmri/sample000004550.jpg new file mode 100644 index 0000000000000000000000000000000000000000..c47e80b1d921ee68877bd7d2d21adff3ddcf8573 Binary files /dev/null and b/examples/fmri/sample000004550.jpg differ diff --git a/examples/fmri/sample000004550.npy b/examples/fmri/sample000004550.npy new file mode 100644 index 0000000000000000000000000000000000000000..cb2ee3c627fed44a56967b732cb9bb7dc5fb2c28 --- /dev/null +++ b/examples/fmri/sample000004550.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64f6c75ff93ddb5988fc07f861fe4ff053996dcd7d72a96807fce77efbb82cd7 +size 94472 diff --git a/examples/fmri/sample000005047.jpg b/examples/fmri/sample000005047.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fc14277ec9c83a145951dee3b1d3ddd94813bf79 Binary files /dev/null and b/examples/fmri/sample000005047.jpg differ diff --git a/examples/fmri/sample000005047.npy b/examples/fmri/sample000005047.npy new file mode 100644 index 0000000000000000000000000000000000000000..961b9bc63816f8774251cc637e93ef51866e1e14 --- /dev/null +++ b/examples/fmri/sample000005047.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e44a51e7f0875a3a5d9b132ccc9f2f0ce2af13a566956272773727f80f2afa3 +size 94472 diff --git a/examples/fmri/sample000005139.jpg b/examples/fmri/sample000005139.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7204d5e7d384d24ec53b83f2dc07ace3774ce9a2 Binary files /dev/null and b/examples/fmri/sample000005139.jpg differ diff --git a/examples/fmri/sample000005139.npy b/examples/fmri/sample000005139.npy new file mode 
100644 index 0000000000000000000000000000000000000000..8558f84729ef4400b3d7ed508d5e6ce499535f85 --- /dev/null +++ b/examples/fmri/sample000005139.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d08f633cc921f084de2512717b08f14631581c74eacfaf7e2e025b59adc377b +size 94472 diff --git a/examples/point/0031ba19d3e042c4bcf79eba40ccc812_8192.npy b/examples/point/0031ba19d3e042c4bcf79eba40ccc812_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..c6d580cdc86c4b1ad50065a5acbc643a67c8ca37 --- /dev/null +++ b/examples/point/0031ba19d3e042c4bcf79eba40ccc812_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d3c8cd5e953e30bc86a706e1c69d629bde6711820e3ba4d106ccd1befe7686 +size 196736 diff --git a/examples/point/1a282b70b3f14fd79d739891e4327df5_8192.npy b/examples/point/1a282b70b3f14fd79d739891e4327df5_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..b70e5bbbd4f4e7038f6203894f02a1b43498e9f6 --- /dev/null +++ b/examples/point/1a282b70b3f14fd79d739891e4327df5_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:874bc35c0b23a5bc6063e71b420ae51b82190862b87b4c37e8295f5ca4be2e05 +size 196736 diff --git a/examples/point/1ee7fc4081ac47ccaae8112b6e925c51_8192.npy b/examples/point/1ee7fc4081ac47ccaae8112b6e925c51_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..b093b25d638c023d9d1e0bf7aa4a19b486668c0e --- /dev/null +++ b/examples/point/1ee7fc4081ac47ccaae8112b6e925c51_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d011e25b3745bcd9f3266b9d9f4b4cd03d680f007b6fc46de7b621dbc2bbb4 +size 196736 diff --git a/examples/point/2221aec5809d43d88aedf82738432318_8192.npy b/examples/point/2221aec5809d43d88aedf82738432318_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..a9e1868fdc9b2ce87d243a0a0ee0cee7b924b98f --- /dev/null +++ b/examples/point/2221aec5809d43d88aedf82738432318_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7569c06b11aaea351477d244d8e76152501190f16e28227de2ebf7ba5354c0b +size 196736 diff --git a/examples/point/3061e9a9236d4b98b74c95ff189d28a0_8192.npy b/examples/point/3061e9a9236d4b98b74c95ff189d28a0_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..4d246837ff09291d6c4216a7e029685117951540 --- /dev/null +++ b/examples/point/3061e9a9236d4b98b74c95ff189d28a0_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3202eed35e0259d8ff00a49c35b2f0f61324fd90da78ea832f5c356fccd9ea35 +size 196736 diff --git a/examples/point/39930c4d685f4a9c8f7bd63c1a2a7dee_8192.npy b/examples/point/39930c4d685f4a9c8f7bd63c1a2a7dee_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..6a4c24f848b82cc62f7c3bf87f8873ddc0d083e6 --- /dev/null +++ b/examples/point/39930c4d685f4a9c8f7bd63c1a2a7dee_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eafe3bed10c54cf0f862982a36bf4edf5cb7fccb8e0af7c0188a5a900d61286 +size 196736 diff --git a/examples/point/3a61b9d2451c4df2a1c6a6550e1875df_8192.npy b/examples/point/3a61b9d2451c4df2a1c6a6550e1875df_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..cd3e149bf44a0b7419f95e2f4e07298001fcf563 --- /dev/null +++ b/examples/point/3a61b9d2451c4df2a1c6a6550e1875df_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f9be8648887b54273884df915f1f351877499fb6092f2e85b6cfaf67b03476e +size 
196736 diff --git a/examples/point/442e0532a7634ad1ba22d28534267e2f_8192.npy b/examples/point/442e0532a7634ad1ba22d28534267e2f_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..831b55f10815d7028011c868fd95531722eef031 --- /dev/null +++ b/examples/point/442e0532a7634ad1ba22d28534267e2f_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf7dfbf43f9794c8a5c90033eb458010d13defa6dc04a1d61b274675e248bfa7 +size 196736 diff --git a/examples/point/463fe91df66e44a681ebbcd7f7ba59e4_8192.npy b/examples/point/463fe91df66e44a681ebbcd7f7ba59e4_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..ab82b31f4088e351abbf3b0a79fdb23b5f3bf643 --- /dev/null +++ b/examples/point/463fe91df66e44a681ebbcd7f7ba59e4_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21033208a5418f923e1c281d5bdd81ac75b09c6d047030350a718bd50d8f5185 +size 196736 diff --git a/examples/point/494854c6ffd540339a1bc6bd91c0349b_8192.npy b/examples/point/494854c6ffd540339a1bc6bd91c0349b_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..80408012c7e5b02fbce43e7d01252ad23223df0a --- /dev/null +++ b/examples/point/494854c6ffd540339a1bc6bd91c0349b_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5a87443e5b14abcf04738ae9ceb01c62e87c3b5108e1db147880b7bdb700160 +size 196736 diff --git a/examples/point/4adbacc99de44b5597ce3e1f62026fc1_8192.npy b/examples/point/4adbacc99de44b5597ce3e1f62026fc1_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..8fd1e3f91e6fc8bf88ea6daed190f571eb0556b9 --- /dev/null +++ b/examples/point/4adbacc99de44b5597ce3e1f62026fc1_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81d79afe15fa2c011dd82c7cfbbe17bb062b364824abcb37f7b3da68a763a4c3 +size 196736 diff --git a/examples/point/4d6f0a6aab8e45f4af131ac517c74363_8192.npy b/examples/point/4d6f0a6aab8e45f4af131ac517c74363_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..95b22997a5acc93e2ea0b03ce46b57d2691dc61d --- /dev/null +++ b/examples/point/4d6f0a6aab8e45f4af131ac517c74363_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ad1e243593b19e7e58a77f0512a8f892d663142853efac1e56c5a83db78cc50 +size 196736 diff --git a/examples/point/541747a461f84142a113e2f591a7eb9b_8192.npy b/examples/point/541747a461f84142a113e2f591a7eb9b_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..4a5462ff31a8089082d316f5b6c346e5dbadefc5 --- /dev/null +++ b/examples/point/541747a461f84142a113e2f591a7eb9b_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d00c235c92766c9dde832209df18f036e68c74b11e477f40d283cde1af8b1ea2 +size 196736 diff --git a/examples/point/6796698b4d564377838c06319ec96f90_8192.npy b/examples/point/6796698b4d564377838c06319ec96f90_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..35de7ae372bec975e106c297ae9da201c0e08a8e --- /dev/null +++ b/examples/point/6796698b4d564377838c06319ec96f90_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da91660e20b77a3f9f8a8ab173cc6c0998063282d387049906252bbf661f0b5f +size 196736 diff --git a/examples/point/69865c89fc7344be8ed5c1a54dbddc20_8192.npy b/examples/point/69865c89fc7344be8ed5c1a54dbddc20_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..f6acb82a325911acc69d8a56684ee4b2240831a7 --- /dev/null +++ 
b/examples/point/69865c89fc7344be8ed5c1a54dbddc20_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99a8363e432756f90425b6a5192cf5b8a547a5bd009577e72203fd42fd2319cf +size 196736 diff --git a/examples/point/7610a160fbaf41d681c7d7b9627df85d_8192.npy b/examples/point/7610a160fbaf41d681c7d7b9627df85d_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..511fb7bc10aec9e71fcf5ccde7a2322bdc4b57db --- /dev/null +++ b/examples/point/7610a160fbaf41d681c7d7b9627df85d_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca43630b2f139a5b0a90633f1d13d3f61e4432bf9976de5c233791ab57553dae +size 196736 diff --git a/examples/point/7f309e755a0c4b69b64e667977268087_8192.npy b/examples/point/7f309e755a0c4b69b64e667977268087_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..d2018b84489b367622b1f5462b3b2bca4195282a --- /dev/null +++ b/examples/point/7f309e755a0c4b69b64e667977268087_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0add1f7bb1ad3f4b70c383431e7a2ac372c8e4377d2987c4fc08f6bd6776d5af +size 196736 diff --git a/examples/point/844518dcccf44c86ad1ead660ab103fb_8192.npy b/examples/point/844518dcccf44c86ad1ead660ab103fb_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..1add0562ccfe7db199118d5a96e99bb9bb4b73bb --- /dev/null +++ b/examples/point/844518dcccf44c86ad1ead660ab103fb_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c2a1284188bf60e405e27f2008030247657ea256e9e3f95940afde294a8080 +size 196736 diff --git a/examples/point/88867268d228490da73b99d3126dc25c_8192.npy b/examples/point/88867268d228490da73b99d3126dc25c_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..0c5d9d52ada786f07cd4ffb2d8504ee9d6fc9f4a --- /dev/null +++ b/examples/point/88867268d228490da73b99d3126dc25c_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53bca2ad577b7eaf2c531379f69d9d904ad632a57205763ff5e17f2d2bfa7716 +size 196736 diff --git a/examples/point/8c833864d0bb40f4b3f3a3fe1e99e702_8192.npy b/examples/point/8c833864d0bb40f4b3f3a3fe1e99e702_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..71d5da5b41e311f4d5835a382588cded070e0f6b --- /dev/null +++ b/examples/point/8c833864d0bb40f4b3f3a3fe1e99e702_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89dff97c8370e615e5674b17a7ef527b24af2bfbfe764294a5efa103064ed887 +size 196736 diff --git a/examples/point/983fa8b23a084f5dacd157e6c9ceba97_8192.npy b/examples/point/983fa8b23a084f5dacd157e6c9ceba97_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..ef9998819af5e8486a59bf282aec68de8223939f --- /dev/null +++ b/examples/point/983fa8b23a084f5dacd157e6c9ceba97_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7a67c55f4bc9a29edb9eaf458bfdcc67cb93fad504da69f6afe0e10cf1c1e1e +size 196736 diff --git a/examples/point/9e9fa9bb1761412bb0e7ca9913f64e91_8192.npy b/examples/point/9e9fa9bb1761412bb0e7ca9913f64e91_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..d5e46cf24109459ff29a370b7eb51c8914b2f61d --- /dev/null +++ b/examples/point/9e9fa9bb1761412bb0e7ca9913f64e91_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afcffb80b428ee8728a9d30b0c7643655a1419a41872f4313082ad6768ee1eb2 +size 196736 diff --git a/examples/point/a4a0a21eedae4d82b0deac43b61dc4a3_8192.npy 
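
# Quick size check on the point-cloud examples above: every *_8192.npy LFS
# pointer records 196736 bytes. Assuming a standard 128-byte .npy header and
# float32 data, that is consistent with an (8192, 6) array per file, i.e. xyz
# plus rgb for each point, matching the in_channels=6 used by PointPatchEmbed
# later in this diff.
assert 8192 * 6 * 4 + 128 == 196736  # points * channels * float32 bytes + header
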
b/examples/point/a4a0a21eedae4d82b0deac43b61dc4a3_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..8f63fe8598f7b5f2306cad3e7ed24290d49a8c4e --- /dev/null +++ b/examples/point/a4a0a21eedae4d82b0deac43b61dc4a3_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:895b30c7ac4e89e53b4beaac9199c33d1bd4fa8a39f15fa52fe237365bad9ed4 +size 196736 diff --git a/examples/point/abc413b5e2d14a028dab615b49777167_8192.npy b/examples/point/abc413b5e2d14a028dab615b49777167_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..4ff8fa0f780b4fe80b4ad87ada1f129af14645ad --- /dev/null +++ b/examples/point/abc413b5e2d14a028dab615b49777167_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17dd6a38a6a0ad22e1ac17265d49b50083f196cbd8bfeb1f67cdfbc8df5eacc8 +size 196736 diff --git a/examples/point/b65b0ad074664b4aa624583e52fa685e_8192.npy b/examples/point/b65b0ad074664b4aa624583e52fa685e_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..f0ab2d7f6e9305c87e4aa3a5fee497d7ed8cbd44 --- /dev/null +++ b/examples/point/b65b0ad074664b4aa624583e52fa685e_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06e1b52c90abd98dc01adfa51048dc16806abbd4609c8cb1ab12bb99e52f65d3 +size 196736 diff --git a/examples/point/c20eb3a5a93e4cddb06c2f98626b1830_8192.npy b/examples/point/c20eb3a5a93e4cddb06c2f98626b1830_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..b93a3b131994b9f286d913e5edaa1db69cecc079 --- /dev/null +++ b/examples/point/c20eb3a5a93e4cddb06c2f98626b1830_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3793341c5f418f53b09b13e50a969b9bbb6ac865c4c59c273bbf41a06f1bc238 +size 196736 diff --git a/examples/point/d43136ef2b1a46998b7214df1a15b72c_8192.npy b/examples/point/d43136ef2b1a46998b7214df1a15b72c_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..4ef4d4bea6fa77c01db9cdac6be0714ab9108463 --- /dev/null +++ b/examples/point/d43136ef2b1a46998b7214df1a15b72c_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32cd2cd274d12c93873f41653a3d94599a8ab1108df029365f20cfd474f6bf38 +size 196736 diff --git a/examples/point/d4aa4126fce64397935d128ed54c3def_8192.npy b/examples/point/d4aa4126fce64397935d128ed54c3def_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..21f3f99a8fbcf123b00334081a9d1993769b70d2 --- /dev/null +++ b/examples/point/d4aa4126fce64397935d128ed54c3def_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cc8f49dcfeb8aca490d2ac7f08e6790ca09ed464f4be8b744710d365c23a487 +size 196736 diff --git a/examples/point/dd491278516040998667f9dd998fad4f_8192.npy b/examples/point/dd491278516040998667f9dd998fad4f_8192.npy new file mode 100644 index 0000000000000000000000000000000000000000..b214b850f8c2d920c384deb2dd1747ea4124c990 --- /dev/null +++ b/examples/point/dd491278516040998667f9dd998fad4f_8192.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e4d509ddcd27c36f7c413cc31b5372cc4510cf8d73df0521107b404279c4698 +size 196736 diff --git a/model/LLM/__pycache__/onellm.cpython-39.pyc b/model/LLM/__pycache__/onellm.cpython-39.pyc index 320f3ae803542ebcea9d3414a83ae4f5e5845455..c82979b64a1ece8c7e506cf5b6df9a77d81ffe6f 100644 Binary files a/model/LLM/__pycache__/onellm.cpython-39.pyc and b/model/LLM/__pycache__/onellm.cpython-39.pyc differ diff --git a/model/LLM/onellm.py 
b/model/LLM/onellm.py index 1a5195737c0448e3d83c3301acbd3fce3bcd0a4e..f363317224c40f96578931f6f1e2ac3f9133612e 100644 --- a/model/LLM/onellm.py +++ b/model/LLM/onellm.py @@ -280,8 +280,7 @@ class Transformer(nn.Module): self.routers = nn.ModuleDict() self.start_tag = nn.ParameterDict() self.end_tag = nn.ParameterDict() - # self.modals = ['image', 'audio', 'point', 'video', 'rgbd', 'rgbn', 'fmri', 'imu'] - self.modals = ['image', 'audio', 'video', 'rgbd', 'rgbn', 'fmri', 'imu'] + self.modals = ['image', 'audio', 'point', 'video', 'rgbd', 'rgbn', 'fmri', 'imu'] for modal in self.modals: if modal in ['image', 'video', 'rgbn', 'rgbn']: modal_tokens = 256 + 1 @@ -294,7 +293,7 @@ class Transformer(nn.Module): torch.empty([modal_tokens, clip_width])) nn.init.normal_(self.positional_embedding[modal], std=0.02) elif modal == 'point': - from lib.point_utils import PointPatchEmbed + from model.lib.point_utils import PointPatchEmbed self.conv1[modal] = PointPatchEmbed( in_channels=6, channels=clip_width) modal_tokens = 1024 + 1 diff --git a/model/lib/__pycache__/point_utils.cpython-310.pyc b/model/lib/__pycache__/point_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b52bf4169d4d84233f3178c745896d1fa395824f Binary files /dev/null and b/model/lib/__pycache__/point_utils.cpython-310.pyc differ diff --git a/model/lib/__pycache__/point_utils.cpython-39.pyc b/model/lib/__pycache__/point_utils.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..40cb28b6a960677ebcd805515aaea242a59c1fc6 Binary files /dev/null and b/model/lib/__pycache__/point_utils.cpython-39.pyc differ diff --git a/model/lib/point_utils.py b/model/lib/point_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..834733a64b540a141bfce09f6d0fae3154f89997 --- /dev/null +++ b/model/lib/point_utils.py @@ -0,0 +1,191 @@ +import torch +import torch.nn as nn +from torch.autograd import Function +import pointnet2_cuda + +class KNN(nn.Module): + def __init__(self, neighbors, transpose_mode=True): + super(KNN, self).__init__() + self.neighbors = neighbors + + @torch.no_grad() + def forward(self, support, query): + """ + Args: + support ([tensor]): [B, N, C] + query ([tensor]): [B, M, C] + Returns: + [int]: neighbor idx. 
[B, M, K] + """ + dist = torch.cdist(support, query) + k_dist = dist.topk(k=self.neighbors, dim=1, largest=False) + return k_dist.values, k_dist.indices.transpose(1, 2).contiguous().int() + + +class GroupingOperation(Function): + + @staticmethod + @torch.cuda.amp.custom_fwd(cast_inputs=torch.float32) + def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor: + """ + :param ctx: + :param features: (B, C, N) tensor of features to group + :param idx: (B, npoint, nsample) tensor containing the indicies of features to group with + :return: + output: (B, C, npoint, nsample) tensor + """ + assert features.is_contiguous() + assert idx.is_contiguous() + + B, nfeatures, nsample = idx.size() + _, C, N = features.size() + output = torch.cuda.FloatTensor(B, C, nfeatures, nsample, device=features.device) + + pointnet2_cuda.group_points_wrapper(B, C, N, nfeatures, nsample, features, idx, output) + + ctx.for_backwards = (idx, N) + return output + + @staticmethod + def backward(ctx, grad_out: torch.Tensor): + """ + :param ctx: + :param grad_out: (B, C, npoint, nsample) tensor of the gradients of the output from forward + :return: + grad_features: (B, C, N) gradient of the features + """ + idx, N = ctx.for_backwards + + B, C, npoint, nsample = grad_out.size() + grad_features = torch.zeros([B, C, N], dtype=torch.float, device=grad_out.device, requires_grad=True) + grad_out_data = grad_out.data.contiguous() + pointnet2_cuda.group_points_grad_wrapper(B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data) + return grad_features, None + +grouping_operation = GroupingOperation.apply + + +class KNNGroup(nn.Module): + def __init__(self, nsample: int, + relative_xyz=True, + normalize_dp=False, + return_only_idx=False, + **kwargs + ): + """[summary] + + Args: + nsample (int): maximum number of features to gather in the ball + use_xyz (bool, optional): concate xyz. Defaults to True. + ret_grouped_xyz (bool, optional): [description]. Defaults to False. + normalize_dp (bool, optional): [description]. Defaults to False. 
+ """ + super().__init__() + self.nsample = nsample + self.knn = KNN(nsample, transpose_mode=True) + self.relative_xyz = relative_xyz + self.normalize_dp = normalize_dp + self.return_only_idx = return_only_idx + + def forward(self, query_xyz: torch.Tensor, support_xyz: torch.Tensor, features: torch.Tensor = None): + """ + :param query_xyz: (B, N, 3) xyz coordinates of the features + :param support_xyz: (B, npoint, 3) centroids + :param features: (B, C, N) descriptors of the features + :return: + new_features: (B, 3 + C, npoint, nsample) + """ + _, idx = self.knn(support_xyz, query_xyz) + if self.return_only_idx: + return idx + idx = idx.int() + xyz_trans = support_xyz.transpose(1, 2).contiguous() + grouped_xyz = grouping_operation(xyz_trans, idx) # (B, 3, npoint, nsample) + if self.relative_xyz: + grouped_xyz -= query_xyz.transpose(1, 2).unsqueeze(-1) # relative position + if self.normalize_dp: + grouped_xyz /= torch.amax(torch.sqrt(torch.sum(grouped_xyz**2, dim=1)), dim=(1, 2)).view(-1, 1, 1, 1) + if features is not None: + grouped_features = grouping_operation(features, idx) + return grouped_xyz, grouped_features + else: + return grouped_xyz, None + + +class FurthestPointSampling(Function): + @staticmethod + def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor: + """ + Uses iterative furthest point sampling to select a set of npoint features that have the largest + minimum distance + :param ctx: + :param xyz: (B, N, 3) where N > npoint + :param npoint: int, number of features in the sampled set + :return: + output: (B, npoint) tensor containing the set (idx) + """ + assert xyz.is_contiguous() + + B, N, _ = xyz.size() + # output = torch.cuda.IntTensor(B, npoint, device=xyz.device) + # temp = torch.cuda.FloatTensor(B, N, device=xyz.device).fill_(1e10) + output = torch.cuda.IntTensor(B, npoint) + temp = torch.cuda.FloatTensor(B, N).fill_(1e10) + + pointnet2_cuda.furthest_point_sampling_wrapper( + B, N, npoint, xyz, temp, output) + return output + + @staticmethod + def backward(xyz, a=None): + return None, None + +furthest_point_sample = FurthestPointSampling.apply + + +class PointPatchEmbed(nn.Module): + + def __init__(self, + sample_ratio=0.0625, + sample_number=1024, + group_size=32, + in_channels=6, + channels=1024, + kernel_size=1, + stride=1, + normalize_dp=False, + relative_xyz=True, + ): + super().__init__() + self.sample_ratio = sample_ratio + self.sample_number = sample_number + self.group_size = group_size + + self.sample_fn = furthest_point_sample + self.grouper = KNNGroup(self.group_size, relative_xyz=relative_xyz, normalize_dp=normalize_dp) + + self.conv1 = nn.Conv2d(in_channels, channels, kernel_size=kernel_size, stride=stride) + + + def forward(self, x): + # coordinates + p = x[:, :, 3:].contiguous() + + B, N, _ = p.shape[:3] + # idx = self.sample_fn(p, int(N * self.sample_ratio)).long() + idx = self.sample_fn(p, self.sample_number).long() + center_p = torch.gather(p, 1, idx.unsqueeze(-1).expand(-1, -1, 3)) + # query neighbors. 
+ _, fj = self.grouper(center_p, p, x.permute(0, 2, 1).contiguous()) # [B, N, 6] -> [B, 6, N] -> [B, 6, 1024, 32] + + # [B, 6, 1024] -> [B, channels, 1024, 1] + fj = self.conv1(fj).max(dim=-1, keepdim=True)[0] + + return fj + + +if __name__ == '__main__': + model = PointPatchEmbed(channels=256).cuda() + input = torch.rand(4, 16384, 6).cuda() + ou = model(input) + import pdb;pdb.set_trace() \ No newline at end of file diff --git a/model/lib/pointnet2/build/lib.linux-x86_64-cpython-39/pointnet2_cuda.cpython-39-x86_64-linux-gnu.so b/model/lib/pointnet2/build/lib.linux-x86_64-cpython-39/pointnet2_cuda.cpython-39-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..560e153c8189128d5cd3034c8ffdd218592d169d --- /dev/null +++ b/model/lib/pointnet2/build/lib.linux-x86_64-cpython-39/pointnet2_cuda.cpython-39-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fe149eddea375d371bc331b11073003e0586254a25c6fe3769b2f6febf8bc54 +size 19727240 diff --git a/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/.ninja_deps b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/.ninja_deps new file mode 100644 index 0000000000000000000000000000000000000000..23bf9cfd32dc02f2f8d7b93e9f1a700f1757e16a Binary files /dev/null and b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/.ninja_deps differ diff --git a/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/.ninja_log b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/.ninja_log new file mode 100644 index 0000000000000000000000000000000000000000..0a3b9c5eb002e1ee2f92daf19926d37227fc1b14 --- /dev/null +++ b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/.ninja_log @@ -0,0 +1,10 @@ +# ninja log v5 +3 13814 1701773085497639679 /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/sampling.o df8f8e58c0087c9c +1 13838 1701773085525639600 /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/group_points.o 6b17362810207ae9 +2 13925 1701773085613639348 /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/interpolate.o 79c442400845cc89 +1 14511 1701773086197637684 /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/ball_query.o 79a41849bd99bac5 +1 27453 1701773099133600702 /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/ball_query_gpu.o c5cdc62e1b68c75a +2 27764 1701773099449599796 /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/group_points_gpu.o 8692792652d15ef5 +2 27829 1701773099513599612 /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/interpolate_gpu.o 18b2a84b93ded4ae +3 28276 1701773099961598327 /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/sampling_gpu.o f8c8e1e67e2b7e7f +2 30587 1701773102261591724 /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/pointnet2_api.o d21e0eacc4130d8e diff --git a/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/build.ninja b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/build.ninja new file mode 100644 index 0000000000000000000000000000000000000000..33b0fa2dae420b284a0a3d17f1230d2383b3d1a0 --- /dev/null +++ 
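
# Shape walk-through of the PointPatchEmbed added in model/lib/point_utils.py
# above. The real module needs the compiled pointnet2_cuda extension; the sketch
# below is a simplified CPU-only stand-in for illustration (random sampling in
# place of true furthest-point sampling, no relative-xyz or normalize_dp
# handling), not the repository's implementation.
#   x        [B, N, 6]              raw points; forward() slices x[:, :, 3:] for sampling
#   centers  [B, 1024, 3]           sampled centroids (sample_number=1024)
#   fj       [B, 6, 1024, 32]       6-d features of the 32 nearest neighbours
#   out      [B, channels, 1024, 1] after Conv2d(6, channels, 1) + max over neighbours
import torch

def knn_group_reference(x, num_centers=1024, k=32):
    p = x[:, :, 3:]                                     # same slice as the diff
    idx = torch.stack([torch.randperm(p.shape[1])[:num_centers]
                       for _ in range(p.shape[0])])     # stand-in for FPS
    centers = torch.gather(p, 1, idx.unsqueeze(-1).expand(-1, -1, 3))
    # k nearest points to each centroid, then gather their full 6-d features
    nn_idx = torch.cdist(centers, p).topk(k, dim=-1, largest=False).indices
    feats = x.permute(0, 2, 1)                          # [B, 6, N]
    return torch.stack([feats[b, :, nn_idx[b]] for b in range(x.shape[0])])

if __name__ == "__main__":
    fj = knn_group_reference(torch.rand(2, 16384, 6))
    out = torch.nn.Conv2d(6, 256, kernel_size=1)(fj).max(dim=-1, keepdim=True)[0]
    print(fj.shape, out.shape)  # [2, 6, 1024, 32] and [2, 256, 1024, 1]
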
b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/build.ninja @@ -0,0 +1,41 @@ +ninja_required_version = 1.3 +cxx = c++ +nvcc = /usr/local/cuda-11.7/bin/nvcc + +cflags = -pthread -B /usr/local/anaconda3/envs/onellm/compiler_compat -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /usr/local/anaconda3/envs/onellm/include -I/usr/local/anaconda3/envs/onellm/include -fPIC -O2 -isystem /usr/local/anaconda3/envs/onellm/include -fPIC -I/usr/local/anaconda3/envs/onellm/lib/python3.9/site-packages/torch/include -I/usr/local/anaconda3/envs/onellm/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -I/usr/local/anaconda3/envs/onellm/lib/python3.9/site-packages/torch/include/TH -I/usr/local/anaconda3/envs/onellm/lib/python3.9/site-packages/torch/include/THC -I/usr/local/cuda-11.7/include -I/usr/local/anaconda3/envs/onellm/include/python3.9 -c +post_cflags = -g -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=pointnet2_cuda -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++17 +cuda_cflags = -I/usr/local/anaconda3/envs/onellm/lib/python3.9/site-packages/torch/include -I/usr/local/anaconda3/envs/onellm/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -I/usr/local/anaconda3/envs/onellm/lib/python3.9/site-packages/torch/include/TH -I/usr/local/anaconda3/envs/onellm/lib/python3.9/site-packages/torch/include/THC -I/usr/local/cuda-11.7/include -I/usr/local/anaconda3/envs/onellm/include/python3.9 -c +cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options ''"'"'-fPIC'"'"'' -O2 -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=pointnet2_cuda -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 -std=c++17 +cuda_dlink_post_cflags = +ldflags = + +rule compile + command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags + depfile = $out.d + deps = gcc + +rule cuda_compile + depfile = $out.d + deps = gcc + command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags + + + + + +build /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/ball_query.o: compile /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/src/ball_query.cpp +build /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/ball_query_gpu.o: cuda_compile /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/src/ball_query_gpu.cu +build /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/group_points.o: compile /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/src/group_points.cpp +build /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/group_points_gpu.o: cuda_compile /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/src/group_points_gpu.cu +build /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/interpolate.o: compile /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/src/interpolate.cpp +build 
/data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/interpolate_gpu.o: cuda_compile /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/src/interpolate_gpu.cu +build /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/pointnet2_api.o: compile /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/src/pointnet2_api.cpp +build /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/sampling.o: compile /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/src/sampling.cpp +build /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/sampling_gpu.o: cuda_compile /data1/jiaming/OneLLM-Inference-huggingface/model/lib/pointnet2/src/sampling_gpu.cu + + + + + + + diff --git a/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/ball_query.o b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/ball_query.o new file mode 100644 index 0000000000000000000000000000000000000000..98cc431b50967470beeb958b3a91fbe5f8fb7a28 --- /dev/null +++ b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/ball_query.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9b31781d1db333cff0098b77791f56891c7cae5d87648f2612f11a1f1a9864 +size 7337552 diff --git a/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/ball_query_gpu.o b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/ball_query_gpu.o new file mode 100644 index 0000000000000000000000000000000000000000..fd27102cd859b508785cec38e5e59517f69fed82 Binary files /dev/null and b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/ball_query_gpu.o differ diff --git a/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/group_points.o b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/group_points.o new file mode 100644 index 0000000000000000000000000000000000000000..bbdc1e811d967ad589d8f0cea8405a423966fbd2 --- /dev/null +++ b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/group_points.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d9e48d335d2d36d29b3c375db543c52aaa9bd5bb7fdfce0bd4297ba4d5fe6fb +size 6912904 diff --git a/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/group_points_gpu.o b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/group_points_gpu.o new file mode 100644 index 0000000000000000000000000000000000000000..3fc7304cdc83ed830dcf262e0e965355929549ed Binary files /dev/null and b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/group_points_gpu.o differ diff --git a/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/interpolate.o b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/interpolate.o new file mode 100644 index 0000000000000000000000000000000000000000..072673655e1b4aa98f4685ea7ccc6e442f65c2b5 --- /dev/null +++ b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/interpolate.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b4d725783120b312d13f4b06a66707c565a10ac6974f77936047bd10cd48e4c +size 6921176 diff --git a/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/interpolate_gpu.o b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/interpolate_gpu.o new file mode 100644 index 0000000000000000000000000000000000000000..64a4e42fff8b532e4a081b42767e8fda2a17adb8 Binary files /dev/null and 
b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/interpolate_gpu.o differ diff --git a/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/pointnet2_api.o b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/pointnet2_api.o new file mode 100644 index 0000000000000000000000000000000000000000..35ca46a8561e5f18039129326b13e3242c61422f --- /dev/null +++ b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/pointnet2_api.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40c56336ad9fab872c6bdb28a197a37b2fb07881688b6ab496f9f6871ae11d12 +size 20280040 diff --git a/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/sampling.o b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/sampling.o new file mode 100644 index 0000000000000000000000000000000000000000..7c8e9bcf45651724c9db6da97bdb820d422ddff4 --- /dev/null +++ b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/sampling.o @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f531d5ca53aebfeb1aba74c3dd71cecc90a0d87c0d63f5358cfd5c5f8ebbe567 +size 6917856 diff --git a/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/sampling_gpu.o b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/sampling_gpu.o new file mode 100644 index 0000000000000000000000000000000000000000..f01f732437cecdf37161a4b28e5e28fd5baec30d Binary files /dev/null and b/model/lib/pointnet2/build/temp.linux-x86_64-cpython-39/src/sampling_gpu.o differ diff --git a/model/lib/pointnet2/dist/pointnet2-0.0.0-py3.9-linux-x86_64.egg b/model/lib/pointnet2/dist/pointnet2-0.0.0-py3.9-linux-x86_64.egg new file mode 100644 index 0000000000000000000000000000000000000000..0e42557616254366f9321f471960e2ce59a9fea4 --- /dev/null +++ b/model/lib/pointnet2/dist/pointnet2-0.0.0-py3.9-linux-x86_64.egg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a192c711c480c616d7f5f51f8202c3cba529a3135792d83f5b3d353ca59d383 +size 6166015 diff --git a/model/lib/pointnet2/pointnet2.egg-info/PKG-INFO b/model/lib/pointnet2/pointnet2.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..c44293899fb386f21d7a5c12d93d5cfbfb733e65 --- /dev/null +++ b/model/lib/pointnet2/pointnet2.egg-info/PKG-INFO @@ -0,0 +1,3 @@ +Metadata-Version: 2.1 +Name: pointnet2 +Version: 0.0.0 diff --git a/model/lib/pointnet2/pointnet2.egg-info/SOURCES.txt b/model/lib/pointnet2/pointnet2.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..21253f64ce882771ed3a8dd2feb25cd37cc26259 --- /dev/null +++ b/model/lib/pointnet2/pointnet2.egg-info/SOURCES.txt @@ -0,0 +1,14 @@ +setup.py +pointnet2.egg-info/PKG-INFO +pointnet2.egg-info/SOURCES.txt +pointnet2.egg-info/dependency_links.txt +pointnet2.egg-info/top_level.txt +src/ball_query.cpp +src/ball_query_gpu.cu +src/group_points.cpp +src/group_points_gpu.cu +src/interpolate.cpp +src/interpolate_gpu.cu +src/pointnet2_api.cpp +src/sampling.cpp +src/sampling_gpu.cu \ No newline at end of file diff --git a/model/lib/pointnet2/pointnet2.egg-info/dependency_links.txt b/model/lib/pointnet2/pointnet2.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/model/lib/pointnet2/pointnet2.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/model/lib/pointnet2/pointnet2.egg-info/top_level.txt b/model/lib/pointnet2/pointnet2.egg-info/top_level.txt new file mode 100644 index 
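
# The build/, dist/ and egg-info files above come from compiling the
# pointnet2_cuda extension in-tree. The repository's actual setup.py is not part
# of this diff; for the sources listed in SOURCES.txt and build.ninja, a typical
# setuptools script would look roughly like this sketch (an assumption, not the
# repo's exact script).
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension

setup(
    name='pointnet2',
    version='0.0.0',
    ext_modules=[
        CUDAExtension(
            name='pointnet2_cuda',
            sources=[
                'src/pointnet2_api.cpp',
                'src/ball_query.cpp', 'src/ball_query_gpu.cu',
                'src/group_points.cpp', 'src/group_points_gpu.cu',
                'src/interpolate.cpp', 'src/interpolate_gpu.cu',
                'src/sampling.cpp', 'src/sampling_gpu.cu',
            ],
        )
    ],
    cmdclass={'build_ext': BuildExtension},
)
# Built in-tree, e.g. `python setup.py install` from model/lib/pointnet2,
# which is what produces the checked-in .o files and the .egg shown here.
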
0000000000000000000000000000000000000000..2d59a59591ec1d9290fd49300f0b42015b991a16 --- /dev/null +++ b/model/lib/pointnet2/pointnet2.egg-info/top_level.txt @@ -0,0 +1 @@ +pointnet2_cuda diff --git a/model/lib/pointnet2/pointnet2_modules.py b/model/lib/pointnet2/pointnet2_modules.py new file mode 100644 index 0000000000000000000000000000000000000000..5f125ce5075c738897e5f6a78c71123d0e3e44a2 --- /dev/null +++ b/model/lib/pointnet2/pointnet2_modules.py @@ -0,0 +1,160 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from . import pointnet2_utils +from . import pytorch_utils as pt_utils +from typing import List + + +class _PointnetSAModuleBase(nn.Module): + + def __init__(self): + super().__init__() + self.npoint = None + self.groupers = None + self.mlps = None + self.pool_method = 'max_pool' + + def forward(self, xyz: torch.Tensor, features: torch.Tensor = None, new_xyz=None) -> (torch.Tensor, torch.Tensor): + """ + :param xyz: (B, N, 3) tensor of the xyz coordinates of the features + :param features: (B, N, C) tensor of the descriptors of the the features + :param new_xyz: + :return: + new_xyz: (B, npoint, 3) tensor of the new features' xyz + new_features: (B, npoint, \sum_k(mlps[k][-1])) tensor of the new_features descriptors + """ + new_features_list = [] + + xyz_flipped = xyz.transpose(1, 2).contiguous() + if new_xyz is None: + new_xyz = pointnet2_utils.gather_operation( + xyz_flipped, + pointnet2_utils.furthest_point_sample(xyz, self.npoint) + ).transpose(1, 2).contiguous() if self.npoint is not None else None + + for i in range(len(self.groupers)): + new_features = self.groupers[i](xyz, new_xyz, features) # (B, C, npoint, nsample) + + new_features = self.mlps[i](new_features) # (B, mlp[-1], npoint, nsample) + if self.pool_method == 'max_pool': + new_features = F.max_pool2d( + new_features, kernel_size=[1, new_features.size(3)] + ) # (B, mlp[-1], npoint, 1) + elif self.pool_method == 'avg_pool': + new_features = F.avg_pool2d( + new_features, kernel_size=[1, new_features.size(3)] + ) # (B, mlp[-1], npoint, 1) + else: + raise NotImplementedError + + new_features = new_features.squeeze(-1) # (B, mlp[-1], npoint) + new_features_list.append(new_features) + + return new_xyz, torch.cat(new_features_list, dim=1) + + +class PointnetSAModuleMSG(_PointnetSAModuleBase): + """Pointnet set abstraction layer with multiscale grouping""" + + def __init__(self, *, npoint: int, radii: List[float], nsamples: List[int], mlps: List[List[int]], bn: bool = True, + use_xyz: bool = True, pool_method='max_pool', instance_norm=False): + """ + :param npoint: int + :param radii: list of float, list of radii to group with + :param nsamples: list of int, number of samples in each ball query + :param mlps: list of list of int, spec of the pointnet before the global pooling for each scale + :param bn: whether to use batchnorm + :param use_xyz: + :param pool_method: max_pool / avg_pool + :param instance_norm: whether to use instance_norm + """ + super().__init__() + + assert len(radii) == len(nsamples) == len(mlps) + + self.npoint = npoint + self.groupers = nn.ModuleList() + self.mlps = nn.ModuleList() + for i in range(len(radii)): + radius = radii[i] + nsample = nsamples[i] + self.groupers.append( + pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz) + if npoint is not None else pointnet2_utils.GroupAll(use_xyz) + ) + mlp_spec = mlps[i] + if use_xyz: + mlp_spec[0] += 3 + + self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn, instance_norm=instance_norm)) + self.pool_method = 
pool_method + + +class PointnetSAModule(PointnetSAModuleMSG): + """Pointnet set abstraction layer""" + + def __init__(self, *, mlp: List[int], npoint: int = None, radius: float = None, nsample: int = None, + bn: bool = True, use_xyz: bool = True, pool_method='max_pool', instance_norm=False): + """ + :param mlp: list of int, spec of the pointnet before the global max_pool + :param npoint: int, number of features + :param radius: float, radius of ball + :param nsample: int, number of samples in the ball query + :param bn: whether to use batchnorm + :param use_xyz: + :param pool_method: max_pool / avg_pool + :param instance_norm: whether to use instance_norm + """ + super().__init__( + mlps=[mlp], npoint=npoint, radii=[radius], nsamples=[nsample], bn=bn, use_xyz=use_xyz, + pool_method=pool_method, instance_norm=instance_norm + ) + + +class PointnetFPModule(nn.Module): + r"""Propigates the features of one set to another""" + + def __init__(self, *, mlp: List[int], bn: bool = True): + """ + :param mlp: list of int + :param bn: whether to use batchnorm + """ + super().__init__() + self.mlp = pt_utils.SharedMLP(mlp, bn=bn) + + def forward( + self, unknown: torch.Tensor, known: torch.Tensor, unknow_feats: torch.Tensor, known_feats: torch.Tensor + ) -> torch.Tensor: + """ + :param unknown: (B, n, 3) tensor of the xyz positions of the unknown features + :param known: (B, m, 3) tensor of the xyz positions of the known features + :param unknow_feats: (B, C1, n) tensor of the features to be propigated to + :param known_feats: (B, C2, m) tensor of features to be propigated + :return: + new_features: (B, mlp[-1], n) tensor of the features of the unknown features + """ + if known is not None: + dist, idx = pointnet2_utils.three_nn(unknown, known) + dist_recip = 1.0 / (dist + 1e-8) + norm = torch.sum(dist_recip, dim=2, keepdim=True) + weight = dist_recip / norm + + interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight) + else: + interpolated_feats = known_feats.expand(*known_feats.size()[0:2], unknown.size(1)) + + if unknow_feats is not None: + new_features = torch.cat([interpolated_feats, unknow_feats], dim=1) # (B, C2 + C1, n) + else: + new_features = interpolated_feats + + new_features = new_features.unsqueeze(-1) + new_features = self.mlp(new_features) + + return new_features.squeeze(-1) + + +if __name__ == "__main__": + pass diff --git a/model/lib/pointnet2/pointnet2_utils.py b/model/lib/pointnet2/pointnet2_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e814102d8feb5e443e64a736e7733818e0a24685 --- /dev/null +++ b/model/lib/pointnet2/pointnet2_utils.py @@ -0,0 +1,290 @@ +import torch +from torch.autograd import Variable +from torch.autograd import Function +import torch.nn as nn +from typing import Tuple + +import pointnet2_cuda as pointnet2 + + +class FurthestPointSampling(Function): + @staticmethod + def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor: + """ + Uses iterative furthest point sampling to select a set of npoint features that have the largest + minimum distance + :param ctx: + :param xyz: (B, N, 3) where N > npoint + :param npoint: int, number of features in the sampled set + :return: + output: (B, npoint) tensor containing the set + """ + assert xyz.is_contiguous() + + B, N, _ = xyz.size() + output = torch.cuda.IntTensor(B, npoint) + temp = torch.cuda.FloatTensor(B, N).fill_(1e10) + + pointnet2.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp, output) + return output + + @staticmethod + def backward(xyz, 
a=None): + return None, None + + +furthest_point_sample = FurthestPointSampling.apply + + +class GatherOperation(Function): + + @staticmethod + def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor: + """ + :param ctx: + :param features: (B, C, N) + :param idx: (B, npoint) index tensor of the features to gather + :return: + output: (B, C, npoint) + """ + assert features.is_contiguous() + assert idx.is_contiguous() + + B, npoint = idx.size() + _, C, N = features.size() + output = torch.cuda.FloatTensor(B, C, npoint) + + pointnet2.gather_points_wrapper(B, C, N, npoint, features, idx, output) + + ctx.for_backwards = (idx, C, N) + return output + + @staticmethod + def backward(ctx, grad_out): + idx, C, N = ctx.for_backwards + B, npoint = idx.size() + + grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_()) + grad_out_data = grad_out.data.contiguous() + pointnet2.gather_points_grad_wrapper(B, C, N, npoint, grad_out_data, idx, grad_features.data) + return grad_features, None + + +gather_operation = GatherOperation.apply + + +class ThreeNN(Function): + + @staticmethod + def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Find the three nearest neighbors of unknown in known + :param ctx: + :param unknown: (B, N, 3) + :param known: (B, M, 3) + :return: + dist: (B, N, 3) l2 distance to the three nearest neighbors + idx: (B, N, 3) index of 3 nearest neighbors + """ + assert unknown.is_contiguous() + assert known.is_contiguous() + + B, N, _ = unknown.size() + m = known.size(1) + dist2 = torch.cuda.FloatTensor(B, N, 3) + idx = torch.cuda.IntTensor(B, N, 3) + + pointnet2.three_nn_wrapper(B, N, m, unknown, known, dist2, idx) + return torch.sqrt(dist2), idx + + @staticmethod + def backward(ctx, a=None, b=None): + return None, None + + +three_nn = ThreeNN.apply + + +class ThreeInterpolate(Function): + + @staticmethod + def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + """ + Performs weight linear interpolation on 3 features + :param ctx: + :param features: (B, C, M) Features descriptors to be interpolated from + :param idx: (B, n, 3) three nearest neighbors of the target features in features + :param weight: (B, n, 3) weights + :return: + output: (B, C, N) tensor of the interpolated features + """ + assert features.is_contiguous() + assert idx.is_contiguous() + assert weight.is_contiguous() + + B, c, m = features.size() + n = idx.size(1) + ctx.three_interpolate_for_backward = (idx, weight, m) + output = torch.cuda.FloatTensor(B, c, n) + + pointnet2.three_interpolate_wrapper(B, c, m, n, features, idx, weight, output) + return output + + @staticmethod + def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + :param ctx: + :param grad_out: (B, C, N) tensor with gradients of outputs + :return: + grad_features: (B, C, M) tensor with gradients of features + None: + None: + """ + idx, weight, m = ctx.three_interpolate_for_backward + B, c, n = grad_out.size() + + grad_features = Variable(torch.cuda.FloatTensor(B, c, m).zero_()) + grad_out_data = grad_out.data.contiguous() + + pointnet2.three_interpolate_grad_wrapper(B, c, n, m, grad_out_data, idx, weight, grad_features.data) + return grad_features, None, None + + +three_interpolate = ThreeInterpolate.apply + + +class GroupingOperation(Function): + + @staticmethod + def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor: + """ + :param ctx: + :param features: (B, 
C, N) tensor of features to group + :param idx: (B, npoint, nsample) tensor containing the indices of features to group with + :return: + output: (B, C, npoint, nsample) tensor + """ + assert features.is_contiguous() + assert idx.is_contiguous() + + B, nfeatures, nsample = idx.size() + _, C, N = features.size() + output = torch.cuda.FloatTensor(B, C, nfeatures, nsample) + + pointnet2.group_points_wrapper(B, C, N, nfeatures, nsample, features, idx, output) + + ctx.for_backwards = (idx, N) + return output + + @staticmethod + def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + :param ctx: + :param grad_out: (B, C, npoint, nsample) tensor of the gradients of the output from forward + :return: + grad_features: (B, C, N) gradient of the features + """ + idx, N = ctx.for_backwards + + B, C, npoint, nsample = grad_out.size() + grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_()) + + grad_out_data = grad_out.data.contiguous() + pointnet2.group_points_grad_wrapper(B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data) + return grad_features, None + + +grouping_operation = GroupingOperation.apply + + +class BallQuery(Function): + + @staticmethod + def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor, new_xyz: torch.Tensor) -> torch.Tensor: + """ + :param ctx: + :param radius: float, radius of the balls + :param nsample: int, maximum number of features in the balls + :param xyz: (B, N, 3) xyz coordinates of the features + :param new_xyz: (B, npoint, 3) centers of the ball query + :return: + idx: (B, npoint, nsample) tensor with the indices of the features that form the query balls + """ + assert new_xyz.is_contiguous() + assert xyz.is_contiguous() + + B, N, _ = xyz.size() + npoint = new_xyz.size(1) + idx = torch.cuda.IntTensor(B, npoint, nsample).zero_() + + pointnet2.ball_query_wrapper(B, N, npoint, radius, nsample, new_xyz, xyz, idx) + return idx + + @staticmethod + def backward(ctx, a=None): + return None, None, None, None + + +ball_query = BallQuery.apply + + +class QueryAndGroup(nn.Module): + def __init__(self, radius: float, nsample: int, use_xyz: bool = True): + """ + :param radius: float, radius of ball + :param nsample: int, maximum number of features to gather in the ball + :param use_xyz: bool, whether to concatenate the xyz coordinates to the grouped features + """ + super().__init__() + self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz + + def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None) -> torch.Tensor: + """ + :param xyz: (B, N, 3) xyz coordinates of the features + :param new_xyz: (B, npoint, 3) centroids + :param features: (B, C, N) descriptors of the features + :return: + new_features: (B, 3 + C, npoint, nsample) + """ + idx = ball_query(self.radius, self.nsample, xyz, new_xyz) + xyz_trans = xyz.transpose(1, 2).contiguous() + grouped_xyz = grouping_operation(xyz_trans, idx) # (B, 3, npoint, nsample) + grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1) + + if features is not None: + grouped_features = grouping_operation(features, idx) + if self.use_xyz: + new_features = torch.cat([grouped_xyz, grouped_features], dim=1) # (B, C + 3, npoint, nsample) + else: + new_features = grouped_features + else: + assert self.use_xyz, "use_xyz must be True when no features are given, otherwise there is nothing to group!" 
+ new_features = grouped_xyz + + return new_features + + +class GroupAll(nn.Module): + def __init__(self, use_xyz: bool = True): + super().__init__() + self.use_xyz = use_xyz + + def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None): + """ + :param xyz: (B, N, 3) xyz coordinates of the features + :param new_xyz: ignored + :param features: (B, C, N) descriptors of the features + :return: + new_features: (B, C + 3, 1, N) + """ + grouped_xyz = xyz.transpose(1, 2).unsqueeze(2) + if features is not None: + grouped_features = features.unsqueeze(2) + if self.use_xyz: + new_features = torch.cat([grouped_xyz, grouped_features], dim=1) # (B, 3 + C, 1, N) + else: + new_features = grouped_features + else: + new_features = grouped_xyz + + return new_features diff --git a/model/lib/pointnet2/pytorch_utils.py b/model/lib/pointnet2/pytorch_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..09cb7bc76d88dde5757ac70b6e05e1e0c768cc1b --- /dev/null +++ b/model/lib/pointnet2/pytorch_utils.py @@ -0,0 +1,236 @@ +import torch.nn as nn +from typing import List, Tuple + + +class SharedMLP(nn.Sequential): + + def __init__( + self, + args: List[int], + *, + bn: bool = False, + activation=nn.ReLU(inplace=True), + preact: bool = False, + first: bool = False, + name: str = "", + instance_norm: bool = False, + ): + super().__init__() + + for i in range(len(args) - 1): + self.add_module( + name + 'layer{}'.format(i), + Conv2d( + args[i], + args[i + 1], + bn=(not first or not preact or (i != 0)) and bn, + activation=activation + if (not first or not preact or (i != 0)) else None, + preact=preact, + instance_norm=instance_norm + ) + ) + + +class _ConvBase(nn.Sequential): + + def __init__( + self, + in_size, + out_size, + kernel_size, + stride, + padding, + activation, + bn, + init, + conv=None, + batch_norm=None, + bias=True, + preact=False, + name="", + instance_norm=False, + instance_norm_func=None + ): + super().__init__() + + bias = bias and (not bn) + conv_unit = conv( + in_size, + out_size, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias=bias + ) + init(conv_unit.weight) + if bias: + nn.init.constant_(conv_unit.bias, 0) + + if bn: + if not preact: + bn_unit = batch_norm(out_size) + else: + bn_unit = batch_norm(in_size) + if instance_norm: + if not preact: + in_unit = instance_norm_func(out_size, affine=False, track_running_stats=False) + else: + in_unit = instance_norm_func(in_size, affine=False, track_running_stats=False) + + if preact: + if bn: + self.add_module(name + 'bn', bn_unit) + + if activation is not None: + self.add_module(name + 'activation', activation) + + if not bn and instance_norm: + self.add_module(name + 'in', in_unit) + + self.add_module(name + 'conv', conv_unit) + + if not preact: + if bn: + self.add_module(name + 'bn', bn_unit) + + if activation is not None: + self.add_module(name + 'activation', activation) + + if not bn and instance_norm: + self.add_module(name + 'in', in_unit) + + +class _BNBase(nn.Sequential): + + def __init__(self, in_size, batch_norm=None, name=""): + super().__init__() + self.add_module(name + "bn", batch_norm(in_size)) + + nn.init.constant_(self[0].weight, 1.0) + nn.init.constant_(self[0].bias, 0) + + +class BatchNorm1d(_BNBase): + + def __init__(self, in_size: int, *, name: str = ""): + super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name) + + +class BatchNorm2d(_BNBase): + + def __init__(self, in_size: int, name: str = ""): + super().__init__(in_size, 
batch_norm=nn.BatchNorm2d, name=name) + + +class Conv1d(_ConvBase): + + def __init__( + self, + in_size: int, + out_size: int, + *, + kernel_size: int = 1, + stride: int = 1, + padding: int = 0, + activation=nn.ReLU(inplace=True), + bn: bool = False, + init=nn.init.kaiming_normal_, + bias: bool = True, + preact: bool = False, + name: str = "", + instance_norm=False + ): + super().__init__( + in_size, + out_size, + kernel_size, + stride, + padding, + activation, + bn, + init, + conv=nn.Conv1d, + batch_norm=BatchNorm1d, + bias=bias, + preact=preact, + name=name, + instance_norm=instance_norm, + instance_norm_func=nn.InstanceNorm1d + ) + + +class Conv2d(_ConvBase): + + def __init__( + self, + in_size: int, + out_size: int, + *, + kernel_size: Tuple[int, int] = (1, 1), + stride: Tuple[int, int] = (1, 1), + padding: Tuple[int, int] = (0, 0), + activation=nn.ReLU(inplace=True), + bn: bool = False, + init=nn.init.kaiming_normal_, + bias: bool = True, + preact: bool = False, + name: str = "", + instance_norm=False + ): + super().__init__( + in_size, + out_size, + kernel_size, + stride, + padding, + activation, + bn, + init, + conv=nn.Conv2d, + batch_norm=BatchNorm2d, + bias=bias, + preact=preact, + name=name, + instance_norm=instance_norm, + instance_norm_func=nn.InstanceNorm2d + ) + + +class FC(nn.Sequential): + + def __init__( + self, + in_size: int, + out_size: int, + *, + activation=nn.ReLU(inplace=True), + bn: bool = False, + init=None, + preact: bool = False, + name: str = "" + ): + super().__init__() + + fc = nn.Linear(in_size, out_size, bias=not bn) + if init is not None: + init(fc.weight) + if not bn: + nn.init.constant(fc.bias, 0) + + if preact: + if bn: + self.add_module(name + 'bn', BatchNorm1d(in_size)) + + if activation is not None: + self.add_module(name + 'activation', activation) + + self.add_module(name + 'fc', fc) + + if not preact: + if bn: + self.add_module(name + 'bn', BatchNorm1d(out_size)) + + if activation is not None: + self.add_module(name + 'activation', activation) + diff --git a/model/lib/pointnet2/setup.py b/model/lib/pointnet2/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..99e59e37b90517cc38c35d100f7f9cee0e309368 --- /dev/null +++ b/model/lib/pointnet2/setup.py @@ -0,0 +1,23 @@ +from setuptools import setup +from torch.utils.cpp_extension import BuildExtension, CUDAExtension + +setup( + name='pointnet2', + ext_modules=[ + CUDAExtension('pointnet2_cuda', [ + 'src/pointnet2_api.cpp', + + 'src/ball_query.cpp', + 'src/ball_query_gpu.cu', + 'src/group_points.cpp', + 'src/group_points_gpu.cu', + 'src/interpolate.cpp', + 'src/interpolate_gpu.cu', + 'src/sampling.cpp', + 'src/sampling_gpu.cu', + ], + extra_compile_args={'cxx': ['-g'], + 'nvcc': ['-O2']}) + ], + cmdclass={'build_ext': BuildExtension} +) diff --git a/model/lib/pointnet2/src/ball_query.cpp b/model/lib/pointnet2/src/ball_query.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c9b176e5da5dd89a3378652f0b806925e8ee8996 --- /dev/null +++ b/model/lib/pointnet2/src/ball_query.cpp @@ -0,0 +1,24 @@ +#include +#include +#include +#include +#include +#include +#include "ball_query_gpu.h" + +#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") +#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ") +#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x) + +int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, + at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor) 
{ + CHECK_INPUT(new_xyz_tensor); + CHECK_INPUT(xyz_tensor); + const float *new_xyz = new_xyz_tensor.data<float>(); + const float *xyz = xyz_tensor.data<float>(); + int *idx = idx_tensor.data<int>(); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); + ball_query_kernel_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream); + return 1; +} diff --git a/model/lib/pointnet2/src/ball_query_gpu.cu b/model/lib/pointnet2/src/ball_query_gpu.cu new file mode 100644 index 0000000000000000000000000000000000000000..f8840aa6650693cea17d337008a15fef13ec1ebc --- /dev/null +++ b/model/lib/pointnet2/src/ball_query_gpu.cu @@ -0,0 +1,67 @@ +#include +#include +#include + +#include "ball_query_gpu.h" +#include "cuda_utils.h" + + +__global__ void ball_query_kernel_fast(int b, int n, int m, float radius, int nsample, + const float *__restrict__ new_xyz, const float *__restrict__ xyz, int *__restrict__ idx) { + // new_xyz: (B, M, 3) + // xyz: (B, N, 3) + // output: + // idx: (B, M, nsample) + int bs_idx = blockIdx.y; + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (bs_idx >= b || pt_idx >= m) return; + + new_xyz += bs_idx * m * 3 + pt_idx * 3; + xyz += bs_idx * n * 3; + idx += bs_idx * m * nsample + pt_idx * nsample; + + float radius2 = radius * radius; + float new_x = new_xyz[0]; + float new_y = new_xyz[1]; + float new_z = new_xyz[2]; + + int cnt = 0; + for (int k = 0; k < n; ++k) { + float x = xyz[k * 3 + 0]; + float y = xyz[k * 3 + 1]; + float z = xyz[k * 3 + 2]; + float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z); + if (d2 < radius2){ + if (cnt == 0){ + for (int l = 0; l < nsample; ++l) { + idx[l] = k; + } + } + idx[cnt] = k; + ++cnt; + if (cnt >= nsample) break; + } + } +} + + +void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, \ + const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream) { + // new_xyz: (B, M, 3) + // xyz: (B, N, 3) + // output: + // idx: (B, M, nsample) + + cudaError_t err; + + dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) + dim3 threads(THREADS_PER_BLOCK); + + ball_query_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, radius, nsample, new_xyz, xyz, idx); + // cudaDeviceSynchronize(); // for using printf in kernel function + err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } +} \ No newline at end of file diff --git a/model/lib/pointnet2/src/ball_query_gpu.h b/model/lib/pointnet2/src/ball_query_gpu.h new file mode 100644 index 0000000000000000000000000000000000000000..ffc831a8b700f46b50e0b90d49c538aa0fedca50 --- /dev/null +++ b/model/lib/pointnet2/src/ball_query_gpu.h @@ -0,0 +1,15 @@ +#ifndef _BALL_QUERY_GPU_H +#define _BALL_QUERY_GPU_H + +#include +#include +#include +#include + +int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, + at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor); + +void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, + const float *xyz, const float *new_xyz, int *idx, cudaStream_t stream); + +#endif diff --git a/model/lib/pointnet2/src/cuda_utils.h b/model/lib/pointnet2/src/cuda_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..7fe27969179c976a88199bbe962ca4f8d97263a4 --- /dev/null +++ b/model/lib/pointnet2/src/cuda_utils.h @@ -0,0 +1,15 @@ +#ifndef _CUDA_UTILS_H +#define _CUDA_UTILS_H + +#include + +#define TOTAL_THREADS 1024 +#define 
THREADS_PER_BLOCK 256 +#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) + +inline int opt_n_threads(int work_size) { + const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0); + + return max(min(1 << pow_2, TOTAL_THREADS), 1); +} +#endif diff --git a/model/lib/pointnet2/src/group_points.cpp b/model/lib/pointnet2/src/group_points.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fa80f0e318acc57dabf76ec0a8b1d9dff482ab89 --- /dev/null +++ b/model/lib/pointnet2/src/group_points.cpp @@ -0,0 +1,34 @@ +#include +#include +#include +#include +#include "group_points_gpu.h" +#include +#include + + + +int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, + at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { + + float *grad_points = grad_points_tensor.data<float>(); + const int *idx = idx_tensor.data<int>(); + const float *grad_out = grad_out_tensor.data<float>(); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); + group_points_grad_kernel_launcher_fast(b, c, n, npoints, nsample, grad_out, idx, grad_points, stream); + return 1; +} + + +int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, + at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) { + + const float *points = points_tensor.data<float>(); + const int *idx = idx_tensor.data<int>(); + float *out = out_tensor.data<float>(); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); + group_points_kernel_launcher_fast(b, c, n, npoints, nsample, points, idx, out, stream); + return 1; +} diff --git a/model/lib/pointnet2/src/group_points_gpu.cu b/model/lib/pointnet2/src/group_points_gpu.cu new file mode 100644 index 0000000000000000000000000000000000000000..c015a8125e38aafa1f960000044978463b7853b1 --- /dev/null +++ b/model/lib/pointnet2/src/group_points_gpu.cu @@ -0,0 +1,86 @@ +#include +#include + +#include "cuda_utils.h" +#include "group_points_gpu.h" + + +__global__ void group_points_grad_kernel_fast(int b, int c, int n, int npoints, int nsample, + const float *__restrict__ grad_out, const int *__restrict__ idx, float *__restrict__ grad_points) { + // grad_out: (B, C, npoints, nsample) + // idx: (B, npoints, nsample) + // output: + // grad_points: (B, C, N) + int bs_idx = blockIdx.z; + int c_idx = blockIdx.y; + int index = blockIdx.x * blockDim.x + threadIdx.x; + int pt_idx = index / nsample; + if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; + + int sample_idx = index % nsample; + grad_out += bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; + idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; + + atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[0] , grad_out[0]); +} + +void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, + const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) { + // grad_out: (B, C, npoints, nsample) + // idx: (B, npoints, nsample) + // output: + // grad_points: (B, C, N) + cudaError_t err; + dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) + dim3 threads(THREADS_PER_BLOCK); + + group_points_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, nsample, grad_out, idx, grad_points); + + err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } +} + + +__global__ void group_points_kernel_fast(int b, int c, int n, int npoints, int nsample, + const 
float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) { + // points: (B, C, N) + // idx: (B, npoints, nsample) + // output: + // out: (B, C, npoints, nsample) + int bs_idx = blockIdx.z; + int c_idx = blockIdx.y; + int index = blockIdx.x * blockDim.x + threadIdx.x; + int pt_idx = index / nsample; + if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return; + + int sample_idx = index % nsample; + + idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx; + int in_idx = bs_idx * c * n + c_idx * n + idx[0]; + int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx; + + out[out_idx] = points[in_idx]; +} + + +void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, + const float *points, const int *idx, float *out, cudaStream_t stream) { + // points: (B, C, N) + // idx: (B, npoints, nsample) + // output: + // out: (B, C, npoints, nsample) + cudaError_t err; + dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) + dim3 threads(THREADS_PER_BLOCK); + + group_points_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, nsample, points, idx, out); + // cudaDeviceSynchronize(); // for using printf in kernel function + err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } +} diff --git a/model/lib/pointnet2/src/group_points_gpu.h b/model/lib/pointnet2/src/group_points_gpu.h new file mode 100644 index 0000000000000000000000000000000000000000..76c73ca2600ef75c192b06d28f79a168f1ba368b --- /dev/null +++ b/model/lib/pointnet2/src/group_points_gpu.h @@ -0,0 +1,22 @@ +#ifndef _GROUP_POINTS_GPU_H +#define _GROUP_POINTS_GPU_H + +#include +#include +#include +#include + + +int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample, + at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); + +void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, + const float *points, const int *idx, float *out, cudaStream_t stream); + +int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample, + at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); + +void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample, + const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream); + +#endif diff --git a/model/lib/pointnet2/src/interpolate.cpp b/model/lib/pointnet2/src/interpolate.cpp new file mode 100644 index 0000000000000000000000000000000000000000..88d837f966f52696308b7d85ec1756b2395bb986 --- /dev/null +++ b/model/lib/pointnet2/src/interpolate.cpp @@ -0,0 +1,53 @@ +#include +#include +#include +#include +#include +#include +#include +#include "interpolate_gpu.h" +#include +#include + + +void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, + at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) { + const float *unknown = unknown_tensor.data<float>(); + const float *known = known_tensor.data<float>(); + float *dist2 = dist2_tensor.data<float>(); + int *idx = idx_tensor.data<int>(); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); + three_nn_kernel_launcher_fast(b, n, m, unknown, known, dist2, idx, stream); +} + + +void three_interpolate_wrapper_fast(int b, int c, int m, int n, + at::Tensor points_tensor, + at::Tensor idx_tensor, + at::Tensor weight_tensor, + at::Tensor out_tensor) { + + const float *points = 
points_tensor.data<float>(); + const float *weight = weight_tensor.data<float>(); + float *out = out_tensor.data<float>(); + const int *idx = idx_tensor.data<int>(); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); + three_interpolate_kernel_launcher_fast(b, c, m, n, points, idx, weight, out, stream); +} + +void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, + at::Tensor grad_out_tensor, + at::Tensor idx_tensor, + at::Tensor weight_tensor, + at::Tensor grad_points_tensor) { + + const float *grad_out = grad_out_tensor.data<float>(); + const float *weight = weight_tensor.data<float>(); + float *grad_points = grad_points_tensor.data<float>(); + const int *idx = idx_tensor.data<int>(); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); + three_interpolate_grad_kernel_launcher_fast(b, c, n, m, grad_out, idx, weight, grad_points, stream); +} diff --git a/model/lib/pointnet2/src/interpolate_gpu.cu b/model/lib/pointnet2/src/interpolate_gpu.cu new file mode 100644 index 0000000000000000000000000000000000000000..a123dd8d8d4f5ed23cc4a340abb1141d140fca3c --- /dev/null +++ b/model/lib/pointnet2/src/interpolate_gpu.cu @@ -0,0 +1,161 @@ +#include +#include +#include + +#include "cuda_utils.h" +#include "interpolate_gpu.h" + + +__global__ void three_nn_kernel_fast(int b, int n, int m, const float *__restrict__ unknown, + const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) { + // unknown: (B, N, 3) + // known: (B, M, 3) + // output: + // dist2: (B, N, 3) + // idx: (B, N, 3) + + int bs_idx = blockIdx.y; + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (bs_idx >= b || pt_idx >= n) return; + + unknown += bs_idx * n * 3 + pt_idx * 3; + known += bs_idx * m * 3; + dist2 += bs_idx * n * 3 + pt_idx * 3; + idx += bs_idx * n * 3 + pt_idx * 3; + + float ux = unknown[0]; + float uy = unknown[1]; + float uz = unknown[2]; + + double best1 = 1e40, best2 = 1e40, best3 = 1e40; + int besti1 = 0, besti2 = 0, besti3 = 0; + for (int k = 0; k < m; ++k) { + float x = known[k * 3 + 0]; + float y = known[k * 3 + 1]; + float z = known[k * 3 + 2]; + float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z); + if (d < best1) { + best3 = best2; besti3 = besti2; + best2 = best1; besti2 = besti1; + best1 = d; besti1 = k; + } + else if (d < best2) { + best3 = best2; besti3 = besti2; + best2 = d; besti2 = k; + } + else if (d < best3) { + best3 = d; besti3 = k; + } + } + dist2[0] = best1; dist2[1] = best2; dist2[2] = best3; + idx[0] = besti1; idx[1] = besti2; idx[2] = besti3; +} + + +void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown, + const float *known, float *dist2, int *idx, cudaStream_t stream) { + // unknown: (B, N, 3) + // known: (B, M, 3) + // output: + // dist2: (B, N, 3) + // idx: (B, N, 3) + + cudaError_t err; + dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row) + dim3 threads(THREADS_PER_BLOCK); + + three_nn_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, unknown, known, dist2, idx); + + err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } +} + + +__global__ void three_interpolate_kernel_fast(int b, int c, int m, int n, const float *__restrict__ points, + const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) { + // points: (B, C, M) + // idx: (B, N, 3) + // weight: (B, N, 3) + // output: + // out: (B, C, N) + + int bs_idx = blockIdx.z; + int c_idx = blockIdx.y; + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; 
+ + if (bs_idx >= b || c_idx >= c || pt_idx >= n) return; + + weight += bs_idx * n * 3 + pt_idx * 3; + points += bs_idx * c * m + c_idx * m; + idx += bs_idx * n * 3 + pt_idx * 3; + out += bs_idx * c * n + c_idx * n; + + out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]]; +} + +void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n, + const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream) { + // points: (B, C, M) + // idx: (B, N, 3) + // weight: (B, N, 3) + // output: + // out: (B, C, N) + + cudaError_t err; + dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) + dim3 threads(THREADS_PER_BLOCK); + three_interpolate_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, m, n, points, idx, weight, out); + + err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } +} + + +__global__ void three_interpolate_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out, + const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ grad_points) { + // grad_out: (B, C, N) + // weight: (B, N, 3) + // output: + // grad_points: (B, C, M) + + int bs_idx = blockIdx.z; + int c_idx = blockIdx.y; + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + + if (bs_idx >= b || c_idx >= c || pt_idx >= n) return; + + grad_out += bs_idx * c * n + c_idx * n + pt_idx; + weight += bs_idx * n * 3 + pt_idx * 3; + grad_points += bs_idx * c * m + c_idx * m; + idx += bs_idx * n * 3 + pt_idx * 3; + + + atomicAdd(grad_points + idx[0], grad_out[0] * weight[0]); + atomicAdd(grad_points + idx[1], grad_out[0] * weight[1]); + atomicAdd(grad_points + idx[2], grad_out[0] * weight[2]); +} + +void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out, + const int *idx, const float *weight, float *grad_points, cudaStream_t stream) { + // grad_out: (B, C, N) + // weight: (B, N, 3) + // output: + // grad_points: (B, C, M) + + cudaError_t err; + dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) + dim3 threads(THREADS_PER_BLOCK); + three_interpolate_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, m, grad_out, idx, weight, grad_points); + + err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } +} \ No newline at end of file diff --git a/model/lib/pointnet2/src/interpolate_gpu.h b/model/lib/pointnet2/src/interpolate_gpu.h new file mode 100644 index 0000000000000000000000000000000000000000..f1771087c5e4146e3c5775d3b929ebffffd11ccb --- /dev/null +++ b/model/lib/pointnet2/src/interpolate_gpu.h @@ -0,0 +1,30 @@ +#ifndef _INTERPOLATE_GPU_H +#define _INTERPOLATE_GPU_H + +#include +#include +#include +#include + + +void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor, + at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor); + +void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown, + const float *known, float *dist2, int *idx, cudaStream_t stream); + + +void three_interpolate_wrapper_fast(int b, int c, int m, int n, at::Tensor points_tensor, + at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor); + +void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n, + const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream); + + +void 
three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, at::Tensor grad_out_tensor, + at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor); + +void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out, + const int *idx, const float *weight, float *grad_points, cudaStream_t stream); + +#endif diff --git a/model/lib/pointnet2/src/pointnet2_api.cpp b/model/lib/pointnet2/src/pointnet2_api.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d91f0f2176a6080624f071e5535fe509a0ac83c4 --- /dev/null +++ b/model/lib/pointnet2/src/pointnet2_api.cpp @@ -0,0 +1,24 @@ +#include +#include + +#include "ball_query_gpu.h" +#include "group_points_gpu.h" +#include "sampling_gpu.h" +#include "interpolate_gpu.h" + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("ball_query_wrapper", &ball_query_wrapper_fast, "ball_query_wrapper_fast"); + + m.def("group_points_wrapper", &group_points_wrapper_fast, "group_points_wrapper_fast"); + m.def("group_points_grad_wrapper", &group_points_grad_wrapper_fast, "group_points_grad_wrapper_fast"); + + m.def("gather_points_wrapper", &gather_points_wrapper_fast, "gather_points_wrapper_fast"); + m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper_fast, "gather_points_grad_wrapper_fast"); + + m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper, "furthest_point_sampling_wrapper"); + + m.def("three_nn_wrapper", &three_nn_wrapper_fast, "three_nn_wrapper_fast"); + m.def("three_interpolate_wrapper", &three_interpolate_wrapper_fast, "three_interpolate_wrapper_fast"); + m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_fast, "three_interpolate_grad_wrapper_fast"); +} diff --git a/model/lib/pointnet2/src/sampling.cpp b/model/lib/pointnet2/src/sampling.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5f54daa763ed66240c17ba6254ee9d5a39b6dfc0 --- /dev/null +++ b/model/lib/pointnet2/src/sampling.cpp @@ -0,0 +1,45 @@ +#include +#include +#include +#include +#include +#include "sampling_gpu.h" + + + +int gather_points_wrapper_fast(int b, int c, int n, int npoints, + at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor){ + const float *points = points_tensor.data<float>(); + const int *idx = idx_tensor.data<int>(); + float *out = out_tensor.data<float>(); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); + gather_points_kernel_launcher_fast(b, c, n, npoints, points, idx, out, stream); + return 1; +} + + +int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, + at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) { + + const float *grad_out = grad_out_tensor.data<float>(); + const int *idx = idx_tensor.data<int>(); + float *grad_points = grad_points_tensor.data<float>(); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); + gather_points_grad_kernel_launcher_fast(b, c, n, npoints, grad_out, idx, grad_points, stream); + return 1; +} + + +int furthest_point_sampling_wrapper(int b, int n, int m, + at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) { + + const float *points = points_tensor.data<float>(); + float *temp = temp_tensor.data<float>(); + int *idx = idx_tensor.data<int>(); + + cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream(); + furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream); + return 1; +} diff --git a/model/lib/pointnet2/src/sampling_gpu.cu b/model/lib/pointnet2/src/sampling_gpu.cu new file mode 100644 index 
0000000000000000000000000000000000000000..9e49a60dd6a80449be4c6c0d0d710be7b5fe9cd5 --- /dev/null +++ b/model/lib/pointnet2/src/sampling_gpu.cu @@ -0,0 +1,253 @@ +#include +#include + +#include "cuda_utils.h" +#include "sampling_gpu.h" + + +__global__ void gather_points_kernel_fast(int b, int c, int n, int m, + const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) { + // points: (B, C, N) + // idx: (B, M) + // output: + // out: (B, C, M) + + int bs_idx = blockIdx.z; + int c_idx = blockIdx.y; + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (bs_idx >= b || c_idx >= c || pt_idx >= m) return; + + out += bs_idx * c * m + c_idx * m + pt_idx; + idx += bs_idx * m + pt_idx; + points += bs_idx * c * n + c_idx * n; + out[0] = points[idx[0]]; +} + +void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints, + const float *points, const int *idx, float *out, cudaStream_t stream) { + // points: (B, C, N) + // idx: (B, npoints) + // output: + // out: (B, C, npoints) + + cudaError_t err; + dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) + dim3 threads(THREADS_PER_BLOCK); + + gather_points_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, points, idx, out); + + err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } +} + +__global__ void gather_points_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out, + const int *__restrict__ idx, float *__restrict__ grad_points) { + // grad_out: (B, C, M) + // idx: (B, M) + // output: + // grad_points: (B, C, N) + + int bs_idx = blockIdx.z; + int c_idx = blockIdx.y; + int pt_idx = blockIdx.x * blockDim.x + threadIdx.x; + if (bs_idx >= b || c_idx >= c || pt_idx >= m) return; + + grad_out += bs_idx * c * m + c_idx * m + pt_idx; + idx += bs_idx * m + pt_idx; + grad_points += bs_idx * c * n + c_idx * n; + + atomicAdd(grad_points + idx[0], grad_out[0]); +} + +void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, + const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) { + // grad_out: (B, C, npoints) + // idx: (B, npoints) + // output: + // grad_points: (B, C, N) + + cudaError_t err; + dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row) + dim3 threads(THREADS_PER_BLOCK); + + gather_points_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, grad_out, idx, grad_points); + + err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } +} + + +__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, int idx1, int idx2){ + const float v1 = dists[idx1], v2 = dists[idx2]; + const int i1 = dists_i[idx1], i2 = dists_i[idx2]; + dists[idx1] = max(v1, v2); + dists_i[idx1] = v2 > v1 ? 
i2 : i1; +} + +template <unsigned int block_size> +__global__ void furthest_point_sampling_kernel(int b, int n, int m, + const float *__restrict__ dataset, float *__restrict__ temp, int *__restrict__ idxs) { + // dataset: (B, N, 3) + // tmp: (B, N) + // output: + // idx: (B, M) + + if (m <= 0) return; + __shared__ float dists[block_size]; + __shared__ int dists_i[block_size]; + + int batch_index = blockIdx.x; + dataset += batch_index * n * 3; + temp += batch_index * n; + idxs += batch_index * m; + + int tid = threadIdx.x; + const int stride = block_size; + + int old = 0; + if (threadIdx.x == 0) + idxs[0] = old; + + __syncthreads(); + for (int j = 1; j < m; j++) { + int besti = 0; + float best = -1; + float x1 = dataset[old * 3 + 0]; + float y1 = dataset[old * 3 + 1]; + float z1 = dataset[old * 3 + 2]; + for (int k = tid; k < n; k += stride) { + float x2, y2, z2; + x2 = dataset[k * 3 + 0]; + y2 = dataset[k * 3 + 1]; + z2 = dataset[k * 3 + 2]; + // float mag = (x2 * x2) + (y2 * y2) + (z2 * z2); + // if (mag <= 1e-3) + // continue; + + float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1); + float d2 = min(d, temp[k]); + temp[k] = d2; + besti = d2 > best ? k : besti; + best = d2 > best ? d2 : best; + } + dists[tid] = best; + dists_i[tid] = besti; + __syncthreads(); + + if (block_size >= 1024) { + if (tid < 512) { + __update(dists, dists_i, tid, tid + 512); + } + __syncthreads(); + } + + if (block_size >= 512) { + if (tid < 256) { + __update(dists, dists_i, tid, tid + 256); + } + __syncthreads(); + } + if (block_size >= 256) { + if (tid < 128) { + __update(dists, dists_i, tid, tid + 128); + } + __syncthreads(); + } + if (block_size >= 128) { + if (tid < 64) { + __update(dists, dists_i, tid, tid + 64); + } + __syncthreads(); + } + if (block_size >= 64) { + if (tid < 32) { + __update(dists, dists_i, tid, tid + 32); + } + __syncthreads(); + } + if (block_size >= 32) { + if (tid < 16) { + __update(dists, dists_i, tid, tid + 16); + } + __syncthreads(); + } + if (block_size >= 16) { + if (tid < 8) { + __update(dists, dists_i, tid, tid + 8); + } + __syncthreads(); + } + if (block_size >= 8) { + if (tid < 4) { + __update(dists, dists_i, tid, tid + 4); + } + __syncthreads(); + } + if (block_size >= 4) { + if (tid < 2) { + __update(dists, dists_i, tid, tid + 2); + } + __syncthreads(); + } + if (block_size >= 2) { + if (tid < 1) { + __update(dists, dists_i, tid, tid + 1); + } + __syncthreads(); + } + + old = dists_i[0]; + if (tid == 0) + idxs[j] = old; + } +} + +void furthest_point_sampling_kernel_launcher(int b, int n, int m, + const float *dataset, float *temp, int *idxs, cudaStream_t stream) { + // dataset: (B, N, 3) + // tmp: (B, N) + // output: + // idx: (B, M) + + cudaError_t err; + unsigned int n_threads = opt_n_threads(n); + + switch (n_threads) { + case 1024: + furthest_point_sampling_kernel<1024><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break; + case 512: + furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break; + case 256: + furthest_point_sampling_kernel<256><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break; + case 128: + furthest_point_sampling_kernel<128><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break; + case 64: + furthest_point_sampling_kernel<64><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break; + case 32: + furthest_point_sampling_kernel<32><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break; + case 16: + furthest_point_sampling_kernel<16><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break; + case 8: + furthest_point_sampling_kernel<8><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break; + case 4: + 
furthest_point_sampling_kernel<4><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break; + case 2: + furthest_point_sampling_kernel<2><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break; + case 1: + furthest_point_sampling_kernel<1><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break; + default: + furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); + } + + err = cudaGetLastError(); + if (cudaSuccess != err) { + fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err)); + exit(-1); + } +} diff --git a/model/lib/pointnet2/src/sampling_gpu.h b/model/lib/pointnet2/src/sampling_gpu.h new file mode 100644 index 0000000000000000000000000000000000000000..6200c5914e434ecd2fc3b36313985805f6dbe0cc --- /dev/null +++ b/model/lib/pointnet2/src/sampling_gpu.h @@ -0,0 +1,29 @@ +#ifndef _SAMPLING_GPU_H +#define _SAMPLING_GPU_H + +#include +#include +#include + + +int gather_points_wrapper_fast(int b, int c, int n, int npoints, + at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor); + +void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints, + const float *points, const int *idx, float *out, cudaStream_t stream); + + +int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints, + at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor); + +void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, + const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream); + + +int furthest_point_sampling_wrapper(int b, int n, int m, + at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor); + +void furthest_point_sampling_kernel_launcher(int b, int n, int m, + const float *dataset, float *temp, int *idxs, cudaStream_t stream); + +#endif diff --git a/requirements.txt b/requirements.txt index 20d78a4e7327b08c5afa6c0495efec20ac50dd44..faee9f6220667d03032034629fb9b481eebe7ee4 100755 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,5 @@ torchaudio matplotlib flash-attn gradio -plotly \ No newline at end of file +plotly +model/lib/pointnet2/ \ No newline at end of file
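
Once `pip install -r requirements.txt` has built the local `model/lib/pointnet2/` package, the ops wired up above can be smoke-tested from Python. The sketch below is illustrative only: the import path, tensor sizes, radius, and the presence of a working CUDA device are assumptions on top of this diff, not something it guarantees.

```python
# Hypothetical smoke test for the pointnet2 CUDA ops added in this diff.
# Assumes the extension compiled, a CUDA device is available, and that
# pointnet2_utils.py is importable as model.lib.pointnet2.pointnet2_utils.
import torch
from model.lib.pointnet2.pointnet2_utils import (
    furthest_point_sample, gather_operation, QueryAndGroup,
)

B, N, C, npoint = 2, 1024, 16, 128                      # illustrative sizes
xyz = torch.rand(B, N, 3, device="cuda")                # point coordinates
features = torch.rand(B, C, N, device="cuda")           # per-point descriptors

idx = furthest_point_sample(xyz, npoint)                # (B, npoint) sampled indices
new_xyz = gather_operation(                             # gather the sampled centroids
    xyz.transpose(1, 2).contiguous(), idx
).transpose(1, 2).contiguous()                          # (B, npoint, 3)

group = QueryAndGroup(radius=0.2, nsample=32, use_xyz=True)
new_features = group(xyz, new_xyz, features)            # (B, 3 + C, npoint, 32)
print(new_features.shape)
```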
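For readers unfamiliar with `furthest_point_sampling_kernel` above: it greedily grows a subset of `m` points, each iteration adding the point whose squared distance to the already-selected set is largest, with `temp` caching each point's distance to that set. A plain-PyTorch rendering of the same idea follows as a readability aid (assuming float32 inputs); it is not the code the extension runs.

```python
import torch

def furthest_point_sample_reference(xyz: torch.Tensor, npoint: int) -> torch.Tensor:
    """xyz: (B, N, 3) -> (B, npoint) long indices, mirroring the CUDA kernel's logic."""
    B, N, _ = xyz.shape
    idxs = torch.zeros(B, npoint, dtype=torch.long, device=xyz.device)
    # squared distance from every point to the selected set so far (the kernel's `temp` buffer)
    dist_to_set = torch.full((B, N), float("inf"), device=xyz.device)
    farthest = torch.zeros(B, dtype=torch.long, device=xyz.device)   # kernel also starts from index 0
    batch = torch.arange(B, device=xyz.device)
    for j in range(npoint):
        idxs[:, j] = farthest
        centroid = xyz[batch, farthest].unsqueeze(1)                 # (B, 1, 3)
        d = ((xyz - centroid) ** 2).sum(-1)                          # (B, N)
        dist_to_set = torch.minimum(dist_to_set, d)
        farthest = dist_to_set.argmax(dim=1)                         # next sample: farthest from the set
    return idxs
```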