# (scraped repository-page header, commented out so the file parses)
# qbhf2's picture
# added NvidiaWarp and GarmentCode repos
# 66c9c8a
# Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import unittest
import numpy as np
import warp as wp
from warp.tests.unittest_utils import *
# Initialize the Warp runtime; must run before kernels are defined or launched.
wp.init()
@wp.kernel
def load_store_half(f32: wp.array(dtype=wp.float32), f16: wp.array(dtype=wp.float16)):
    # Per-thread: converts one fp32 element to fp16 and verifies it equals the
    # corresponding element of the fp16 array, then stores it back so the host
    # can verify that fp16 stores also work.
    tid = wp.tid()
    # check conversion from f32->f16
    a = wp.float16(f32[tid])
    b = f16[tid]
    wp.expect_eq(a, b)
    # check stores
    f16[tid] = a
def test_fp16_conversion(test, device):
    """Check fp32<->fp16 conversions and fp16 array stores against numpy.

    Builds matching numpy and warp arrays from the same values, verifies the
    host-side round-trip, then launches a kernel that converts and stores
    per element and verifies the fp16 array is unchanged afterwards.
    """
    values = [1.0, 2.0, 3.0, -3.14159]

    expected_f32 = np.array(values, dtype=np.float32)
    expected_f16 = np.array(values, dtype=np.float16)

    arr_f32 = wp.array(values, dtype=wp.float32, device=device)
    arr_f16 = wp.array(values, dtype=wp.float16, device=device)

    # host-side conversion must agree with numpy's
    assert_np_equal(expected_f32, arr_f32.numpy())
    assert_np_equal(expected_f16, arr_f16.numpy())

    wp.launch(load_store_half, dim=len(values), inputs=[arr_f32, arr_f16], device=device)

    # the kernel stored the converted values back; contents must be unchanged
    assert_np_equal(expected_f16, arr_f16.numpy())
@wp.kernel
def value_load_store_half(f16_value: wp.float16, f16_array: wp.array(dtype=wp.float16)):
    # An fp16 scalar passed as a kernel parameter must equal the first array
    # element; it is then written back to verify parameter-value stores.
    wp.expect_eq(f16_value, f16_array[0])
    # check stores
    f16_array[0] = f16_value
def test_fp16_kernel_parameter(test, device):
    """Verify that fp16 scalar values can be passed into kernels as parameters."""
    for value in (1.0, 2.0, 3.0, -3.14159):
        expected = np.array([value], dtype=np.float16)

        # pass the value as an explicit wp.float16
        arr = wp.array([value], dtype=wp.float16, device=device)
        wp.launch(value_load_store_half, (1,), inputs=[wp.float16(value), arr], device=device)
        # the kernel wrote the parameter back into the array
        assert_np_equal(expected, arr.numpy())

        # pass the value as a plain Python float to exercise automatic conversion
        arr = wp.array([value], dtype=wp.float16, device=device)
        wp.launch(value_load_store_half, (1,), inputs=[value, arr], device=device)
        assert_np_equal(expected, arr.numpy())
@wp.kernel
def mul_half(input: wp.array(dtype=wp.float16), output: wp.array(dtype=wp.float16)):
    # Doubles each fp16 element, performing the multiply in fp32 so that
    # gradient propagation through mixed-precision intermediates is exercised.
    tid = wp.tid()
    # convert to compute type fp32
    x = wp.float(input[tid]) * 2.0
    # store back as fp16
    output[tid] = wp.float16(x)
def test_fp16_grad(test, device):
    """Check that gradients propagate correctly through fp16 arrays.

    The kernel performs its intermediate arithmetic in fp32, so this also
    covers mixed-precision differentiation: for y = 2x the gradient seeded
    with ones must come back as 2 everywhere.
    """
    rng = np.random.default_rng(123)
    values = rng.random(size=15).astype(np.float16)

    x = wp.array(values, dtype=wp.float16, device=device, requires_grad=True)
    y = wp.zeros_like(x)

    tape = wp.Tape()
    with tape:
        wp.launch(mul_half, dim=len(values), inputs=[x, y], device=device)

    seed = wp.array(np.ones(len(y)), dtype=wp.float16, device=device)
    tape.backward(grads={y: seed})

    # d(2x)/dx == 2 for every element
    assert_np_equal(x.grad.numpy(), np.full(len(values), 2.0))
class TestFp16(unittest.TestCase):
    """Empty container class; the fp16 test functions are attached to it
    dynamically via add_function_test, one variant per target device."""
# Select target devices: the CPU (when available) plus every unique CUDA
# device with hardware fp16 support (compute capability 7.0 or newer).
devices = []
if wp.is_cpu_available():
    devices.append("cpu")
devices += [d for d in get_unique_cuda_test_devices() if d.arch >= 70]

# Register each test function on the container class, once per device.
add_function_test(TestFp16, "test_fp16_conversion", test_fp16_conversion, devices=devices)
add_function_test(TestFp16, "test_fp16_grad", test_fp16_grad, devices=devices)
add_function_test(TestFp16, "test_fp16_kernel_parameter", test_fp16_kernel_parameter, devices=devices)
if __name__ == "__main__":
    # Start from a clean kernel cache so stale codegen cannot mask failures.
    wp.build.clear_kernel_cache()
    unittest.main(verbosity=2)