File size: 1,427 Bytes
8b7c501
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from peachpy import *
from peachpy.x86_64 import *


def fp16_alt_xmm_to_fp32_ymm(xmm_half):
	"""Emit instructions widening 16-bit half-floats in an XMM register to float32 in a YMM register.

	Each 16-bit input element is split into its sign bit and its remaining
	("nonsign") 15 bits. Two candidate results are computed for the nonsign
	part -- one valid when the 5-bit exponent field is non-zero, one valid
	when it is zero (denormals and zero) -- and the right one is selected
	per element before the sign is OR-ed back in.

	There is no special case for an all-ones exponent field: such inputs go
	through the same re-biasing path as any other normalized number.
	NOTE(review): this presumably corresponds to the "alternative" half
	format without Inf/NaN, as the `alt` in the name suggests -- confirm.

	:param xmm_half: register whose 128 bits hold the half-precision inputs
		(presumably an XMMRegister with eight 16-bit elements; it must
		expose `.as_ymm`).
	:return: a newly allocated YMMRegister holding the eight converted
		float32 values, in the same element order as the input.

	NOTE(review): every call below emits an instruction as a side effect, so
	this presumably has to run while a PeachPy function is being generated --
	confirm against callers.
	"""
	# Duplicate the input quadwords across lanes: the immediate selects source
	# qwords (0, 0, 1, 1), so the low 128-bit lane holds elements 0-3 and the
	# high lane holds elements 4-7 in their low 64 bits. This is required
	# because the unpack instructions below operate within each 128-bit lane.
	ymm_half = YMMRegister()
	VPERMQ(ymm_half, xmm_half.as_ymm, 0b01010000)

	# Zero register. The XOR is issued on the XMM view; VEX-encoded 128-bit
	# operations clear the upper half of the corresponding YMM register.
	ymm_zero = YMMRegister()
	VPXOR(ymm_zero.as_xmm, ymm_zero.as_xmm, ymm_zero.as_xmm)

	# Interleave zero words (low) with half words (high): every 32-bit
	# element becomes half << 16, putting the half's sign at bit 31.
	ymm_word = YMMRegister()
	VPUNPCKLWD(ymm_word, ymm_zero, ymm_half)

	# x + x == x << 1 per 16-bit element: shifts the sign bit out of each
	# half. Used by the denormal path below.
	ymm_shl1_half = YMMRegister()
	VPADDW(ymm_shl1_half, ymm_half, ymm_half)

	# Same doubling per 32-bit element: shifts the sign bit out of each word,
	# leaving only exponent and mantissa bits (at bits 31..17).
	ymm_shl1_nonsign = YMMRegister()
	VPADDD(ymm_shl1_nonsign, ymm_word, ymm_word)

	# -0.0f has only bit 31 set, so it serves as the float32 sign mask.
	sign_mask = Constant.float32x8(-0.0)

	# Isolate the sign bit of each element.
	ymm_sign = YMMRegister()
	VANDPS(ymm_sign, ymm_word, sign_mask)

	# Logical right shift by 4 of the doubled value: a net shift of 3 relative
	# to ymm_word with the sign removed. The 5-bit half exponent lands at
	# bits 27..23 (the low bits of the float32 exponent field) and the 10-bit
	# half mantissa at bits 22..13 (the top of the float32 mantissa field).
	ymm_shr3_nonsign = YMMRegister()
	VPSRLD(ymm_shr3_nonsign, ymm_shl1_nonsign, 4)

	# 0x38000000 == 112 << 23: adds 112 (= 127 - 15, the difference between
	# the float32 and half exponent biases) to the exponent field.
	exp_offset = Constant.uint32x8(0x38000000)

	# Result for inputs with a non-zero exponent field (normalized numbers).
	ymm_norm_nonsign = YMMRegister()
	VPADDD(ymm_norm_nonsign, ymm_shr3_nonsign, exp_offset)

	# Denormal path. Interleaving puts 0x3E80 in the high word and (half << 1)
	# in the low word of each 32-bit element. 0x3E800000 is the bit pattern of
	# 0.25f, so for a half with zero exponent and mantissa m the element reads
	# as 0.25 + m * 2**-24.
	magic_mask = Constant.uint16x16(0x3E80)
	ymm_denorm_nonsign = YMMRegister()
	VPUNPCKLWD(ymm_denorm_nonsign, ymm_shl1_half, magic_mask)

	# Subtracting 0.25 leaves m * 2**-24, the value of a half denormal
	# (and +0.0 when m == 0).
	magic_bias = Constant.float32x8(0.25)
	VSUBPS(ymm_denorm_nonsign, ymm_denorm_nonsign, magic_bias)

	# 0x00800000 == 1 << 23: the smallest ymm_shr3_nonsign value that has a
	# non-zero exponent field.
	ymm_denorm_cutoff = YMMRegister()
	VMOVDQA(ymm_denorm_cutoff, Constant.uint32x8(0x00800000))
	
	# Mask is all-ones where cutoff > shr3_nonsign, i.e. where the half's
	# exponent field is zero. The signed compare is safe because the preceding
	# logical shift leaves the top four bits of ymm_shr3_nonsign clear.
	ymm_denorm_mask = YMMRegister()
	VPCMPGTD(ymm_denorm_mask, ymm_denorm_cutoff, ymm_shr3_nonsign)

	# Per element: take the denormal result where the mask is set, otherwise
	# the normalized result.
	ymm_nonsign = YMMRegister()
	VBLENDVPS(ymm_nonsign, ymm_norm_nonsign, ymm_denorm_nonsign, ymm_denorm_mask)

	# Re-attach the sign bit.
	ymm_float = YMMRegister()
	VORPS(ymm_float, ymm_nonsign, ymm_sign)

	return ymm_float