d3dx9_36: Implementation of D3DXSHMultiply3.

2012-06-15 11:51:54 +08:00 · 2012-06-15 11:51:54 +08:00 · 60a0fb30df
parent cdd18731a7
commit 60a0fb30df
4 changed files with 131 additions and 1 deletions
--- a/dlls/d3dx9_36/d3dx9_36.spec
+++ b/dlls/d3dx9_36/d3dx9_36.spec
@ -280,7 +280,7 @@
@ stub D3DXSHEvalHemisphereLight(long ptr long long ptr ptr ptr)
@ stub D3DXSHEvalSphericalLight(long ptr long long long long ptr ptr ptr)
@ stub D3DXSHMultiply2(ptr ptr ptr)
-@ stub D3DXSHMultiply3(ptr ptr ptr)
+@ stdcall D3DXSHMultiply3(ptr ptr ptr)
@ stub D3DXSHMultiply4(ptr ptr ptr)
@ stub D3DXSHMultiply5(ptr ptr ptr)
@ stub D3DXSHMultiply6(ptr ptr ptr)
--- a/dlls/d3dx9_36/math.c
+++ b/dlls/d3dx9_36/math.c
@ -1967,6 +1967,8 @@ FLOAT *WINAPI D3DXFloat16To32Array(FLOAT *pout, CONST D3DXFLOAT16 *pin, UINT n)
    return pout;
 }

+/*_________________D3DXSH________________*/
+
 FLOAT* WINAPI D3DXSHAdd(FLOAT *out, UINT order, const FLOAT *a, const FLOAT *b)
 {
    UINT i;
@ -1978,3 +1980,102 @@ FLOAT* WINAPI D3DXSHAdd(FLOAT *out, UINT order, const FLOAT *a, const FLOAT *b)

    return out;
 }
+
+/*
+ * D3DXSHMultiply3
+ *
+ * Implements the D3DX entry point of the same name: the product of the two
+ * order-3 spherical harmonic coefficient vectors a and b, written to out.
+ *
+ * Only indices 0..8 of a, b and out are accessed. Returns out.
+ *
+ * The body is a fully unrolled sum. Each out[] entry is initialised with a
+ * plain '=' the first time it is touched and accumulated with '+=' / '-='
+ * afterwards, so the order of the groups below must be preserved.
+ *
+ * out[] entries are stored while a[] and b[] are still being read (out[0] is
+ * written before a[0] and b[0] are last used), so passing an out that
+ * overlaps a or b yields results computed from partially overwritten inputs.
+ */
+FLOAT* WINAPI D3DXSHMultiply3(FLOAT *out, CONST FLOAT *a, CONST FLOAT *b)
+{
+    FLOAT t, ta, tb;
+
+    TRACE("(%p, %p, %p)\n", out, a, b);
+
+    out[0] = 0.28209479f * a[0] * b[0];
+
+    /* Coefficient 1 against 0, 6, 8; first stores to out[1], out[6], out[8]. */
+    ta = 0.28209479f * a[0] - 0.12615662f * a[6] - 0.21850968f * a[8];
+    tb = 0.28209479f * b[0] - 0.12615662f * b[6] - 0.21850968f * b[8];
+    out[1] = ta * b[1] + tb * a[1];
+    t = a[1] * b[1];
+    out[0] += 0.28209479f * t;
+    out[6] = -0.12615662f * t;
+    out[8] = -0.21850968f * t;
+
+    /* Coefficients 1, 2, 5; first stores to out[2] and out[5]. */
+    ta = 0.21850968f * a[5];
+    tb = 0.21850968f * b[5];
+    out[1] += ta * b[2] + tb * a[2];
+    out[2] = ta * b[1] + tb * a[1];
+    t = a[1] * b[2] + a[2] * b[1];
+    out[5] = 0.21850968f * t;
+
+    /* Coefficients 1, 3, 4; first stores to out[3] and out[4]. */
+    ta = 0.21850968f * a[4];
+    tb = 0.21850968f * b[4];
+    out[1] += ta * b[3] + tb * a[3];
+    out[3] = ta * b[1] + tb * a[1];
+    t = a[1] * b[3] + a[3] * b[1];
+    out[4] = 0.21850968f * t;
+
+    /* Coefficient 2 against 0, 6. */
+    ta = 0.28209480f * a[0] + 0.25231326f * a[6];
+    tb = 0.28209480f * b[0] + 0.25231326f * b[6];
+    out[2] += ta * b[2] + tb * a[2];
+    t = a[2] * b[2];
+    out[0] += 0.28209480f * t;
+    out[6] += 0.25231326f * t;
+
+    /* Coefficients 2, 3, 7; first store to out[7]. */
+    ta = 0.21850969f * a[7];
+    tb = 0.21850969f * b[7];
+    out[2] += ta * b[3] + tb * a[3];
+    out[3] += ta * b[2] + tb * a[2];
+    t = a[2] * b[3] + a[3] * b[2];
+    out[7] = 0.21850969f * t;
+
+    /* Coefficient 3 against 0, 6, 8. */
+    ta = 0.28209479f * a[0] - 0.12615663f * a[6] + 0.21850969f * a[8];
+    tb = 0.28209479f * b[0] - 0.12615663f * b[6] + 0.21850969f * b[8];
+    out[3] += ta * b[3] + tb * a[3];
+    t = a[3] * b[3];
+    out[0] += 0.28209479f * t;
+    out[6] -= 0.12615663f * t;
+    out[8] += 0.21850969f * t;
+
+    /* Coefficient 4 against 0, 6. */
+    ta = 0.28209479f * a[0] - 0.18022375f * a[6];
+    tb = 0.28209479f * b[0] - 0.18022375f * b[6];
+    out[4] += ta * b[4] + tb * a[4];
+    t = a[4] * b[4];
+    out[0] += 0.28209479f * t;
+    out[6] -= 0.18022375f * t;
+
+    /* Coefficients 4, 5, 7. */
+    ta = 0.15607835f * a[7];
+    tb = 0.15607835f * b[7];
+    out[4] += ta * b[5] + tb * a[5];
+    out[5] += ta * b[4] + tb * a[4];
+    t = a[4] * b[5] + a[5] * b[4];
+    out[7] += 0.15607834f * t;
+
+    /* Coefficient 5 against 0, 6, 8. */
+    ta = 0.28209479f * a[0] + 0.09011186f * a[6] - 0.15607835f * a[8];
+    tb = 0.28209479f * b[0] + 0.09011186f * b[6] - 0.15607835f * b[8];
+    out[5] += ta * b[5] + tb * a[5];
+    t = a[5] * b[5];
+    out[0] += 0.28209479f * t;
+    out[6] += 0.09011186f * t;
+    out[8] -= 0.15607835f * t;
+
+    /* Coefficient 6 against 0. */
+    ta = 0.28209480f * a[0];
+    tb = 0.28209480f * b[0];
+    out[6] += ta * b[6] + tb * a[6];
+    t = a[6] * b[6];
+    out[0] += 0.28209480f * t;
+    out[6] += 0.18022376f * t;
+
+    /* Coefficient 7 against 0, 6, 8. */
+    ta = 0.28209479f * a[0] + 0.09011186f * a[6] + 0.15607835f * a[8];
+    tb = 0.28209479f * b[0] + 0.09011186f * b[6] + 0.15607835f * b[8];
+    out[7] += ta * b[7] + tb * a[7];
+    t = a[7] * b[7];
+    out[0] += 0.28209479f * t;
+    out[6] += 0.09011186f * t;
+    out[8] += 0.15607835f * t;
+
+    /* Coefficient 8 against 0, 6. */
+    ta = 0.28209479f * a[0] - 0.18022375f * a[6];
+    tb = 0.28209479f * b[0] - 0.18022375f * b[6];
+    out[8] += ta * b[8] + tb * a[8];
+    t = a[8] * b[8];
+    out[0] += 0.28209479f * t;
+    out[6] -= 0.18022375f * t;
+
+    return out;
+}
--- a/dlls/d3dx9_36/tests/math.c
+++ b/dlls/d3dx9_36/tests/math.c
@ -2399,6 +2399,33 @@ static void test_D3DXSHAdd(void)
    }
 }

+/*
+ * Checks D3DXSHMultiply3 against hard-coded expected values for the nine
+ * output coefficients, and verifies that the rest of the 20-element output
+ * buffer keeps the values it was seeded with.
+ */
+static void test_D3DXSHMultiply3(void)
+{
+    unsigned int i;
+    FLOAT a[20], b[20], c[20];
+    /* Entries 0..8: expected product. Entries 9..19: the seed values c[i] = i. */
+    const FLOAT expected[] =
+    { 7.813913f, 2.256058f, 5.9484005f, 4.970894f, 2.899858f, 3.598946f, 1.726572f, 5.573538f,
+      0.622063f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, };
+
+    for (i = 0; i < 20; i++)
+    {
+        a[i] = 1.0f + (FLOAT)i/100.0f;
+        b[i] = 3.0f - (FLOAT)i/100.0f;
+        c[i] = (FLOAT)i;
+    }
+
+    D3DXSHMultiply3(c, a, b);
+    for (i = 0; i < 9; i++)
+        ok(relative_error(c[i], expected[i]) < admitted_error, "Expected[%u] = %f, received = %f\n", i, expected[i], c[i]);
+
+    /* D3DXSHMultiply3 does not modify the elements of the array after the ninth element */
+    for (i = 9; i < 20; i++)
+        ok(relative_error(c[i], expected[i]) < admitted_error, "Expected[%u] = %f, received = %f\n", i, expected[i], c[i]);
+}
+
 START_TEST(math)
 {
    D3DXColorTest();
@ -2416,4 +2443,5 @@ START_TEST(math)
    test_D3DXVec_Array();
    test_D3DXFloat_Array();
    test_D3DXSHAdd();
+    test_D3DXSHMultiply3();
 }
--- a/include/d3dx9math.h
+++ b/include/d3dx9math.h
@ -378,6 +378,7 @@ D3DXFLOAT16 *WINAPI D3DXFloat32To16Array(D3DXFLOAT16 *pout, CONST FLOAT *pin, UI
 FLOAT *WINAPI D3DXFloat16To32Array(FLOAT *pout, CONST D3DXFLOAT16 *pin, UINT n);

 FLOAT* WINAPI D3DXSHAdd(FLOAT *out, UINT order, CONST FLOAT *a, CONST FLOAT *b);
+FLOAT* WINAPI D3DXSHMultiply3(FLOAT *out, CONST FLOAT *a, CONST FLOAT *b);

 #ifdef __cplusplus
 }