/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"@(#)mlib_v_ImageLookUpU16S32Func.c	9.2	07/11/05 SMI"

#include <vis_proto.h>
#include <mlib_image.h>
#include <mlib_v_ImageLookUpFunc.h>

/* *********************************************************** */

/*
 * Row kernel called by the 1-, 2- and 4-channel entry points in this
 * file: output position k is looked up in table0..table3 for
 * k % 4 == 0..3.
 */
static void mlib_v_ImageLookUp_U16_S32_124_D1(
    const mlib_u16 *src,
    mlib_f32 *dst,
    mlib_s32 xsize,
    const mlib_f32 *table0,
    const mlib_f32 *table1,
    const mlib_f32 *table2,
    const mlib_f32 *table3);

/*
 * Row kernel called by the 3-channel entry point in this file: output
 * position k is looked up in table0..table2 for k % 3 == 0..2.
 */
static void mlib_v_ImageLookUp_U16_S32_3_D1(
    const mlib_u16 *src,
    mlib_f32 *dst,
    mlib_s32 xsize,
    const mlib_f32 *table0,
    const mlib_f32 *table1,
    const mlib_f32 *table2);

/* *********************************************************** */

/*
 * Row kernel for look-ups whose table sequence repeats every four
 * samples.
 *
 * Writes xsize entries to dst.  Output position k is the entry of
 * table0/table1/table2/table3 (for k % 4 == 0/1/2/3) selected by the
 * k-th 16-bit source sample.  Entries are only loaded and stored as
 * mlib_f32; no arithmetic is done on them.
 *
 * The bulk of the row is read two samples at a time through a 32-bit
 * pointer, and the upper half of each word is taken as the earlier
 * sample.  NOTE(review): this presumably relies on src being 4-byte
 * aligned and on big-endian sample order within a word - confirm
 * against the callers and the target platform.
 */
void
mlib_v_ImageLookUp_U16_S32_124_D1(
    const mlib_u16 *src,
    mlib_f32 *dst,
    mlib_s32 xsize,
    const mlib_f32 *table0,
    const mlib_f32 *table1,
    const mlib_f32 *table2,
    const mlib_f32 *table3)
{
/* source viewed as 32-bit words, i.e. two samples per load */
	mlib_u32 *sa;

/* source viewed as single samples, used for the tail only */
	mlib_u16 *sp;

/* the two source words holding the current group of four samples */
	mlib_u32 s0, s1;

/* current write position in dst */
	mlib_f32 *dp;

/* table entries for positions 0 and 1 of the current group */
	mlib_f32 acc0, acc1;

/* table entries for positions 2 and 3 of the current group */
	mlib_f32 acc2, acc3;

/* number of samples completed so far */
	mlib_s32 i;
/* sample values scaled by 4: byte offsets into table0..table3 */
	mlib_u32 s00, s01, s02, s03;

	sa = (mlib_u32 *)src;
	dp = dst;

	i = 0;

	if (xsize >= 4) {

/* prologue: load the first group and start converting it */
		s0 = (*sa++);
		s1 = (*sa++);
/* upper 16 bits * 4 (low two bits masked off) */
		s00 = (s0 >> 14) & (~3);
/* lower 16 bits * 4 (the left shift discards the upper half) */
		s01 = ((s0 << 16) >> 14);

/*
 * Steady state: finish the group loaded on the previous pass while
 * loading the next one.  The bound i <= xsize - 8 keeps the loads of
 * samples i + 4 .. i + 7 inside the row.
 */
#pragma pipeloop(0)
		for (i = 0; i <= xsize - 8; i += 4, dp += 4) {
			s02 = (s1 >> 14) & (~3);
			s03 = ((s1 << 16) >> 14);
/* byte-offset addressing: the offsets are already scaled by 4 */
			acc0 = *(mlib_f32 *)((mlib_u8 *)table0 + s00);
			acc1 = *(mlib_f32 *)((mlib_u8 *)table1 + s01);
			acc2 = *(mlib_f32 *)((mlib_u8 *)table2 + s02);
			acc3 = *(mlib_f32 *)((mlib_u8 *)table3 + s03);
			s0 = (*sa++);
			s1 = (*sa++);
			s00 = (s0 >> 14) & (~3);
			s01 = ((s0 << 16) >> 14);
			dp[0] = acc0;
			dp[1] = acc1;
			dp[2] = acc2;
			dp[3] = acc3;
		}

/* epilogue: finish the last group that was already loaded */
		s02 = (s1 >> 14) & (~3);
		s03 = ((s1 << 16) >> 14);
		acc0 = *(mlib_f32 *)((mlib_u8 *)table0 + s00);
		acc1 = *(mlib_f32 *)((mlib_u8 *)table1 + s01);
		acc2 = *(mlib_f32 *)((mlib_u8 *)table2 + s02);
		acc3 = *(mlib_f32 *)((mlib_u8 *)table3 + s03);
		dp[0] = acc0;
		dp[1] = acc1;
		dp[2] = acc2;
		dp[3] = acc3;
		dp += 4;
		i += 4;
	}

/* sa now points at sample i, the first one not yet handled */
	sp = (mlib_u16 *)sa;

/* tail: at most three samples remain; i is a multiple of 4 here */
	if (i < xsize) {
		(*dp++) = table0[sp[0]];
		i++;
		sp++;
	}

	if (i < xsize) {
		(*dp++) = table1[sp[0]];
		i++;
		sp++;
	}

	if (i < xsize) {
		(*dp++) = table2[sp[0]];
	}
}

/* *********************************************************** */

/*
 * U16 -> S32 look-up through a single table.
 *
 * For each of the ysize rows, each of the xsize 16-bit source samples
 * is replaced by the entry of table[0] that it indexes.  slb and dlb
 * are the byte distances from one row to the next in src and dst.
 */
void
mlib_v_ImageLookUp_U16_S32_1(
    const mlib_u16 *src,
    mlib_s32 slb,
    mlib_s32 *dst,
    mlib_s32 dlb,
    mlib_s32 xsize,
    mlib_s32 ysize,
    const mlib_s32 **table)
{
/* table entries are moved as mlib_f32, the type the row kernel takes */
	const mlib_f32 *lut = (const mlib_f32 *)table[0];
	const mlib_u16 *src_row = src;
	mlib_s32 *dst_row = dst;
	mlib_s32 rows_left;

	for (rows_left = ysize; rows_left > 0; rows_left--) {
		const mlib_u16 *in = src_row;
		mlib_f32 *out = (mlib_f32 *)dst_row;
		mlib_s32 remaining = xsize;
/* low two address bits of the row start */
		mlib_addr misalign = (mlib_addr)in & 3;
/* samples to handle singly before the word-wise kernel: at most 1 */
		mlib_s32 lead = (mlib_s32)(((4 - misalign) & 3) >> 1);

		if (lead > remaining) {
			lead = remaining;
		}

		if (lead == 1) {
			*out++ = lut[*in++];
			remaining--;
		}

		if (remaining > 0) {
			mlib_v_ImageLookUp_U16_S32_124_D1(in, out, remaining,
			    lut, lut, lut, lut);
		}

/* step both row pointers by their byte strides */
		src_row = (const mlib_u16 *)((const mlib_u8 *)src_row + slb);
		dst_row = (mlib_s32 *)((mlib_u8 *)dst_row + dlb);
	}
}

/* *********************************************************** */

/*
 * U16 -> S32 look-up through two alternating tables.
 *
 * Each of the ysize rows holds xsize * 2 samples; samples at even
 * positions go through table[0] and samples at odd positions through
 * table[1].  slb and dlb are the byte distances from one row to the
 * next in src and dst.
 */
void
mlib_v_ImageLookUp_U16_S32_2(
    const mlib_u16 *src,
    mlib_s32 slb,
    mlib_s32 *dst,
    mlib_s32 dlb,
    mlib_s32 xsize,
    mlib_s32 ysize,
    const mlib_s32 **table)
{
	const mlib_u16 *src_row = src;
	mlib_s32 *dst_row = dst;
	mlib_s32 rows_left;

	for (rows_left = ysize; rows_left > 0; rows_left--) {
		const mlib_u16 *in = src_row;
		mlib_f32 *out = (mlib_f32 *)dst_row;
/* tables for the next sample and the one after it */
		const mlib_f32 *lut0 = (const mlib_f32 *)table[0];
		const mlib_f32 *lut1 = (const mlib_f32 *)table[1];
		mlib_s32 remaining = xsize * 2;
/* low two address bits of the row start */
		mlib_addr misalign = (mlib_addr)in & 3;
/* samples to handle singly before the word-wise kernel: at most 1 */
		mlib_s32 lead = (mlib_s32)(((4 - misalign) & 3) >> 1);

		if (lead > remaining) {
			lead = remaining;
		}

		if (lead & 1) {
/* one sample consumed, so the two tables trade places */
			const mlib_f32 *first = lut0;

			*out++ = first[*in++];
			remaining--;
			lut0 = lut1;
			lut1 = first;
		}

		if (remaining > 0) {
			mlib_v_ImageLookUp_U16_S32_124_D1(in, out, remaining,
			    lut0, lut1, lut0, lut1);
		}

/* step both row pointers by their byte strides */
		src_row = (const mlib_u16 *)((const mlib_u8 *)src_row + slb);
		dst_row = (mlib_s32 *)((mlib_u8 *)dst_row + dlb);
	}
}

/* *********************************************************** */

/*
 * U16 -> S32 look-up through four tables used in rotation.
 *
 * Each of the ysize rows holds xsize * 4 samples; the sample at
 * position k goes through table[k % 4].  slb and dlb are the byte
 * distances from one row to the next in src and dst.
 */
void
mlib_v_ImageLookUp_U16_S32_4(
    const mlib_u16 *src,
    mlib_s32 slb,
    mlib_s32 *dst,
    mlib_s32 dlb,
    mlib_s32 xsize,
    mlib_s32 ysize,
    const mlib_s32 **table)
{
	const mlib_u16 *src_row = src;
	mlib_s32 *dst_row = dst;
	mlib_s32 rows_left;

	for (rows_left = ysize; rows_left > 0; rows_left--) {
		const mlib_u16 *in = src_row;
		mlib_f32 *out = (mlib_f32 *)dst_row;
/* tables for the next four samples, in order */
		const mlib_f32 *lut0 = (const mlib_f32 *)table[0];
		const mlib_f32 *lut1 = (const mlib_f32 *)table[1];
		const mlib_f32 *lut2 = (const mlib_f32 *)table[2];
		const mlib_f32 *lut3 = (const mlib_f32 *)table[3];
		mlib_s32 remaining = xsize * 4;
/* low two address bits of the row start */
		mlib_addr misalign = (mlib_addr)in & 3;
/* samples to handle singly before the word-wise kernel: at most 1 */
		mlib_s32 lead = (mlib_s32)(((4 - misalign) & 3) >> 1);

		if (lead > remaining) {
			lead = remaining;
		}

		if (lead == 1) {
/* one sample consumed, so every table moves up one place */
			const mlib_f32 *first = lut0;

			*out++ = first[*in++];
			remaining--;
			lut0 = lut1;
			lut1 = lut2;
			lut2 = lut3;
			lut3 = first;
		}

		if (remaining > 0) {
			mlib_v_ImageLookUp_U16_S32_124_D1(in, out, remaining,
			    lut0, lut1, lut2, lut3);
		}

/* step both row pointers by their byte strides */
		src_row = (const mlib_u16 *)((const mlib_u8 *)src_row + slb);
		dst_row = (mlib_s32 *)((mlib_u8 *)dst_row + dlb);
	}
}

/* *********************************************************** */

/*
 * Row kernel for look-ups whose table sequence repeats every three
 * samples.
 *
 * Writes xsize entries to dst.  Output position k is the entry of
 * table0/table1/table2 (for k % 3 == 0/1/2) selected by the k-th
 * 16-bit source sample.  Entries are only loaded and stored as
 * mlib_f32; no arithmetic is done on them.
 *
 * Samples are still processed in groups of four, so the three table
 * pointers are rotated by one place after every group.
 *
 * The bulk of the row is read two samples at a time through a 32-bit
 * pointer, and the upper half of each word is taken as the earlier
 * sample.  NOTE(review): this presumably relies on src being 4-byte
 * aligned and on big-endian sample order within a word - confirm
 * against the callers and the target platform.
 */
void
mlib_v_ImageLookUp_U16_S32_3_D1(
    const mlib_u16 *src,
    mlib_f32 *dst,
    mlib_s32 xsize,
    const mlib_f32 *table0,
    const mlib_f32 *table1,
    const mlib_f32 *table2)
{
/* source viewed as 32-bit words, i.e. two samples per load */
	mlib_u32 *sa;

/* source viewed as single samples, used for the tail only */
	mlib_u16 *sp;

/* the two source words holding the current group of four samples */
	mlib_u32 s0, s1;

/* current write position in dst */
	mlib_f32 *dp;

/* table entries for positions 0 and 1 of the current group */
	mlib_f32 acc0, acc1;

/* table entries for positions 2 and 3 of the current group */
	mlib_f32 acc2, acc3;

/* number of samples completed so far */
	mlib_s32 i;
/* scratch pointer used while rotating table0..table2 */
	const mlib_f32 *table;
/* sample values scaled by 4: byte offsets into the tables */
	mlib_u32 s00, s01, s02, s03;

	sa = (mlib_u32 *)src;
	dp = dst;

	i = 0;

	if (xsize >= 4) {

/* prologue: load the first group and start converting it */
		s0 = (*sa++);
		s1 = (*sa++);
/* upper 16 bits * 4 (low two bits masked off) */
		s00 = (s0 >> 14) & (~3);
/* lower 16 bits * 4 (the left shift discards the upper half) */
		s01 = ((s0 << 16) >> 14);

/*
 * Steady state: finish the group loaded on the previous pass while
 * loading the next one.  The bound i <= xsize - 8 keeps the loads of
 * samples i + 4 .. i + 7 inside the row.
 */
#pragma pipeloop(0)
		for (i = 0; i <= xsize - 8; i += 4, dp += 4) {
			s02 = (s1 >> 14) & (~3);
			s03 = ((s1 << 16) >> 14);
			acc0 = *(mlib_f32 *)((mlib_u8 *)table0 + s00);
			acc1 = *(mlib_f32 *)((mlib_u8 *)table1 + s01);
			acc2 = *(mlib_f32 *)((mlib_u8 *)table2 + s02);
/* fourth sample of the group wraps round to the first table */
			acc3 = *(mlib_f32 *)((mlib_u8 *)table0 + s03);
			s0 = (*sa++);
			s1 = (*sa++);
			s00 = (s0 >> 14) & (~3);
			s01 = ((s0 << 16) >> 14);
/* four samples used against a cycle of three: shift tables by one */
			table = table0;
			table0 = table1;
			table1 = table2;
			table2 = table;
			dp[0] = acc0;
			dp[1] = acc1;
			dp[2] = acc2;
			dp[3] = acc3;
		}

/* epilogue: finish the last group that was already loaded */
		s02 = (s1 >> 14) & (~3);
		s03 = ((s1 << 16) >> 14);
		acc0 = *(mlib_f32 *)((mlib_u8 *)table0 + s00);
		acc1 = *(mlib_f32 *)((mlib_u8 *)table1 + s01);
		acc2 = *(mlib_f32 *)((mlib_u8 *)table2 + s02);
		acc3 = *(mlib_f32 *)((mlib_u8 *)table0 + s03);
		dp[0] = acc0;
		dp[1] = acc1;
		dp[2] = acc2;
		dp[3] = acc3;
/* rotate once more so the tail below starts on the right table */
		table = table0;
		table0 = table1;
		table1 = table2;
		table2 = table;
		dp += 4;
		i += 4;
	}

/* sa now points at sample i, the first one not yet handled */
	sp = (mlib_u16 *)sa;

/* tail: at most three samples remain */
	if (i < xsize) {
		(*dp++) = table0[sp[0]];
		i++;
		sp++;
	}

	if (i < xsize) {
		(*dp++) = table1[sp[0]];
		i++;
		sp++;
	}

	if (i < xsize) {
		(*dp++) = table2[sp[0]];
	}
}

/* *********************************************************** */

/*
 * U16 -> S32 look-up through three tables used in rotation.
 *
 * Each of the ysize rows holds xsize * 3 samples; the sample at
 * position k goes through table[k % 3].  slb and dlb are the byte
 * distances from one row to the next in src and dst.
 */
void
mlib_v_ImageLookUp_U16_S32_3(
    const mlib_u16 *src,
    mlib_s32 slb,
    mlib_s32 *dst,
    mlib_s32 dlb,
    mlib_s32 xsize,
    mlib_s32 ysize,
    const mlib_s32 **table)
{
	const mlib_u16 *src_row = src;
	mlib_s32 *dst_row = dst;
	mlib_s32 rows_left;

	for (rows_left = ysize; rows_left > 0; rows_left--) {
		const mlib_u16 *in = src_row;
		mlib_f32 *out = (mlib_f32 *)dst_row;
/* tables for the next three samples, in order */
		const mlib_f32 *lut0 = (const mlib_f32 *)table[0];
		const mlib_f32 *lut1 = (const mlib_f32 *)table[1];
		const mlib_f32 *lut2 = (const mlib_f32 *)table[2];
		mlib_s32 remaining = xsize * 3;
/* low two address bits of the row start */
		mlib_addr misalign = (mlib_addr)in & 3;
/* samples to handle singly before the word-wise kernel: at most 1 */
		mlib_s32 lead = (mlib_s32)(((4 - misalign) & 3) >> 1);

		if (lead > remaining) {
			lead = remaining;
		}

		if (lead == 1) {
/* one sample consumed, so every table moves up one place */
			const mlib_f32 *first = lut0;

			*out++ = first[*in++];
			remaining--;
			lut0 = lut1;
			lut1 = lut2;
			lut2 = first;
		}

		if (remaining > 0) {
			mlib_v_ImageLookUp_U16_S32_3_D1(in, out, remaining,
			    lut0, lut1, lut2);
		}

/* step both row pointers by their byte strides */
		src_row = (const mlib_u16 *)((const mlib_u8 *)src_row + slb);
		dst_row = (mlib_s32 *)((mlib_u8 *)dst_row + dlb);
	}
}

/* *********************************************************** */
