/*----------------------------- subs.x -------------------------------------- */

/* Some assembly rewrites of line.c functions.
 *
 * void far ILoop (char *va, int dy, int dx, int dvx, int dvy, int color);
 * void far vgaset (int to, int color, int nbytes);
 * void far SLoop (char *va, int count, int dva, int color);
*/

/* void far vgaset (int to, int color, int nbytes);
 *
 * The subroutine will set 'nbytes' bytes at 'to' to 'color'.
*/

/* Write-mode flag bits.  ILoop and SLoop test them (testb) against the
 * second byte of their 'color' argument; the first byte of that argument
 * is the pixel value that gets stored.  Each macro is a complete
 * immediate operand ('$' included) so it can be used directly as the
 * source operand of an instruction.
*/
#define GrXOR	$0x01
#define GrOR	$0x02

#define P(n)	4*(3+n)(%esp)		/* n-th argument, after the 3 pushes */

#define _to	%edi
#define _color	%eax
#define _nbytes	%ebx
#define _count	%ecx

/* _vgaset: store the low byte of 'color' into 'nbytes' consecutive bytes
 * starting at address 'to'.
 *
 * In:    P(1) = to, P(2) = color, P(3) = nbytes
 * Out:   nothing meaningful (%eax is left holding the replicated color)
 * Saves: %ebx, %ecx, %edi.  Leaves the direction flag cleared.
 *
 * A length of zero or less is a no-op.  Short fills (fewer than 6 bytes)
 * are done bytewise; longer ones are split into an unaligned head of 0-3
 * bytes, a run of aligned 32-bit stores and a tail of 0-3 bytes.
 *
 * NOTE(review): the stos instructions write through %es:%edi, so this
 * presumably relies on %es addressing the same memory as 'to' -- confirm
 * against the memory model used by the callers.
*/
	.globl _vgaset
_vgaset:
	pushl	%ebx
	pushl	%ecx
	pushl	%edi

	movl	P(3),_nbytes
	testl	_nbytes,_nbytes
	jle	vgasret			/* nbytes <= 0: nothing to store; a */
					/*    negative count must never reach rep */

	movl	P(1),_to
	movl	P(2),_color
	cld				/* string stores advance %edi upwards */
	cmpl	$6,_nbytes		/* minimal length; is 6 ok? */
	jl	vgasetx			/*    MUST be 3 or more! (the head may */
					/*    consume up to 3 bytes)           */

	movb	%al,%ah			/* replicate the color byte into all */
	movw	%ax,%cx			/*    four bytes of %eax for the     */
	shll	$16,%eax		/*    long stores below              */
	movw	%cx,%ax

	movl	_to,_count
	negl	_count
	andl	$3,_count		/* (-to) & 3 = bytes up to alignment */
	subl	_count,_nbytes		/* update length */
	rep
	stosb				/* set head */
	movl	_nbytes,_count
	shrl	$2,_count		/* 'longs' in body */
	rep
	stosl				/* set body */
	andl	$3,_nbytes		/* 'bytes' in tail */
vgasetx:
	movl	_nbytes,_count
	rep
	stosb				/* set tail (or the whole short fill) */
vgasret:

	popl	%edi
	popl	%ecx
	popl	%ebx
	ret

#undef P
#undef _to
#undef _color
#undef _nbytes
#undef _count

/* void far ILoop (char *va, int dy, int dx, int dvx, int dvy, int color);
 *
 * The inner loop of the Bresenham line drawing algorithm for the general
 * case. Shorten the loop by sacrificing setup time.
*/

#define P(n)	4*(6+n)(%esp)		/* n-th argument, after the 6 pushes */

#define _va	%esi
#define _dy	%di
#define _dx	%bx
#define _dvx	%edx
#define _dvy	%ebp
#define _color	%cl
#define _error	%ax

/* _ILoop: plot 'dx' pixels of a line, one per step along the major axis.
 *
 * In:    P(1) = va     address of the last pixel already handled
 *        P(2) = dy     minor-axis delta  (low 16 bits used)
 *        P(3) = dx     major-axis delta, also the step count (low 16 bits)
 *        P(4) = dvx    address increment for a major-axis step
 *        P(5) = dvy    extra address increment when the minor axis steps
 *        P(6) = color  byte 0 = pixel value, byte 1 = GrXOR/GrOR flags
 * Out:   %eax = final va (address of the last pixel written)
 * Saves: %ebx, %ecx, %edx, %esi, %edi, %ebp
 *
 * Setup turns dy/dx into a 16-bit fixed-point slope (0x10000*dy)/dx.  Each
 * step adds the slope to a 16-bit error term that starts at 0x8000; a carry
 * out of that addition means the minor axis advances too, so the step uses
 * dvx+dvy instead of dvx.  There is one copy of the loop per write mode
 * (set, XOR, OR); GrXOR is tested before GrOR.
 *
 * The divide is guarded: dx == 0 returns va untouched, and dy >= dx (whose
 * quotient would not fit in 16 bits) uses the largest slope, 0xffff.
*/
	.globl _ILoop
_ILoop:
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	pushl	%esi
	pushl	%edi
	pushl	%ebp

	movl	P(1),_va
	movw	P(3),_dx			/* %bx = dx, count */
	testw	_dx,_dx
	jz	iloopr				/* no steps: skip the divide */

	movw	P(2),%dx			/* %dx = dy */
	movw	$0xffff,_dy			/* slope if divide can't be done */
	cmpw	_dx,%dx
	jae	iloopc				/* dy >= dx: divw would fault */
	movw	$0,%ax				/* %dx:%ax = 0x10000*dy */
	divw	_dx				/* %ax = (0x10000*dy)/dx */
	movw	%ax,_dy				/* dy = (0x10000*dy)/dx */
iloopc:
	movl	P(4),_dvx			/* overwrites the dy temp in %dx */
	movl	P(5),_dvy
	movb	P(6),_color
	movw	$0x8000,_error			/* err = 0x8000 */
	addl	_dvx,_dvy			/* dvy = combined (diagonal) step */

	testb	GrXOR,1+P(6)			/* flags live in byte 1 of color */
	jnz	iloopx2				/* XOR write mode */
	testb	GrOR,1+P(6)
	jz	iloops2				/* Set write mode */
	jmp	iloopo2				/* OR  write mode */

iloops1:
	addl	_dvx,_va			/* va += dvx */
	movb	_color,(_va)			/* *va = c */
iloops2:
	decw	_dx				/* --count */
	js	iloopr
iloops3:
	addw	_dy,_error			/* err += dy */
	jnc	iloops1				/* no carry: major-axis step only */
	addl	_dvy,_va			/* va += dvy */
	movb	_color,(_va)			/* *va = c */
	decw	_dx				/* --count */
	jns	iloops3
iloopr:
	movl	_va,%eax			/* return (va) */

	popl	%ebp
	popl	%edi
	popl	%esi
	popl	%edx
	popl	%ecx
	popl	%ebx

	ret

iloopx1:
	addl	_dvx,_va			/* va += dvx */
	xorb	_color,(_va)			/* *va ^= c */
iloopx2:
	decw	_dx				/* --count */
	js	iloopr
iloopx3:
	addw	_dy,_error			/* err += dy */
	jnc	iloopx1				/* no carry: major-axis step only */
	addl	_dvy,_va			/* va += dvy */
	xorb	_color,(_va)			/* *va ^= c */
	decw	_dx				/* --count */
	jns	iloopx3
	jmp	iloopr

iloopo1:
	addl	_dvx,_va			/* va += dvx */
	orb	_color,(_va)			/* *va |= c */
iloopo2:
	decw	_dx				/* --count */
	js	iloopr
iloopo3:
	addw	_dy,_error			/* err += dy */
	jnc	iloopo1				/* no carry: major-axis step only */
	addl	_dvy,_va			/* va += dvy */
	orb	_color,(_va)			/* *va |= c */
	decw	_dx				/* --count */
	jns	iloopo3
	jmp	iloopr

#undef _va
#undef _dy
#undef _dx
#undef _dvx
#undef _dvy
#undef _color
#undef _error
#undef P


/* void far SLoop (char *va, int count, int dva, int color);
 *
 * Draw simple 8 directions (except horizontal).
*/

#define ARG(n)	4*(4+n)(%esp)		/* n-th argument, after the 4 pushes */

#define _ptr	%esi
#define _left	%ebx
#define _step	%edx
#define _pix	%cl

/* _SLoop: write 'count' pixels, moving the address by 'dva' before each one.
 *
 * In:    ARG(1) = va     starting address (this pixel itself is not written)
 *        ARG(2) = count  number of pixels; zero or less writes nothing
 *        ARG(3) = dva    address increment per pixel
 *        ARG(4) = color  byte 0 = pixel value, byte 1 = GrXOR/GrOR flags
 * Out:   %eax = final address (last pixel written, or va if none)
 * Saves: %ebx, %ecx, %edx, %esi
 *
 * One bottom-tested loop per write mode; GrXOR is checked before GrOR and
 * plain store is used when neither flag is set.
*/
	.globl _SLoop
_SLoop:
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	pushl	%esi

	movb	ARG(4),_pix
	movl	ARG(3),_step
	movl	ARG(2),_left
	movl	ARG(1),_ptr

	testb	GrXOR,1+ARG(4)			/* mode flags: byte 1 of color */
	jnz	sl_xor_test
	testb	GrOR,1+ARG(4)
	jnz	sl_or_test
	jmp	sl_set_test

	/* ---- plain store ---- */
sl_set_put:
	addl	_step,_ptr			/* advance to the next pixel */
	movb	_pix,(_ptr)			/* overwrite it */
sl_set_test:
	decl	_left				/* any pixels left? */
	jns	sl_set_put

sl_done:
	movl	_ptr,%eax			/* hand back the final address */

	popl	%esi
	popl	%edx
	popl	%ecx
	popl	%ebx

	ret

	/* ---- exclusive-or ---- */
sl_xor_put:
	addl	_step,_ptr			/* advance to the next pixel */
	xorb	_pix,(_ptr)			/* toggle the color bits */
sl_xor_test:
	decl	_left				/* any pixels left? */
	jns	sl_xor_put
	jmp	sl_done

	/* ---- inclusive-or ---- */
sl_or_put:
	addl	_step,_ptr			/* advance to the next pixel */
	orb	_pix,(_ptr)			/* merge the color bits */
sl_or_test:
	decl	_left				/* any pixels left? */
	jns	sl_or_put
	jmp	sl_done

#undef _ptr
#undef _left
#undef _step
#undef _pix
#undef ARG


#if 0


/**********************************************************************
 *
 * Unused stuff from here on
 *
 **********************************************************************
*/

/* void far ILoop (char *va, int dy, int dx, int dvx, int dvy, int color);
 *
 * The inner loop of the Bresenham line drawing algorithm for the general
 * case. A Plain Jane implementation.
 *
 * Unlike _ILoop this version needs no divide: it keeps the classic
 * integer error term (starting at dx/2, +dy per step, -dx on overflow)
 * and only supports the plain "set" write mode.  dvy stays in memory
 * because every register is already spoken for.
*/

#define P(n)	4*(6+n)(%esp)		/* n-th argument, after the 6 pushes */

#define _va	%esi
#define _dy	%di
#define _dx	%bp
#define _count	%bx
#define _dvx	%edx
#define _dvy	P(5)
#define _color	%cl
#define _error	%ax

	.globl _ILoopn
_ILoopn:
	pushl	%ebx
	pushl	%ecx
	pushl	%edx
	pushl	%esi
	pushl	%edi
	pushl	%ebp

	movl	P(1),_va
	movw	P(2),_dy
	movw	P(3),_dx
	movl	P(4),_dvx
	movb	P(6),_color

	movw	_dx,_count
	movw	_dx,_error			/* err = dx/2 */
	shrw	$1,_error
	jmp	iloopn2
iloopn1:
	movb	_color,(_va)			/* *va = c */
iloopn2:
	decw	_count				/* --count */
	js	iloopn4
iloopn3:
	addl	_dvx,_va			/* va += dvx */
	addw	_dy,_error			/* err += dy */
	cmpw	_dx,_error			/* err >= dx? */
	jb	iloopn1				/* no: major-axis step only */
	subw	_dx,_error			/* err -= dx */
	addl	_dvy,_va			/* va += dvy */
	movb	_color,(_va)			/* *va = c */
	decw	_count				/* --count */
	jns	iloopn3
iloopn4:
	movl	_va,%eax			/* return (va) */

	popl	%ebp
	popl	%edi
	popl	%esi
	popl	%edx
	popl	%ecx
	popl	%ebx

	ret

#undef _va
#undef _dy
#undef _dx
#undef _count
#undef _dvx
#undef _dvy
#undef _color
#undef _error
#undef P

#endif

/* Drop the write-mode flag macros defined near the top of this file. */
#undef GrXOR
#undef GrOR
