Quack 3: Arena

2025-06-10 11:59:58 -05:00 · 2024-09-25 15:40:36 -05:00
parent 0bf99969fd
commit 40035fa235
74 changed files with 36999 additions and 0 deletions
--- a/ref_soft/r_polysa.asm
+++ b/ref_soft/r_polysa.asm
@@ -0,0 +1,812 @@
+ .386P
+ .model FLAT
+;
+; d_polysa.s
+; x86 assembly-language polygon model drawing code
+;
+
+include qasm.inc
+include d_if.inc
+
+if	id386
+
+; !!! if this is changed, it must be changed in d_polyse.c too !!!
+;DPS_MAXSPANS			equ		(MAXHEIGHT+1)
+; 1 extra for spanpackage that marks end
+
+;SPAN_SIZE	equ		(((DPS_MAXSPANS + 1 + ((CACHE_SIZE - 1) / spanpackage_t_size)) + 1) * spanpackage_t_size)
+
+MASK_1K	equ		03FFh	; 1023; not referenced in the visible part of this file
+
+_DATA SEGMENT	
+
+ align 4	
+;p10_minus_p20 dd 0
+;p01_minus_p21 dd 0
+;temp0 dd 0
+;temp1 dd 0
+;Ltemp dd 0
+; jump table into the unrolled draw loops; the span setup code indexes it with (-count & 7), plus 8 when _iractive is non-zero
+aff8entryvec_table dd LDraw8, LDraw7, LDraw6, LDraw5	; entries 0-3 (standard loop)
+ dd LDraw4, LDraw3, LDraw2, LDraw1, LDraw8IR, LDraw7IR, LDraw6IR, LDraw5IR, LDraw4IR, LDraw3IR, LDraw2IR, LDraw1IR	; entries 4-7 (standard loop), then 8-15 (IR loop)
+
+lzistepx dd 0	; _r_zistepx rotated by 16 bits; written on entry to _R_PolysetDrawSpans8_Opaque
+
+ externdef _rand1k:dword	
+ externdef _rand1k_index:dword	
+ externdef _alias_colormap:dword	; its value is stored into the patched lookups by _D_Aff8Patch
+
+;PGM
+ externdef _irtable:dword	; indexed bytewise by texel value in the IR draw paths
+ externdef _iractive:byte	; non-zero selects the IR draw paths
+;PGM
+
+_DATA ENDS
+_TEXT SEGMENT	
+
+
+;----------------------------------------------------------------------
+; 8-bpp horizontal span drawing code for affine polygons, with smooth
+; shading and no transparency
+;----------------------------------------------------------------------
+; In: [esp+4] = pointer to an array of span packages; a package whose count is -999999 ends the array
+;===================================
+;===================================
+; pspans = stack offset of that argument once esi and ebx have been pushed (4 + 8)
+pspans	equ		4+8
+
+ public _D_PolysetAff8Start	
+_D_PolysetAff8Start:	
+
+ public _R_PolysetDrawSpans8_Opaque 
+_R_PolysetDrawSpans8_Opaque:
+
+ push esi	; preserve register variables
+ push ebx	
+
+ mov esi,ds:dword ptr[pspans+esp]	; point to the first span descriptor
+ mov ecx,ds:dword ptr[_r_zistepx]	
+
+ push ebp	; preserve caller's stack frame
+ push edi	
+
+ ror ecx,16	; put high 16 bits of 1/z step in low word
+ mov edx,ds:dword ptr[spanpackage_t_count+esi]	; edx = count field of the first span
+
+ mov ds:dword ptr[lzistepx],ecx	; keep the rotated step for the draw loops
+
+LSpanLoop:	; one pass per span package; edx = count field of the package at esi
+
+;		lcount = d_aspancount - pspanpackage->count;
+;
+;		errorterm += erroradjustup;
+;		if (errorterm >= 0)
+;		{
+;			d_aspancount += d_countextrastep;
+;			errorterm -= erroradjustdown;
+;		}
+;		else
+;		{
+;			d_aspancount += ubasestep;
+;		}
+
+ mov eax,ds:dword ptr[_d_aspancount]
+ sub eax,edx	; eax = lcount
+
+ mov edx,ds:dword ptr[_erroradjustup]	
+ mov ebx,ds:dword ptr[_errorterm]	
+ add ebx,edx	; errorterm += erroradjustup
+ js LNoTurnover	; still negative: take the base step
+
+ mov edx,ds:dword ptr[_erroradjustdown]	
+ mov edi,ds:dword ptr[_d_countextrastep]	
+ sub ebx,edx	; errorterm -= erroradjustdown
+ mov ebp,ds:dword ptr[_d_aspancount]	
+ mov ds:dword ptr[_errorterm],ebx	
+ add ebp,edi	; d_aspancount += d_countextrastep
+ mov ds:dword ptr[_d_aspancount],ebp	
+ jmp LRightEdgeStepped	
+
+LNoTurnover:	
+ mov edi,ds:dword ptr[_d_aspancount]	
+ mov edx,ds:dword ptr[_ubasestep]	
+ mov ds:dword ptr[_errorterm],ebx	
+ add edi,edx	; d_aspancount += ubasestep
+ mov ds:dword ptr[_d_aspancount],edi	
+
+LRightEdgeStepped:	
+ cmp eax,1	; eax still holds lcount
+
+ jl LNextSpan	; lcount < 1: nothing to draw for this span
+ jz LExactlyOneLong	; lcount == 1: single-pixel path
+
+;
+; set up advancetable
+;
+ mov ecx,ds:dword ptr[_a_ststepxwhole]	
+ mov edx,ds:dword ptr[_r_affinetridesc+atd_skinwidth]	
+
+ mov ds:dword ptr[advancetable+4],ecx	; advance base in t
+ add ecx,edx	
+
+ mov ds:dword ptr[advancetable],ecx	; advance extra in t
+ mov ecx,ds:dword ptr[_a_tstepxfrac]	
+
+ mov cx,ds:word ptr[_r_lstepx]	; low word of ecx = r_lstepx, high word keeps a_tstepxfrac
+ mov edx,eax	; count
+
+ mov ds:dword ptr[tstep],ecx
+ add edx,7	
+
+ shr edx,3	; count of full and partial loops
+ mov ebx,ds:dword ptr[spanpackage_t_sfrac+esi]	
+
+ mov bx,dx	; low word of ebx = loop count, high word keeps sfrac
+ mov ecx,ds:dword ptr[spanpackage_t_pz+esi]	
+
+ neg eax	
+
+ mov edi,ds:dword ptr[spanpackage_t_pdest+esi]	
+ and eax,7	; 0->0, 1->7, 2->6, ... , 7->1
+
+ sub edi,eax	; compensate for hardwired offsets
+ sub ecx,eax	; pz is backed up by 2*eax bytes in total (the loops address it in words)
+
+ sub ecx,eax	
+ mov edx,ds:dword ptr[spanpackage_t_tfrac+esi]	
+
+ mov dx,ds:word ptr[spanpackage_t_light+esi]	; low word of edx = light, high word keeps tfrac
+ mov ebp,ds:dword ptr[spanpackage_t_zi+esi]	
+
+ ror ebp,16	; put high 16 bits of 1/z in low word
+ push esi	; saved spans pointer; popped again after the draw loop
+
+ push eax	; keep the table index while al is used for the flag test
+ mov al, [_iractive]
+ cmp al, 0
+ pop eax	; pop leaves the flags from the cmp intact
+ jne IRInsert
+ 
+ mov esi,ds:dword ptr[spanpackage_t_ptex+esi]	
+ jmp dword ptr[aff8entryvec_table+eax*4]	; entries 0-7: standard loop
+
+IRInsert:
+ mov esi,ds:dword ptr[spanpackage_t_ptex+esi]
+ add eax, 8	; entries 8-15: IR loop
+ jmp dword ptr[aff8entryvec_table+eax*4]	
+
+; %bx = count of full and partial loops
+; %ebx high word = sfrac
+; %ecx = pz
+; %dx = light
+; %edx high word = tfrac
+; %esi = ptex
+; %edi = pdest
+; %ebp = 1/z
+; tstep low word = C(r_lstepx)
+; tstep high word = C(a_tstepxfrac)
+; C(a_sstepxfrac) low word = 0
+; C(a_sstepxfrac) high word = C(a_sstepxfrac)
+;===
+;Standard Draw Loop
+;===
+LDrawLoop:	; top of each further 8-pixel pass (the first pass enters through aff8entryvec_table)
+
+ mov al,[_iractive]
+ cmp al,0
+ jne LDrawLoopIR	; switch to the IR loop when the flag is set
+
+; FIXME: do we need to clamp light? We may need at least a buffer bit to
+; keep it from poking into tfrac and causing problems
+
+LDraw8:	; pixel 0 of the group: z word at [ecx], destination byte at [edi]
+ cmp bp,ds:word ptr[ecx]	; bp (high 16 bits of 1/z) against the stored z word
+ jl Lp1	; signed less-than: leave this pixel untouched
+ xor eax,eax	
+ mov ah,dh	; ah = high byte of light
+ mov al,ds:byte ptr[esi]	; al = texel
+ mov ds:word ptr[ecx],bp	; update the z word
+ mov al,ds:byte ptr[12345678h+eax]	; table lookup; _D_Aff8Patch overwrites the 12345678h displacement
+LPatch8:	
+ mov ds:byte ptr[edi],al	
+Lp1:	
+ add edx,ds:dword ptr[tstep]	; step light (low word) and tfrac (high word)
+ sbb eax,eax	; eax = -1 if that add carried, else 0
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	; add the carry back in at bit 0 (ebp is kept rotated by 16)
+ add ebx,ds:dword ptr[_a_sstepxfrac]	; step sfrac; its carry feeds the adc below
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	; ptex += base step (eax = 0) or extra step (eax = -1), plus the sfrac carry
+
+LDraw7:	; pixel 1: z word at [2+ecx], destination byte at [1+edi]
+ cmp bp,ds:word ptr[2+ecx]	
+ jl Lp2	
+ xor eax,eax	
+ mov ah,dh	
+ mov al,ds:byte ptr[esi]	
+ mov ds:word ptr[2+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch7:	
+ mov ds:byte ptr[1+edi],al	
+Lp2:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+LDraw6:	; pixel 2: z word at [4+ecx], destination byte at [2+edi]
+ cmp bp,ds:word ptr[4+ecx]	
+ jl Lp3	
+ xor eax,eax	
+ mov ah,dh	
+ mov al,ds:byte ptr[esi]	
+ mov ds:word ptr[4+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch6:	
+ mov ds:byte ptr[2+edi],al	
+Lp3:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+LDraw5:	; pixel 3: z word at [6+ecx], destination byte at [3+edi]
+ cmp bp,ds:word ptr[6+ecx]	
+ jl Lp4	
+ xor eax,eax	
+ mov ah,dh	
+ mov al,ds:byte ptr[esi]	
+ mov ds:word ptr[6+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch5:	
+ mov ds:byte ptr[3+edi],al	
+Lp4:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+LDraw4:	; pixel 4: z word at [8+ecx], destination byte at [4+edi]
+ cmp bp,ds:word ptr[8+ecx]	
+ jl Lp5	
+ xor eax,eax	
+ mov ah,dh	
+ mov al,ds:byte ptr[esi]	
+ mov ds:word ptr[8+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch4:	
+ mov ds:byte ptr[4+edi],al	
+Lp5:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+LDraw3:	; pixel 5: z word at [10+ecx], destination byte at [5+edi]
+ cmp bp,ds:word ptr[10+ecx]	
+ jl Lp6	
+ xor eax,eax	
+ mov ah,dh	
+ mov al,ds:byte ptr[esi]	
+ mov ds:word ptr[10+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch3:	
+ mov ds:byte ptr[5+edi],al	
+Lp6:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+LDraw2:	; pixel 6: z word at [12+ecx], destination byte at [6+edi]
+ cmp bp,ds:word ptr[12+ecx]	
+ jl Lp7	
+ xor eax,eax	
+ mov ah,dh	
+ mov al,ds:byte ptr[esi]	
+ mov ds:word ptr[12+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch2:	
+ mov ds:byte ptr[6+edi],al	
+Lp7:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+LDraw1:	; pixel 7: z word at [14+ecx], destination byte at [7+edi]
+ cmp bp,ds:word ptr[14+ecx]	
+ jl Lp8	
+ xor eax,eax	
+ mov ah,dh	
+ mov al,ds:byte ptr[esi]	
+ mov ds:word ptr[14+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch1:	
+ mov ds:byte ptr[7+edi],al	
+Lp8:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+ add edi,8	; next group of 8 destination bytes
+ add ecx,16	; next group of 8 z words
+
+ dec bx	; bx = remaining count of full and partial loops
+ jnz LDrawLoop	
+
+ pop esi	; restore spans pointer
+LNextSpan:	
+ add esi,offset spanpackage_t_size	; point to next span
+LNextSpanESISet:	
+ mov edx,ds:dword ptr[spanpackage_t_count+esi]	
+ cmp edx,offset -999999	; any more spans?
+ jnz LSpanLoop	; yes
+
+ pop edi	
+ pop ebp	; restore the caller's stack frame
+ pop ebx	; restore register variables
+ pop esi	
+ ret	
+
+;=======
+; IR active draw loop
+;=======
+LDrawLoopIR:	; same structure as the standard loop, but texels go through _irtable and light is not applied
+
+; FIXME: do we need to clamp light? We may need at least a buffer bit to
+; keep it from poking into tfrac and causing problems
+
+LDraw8IR:	; pixel 0 of the group: z word at [ecx], destination byte at [edi]
+ cmp bp,ds:word ptr[ecx]	; bp (high 16 bits of 1/z) against the stored z word
+ jl Lp1IR
+ xor eax,eax	
+ mov al,ds:byte ptr[esi]	; al = texel
+ mov al,ds:byte ptr[_irtable+eax]	; remap the texel through _irtable
+ mov ds:word ptr[ecx],bp	; update the z word
+ mov al,ds:byte ptr[12345678h+eax]	; patched table lookup; ah is still 0 here
+LPatch8IR:	
+ mov ds:byte ptr[edi],al	
+Lp1IR:	
+ add edx,ds:dword ptr[tstep]	; step light (low word) and tfrac (high word)
+ sbb eax,eax	; eax = -1 if that add carried, else 0
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	; add the carry back in at bit 0 (ebp is kept rotated by 16)
+ add ebx,ds:dword ptr[_a_sstepxfrac]	; step sfrac; its carry feeds the adc below
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	; ptex += base step (eax = 0) or extra step (eax = -1), plus the sfrac carry
+
+LDraw7IR:	; pixel 1: z word at [2+ecx], destination byte at [1+edi]
+ cmp bp,ds:word ptr[2+ecx]	
+ jl Lp2IR	
+ xor eax,eax	
+ mov al,ds:byte ptr[esi]	
+ mov al,ds:byte ptr[_irtable+eax]
+ mov ds:word ptr[2+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch7IR:	
+ mov ds:byte ptr[1+edi],al	
+Lp2IR:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+LDraw6IR:	; pixel 2: z word at [4+ecx], destination byte at [2+edi]
+ cmp bp,ds:word ptr[4+ecx]	
+ jl Lp3IR	
+ xor eax,eax	
+ mov al,ds:byte ptr[esi]	
+ mov al,ds:byte ptr[_irtable+eax]
+ mov ds:word ptr[4+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch6IR:	
+ mov ds:byte ptr[2+edi],al	
+Lp3IR:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+LDraw5IR:	; pixel 3: z word at [6+ecx], destination byte at [3+edi]
+ cmp bp,ds:word ptr[6+ecx]	
+ jl Lp4IR
+ xor eax,eax	
+ mov al,ds:byte ptr[esi]	
+ mov al,ds:byte ptr[_irtable+eax]
+ mov ds:word ptr[6+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch5IR:	
+ mov ds:byte ptr[3+edi],al	
+Lp4IR:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+LDraw4IR:	; pixel 4: z word at [8+ecx], destination byte at [4+edi]
+ cmp bp,ds:word ptr[8+ecx]	
+ jl Lp5IR
+ xor eax,eax	
+ mov al,ds:byte ptr[esi]	
+ mov al,ds:byte ptr[_irtable+eax]
+ mov ds:word ptr[8+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch4IR:	
+ mov ds:byte ptr[4+edi],al	
+Lp5IR:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+LDraw3IR:	; pixel 5: z word at [10+ecx], destination byte at [5+edi]
+ cmp bp,ds:word ptr[10+ecx]	
+ jl Lp6IR	
+ xor eax,eax	
+ mov al,ds:byte ptr[esi]	
+ mov al,ds:byte ptr[_irtable+eax]
+ mov ds:word ptr[10+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch3IR:	
+ mov ds:byte ptr[5+edi],al	
+Lp6IR:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+LDraw2IR:	; pixel 6: z word at [12+ecx], destination byte at [6+edi]
+ cmp bp,ds:word ptr[12+ecx]	
+ jl Lp7IR
+ xor eax,eax	
+ mov al,ds:byte ptr[esi]	
+ mov al,ds:byte ptr[_irtable+eax]
+ mov ds:word ptr[12+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch2IR:	
+ mov ds:byte ptr[6+edi],al	
+Lp7IR:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+LDraw1IR:	; pixel 7: z word at [14+ecx], destination byte at [7+edi]
+ cmp bp,ds:word ptr[14+ecx]	
+ jl Lp8IR
+ xor eax,eax
+ mov al,ds:byte ptr[esi]	
+ mov al,ds:byte ptr[_irtable+eax]
+ mov ds:word ptr[14+ecx],bp	
+ mov al,ds:byte ptr[12345678h+eax]	
+LPatch1IR:	
+ mov ds:byte ptr[7+edi],al	
+Lp8IR:	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebp,ds:dword ptr[lzistepx]	
+ adc ebp,0	
+ add ebx,ds:dword ptr[_a_sstepxfrac]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+ add edi,8	; next group of 8 destination bytes
+ add ecx,16	; next group of 8 z words
+
+ dec bx	; bx = remaining count of full and partial loops
+ jnz LDrawLoopIR	
+
+ pop esi	; restore spans pointer
+LNextSpanIR:	
+ add esi,offset spanpackage_t_size	; point to next span
+LNextSpanESISetIR:	
+ mov edx,ds:dword ptr[spanpackage_t_count+esi]	
+ cmp edx,offset -999999	; any more spans?
+ jnz LSpanLoop	; yes
+
+ pop edi	
+ pop ebp	; restore the caller's stack frame
+ pop ebx	; restore register variables
+ pop esi	
+ ret	
+
+;=======
+; Standard One-Long Draw
+;=======
+; draw a one-long span
+; reached from LRightEdgeStepped with esi = current span package when lcount == 1
+LExactlyOneLong:	
+ mov al,[_iractive]
+ cmp al,0
+ jne LExactlyOneLongIR	; IR variant below
+
+ mov ecx,ds:dword ptr[spanpackage_t_pz+esi]	
+ mov ebp,ds:dword ptr[spanpackage_t_zi+esi]	
+
+ ror ebp,16	; put high 16 bits of 1/z in low word
+ mov ebx,ds:dword ptr[spanpackage_t_ptex+esi]	
+
+ cmp bp,ds:word ptr[ecx]	
+ jl LNextSpan	; z test failed; esi has not been advanced yet
+ xor eax,eax	
+ mov edi,ds:dword ptr[spanpackage_t_pdest+esi]	
+ mov ah,ds:byte ptr[spanpackage_t_light+1+esi]	; ah = second byte of the light field
+ add esi,offset spanpackage_t_size	; point to next span
+ mov al,ds:byte ptr[ebx]	; al = texel
+ mov ds:word ptr[ecx],bp	; update the z word
+ mov al,ds:byte ptr[12345678h+eax]	; table lookup; _D_Aff8Patch overwrites the 12345678h displacement
+LPatch9:	
+ mov ds:byte ptr[edi],al	
+
+ jmp LNextSpanESISet	; esi already points to the next span
+
+
+;========
+;========
+; draw a one-long span
+; IR variant: the texel is remapped through _irtable and light is not applied
+LExactlyOneLongIR:	
+
+ mov ecx,ds:dword ptr[spanpackage_t_pz+esi]	
+ mov ebp,ds:dword ptr[spanpackage_t_zi+esi]	
+
+ ror ebp,16	; put high 16 bits of 1/z in low word
+ mov ebx,ds:dword ptr[spanpackage_t_ptex+esi]	
+
+ cmp bp,ds:word ptr[ecx]	
+ jl LNextSpanIR	; z test failed; esi has not been advanced yet
+ xor eax,eax	
+ mov edi,ds:dword ptr[spanpackage_t_pdest+esi]	
+ add esi,offset spanpackage_t_size	; point to next span
+ mov al,ds:byte ptr[ebx]	; al = texel
+ mov al,ds:byte ptr[_irtable+eax]	; remap the texel through _irtable
+ mov ds:word ptr[ecx],bp	; update the z word
+ mov al,ds:byte ptr[12345678h+eax]	; patched table lookup; ah is still 0 here
+LPatch9IR:	
+ mov ds:byte ptr[edi],al	
+
+ jmp LNextSpanESISetIR	; esi already points to the next span
+
+; _D_Aff8Patch: store the value of _alias_colormap into the 32-bit displacement that ends just before every LPatch label
+;===================================
+ public _D_Aff8Patch	
+_D_Aff8Patch:	
+ mov eax,[_alias_colormap]	; eax = value written into each lookup instruction
+ mov ds:dword ptr[LPatch9-4],eax	; one-long span, standard
+ mov ds:dword ptr[LPatch9IR-4],eax	; one-long span, IR
+ mov ds:dword ptr[LPatch8-4],eax	; unrolled loops, standard and IR side by side
+ mov ds:dword ptr[LPatch8IR-4],eax	
+ mov ds:dword ptr[LPatch7-4],eax	
+ mov ds:dword ptr[LPatch7IR-4],eax	
+ mov ds:dword ptr[LPatch6-4],eax	
+ mov ds:dword ptr[LPatch6IR-4],eax	
+ mov ds:dword ptr[LPatch5-4],eax	
+ mov ds:dword ptr[LPatch5IR-4],eax	
+ mov ds:dword ptr[LPatch4-4],eax	
+ mov ds:dword ptr[LPatch4IR-4],eax	
+ mov ds:dword ptr[LPatch3-4],eax	
+ mov ds:dword ptr[LPatch3IR-4],eax	
+ mov ds:dword ptr[LPatch2-4],eax	
+ mov ds:dword ptr[LPatch2IR-4],eax	
+ mov ds:dword ptr[LPatch1-4],eax	
+ mov ds:dword ptr[LPatch1IR-4],eax	
+; NOTE(review): these stores modify code; presumably the code pages are made writable elsewhere - confirm
+ ret	
+
+
+
+;===================================
+;===================================
+; height = stack offset of the single argument once ebp, esi, edi and ebx have been pushed (4 + 16)
+height	equ		4+16
+
+ public _R_PolysetScanLeftEdge	
+_R_PolysetScanLeftEdge:	
+ push ebp	; preserve caller stack frame pointer
+ push esi	; preserve register variables
+ push edi	
+ push ebx	
+
+ mov eax,ds:dword ptr[height+esp]	; eax = height argument
+ mov ecx,ds:dword ptr[_d_sfrac]
+
+ and eax,0FFFFh	; only the low 16 bits of height are used as the loop count
+ mov ebx,ds:dword ptr[_d_ptex]	
+ or ecx,eax	; NOTE(review): relies on the low word of d_sfrac being clear - confirm
+ mov esi,ds:dword ptr[_d_pedgespanpackage]	
+ mov edx,ds:dword ptr[_d_tfrac]	
+ mov edi,ds:dword ptr[_d_light]	
+ mov ebp,ds:dword ptr[_d_zi]	
+
+; %eax: scratch
+; %ebx: d_ptex
+; %ecx: d_sfrac in high word, count in low word
+; %edx: d_tfrac
+; %esi: d_pedgespanpackage, errorterm, scratch alternately
+; %edi: d_light
+; %ebp: d_zi
+; (this routine never stores ebx, ecx, edx, edi or ebp back to those globals)
+;	do
+;	{
+
+LScanLoop:	
+
+;		d_pedgespanpackage->ptex = ptex;
+;		d_pedgespanpackage->pdest = d_pdest;
+;		d_pedgespanpackage->pz = d_pz;
+;		d_pedgespanpackage->count = d_aspancount;
+;		d_pedgespanpackage->light = d_light;
+;		d_pedgespanpackage->zi = d_zi;
+;		d_pedgespanpackage->sfrac = d_sfrac << 16;
+;		d_pedgespanpackage->tfrac = d_tfrac << 16;
+ mov ds:dword ptr[spanpackage_t_ptex+esi],ebx	
+ mov eax,ds:dword ptr[_d_pdest]	
+ mov ds:dword ptr[spanpackage_t_pdest+esi],eax	
+ mov eax,ds:dword ptr[_d_pz]	
+ mov ds:dword ptr[spanpackage_t_pz+esi],eax	
+ mov eax,ds:dword ptr[_d_aspancount]	
+ mov ds:dword ptr[spanpackage_t_count+esi],eax	
+ mov ds:dword ptr[spanpackage_t_light+esi],edi	
+ mov ds:dword ptr[spanpackage_t_zi+esi],ebp	
+ mov ds:dword ptr[spanpackage_t_sfrac+esi],ecx	; stored as held in ecx, so the low word carries the loop count
+ mov ds:dword ptr[spanpackage_t_tfrac+esi],edx	
+
+; pretouch the next cache line
+ mov al,ds:byte ptr[spanpackage_t_size+esi]	
+
+;		d_pedgespanpackage++;
+ add esi,offset spanpackage_t_size	
+ mov eax,ds:dword ptr[_erroradjustup]	
+ mov ds:dword ptr[_d_pedgespanpackage],esi	
+
+;		errorterm += erroradjustup;
+ mov esi,ds:dword ptr[_errorterm]	
+ add esi,eax	
+ mov eax,ds:dword ptr[_d_pdest]	; mov leaves the sign flag from the add intact for the js below
+
+;		if (errorterm >= 0)
+;		{
+ js LNoLeftEdgeTurnover	
+
+;			errorterm -= erroradjustdown;
+;			d_pdest += d_pdestextrastep;
+ sub esi,ds:dword ptr[_erroradjustdown]	
+ add eax,ds:dword ptr[_d_pdestextrastep]	
+ mov ds:dword ptr[_errorterm],esi	
+ mov ds:dword ptr[_d_pdest],eax	
+
+;			d_pz += d_pzextrastep;
+;			d_aspancount += d_countextrastep;
+;			d_ptex += d_ptexextrastep;
+;			d_sfrac += d_sfracextrastep;
+;			d_ptex += d_sfrac >> 16;
+;			d_sfrac &= 0xFFFF;
+;			d_tfrac += d_tfracextrastep;
+ mov eax,ds:dword ptr[_d_pz]	
+ mov esi,ds:dword ptr[_d_aspancount]	
+ add eax,ds:dword ptr[_d_pzextrastep]	
+ add ecx,ds:dword ptr[_d_sfracextrastep]	; the carry out of sfrac feeds the adc below
+ adc ebx,ds:dword ptr[_d_ptexextrastep]	; d_ptex += d_ptexextrastep + carry
+ add esi,ds:dword ptr[_d_countextrastep]	
+ mov ds:dword ptr[_d_pz],eax	
+ mov eax,ds:dword ptr[_d_tfracextrastep]	
+ mov ds:dword ptr[_d_aspancount],esi	
+ add edx,eax	; the carry out of tfrac is tested by the jnc below
+
+;			if (d_tfrac & 0x10000)
+;			{
+ jnc LSkip1	
+
+;				d_ptex += r_affinetridesc.skinwidth;
+;				d_tfrac &= 0xFFFF;
+ add ebx,ds:dword ptr[_r_affinetridesc+atd_skinwidth]	
+
+;			}
+
+LSkip1:	
+
+;			d_light += d_lightextrastep;
+;			d_zi += d_ziextrastep;
+ add edi,ds:dword ptr[_d_lightextrastep]	
+ add ebp,ds:dword ptr[_d_ziextrastep]	
+
+;		}
+ mov esi,ds:dword ptr[_d_pedgespanpackage]	; esi = next span package again
+ dec ecx	; count down the loop count held in the low word
+ test ecx,0FFFFh	
+ jnz LScanLoop	; loop until the low word reaches zero
+
+ pop ebx	
+ pop edi	
+ pop esi	
+ pop ebp	
+ ret	
+
+;		else
+;		{
+
+LNoLeftEdgeTurnover:	
+ mov ds:dword ptr[_errorterm],esi	
+
+;			d_pdest += d_pdestbasestep;
+ add eax,ds:dword ptr[_d_pdestbasestep]	
+ mov ds:dword ptr[_d_pdest],eax	
+
+;			d_pz += d_pzbasestep;
+;			d_aspancount += ubasestep;
+;			d_ptex += d_ptexbasestep;
+;			d_sfrac += d_sfracbasestep;
+;			d_ptex += d_sfrac >> 16;
+;			d_sfrac &= 0xFFFF;
+ mov eax,ds:dword ptr[_d_pz]	
+ mov esi,ds:dword ptr[_d_aspancount]	
+ add eax,ds:dword ptr[_d_pzbasestep]	
+ add ecx,ds:dword ptr[_d_sfracbasestep]	; the carry out of sfrac feeds the adc below
+ adc ebx,ds:dword ptr[_d_ptexbasestep]	; d_ptex += d_ptexbasestep + carry
+ add esi,ds:dword ptr[_ubasestep]	
+ mov ds:dword ptr[_d_pz],eax	
+ mov ds:dword ptr[_d_aspancount],esi	
+
+;			d_tfrac += d_tfracbasestep;
+ mov esi,ds:dword ptr[_d_tfracbasestep]	
+ add edx,esi	; the carry out of tfrac is tested by the jnc below
+
+;			if (d_tfrac & 0x10000)
+;			{
+ jnc LSkip2	
+
+;				d_ptex += r_affinetridesc.skinwidth;
+;				d_tfrac &= 0xFFFF;
+ add ebx,ds:dword ptr[_r_affinetridesc+atd_skinwidth]	
+
+;			}
+
+LSkip2:	
+
+;			d_light += d_lightbasestep;
+;			d_zi += d_zibasestep;
+ add edi,ds:dword ptr[_d_lightbasestep]	
+ add ebp,ds:dword ptr[_d_zibasestep]	
+
+;		}
+;	} while (--height);
+ mov esi,ds:dword ptr[_d_pedgespanpackage]	; esi = next span package again
+ dec ecx	; count down the loop count held in the low word
+ test ecx,0FFFFh	
+ jnz LScanLoop	; loop until the low word reaches zero
+
+ pop ebx	
+ pop edi	
+ pop esi	
+ pop ebp	
+ ret	
+
+_TEXT ENDS
+endif	;id386
+ END
--- a/ref_soft/r_polyse.c
+++ b/ref_soft/r_polyse.c
--- a/ref_soft/r_rast.c
+++ b/ref_soft/r_rast.c
@@ -0,0 +1,852 @@
+/*
+Copyright (C) 1997-2001 Id Software, Inc.
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
+
+See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+*/
+// r_rast.c
+
+#include <assert.h>
+
+#include "r_local.h"
+
+#define MAXLEFTCLIPEDGES		100
+
+// !!! if these are changed, they must be changed in asm_draw.h too !!!
+#define FULLY_CLIPPED_CACHED	0x80000000	// flag bit in cachededgeoffset: edge was cached as fully clipped
+#define FRAMECOUNT_MASK			0x7FFFFFFF	// remaining bits then hold the frame count
+
+unsigned int	cacheoffset;	// value that R_RenderFace stores into r_pedge->cachededgeoffset after clipping
+
+int			c_faceclip;					// number of faces clipped
+
+
+clipplane_t	*entity_clipplanes;
+clipplane_t	view_clipplanes[4];
+clipplane_t	world_clipplanes[16];
+
+medge_t			*r_pedge;	// model edge being processed; recorded as the owner of emitted edges
+
+qboolean		r_leftclipped, r_rightclipped;	// set by R_ClipEdge when a leftedge / rightedge plane clips the edge
+static qboolean	makeleftedge, makerightedge;
+qboolean		r_nearzionly;	// when set, R_EmitEdge only updates r_nearzi and returns
+
+int		sintable[1280];
+int		intsintable[1280];
+int		blanktable[1280];		// PGM
+
+mvertex_t	r_leftenter, r_leftexit;	// clip points recorded by R_ClipEdge on a leftedge plane
+mvertex_t	r_rightenter, r_rightexit;	// clip points recorded by R_ClipEdge on a rightedge plane
+
+typedef struct
+{
+	float	u,v;
+	int		ceilv;
+} evert_t;
+
+int				r_emitted;	// set to 1 once an edge has been emitted
+float			r_nearzi;	// largest 1/z seen so far; for mipmap finding
+float			r_u1, r_v1, r_lzi1;	// projected second vertex of the last R_EmitEdge call
+int				r_ceilv1;	// ceil of r_v1
+
+qboolean		r_lastvertvalid;	// true when r_u1/r_v1/r_lzi1/r_ceilv1 can be reused as the next first vertex
+int				r_skyframe;	// r_framecount of the last frame R_EmitSkyBox set the box up
+
+msurface_t		*r_skyfaces;	// the sky* pointers are set by R_InitSkyBox to the entries it appends to loadmodel
+mplane_t		r_skyplanes[6];
+mtexinfo_t		r_skytexinfo[6];
+mvertex_t		*r_skyverts;
+medge_t			*r_skyedges;
+int				*r_skysurfedges;
+
+// I just copied this data from a box map...
+int skybox_planes[12] = {2,-128, 0,-128, 2,128, 1,128, 0,128, 1,-128};	// per face: normal axis index, plane dist
+// per face: four signed, 1-based indexes into the twelve box edges
+int box_surfedges[24] = { 1,2,3,4,  -1,5,6,7,  8,9,-6,10,  -2,-7,-9,11,
+  12,-3,-11,-8,  -12,-10,-5,-4};
+int box_edges[24] = { 1,2, 2,3, 3,4, 4,1, 1,5, 5,6, 6,2, 7,8, 8,6, 5,7, 8,3, 7,4};	// per edge: two 1-based vertex indexes
+
+int	box_faces[6] = {0,0,2,2,2,0};	// per face: flag bits OR'ed with SURF_DRAWSKYBOX
+
+vec3_t	box_vecs[6][2] = {	// per face: copied into texinfo vecs[0] and vecs[1]
+	{	{0,-1,0}, {-1,0,0} },
+	{ {0,1,0}, {0,0,-1} },
+	{	{0,-1,0}, {1,0,0} },
+	{ {1,0,0}, {0,0,-1} },
+	{ {0,-1,0}, {0,0,-1} },
+	{ {-1,0,0}, {0,0,-1} }
+};
+
+float	box_verts[8][3] = {	// unit corners; R_EmitSkyBox scales them by 128 around r_origin
+	{-1,-1,-1},
+	{-1,1,-1},
+	{1,1,-1},
+	{1,-1,-1},
+	{-1,-1,1},
+	{-1,1,1},
+	{1,-1,1},
+	{1,1,1}
+};
+
+// down, west, up, north, east, south
+// {"rt", "bk", "lf", "ft", "up", "dn"};
+
+/*
+================
+R_InitSkyBox
+Appends six faces, eight vertexes, twelve edges and 24 surfedges to loadmodel
+================
+*/
+void R_InitSkyBox (void)
+{
+	extern model_t *loadmodel;
+	int			n, firstsurfedge, edgebase, vertbase;
+	msurface_t	*face;
+
+	r_skyfaces = &loadmodel->surfaces[loadmodel->numsurfaces];	// sky data goes behind the model's own data
+	r_skyverts = &loadmodel->vertexes[loadmodel->numvertexes];
+	r_skyedges = &loadmodel->edges[loadmodel->numedges];
+	r_skysurfedges = &loadmodel->surfedges[loadmodel->numsurfedges];
+	firstsurfedge = loadmodel->numsurfedges;
+	edgebase = loadmodel->numedges - 1;		// box_surfedges and box_edges are 1-based
+	vertbase = loadmodel->numvertexes - 1;
+
+	loadmodel->numsurfaces += 6;
+	loadmodel->numvertexes += 8;
+	loadmodel->numedges += 12;
+	loadmodel->numsurfedges += 24;
+	if (loadmodel->numsurfaces > MAX_MAP_FACES
+		|| loadmodel->numvertexes > MAX_MAP_VERTS
+		|| loadmodel->numedges > MAX_MAP_EDGES)
+		ri.Sys_Error (ERR_DROP, "InitSkyBox: map overflow");
+
+	memset (r_skyfaces, 0, 6*sizeof(*r_skyfaces));
+	for (n=0, face=r_skyfaces ; n<6 ; n++, face++)
+	{
+		r_skyplanes[n].normal[skybox_planes[n*2]] = 1;
+		r_skyplanes[n].dist = skybox_planes[n*2+1];
+		VectorCopy (box_vecs[n][0], r_skytexinfo[n].vecs[0]);
+		VectorCopy (box_vecs[n][1], r_skytexinfo[n].vecs[1]);
+
+		face->plane = &r_skyplanes[n];
+		face->numedges = 4;
+		face->flags = box_faces[n] | SURF_DRAWSKYBOX;
+		face->firstedge = firstsurfedge + n*4;
+		face->texinfo = &r_skytexinfo[n];
+		face->texturemins[0] = face->texturemins[1] = -128;
+		face->extents[0] = face->extents[1] = 256;
+	}
+
+	for (n=0 ; n<24 ; n++)	// negative table entries stay negative
+		r_skysurfedges[n] = (box_surfedges[n] > 0) ?
+			edgebase + box_surfedges[n] : -(edgebase - box_surfedges[n]);
+
+	for (n=0 ; n<12 ; n++)
+	{
+		r_skyedges[n].v[0] = vertbase + box_edges[n*2+0];
+		r_skyedges[n].v[1] = vertbase + box_edges[n*2+1];
+		r_skyedges[n].cachededgeoffset = 0;
+	}
+}
+
+/*
+================
+R_EmitSkyBox
+
+Re-centers the sky box on r_origin and renders its six faces;
+does nothing for submodels or when already done this frame
+================
+*/
+void R_EmitSkyBox (void)
+{
+	int		face, vert, axis;
+	int		savedkey;
+
+	// submodels should never have skies, and once per frame is enough
+	if (insubmodel || r_skyframe == r_framecount)
+		return;
+
+	r_skyframe = r_framecount;
+
+	// set the eight fake vertexes
+	for (vert=0 ; vert<8 ; vert++)
+		for (axis=0 ; axis<3 ; axis++)
+			r_skyverts[vert].position[axis] = r_origin[axis] + box_verts[vert][axis]*128;
+
+	for (face=0 ; face<6 ; face++)
+	{
+		// set the fake plane
+		axis = skybox_planes[face*2];
+		r_skyplanes[face].dist = (skybox_planes[face*2+1] > 0) ?
+			r_origin[axis]+128 : r_origin[axis]-128;
+
+		// fix texture offsets
+		r_skytexinfo[face].vecs[0][3] = -DotProduct (r_origin, r_skytexinfo[face].vecs[0]);
+		r_skytexinfo[face].vecs[1][3] = -DotProduct (r_origin, r_skytexinfo[face].vecs[1]);
+	}
+
+	// emit the six faces under a fixed key, then restore the bsp sorting order
+	savedkey = r_currentkey;
+	r_currentkey = 0x7ffffff0;
+	for (face=0 ; face<6 ; face++)
+	{
+		R_RenderFace (r_skyfaces + face, 15);
+	}
+	r_currentkey = savedkey;
+}
+
+
+#if	!id386
+
+/*
+================
+R_EmitEdge
+================
+*/
+void R_EmitEdge (mvertex_t *pv0, mvertex_t *pv1)
+{
+	edge_t	*edge, *pcheck;
+	int		u_check;
+	float	u, u_step;
+	vec3_t	local, transformed;
+	float	*world;
+	int		v, v2, ceilv0;
+	float	scale, lzi0, u0, v0;
+	int		side;
+
+	if (r_lastvertvalid)
+	{
+		u0 = r_u1;		// reuse the second vertex projected by the previous call
+		v0 = r_v1;
+		lzi0 = r_lzi1;
+		ceilv0 = r_ceilv1;
+	}
+	else
+	{
+		world = &pv0->position[0];
+	
+	// transform and project
+		VectorSubtract (world, modelorg, local);
+		TransformVector (local, transformed);
+	
+		if (transformed[2] < NEAR_CLIP)
+			transformed[2] = NEAR_CLIP;
+	
+		lzi0 = 1.0 / transformed[2];
+	
+	// FIXME: build x/yscale into transform?
+		scale = xscale * lzi0;
+		u0 = (xcenter + scale*transformed[0]);
+		if (u0 < r_refdef.fvrectx_adj)
+			u0 = r_refdef.fvrectx_adj;
+		if (u0 > r_refdef.fvrectright_adj)
+			u0 = r_refdef.fvrectright_adj;
+	
+		scale = yscale * lzi0;
+		v0 = (ycenter - scale*transformed[1]);
+		if (v0 < r_refdef.fvrecty_adj)
+			v0 = r_refdef.fvrecty_adj;
+		if (v0 > r_refdef.fvrectbottom_adj)
+			v0 = r_refdef.fvrectbottom_adj;
+	
+		ceilv0 = (int) ceil(v0);
+	}
+
+	world = &pv1->position[0];
+
+// transform and project
+	VectorSubtract (world, modelorg, local);
+	TransformVector (local, transformed);
+
+	if (transformed[2] < NEAR_CLIP)
+		transformed[2] = NEAR_CLIP;
+
+	r_lzi1 = 1.0 / transformed[2];
+
+	scale = xscale * r_lzi1;
+	r_u1 = (xcenter + scale*transformed[0]);
+	if (r_u1 < r_refdef.fvrectx_adj)
+		r_u1 = r_refdef.fvrectx_adj;
+	if (r_u1 > r_refdef.fvrectright_adj)
+		r_u1 = r_refdef.fvrectright_adj;
+
+	scale = yscale * r_lzi1;
+	r_v1 = (ycenter - scale*transformed[1]);
+	if (r_v1 < r_refdef.fvrecty_adj)
+		r_v1 = r_refdef.fvrecty_adj;
+	if (r_v1 > r_refdef.fvrectbottom_adj)
+		r_v1 = r_refdef.fvrectbottom_adj;
+
+	if (r_lzi1 > lzi0)		// lzi0 becomes the larger 1/z of the two endpoints
+		lzi0 = r_lzi1;
+
+	if (lzi0 > r_nearzi)	// for mipmap finding
+		r_nearzi = lzi0;
+
+// for right edges, all we want is the effect on 1/z
+	if (r_nearzionly)
+		return;
+
+	r_emitted = 1;
+
+	r_ceilv1 = (int) ceil(r_v1);
+
+
+// create the edge
+	if (ceilv0 == r_ceilv1)
+	{
+	// we cache unclipped horizontal edges as fully clipped
+		if (cacheoffset != 0x7FFFFFFF)	// 0x7FFFFFFF is the value R_ClipEdge sets for edges it does not cache
+		{
+			cacheoffset = FULLY_CLIPPED_CACHED |
+					(r_framecount & FRAMECOUNT_MASK);
+		}
+
+		return;		// horizontal edge
+	}
+
+	side = ceilv0 > r_ceilv1;	// 1 when the first vertex has the larger ceil(v)
+
+	edge = edge_p++;
+
+	edge->owner = r_pedge;
+
+	edge->nearzi = lzi0;
+
+	if (side == 0)
+	{
+	// trailing edge (go from p1 to p2)
+		v = ceilv0;
+		v2 = r_ceilv1 - 1;
+
+		edge->surfs[0] = surface_p - surfaces;
+		edge->surfs[1] = 0;
+
+		u_step = ((r_u1 - u0) / (r_v1 - v0));
+		u = u0 + ((float)v - v0) * u_step;	// u at the first covered scan line
+	}
+	else
+	{
+	// leading edge (go from p2 to p1)
+		v2 = ceilv0 - 1;
+		v = r_ceilv1;
+
+		edge->surfs[0] = 0;
+		edge->surfs[1] = surface_p - surfaces;
+
+		u_step = ((u0 - r_u1) / (v0 - r_v1));
+		u = r_u1 + ((float)v - r_v1) * u_step;	// u at the first covered scan line
+	}
+
+	edge->u_step = u_step*0x100000;		// scaled by 2^20
+	edge->u = u*0x100000 + 0xFFFFF;		// scaled by 2^20, plus 2^20-1
+
+// we need to do this to avoid stepping off the edges if a very nearly
+// horizontal edge is less than epsilon above a scan, and numeric error causes
+// it to incorrectly extend to the scan, and the extension of the line goes off
+// the edge of the screen
+// FIXME: is this actually needed?
+	if (edge->u < r_refdef.vrect_x_adj_shift20)
+		edge->u = r_refdef.vrect_x_adj_shift20;
+	if (edge->u > r_refdef.vrectright_adj_shift20)
+		edge->u = r_refdef.vrectright_adj_shift20;
+
+//
+// sort the edge in normally
+//
+	u_check = edge->u;
+	if (edge->surfs[0])
+		u_check++;	// sort trailers after leaders
+
+	if (!newedges[v] || newedges[v]->u >= u_check)
+	{
+		edge->next = newedges[v];	// becomes the new head of the list for scan line v
+		newedges[v] = edge;
+	}
+	else
+	{
+		pcheck = newedges[v];
+		while (pcheck->next && pcheck->next->u < u_check)
+			pcheck = pcheck->next;
+		edge->next = pcheck->next;	// insert after the last entry with a smaller u
+		pcheck->next = edge;
+	}
+
+	edge->nextremove = removeedges[v2];	// push onto the removal list of scan line v2
+	removeedges[v2] = edge;
+}
+
+
+/*
+================
+R_ClipEdge
+================
+*/
+void R_ClipEdge (mvertex_t *pv0, mvertex_t *pv1, clipplane_t *clip)
+{
+	float		d0, d1, f;
+	mvertex_t	clipvert;
+
+	if (clip)
+	{
+		do
+		{
+			d0 = DotProduct (pv0->position, clip->normal) - clip->dist;	// signed distances; >= 0 is treated as unclipped
+			d1 = DotProduct (pv1->position, clip->normal) - clip->dist;
+
+			if (d0 >= 0)
+			{
+			// point 0 is unclipped
+				if (d1 >= 0)
+				{
+				// both points are unclipped
+					continue;	// moves on to clip->next through the loop condition
+				}
+
+			// only point 1 is clipped
+
+			// we don't cache clipped edges
+				cacheoffset = 0x7FFFFFFF;
+
+				f = d0 / (d0 - d1);	// fraction of the way from pv0 to pv1 where the plane is crossed
+				clipvert.position[0] = pv0->position[0] +
+						f * (pv1->position[0] - pv0->position[0]);
+				clipvert.position[1] = pv0->position[1] +
+						f * (pv1->position[1] - pv0->position[1]);
+				clipvert.position[2] = pv0->position[2] +
+						f * (pv1->position[2] - pv0->position[2]);
+
+				if (clip->leftedge)
+				{
+					r_leftclipped = true;
+					r_leftexit = clipvert;
+				}
+				else if (clip->rightedge)
+				{
+					r_rightclipped = true;
+					r_rightexit = clipvert;
+				}
+
+				R_ClipEdge (pv0, &clipvert, clip->next);	// clip the kept part against the remaining planes
+				return;
+			}
+			else
+			{
+			// point 0 is clipped
+				if (d1 < 0)
+				{
+				// both points are clipped
+				// we do cache fully clipped edges
+					if (!r_leftclipped)
+						cacheoffset = FULLY_CLIPPED_CACHED |
+								(r_framecount & FRAMECOUNT_MASK);
+					return;
+				}
+
+			// only point 0 is clipped
+				r_lastvertvalid = false;	// R_EmitEdge must project its first vertex afresh
+
+			// we don't cache partially clipped edges
+				cacheoffset = 0x7FFFFFFF;
+
+				f = d0 / (d0 - d1);	// fraction of the way from pv0 to pv1 where the plane is crossed
+				clipvert.position[0] = pv0->position[0] +
+						f * (pv1->position[0] - pv0->position[0]);
+				clipvert.position[1] = pv0->position[1] +
+						f * (pv1->position[1] - pv0->position[1]);
+				clipvert.position[2] = pv0->position[2] +
+						f * (pv1->position[2] - pv0->position[2]);
+
+				if (clip->leftedge)
+				{
+					r_leftclipped = true;
+					r_leftenter = clipvert;
+				}
+				else if (clip->rightedge)
+				{
+					r_rightclipped = true;
+					r_rightenter = clipvert;
+				}
+
+				R_ClipEdge (&clipvert, pv1, clip->next);	// clip the kept part against the remaining planes
+				return;
+			}
+		} while ((clip = clip->next) != NULL);
+	}
+
+// add the edge
+	R_EmitEdge (pv0, pv1);	// reached only when no plane in the list clipped either endpoint
+}
+
+#endif	// !id386
+
+
+/*
+================
+R_EmitCachedEdge
+Attaches the current surface to the edge_t already cached for r_pedge
+================
+*/
+void R_EmitCachedEdge (void)
+{
+	edge_t		*pedge_t;
+
+	// cachededgeoffset is a byte offset from r_edges; stay in pointer
+	// arithmetic, since unsigned long can be narrower than a pointer
+	pedge_t = (edge_t *)((byte *)r_edges + r_pedge->cachededgeoffset);
+	if (!pedge_t->surfs[0])
+		pedge_t->surfs[0] = surface_p - surfaces;	// first slot still free
+	else
+		pedge_t->surfs[1] = surface_p - surfaces;
+	if (pedge_t->nearzi > r_nearzi)	// for mipmap finding
+		r_nearzi = pedge_t->nearzi;
+
+	r_emitted = 1;
+}
+
+
+/*
+================
+R_RenderFace
+================
+*/
+void R_RenderFace (msurface_t *fa, int clipflags)
+{
+	int			i, lindex;
+	unsigned	mask;
+	mplane_t	*pplane;
+	float		distinv;
+	vec3_t		p_normal;
+	medge_t		*pedges, tedge;
+	clipplane_t	*pclip;
+
+	// translucent surfaces are not drawn by the edge renderer
+	if (fa->texinfo->flags & (SURF_TRANS33|SURF_TRANS66))
+	{
+		fa->nextalphasurface = r_alpha_surfaces;
+		r_alpha_surfaces = fa;
+		return;
+	}
+
+	// sky surfaces encountered in the world will cause the
+	// environment box surfaces to be emitted
+	if ( fa->texinfo->flags & SURF_SKY )
+	{
+		R_EmitSkyBox ();	
+		return;
+	}
+
+// skip out if no more surfs
+	if ((surface_p) >= surf_max)
+	{
+		r_outofsurfaces++;
+		return;
+	}
+
+// ditto if not enough edges left, or switch to auxedges if possible
+	if ((edge_p + fa->numedges + 4) >= edge_max)
+	{
+		r_outofedges += fa->numedges;
+		return;
+	}
+
+	c_faceclip++;
+
+// set up clip planes
+	pclip = NULL;
+
+	for (i=3, mask = 0x08 ; i>=0 ; i--, mask >>= 1)
+	{
+		if (clipflags & mask)
+		{
+			view_clipplanes[i].next = pclip;
+			pclip = &view_clipplanes[i];
+		}
+	}
+
+// push the edges through
+	r_emitted = 0;
+	r_nearzi = 0;
+	r_nearzionly = false;
+	makeleftedge = makerightedge = false;
+	pedges = currentmodel->edges;
+	r_lastvertvalid = false;
+
+	for (i=0 ; i<fa->numedges ; i++)
+	{
+		lindex = currentmodel->surfedges[fa->firstedge + i];
+
+		if (lindex > 0)
+		{
+			r_pedge = &pedges[lindex];
+
+		// if the edge is cached, we can just reuse the edge
+			if (!insubmodel)
+			{
+				if (r_pedge->cachededgeoffset & FULLY_CLIPPED_CACHED)
+				{
+					if ((r_pedge->cachededgeoffset & FRAMECOUNT_MASK) ==
+						r_framecount)
+					{
+						r_lastvertvalid = false;
+						continue;
+					}
+				}
+				else
+				{
+					if ((((unsigned long)edge_p - (unsigned long)r_edges) >
+						 r_pedge->cachededgeoffset) &&
+						(((edge_t *)((unsigned long)r_edges +
+						 r_pedge->cachededgeoffset))->owner == r_pedge))
+					{
+						R_EmitCachedEdge ();
+						r_lastvertvalid = false;
+						continue;
+					}
+				}
+			}
+
+		// assume it's cacheable
+			cacheoffset = (byte *)edge_p - (byte *)r_edges;
+			r_leftclipped = r_rightclipped = false;
+			R_ClipEdge (&r_pcurrentvertbase[r_pedge->v[0]],
+						&r_pcurrentvertbase[r_pedge->v[1]],
+						pclip);
+			r_pedge->cachededgeoffset = cacheoffset;
+
+			if (r_leftclipped)
+				makeleftedge = true;
+			if (r_rightclipped)
+				makerightedge = true;
+			r_lastvertvalid = true;
+		}
+		else
+		{
+			lindex = -lindex;
+			r_pedge = &pedges[lindex];
+		// if the edge is cached, we can just reuse the edge
+			if (!insubmodel)
+			{
+				if (r_pedge->cachededgeoffset & FULLY_CLIPPED_CACHED)
+				{
+					if ((r_pedge->cachededgeoffset & FRAMECOUNT_MASK) ==
+						r_framecount)
+					{
+						r_lastvertvalid = false;
+						continue;
+					}
+				}
+				else
+				{
+				// it's cached if the cached edge is valid and is owned
+				// by this medge_t
+					if ((((unsigned long)edge_p - (unsigned long)r_edges) >
+						 r_pedge->cachededgeoffset) &&
+						(((edge_t *)((unsigned long)r_edges +
+						 r_pedge->cachededgeoffset))->owner == r_pedge))
+					{
+						R_EmitCachedEdge ();
+						r_lastvertvalid = false;
+						continue;
+					}
+				}
+			}
+
+		// assume it's cacheable
+			cacheoffset = (byte *)edge_p - (byte *)r_edges;
+			r_leftclipped = r_rightclipped = false;
+			R_ClipEdge (&r_pcurrentvertbase[r_pedge->v[1]],
+						&r_pcurrentvertbase[r_pedge->v[0]],
+						pclip);
+			r_pedge->cachededgeoffset = cacheoffset;
+
+			if (r_leftclipped)
+				makeleftedge = true;
+			if (r_rightclipped)
+				makerightedge = true;
+			r_lastvertvalid = true;
+		}
+	}
+
+// if there was a clip off the left edge, add that edge too
+// FIXME: faster to do in screen space?
+// FIXME: share clipped edges?
+	if (makeleftedge)
+	{
+		r_pedge = &tedge;
+		r_lastvertvalid = false;
+		R_ClipEdge (&r_leftexit, &r_leftenter, pclip->next);
+	}
+
+// if there was a clip off the right edge, get the right r_nearzi
+	if (makerightedge)
+	{
+		r_pedge = &tedge;
+		r_lastvertvalid = false;
+		r_nearzionly = true;
+		R_ClipEdge (&r_rightexit, &r_rightenter, view_clipplanes[1].next);
+	}
+
+// if no edges made it out, return without posting the surface
+	if (!r_emitted)
+		return;
+
+	r_polycount++;
+
+	surface_p->msurf = fa;
+	surface_p->nearzi = r_nearzi;
+	surface_p->flags = fa->flags;
+	surface_p->insubmodel = insubmodel;
+	surface_p->spanstate = 0;
+	surface_p->entity = currententity;
+	surface_p->key = r_currentkey++;
+	surface_p->spans = NULL;
+
+	pplane = fa->plane;
+// FIXME: cache this?
+	TransformVector (pplane->normal, p_normal);
+// FIXME: cache this?
+	distinv = 1.0 / (pplane->dist - DotProduct (modelorg, pplane->normal));
+
+	surface_p->d_zistepu = p_normal[0] * xscaleinv * distinv;
+	surface_p->d_zistepv = -p_normal[1] * yscaleinv * distinv;
+	surface_p->d_ziorigin = p_normal[2] * distinv -
+			xcenter * surface_p->d_zistepu -
+			ycenter * surface_p->d_zistepv;
+
+	surface_p++;
+}
+
+
+/*
+================
+R_RenderBmodelFace
+================
+*/
+void R_RenderBmodelFace (bedge_t *pedges, msurface_t *psurf)
+{
+	int			planenum;
+	mplane_t	*faceplane;
+	float		invdist;
+	vec3_t		viewnormal;
+	medge_t		scratchedge;
+	clipplane_t	*cliplist;
+
+// translucent faces are chained onto r_alpha_surfaces instead of being
+// pushed through the edge list
+	if (psurf->texinfo->flags & (SURF_TRANS33|SURF_TRANS66))
+	{
+		psurf->nextalphasurface = r_alpha_surfaces;
+		r_alpha_surfaces = psurf;
+		return;
+	}
+
+// skip out if there is no surf_t left to post the face into
+	if (surface_p >= surf_max)
+	{
+		r_outofsurfaces++;
+		return;
+	}
+
+// likewise if there isn't room for every edge of the face plus 4 more
+	if ((edge_p + psurf->numedges + 4) >= edge_max)
+	{
+		r_outofedges += psurf->numedges;
+		return;
+	}
+
+	c_faceclip++;
+
+// r_pedge is a dummy here; it gives the caching mechanism someplace to write to
+	r_pedge = &scratchedge;
+
+// chain up the planes flagged in r_clipflags, lowest-numbered plane first
+	cliplist = NULL;
+	for (planenum = 3 ; planenum >= 0 ; planenum--)
+	{
+		if (!(r_clipflags & (1 << planenum)))
+			continue;
+		view_clipplanes[planenum].next = cliplist;
+		cliplist = &view_clipplanes[planenum];
+	}
+
+// reset the per-face clip/emit state
+	makeleftedge = makerightedge = false;
+	r_nearzionly = false;
+	r_nearzi = 0;
+	r_emitted = 0;
+// FIXME: keep clipped bmodel edges in clockwise order so last vertex caching
+// can be used?
+	r_lastvertvalid = false;
+
+// push the edges through
+	while (pedges)
+	{
+		r_leftclipped = r_rightclipped = false;
+		R_ClipEdge (pedges->v[0], pedges->v[1], cliplist);
+		if (r_leftclipped)
+			makeleftedge = true;
+		if (r_rightclipped)
+			makerightedge = true;
+		pedges = pedges->pnext;
+	}
+
+// if there was a clip off the left edge, add that edge too
+// FIXME: faster to do in screen space?
+// FIXME: share clipped edges?
+	if (makeleftedge)
+	{
+		r_pedge = &scratchedge;
+		R_ClipEdge (&r_leftexit, &r_leftenter, cliplist->next);
+	}
+
+// a clip off the right edge is only run to pick up its r_nearzi
+	if (makerightedge)
+	{
+		r_pedge = &scratchedge;
+		r_nearzionly = true;
+		R_ClipEdge (&r_rightexit, &r_rightenter, view_clipplanes[1].next);
+	}
+
+// nothing survived clipping, so don't post the surface
+	if (r_emitted == 0)
+		return;
+
+	r_polycount++;
+
+	surface_p->key = r_currentbkey;
+	surface_p->entity = currententity;
+	surface_p->msurf = psurf;
+	surface_p->flags = psurf->flags;
+	surface_p->nearzi = r_nearzi;
+	surface_p->insubmodel = true;
+	surface_p->spanstate = 0;
+	surface_p->spans = NULL;
+
+// d_zistepu / d_zistepv / d_ziorigin terms for the face plane
+// FIXME: cache this?
+	faceplane = psurf->plane;
+	TransformVector (faceplane->normal, viewnormal);
+	invdist = 1.0 / (faceplane->dist - DotProduct (modelorg, faceplane->normal));
+
+	surface_p->d_zistepu = viewnormal[0] * xscaleinv * invdist;
+	surface_p->d_zistepv = -viewnormal[1] * yscaleinv * invdist;
+	surface_p->d_ziorigin = viewnormal[2] * invdist -
+			xcenter * surface_p->d_zistepu -
+			ycenter * surface_p->d_zistepv;
+
+	surface_p++;
+}
+
--- a/ref_soft/r_scan.c
+++ b/ref_soft/r_scan.c
@@ -0,0 +1,591 @@
+/*
+Copyright (C) 1997-2001 Id Software, Inc.
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
+
+See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+*/
+// d_scan.c
+//
+// Portable C scan-level rasterization code, all pixel depths.
+
+#include "r_local.h"
+
+unsigned char	*r_turb_pbase, *r_turb_pdest;
+fixed16_t		r_turb_s, r_turb_t, r_turb_sstep, r_turb_tstep;
+int				*r_turb_turb;
+int				r_turb_spancount;
+
+void D_DrawTurbulent8Span (void);
+
+
+/*
+=============
+D_WarpScreen
+
+this performs a slight compression of the screen at the same time as
+the sine warp, to keep the edges from wrapping
+=============
+*/
+void D_WarpScreen (void)
+{
+	static int	cached_width, cached_height;
+	static byte	*rowptr[1200+AMP2*2];
+	static int	column[1600+AMP2*2];
+
+	int		width, height;
+	int		x, y, lane;
+	int		*wave;
+	int		*srccol;
+	byte	**srcrow;
+	byte	*out;
+
+	width = r_newrefdef.width;
+	height = r_newrefdef.height;
+
+	// the row/column lookup tables are keyed on the view size and are only
+	// rebuilt when it changes
+	if (width != cached_width || height != cached_height)
+	{
+		cached_width = width;
+		cached_height = height;
+
+		// each table is AMP2*2 entries longer than the view (presumably headroom for the sine offsets)
+		for (y=0 ; y<height+AMP2*2 ; y++)
+			rowptr[y] = r_warpbuffer + WARP_WIDTH *
+				(int)((float)y/(height + AMP2 * 2) * r_refdef.vrect.height);
+
+		for (x=0 ; x<width+AMP2*2 ; x++)
+			column[x] = (int)((float)x/(width + AMP2 * 2) * r_refdef.vrect.width);
+	}
+
+	// the starting offset into intsintable advances with r_newrefdef.time
+	wave = intsintable + ((int)(r_newrefdef.time*SPEED)&(CYCLE-1));
+	out = vid.buffer + r_newrefdef.y * vid.rowbytes + r_newrefdef.x;
+
+	// four destination pixels are written per inner-loop pass
+	for (y=0 ; y<height ; y++)
+	{
+		srccol = column + wave[y];
+		srcrow = rowptr + y;
+
+		for (x=0 ; x<width ; x+=4)
+		{
+			for (lane=0 ; lane<4 ; lane++)
+				out[x+lane] = srcrow[wave[x+lane]][srccol[x+lane]];
+		}
+
+		out += vid.rowbytes;
+	}
+}
+
+
+#if	!id386
+
+/*
+=============
+D_DrawTurbulent8Span
+=============
+*/
+void D_DrawTurbulent8Span (void)
+{
+	int		scol, trow;	// texel column and row, each masked to 0..63
+
+	do
+	{	// s is offset by the table entry indexed by t, and t by the one indexed by s
+		scol = ((r_turb_turb[(r_turb_t>>16)&(CYCLE-1)] + r_turb_s)>>16)&63;
+		trow = ((r_turb_turb[(r_turb_s>>16)&(CYCLE-1)] + r_turb_t)>>16)&63;
+		*r_turb_pdest++ = r_turb_pbase[(trow<<6) + scol];
+		r_turb_s += r_turb_sstep;
+		r_turb_t += r_turb_tstep;
+	} while (--r_turb_spancount > 0);
+}
+
+#endif	// !id386
+
+
+/*
+=============
+Turbulent8
+=============
+*/
+void Turbulent8 (espan_t *pspan)
+{
+	int				count;
+	fixed16_t		snext, tnext;
+	float			sdivz, tdivz, zi, z, du, dv, spancountminus1;
+	float			sdivz16stepu, tdivz16stepu, zi16stepu;
+// draws each span in the pspan list in runs of up to 16 pixels via D_DrawTurbulent8Span
+	r_turb_turb = sintable + ((int)(r_newrefdef.time*SPEED)&(CYCLE-1));
+
+	r_turb_sstep = 0;	// keep compiler happy
+	r_turb_tstep = 0;	// ditto
+
+	r_turb_pbase = (unsigned char *)cacheblock;
+
+	sdivz16stepu = d_sdivzstepu * 16;
+	tdivz16stepu = d_tdivzstepu * 16;
+	zi16stepu = d_zistepu * 16;
+
+	do
+	{
+		r_turb_pdest = (unsigned char *)((byte *)d_viewbuffer +
+				(r_screenwidth * pspan->v) + pspan->u);
+
+		count = pspan->count;
+
+	// calculate the initial s/z, t/z, 1/z, s, and t and clamp
+		du = (float)pspan->u;
+		dv = (float)pspan->v;
+
+		sdivz = d_sdivzorigin + dv*d_sdivzstepv + du*d_sdivzstepu;
+		tdivz = d_tdivzorigin + dv*d_tdivzstepv + du*d_tdivzstepu;
+		zi = d_ziorigin + dv*d_zistepv + du*d_zistepu;
+		z = (float)0x10000 / zi;	// prescale to 16.16 fixed-point
+
+		r_turb_s = (int)(sdivz * z) + sadjust;
+		if (r_turb_s > bbextents)
+			r_turb_s = bbextents;
+		else if (r_turb_s < 0)
+			r_turb_s = 0;
+
+		r_turb_t = (int)(tdivz * z) + tadjust;
+		if (r_turb_t > bbextentt)
+			r_turb_t = bbextentt;
+		else if (r_turb_t < 0)
+			r_turb_t = 0;
+
+		do
+		{
+		// take the next run of at most 16 pixels from the span
+			if (count >= 16)
+				r_turb_spancount = 16;
+			else
+				r_turb_spancount = count;
+
+			count -= r_turb_spancount;
+
+			if (count)
+			{
+			// calculate s/z, t/z, zi->fixed s and t at far end of span,
+			// calculate s and t steps across span by shifting
+				sdivz += sdivz16stepu;
+				tdivz += tdivz16stepu;
+				zi += zi16stepu;
+				z = (float)0x10000 / zi;	// prescale to 16.16 fixed-point
+
+				snext = (int)(sdivz * z) + sadjust;
+				if (snext > bbextents)
+					snext = bbextents;
+				else if (snext < 16)
+					snext = 16;	// prevent round-off error on <0 steps
+								//  from causing overstepping & running off the
+								//  edge of the texture
+
+				tnext = (int)(tdivz * z) + tadjust;
+				if (tnext > bbextentt)
+					tnext = bbextentt;
+				else if (tnext < 16)
+					tnext = 16;	// guard against round-off error on <0 steps
+
+				r_turb_sstep = (snext - r_turb_s) >> 4;	// full run of 16: divide by shifting
+				r_turb_tstep = (tnext - r_turb_t) >> 4;
+			}
+			else
+			{
+			// calculate s/z, t/z, zi->fixed s and t at last pixel in span (so
+			// can't step off polygon), clamp, calculate s and t steps across
+			// span by division, biasing steps low so we don't run off the
+			// texture
+				spancountminus1 = (float)(r_turb_spancount - 1);
+				sdivz += d_sdivzstepu * spancountminus1;
+				tdivz += d_tdivzstepu * spancountminus1;
+				zi += d_zistepu * spancountminus1;
+				z = (float)0x10000 / zi;	// prescale to 16.16 fixed-point
+				snext = (int)(sdivz * z) + sadjust;
+				if (snext > bbextents)
+					snext = bbextents;
+				else if (snext < 16)
+					snext = 16;	// prevent round-off error on <0 steps
+								//  from causing overstepping & running off the
+								//  edge of the texture
+
+				tnext = (int)(tdivz * z) + tadjust;
+				if (tnext > bbextentt)
+					tnext = bbextentt;
+				else if (tnext < 16)
+					tnext = 16;	// guard against round-off error on <0 steps
+
+				if (r_turb_spancount > 1)	// a 1-pixel run keeps the previous steps (and avoids /0)
+				{
+					r_turb_sstep = (snext - r_turb_s) / (r_turb_spancount - 1);
+					r_turb_tstep = (tnext - r_turb_t) / (r_turb_spancount - 1);
+				}
+			}
+		// keep only the bits of s and t below CYCLE<<16 before drawing
+			r_turb_s = r_turb_s & ((CYCLE<<16)-1);
+			r_turb_t = r_turb_t & ((CYCLE<<16)-1);
+
+			D_DrawTurbulent8Span ();
+
+			r_turb_s = snext;
+			r_turb_t = tnext;
+
+		} while (count > 0);
+
+	} while ((pspan = pspan->pnext) != NULL);
+}
+
+//====================
+//PGM
+/*
+=============
+NonTurbulent8 - draws scrolling textures. They go through the same path as warping
+	water textures, but with the turbulence automatically 0.
+=============
+*/
+void NonTurbulent8 (espan_t *pspan)
+{
+	int				count;
+	fixed16_t		snext, tnext;
+	float			sdivz, tdivz, zi, z, du, dv, spancountminus1;
+	float			sdivz16stepu, tdivz16stepu, zi16stepu;
+// same span loop as Turbulent8, but r_turb_turb points at blanktable, not the time-offset sintable
+//	r_turb_turb = sintable + ((int)(r_newrefdef.time*SPEED)&(CYCLE-1));
+	r_turb_turb = blanktable;
+
+	r_turb_sstep = 0;	// keep compiler happy
+	r_turb_tstep = 0;	// ditto
+
+	r_turb_pbase = (unsigned char *)cacheblock;
+
+	sdivz16stepu = d_sdivzstepu * 16;
+	tdivz16stepu = d_tdivzstepu * 16;
+	zi16stepu = d_zistepu * 16;
+
+	do
+	{
+		r_turb_pdest = (unsigned char *)((byte *)d_viewbuffer +
+				(r_screenwidth * pspan->v) + pspan->u);
+
+		count = pspan->count;
+
+	// calculate the initial s/z, t/z, 1/z, s, and t and clamp
+		du = (float)pspan->u;
+		dv = (float)pspan->v;
+
+		sdivz = d_sdivzorigin + dv*d_sdivzstepv + du*d_sdivzstepu;
+		tdivz = d_tdivzorigin + dv*d_tdivzstepv + du*d_tdivzstepu;
+		zi = d_ziorigin + dv*d_zistepv + du*d_zistepu;
+		z = (float)0x10000 / zi;	// prescale to 16.16 fixed-point
+
+		r_turb_s = (int)(sdivz * z) + sadjust;
+		if (r_turb_s > bbextents)
+			r_turb_s = bbextents;
+		else if (r_turb_s < 0)
+			r_turb_s = 0;
+
+		r_turb_t = (int)(tdivz * z) + tadjust;
+		if (r_turb_t > bbextentt)
+			r_turb_t = bbextentt;
+		else if (r_turb_t < 0)
+			r_turb_t = 0;
+
+		do
+		{
+		// take the next run of at most 16 pixels from the span
+			if (count >= 16)
+				r_turb_spancount = 16;
+			else
+				r_turb_spancount = count;
+
+			count -= r_turb_spancount;
+
+			if (count)
+			{
+			// calculate s/z, t/z, zi->fixed s and t at far end of span,
+			// calculate s and t steps across span by shifting
+				sdivz += sdivz16stepu;
+				tdivz += tdivz16stepu;
+				zi += zi16stepu;
+				z = (float)0x10000 / zi;	// prescale to 16.16 fixed-point
+
+				snext = (int)(sdivz * z) + sadjust;
+				if (snext > bbextents)
+					snext = bbextents;
+				else if (snext < 16)
+					snext = 16;	// prevent round-off error on <0 steps
+								//  from causing overstepping & running off the
+								//  edge of the texture
+
+				tnext = (int)(tdivz * z) + tadjust;
+				if (tnext > bbextentt)
+					tnext = bbextentt;
+				else if (tnext < 16)
+					tnext = 16;	// guard against round-off error on <0 steps
+
+				r_turb_sstep = (snext - r_turb_s) >> 4;	// full run of 16: divide by shifting
+				r_turb_tstep = (tnext - r_turb_t) >> 4;
+			}
+			else
+			{
+			// calculate s/z, t/z, zi->fixed s and t at last pixel in span (so
+			// can't step off polygon), clamp, calculate s and t steps across
+			// span by division, biasing steps low so we don't run off the
+			// texture
+				spancountminus1 = (float)(r_turb_spancount - 1);
+				sdivz += d_sdivzstepu * spancountminus1;
+				tdivz += d_tdivzstepu * spancountminus1;
+				zi += d_zistepu * spancountminus1;
+				z = (float)0x10000 / zi;	// prescale to 16.16 fixed-point
+				snext = (int)(sdivz * z) + sadjust;
+				if (snext > bbextents)
+					snext = bbextents;
+				else if (snext < 16)
+					snext = 16;	// prevent round-off error on <0 steps
+								//  from causing overstepping & running off the
+								//  edge of the texture
+
+				tnext = (int)(tdivz * z) + tadjust;
+				if (tnext > bbextentt)
+					tnext = bbextentt;
+				else if (tnext < 16)
+					tnext = 16;	// guard against round-off error on <0 steps
+
+				if (r_turb_spancount > 1)	// a 1-pixel run keeps the previous steps (and avoids /0)
+				{
+					r_turb_sstep = (snext - r_turb_s) / (r_turb_spancount - 1);
+					r_turb_tstep = (tnext - r_turb_t) / (r_turb_spancount - 1);
+				}
+			}
+		// keep only the bits of s and t below CYCLE<<16 before drawing
+			r_turb_s = r_turb_s & ((CYCLE<<16)-1);
+			r_turb_t = r_turb_t & ((CYCLE<<16)-1);
+
+			D_DrawTurbulent8Span ();
+
+			r_turb_s = snext;
+			r_turb_t = tnext;
+
+		} while (count > 0);
+
+	} while ((pspan = pspan->pnext) != NULL);
+}
+//PGM
+//====================
+
+
+#if	!id386
+
+/*
+=============
+D_DrawSpans16
+
+  FIXME: actually make this subdivide by 16 instead of 8!!!
+=============
+*/
+void D_DrawSpans16 (espan_t *pspan)
+{
+	int				count, spancount;
+	unsigned char	*pbase, *pdest;
+	fixed16_t		s, t, snext, tnext, sstep, tstep;
+	float			sdivz, tdivz, zi, z, du, dv, spancountminus1;
+	float			sdivz8stepu, tdivz8stepu, zi8stepu;
+// draws each span in the pspan list from cacheblock, recomputing s and t from s/z, t/z, 1/z every 8 pixels
+	sstep = 0;	// keep compiler happy
+	tstep = 0;	// ditto
+
+	pbase = (unsigned char *)cacheblock;
+
+	sdivz8stepu = d_sdivzstepu * 8;
+	tdivz8stepu = d_tdivzstepu * 8;
+	zi8stepu = d_zistepu * 8;
+
+	do
+	{
+		pdest = (unsigned char *)((byte *)d_viewbuffer +
+				(r_screenwidth * pspan->v) + pspan->u);
+
+		count = pspan->count;
+
+	// calculate the initial s/z, t/z, 1/z, s, and t and clamp
+		du = (float)pspan->u;
+		dv = (float)pspan->v;
+
+		sdivz = d_sdivzorigin + dv*d_sdivzstepv + du*d_sdivzstepu;
+		tdivz = d_tdivzorigin + dv*d_tdivzstepv + du*d_tdivzstepu;
+		zi = d_ziorigin + dv*d_zistepv + du*d_zistepu;
+		z = (float)0x10000 / zi;	// prescale to 16.16 fixed-point
+
+		s = (int)(sdivz * z) + sadjust;
+		if (s > bbextents)
+			s = bbextents;
+		else if (s < 0)
+			s = 0;
+
+		t = (int)(tdivz * z) + tadjust;
+		if (t > bbextentt)
+			t = bbextentt;
+		else if (t < 0)
+			t = 0;
+
+		do
+		{
+		// take the next run of at most 8 pixels from the span
+			if (count >= 8)
+				spancount = 8;
+			else
+				spancount = count;
+
+			count -= spancount;
+
+			if (count)
+			{
+			// calculate s/z, t/z, zi->fixed s and t at far end of span,
+			// calculate s and t steps across span by shifting
+				sdivz += sdivz8stepu;
+				tdivz += tdivz8stepu;
+				zi += zi8stepu;
+				z = (float)0x10000 / zi;	// prescale to 16.16 fixed-point
+
+				snext = (int)(sdivz * z) + sadjust;
+				if (snext > bbextents)
+					snext = bbextents;
+				else if (snext < 8)
+					snext = 8;	// prevent round-off error on <0 steps
+								//  from causing overstepping & running off the
+								//  edge of the texture
+
+				tnext = (int)(tdivz * z) + tadjust;
+				if (tnext > bbextentt)
+					tnext = bbextentt;
+				else if (tnext < 8)
+					tnext = 8;	// guard against round-off error on <0 steps
+
+				sstep = (snext - s) >> 3;	// full run of 8: divide by shifting
+				tstep = (tnext - t) >> 3;
+			}
+			else
+			{
+			// calculate s/z, t/z, zi->fixed s and t at last pixel in span (so
+			// can't step off polygon), clamp, calculate s and t steps across
+			// span by division, biasing steps low so we don't run off the
+			// texture
+				spancountminus1 = (float)(spancount - 1);
+				sdivz += d_sdivzstepu * spancountminus1;
+				tdivz += d_tdivzstepu * spancountminus1;
+				zi += d_zistepu * spancountminus1;
+				z = (float)0x10000 / zi;	// prescale to 16.16 fixed-point
+				snext = (int)(sdivz * z) + sadjust;
+				if (snext > bbextents)
+					snext = bbextents;
+				else if (snext < 8)
+					snext = 8;	// prevent round-off error on <0 steps
+								//  from causing overstepping & running off the
+								//  edge of the texture
+
+				tnext = (int)(tdivz * z) + tadjust;
+				if (tnext > bbextentt)
+					tnext = bbextentt;
+				else if (tnext < 8)
+					tnext = 8;	// guard against round-off error on <0 steps
+
+				if (spancount > 1)	// a 1-pixel run keeps the previous steps (and avoids /0)
+				{
+					sstep = (snext - s) / (spancount - 1);
+					tstep = (tnext - t) / (spancount - 1);
+				}
+			}
+		// copy one texel per pixel; cachewidth is the row stride of cacheblock
+			do
+			{
+				*pdest++ = *(pbase + (s >> 16) + (t >> 16) * cachewidth);
+				s += sstep;
+				t += tstep;
+			} while (--spancount > 0);
+
+			s = snext;
+			t = tnext;
+
+		} while (count > 0);
+
+	} while ((pspan = pspan->pnext) != NULL);
+}
+
+#endif
+
+
+#if	!id386
+
+/*
+=============
+D_DrawZSpans
+=============
+*/
+void D_DrawZSpans (espan_t *pspan)
+{
+	int				remaining, pairs;
+	int				izi, izistep;
+	short			*pz;
+	unsigned		packed;
+	float			zi;
+	float			fu, fv;
+
+// FIXME: check for clamping/range problems
+// we count on FP exceptions being turned off to avoid range problems
+	izistep = (int)(d_zistepu * 0x8000 * 0x10000);
+
+	do
+	{
+		pz = d_pzbuffer + (d_zwidth * pspan->v) + pspan->u;
+		remaining = pspan->count;
+
+	// 1/z at the first pixel of the span, scaled to fixed point
+		fu = (float)pspan->u;
+		fv = (float)pspan->v;
+		zi = d_ziorigin + fv*d_zistepv + fu*d_zistepu;
+	// we count on FP exceptions being turned off to avoid range problems
+		izi = (int)(zi * 0x8000 * 0x10000);
+
+	// write one leading value when bit 1 of the destination address is set
+		if ((long)pz & 0x02)
+		{
+			*pz++ = (short)(izi >> 16);
+			izi += izistep;
+			remaining--;
+		}
+
+	// then store the values two at a time: the low half of each 32-bit
+	// write is the first value, the high half the second
+		for (pairs = remaining >> 1 ; pairs > 0 ; pairs--)
+		{
+			packed = izi >> 16;
+			izi += izistep;
+			packed |= izi & 0xFFFF0000;
+			izi += izistep;
+			*(int *)pz = packed;
+			pz += 2;
+		}
+
+	// odd value left over at the end of the span
+		if (remaining & 1)
+			*pz = (short)(izi >> 16);
+
+		pspan = pspan->pnext;
+	} while (pspan != NULL);
+}
+
+#endif
+
--- a/ref_soft/r_scana.asm
+++ b/ref_soft/r_scana.asm
@@ -0,0 +1,73 @@
+ .386P
+ .model FLAT
+;
+; d_scana.s
+; x86 assembly-language turbulent texture mapping code
+;
+
+include qasm.inc
+include d_if.inc
+
+if id386
+
+_DATA SEGMENT	
+
+_DATA ENDS
+_TEXT SEGMENT	
+
+;----------------------------------------------------------------------
+; turbulent texture mapping code
+;----------------------------------------------------------------------
+
+ align 4	
+ public _D_DrawTurbulent8Span	
+_D_DrawTurbulent8Span:	; draws one run of texels; all inputs are read from the _r_turb_* globals
+ push ebp	; preserve caller's stack frame pointer
+ push esi	; preserve register variables
+ push edi	
+ push ebx	
+
+ mov esi,ds:dword ptr[_r_turb_s]	; esi = s
+ mov ecx,ds:dword ptr[_r_turb_t]	; ecx = t
+ mov edi,ds:dword ptr[_r_turb_pdest]	; edi = destination pointer
+ mov ebx,ds:dword ptr[_r_turb_spancount]	; ebx = pixels left to draw
+
+Llp:	; one pixel per iteration
+ mov eax,ecx	
+ mov edx,esi	
+ sar eax,16	; eax = t>>16
+ mov ebp,ds:dword ptr[_r_turb_turb]	; ebp = base of the dword table indexed below
+ sar edx,16	; edx = s>>16
+ and eax,offset CYCLE-1	
+ and edx,offset CYCLE-1	
+ mov eax,ds:dword ptr[ebp+eax*4]	; eax = table[(t>>16)&(CYCLE-1)]
+ mov edx,ds:dword ptr[ebp+edx*4]	; edx = table[(s>>16)&(CYCLE-1)]
+ add eax,esi	
+ sar eax,16	; eax = (s + table entry for t)>>16
+ add edx,ecx	
+ sar edx,16	; edx = (t + table entry for s)>>16
+ and eax,offset TURB_TEX_SIZE-1	; eax = texel column
+ and edx,offset TURB_TEX_SIZE-1	; edx = texel row
+ shl edx,6	; row * 64 (stride is hard-coded here, unlike the masks above)
+ mov ebp,ds:dword ptr[_r_turb_pbase]	; ebp = texture base
+ add edx,eax	; edx = texel offset
+ inc edi	; advance dest now; the store below addresses [-1+edi]
+ add esi,ds:dword ptr[_r_turb_sstep]	; s += sstep
+ add ecx,ds:dword ptr[_r_turb_tstep]	; t += tstep
+ mov dl,ds:byte ptr[ebp+edx*1]	; fetch the texel
+ dec ebx	
+ mov ds:byte ptr[-1+edi],dl	; store it; mov does not disturb the flags set by dec
+ jnz Llp	; loops until ebx is exactly zero, so a count of 0 on entry would wrap
+
+ mov ds:dword ptr[_r_turb_pdest],edi	; only pdest is written back; s, t and spancount globals are left as they were
+
+ pop ebx	; restore register variables
+ pop edi	
+ pop esi	
+ pop ebp	; restore caller's stack frame pointer
+ ret	
+
+
+_TEXT ENDS
+endif	;id386
+ END
--- a/ref_soft/r_spr8.asm
+++ b/ref_soft/r_spr8.asm
@@ -0,0 +1,884 @@
+ .386P
+ .model FLAT
+;
+; r_spr8.asm (MASM translation of d_spr8.s)
+; x86 assembly-language horizontal 8-bpp transparent span-drawing code.
+;
+
+include qasm.inc
+include d_if.inc
+
+if id386
+
+;----------------------------------------------------------------------
+; 8-bpp horizontal span drawing code for polygons, with transparency.
+;----------------------------------------------------------------------
+
+_TEXT SEGMENT	
+
+; out-of-line, rarely-needed clamping code
+
+; Clamp stubs for _D_SpriteDrawSpansXXX. Each stub loads the clamped value
+; into the register that was being tested and jumps back to the matching
+; LClampReentryN label in the main code path.
+;   0/1: s (esi) and t (edx) at the start of a span
+;   2/3: snext (ebp) and tnext (ecx) at the end of a full 8-pixel segment
+;   4/5: snext (eax) and tnext (ebx) at the end of the last segment
+; NOTE(review): 2048 is the lower bound used for the end-of-segment values;
+; presumably a small positive guard in 16.16 fixed point -- confirm against
+; the C implementation.
+LClampHigh0:	
+ mov esi,ds:dword ptr[_bbextents]	; s = maximum allowed s
+ jmp LClampReentry0	
+LClampHighOrLow0:	
+; reached via "ja" after "cmp esi,_bbextents": unsigned-above covers both
+; a too-large s and a negative s, so re-test the same flags as signed
+ jg LClampHigh0	; signed greater: really too high
+ xor esi,esi	; otherwise s was negative: clamp to 0
+ jmp LClampReentry0	
+
+LClampHigh1:	
+ mov edx,ds:dword ptr[_bbextentt]	; t = maximum allowed t
+ jmp LClampReentry1	
+LClampHighOrLow1:	
+; same trick as LClampHighOrLow0, flags come from "cmp edx,_bbextentt"
+ jg LClampHigh1	
+ xor edx,edx	; negative t: clamp to 0
+ jmp LClampReentry1	
+
+LClampLow2:	
+ mov ebp,2048	; snext below the lower bound
+ jmp LClampReentry2	
+LClampHigh2:	
+ mov ebp,ds:dword ptr[_bbextents]	; snext above the upper bound
+ jmp LClampReentry2	
+
+LClampLow3:	
+ mov ecx,2048	; tnext below the lower bound
+ jmp LClampReentry3	
+LClampHigh3:	
+ mov ecx,ds:dword ptr[_bbextentt]	; tnext above the upper bound
+ jmp LClampReentry3	
+
+LClampLow4:	
+ mov eax,2048	; last-segment snext below the lower bound
+ jmp LClampReentry4	
+LClampHigh4:	
+ mov eax,ds:dword ptr[_bbextents]	; last-segment snext above the upper bound
+ jmp LClampReentry4	
+
+LClampLow5:	
+ mov ebx,2048	; last-segment tnext below the lower bound
+ jmp LClampReentry5	
+LClampHigh5:	
+ mov ebx,ds:dword ptr[_bbextentt]	; last-segment tnext above the upper bound
+ jmp LClampReentry5	
+
+
+pspans	equ		4+16
+
+; _D_SpriteDrawSpansXXX: draws a list of z-buffered, transparent 8-bpp
+; texture-mapped spans.
+; In:  one stack argument, a pointer to the first span descriptor (fields
+;      sspan_t_u, sspan_t_v, sspan_t_count, stride sspan_t_size). It is
+;      read at [pspans+esp] once the four register pushes are done
+;      (pspans = 4 bytes of return address + 16 bytes of pushes).
+;      A span with count 0 is skipped; a negative count ends the list.
+; Out: nothing; ebp/edi/esi/ebx are preserved, eax/ecx/edx, the flags and
+;      the x87 stack contents are used as scratch.
+; The first span's count is tested with "jle LNextSpan", so a negative
+; count in the FIRST descriptor is skipped rather than treated as the
+; end of the list.
+ align 4	
+ public _D_SpriteDrawSpansXXX
+_D_SpriteDrawSpansXXX:	
+ push ebp	; preserve caller's stack frame
+ push edi	
+ push esi	; preserve register variables
+ push ebx	
+
+;
+; set up scaled-by-8 steps, for 8-long segments; also set up cacheblock
+; and span list pointers, and 1/z step in 0.32 fixed-point
+;
+; FIXME: any overlap from rearranging?
+ fld ds:dword ptr[_d_sdivzstepu]	
+ fmul ds:dword ptr[fp_8]	; s8
+ mov edx,ds:dword ptr[_cacheblock]	
+ fld ds:dword ptr[_d_tdivzstepu]	
+ fmul ds:dword ptr[fp_8]	; t8 | s8
+ mov ebx,ds:dword ptr[pspans+esp]	; point to the first span descriptor
+ fld ds:dword ptr[_d_zistepu]	
+ fmul ds:dword ptr[fp_8]	; z8 | t8 | s8
+ mov ds:dword ptr[pbase],edx	; pbase = cacheblock
+ fld ds:dword ptr[_d_zistepu]	
+ fmul ds:dword ptr[fp_64kx64k]	; zistep*2^32 | z8 | t8 | s8
+ fxch st(3)	; s8 | z8 | t8 | zistep*2^32
+ fstp ds:dword ptr[sdivz8stepu]	
+ fstp ds:dword ptr[zi8stepu]	
+ fstp ds:dword ptr[tdivz8stepu]	
+ fistp ds:dword ptr[izistep]	; 0.32 fixed-point 1/z step per pixel
+ mov eax,ds:dword ptr[izistep]	
+ ror eax,16	; put upper 16 bits in low word
+ mov ecx,ds:dword ptr[sspan_t_count+ebx]	; ecx = pixel count of the first span
+ mov ds:dword ptr[izistep],eax	; keep the step in the rotated form used by the pixel code
+
+ cmp ecx,0	
+ jle LNextSpan	; nothing to draw in this span
+
+; Per-span setup. On entry ebx = span descriptor, ecx = pixel count (> 0).
+; Leaves 1/z | z*64k | t/z | s/z on the FP stack, the rotated 0.32 1/z in
+; izi, the z-buffer address in pz, edi = destination pixel, esi = sadjust,
+; edx = tadjust, and the span pointer pushed on the CPU stack.
+LSpanLoop:	
+
+;
+; set up the initial s/z, t/z, and 1/z on the FP stack, and generate the
+; initial s and t values
+;
+; FIXME: pipeline FILD?
+ fild ds:dword ptr[sspan_t_v+ebx]	; dv
+ fild ds:dword ptr[sspan_t_u+ebx]	; du | dv
+
+ fld st(1)	; dv | du | dv
+ fmul ds:dword ptr[_d_sdivzstepv]	; dv*d_sdivzstepv | du | dv
+ fld st(1)	; du | dv*d_sdivzstepv | du | dv
+ fmul ds:dword ptr[_d_sdivzstepu]	; du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+ fld st(2)	; du | du*d_sdivzstepu | dv*d_sdivzstepv | du | dv
+ fmul ds:dword ptr[_d_tdivzstepu]	; du*d_tdivzstepu | du*d_sdivzstepu |
+;  dv*d_sdivzstepv | du | dv
+ fxch st(1)	; du*d_sdivzstepu | du*d_tdivzstepu |
+;  dv*d_sdivzstepv | du | dv
+ faddp st(2),st(0)	; du*d_tdivzstepu |
+;  du*d_sdivzstepu + dv*d_sdivzstepv | du | dv
+ fxch st(1)	; du*d_sdivzstepu + dv*d_sdivzstepv |
+;  du*d_tdivzstepu | du | dv
+ fld st(3)	; dv | du*d_sdivzstepu + dv*d_sdivzstepv |
+;  du*d_tdivzstepu | du | dv
+ fmul ds:dword ptr[_d_tdivzstepv]	; dv*d_tdivzstepv |
+;  du*d_sdivzstepu + dv*d_sdivzstepv |
+;  du*d_tdivzstepu | du | dv
+ fxch st(1)	; du*d_sdivzstepu + dv*d_sdivzstepv |
+;  dv*d_tdivzstepv | du*d_tdivzstepu | du | dv
+ fadd ds:dword ptr[_d_sdivzorigin]	; sdivz = d_sdivzorigin + dv*d_sdivzstepv +
+;  du*d_sdivzstepu; stays in %st(2) at end
+ fxch st(4)	; dv | dv*d_tdivzstepv | du*d_tdivzstepu | du |
+;  s/z
+ fmul ds:dword ptr[_d_zistepv]	; dv*d_zistepv | dv*d_tdivzstepv |
+;  du*d_tdivzstepu | du | s/z
+ fxch st(1)	; dv*d_tdivzstepv |  dv*d_zistepv |
+;  du*d_tdivzstepu | du | s/z
+ faddp st(2),st(0)	; dv*d_zistepv |
+;  dv*d_tdivzstepv + du*d_tdivzstepu | du | s/z
+ fxch st(2)	; du | dv*d_tdivzstepv + du*d_tdivzstepu |
+;  dv*d_zistepv | s/z
+ fmul ds:dword ptr[_d_zistepu]	; du*d_zistepu |
+;  dv*d_tdivzstepv + du*d_tdivzstepu |
+;  dv*d_zistepv | s/z
+ fxch st(1)	; dv*d_tdivzstepv + du*d_tdivzstepu |
+;  du*d_zistepu | dv*d_zistepv | s/z
+ fadd ds:dword ptr[_d_tdivzorigin]	; tdivz = d_tdivzorigin + dv*d_tdivzstepv +
+;  du*d_tdivzstepu; stays in %st(1) at end
+ fxch st(2)	; dv*d_zistepv | du*d_zistepu | t/z | s/z
+ faddp st(1),st(0)	; dv*d_zistepv + du*d_zistepu | t/z | s/z
+
+ fld ds:dword ptr[fp_64k]	; fp_64k | dv*d_zistepv + du*d_zistepu | t/z | s/z
+ fxch st(1)	; dv*d_zistepv + du*d_zistepu | fp_64k | t/z | s/z
+ fadd ds:dword ptr[_d_ziorigin]	; zi = d_ziorigin + dv*d_zistepv +
+;  du*d_zistepu; stays in %st(0) at end
+; 1/z | fp_64k | t/z | s/z
+
+ fld st(0)	; FIXME: get rid of stall on FMUL?
+ fmul ds:dword ptr[fp_64kx64k]	; 1/z*2^32 | 1/z | fp_64k | t/z | s/z
+ fxch st(1)	; 1/z | 1/z*2^32 | fp_64k | t/z | s/z
+
+;
+; calculate and clamp s & t
+;
+ fdiv st(2),st(0)	; turns the fp_64k slot into z*64k:
+;  1/z | 1/z*2^32 | z*64k | t/z | s/z
+ fxch st(1)	; 1/z*2^32 | 1/z | z*64k | t/z | s/z
+
+ fistp ds:dword ptr[izi]	; 0.32 fixed-point 1/z
+;  leaves 1/z | z*64k | t/z | s/z
+ mov ebp,ds:dword ptr[izi]	
+
+;
+; set pz to point to the first z-buffer pixel in the span
+;
+ ror ebp,16	; put upper 16 bits in low word
+ mov eax,ds:dword ptr[sspan_t_v+ebx]	
+ mov ds:dword ptr[izi],ebp	; izi is kept in rotated form from here on
+ mov ebp,ds:dword ptr[sspan_t_u+ebx]	
+ imul ds:dword ptr[_d_zrowbytes]	; edx:eax = v * d_zrowbytes (edx is reloaded below)
+ shl ebp,1	; a word per pixel
+ add eax,ds:dword ptr[_d_pzbuffer]	
+ add eax,ebp	
+ mov ds:dword ptr[pz],eax	; pz = d_pzbuffer + v*d_zrowbytes + u*2
+
+;
+; point %edi to the first pixel in the span
+;
+ mov ebp,ds:dword ptr[_d_viewbuffer]	
+ mov eax,ds:dword ptr[sspan_t_v+ebx]	
+ push ebx	; preserve spans pointer
+ mov edx,ds:dword ptr[_tadjust]	; edx = tadjust, t is added at LFDIVInFlight1
+ mov esi,ds:dword ptr[_sadjust]	; esi = sadjust, s is added at LFDIVInFlight1
+ mov edi,ds:dword ptr[_d_scantable+eax*4]	; v * screenwidth
+ add edi,ebp	
+ mov ebp,ds:dword ptr[sspan_t_u+ebx]	
+ add edi,ebp	; pdest = &pdestspan[scans->u];
+
+;
+; now start the FDIV for the end of the span
+;
+; Three variants follow, all converting the start-of-span s and t to
+; integers first:
+;   count > 8  (LSetupNotLast1): step s/z, t/z, 1/z by 8 pixels, start FDIV
+;   2..8 pixels (fall through) : step by count-1 pixels, start FDIV
+;   1 pixel     (LCleanup1)    : no step and no FDIV
+; In the last two cases ecx is left holding count-1.
+ cmp ecx,8	
+ ja LSetupNotLast1	
+
+ dec ecx	; ecx = count-1 = number of steps in this (last) segment
+ jz LCleanup1	; if only one pixel, no need to start an FDIV
+ mov ds:dword ptr[spancountminus1],ecx	
+
+; finish up the s and t calcs
+ fxch st(1)	; z*64k | 1/z | t/z | s/z
+
+ fld st(0)	; z*64k | z*64k | 1/z | t/z | s/z
+ fmul st(0),st(4)	; s | z*64k | 1/z | t/z | s/z
+ fxch st(1)	; z*64k | s | 1/z | t/z | s/z
+ fmul st(0),st(3)	; t | s | 1/z | t/z | s/z
+ fxch st(1)	; s | t | 1/z | t/z | s/z
+ fistp ds:dword ptr[s]	; 1/z | t | t/z | s/z
+ fistp ds:dword ptr[t]	; 1/z | t/z | s/z
+
+ fild ds:dword ptr[spancountminus1]	
+
+ fld ds:dword ptr[_d_tdivzstepu]	; _d_tdivzstepu | spancountminus1
+ fld ds:dword ptr[_d_zistepu]	; _d_zistepu | _d_tdivzstepu | spancountminus1
+ fmul st(0),st(2)	; _d_zistepu*scm1 | _d_tdivzstepu | scm1
+ fxch st(1)	; _d_tdivzstepu | _d_zistepu*scm1 | scm1
+ fmul st(0),st(2)	; _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
+ fxch st(2)	; scm1 | _d_zistepu*scm1 | _d_tdivzstepu*scm1
+ fmul ds:dword ptr[_d_sdivzstepu]	; _d_sdivzstepu*scm1 | _d_zistepu*scm1 |
+;  _d_tdivzstepu*scm1
+ fxch st(1)	; _d_zistepu*scm1 | _d_sdivzstepu*scm1 |
+;  _d_tdivzstepu*scm1
+ faddp st(3),st(0)	; _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
+ fxch st(1)	; _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
+ faddp st(3),st(0)	; _d_sdivzstepu*scm1
+ faddp st(3),st(0)	; 1/z | t/z | s/z, all advanced to the last pixel
+
+ fld ds:dword ptr[fp_64k]	
+ fdiv st(0),st(1)	; this is what we've gone to all this trouble to
+;  overlap
+ jmp LFDIVInFlight1	
+
+LCleanup1:	
+; finish up the s and t calcs
+ fxch st(1)	; z*64k | 1/z | t/z | s/z
+
+ fld st(0)	; z*64k | z*64k | 1/z | t/z | s/z
+ fmul st(0),st(4)	; s | z*64k | 1/z | t/z | s/z
+ fxch st(1)	; z*64k | s | 1/z | t/z | s/z
+ fmul st(0),st(3)	; t | s | 1/z | t/z | s/z
+ fxch st(1)	; s | t | 1/z | t/z | s/z
+ fistp ds:dword ptr[s]	; 1/z | t | t/z | s/z
+ fistp ds:dword ptr[t]	; 1/z | t/z | s/z
+ jmp LFDIVInFlight1	
+
+ align 4	
+LSetupNotLast1:	
+; finish up the s and t calcs
+ fxch st(1)	; z*64k | 1/z | t/z | s/z
+
+ fld st(0)	; z*64k | z*64k | 1/z | t/z | s/z
+ fmul st(0),st(4)	; s | z*64k | 1/z | t/z | s/z
+ fxch st(1)	; z*64k | s | 1/z | t/z | s/z
+ fmul st(0),st(3)	; t | s | 1/z | t/z | s/z
+ fxch st(1)	; s | t | 1/z | t/z | s/z
+ fistp ds:dword ptr[s]	; 1/z | t | t/z | s/z
+ fistp ds:dword ptr[t]	; 1/z | t/z | s/z
+
+ fadd ds:dword ptr[zi8stepu]	; 1/z advanced by 8 pixels
+ fxch st(2)	
+ fadd ds:dword ptr[sdivz8stepu]	; s/z advanced by 8 pixels
+ fxch st(2)	
+ fld ds:dword ptr[tdivz8stepu]	
+ faddp st(2),st(0)	; t/z advanced by 8 pixels
+ fld ds:dword ptr[fp_64k]	
+ fdiv st(0),st(1)	; z = 1/1/z
+; this is what we've gone to all this trouble to
+;  overlap
+; Integer work done while the FDIV (if any) is in flight: add the
+; adjustments to the start-of-span s and t, clamp them, split them into
+; integer and fractional parts and compute the first source texel address.
+; On entry esi = sadjust, edx = tadjust; ecx = count (if > 8) or count-1.
+LFDIVInFlight1:	
+
+ add esi,ds:dword ptr[s]	; esi = s + sadjust
+ add edx,ds:dword ptr[t]	; edx = t + tadjust
+ mov ebx,ds:dword ptr[_bbextents]	
+ mov ebp,ds:dword ptr[_bbextentt]	; NOTE: ebp no longer holds 1/z after this
+ cmp esi,ebx	
+ ja LClampHighOrLow0	; unsigned test catches both > max and negative
+LClampReentry0:	
+ mov ds:dword ptr[s],esi	; save the clamped 16.16 s
+ mov ebx,ds:dword ptr[pbase]	
+ shl esi,16	; keep only the fractional part, left-justified
+ cmp edx,ebp	
+ mov ds:dword ptr[sfracf],esi	; mov does not disturb the flags from the cmp
+ ja LClampHighOrLow1	
+LClampReentry1:	
+ mov ds:dword ptr[t],edx	; save the clamped 16.16 t
+ mov esi,ds:dword ptr[s]	; reload the clamped s
+ shl edx,16	; fractional part of t, left-justified
+ mov eax,ds:dword ptr[t]	; reload the clamped t
+ sar esi,16	; esi = integer part of s
+ mov ds:dword ptr[tfracf],edx	
+
+;
+; calculate the texture starting address
+;
+ sar eax,16	; eax = integer part of t
+ add esi,ebx	
+ imul eax,ds:dword ptr[_cachewidth]	; (tfrac >> 16) * cachewidth
+ add esi,eax	; psource = pbase + (sfrac >> 16) +
+;           ((tfrac >> 16) * cachewidth);
+
+;
+; determine whether last span or not
+;
+ cmp ecx,8	; ecx is count when count > 8, else count-1 (<= 7)
+ jna LLastSegment	
+
+;
+; not the last segment; do full 8-wide segment
+;
+; Full 8-pixel segment. On entry ecx = pixels remaining (> 8), the FP stack
+; holds z*64k (from the FDIV in flight) | 1/z | t/z | s/z at the segment
+; end, esi = source texel, edi = destination pixel.
+LNotLastSegment:	
+
+;
+; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+; get there
+;
+
+; pick up after the FDIV that was left in flight previously
+
+ fld st(0)	; duplicate it
+ fmul st(0),st(4)	; s = s/z * z
+ fxch st(1)	
+ fmul st(0),st(3)	; t = t/z * z
+ fxch st(1)	
+ fistp ds:dword ptr[snext]	
+ fistp ds:dword ptr[tnext]	
+ mov eax,ds:dword ptr[snext]	
+ mov edx,ds:dword ptr[tnext]	
+
+ sub ecx,8	; count off this segments' pixels
+ mov ebp,ds:dword ptr[_sadjust]	
+ push ecx	; remember count of remaining pixels
+ mov ecx,ds:dword ptr[_tadjust]	
+
+ add ebp,eax	; ebp = snext + sadjust
+ add ecx,edx	; ecx = tnext + tadjust
+
+ mov eax,ds:dword ptr[_bbextents]	
+ mov edx,ds:dword ptr[_bbextentt]	
+
+ cmp ebp,2048	; clamp snext to [2048, bbextents]
+ jl LClampLow2	
+ cmp ebp,eax	
+ ja LClampHigh2	
+LClampReentry2:	
+
+ cmp ecx,2048	; clamp tnext to [2048, bbextentt]
+ jl LClampLow3	
+ cmp ecx,edx	
+ ja LClampHigh3	
+LClampReentry3:	
+
+ mov ds:dword ptr[snext],ebp	
+ mov ds:dword ptr[tnext],ecx	
+
+ sub ebp,ds:dword ptr[s]	; ebp = s delta across the 8-pixel segment
+ sub ecx,ds:dword ptr[t]	; ecx = t delta across the 8-pixel segment
+
+;
+; set up advancetable
+;
+ mov eax,ecx	
+ mov edx,ebp	
+ sar edx,19	; integer part of sstep: >>3 divides the delta by 8
+;  pixels, >>16 drops the 16.16 fraction
+ mov ebx,ds:dword ptr[_cachewidth]	
+ sar eax,19	; integer part of tstep, same scaling
+ jz LIsZero	; skip the multiply when tstep has no whole rows
+ imul eax,ebx	; (tstep >> 16) * cachewidth;
+LIsZero:	
+ add eax,edx	; add in sstep
+; (tstep >> 16) * cachewidth + (sstep >> 16);
+ mov edx,ds:dword ptr[tfracf]	
+ mov ds:dword ptr[advancetable+4],eax	; advance base in t
+ add eax,ebx	; ((tstep >> 16) + 1) * cachewidth +
+;  (sstep >> 16);
+ shl ebp,13	; left-justify sstep fractional part
+;  (13 = 16 - 3, the /8 is folded in)
+ mov ds:dword ptr[sstep],ebp	
+ mov ebx,ds:dword ptr[sfracf]	
+ shl ecx,13	; left-justify tstep fractional part
+ mov ds:dword ptr[advancetable],eax	; advance extra in t
+ mov ds:dword ptr[tstep],ecx	
+
+; Unrolled body for a full 8-pixel segment.
+; Registers: ecx = z-buffer pointer, ebp = 1/z rotated so its upper 16
+; bits sit in bp, edx = t fraction, ebx = s fraction, esi = source texel,
+; edi = destination pixel.
+; Each pixel: skip if bp < the z-buffer word (signed compare) or if the
+; texel equals TRANSPARENT_COLOR; otherwise write z and the pixel. Then
+; step 1/z, t and s.
+ mov ecx,ds:dword ptr[pz]	
+ mov ebp,ds:dword ptr[izi]	
+
+ cmp bp,ds:word ptr[ecx]	
+ jl Lp1	; z-buffer value is larger: pixel is hidden
+ mov al,ds:byte ptr[esi]	; get first source texel
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp1	; transparent texel: leave z and colour untouched
+ mov ds:word ptr[ecx],bp	
+ mov ds:byte ptr[edi],al	; store first dest pixel
+Lp1:	
+ add ebp,ds:dword ptr[izistep]	; step the rotated 1/z
+ adc ebp,0	; carry out of the top (fraction) half wraps into bit 0
+ add edx,ds:dword ptr[tstep]	; advance tfrac fractional part by tstep frac
+
+ sbb eax,eax	; turn tstep carry into -1 (0 if none)
+ add ebx,ds:dword ptr[sstep]	; advance sfrac fractional part by sstep frac
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	; point to next source texel
+;  (advancetable[0] on t carry, else advancetable[1], plus the s carry)
+
+ cmp bp,ds:word ptr[2+ecx]	; pixel 2
+ jl Lp2	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp2	
+ mov ds:word ptr[2+ecx],bp	
+ mov ds:byte ptr[1+edi],al	
+Lp2:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+ cmp bp,ds:word ptr[4+ecx]	; pixel 3
+ jl Lp3	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp3	
+ mov ds:word ptr[4+ecx],bp	
+ mov ds:byte ptr[2+edi],al	
+Lp3:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+ cmp bp,ds:word ptr[6+ecx]	; pixel 4
+ jl Lp4	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp4	
+ mov ds:word ptr[6+ecx],bp	
+ mov ds:byte ptr[3+edi],al	
+Lp4:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+ cmp bp,ds:word ptr[8+ecx]	; pixel 5
+ jl Lp5	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp5	
+ mov ds:word ptr[8+ecx],bp	
+ mov ds:byte ptr[4+edi],al	
+Lp5:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+;
+; start FDIV for end of next segment in flight, so it can overlap
+;
+ pop eax	; eax = pixels remaining after this segment
+ cmp eax,8	; more than one segment after this?
+ ja LSetupNotLast2	; yes
+
+ dec eax	; eax = steps in the final segment
+ jz LFDIVInFlight2	; if only one pixel, no need to start an FDIV
+ mov ds:dword ptr[spancountminus1],eax	
+ fild ds:dword ptr[spancountminus1]	
+
+ fld ds:dword ptr[_d_zistepu]	; _d_zistepu | spancountminus1
+ fmul st(0),st(1)	; _d_zistepu*scm1 | scm1
+ fld ds:dword ptr[_d_tdivzstepu]	; _d_tdivzstepu | _d_zistepu*scm1 | scm1
+ fmul st(0),st(2)	; _d_tdivzstepu*scm1 | _d_zistepu*scm1 | scm1
+ fxch st(1)	; _d_zistepu*scm1 | _d_tdivzstepu*scm1 | scm1
+ faddp st(3),st(0)	; _d_tdivzstepu*scm1 | scm1
+ fxch st(1)	; scm1 | _d_tdivzstepu*scm1
+ fmul ds:dword ptr[_d_sdivzstepu]	; _d_sdivzstepu*scm1 | _d_tdivzstepu*scm1
+ fxch st(1)	; _d_tdivzstepu*scm1 | _d_sdivzstepu*scm1
+ faddp st(3),st(0)	; _d_sdivzstepu*scm1
+ fld ds:dword ptr[fp_64k]	; 64k | _d_sdivzstepu*scm1
+ fxch st(1)	; _d_sdivzstepu*scm1 | 64k
+ faddp st(4),st(0)	; 64k
+
+ fdiv st(0),st(1)	; this is what we've gone to all this trouble to
+;  overlap
+ jmp LFDIVInFlight2	
+
+ align 4	
+LSetupNotLast2:	
+ fadd ds:dword ptr[zi8stepu]	; 1/z advanced by 8 pixels
+ fxch st(2)	
+ fadd ds:dword ptr[sdivz8stepu]	; s/z advanced by 8 pixels
+ fxch st(2)	
+ fld ds:dword ptr[tdivz8stepu]	
+ faddp st(2),st(0)	; t/z advanced by 8 pixels
+ fld ds:dword ptr[fp_64k]	
+ fdiv st(0),st(1)	; z = 1/1/z
+; this is what we've gone to all this trouble to
+;  overlap
+LFDIVInFlight2:	
+ push eax	; save the count again; al/eax are scratch below
+
+ cmp bp,ds:word ptr[10+ecx]	; pixel 6
+ jl Lp6	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp6	
+ mov ds:word ptr[10+ecx],bp	
+ mov ds:byte ptr[5+edi],al	
+Lp6:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+ cmp bp,ds:word ptr[12+ecx]	; pixel 7
+ jl Lp7	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp7	
+ mov ds:word ptr[12+ecx],bp	
+ mov ds:byte ptr[6+edi],al	
+Lp7:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+ cmp bp,ds:word ptr[14+ecx]	; pixel 8
+ jl Lp8	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp8	
+ mov ds:word ptr[14+ecx],bp	
+ mov ds:byte ptr[7+edi],al	
+Lp8:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+ add edi,8	; advance destination by one segment
+ add ecx,16	; advance z-buffer pointer (a word per pixel)
+ mov ds:dword ptr[tfracf],edx	
+ mov edx,ds:dword ptr[snext]	
+ mov ds:dword ptr[sfracf],ebx	
+ mov ebx,ds:dword ptr[tnext]	
+ mov ds:dword ptr[s],edx	; the segment end becomes the next segment start
+ mov ds:dword ptr[t],ebx	
+
+ mov ds:dword ptr[pz],ecx	
+ mov ds:dword ptr[izi],ebp	; izi stays in step with ebp at segment boundaries
+
+ pop ecx	; retrieve count
+
+;
+; determine whether last span or not
+;
+ cmp ecx,8	; are there multiple segments remaining?
+ ja LNotLastSegment	; yes
+
+;
+; last segment of scan
+;
+; Final segment of a span. On entry ecx = (pixels in this segment) - 1,
+; i.e. the number of steps, in the range 0..7; esi = source texel,
+; edi = destination pixel.
+LLastSegment:	
+
+;
+; advance s/z, t/z, and 1/z, and calculate s & t at end of span and steps to
+; get there. The number of pixels left is variable, and we want to land on the
+; last pixel, not step one past it, so we can't run into arithmetic problems
+;
+ test ecx,ecx	
+ jz LNoSteps	; just draw the last pixel and we're done
+
+; pick up after the FDIV that was left in flight previously
+
+
+ fld st(0)	; duplicate it
+ fmul st(0),st(4)	; s = s/z * z
+ fxch st(1)	
+ fmul st(0),st(3)	; t = t/z * z
+ fxch st(1)	
+ fistp ds:dword ptr[snext]	
+ fistp ds:dword ptr[tnext]	
+
+ mov ebx,ds:dword ptr[_tadjust]	
+ mov eax,ds:dword ptr[_sadjust]	
+
+ add eax,ds:dword ptr[snext]	; eax = snext + sadjust
+ add ebx,ds:dword ptr[tnext]	; ebx = tnext + tadjust
+
+ mov ebp,ds:dword ptr[_bbextents]	
+ mov edx,ds:dword ptr[_bbextentt]	
+
+ cmp eax,2048	; clamp snext to [2048, bbextents]
+ jl LClampLow4	
+ cmp eax,ebp	
+ ja LClampHigh4	
+LClampReentry4:	
+ mov ds:dword ptr[snext],eax	
+
+ cmp ebx,2048	; clamp tnext to [2048, bbextentt]
+ jl LClampLow5	
+ cmp ebx,edx	
+ ja LClampHigh5	
+LClampReentry5:	
+
+ cmp ecx,1	; don't bother dividing
+ je LOnlyOneStep	; if two pixels in segment, there's only one step,
+;  of the segment length
+ sub eax,ds:dword ptr[s]	; eax = s delta over the segment
+ sub ebx,ds:dword ptr[t]	; ebx = t delta over the segment
+
+ add eax,eax	; convert to 15.17 format so multiply by 1.31
+ add ebx,ebx	;  reciprocal yields 16.48
+ imul ds:dword ptr[reciprocal_table-8+ecx*4]	; sstep = (snext - s) / (spancount-1)
+;  one-operand imul: the high dword of the product lands in edx
+ mov ebp,edx	; ebp = sstep
+
+ mov eax,ebx	
+ imul ds:dword ptr[reciprocal_table-8+ecx*4]	; tstep = (tnext - t) / (spancount-1)
+;  edx = tstep
+
+LSetEntryvec:	
+;
+; set up advancetable
+;
+; here ebp = sstep, edx = tstep, ecx = steps in the segment
+ mov ebx,ds:dword ptr[spr8entryvec_table+ecx*4]	; handler for this pixel count
+ mov eax,edx	
+ push ebx	; entry point into code for RET later
+ mov ecx,ebp	
+ sar ecx,16	; sstep >>= 16;
+ mov ebx,ds:dword ptr[_cachewidth]	
+ sar edx,16	; tstep >>= 16;
+ jz LIsZeroLast	; skip the multiply when tstep has no whole rows
+ imul edx,ebx	; (tstep >> 16) * cachewidth;
+LIsZeroLast:	
+ add edx,ecx	; add in sstep
+; (tstep >> 16) * cachewidth + (sstep >> 16);
+ mov ecx,ds:dword ptr[tfracf]	
+ mov ds:dword ptr[advancetable+4],edx	; advance base in t
+ add edx,ebx	; ((tstep >> 16) + 1) * cachewidth +
+;  (sstep >> 16);
+ shl ebp,16	; left-justify sstep fractional part
+ mov ebx,ds:dword ptr[sfracf]	
+ shl eax,16	; left-justify tstep fractional part
+ mov ds:dword ptr[advancetable],edx	; advance extra in t
+
+ mov ds:dword ptr[tstep],eax	
+ mov ds:dword ptr[sstep],ebp	
+ mov edx,ecx	; edx = t fraction, as the pixel code expects
+
+ mov ecx,ds:dword ptr[pz]	
+ mov ebp,ds:dword ptr[izi]	
+
+ ret	; jump to the number-of-pixels handler
+
+;----------------------------------------
+
+; Single-pixel final segment: bias the pointers so that LEndSpan's
+; hardwired [14+ecx] / [7+edi] offsets address the current pixel.
+; ebp must hold the rotated 1/z for LEndSpan's z test. When a 1-pixel
+; span arrives here straight from LFDIVInFlight1, ebp still contains
+; _bbextentt, so reload it from izi. izi is kept up to date on both
+; paths that reach this label (written at span setup and at the end of
+; every full segment).
+LNoSteps:	
+ mov ecx,ds:dword ptr[pz]	
+ mov ebp,ds:dword ptr[izi]	; bp = 1/z for the z-buffer compare
+ sub edi,7	; adjust for hardwired offset
+ sub ecx,14	
+ jmp LEndSpan	
+
+
+; Two-pixel final segment: the single step is the whole segment delta, so
+; no reciprocal multiply is needed. Produces ebp = sstep and edx = tstep,
+; as LSetEntryvec expects.
+LOnlyOneStep:	
+ sub eax,ds:dword ptr[s]	; eax = snext - s
+ sub ebx,ds:dword ptr[t]	; ebx = tnext - t
+ mov ebp,eax	
+ mov edx,ebx	
+ jmp LSetEntryvec	
+
+;----------------------------------------
+
+; Spr8EntryN_8: entry points for a final segment of N pixels. They are
+; reached through the "push handler / ret" sequence in LSetEntryvec.
+; Each one backs edi up by (8-N) bytes and ecx by (8-N) words, so the
+; shared unrolled code with its hardwired offsets draws exactly the last
+; N pixel slots.
+; NOTE(review): spr8entryvec_table is defined outside this file; its
+; index-to-label mapping is assumed, not visible here.
+ public Spr8Entry2_8	
+Spr8Entry2_8:	
+ sub edi,6	; adjust for hardwired offsets
+ sub ecx,12	
+ mov al,ds:byte ptr[esi]	; looks redundant: LLEntry2_8 reloads al before using it
+ jmp LLEntry2_8	
+
+;----------------------------------------
+
+ public Spr8Entry3_8	
+Spr8Entry3_8:	
+ sub edi,5	; adjust for hardwired offsets
+ sub ecx,10	
+ jmp LLEntry3_8	
+
+;----------------------------------------
+
+ public Spr8Entry4_8	
+Spr8Entry4_8:	
+ sub edi,4	; adjust for hardwired offsets
+ sub ecx,8	
+ jmp LLEntry4_8	
+
+;----------------------------------------
+
+ public Spr8Entry5_8	
+Spr8Entry5_8:	
+ sub edi,3	; adjust for hardwired offsets
+ sub ecx,6	
+ jmp LLEntry5_8	
+
+;----------------------------------------
+
+ public Spr8Entry6_8	
+Spr8Entry6_8:	
+ sub edi,2	; adjust for hardwired offsets
+ sub ecx,4	
+ jmp LLEntry6_8	
+
+;----------------------------------------
+
+ public Spr8Entry7_8	
+Spr8Entry7_8:	
+ dec edi	; adjust for hardwired offsets
+ sub ecx,2	
+ jmp LLEntry7_8	
+
+;----------------------------------------
+
+; Unrolled code for the final segment; an 8-pixel segment enters at the
+; top, shorter ones enter at LLEntryK_8. It falls through into LEndSpan,
+; which draws the last pixel without stepping.
+; Registers: ecx = z-buffer pointer, ebp = rotated 1/z (compare value in
+; bp), edx = t fraction, ebx = s fraction, esi = source texel,
+; edi = destination pixel.
+; Each pixel: skip if bp < the z-buffer word or the texel equals
+; TRANSPARENT_COLOR, else write z and the pixel; then step 1/z, t and s
+; (sbb/adc selects advancetable[0] on a t-fraction carry, advancetable[1]
+; otherwise, and adds the s-fraction carry).
+ public Spr8Entry8_8	
+Spr8Entry8_8:	
+ cmp bp,ds:word ptr[ecx]	
+ jl Lp9	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp9	
+ mov ds:word ptr[ecx],bp	
+ mov ds:byte ptr[edi],al	
+Lp9:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+LLEntry7_8:	
+ cmp bp,ds:word ptr[2+ecx]	
+ jl Lp10	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp10	
+ mov ds:word ptr[2+ecx],bp	
+ mov ds:byte ptr[1+edi],al	
+Lp10:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+LLEntry6_8:	
+ cmp bp,ds:word ptr[4+ecx]	
+ jl Lp11	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp11	
+ mov ds:word ptr[4+ecx],bp	
+ mov ds:byte ptr[2+edi],al	
+Lp11:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+LLEntry5_8:	
+ cmp bp,ds:word ptr[6+ecx]	
+ jl Lp12	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp12	
+ mov ds:word ptr[6+ecx],bp	
+ mov ds:byte ptr[3+edi],al	
+Lp12:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+LLEntry4_8:	
+ cmp bp,ds:word ptr[8+ecx]	
+ jl Lp13	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp13	
+ mov ds:word ptr[8+ecx],bp	
+ mov ds:byte ptr[4+edi],al	
+Lp13:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+LLEntry3_8:	
+ cmp bp,ds:word ptr[10+ecx]	
+ jl Lp14	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp14	
+ mov ds:word ptr[10+ecx],bp	
+ mov ds:byte ptr[5+edi],al	
+Lp14:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+LLEntry2_8:	
+ cmp bp,ds:word ptr[12+ecx]	
+ jl Lp15	
+ mov al,ds:byte ptr[esi]	
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp15	
+ mov ds:word ptr[12+ecx],bp	
+ mov ds:byte ptr[6+edi],al	
+Lp15:	
+ add ebp,ds:dword ptr[izistep]	
+ adc ebp,0	
+ add edx,ds:dword ptr[tstep]	
+ sbb eax,eax	
+ add ebx,ds:dword ptr[sstep]	
+ adc esi,ds:dword ptr[advancetable+4+eax*4]	
+
+; Draws the last pixel of the span (no stepping afterwards), discards
+; the three values left on the FP stack, then moves on to the next span
+; descriptor or returns.
+; Expects bp = 1/z compare value, esi = source texel, and ecx/edi biased
+; so that [14+ecx] / [7+edi] address the pixel.
+LEndSpan:	
+ cmp bp,ds:word ptr[14+ecx]	
+ jl Lp16	; hidden by the z-buffer
+ mov al,ds:byte ptr[esi]	; fetch the texel for the final pixel
+ cmp al,offset TRANSPARENT_COLOR	
+ jz Lp16	; transparent: leave z and colour untouched
+ mov ds:word ptr[14+ecx],bp	
+ mov ds:byte ptr[7+edi],al	
+Lp16:	
+
+;
+; clear s/z, t/z, 1/z from FP stack
+;
+ fstp st(0)	
+ fstp st(0)	
+ fstp st(0)	
+
+ pop ebx	; restore spans pointer
+LNextSpan:	
+ add ebx,offset sspan_t_size	; point to next span
+ mov ecx,ds:dword ptr[sspan_t_count+ebx]	
+ cmp ecx,0	; any more spans?
+ jg LSpanLoop	; yes
+ jz LNextSpan	; yes, but this one's empty
+;  a negative count falls through and ends the list
+
+ pop ebx	; restore register variables
+ pop esi	
+ pop edi	
+ pop ebp	; restore the caller's stack frame
+ ret	
+
+_TEXT ENDS
+endif	; id386
+ END
--- a/ref_soft/r_sprite.c
+++ b/ref_soft/r_sprite.c
@@ -0,0 +1,123 @@
+/*
+Copyright (C) 1997-2001 Id Software, Inc.
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
+
+See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+*/
+// r_sprite.c
+#include "r_local.h"
+
+extern polydesc_t r_polydesc;
+
+void R_BuildPolygonFromSurface(msurface_t *fa);
+void R_PolygonCalculateGradients (void);
+
+extern void R_PolyChooseSpanletRoutine( float alpha, qboolean isturbulent );
+
+extern vec5_t r_clip_verts[2][MAXWORKINGVERTS+2];
+
+extern void	R_ClipAndDrawPoly( float alpha, qboolean isturbulent, qboolean textured );
+
+/*
+** R_DrawSprite
+**
+** Draw currententity / currentmodel as a single texture
+** mapped polygon
+*/
+void R_DrawSprite (void)
+{
+	vec5_t		*pverts;
+	vec3_t		left, up, right, down;
+	dsprite_t	*s_psprite;
+	dsprframe_t	*s_psprframe;
+
+
+	s_psprite = (dsprite_t *)currentmodel->extradata;
+#if 0
+	if (currententity->frame >= s_psprite->numframes
+		|| currententity->frame < 0)
+	{
+		ri.Con_Printf (PRINT_ALL, "No such sprite frame %i\n", 
+			currententity->frame);
+		currententity->frame = 0;
+	}
+#endif
+	// a sprite without frames has nothing to draw, and would make the
+	// modulo below divide by zero
+	if (s_psprite->numframes <= 0)
+		return;
+
+	// wrap the frame number into [0, numframes); C's % keeps the sign of
+	// the dividend, so a negative frame has to be shifted back into range
+	// before it is used as an index into frames[] and skins[]
+	currententity->frame %= s_psprite->numframes;
+	if (currententity->frame < 0)
+		currententity->frame += s_psprite->numframes;
+
+	s_psprframe = &s_psprite->frames[currententity->frame];
+
+	r_polydesc.pixels       = currentmodel->skins[currententity->frame]->pixels[0];
+	r_polydesc.pixel_width  = s_psprframe->width;
+	r_polydesc.pixel_height = s_psprframe->height;
+	r_polydesc.dist         = 0;
+
+	// generate the sprite's axes, completely parallel to the viewplane.
+	VectorCopy (vup, r_polydesc.vup);
+	VectorCopy (vright, r_polydesc.vright);
+	VectorCopy (vpn, r_polydesc.vpn);
+
+// build the sprite poster in worldspace
+	// offsets from the entity origin to the four edges of the frame
+	VectorScale (r_polydesc.vright, 
+		s_psprframe->width - s_psprframe->origin_x, right);
+	VectorScale (r_polydesc.vup, 
+		s_psprframe->height - s_psprframe->origin_y, up);
+	VectorScale (r_polydesc.vright,
+		-s_psprframe->origin_x, left);
+	VectorScale (r_polydesc.vup,
+		-s_psprframe->origin_y, down);
+
+	// invert UP vector for sprites
+	VectorInverse( r_polydesc.vup );
+
+	pverts = r_clip_verts[0];
+
+	// each vertex is x, y, z in world space followed by its s, t texel
+	// coordinates: up-left, up-right, down-right, down-left
+	pverts[0][0] = r_entorigin[0] + up[0] + left[0];
+	pverts[0][1] = r_entorigin[1] + up[1] + left[1];
+	pverts[0][2] = r_entorigin[2] + up[2] + left[2];
+	pverts[0][3] = 0;
+	pverts[0][4] = 0;
+
+	pverts[1][0] = r_entorigin[0] + up[0] + right[0];
+	pverts[1][1] = r_entorigin[1] + up[1] + right[1];
+	pverts[1][2] = r_entorigin[2] + up[2] + right[2];
+	pverts[1][3] = s_psprframe->width;
+	pverts[1][4] = 0;
+
+	pverts[2][0] = r_entorigin[0] + down[0] + right[0];
+	pverts[2][1] = r_entorigin[1] + down[1] + right[1];
+	pverts[2][2] = r_entorigin[2] + down[2] + right[2];
+	pverts[2][3] = s_psprframe->width;
+	pverts[2][4] = s_psprframe->height;
+
+	pverts[3][0] = r_entorigin[0] + down[0] + left[0];
+	pverts[3][1] = r_entorigin[1] + down[1] + left[1];
+	pverts[3][2] = r_entorigin[2] + down[2] + left[2];
+	pverts[3][3] = 0;
+	pverts[3][4] = s_psprframe->height;
+
+	r_polydesc.nump = 4;
+	r_polydesc.s_offset = ( r_polydesc.pixel_width  >> 1);
+	r_polydesc.t_offset = ( r_polydesc.pixel_height >> 1);
+	VectorCopy( modelorg, r_polydesc.viewer_position );
+
+	// stipple_parity is set only for the duration of the draw call
+	r_polydesc.stipple_parity = 1;
+	if ( currententity->flags & RF_TRANSLUCENT )
+		R_ClipAndDrawPoly ( currententity->alpha, false, true );
+	else
+		R_ClipAndDrawPoly ( 1.0F, false, true );
+	r_polydesc.stipple_parity = 0;
+}
+
--- a/ref_soft/r_surf.c
+++ b/ref_soft/r_surf.c
@@ -0,0 +1,651 @@
+/*
+Copyright (C) 1997-2001 Id Software, Inc.
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
+
+See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+*/
+// r_surf.c: surface-related refresh code
+
+#include "r_local.h"
+
+// description of the surface currently being rasterized into the cache;
+// filled in by D_CacheSurface and read by R_DrawSurface
+drawsurf_t	r_drawsurf;
+
+// State shared between R_DrawSurface and the per-mip block drawers (the C
+// versions below, or the assembly versions when id386 is set).  They are
+// globals rather than parameters because the block drawers take no arguments.
+
+// blocksize and sourcetstep are set by R_DrawSurface; lightleft is per-block
+// scratch of the C block drawers.  sourcesstep is not referenced by the code
+// in this file.
+int				lightleft, sourcesstep, blocksize, sourcetstep;
+// not touched by the C block drawers -- presumably scratch for the assembly
+// versions only; verify against r_surf8.asm
+int				lightdelta, lightdeltastep;
+// per-block light interpolation scratch; blockdivshift is 4 - mip level
+int				lightright, lightleftstep, lightrightstep, blockdivshift;
+unsigned		blockdivmask;	// (1 << blockdivshift) - 1
+void			*prowdestbase;	// first destination byte of the current block column
+unsigned char	*pbasesource;	// first source texel of the current block column
+int				surfrowbytes;	// used by ASM files
+unsigned		*r_lightptr;	// light sample at the top left of the next block
+int				r_stepback;		// texels in the whole mip image; subtracted to wrap vertically
+int				r_lightwidth;	// light samples per row of the light grid
+int				r_numhblocks, r_numvblocks;	// surface size in blocks
+unsigned char	*r_source, *r_sourcemax;	// start / one past the end of the mip image
+
+// the block drawers are implemented either in C (below, when id386 is not
+// set) or in assembly
+void R_DrawSurfaceBlock8_mip0 (void);
+void R_DrawSurfaceBlock8_mip1 (void);
+void R_DrawSurfaceBlock8_mip2 (void);
+void R_DrawSurfaceBlock8_mip3 (void);
+
+// block drawer for each mip level, indexed by r_drawsurf.surfmip
+static void	(*surfmiptable[4])(void) = {
+	R_DrawSurfaceBlock8_mip0,
+	R_DrawSurfaceBlock8_mip1,
+	R_DrawSurfaceBlock8_mip2,
+	R_DrawSurfaceBlock8_mip3
+};
+
+void R_BuildLightMap (void);
+extern	unsigned		blocklights[1024];	// allow some very large lightmaps
+
+float           surfscale;	// 1.0 / (1 << miplevel) of the surface being cached
+qboolean        r_cache_thrash;         // set if surface cache is thrashing
+
+// the surface cache: one malloc'ed region of sc_size bytes starting at
+// sc_base, carved into a linked list of surfcache_t blocks; sc_rover is the
+// block where the next allocation starts
+int         sc_size;
+surfcache_t	*sc_rover, *sc_base;
+
+/*
+===============
+R_TextureAnimation
+
+Selects the image to draw for a possibly animated texture.  A texinfo
+without a successor is returned as is; otherwise the chain is followed
+(currententity->frame % tex->numframes) links and that entry's image is
+returned.
+===============
+*/
+image_t *R_TextureAnimation (mtexinfo_t *tex)
+{
+	int		steps;
+
+	// not part of an animation chain
+	if (tex->next == NULL)
+		return tex->image;
+
+	// advance one link per remaining step
+	for (steps = currententity->frame % tex->numframes ; steps ; steps--)
+		tex = tex->next;
+
+	return tex->image;
+}
+
+
+/*
+===============
+R_DrawSurface
+
+Rasterizes the surface described by r_drawsurf into r_drawsurf.surfdat.
+Sets up the globals shared with the block drawers, then calls the drawer
+for the current mip level once per horizontal block; each call fills one
+column of blocks.
+===============
+*/
+void R_DrawSurface (void)
+{
+	image_t			*texture;
+	int				mip;
+	int				texw, texh;
+	int				scol, trow;
+	int				column;
+	unsigned char	*rowstart, *dest;
+	void			(*drawblock)(void);
+
+	texture = r_drawsurf.image;
+	mip = r_drawsurf.surfmip;
+
+	// texture dimensions at this mip level
+	texw = texture->width >> mip;
+	texh = texture->height >> mip;
+
+	// source image bounds and strides used by the block drawers
+	surfrowbytes = r_drawsurf.rowbytes;
+	r_source = texture->pixels[mip];
+	r_sourcemax = r_source + (texh * texw);
+	sourcetstep = texw;
+	r_stepback = texh * texw;
+
+	// the fractional light values should range from 0 to (VID_GRADES - 1) << 16
+	// from a source range of 0 - 255
+	blocksize = 16 >> mip;
+	blockdivshift = 4 - mip;
+	blockdivmask = (1 << blockdivshift) - 1;
+
+	r_lightwidth = (r_drawsurf.surf->extents[0]>>4)+1;
+
+	r_numhblocks = r_drawsurf.surfwidth >> blockdivshift;
+	r_numvblocks = r_drawsurf.surfheight >> blockdivshift;
+
+	drawblock = surfmiptable[mip];
+
+	// wrap the surface's first texel into the texture; the (dimension << 16)
+	// term keeps the left operand of % positive
+	scol = ((r_drawsurf.surf->texturemins[0] >> mip) + (texw << 16)) % texw;
+	trow = ((r_drawsurf.surf->texturemins[1] >> mip) + (texh << 16)) % texh;
+	rowstart = &r_source[trow * texw];
+
+	dest = r_drawsurf.surfdat;
+
+	for (column = 0 ; column < r_numhblocks ; column++, dest += blocksize)
+	{
+		r_lightptr = blocklights + column;
+		prowdestbase = dest;
+		pbasesource = rowstart + scol;
+
+		(*drawblock)();
+
+		// step one block to the right in the source, restarting at the left
+		// edge once the texture width is reached
+		scol += blocksize;
+		if (scol >= texw)
+			scol = 0;
+	}
+}
+
+
+//=============================================================================
+
+#if	!id386
+
+/*
+================
+R_DrawSurfaceBlock8_mip0
+
+Portable C block drawer for mip level 0.  Starting at pbasesource and
+prowdestbase, lights and copies r_numvblocks vertically stacked blocks of
+16x16 texels.  The light is interpolated between the samples at
+r_lightptr[0] / r_lightptr[1] and the pair one light row (r_lightwidth
+entries) further on; bits 8-15 of the interpolated value select the
+vid.colormap row.  r_lightptr is advanced by one light row per block.
+================
+*/
+void R_DrawSurfaceBlock8_mip0 (void)
+{
+	int				v, i, b, lightstep, lighttemp, light;
+	unsigned char	pix, *psource, *prowdest;
+
+	psource = pbasesource;
+	prowdest = prowdestbase;
+
+	for (v=0 ; v<r_numvblocks ; v++)
+	{
+	// FIXME: make these locals?
+	// FIXME: use delta rather than both right and left, like ASM?
+		lightleft = r_lightptr[0];
+		lightright = r_lightptr[1];
+		r_lightptr += r_lightwidth;
+	// subtract as signed ints: in unsigned arithmetic a falling light value
+	// would shift to a huge positive step and overflow the signed
+	// accumulators below.  The low 16 bits of the step are unchanged.
+		lightleftstep = ((int)r_lightptr[0] - lightleft) >> 4;
+		lightrightstep = ((int)r_lightptr[1] - lightright) >> 4;
+
+		for (i=0 ; i<16 ; i++)
+		{
+			lighttemp = lightleft - lightright;
+			lightstep = lighttemp >> 4;
+
+			light = lightright;
+
+			// walk the row right to left, from the right-edge light value
+			for (b=15; b>=0; b--)
+			{
+				pix = psource[b];
+				prowdest[b] = ((unsigned char *)vid.colormap)
+						[(light & 0xFF00) + pix];
+				light += lightstep;
+			}
+	
+			psource += sourcetstep;
+			lightright += lightrightstep;
+			lightleft += lightleftstep;
+			prowdest += surfrowbytes;
+		}
+
+		// wrap vertically once the end of the source image is passed
+		if (psource >= r_sourcemax)
+			psource -= r_stepback;
+	}
+}
+
+
+/*
+================
+R_DrawSurfaceBlock8_mip1
+
+Portable C block drawer for mip level 1.  Starting at pbasesource and
+prowdestbase, lights and copies r_numvblocks vertically stacked blocks of
+8x8 texels.  The light is interpolated between the samples at
+r_lightptr[0] / r_lightptr[1] and the pair one light row (r_lightwidth
+entries) further on; bits 8-15 of the interpolated value select the
+vid.colormap row.  r_lightptr is advanced by one light row per block.
+================
+*/
+void R_DrawSurfaceBlock8_mip1 (void)
+{
+	int				v, i, b, lightstep, lighttemp, light;
+	unsigned char	pix, *psource, *prowdest;
+
+	psource = pbasesource;
+	prowdest = prowdestbase;
+
+	for (v=0 ; v<r_numvblocks ; v++)
+	{
+	// FIXME: make these locals?
+	// FIXME: use delta rather than both right and left, like ASM?
+		lightleft = r_lightptr[0];
+		lightright = r_lightptr[1];
+		r_lightptr += r_lightwidth;
+	// subtract as signed ints: in unsigned arithmetic a falling light value
+	// would shift to a huge positive step and overflow the signed
+	// accumulators below.  The low 16 bits of the step are unchanged.
+		lightleftstep = ((int)r_lightptr[0] - lightleft) >> 3;
+		lightrightstep = ((int)r_lightptr[1] - lightright) >> 3;
+
+		for (i=0 ; i<8 ; i++)
+		{
+			lighttemp = lightleft - lightright;
+			lightstep = lighttemp >> 3;
+
+			light = lightright;
+
+			// walk the row right to left, from the right-edge light value
+			for (b=7; b>=0; b--)
+			{
+				pix = psource[b];
+				prowdest[b] = ((unsigned char *)vid.colormap)
+						[(light & 0xFF00) + pix];
+				light += lightstep;
+			}
+	
+			psource += sourcetstep;
+			lightright += lightrightstep;
+			lightleft += lightleftstep;
+			prowdest += surfrowbytes;
+		}
+
+		// wrap vertically once the end of the source image is passed
+		if (psource >= r_sourcemax)
+			psource -= r_stepback;
+	}
+}
+
+
+/*
+================
+R_DrawSurfaceBlock8_mip2
+
+Portable C block drawer for mip level 2.  Starting at pbasesource and
+prowdestbase, lights and copies r_numvblocks vertically stacked blocks of
+4x4 texels.  The light is interpolated between the samples at
+r_lightptr[0] / r_lightptr[1] and the pair one light row (r_lightwidth
+entries) further on; bits 8-15 of the interpolated value select the
+vid.colormap row.  r_lightptr is advanced by one light row per block.
+================
+*/
+void R_DrawSurfaceBlock8_mip2 (void)
+{
+	int				v, i, b, lightstep, lighttemp, light;
+	unsigned char	pix, *psource, *prowdest;
+
+	psource = pbasesource;
+	prowdest = prowdestbase;
+
+	for (v=0 ; v<r_numvblocks ; v++)
+	{
+	// FIXME: make these locals?
+	// FIXME: use delta rather than both right and left, like ASM?
+		lightleft = r_lightptr[0];
+		lightright = r_lightptr[1];
+		r_lightptr += r_lightwidth;
+	// subtract as signed ints: in unsigned arithmetic a falling light value
+	// would shift to a huge positive step and overflow the signed
+	// accumulators below.  The low 16 bits of the step are unchanged.
+		lightleftstep = ((int)r_lightptr[0] - lightleft) >> 2;
+		lightrightstep = ((int)r_lightptr[1] - lightright) >> 2;
+
+		for (i=0 ; i<4 ; i++)
+		{
+			lighttemp = lightleft - lightright;
+			lightstep = lighttemp >> 2;
+
+			light = lightright;
+
+			// walk the row right to left, from the right-edge light value
+			for (b=3; b>=0; b--)
+			{
+				pix = psource[b];
+				prowdest[b] = ((unsigned char *)vid.colormap)
+						[(light & 0xFF00) + pix];
+				light += lightstep;
+			}
+	
+			psource += sourcetstep;
+			lightright += lightrightstep;
+			lightleft += lightleftstep;
+			prowdest += surfrowbytes;
+		}
+
+		// wrap vertically once the end of the source image is passed
+		if (psource >= r_sourcemax)
+			psource -= r_stepback;
+	}
+}
+
+
+/*
+================
+R_DrawSurfaceBlock8_mip3
+
+Portable C block drawer for mip level 3.  Starting at pbasesource and
+prowdestbase, lights and copies r_numvblocks vertically stacked blocks of
+2x2 texels.  The light is interpolated between the samples at
+r_lightptr[0] / r_lightptr[1] and the pair one light row (r_lightwidth
+entries) further on; bits 8-15 of the interpolated value select the
+vid.colormap row.  r_lightptr is advanced by one light row per block.
+================
+*/
+void R_DrawSurfaceBlock8_mip3 (void)
+{
+	int				v, i, b, lightstep, lighttemp, light;
+	unsigned char	pix, *psource, *prowdest;
+
+	psource = pbasesource;
+	prowdest = prowdestbase;
+
+	for (v=0 ; v<r_numvblocks ; v++)
+	{
+	// FIXME: make these locals?
+	// FIXME: use delta rather than both right and left, like ASM?
+		lightleft = r_lightptr[0];
+		lightright = r_lightptr[1];
+		r_lightptr += r_lightwidth;
+	// subtract as signed ints: in unsigned arithmetic a falling light value
+	// would shift to a huge positive step and overflow the signed
+	// accumulators below.  The low 16 bits of the step are unchanged.
+		lightleftstep = ((int)r_lightptr[0] - lightleft) >> 1;
+		lightrightstep = ((int)r_lightptr[1] - lightright) >> 1;
+
+		for (i=0 ; i<2 ; i++)
+		{
+			lighttemp = lightleft - lightright;
+			lightstep = lighttemp >> 1;
+
+			light = lightright;
+
+			// walk the row right to left, from the right-edge light value
+			for (b=1; b>=0; b--)
+			{
+				pix = psource[b];
+				prowdest[b] = ((unsigned char *)vid.colormap)
+						[(light & 0xFF00) + pix];
+				light += lightstep;
+			}
+	
+			psource += sourcetstep;
+			lightright += lightrightstep;
+			lightleft += lightleftstep;
+			prowdest += surfrowbytes;
+		}
+
+		// wrap vertically once the end of the source image is passed
+		if (psource >= r_sourcemax)
+			psource -= r_stepback;
+	}
+}
+
+#endif
+
+
+//============================================================================
+
+
+/*
+================
+R_InitCaches
+
+Sizes and allocates the surface cache and resets it to a single free
+block.  The size is taken from sw_surfcacheoverride when that cvar is
+non-zero; otherwise it is SURFCACHE_SIZE_AT_320X240 plus 3 bytes for every
+pixel beyond 64000.  The result is rounded up to a multiple of 8192 bytes.
+A failed allocation is reported through ri.Sys_Error.
+================
+*/
+void R_InitCaches (void)
+{
+	int		size;
+	int		pix;
+
+	// calculate size to allocate
+	if (sw_surfcacheoverride->value)
+	{
+		size = sw_surfcacheoverride->value;
+	}
+	else
+	{
+		size = SURFCACHE_SIZE_AT_320X240;
+
+		pix = vid.width*vid.height;
+		if (pix > 64000)
+			size += (pix-64000)*3;
+	}		
+
+	// round up to page size
+	size = (size + 8191) & ~8191;
+
+	ri.Con_Printf (PRINT_ALL,"%ik surface cache\n", size/1024);
+
+	sc_size = size;
+	sc_base = (surfcache_t *)malloc(size);
+	sc_rover = sc_base;
+
+	// the first block's header is written right below, so a failed
+	// allocation has to be reported rather than dereferenced
+	if (!sc_base)
+	{
+		ri.Sys_Error (ERR_FATAL,"R_InitCaches: couldn't allocate %i bytes for the surface cache\n", size);
+		return;
+	}
+	
+	// the whole region starts out as one unowned block
+	sc_base->next = NULL;
+	sc_base->owner = NULL;
+	sc_base->size = sc_size;
+}
+
+
+/*
+==================
+D_FlushCaches
+
+Discards every cached surface: clears each owner's pointer to its cache
+block, then turns the cache back into one free block spanning sc_size
+bytes.  Does nothing when no cache has been allocated.
+==================
+*/
+void D_FlushCaches (void)
+{
+	surfcache_t     *block;
+
+	if (sc_base == NULL)
+		return;
+
+	// tell every owner that its cached surface is gone
+	block = sc_base;
+	while (block != NULL)
+	{
+		if (block->owner != NULL)
+			*block->owner = NULL;
+		block = block->next;
+	}
+
+	// a single unowned block covering the whole cache
+	sc_base->owner = NULL;
+	sc_base->next = NULL;
+	sc_base->size = sc_size;
+	sc_rover = sc_base;
+}
+
+/*
+=================
+D_SCAlloc
+
+Carves a block with room for 'size' bytes of surface data out of the
+surface cache, starting at sc_rover and evicting (and merging) as many
+following blocks as needed.
+
+width	row width recorded in the block; must be 0..256
+size	number of data bytes wanted; must be 1..0x10000
+
+Returns the new block with owner set to NULL; the caller is expected to
+fill in the owner.  Invalid arguments and an exhausted cache are reported
+through ri.Sys_Error.
+=================
+*/
+surfcache_t     *D_SCAlloc (int width, int size)
+{
+	surfcache_t             *new;
+	qboolean                wrapped_this_time;
+
+	if ((width < 0) || (width > 256))
+		ri.Sys_Error (ERR_FATAL,"D_SCAlloc: bad cache width %d\n", width);
+
+	if ((size <= 0) || (size > 0x10000))
+		ri.Sys_Error (ERR_FATAL,"D_SCAlloc: bad cache size %d\n", size);
+	
+	// offset of data[size] from the start of the block, i.e. the header
+	// plus the requested data bytes, then rounded up to a multiple of 4
+	size = (int)&((surfcache_t *)0)->data[size];
+	size = (size + 3) & ~3;
+	if (size > sc_size)
+		ri.Sys_Error (ERR_FATAL,"D_SCAlloc: %i > cache size of %i",size, sc_size);
+
+// if there is not size bytes after the rover, reset to the start
+	wrapped_this_time = false;
+
+	if ( !sc_rover || (byte *)sc_rover - (byte *)sc_base > sc_size - size)
+	{
+		// a NULL rover (allocation ended at the last block) restarts at the
+		// base without counting as a wrap
+		if (sc_rover)
+		{
+			wrapped_this_time = true;
+		}
+		sc_rover = sc_base;
+	}
+		
+// collect and free surfcache_t blocks until the rover block is large enough
+	new = sc_rover;
+	if (sc_rover->owner)
+		*sc_rover->owner = NULL;
+	
+	while (new->size < size)
+	{
+	// free another
+		sc_rover = sc_rover->next;
+		if (!sc_rover)
+			ri.Sys_Error (ERR_FATAL,"D_SCAlloc: hit the end of memory");
+		if (sc_rover->owner)
+			*sc_rover->owner = NULL;
+			
+		// absorb the evicted block into the one being built
+		new->size += sc_rover->size;
+		new->next = sc_rover->next;
+	}
+
+// create a fragment out of any leftovers
+	// only when more than 256 bytes are left over; smaller remainders stay
+	// attached to the new block
+	if (new->size - size > 256)
+	{
+		sc_rover = (surfcache_t *)( (byte *)new + size);
+		sc_rover->size = new->size - size;
+		sc_rover->next = new->next;
+		sc_rover->width = 0;
+		sc_rover->owner = NULL;
+		new->next = sc_rover;
+		new->size = size;
+	}
+	else
+		sc_rover = new->next;
+	
+	new->width = width;
+// DEBUG
+	// number of 'width'-byte rows that fit in the data area
+	if (width > 0)
+		new->height = (size - sizeof(*new) + sizeof(new->data)) / width;
+
+	new->owner = NULL;              // should be set properly after return
+
+	// thrash detection: d_roverwrapped and d_initial_rover are defined
+	// outside this file -- presumably reset once per frame; verify
+	if (d_roverwrapped)
+	{
+		if (wrapped_this_time || (sc_rover >= d_initial_rover))
+			r_cache_thrash = true;
+	}
+	else if (wrapped_this_time)
+	{       
+		d_roverwrapped = true;
+	}
+
+	return new;
+}
+
+
+/*
+=================
+D_SCDump
+
+Prints one line per surface cache block (address, size, width) to the
+console, with a "ROVER:" line in front of the block sc_rover points at.
+=================
+*/
+void D_SCDump (void)
+{
+	surfcache_t             *block;
+
+	block = sc_base;
+	while (block)
+	{
+		if (block == sc_rover)
+			ri.Con_Printf (PRINT_ALL,"ROVER:\n");
+		ri.Con_Printf (PRINT_ALL,"%p : %i bytes     %i width\n",block, block->size, block->width);
+
+		block = block->next;
+	}
+}
+
+//=============================================================================
+
+// returns num - 1 as a wrap mask for the sizes 16, 32, 64 and 128;
+// every other size is assumed not to repeat and gets the mask 255
+
+int     MaskForNum (int num)
+{
+	switch (num)
+	{
+	case 128:
+	case 64:
+	case 32:
+	case 16:
+		return num - 1;
+	default:
+		return 255;
+	}
+}
+
+// returns floor(log2(num)) for positive num, and 0 for num <= 0
+int D_log2 (int num)
+{
+	int     c;
+
+	// zero has no logarithm, and a negative value never reaches zero when
+	// the right shift sign-extends, so both are answered with 0 instead of
+	// looping
+	if (num <= 0)
+		return 0;
+
+	// count how many times num can be halved before it reaches 1
+	for (c = 0 ; num > 1 ; num >>= 1)
+		c++;
+
+	return c;
+}
+
+//=============================================================================
+
+/*
+================
+D_CacheSurface
+
+Returns the surface cache block holding 'surface' rendered at 'miplevel'.
+An existing block is reused untouched when it was built without dynamic
+light, the surface is not dynamically lit this frame, and its image and
+light style scales still match; otherwise the surface is relit and redrawn,
+into a newly allocated block if it had none.
+================
+*/
+surfcache_t *D_CacheSurface (msurface_t *surface, int miplevel)
+{
+	surfcache_t     *cache;
+	int             style;
+	int             lit_this_frame;
+
+//
+// work out the image and light style scales the surface needs right now
+//
+	r_drawsurf.image = R_TextureAnimation (surface->texinfo);
+	for (style = 0 ; style < 4 ; style++)
+		r_drawsurf.lightadj[style] = r_newrefdef.lightstyles[surface->styles[style]].white*128;
+
+	lit_this_frame = (surface->dlightframe == r_framecount);
+
+//
+// see if the cache holds appropriate data
+//
+	cache = surface->cachespots[miplevel];
+
+	if (cache && !cache->dlight && !lit_this_frame
+			&& cache->image == r_drawsurf.image)
+	{
+		for (style = 0 ; style < 4 ; style++)
+		{
+			if (cache->lightadj[style] != r_drawsurf.lightadj[style])
+				break;
+		}
+
+		// all four scales unchanged: the cached pixels are still valid
+		if (style == 4)
+			return cache;
+	}
+
+//
+// determine shape of surface
+//
+	surfscale = 1.0 / (1<<miplevel);
+	r_drawsurf.surfmip = miplevel;
+	r_drawsurf.surfwidth = surface->extents[0] >> miplevel;
+	r_drawsurf.rowbytes = r_drawsurf.surfwidth;
+	r_drawsurf.surfheight = surface->extents[1] >> miplevel;
+
+//
+// allocate memory if needed
+//
+	if (!cache)     // if a texture just animated, don't reallocate it
+	{
+		cache = D_SCAlloc (r_drawsurf.surfwidth,
+						   r_drawsurf.surfwidth * r_drawsurf.surfheight);
+		surface->cachespots[miplevel] = cache;
+		cache->owner = &surface->cachespots[miplevel];
+		cache->mipscale = surfscale;
+	}
+
+//
+// record what the block is about to contain
+//
+	cache->dlight = lit_this_frame ? 1 : 0;
+
+	r_drawsurf.surfdat = (pixel_t *)cache->data;
+
+	cache->image = r_drawsurf.image;
+	for (style = 0 ; style < 4 ; style++)
+		cache->lightadj[style] = r_drawsurf.lightadj[style];
+
+//
+// draw and light the surface texture
+//
+	r_drawsurf.surf = surface;
+
+	c_surf++;
+
+	// calculate the lightings
+	R_BuildLightMap ();
+
+	// rasterize the surface into the cache
+	R_DrawSurface ();
+
+	return cache;
+}
+
+
--- a/ref_soft/r_surf8.asm
+++ b/ref_soft/r_surf8.asm
@@ -0,0 +1,771 @@
+ .386P
+ .model FLAT
+;
+; surf8.s
+; x86 assembly-language 8 bpp surface block drawing code.
+;
+
+include qasm.inc
+
+if	id386
+
+_DATA SEGMENT	
+
+; vertical block countdown for the block drawers below: loaded from
+; _r_numvblocks on entry and decremented once per block
+sb_v dd 0	
+
+_DATA ENDS
+_TEXT SEGMENT	
+
+; exported label in front of the first block drawer; paired with
+; _R_Surf8End after the last one
+ align 4	
+ public _R_Surf8Start	
+_R_Surf8Start:	
+
+;----------------------------------------------------------------------
+; Surface block drawer for mip level 0
+;
+; void R_DrawSurfaceBlock8_mip0 (void)
+;
+; Draws _r_numvblocks vertically stacked blocks, 16 rows of 16 pixels each.
+; Takes no stack arguments; all inputs are globals.
+;
+; Reads:  _r_lightptr, _r_numvblocks, _prowdestbase, _pbasesource,
+;         _r_lightwidth, _sourcetstep, _surfrowbytes, _r_sourcemax,
+;         _r_stepback
+; Writes: _r_lightptr, _lightright, _lightrightstep, _lightdelta,
+;         _lightdeltastep, sb_v
+; Saves and restores ebp, edi, esi, ebx; eax, ecx and edx are overwritten.
+;
+; Register roles inside the row loop:
+;   esi = source row, edi = destination row
+;   edx = light value for the current pixel (dh selects the lookup row)
+;   ebp = light step per pixel
+;   ebx, ecx = lookup indices: high byte = dh, low byte = source texel
+;   eax = four output pixels being assembled
+;
+; The 12345678h displacements are placeholders; _R_Surf8Patch overwrites
+; the four bytes in front of each LBPatch label with the value of _colormap.
+;----------------------------------------------------------------------
+
+ align 4	
+ public _R_DrawSurfaceBlock8_mip0	
+_R_DrawSurfaceBlock8_mip0:	
+ push ebp	; preserve caller's stack frame
+ push edi	
+ push esi	; preserve register variables
+ push ebx	
+
+;		for (v=0 ; v<numvblocks ; v++)
+;		{
+ mov ebx,ds:dword ptr[_r_lightptr]	
+ mov eax,ds:dword ptr[_r_numvblocks]	
+
+ mov ds:dword ptr[sb_v],eax	
+ mov edi,ds:dword ptr[_prowdestbase]	
+
+ mov esi,ds:dword ptr[_pbasesource]	
+
+Lv_loop_mip0:	
+
+;			lightleft = lightptr[0];
+;			lightright = lightptr[1];
+;			lightdelta = (lightleft - lightright) & 0xFFFFF;
+ mov eax,ds:dword ptr[ebx]	; lightleft
+ mov edx,ds:dword ptr[4+ebx]	; lightright
+
+ mov ebp,eax	
+ mov ecx,ds:dword ptr[_r_lightwidth]	
+
+ mov ds:dword ptr[_lightright],edx	
+ sub ebp,edx	
+
+ and ebp,0FFFFFh	
+ lea ebx,ds:dword ptr[ebx+ecx*4]	
+
+;			lightptr += lightwidth;
+ mov ds:dword ptr[_r_lightptr],ebx	
+
+;			lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
+;			lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
+;			lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
+;					0xF0000000;
+ mov ecx,ds:dword ptr[4+ebx]	; lightptr[1]
+ mov ebx,ds:dword ptr[ebx]	; lightptr[0]
+
+ sub ebx,eax	
+ sub ecx,edx	
+
+; the top nibble of ebp doubles as the row counter: it starts at 0Fh, and
+; the 0Fh added with lightdeltastep after each row produces a carry 15
+; times, so the row loop below runs 16 times
+ sar ecx,4	
+ or ebp,0F0000000h	
+
+ sar ebx,4	
+ mov ds:dword ptr[_lightrightstep],ecx	
+
+ sub ebx,ecx	
+ and ebx,0FFFFFh	
+
+ or ebx,0F0000000h	
+ sub ecx,ecx	; high word must be 0 in loop for addressing
+
+ mov ds:dword ptr[_lightdeltastep],ebx	
+ sub ebx,ebx	; high word must be 0 in loop for addressing
+
+; one row per pass: pixels are produced right to left (source offsets 15
+; down to 0), four at a time in eax, and stored as dwords at edi+12, +8,
+; +4 and +0
+Lblockloop8_mip0:	
+ mov ds:dword ptr[_lightdelta],ebp	
+ mov cl,ds:byte ptr[14+esi]	
+
+ sar ebp,4	
+ mov bh,dh	
+
+ mov bl,ds:byte ptr[15+esi]	
+ add edx,ebp	
+
+ mov ch,dh	
+ add edx,ebp	
+
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch0:	
+ mov bl,ds:byte ptr[13+esi]	
+
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch1:	
+ mov cl,ds:byte ptr[12+esi]	
+
+ mov bh,dh	
+ add edx,ebp	
+
+ ror eax,16	
+ mov ch,dh	
+
+ add edx,ebp	
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch2:	
+
+ mov bl,ds:byte ptr[11+esi]	
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch3:	
+
+ mov cl,ds:byte ptr[10+esi]	
+ mov ds:dword ptr[12+edi],eax	
+
+ mov bh,dh	
+ add edx,ebp	
+
+ mov ch,dh	
+ add edx,ebp	
+
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch4:	
+ mov bl,ds:byte ptr[9+esi]	
+
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch5:	
+ mov cl,ds:byte ptr[8+esi]	
+
+ mov bh,dh	
+ add edx,ebp	
+
+ ror eax,16	
+ mov ch,dh	
+
+ add edx,ebp	
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch6:	
+
+ mov bl,ds:byte ptr[7+esi]	
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch7:	
+
+ mov cl,ds:byte ptr[6+esi]	
+ mov ds:dword ptr[8+edi],eax	
+
+ mov bh,dh	
+ add edx,ebp	
+
+ mov ch,dh	
+ add edx,ebp	
+
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch8:	
+ mov bl,ds:byte ptr[5+esi]	
+
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch9:	
+ mov cl,ds:byte ptr[4+esi]	
+
+ mov bh,dh	
+ add edx,ebp	
+
+ ror eax,16	
+ mov ch,dh	
+
+ add edx,ebp	
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch10:	
+
+ mov bl,ds:byte ptr[3+esi]	
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch11:	
+
+ mov cl,ds:byte ptr[2+esi]	
+ mov ds:dword ptr[4+edi],eax	
+
+ mov bh,dh	
+ add edx,ebp	
+
+ mov ch,dh	
+ add edx,ebp	
+
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch12:	
+ mov bl,ds:byte ptr[1+esi]	
+
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch13:	
+ mov cl,ds:byte ptr[esi]	
+
+ mov bh,dh	
+ add edx,ebp	
+
+ ror eax,16	
+ mov ch,dh	
+
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch14:	
+ mov edx,ds:dword ptr[_lightright]	
+
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch15:	
+ mov ebp,ds:dword ptr[_lightdelta]	
+
+ mov ds:dword ptr[edi],eax	
+
+; advance to the next row: source, destination, right-edge light and delta
+ add esi,ds:dword ptr[_sourcetstep]	
+ add edi,ds:dword ptr[_surfrowbytes]	
+
+ add edx,ds:dword ptr[_lightrightstep]	
+ add ebp,ds:dword ptr[_lightdeltastep]	
+
+; the mov leaves the flags alone, so jc tests the carry of the add to ebp
+ mov ds:dword ptr[_lightright],edx	
+ jc Lblockloop8_mip0	
+
+;			if (pbasesource >= r_sourcemax)
+;				pbasesource -= stepback;
+
+ cmp esi,ds:dword ptr[_r_sourcemax]	
+ jb LSkip_mip0	
+ sub esi,ds:dword ptr[_r_stepback]	
+LSkip_mip0:	
+
+ mov ebx,ds:dword ptr[_r_lightptr]	
+ dec ds:dword ptr[sb_v]	
+
+ jnz Lv_loop_mip0	
+
+ pop ebx	; restore register variables
+ pop esi	
+ pop edi	
+ pop ebp	; restore the caller's stack frame
+ ret	
+
+
+;----------------------------------------------------------------------
+; Surface block drawer for mip level 1
+;
+; void R_DrawSurfaceBlock8_mip1 (void)
+;
+; Draws _r_numvblocks vertically stacked blocks, 8 rows of 8 pixels each.
+; Takes no stack arguments; all inputs are globals.
+;
+; Reads:  _r_lightptr, _r_numvblocks, _prowdestbase, _pbasesource,
+;         _r_lightwidth, _sourcetstep, _surfrowbytes, _r_sourcemax,
+;         _r_stepback
+; Writes: _r_lightptr, _lightright, _lightrightstep, _lightdelta,
+;         _lightdeltastep, sb_v
+; Saves and restores ebp, edi, esi, ebx; eax, ecx and edx are overwritten.
+;
+; Register roles inside the row loop:
+;   esi = source row, edi = destination row
+;   edx = light value for the current pixel (dh selects the lookup row)
+;   ebp = light step per pixel
+;   ebx, ecx = lookup indices: high byte = dh, low byte = source texel
+;   eax = four output pixels being assembled, then scratch for the steps
+;
+; The 12345678h displacements are placeholders; _R_Surf8Patch overwrites
+; the four bytes in front of each LBPatch label with the value of _colormap.
+;----------------------------------------------------------------------
+
+ align 4	
+ public _R_DrawSurfaceBlock8_mip1	
+_R_DrawSurfaceBlock8_mip1:	
+ push ebp	; preserve caller's stack frame
+ push edi	
+ push esi	; preserve register variables
+ push ebx	
+
+;		for (v=0 ; v<numvblocks ; v++)
+;		{
+ mov ebx,ds:dword ptr[_r_lightptr]	
+ mov eax,ds:dword ptr[_r_numvblocks]	
+
+ mov ds:dword ptr[sb_v],eax	
+ mov edi,ds:dword ptr[_prowdestbase]	
+
+ mov esi,ds:dword ptr[_pbasesource]	
+
+Lv_loop_mip1:	
+
+;			lightleft = lightptr[0];
+;			lightright = lightptr[1];
+;			lightdelta = (lightleft - lightright) & 0xFFFFF;
+ mov eax,ds:dword ptr[ebx]	; lightleft
+ mov edx,ds:dword ptr[4+ebx]	; lightright
+
+ mov ebp,eax	
+ mov ecx,ds:dword ptr[_r_lightwidth]	
+
+ mov ds:dword ptr[_lightright],edx	
+ sub ebp,edx	
+
+ and ebp,0FFFFFh	
+ lea ebx,ds:dword ptr[ebx+ecx*4]	
+
+;			lightptr += lightwidth;
+ mov ds:dword ptr[_r_lightptr],ebx	
+
+;			lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
+;			lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
+;			lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
+;					0xF0000000;
+ mov ecx,ds:dword ptr[4+ebx]	; lightptr[1]
+ mov ebx,ds:dword ptr[ebx]	; lightptr[0]
+
+ sub ebx,eax	
+ sub ecx,edx	
+
+; the top nibble of ebp doubles as the row counter: it starts at 7, and the
+; 0Fh added with lightdeltastep after each row produces a carry 7 times,
+; so the row loop below runs 8 times
+ sar ecx,3	
+ or ebp,070000000h	
+
+ sar ebx,3	
+ mov ds:dword ptr[_lightrightstep],ecx	
+
+ sub ebx,ecx	
+ and ebx,0FFFFFh	
+
+ or ebx,0F0000000h	
+ sub ecx,ecx	; high word must be 0 in loop for addressing
+
+ mov ds:dword ptr[_lightdeltastep],ebx	
+ sub ebx,ebx	; high word must be 0 in loop for addressing
+
+; one row per pass: pixels are produced right to left (source offsets 7
+; down to 0), four at a time in eax, and stored as dwords at edi+4 and +0
+Lblockloop8_mip1:	
+ mov ds:dword ptr[_lightdelta],ebp	
+ mov cl,ds:byte ptr[6+esi]	
+
+ sar ebp,3	
+ mov bh,dh	
+
+ mov bl,ds:byte ptr[7+esi]	
+ add edx,ebp	
+
+ mov ch,dh	
+ add edx,ebp	
+
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch22:	
+ mov bl,ds:byte ptr[5+esi]	
+
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch23:	
+ mov cl,ds:byte ptr[4+esi]	
+
+ mov bh,dh	
+ add edx,ebp	
+
+ ror eax,16	
+ mov ch,dh	
+
+ add edx,ebp	
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch24:	
+
+ mov bl,ds:byte ptr[3+esi]	
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch25:	
+
+ mov cl,ds:byte ptr[2+esi]	
+ mov ds:dword ptr[4+edi],eax	
+
+ mov bh,dh	
+ add edx,ebp	
+
+ mov ch,dh	
+ add edx,ebp	
+
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch26:	
+ mov bl,ds:byte ptr[1+esi]	
+
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch27:	
+ mov cl,ds:byte ptr[esi]	
+
+ mov bh,dh	
+ add edx,ebp	
+
+ ror eax,16	
+ mov ch,dh	
+
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch28:	
+ mov edx,ds:dword ptr[_lightright]	
+
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch29:	
+ mov ebp,ds:dword ptr[_lightdelta]	
+
+; advance to the next row: source, destination, right-edge light and delta
+; (each step is loaded into eax first)
+ mov ds:dword ptr[edi],eax	
+ mov eax,ds:dword ptr[_sourcetstep]	
+
+ add esi,eax	
+ mov eax,ds:dword ptr[_surfrowbytes]	
+
+ add edi,eax	
+ mov eax,ds:dword ptr[_lightrightstep]	
+
+ add edx,eax	
+ mov eax,ds:dword ptr[_lightdeltastep]	
+
+; the mov leaves the flags alone, so jc tests the carry of the add to ebp
+ add ebp,eax	
+ mov ds:dword ptr[_lightright],edx	
+
+ jc Lblockloop8_mip1	
+
+;			if (pbasesource >= r_sourcemax)
+;				pbasesource -= stepback;
+
+ cmp esi,ds:dword ptr[_r_sourcemax]	
+ jb LSkip_mip1	
+ sub esi,ds:dword ptr[_r_stepback]	
+LSkip_mip1:	
+
+ mov ebx,ds:dword ptr[_r_lightptr]	
+ dec ds:dword ptr[sb_v]	
+
+ jnz Lv_loop_mip1	
+
+ pop ebx	; restore register variables
+ pop esi	
+ pop edi	
+ pop ebp	; restore the caller's stack frame
+ ret	
+
+
+;----------------------------------------------------------------------
+; Surface block drawer for mip level 2
+;
+; void R_DrawSurfaceBlock8_mip2 (void)
+;
+; Draws _r_numvblocks vertically stacked blocks, 4 rows of 4 pixels each.
+; Takes no stack arguments; all inputs are globals.
+;
+; Reads:  _r_lightptr, _r_numvblocks, _prowdestbase, _pbasesource,
+;         _r_lightwidth, _sourcetstep, _surfrowbytes, _r_sourcemax,
+;         _r_stepback
+; Writes: _r_lightptr, _lightright, _lightrightstep, _lightdelta,
+;         _lightdeltastep, sb_v
+; Saves and restores ebp, edi, esi, ebx; eax, ecx and edx are overwritten.
+;
+; Register roles inside the row loop:
+;   esi = source row, edi = destination row
+;   edx = light value for the current pixel (dh selects the lookup row)
+;   ebp = light step per pixel
+;   ebx, ecx = lookup indices: high byte = dh, low byte = source texel
+;   eax = four output pixels being assembled, then scratch for the steps
+;
+; The 12345678h displacements are placeholders; _R_Surf8Patch overwrites
+; the four bytes in front of each LBPatch label with the value of _colormap.
+;----------------------------------------------------------------------
+
+ align 4	
+ public _R_DrawSurfaceBlock8_mip2	
+_R_DrawSurfaceBlock8_mip2:	
+ push ebp	; preserve caller's stack frame
+ push edi	
+ push esi	; preserve register variables
+ push ebx	
+
+;		for (v=0 ; v<numvblocks ; v++)
+;		{
+ mov ebx,ds:dword ptr[_r_lightptr]	
+ mov eax,ds:dword ptr[_r_numvblocks]	
+
+ mov ds:dword ptr[sb_v],eax	
+ mov edi,ds:dword ptr[_prowdestbase]	
+
+ mov esi,ds:dword ptr[_pbasesource]	
+
+Lv_loop_mip2:	
+
+;			lightleft = lightptr[0];
+;			lightright = lightptr[1];
+;			lightdelta = (lightleft - lightright) & 0xFFFFF;
+ mov eax,ds:dword ptr[ebx]	; lightleft
+ mov edx,ds:dword ptr[4+ebx]	; lightright
+
+ mov ebp,eax	
+ mov ecx,ds:dword ptr[_r_lightwidth]	
+
+ mov ds:dword ptr[_lightright],edx	
+ sub ebp,edx	
+
+ and ebp,0FFFFFh	
+ lea ebx,ds:dword ptr[ebx+ecx*4]	
+
+;			lightptr += lightwidth;
+ mov ds:dword ptr[_r_lightptr],ebx	
+
+;			lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
+;			lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
+;			lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
+;					0xF0000000;
+ mov ecx,ds:dword ptr[4+ebx]	; lightptr[1]
+ mov ebx,ds:dword ptr[ebx]	; lightptr[0]
+
+ sub ebx,eax	
+ sub ecx,edx	
+
+; the top nibble of ebp doubles as the row counter: it starts at 3, and the
+; 0Fh added with lightdeltastep after each row produces a carry 3 times,
+; so the row loop below runs 4 times
+ sar ecx,2	
+ or ebp,030000000h	
+
+ sar ebx,2	
+ mov ds:dword ptr[_lightrightstep],ecx	
+
+ sub ebx,ecx	
+
+ and ebx,0FFFFFh	
+
+ or ebx,0F0000000h	
+ sub ecx,ecx	; high word must be 0 in loop for addressing
+
+ mov ds:dword ptr[_lightdeltastep],ebx	
+ sub ebx,ebx	; high word must be 0 in loop for addressing
+
+; one row per pass: the four pixels are produced right to left (source
+; offsets 3 down to 0) in eax and stored as one dword at edi
+Lblockloop8_mip2:	
+ mov ds:dword ptr[_lightdelta],ebp	
+ mov cl,ds:byte ptr[2+esi]	
+
+ sar ebp,2	
+ mov bh,dh	
+
+ mov bl,ds:byte ptr[3+esi]	
+ add edx,ebp	
+
+ mov ch,dh	
+ add edx,ebp	
+
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch18:	
+ mov bl,ds:byte ptr[1+esi]	
+
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch19:	
+ mov cl,ds:byte ptr[esi]	
+
+ mov bh,dh	
+ add edx,ebp	
+
+ ror eax,16	
+ mov ch,dh	
+
+ mov ah,ds:byte ptr[12345678h+ebx]	
+LBPatch20:	
+ mov edx,ds:dword ptr[_lightright]	
+
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch21:	
+ mov ebp,ds:dword ptr[_lightdelta]	
+
+; advance to the next row: source, destination, right-edge light and delta
+; (each step is loaded into eax first)
+ mov ds:dword ptr[edi],eax	
+ mov eax,ds:dword ptr[_sourcetstep]	
+
+ add esi,eax	
+ mov eax,ds:dword ptr[_surfrowbytes]	
+
+ add edi,eax	
+ mov eax,ds:dword ptr[_lightrightstep]	
+
+ add edx,eax	
+ mov eax,ds:dword ptr[_lightdeltastep]	
+
+; the mov leaves the flags alone, so jc tests the carry of the add to ebp
+ add ebp,eax	
+ mov ds:dword ptr[_lightright],edx	
+
+ jc Lblockloop8_mip2	
+
+;			if (pbasesource >= r_sourcemax)
+;				pbasesource -= stepback;
+
+ cmp esi,ds:dword ptr[_r_sourcemax]	
+ jb LSkip_mip2	
+ sub esi,ds:dword ptr[_r_stepback]	
+LSkip_mip2:	
+
+ mov ebx,ds:dword ptr[_r_lightptr]	
+ dec ds:dword ptr[sb_v]	
+
+ jnz Lv_loop_mip2	
+
+ pop ebx	; restore register variables
+ pop esi	
+ pop edi	
+ pop ebp	; restore the caller's stack frame
+ ret	
+
+
+;----------------------------------------------------------------------
+; Surface block drawer for mip level 3
+;
+; void R_DrawSurfaceBlock8_mip3 (void)
+;
+; Draws _r_numvblocks vertically stacked blocks, 2 rows of 2 pixels each.
+; Both rows are written out in line, so there is no row loop and no row
+; counter; pixels are stored one byte at a time.
+; Takes no stack arguments; all inputs are globals.
+;
+; Reads:  _r_lightptr, _r_numvblocks, _prowdestbase, _pbasesource,
+;         _r_lightwidth, _sourcetstep, _surfrowbytes, _r_sourcemax,
+;         _r_stepback
+; Writes: _r_lightptr, _lightright, _lightrightstep, _lightdelta,
+;         _lightdeltastep, sb_v
+; Saves and restores ebp, edi, esi, ebx; eax, ecx and edx are overwritten.
+;
+; The 12345678h displacements are placeholders; _R_Surf8Patch overwrites
+; the four bytes in front of each LBPatch label with the value of _colormap.
+;----------------------------------------------------------------------
+
+ align 4	
+ public _R_DrawSurfaceBlock8_mip3	
+_R_DrawSurfaceBlock8_mip3:	
+ push ebp	; preserve caller's stack frame
+ push edi	
+ push esi	; preserve register variables
+ push ebx	
+
+;		for (v=0 ; v<numvblocks ; v++)
+;		{
+ mov ebx,ds:dword ptr[_r_lightptr]	
+ mov eax,ds:dword ptr[_r_numvblocks]	
+
+ mov ds:dword ptr[sb_v],eax	
+ mov edi,ds:dword ptr[_prowdestbase]	
+
+ mov esi,ds:dword ptr[_pbasesource]	
+
+Lv_loop_mip3:	
+
+;			lightleft = lightptr[0];
+;			lightright = lightptr[1];
+;			lightdelta = (lightleft - lightright) & 0xFFFFF;
+ mov eax,ds:dword ptr[ebx]	; lightleft
+ mov edx,ds:dword ptr[4+ebx]	; lightright
+
+ mov ebp,eax	
+ mov ecx,ds:dword ptr[_r_lightwidth]	
+
+ mov ds:dword ptr[_lightright],edx	
+ sub ebp,edx	
+
+ and ebp,0FFFFFh	
+ lea ebx,ds:dword ptr[ebx+ecx*4]	
+
+ mov ds:dword ptr[_lightdelta],ebp	
+;			lightptr += lightwidth;
+ mov ds:dword ptr[_r_lightptr],ebx	
+
+;			lightleftstep = (lightptr[0] - lightleft) >> blockdivshift;
+;			lightrightstep = (lightptr[1] - lightright) >> blockdivshift;
+;			lightdeltastep = ((lightleftstep - lightrightstep) & 0xFFFFF) |
+;					0xF0000000;
+ mov ecx,ds:dword ptr[4+ebx]	; lightptr[1]
+ mov ebx,ds:dword ptr[ebx]	; lightptr[0]
+
+ sub ebx,eax	
+ sub ecx,edx	
+
+ sar ecx,1	
+
+ sar ebx,1	
+ mov ds:dword ptr[_lightrightstep],ecx	
+
+ sub ebx,ecx	
+ and ebx,0FFFFFh	
+
+; ebp = light step per pixel for the first row
+ sar ebp,1	
+ or ebx,0F0000000h	
+
+ mov ds:dword ptr[_lightdeltastep],ebx	
+ sub ebx,ebx	; high word must be 0 in loop for addressing
+
+; first row: right pixel (source offset 1) uses the right-edge light, left
+; pixel (offset 0) the light one step further
+ mov bl,ds:byte ptr[1+esi]	
+ sub ecx,ecx	; high word must be 0 in loop for addressing
+
+ mov bh,dh	
+ mov cl,ds:byte ptr[esi]	
+
+ add edx,ebp	
+ mov ch,dh	
+
+ mov al,ds:byte ptr[12345678h+ebx]	
+LBPatch16:	
+ mov edx,ds:dword ptr[_lightright]	
+
+ mov ds:byte ptr[1+edi],al	
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch17:	
+
+ mov ds:byte ptr[edi],al	
+ mov eax,ds:dword ptr[_sourcetstep]	
+
+; second row: step source and destination down one row, then
+; ebp = (lightdelta + lightdeltastep) >> 1 and edx = lightright + lightrightstep
+ add esi,eax	
+ mov eax,ds:dword ptr[_surfrowbytes]	
+
+ add edi,eax	
+ mov eax,ds:dword ptr[_lightdeltastep]	
+
+ mov ebp,ds:dword ptr[_lightdelta]	
+ mov cl,ds:byte ptr[esi]	
+
+ add ebp,eax	
+ mov eax,ds:dword ptr[_lightrightstep]	
+
+ sar ebp,1	
+ add edx,eax	
+
+ mov bh,dh	
+ mov bl,ds:byte ptr[1+esi]	
+
+ add edx,ebp	
+ mov ch,dh	
+
+ mov al,ds:byte ptr[12345678h+ebx]	
+LBPatch30:	
+ mov edx,ds:dword ptr[_sourcetstep]	
+
+ mov ds:byte ptr[1+edi],al	
+ mov al,ds:byte ptr[12345678h+ecx]	
+LBPatch31:	
+
+ mov ds:byte ptr[edi],al	
+ mov ebp,ds:dword ptr[_surfrowbytes]	
+
+; step past the second row (edx = _sourcetstep, ebp = _surfrowbytes here)
+ add esi,edx	
+ add edi,ebp	
+
+;			if (pbasesource >= r_sourcemax)
+;				pbasesource -= stepback;
+
+ cmp esi,ds:dword ptr[_r_sourcemax]	
+ jb LSkip_mip3	
+ sub esi,ds:dword ptr[_r_stepback]	
+LSkip_mip3:	
+
+ mov ebx,ds:dword ptr[_r_lightptr]	
+ dec ds:dword ptr[sb_v]	
+
+ jnz Lv_loop_mip3	
+
+ pop ebx	; restore register variables
+ pop esi	
+ pop edi	
+ pop ebp	; restore the caller's stack frame
+ ret	
+
+
+ public _R_Surf8End	
+_R_Surf8End:	
+
+;----------------------------------------------------------------------
+; Code patching routines
+;----------------------------------------------------------------------
+_TEXT ENDS
+_DATA SEGMENT	
+
+; Patch table walked by _R_Surf8Patch.  Each LBPatchN label sits directly
+; after an instruction with a 12345678h placeholder displacement, so
+; LBPatchN-4 is the address of that instruction's 4-byte displacement.
+; _R_Surf8Patch processes exactly 32 entries; keep its count in step with
+; this table.
+ align 4	
+LPatchTable8:	
+ dd LBPatch0-4	
+ dd LBPatch1-4	
+ dd LBPatch2-4	
+ dd LBPatch3-4	
+ dd LBPatch4-4	
+ dd LBPatch5-4	
+ dd LBPatch6-4	
+ dd LBPatch7-4	
+ dd LBPatch8-4	
+ dd LBPatch9-4	
+ dd LBPatch10-4	
+ dd LBPatch11-4	
+ dd LBPatch12-4	
+ dd LBPatch13-4	
+ dd LBPatch14-4	
+ dd LBPatch15-4	
+ dd LBPatch16-4	
+ dd LBPatch17-4	
+ dd LBPatch18-4	
+ dd LBPatch19-4	
+ dd LBPatch20-4	
+ dd LBPatch21-4	
+ dd LBPatch22-4	
+ dd LBPatch23-4	
+ dd LBPatch24-4	
+ dd LBPatch25-4	
+ dd LBPatch26-4	
+ dd LBPatch27-4	
+ dd LBPatch28-4	
+ dd LBPatch29-4	
+ dd LBPatch30-4	
+ dd LBPatch31-4	
+
+_DATA ENDS
+_TEXT SEGMENT	
+
+;----------------------------------------------------------------------
+; void R_Surf8Patch (void)
+;
+; Stores the current value of _colormap into each of the 32 code locations
+; listed in LPatchTable8, replacing the placeholder displacements in the
+; block drawers.
+;
+; Saves and restores ebx; eax, ecx and edx are overwritten.
+; NOTE(review): this writes into the code segment, so those pages have to
+; be writable when it runs -- presumably arranged by the caller; verify.
+;----------------------------------------------------------------------
+ align 4	
+ public _R_Surf8Patch	
+_R_Surf8Patch:	
+ push ebx	
+
+ mov eax,ds:dword ptr[_colormap]	; value written at every patch site
+ mov ebx,offset LPatchTable8
+ mov ecx,32	; number of table entries
+LPatchLoop8:	
+ mov edx,ds:dword ptr[ebx]	; edx = address of the next displacement
+ add ebx,4	
+ mov ds:dword ptr[edx],eax	
+ dec ecx	
+ jnz LPatchLoop8	
+
+ pop ebx	
+
+ ret	
+
+_TEXT ENDS
+endif	;id386
+
+ END
+
--- a/ref_soft/r_varsa.asm
+++ b/ref_soft/r_varsa.asm
@@ -0,0 +1,220 @@
+ .386P
+ .model FLAT
+;
+; d_varsa.s
+;
+
+include qasm.inc
+include d_if.inc
+
+if	id386
+
+_DATA SEGMENT	
+
+;-------------------------------------------------------
+; ASM-only variables
+;-------------------------------------------------------
+; Single-precision constants, exported with "public" so that other modules
+; can link against them.
+ public float_1, float_particle_z_clip, float_point5	
+ public float_minus_1, float_0	
+float_0 dd 0.0	
+float_1 dd 1.0	
+float_minus_1 dd -1.0	
+float_particle_z_clip dd PARTICLE_Z_CLIP	; PARTICLE_Z_CLIP is not defined in this file; presumably from an include - confirm
+float_point5 dd 0.5	
+
+; Exported single-precision constants; the trailing comments give each
+; value as a power of two where that applies.
+ public fp_16, fp_64k, fp_1m, fp_64kx64k	
+ public fp_1m_minus_1	
+ public fp_8	
+fp_1m dd 1048576.0	; 2^20
+fp_1m_minus_1 dd 1048575.0	; 2^20 - 1
+fp_64k dd 65536.0	; 2^16
+fp_8 dd 8.0	
+fp_16 dd 16.0	
+fp_64kx64k dd 04f000000h	; raw IEEE-754 single bits for 2^31 = (float)0x8000*0x10000 (not 2^32, despite the name)
+
+
+; Exported constants stored as raw dword bit patterns rather than as
+; floating-point literals.
+ public FloatZero, Float2ToThe31nd, FloatMinus2ToThe31nd	
+FloatZero dd 0	; all-zero bits, i.e. +0.0 when read as a single
+Float2ToThe31nd dd 04f000000h	; IEEE-754 single bits for 2^31
+FloatMinus2ToThe31nd dd 0cf000000h	; IEEE-754 single bits for -2^31
+
+; Exported dword, zero-initialised; nothing else in this file touches it.
+; NOTE(review): the leading underscore suggests it is shared with C code
+; and the name suggests a "brush model active" flag - confirm.
+ public _r_bmodelactive	
+_r_bmodelactive dd 0	
+
+
+;-------------------------------------------------------
+; global refresh variables
+;-------------------------------------------------------
+
+; FIXME: put all refresh variables into one contiguous block. Make into one
+; big structure, like cl or sv?
+
+; Nine exported dwords, dword-aligned and zero-initialised here. They are
+; laid out in memory in the order of the definitions below.
+; NOTE(review): the leading underscores suggest these are shared with C
+; code, and the names suggest s/z, t/z and 1/z steps (per u, per v) and
+; origins - confirm the types (float vs. integer) against the writers.
+ align 4	
+ public _d_sdivzstepu	
+ public _d_tdivzstepu	
+ public _d_zistepu	
+ public _d_sdivzstepv	
+ public _d_tdivzstepv	
+ public _d_zistepv	
+ public _d_sdivzorigin	
+ public _d_tdivzorigin	
+ public _d_ziorigin	
+_d_sdivzstepu dd 0	
+_d_tdivzstepu dd 0	
+_d_zistepu dd 0	
+_d_sdivzstepv dd 0	
+_d_tdivzstepv dd 0	
+_d_zistepv dd 0	
+_d_sdivzorigin dd 0	
+_d_tdivzorigin dd 0	
+_d_ziorigin dd 0	
+
+; Four exported dwords, zero-initialised here.
+; NOTE(review): presumably texture-coordinate adjustments and extents
+; shared with C code - confirm meaning and type against the writers.
+ public _sadjust	
+ public _tadjust	
+ public _bbextents	
+ public _bbextentt	
+_sadjust dd 0	
+_tadjust dd 0	
+_bbextents dd 0	
+_bbextentt dd 0	
+
+; Six exported dwords, zero-initialised here. The "public" list is in a
+; different order from the definitions; memory layout follows the
+; definitions (_cacheblock, _cachewidth, _d_viewbuffer, ...).
+; NOTE(review): presumably buffer pointers and row widths shared with C
+; code - confirm against the writers.
+ public _cacheblock	
+ public _d_viewbuffer	
+ public _cachewidth	
+ public _d_pzbuffer	
+ public _d_zrowbytes	
+ public _d_zwidth	
+_cacheblock dd 0	
+_cachewidth dd 0	
+_d_viewbuffer dd 0	
+_d_pzbuffer dd 0	
+_d_zrowbytes dd 0	
+_d_zwidth dd 0	
+
+
+;-------------------------------------------------------
+; ASM-only variables
+;-------------------------------------------------------
+; Exported, zero-initialised dword scratch variables. This file only
+; reserves the storage; every read and write happens in other modules.
+; NOTE(review): the names suggest per-span texture-mapping state (s/t
+; coordinates, their fractions and next values, 8- and 16-pixel steps)
+; - confirm against the span-drawing code that uses them.
+ public izi	
+izi dd 0	
+
+ public pbase, s, t, sfracf, tfracf, snext, tnext	
+ public spancountminus1, zi16stepu, sdivz16stepu, tdivz16stepu	
+ public zi8stepu, sdivz8stepu, tdivz8stepu, pz	
+s dd 0	
+t dd 0	
+snext dd 0	
+tnext dd 0	
+sfracf dd 0	
+tfracf dd 0	
+pbase dd 0	
+zi8stepu dd 0	
+sdivz8stepu dd 0	
+tdivz8stepu dd 0	
+zi16stepu dd 0	
+sdivz16stepu dd 0	
+tdivz16stepu dd 0	
+spancountminus1 dd 0	
+pz dd 0	
+
+ public izistep	
+izistep dd 0	
+
+;-------------------------------------------------------
+; local variables for d_draw16.s
+;-------------------------------------------------------
+
+ public reciprocal_table_16, entryvec_table_16	
+; 1/2, 1/3, 1/4, 1/5, 1/6, 1/7, 1/8, 1/9, 1/10, 1/11, 1/12, 1/13,
+; 1/14, and 1/15 in 0.32 form
+; (14 entries, one per divisor n = 2 through 15)
+; NOTE(review): numerically each entry is floor(2^31 / n), e.g. the first
+; is 040000000h = 2^30, so "0.32 form" only holds if the consumer applies
+; an extra factor of two - confirm against the code that uses this table.
+reciprocal_table_16 dd 040000000h, 02aaaaaaah, 020000000h	
+ dd 019999999h, 015555555h, 012492492h	
+ dd 010000000h, 0e38e38eh, 0ccccccch, 0ba2e8bah	
+ dd 0aaaaaaah, 09d89d89h, 09249249h, 08888888h	
+
+; Entry points defined in another module, declared here so the table
+; below can reference them.
+ externdef Entry2_16:dword	
+ externdef Entry3_16:dword	
+ externdef Entry4_16:dword	
+ externdef Entry5_16:dword	
+ externdef Entry6_16:dword	
+ externdef Entry7_16:dword	
+ externdef Entry8_16:dword	
+ externdef Entry9_16:dword	
+ externdef Entry10_16:dword	
+ externdef Entry11_16:dword	
+ externdef Entry12_16:dword	
+ externdef Entry13_16:dword	
+ externdef Entry14_16:dword	
+ externdef Entry15_16:dword	
+ externdef Entry16_16:dword	
+
+; 16 dword entries. Slot 0 is 0 because no Entry1_16 is declared above;
+; slots 1 through 15 hold the addresses of Entry2_16 through Entry16_16.
+; NOTE(review): presumably a jump table indexed by (pixel count - 1)
+; - confirm against the dispatch code.
+entryvec_table_16 dd 0, Entry2_16, Entry3_16, Entry4_16	
+ dd Entry5_16, Entry6_16, Entry7_16, Entry8_16	
+ dd Entry9_16, Entry10_16, Entry11_16, Entry12_16	
+ dd Entry13_16, Entry14_16, Entry15_16, Entry16_16	
+
+;-------------------------------------------------------
+; local variables for d_parta.s
+;-------------------------------------------------------
+; Exported dwords; all are zero-initialised except DP_32768, which holds
+; the single-precision constant 32768.0 (2^15).
+ public DP_Count, DP_u, DP_v, DP_32768, DP_Color, DP_Pix
+DP_Count dd 0	
+DP_u dd 0	
+DP_v dd 0	
+DP_32768 dd 32768.0	
+DP_Color dd 0	
+DP_Pix dd 0	
+
+
+; The DP_NxN entry points and their dispatch table are commented out, so
+; no DP_EntryTable is assembled into this module.
+;externdef DP_1x1:dword	
+;externdef DP_2x2:dword	
+;externdef DP_3x3:dword	
+;externdef DP_4x4:dword	
+
+;DP_EntryTable dd DP_1x1, DP_2x2, DP_3x3, DP_4x4	
+
+;
+; advancetable is 8 bytes, but points to the middle of that range so negative
+; offsets will work
+;
+; NOTE(review): as assembled here the advancetable label is at the FIRST of
+; the two dwords, not the middle. Any "middle of the range" addressing must
+; therefore come from the referencing code (e.g. advancetable+4) - confirm
+; against the users of this symbol.
+;
+ public advancetable, sstep, tstep, pspantemp, counttemp, jumptemp	
+advancetable dd 0, 0	
+sstep dd 0	
+tstep dd 0	
+
+pspantemp dd 0	
+counttemp dd 0	
+jumptemp dd 0	
+
+; 1/2, 1/3, 1/4, 1/5, 1/6, and 1/7 in 0.32 form
+; (6 entries, one per divisor n = 2 through 7; the values are identical to
+; the first six entries of reciprocal_table_16, i.e. floor(2^31 / n))
+; The "public" line below is commented out, so this table is not exported,
+; and this file contains no code that reads it.
+; public reciprocal_table, entryvec_table	
+reciprocal_table dd 040000000h, 02aaaaaaah, 020000000h	
+ dd 019999999h, 015555555h, 012492492h	
+
+
+; The 8-pixel entry points and their table are commented out, so no
+; entryvec_table is assembled into this module.
+; externdef Entry2_8:dword	
+; externdef Entry3_8:dword	
+; externdef Entry4_8:dword	
+; externdef Entry5_8:dword	
+; externdef Entry6_8:dword	
+; externdef Entry7_8:dword	
+; externdef Entry8_8:dword	
+
+;entryvec_table dd 0, Entry2_8, Entry3_8, Entry4_8	
+; dd Entry5_8, Entry6_8, Entry7_8, Entry8_8	
+
+; Entry points defined in another module, declared here so the table
+; below can reference them.
+ externdef Spr8Entry2_8:dword	
+ externdef Spr8Entry3_8:dword	
+ externdef Spr8Entry4_8:dword	
+ externdef Spr8Entry5_8:dword	
+ externdef Spr8Entry6_8:dword	
+ externdef Spr8Entry7_8:dword	
+ externdef Spr8Entry8_8:dword	
+
+; 8 exported dword entries. Slot 0 is 0 because no Spr8Entry1_8 is
+; declared above; slots 1 through 7 hold Spr8Entry2_8 through Spr8Entry8_8.
+; NOTE(review): presumably a jump table indexed by (pixel count - 1)
+; - confirm against the dispatch code.
+ public spr8entryvec_table	
+spr8entryvec_table dd 0, Spr8Entry2_8, Spr8Entry3_8, Spr8Entry4_8	
+ dd Spr8Entry5_8, Spr8Entry6_8, Spr8Entry7_8, Spr8Entry8_8	
+
+
+_DATA ENDS
+endif	; id386
+ END
--- a/ref_soft/rand1k.h
+++ b/ref_soft/rand1k.h
@@ -0,0 +1,123 @@
+/*
+Copyright (C) 1997-2001 Id Software, Inc.
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
+
+See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+*/
+// 1K random numbers in the range 0-255
+0, 144, 49, 207, 149, 122, 89, 229, 210, 191,
+44, 219, 181, 131, 77, 3, 23, 93, 37, 42,
+253, 114, 30, 1, 2, 96, 136, 146, 154, 155,
+42, 169, 115, 90, 14, 155, 200, 205, 133, 77,
+224, 186, 244, 236, 138, 36, 118, 60, 220, 53,
+199, 215, 255, 255, 156, 100, 68, 76, 215, 6,
+96, 23, 173, 14, 2, 235, 70, 69, 150, 176,
+214, 185, 124, 52, 190, 119, 117, 242, 190, 27,
+153, 98, 188, 155, 146, 92, 38, 57, 108, 205,
+132, 253, 192, 88, 43, 168, 125, 16, 179, 129,
+37, 243, 36, 231, 177, 77, 109, 18, 247, 174,
+39, 224, 210, 149, 48, 45, 209, 121, 39, 129,
+187, 103, 71, 145, 174, 193, 184, 121, 31, 94,
+213, 8, 132, 169, 109, 26, 243, 235, 140, 88,
+120, 95, 216, 81, 116, 69, 251, 76, 189, 145,
+50, 194, 214, 101, 128, 227, 7, 254, 146, 12,
+136, 49, 215, 160, 168, 50, 215, 31, 28, 190,
+80, 240, 73, 86, 35, 187, 213, 181, 153, 191,
+64, 36, 0, 15, 206, 218, 53, 29, 141, 3,
+29, 116, 192, 175, 139, 18, 111, 51, 178, 74,
+111, 59, 147, 136, 160, 41, 129, 246, 178, 236,
+48, 86, 45, 254, 117, 255, 24, 160, 24, 112,
+238, 12, 229, 74, 58, 196, 105, 51, 160, 154,
+115, 119, 153, 162, 218, 212, 159, 184, 144, 96,
+47, 188, 142, 231, 62, 48, 154, 178, 149, 89,
+126, 20, 189, 156, 158, 176, 205, 38, 147, 222,
+233, 157, 186, 11, 170, 249, 80, 145, 78, 44,
+27, 222, 217, 190, 39, 83, 20, 19, 164, 209,
+139, 114, 104, 76, 119, 128, 39, 82, 188, 80,
+211, 245, 223, 185, 76, 241, 32, 16, 200, 134,
+156, 244, 18, 224, 167, 82, 26, 129, 58, 74,
+235, 141, 169, 29, 126, 97, 127, 203, 130, 97,
+176, 136, 155, 101, 1, 181, 25, 159, 220, 125,
+191, 127, 97, 201, 141, 91, 244, 161, 45, 95,
+33, 190, 243, 156, 7, 84, 14, 163, 33, 216,
+221, 152, 184, 218, 3, 32, 181, 157, 55, 16,
+43, 159, 87, 81, 94, 169, 205, 206, 134, 156,
+204, 230, 37, 161, 103, 64, 34, 218, 16, 109,
+146, 77, 140, 57, 79, 28, 206, 34, 72, 201,
+229, 202, 190, 157, 92, 219, 58, 221, 58, 63,
+138, 252, 13, 20, 134, 109, 24, 66, 228, 59,
+37, 32, 238, 20, 12, 15, 86, 234, 102, 110,
+242, 214, 136, 215, 177, 101, 66, 1, 134, 244,
+102, 61, 149, 65, 175, 241, 111, 227, 1, 240,
+153, 201, 147, 36, 56, 98, 1, 106, 21, 168,
+218, 16, 207, 169, 177, 205, 135, 175, 36, 176,
+186, 199, 7, 222, 164, 180, 21, 141, 242, 15,
+70, 37, 251, 158, 74, 236, 94, 177, 55, 39,
+61, 133, 230, 27, 231, 113, 20, 200, 43, 249,
+198, 222, 53, 116, 0, 192, 29, 103, 79, 254,
+9, 64, 48, 63, 39, 158, 226, 240, 50, 199,
+165, 168, 232, 116, 235, 170, 38, 162, 145, 108,
+241, 138, 148, 137, 65, 101, 89, 9, 203, 50,
+17, 99, 151, 18, 50, 39, 164, 116, 154, 178,
+112, 175, 101, 213, 151, 51, 243, 224, 100, 252,
+47, 229, 147, 113, 160, 181, 12, 73, 66, 104,
+229, 181, 186, 229, 100, 101, 231, 79, 99, 146,
+90, 187, 190, 188, 189, 35, 51, 69, 174, 233,
+94, 132, 28, 232, 51, 132, 167, 112, 176, 23,
+20, 19, 7, 90, 78, 178, 36, 101, 17, 172,
+185, 50, 177, 157, 167, 139, 25, 139, 12, 249,
+118, 248, 186, 135, 174, 177, 95, 99, 12, 207,
+43, 15, 79, 200, 54, 82, 124, 2, 112, 130,
+155, 194, 102, 89, 215, 241, 159, 255, 13, 144,
+221, 99, 78, 72, 6, 156, 100, 4, 7, 116,
+219, 239, 102, 186, 156, 206, 224, 149, 152, 20,
+203, 118, 151, 150, 145, 208, 172, 87, 2, 68,
+87, 59, 197, 95, 222, 29, 185, 161, 228, 46,
+137, 230, 199, 247, 50, 230, 204, 244, 217, 227,
+160, 47, 157, 67, 64, 187, 201, 43, 182, 123,
+20, 206, 218, 31, 78, 146, 121, 195, 49, 186,
+254, 3, 165, 177, 44, 18, 70, 173, 214, 142,
+95, 199, 59, 163, 59, 52, 248, 72, 5, 196,
+38, 12, 2, 89, 164, 87, 106, 106, 23, 139,
+179, 86, 168, 224, 137, 145, 13, 119, 66, 109,
+221, 124, 22, 144, 181, 199, 221, 217, 75, 221,
+165, 191, 212, 195, 223, 232, 233, 133, 112, 27,
+90, 210, 109, 43, 0, 168, 198, 16, 22, 98,
+175, 206, 39, 36, 12, 88, 4, 250, 165, 13,
+234, 163, 110, 5, 62, 100, 167, 200, 5, 211,
+35, 162, 140, 251, 118, 54, 76, 200, 87, 123,
+155, 26, 252, 193, 38, 116, 182, 255, 198, 164,
+159, 242, 176, 74, 145, 74, 140, 182, 63, 139,
+126, 243, 171, 195, 159, 114, 204, 190, 253, 52,
+161, 232, 151, 235, 129, 125, 115, 227, 240, 46,
+64, 51, 187, 240, 160, 10, 164, 8, 142, 139,
+114, 15, 254, 32, 153, 12, 44, 169, 85, 80,
+167, 105, 109, 56, 173, 42, 127, 129, 205, 111,
+1, 86, 96, 32, 211, 187, 228, 164, 166, 131,
+187, 188, 245, 119, 92, 28, 231, 210, 116, 27,
+222, 194, 10, 106, 239, 17, 42, 54, 29, 151,
+30, 158, 148, 176, 187, 234, 171, 76, 207, 96,
+255, 197, 52, 43, 99, 46, 148, 50, 245, 48,
+97, 77, 30, 50, 11, 197, 194, 225, 0, 114,
+109, 205, 118, 126, 191, 61, 143, 23, 236, 228,
+219, 15, 125, 161, 191, 193, 65, 232, 202, 51,
+141, 13, 133, 202, 180, 6, 187, 141, 234, 224,
+204, 78, 101, 123, 13, 166, 0, 196, 193, 56,
+39, 14, 171, 8, 88, 178, 204, 111, 251, 162,
+75, 122, 223, 20, 25, 36, 36, 235, 79, 95,
+208, 11, 208, 61, 229, 65, 68, 53, 58, 216,
+223, 227, 216, 155, 10, 44, 47, 91, 115, 47,
+228, 159, 139, 233
--- a/ref_soft/ref_soft.001
+++ b/ref_soft/ref_soft.001
--- a/ref_soft/ref_soft.def
+++ b/ref_soft/ref_soft.def
@@ -0,0 +1,2 @@
+EXPORTS
+	GetRefAPI
--- a/ref_soft/ref_soft.dsp
+++ b/ref_soft/ref_soft.dsp
--- a/ref_soft/ref_soft.plg
+++ b/ref_soft/ref_soft.plg
@@ -0,0 +1,17 @@
+--------------------Configuration: ref_soft - Win32 Release Alpha--------------------
+Begining build with project "G:\quake2\code\ref_soft\ref_soft.dsp", at root.
+Active configuration is Win32 (ALPHA) Dynamic-Link Library (based on Win32 (ALPHA) Dynamic-Link Library)
+
+Project's tools are:
+			"OLE Type Library Maker" with flags "/nologo /D "NDEBUG" /mktyplib203 /o NUL /win32 "
+			"C/C++ Compiler for Alpha" with flags "/nologo /QA21164 /MT /Gt0 /W3 /GX /Zi /O2 /D "WIN32" /D "NDEBUG" /D "_WINDOWS" /D "C_ONLY" /Fp".\ReleaseAXP/ref_soft.pch" /YX /Fo".\ReleaseAXP/" /Fd".\ReleaseAXP/" /FD /QAieee1 /c "
+			"Win32 Resource Compiler" with flags "/l 0x409 /d "NDEBUG" "
+			"Browser Database Maker" with flags "/nologo /o"..\ReleaseAXP/ref_soft.bsc" "
+			"COFF Linker for Alpha" with flags "kernel32.lib user32.lib gdi32.lib winmm.lib /nologo /subsystem:windows /dll /incremental:no /pdb:"..\ReleaseAXP/ref_soft.pdb" /debug /machine:ALPHA /def:".\ref_soft.def" /out:"..\ReleaseAXP/ref_soft.dll" /implib:"..\ReleaseAXP/ref_soft.lib" "
+			"Custom Build" with flags ""
+			"<Component 0xa>" with flags ""
+
+
+
+
+ref_soft.dll - 0 error(s), 0 warning(s)