DEF START
* THIS VERSION USES ALL THE OPTIMIZATIONS TO DATE.
* PLUS SCRATCHPAD UTILITIES AND INLINE SINE LOOKUP
* THANKS TO SOMETIMES99ER FOR WORKING OUT THE DATA!
* relocated to scratchpad - addresses worked
* out by hand! Use caution when modifying them!
* Scratchpad entry points of the four routines that START copies
* from SQRTX..SLAST to >8324. Each address is the previous address
* plus the assembled size of the routine in front of it, so any edit
* that changes the length of SQRTX, PLOTX, SMULTX or DRAWXX must be
* mirrored here by hand.
* copy of SQRTX (>2C = 44 bytes long)
SQRT EQU >8324
* copy of PLOTX (>3E = 62 bytes long)
PLOT EQU >8350
* copy of SMULTX (>1A = 26 bytes long)
SMULT EQU >838E
* copy of DRAWXX (>50 = 80 bytes long)
DRAWPX EQU >83A8
* first byte after the copied code
*FREE EQU >83F8 - only 8 bytes of scratchpad free!
* LABELS FOR SAVE UTILITY
* SLOAD and SFIRST both name the first word of the program, which is
* a branch over the data tables to the real entry point START.
* The matching SLAST label is at the very end of the file.
SLOAD
SFIRST
B @START
* array for highest pixel
* One byte per screen column, indexed by X1 in DRAWXX (despite the
* name). START fills every byte with 192; DRAWXX stores the Y of
* each pixel it plots, and only plots when the new Y is smaller
* than the stored one.
ROWS
BSS 256
* Backup copy of scratchpad >8320->83FF (224 bytes = 56 * 4).
* The program overwrites that area with its own routines and
* variables, so START saves it here and the end of the main program
* copies it back before the console interrupt routine is allowed
* to run again.
SCRATCH
BSS 224
* bits for pixel
* Eight single-bit byte masks, >80 first. PLOTX indexes this table
* with the low three bits of X and ORs the mask into the VDP byte.
BITS
DATA >8040,>2010,>0804,>0201
* SINE TABLE - 256 word entries covering one full circle.
* Values are signed fixed point with 7 fractional bits and run from
* -128 to 127. The table is read with indexed addressing, so the
* index register must hold entry*2 (the main loop masks it with
* >01FE).
SINTAB
DATA 0,3,6,9,13,16,19,22
DATA 25,28,31,34,37,40,43,46
DATA 49,52,55,58,60,63,66,68
DATA 71,74,76,79,81,84,86,88
DATA 91,93,95,97,99,101,103,105
DATA 106,108,110,111,113,114,116,117
DATA 118,119,121,122,122,123,124,125
DATA 126,126,127,127,127,127,127,127
DATA 127,127,127,127,127,127,127,126
DATA 126,125,124,123,122,122,121,119
DATA 118,117,116,114,113,111,110,108
DATA 106,105,103,101,99,97,95,93
DATA 91,88,86,84,81,79,76,74
DATA 71,68,66,63,60,58,55,52
DATA 49,46,43,40,37,34,31,28
DATA 25,22,19,16,13,9,6,3
DATA 0,-3,-6,-9,-13,-16,-19,-22
DATA -25,-28,-31,-34,-37,-40,-43,-46
DATA -49,-52,-55,-58,-60,-63,-66,-68
DATA -71,-74,-76,-79,-81,-84,-86,-88
DATA -91,-93,-95,-97,-99,-101,-103,-105
DATA -106,-108,-110,-111,-113,-114,-116,-117
DATA -118,-119,-121,-122,-122,-123,-124,-125
DATA -126,-126,-127,-127,-127,-128,-128,-128
DATA -128,-128,-128,-128,-127,-127,-127,-126
DATA -126,-125,-124,-123,-122,-122,-121,-119
DATA -118,-117,-116,-114,-113,-111,-110,-108
DATA -106,-105,-103,-101,-99,-97,-95,-93
DATA -91,-88,-86,-84,-81,-79,-76,-74
DATA -71,-68,-66,-63,-60,-58,-55,-52
DATA -49,-46,-43,-40,-37,-34,-31,-28
DATA -25,-22,-19,-16,-13,-9,-6,-3
* Variable-to-register assignments. These symbols are register
* NUMBERS, not memory addresses: write XF, never @XF.
* 9.7 signed fixed point variables in registers
XF EQU 15
XT EQU 14
YY EQU 13
* INTEGER VALUES
ZS EQU 12
* RET EQU 11 - for BL
ZI EQU 10
XL EQU 9
XI EQU 8
* 32-bit temp, uses 6 and 7. MPY leaves the most significant word
* of its product in T32 and the least significant word in T32B.
T32B EQU 7
T32 EQU 6
* Temp vars
T16 EQU 5
T1 EQU 4
T2 EQU 3
NEGFL EQU 2
* PIXEL VARIABLES
X1 EQU 1
Y1 EQU 0
* out of registers, use RAM (these ARE addresses: @ZX, @ZY).
* They are the two words between the workspace and the copied
* routines, which start at >8324.
ZX EQU >8320
ZY EQU >8322
* return save
* One word holding the caller's R11 while BITMAP or DRAWXX make
* nested BL calls. There is only one slot, so those two routines
* must never be active at the same time.
SAVE
BSS 2
* VDP setup list for bitmap mode, consumed by LOADRG.
* LOADRG sends each word to the VDP address port low byte first,
* then high byte, and stops at the >0000 terminator. The first
* eight words have >80+n in the high byte and so load VDP registers
* 1, 0 and 2-7 with their low bytes. The ninth word, >5B00, sets
* the VDP write address to >1B00 (the sprite attribute list), which
* is where LOADRG then writes its >D0 byte.
* background is transparent (the only color never redefined)
* Table addresses selected by these values:
* PDT - >0000
* SIT - >1800
* SDT - >1800
* CT - >2000
* SAL - >1B00
BMREGS DATA >81E0,>8002,>8206,>83ff,>8403,>8536,>8603,>8700,>5B00,>0000
* Program entry point: claim the scratchpad workspace, initialise
* ROWS, back up the scratchpad and install the relocated routines.
* Uses R0-R2 as scratch.
START
LWPI >8300
* Everything from >8320 up is about to be overwritten, including
* the scratchpad the console interrupt routine depends on. Mask
* interrupts explicitly instead of relying on the state the loader
* left behind; they are re-enabled only after the scratchpad has
* been restored at the end of the program.
LIMI 0
* Fill all 256 bytes of ROWS with 192, one more than the largest
* Y that can be plotted, so the first pixel in any column wins.
LI R0,ROWS
LI R1,192*256
LI R2,256
LP1
MOVB R1,*R0+
DEC R2
JNE LP1
* Back up scratchpad >8320->83FF (56 * 4 = 224 bytes) to SCRATCH.
* >8300->831F is our own workspace and is not saved.
LI R0,>8320 * skip our WP
LI R1,SCRATCH
LI R2,56 * 4 bytes at a time
LS1
MOV *R0+,*R1+
MOV *R0+,*R1+
DEC R2
JNE LS1
* Copy SQRTX..SLAST into scratchpad starting at SQRT (>8324).
* The SQRT/PLOT/SMULT/DRAWPX equates assume exactly this layout.
LI R0,SQRTX * first function
LI R1,SQRT * first free word (>8324)
LC1
MOV *R0+,*R1+ * copy one word
CI R0,SLAST * check for done (thus no unroll)
JL LC1
* 140 GRAPHICS 8+16:SETCOLOR 2,0,0
* Enter bitmap mode, then clear the screen.
BL @BITMAP
* Pattern table: >1800 bytes of >00 starting at VDP >0000.
LI R2,>1800
CLR R1
CLR R0
BL @VDPFILL
* Color table: >1800 bytes of >F1 (white on black) starting at
* VDP >2000. VDPFILL only uses the high byte of R1.
LI R2,>1800
LI R1,>F100
LI R0,>2000
BL @VDPFILL
* 130 XP=144:XR=4.71238905:XF=XR/XP
* XF turns a distance in pixels into an angle. The BASIC value
* 4.71238905/144 is the same ratio as 2*PI/192, i.e. a distance
* of 192 is one full circle. SINTAB splits the circle into 256
* steps, so instead of converting to radians and back the scale
* factor becomes 256/192 = 1.333333 table steps per pixel.
* In 9.7 fixed point 1.0 is 128 (as for every other constant in
* this program), so 1.333333 is 170.67, which rounds to
* 171 = >00AB. (169 = >00A9 would be 1.333333*127 and makes the
* ripples about 1% too wide.)
* As an added bonus, the angle is wrapped to the table range by
* simply masking it.
LI XF,>00AB
* 140 FOR ZI=64 TO -64 STEP -1
* ZI is kept as a plain integer.
LI ZI,64
* Top of the outer (ZI) loop: everything that depends only on ZI.
L160
* 150 ZT=ZI*2.25:ZS=ZT*ZT
* Two multiplies are needed, so the value passes through a 32-bit
* register pair anyway, which makes life a little easier.
* 2.25 * 128 = 288, which is >0120
* note: ZT is never stored anywhere.
LI T32,>0120
MOV ZI,T1
ABS T1 * this is okay, because we are going to square it anyway
MPY T1,T32
* T32:T32B now holds a 32-bit 25.7 number:
* |ZI| (16.0) times 2.25 (9.7) gives 25.7.
* We want 9.7, so the least significant word is the answer with
* no shifting. The largest value is 64*2.25 = 144, which fits in
* 9 integer bits, so the 'sign bit' is never set and nothing
* overflows.
* Load that word as both operands and square it.
MOV T32B,T32
MOV T32B,T1
MPY T1,T32
* T32:T32B is now an 18.14 number. ZS is kept as a 16-bit unsigned
* integer, so drop the 14 fraction bits: the top two bits of the
* low word and the low fourteen bits of the high word are merged.
MOV T32B,ZS * least significant - we want two bits from this
SRL ZS,14 * toss the rest
SLA T32,2 * prepare the most significant
SOC T32,ZS * and merge it in
* 160 XL=INT(SQR(20736-ZS)+0.5)
* ZS is a plain integer, so the subtraction is direct. SQRT returns
* 9.7; the shift below truncates it to an integer (the +0.5
* rounding of the BASIC line is not applied here).
LI T1,20736
S ZS,T1
BL @SQRT * T1 IN AS positive INT, T1 OUT AS 9.7
SRL T1,7 * make an integer for counting
MOV T1,XL * and store it
* 170 ZX=ZI+160:ZY=90+ZI
* (this version adds 127 instead of 160 because the screen is
* narrower)
MOV ZI,T1
AI T1,127 * smaller screen
MOV T1,@ZX
MOV ZI,T1
AI T1,90
MOV T1,@ZY
* 180 FOR XI=0 TO XL
* The loop always runs at least once (0 TO 0), so the test can
* sit at the bottom.
CLR XI
* Top of the inner (XI) loop: compute YY for this XI and plot the
* mirrored pair of pixels.
L190
* 190 XT=SQR(XI*XI+ZS)*XF
* Similar to the ZS calculation: XI is never negative, so the
* unsigned MPY can be used directly.
* XT is kept in 9.7 here; the fraction is needed for XT*3 below.
MOV XI,T32 * Integer (always positive now)
MPY XI,T32 * XI*XI - 16.0 * 16.0 = 32.0, so just take the LSW
MOV T32B,T1 * least significant - still 16.0
A ZS,T1 * add ZS (we're an integer so can just add - max is 41472, so unsigned!)
BL @SQRT * T1 in as positive int, T1 OUT as 9.7
MOV XF,T32 * prepare to mult - we know these values are positive
MPY T1,T32 * do it - 9.7*9.7 = 18.14
* Shift the 32-bit product right by 7 to get back to 9.7.
SRL T32B,7 * make room, throwing away 7 fractional bits
SLA T32,9 * get the more significant bits into the right place
SOC T32,T32B * merge the two 16-bit words
MOV T32B,XT
* 200 YY=(SIN(XT)+SIN(XT*3)*0.4)*55 -- BASIC used 56; 55 compensates for rounding errors
* Order of operations: SIN(XT*3)*0.4 is evaluated first.
* Each angle is turned into a SINTAB byte offset by shifting right
* 6 (integer part times 2), adding 1 and masking with >01FE, which
* rounds to the nearest entry and wraps at 256 entries.
MOV XT,T1 * prepare for second sine
A XT,T1 * simpler than MPY by 3, no need to shift result
A XT,T1
SRL T1,6 * shift out fraction, but multiply by 2 (we'll trim the extra bit below)
INC T1 * rounding
ANDI T1,>01FE * mask for lookup
MOV @SINTAB(T1),T2
LI T1,>0033 * roughly 0.4 (actually 0.398)
BL @SMULT * Signed multiply, result in T32B
* keep SIN(XT*3)*0.4 in T16 while SIN(XT) is looked up
MOV T32B,T16
SRL XT,6 * shift out fraction, but multiply by 2 (we'll trim the extra bit below)
INC XT * rounding
ANDI XT,>01FE * mask for lookup (We don't use XT again)
MOV @SINTAB(XT),T2
A T16,T2
* >1B80 is 55 in 9.7 (55 * 128)
LI T1,>1B80 * 55 x less than 1 will be less than 55, so it fits
BL @SMULT * Signed multiply, result in T32B
* YY is needed as an integer from here on.
SRA T32B,7 * discard fraction (sign extend!)
MOV T32B,YY
* now go plot the two pixels (inputs: XI, YY, @ZX, @ZY)
BL @DRAWPX
* 250 NEXT XI
INC XI
C XI,XL * I know it's always positive now,
JLE L190 * so I can use an unsigned test
* 255 NEXT ZI
* Step ZI down by one and repeat the outer loop while ZI is still
* greater than -65 (signed compare), so the last pass is ZI = -64.
L255
DEC ZI
CI ZI,-65
JGT L160
* 260 GOTO 260
* Drawing is finished. Put the saved 224 bytes back at >8320->83FF
* (112 words; our workspace below >8320 is left alone) so the
* console interrupt routine finds its data again.
LI R0,SCRATCH
LI R1,>8320
LI R2,112
RSTLP
MOV *R0+,*R1+
DEC R2
JNE RSTLP
* Idle forever, opening the interrupt mask briefly on every pass.
WAIT
LIMI 2
LIMI 0
JMP WAIT
* VDP access
* VSBW - write one byte to VDP RAM.
* In: R0 = VDP address, R1 = data in the high byte.
* Out: R0 has the >4000 write bit ORed in; R1 is unchanged.
VSBW
* low address byte goes out first
SWPB R0
MOVB R0,@>8C02
SWPB R0
* then the high byte, flagged as a write address
ORI R0,>4000
MOVB R0,@>8C02
* and finally the data byte
MOVB R1,@>8C00
B *R11
* VDPFILL - fill a run of VDP RAM with one byte value.
* In: R0 = VDP start address, R1 = fill value in the high byte,
* R2 = number of bytes (0 behaves as 65536).
* Out: R0 has the >4000 write bit ORed in, R2 = 0, R1 unchanged.
VDPFILL
* send the write address, low byte first
SWPB R0
MOVB R0,@>8C02
SWPB R0
ORI R0,>4000
MOVB R0,@>8C02
* the VDP advances its address after every data byte
FILLLP
MOVB R1,@>8C00
DEC R2
JNE FILLLP
B *R11
* Write address or register
* Sends R0 to the VDP address port at >8C02, low byte first and
* then high byte. R0 is back in its original byte order on return.
* The caller supplies the complete word, including the >4000 bit
* for a write address (BITMAP passes >5800 to write at >1800).
VDPWA
SWPB R0
MOVB R0,@>8C02
SWPB R0
MOVB R0,@>8C02
B *R11
* LOADRG - send a zero-terminated list of words to the VDP address
* port, then write >D0 to the data port (sprite list terminator).
* In: R1 = address of the list. Each word goes out low byte first.
* Out: R0 = 0 and R1 = >D000 (both destroyed).
LOADRG
JMP LDNEXT
* send the word in R0 to the address port
LDSEND
SWPB R0
MOVB R0,@>8C02
SWPB R0
MOVB R0,@>8C02
* fetch the next entry; a zero word ends the list
LDNEXT
MOV *R1+,R0
JNE LDSEND
* the last list entry left the write address at the sprite table
LI R1,>D000
MOVB R1,@>8C00
B *R11
* BITMAP - put the VDP into bitmap mode.
* Loads the BMREGS list (which also disables sprites), then fills
* the screen image table at >1800 with the values 0-255 three
* times over.
* Destroys R0, R1 and R2; the return address is parked in SAVE
* because of the nested BL calls.
BITMAP
MOV R11,@SAVE
* VDP registers and sprite terminator
LI R1,BMREGS
BL @LOADRG
* point the VDP at the screen image table (>1800 + write bit)
LI R0,>5800
BL @VDPWA
* R2 counts the three 256-byte thirds of the table
CLR R2
SITBLK
CLR R1
* the high byte of R1 is the character number; adding >0100 steps
* it, and the sum is zero again after 256 steps
SITCHR
MOVB R1,@>8C00
AI R1,>0100
JNE SITCHR
INC R2
CI R2,3
JNE SITBLK
MOV @SAVE,R11
B *R11
* use this and a listing to get scratchpad addresses for the fctns
* AORG >8324
* SQRTX - integer square root with a fixed point result.
* This code is not called where it is assembled: START copies it to
* scratchpad and callers use BL @SQRT. Do not change its length
* without updating the PLOT, SMULT and DRAWPX equates.
* IN AND OUT IN T1
* T1 in = integer
* T1 out = 9.7 signed fixed point
* Uses T2,X1,Y1,T32
* http://samples.sains...mple_809121.pdf
* modified a bit - we pretend the input is a 16.8 value (the
* entire fractional part will be 0), that lets us get out a
* 8.8 value, because the algorithm needs an even number of fractional
* bits. Then we just shift once to get .7
* NOTE(review): remHi and testDiv are 16-bit registers. For roots
* of 128 or more the shifts in the last passes look like they can
* overflow; the bit from the 16th pass is discarded by the final
* SRL, but the lowest fraction bit may be affected - confirm.
SQRTX
CLR X1 root
CLR T2 remHi (t1 is remLo)
LI Y1,16 count = ((WORD/2-1)+(FRACBITS>>1)) -> 11+4, +1 for loop
* one pass per result bit: bring the next two input bits into remHi
SQRT1
SLA T2,2 remHi = (remHi << 2) | (remLo >> 14);
MOV T1,T32
SRL T32,14
SOC T32,T2
SLA T1,2 remLo <<= 2;
SLA X1,1 root <<= 1;
MOV X1,T32 testDiv = (root << 1) + 1;
SLA T32,1
INC T32
* JL is an unsigned compare: skip the subtract when remHi < testDiv
C T2,T32 if (remHi >= testDiv) {
JL SQRT2
S T32,T2 remHi -= testDiv;
INC X1 root += 1;
SQRT2
DEC Y1 while (--count != 0);
JNE SQRT1
MOV X1,T1 return( root);
SRL T1,1 Get it down to x.7 fixed point
B *R11
* PLOTX - set one pixel in the bitmap pattern table.
* Runs from scratchpad as PLOT (BL @PLOT); its length is baked
* into the SMULT and DRAWPX equates.
* INPUT X1,Y1 - kills T1,T2 as well, and the high byte of X1 (R1)
* is overwritten with the VDP byte.
* The "R1 is the Y value" / "R0 is the X value" remarks below come
* from the E/A routine this is based on; here Y is in Y1 and X is
* in X1.
PLOTX
* use the E/A routine for address
* byte offset = (Y/8)*256 + (Y AND 7) + (X AND >FFF8)
MOV Y1,T1 R1 is the Y value.
SLA T1,5
SOC Y1,T1
ANDI T1,>FF07
MOV X1,T2 R0 is the X value.
ANDI T2,7
A X1,T1 T1 is the byte offset.
S T2,T1 T2 is the bit offset.
* inline VDP!
* read-modify-write: set the address for reading, fetch the byte,
* set the same address again with the write bit, OR in the pixel
* mask from BITS and store the byte back.
SWPB T1 set up read address
MOVB T1,@>8C02
SWPB T1
MOVB T1,@>8C02
ORI T1,>4000 we need this later, and provides a VDP delay
MOVB @>8800,R1 read the byte from VDP
SWPB T1 set up write address
MOVB T1,@>8C02
SWPB T1
MOVB T1,@>8C02
SOCB @BITS(T2),R1 or the bit and provide VDP delay
MOVB R1,@>8C00 write the byte back
B *R11
* SMULTX - signed fixed point multiply - T1 * T2 = T32B
* Runs from scratchpad as SMULT (BL @SMULT); its length is baked
* into the DRAWPX equate.
* Both inputs have 7 fraction bits; the 32-bit product is shifted
* right by 7 and its low 16 bits are returned in T32B.
* ONLY T2 is allowed to be negative!! Result
* will be negative if T2 was.
* Uses T1,T2,NEGFL,T32,T32B
SMULTX
CLR NEGFL * temp flag for negative
MOV T2,T32 * prepare for mult and test
* JGT skips only when T2 > 0. Zero takes the "negative" path too,
* which is harmless because the product is then zero.
JGT NOTNEG1
SETO NEGFL * it is negative, so remember and make positive
ABS T32
NOTNEG1
MPY T1,T32 * does the multiply - you know the drill, fix up number
SRL T32B,7 * make room, throwing away 7 fractional bits
SLA T32,9 * get the more significant bits into the right place
SOC T32,T32B * merge the two 16-bit words
MOV NEGFL,NEGFL * check if it should be negative
JEQ NOTNEG2
NEG T32B * yes, it should
NOTNEG2
B *R11
* DRAWXX - plot the pixel pair for the current XI, one on each side
* of ZX, honouring the per-column ROWS limits.
* Runs from scratchpad as DRAWPX (BL @DRAWPX) and ends exactly at
* >83F7, so it must not grow by more than the 8 spare bytes.
* In: XI, YY (integers), @ZX, @ZY.
* Uses X1, Y1, T16, T32, T32B, plus everything PLOT uses. The
* return address is kept in SAVE across the BL @PLOT calls.
DRAWXX
MOV R11,@SAVE * need this to get back!
* 210 X1=XI*0.75+ZX:Y1=ZY-YY
* XI can never be negative now, so no sign handling is needed
MOV XI,X1 * integer
LI T32,>0060 * 0.75
MPY X1,T32 * now 25.7, so just take the LSW (unsigned mult!)
AI T32B,>40 * 0.5 in x.7, for rounding
SRA T32B,7 * make integer for the plot function (sign extend!)
MOV T32B,X1 * get the integer
A @ZX,X1 * add (integer) ZX
MOV @ZY,Y1 * get ZY (integer)
S YY,Y1 * subtract YY (integer)
* 220 IF RR(X1)>Y1 THEN RR(X1)=Y1:PLOT X1,Y1
* CB compares high bytes, so the Y value is moved into the high
* byte first. JLE is an unsigned test: the pixel is skipped when
* ROWS(X1) <= Y1.
SWPB Y1 * stupid Big Endian....
MOV Y1,T16 * plot kills X1,Y1, and we need Y1 again
CB @ROWS(X1),Y1
JLE L230
MOVB Y1,@ROWS(X1)
SWPB Y1
* NOTE: PLOT EXPECTS THE PIXEL IN REGISTERS X1,Y1
BL @PLOT
* 230 X1=ZX-XI*0.75
* T32B still holds the rounded XI*0.75 from above.
L230
MOV @ZX,X1
S T32B,X1 * use the scaled X1 on both sides of the origin
* 240 IF RR(X1)>Y1 THEN RR(X1)=Y1:PLOT X1,Y1
MOV T16,Y1 * get it back, still swapped
CB @ROWS(X1),Y1
JLE L250
MOVB Y1,@ROWS(X1)
SWPB Y1
* NOTE: PLOT EXPECTS THE PIXEL IN REGISTERS X1,Y1
BL @PLOT
* Return to caller
L250
MOV @SAVE,R11
B *R11
SLAST
END