Jump to content
IGNORED

Fixing 6sprite colored kernel delay?


MemberAtarian

Recommended Posts

Hi guys, I made a six-sprite kernel with 6 pointers and added a pointer for changing the luma of the color.
 

	rem Colour state used by the six-sprite kernel below.
	rem ColorPointer is a two-byte pointer (the kernel also writes ColorPointer+1).
	rem ScoreColor is the base colour that each Colors table entry is added to.
	dim ColorPointer = o
	dim ScoreColor = q

	rem One pointer per sprite column. The kernel stores the low byte in the
	rem named variable and the high byte in the byte after it.
	dim Score1IconPointer = c
	dim Score2IconPointer = e
	dim Score3IconPointer = g
	dim Score4IconPointer = i
	dim Score5IconPointer = k
	dim Score6IconPointer = m

	ScoreColor = $14
	asm
; ---------------------------------------------------------------------------
; Frame start: wait out the overscan timer, emit three scanlines of VSYNC,
; run the vertical-blank timer, then turn the beam on and start the timer
; that PFWAIT2 polls after the kernel.
; ---------------------------------------------------------------------------
title_eat_overscan2
	clc			; no instruction visible between here and the kernel's ADC sets carry
				; NOTE(review): this relies on the `sleep` macro leaving carry alone - confirm
	lda INTIM
	bmi title_eat_overscan2	; spin while timer bit 7 is still set (timer was loaded with N+128)

title_do_vertical_sync2
	lda #2
	sta WSYNC ; wait for the start of the next scanline
	sta VSYNC ; A=2: VSYNC on
	sta WSYNC ; VSYNC line 1
	sta WSYNC ; VSYNC line 2
	lda #0
	sta WSYNC ; VSYNC line 3
	sta VSYNC ; A=0: VSYNC off

        ;lda #42+128
	; Vertical-blank length: vblank_time if that constant is defined, otherwise 42.
	; 128 is added so the wait loop below can simply test the sign bit.
	ifnconst vblank_time
 	lda #42+128
 	else
 	lda #vblank_time+128
 	endif

	sta TIM64T
title_vblank_loop2
	lda INTIM
	bmi title_vblank_loop2	; spin until the timer value drops below 128
	lda #0
	sta WSYNC		; line up on a scanline boundary
	sta VBLANK		; A=0: stop blanking, picture starts
	sta ENAM0		; A=0: missile 0 off
	sta ENAM1		; A=0: missile 1 off
	sta ENABL		; A=0: ball off
	lda #230
        	sta TIM64T	; visible-frame timer; PFWAIT2 waits for it to reach zero

SelectDone
	; Point the six icon pointers and the colour pointer at their ROM tables.
	; Each pointer is written low byte first, then high byte.

	lda	#<Lock			; column 1 -> Lock
	sta	Score1IconPointer
	lda	#>Lock
	sta	Score1IconPointer+1

	lda	#<Term			; column 2 -> Term
	sta	Score2IconPointer
	lda	#>Term
	sta	Score2IconPointer+1

	lda	#<Vent			; column 3 -> Vent
	sta	Score3IconPointer
	lda	#>Vent
	sta	Score3IconPointer+1

	lda	#<UP			; column 4 -> UP
	sta	Score4IconPointer
	lda	#>UP
	sta	Score4IconPointer+1

	lda	#<DOWN			; column 5 -> DOWN
	sta	Score5IconPointer
	lda	#>DOWN
	sta	Score5IconPointer+1

	lda	#<AZ5			; column 6 -> AZ5
	sta	Score6IconPointer
	lda	#>AZ5
	sta	Score6IconPointer+1

	lda	#<Colors		; per-line colour offsets
	sta	ColorPointer
	lda	#>Colors
	sta	ColorPointer+1

 	
	; Set up both players for the six-column display and position them.
	; The numbers in brackets are CPU cycles elapsed since the WSYNC below.
	; They assume the `sleep` macro (defined elsewhere) burns exactly the
	; number of cycles given and that temp3 is a zero-page variable.
	LDA 	#$03			; 2
	STA 	NUSIZ0			; 3  both players get NUSIZ = 3
	STA 	NUSIZ1			; 3
	STA 	HMCLR			; 3  clear all horizontal-motion registers
	sta 	WSYNC			; 3  resume at cycle 0 of the next scanline
         	tsx				; 2  [2]
	stx 	temp3			; 3  [5]  save the stack pointer: the kernel uses SP as a scratch register

	sleep 	25			;    [30]
         
	LDY 	#7			; 2  [32] Y = last row index; also used as the VDEL value
	STY	VDELP0			; 3  [35] Y=7 has bit 0 set, which turns vertical delay on
	STY 	VDELP1			; 3  [38]
	STA 	RESP0			; 3  [41] strobe: player 0 horizontal position
	
	STA 	RESP1			; 3  [44] strobe: player 1, three CPU cycles after player 0
	lda 	#$60			; 2  [46]
	sta 	HMP0			; 3  [49] motion value 6 in the upper nibble
	lda	#$70			; 2  [51]
	sta 	HMP1			; 3  [54] motion value 7 in the upper nibble
	sleep 	4			;    [58]
	STA 	HMOVE 			; 3  [61] NOTE(review): HMOVE lands at cycle 61, not right after a WSYNC
					;         (pointed out in the thread) - confirm this is intended


	
	
	


; ---------------------------------------------------------------------------
; DisplayLoop - draws one scanline of six 8-pixel columns per iteration.
;
; In:   Y = row index (7 on entry), counted down to 0
;       Score1..6IconPointer, ColorPointer = table addresses
;       carry clear (ADC below adds ScoreColor without a CLC)
;       NOTE(review): also assumes decimal mode is off - confirm
; Uses: A, X, Y and the stack pointer (SP holds column 5 while A and X are busy;
;       the caller saved the real SP in temp3)
;
; Cycle counts are "cost [running total]". The total per pass is 76, i.e.
; exactly one scanline, so there is no WSYNC inside the loop. This assumes no
; (zp),Y access crosses a page boundary and that LAX (zp),Y costs 5 cycles.
;
; Known limitation (from the thread): the last four GRP writes should be three
; cycles apart, but the TSX between the first two makes that gap five cycles,
; so one pixel column is always latched a cycle early or late.
; ---------------------------------------------------------------------------
DisplayLoop

 	lda 	(ColorPointer),y		; 5 [5]   per-row colour offset
	adc 	ScoreColor		; 3 [8]   plus the base colour
	sta 	COLUP0			; 3 [11]
	sta 	COLUP1			; 3 [14]
	lax	(Score1IconPointer),y 	; 5 [19]  A and X = column 1 (undocumented opcode)
	lda 	(Score2IconPointer),y 	; 5 [24]  A = column 2
	stx 	GRP0 			; 3 [27]  column 1 -> GRP0
	
	sta 	GRP1 			; 3 [30]  column 2 -> GRP1

	
	lda 	(Score3IconPointer),y	; 5 [35]  A = column 3
	
	
	sta 	GRP0 			; 3 [38]  column 3 -> GRP0
	lax 	(Score5IconPointer),y 	; 5 [43]  X = column 5
	txs				; 2 [45]  park column 5 in the stack pointer
	lax 	(Score4IconPointer),y	; 5 [50]  X = column 4
	
	lda 	(Score6IconPointer),y 	; 5 [55]  A = column 6
	
	
	stx 	GRP1 			; 3 [58]  column 4 -> GRP1
	tsx				; 2 [60]  X = column 5 again (this is the 2-cycle disruption)
	
	stx 	GRP0			; 3 [63]  column 5 -> GRP0
	sta	GRP1 			; 3 [66]  column 6 -> GRP1
	stx 	GRP0			; 3 [69]  extra GRP0 write; the thread calls this the "dummy write"
			
	
	dey				; 2 [71]  next row
	nop				; 2 [73]  padding to reach 76 cycles
	bpl	DisplayLoop 		; 3 [76]  taken while Y >= 0 (2 cycles on the final fall-through)

	; Kernel exit: the loop above leaves Y = $FF, so INY makes it 0. That zero is
	; used to blank both players and reset their size and colour registers.
	iny 				; Y = 0
 	sty 	GRP0			; clear player 0 graphics
 	sty 	GRP1 			; clear player 1 graphics
 	sty 	NUSIZ0 			; back to NUSIZ = 0
 	sty 	NUSIZ1 
 	sty 	COLUP0			; player colours = 0
	sty	COLUP1
	; NOTE(review): VDELP0/VDELP1 are not cleared in this block - confirm that is intended


	ldx 	temp3			; restore the stack pointer saved before the kernel
	txs

 	sta 	WSYNC  			; finish the current scanline


	; Idle until the timer started with `lda #230 / sta TIM64T` reaches zero.
PFWAIT2
        lda INTIM 
        bne PFWAIT2

end


	goto OVERSCAN2

	asm

; ---------------------------------------------------------------------------
; Icon bitmaps and colour ramps read by DisplayLoop through (pointer),Y.
;
; Each table is 8 bytes. The kernel indexes them with Y = 7 down to 0, so the
; LAST byte listed in a table is the first scanline drawn.
;
; Page guard: DisplayLoop is cycle-counted, and a (zp),Y load costs one extra
; cycle whenever the indexed address crosses a page boundary. The 9 tables
; below occupy 72 bytes (offsets 0..71). If they would straddle a page, pad
; with zero bytes up to the next page so every table sits in one page no
; matter where the surrounding code places this block. The padding is never
; executed because the goto above jumps over this whole data area.
; ---------------------------------------------------------------------------
	if (<*) > (<(*+71))
	repeat ($100-<*)
	.byte 0
	repend
	endif

Lock
	.byte	#%00111100
	.byte	#%01011010
	.byte	#%01011010
	.byte	#%01000010
	.byte	#%01111110
	.byte	#%00100100
	.byte	#%00100100
	.byte	#%00011000

Term
	.byte	#%01000000
	.byte	#%10100011
	.byte	#%01000100
	.byte	#%11100100
	.byte	#%11100100
	.byte	#%10101011
	.byte	#%10100000
	.byte	#%01000000

Vent
	.byte	#%00000100
	.byte	#%00001100
	.byte	#%11111100
	.byte	#%01100100
	.byte	#%00100110
	.byte	#%00111111
	.byte	#%00110000
	.byte	#%00100000

DOWN
	.byte	#%00111100
	.byte	#%01000010
	.byte	#%10011001
	.byte	#%00111100
	.byte	#%01111110
	.byte	#%00011000
	.byte	#%00011000
	.byte	#%00011000

UP
	.byte	#%00111100
	.byte	#%01000010
	.byte	#%10011001
	.byte	#%00011000
	.byte	#%00011000
	.byte	#%01111110
	.byte	#%00111100
	.byte	#%00011000

AZ5
	.byte	#%00000110
	.byte	#%00000001
	.byte	#%01110111
	.byte	#%00000100
	.byte	#%10111111
	.byte	#%11101000
	.byte	#%10100100
	.byte	#%01011100

; All-zero icon (not referenced by the pointer setup shown in this file).
Nothing
	.byte	#%00000000
	.byte	#%00000000
	.byte	#%00000000
	.byte	#%00000000
	.byte	#%00000000
	.byte	#%00000000
	.byte	#%00000000
	.byte	#%00000000


; Per-row values added to ScoreColor by the kernel (2,4,6,8,8,6,4,2).
Colors
	 .byte #$02
	 .byte #$04
	 .byte #$06
	 .byte #$08
	 .byte #$08
	 .byte #$06
	 .byte #$04
	 .byte #$02

	
; Same values as Colors; not referenced by the code shown in this file.
ScoreColors
	.byte #$02
	.byte #$04
	.byte #$06
	.byte #$08
	.byte #$08
	.byte #$06
	.byte #$04
	.byte #$02




.OVERSCAN2
	; Overscan: start the overscan timer, blank the picture, and loop back to
	; the top of the frame, where title_eat_overscan2 waits for the timer.
	; (Presumably this label is what the bB `goto OVERSCAN2` resolves to - confirm.)
	;lda #35+128 

	; Timer value: 35 by default, or overscan_time-4 when that constant is defined.
	; 128 is added because the wait loop tests the sign bit.
 	ifnconst overscan_time
 	lda #35+128
 	else
 	lda #overscan_time+128-3-1
 	endif


	sta TIM64T
	lda #%11000010		; bit 1 turns blanking on; bits 6 and 7 are also set
	sta WSYNC		; line up on a scanline boundary
	sta VBLANK
	jmp title_eat_overscan2	; next frame
end

I don't want to remove the pointers because I want to use this kernel for universal purposes (six-digit score, picture, inventory, status screen, etc.) and add a little color to it. I even tried it without the colors, where I can fine-tune with "nop"s, but I couldn't get any better results.

Typically, these are the two best versions I can get by changing the sleep macros and the position of the single "nop" in the loop. In one version, the last bit of sprite 1 becomes the third sprite's last bit (it is saved too soon); in the other, the first bit of sprite 6 becomes the first bit of sprite 4 (the sta happens too late).

Can you help me a little? It's based on the method the bB kernel uses to draw the score.

4447.png

4448.png

testshit.bas.bin

Link to comment
Share on other sites

I'm not quite sure exactly which STA happens too early or too late, but if you'd like to split the 2-cycle NOP in half for greater flexibility, you can use shadow registers on the $01xx page.

In other words, for every TIA or RAM basic address, there's an alternate location 256 bytes higher.

And by using these locations you can easily force the assembler not to use zeropage addressing mode - because the address is not on the zero page.

 

Bad:
    STA    GRP0        ; 3 - let's assume for this example it's 1 cycle too early
    NOP                ; 2
    STY    GRP1        ; 3
    
Fix:
    STA    GRP0+256    ; 4 - bullseye! this instruction now takes 1 cycle longer, but on VCS does exactly the same
    STY    GRP1+256    ; 4 - this one needs 1 cycle delay too, to write at 8th cycle exactly as before


This trick is VERY useful on the VCS to get 1-cycle NOPs.

  • Like 2
Link to comment
Share on other sites

17 hours ago, KK/Altair said:

 


    STA    GRP0+256    ; 4 - bullseye! this instruction now takes 1 cycle longer, but on VCS does exactly the same
    STY    GRP1+256    ; 4 - this one needs 1 cycle delay too, to write at 8th cycle exactly as before

 

 

Assuming you're using dasm, I find using the FORCE extensions to be easier:

    STA.w    GRP0    ; 4 - bullseye! this instruction now takes 1 cycle longer, but on VCS does exactly the same
    STY.w    GRP1    ; 4 - this one needs 1 cycle delay too, to write at 8th cycle exactly as before

 

the .w tells dasm to use Absolute addressing instead of Zero Page addressing. From the dasm manual:

EXTENSIONS:

    FORCE extensions are used to force an addressing mode.  In some cases,
    you can optimize the assembly to take fewer passes by telling it the
    addressing mode.  Force extensions are also used with DS,DC, and DV
    to determine the element size.  NOT ALL EXTENSIONS APPLY TO ALL
    PROCESSORS!

	example:    lda.z   charlie

	i   -implied
	ind -indirect word
	0   -implied
	0x  -implied indexing (0,x)
	0y  -implied indexing (0,y)
	b   -byte address
	bx  -byte address indexed x
	by  -byte address indexed y
	w   -word address
	wx  -word address indexed x
	wy  -word address indexed y
	l   -longword (4 bytes) (DS/DC/DV)
	r   -relative
	u   -uninitialized (SEG)

	First character equivalent substitutions:

	b z d	    (byte, zeropage, direct)
	w e a	    (word, extended, absolute)

 

Link to comment
Share on other sites

The problem is that the last 4 writes to the sprite registers (3 updates + dummy write) need to start in rapid succession exactly 3 cycles before the end of the 2nd sprite's image when VDEL is enabled in both sprites.  The TSX instruction disrupts that flow...so it will always be 1 cycle too early or too late in one of the images.  You can either live without using that bit position in the GFX data, or rewrite the routine so that the Y register is able to be saved/restored during the loop (and thus, can hold GFX data for one of the sprites in between).

Worst-case, use a temp array of 8 bytes to hold pre-determined color data.

 

BTW have you noticed that the HMOVE before the loop is hitting at cycle 61?

Link to comment
Share on other sites

On 1/26/2020 at 10:07 AM, Nukey Shay said:

The problem is that the last 4 writes to the sprite registers (3 updates + dummy write) need to start in rapid succession exactly 3 cycles before the end of the 2nd sprite's image when VDEL is enabled in both sprites.  The TSX instruction disrupts that flow...so it will always be 1 cycle too early or too late in one of the images.  You can either live without using that bit position in the GFX data, or rewrite the routine so that the Y register is able to be saved/restored during the loop (and thus, can hold GFX data for one of the sprites in between).

Worst-case, use a temp array of 8 bytes to hold pre-determined color data.

 

BTW have you noticed that the HMOVE before the loop is hitting at cycle 61?

If I rewrite it to reuse the Y, I have to drop the cycle loading the ColorPointer (which loads the shading) and adding the base color to it. And that's something I don't want to do.
The problem is that I don't have many cycles to play with: there were only 2 spare cycles to place with a nop or +256 (.w). That doesn't leave much room if I want to keep it universal, so that you can load anything into the pointers and enter this loop, and it can become a score, indicator, health bar, inventory, single picture, etc.

I also tried to change the position of HMOVE, HMP0 and HMP1, also tried with VDELs (0,0), (0,1), (1,0), (1,1), if I can do something, but it was always really off.

The only solution I found was using some "magic tricks" (fixing the lost bit with ball and m0, then overlap 2 missiles with the pf) I read about in bB's title kernel and using them with a 2line kernel.

Edited by MemberAtarian
Link to comment
Share on other sites

1 hour ago, MemberAtarian said:

If I rewrite it to reuse the Y, I have to drop the cycle loading the ColorPointer (which loads the shading) and adding the base color to it. And that's something I don't want to do.

But that can be pre-calculated and placed into a temp Ram array, which I stated.

You might need to weigh the advantages of using a generic subroutine vs. multiple routines dedicated to what is to be displayed.  Rom is more plentiful than Ram.

  • Like 1
Link to comment
Share on other sites

1 minute ago, Nukey Shay said:

But that can be pre-calculated and placed into a temp Ram array, which I stated.

You might need to weigh the advantages of using a generic subroutine vs. multiple routines dedicated to what is to be displayed.  Rom is more plentiful than Ram.

There are not enough cycles to load the sprite data, store it in a temp array, and then write it out at the right point.

Link to comment
Share on other sites

Well, then unroll the loop.  There's still 3 cycles left over (or 1 cycle, if LAX is not used). The routine then fills 1 page of Rom, but who cares?  Means to an end.

 

LF51C
    ldy     #$07                    ;2 1st scanline
    lda     (ColorPointer),y        ;5
    adc     ScoreColor              ;3
    sta     COLUP0                  ;3
    sta     COLUP1                  ;3
    lda     (Score1IconPointer),y   ;5
    sta     GRP0                    ;3
    lda     (Score2IconPointer),y   ;5
    sta     GRP1                    ;3
    lda     (Score3IconPointer),y   ;5
    sta     GRP0                    ;3
    lda     (Score5IconPointer),y   ;5
    sta     temp                    ;3
    lax     (Score4IconPointer),y   ;5
    lda     (Score6IconPointer),y   ;5
    ldy     temp                    ;3
    stx     GRP1                    ;3
    sty     GRP0                    ;3
    sta     GRP1                    ;3
    sta     GRP0                    ;3
    sta     $2E                     ;3 unused cycles
    ldy     #$06                    ;2 2nd scanline
    lda     (ColorPointer),y        ;5
    adc     ScoreColor              ;3
    sta     COLUP0                  ;3
    sta     COLUP1                  ;3
...etc

 

Link to comment
Share on other sites

7 minutes ago, Nukey Shay said:

Well, then unroll the loop.  There's still 3 cycles left over (or 1 cycle, if LAX is not used). The routine then fills 1 page of Rom, but who cares?  Means to an end.

 

 


LF51C
    ldy     #$07                    ;2 1st scanline
    lda     (ColorPointer),y        ;5
    adc     ScoreColor              ;3
    sta     COLUP0                  ;3
    sta     COLUP1                  ;3
    lda     (Score1IconPointer),y   ;5
    sta     GRP0                    ;3
    lda     (Score2IconPointer),y   ;5
    sta     GRP1                    ;3
    lda     (Score3IconPointer),y   ;5
    sta     GRP0                    ;3
    lda     (Score5IconPointer),y   ;5
    sta     temp                    ;3
    lax     (Score4IconPointer),y   ;5
    lda     (Score6IconPointer),y   ;5
    ldy     temp                    ;3
    stx     GRP1                    ;3
    sty     GRP0                    ;3
    sta     GRP1                    ;3
    sta     GRP0                    ;3
    sta     $2E                     ;3 unused cycles
    ldy     #$06                    ;2 2nd scanline
    lda     (ColorPointer),y        ;5
    adc     ScoreColor              ;3
    sta     COLUP0                  ;3
    sta     COLUP1                  ;3
...etc

 

 

Okay, but this wastes way too much space and fixes the height to 8. :)

Link to comment
Share on other sites

Dedicated routines are beginning to look attractive, right?  Especially if there's only a few color/sprite font choices in the first place.

BTW there is no limit on height so long as there is no limit to Rom.  You could have an unrolled loop occupy an entire bank (or multiple banks) if necessary...and just leap to LDY# at whatever sprite size is needed.

Link to comment
Share on other sites

I suspect the kernel you're basing this on was taking advantage of the fact that the digit graphics only used 7 pixels for the shape, with 1 pixel for spacing between digits.  Since the space pixel doesn't change between digits it didn't matter if the timing was slightly off.  I've done the same before, as seen in this blog post:

Quote

score timer test pattern

score timer test pattern.png

 

The timing is off by a single pixel, resulting in the leftmost pixel of TimerB to also show up as the leftmost pixel of RightScoreB. We can easily work around this by designing the font to not use the leftmost pixel.

 

  • blue stripes = LeftScoreA and LeftScoreB
  • white stripes = TimerA and TimerB
  • green stripes = RightScoreA and RightScoreB

 

Something that might help so you don't have to unroll is to change what you color.  Instead of coloring the players:

sta COLUP0
sta COLUP1

 

color the playfield, which saves 3 cycles:

sta COLUPF

 

The rest of this requires you to:

  • set the players and background to black
  • set playfield to mirrored
  • make sure the 48 pixels are centered on the screen
  • turn on only the pixels of PF2 that appear behind the 48 pixels

Then instead of graphics like this:

509846378_ScreenShot2020-01-27at10_04.36AM2.png.35979ffc41f9aea89e8d3064ff4c4150.png

 

Make them stencils like this - the 0s will show the playfield color:

769392552_ScreenShot2020-01-27at10_04_36AM.png.76f59e1f96792a8846917c4de1c36288.png


 

  • Like 1
Link to comment
Share on other sites

Join the conversation

You can post now and register later. If you have an account, sign in now to post with your account.
Note: Your post will require moderator approval before it will be visible.

Guest
Reply to this topic...

×   Pasted as rich text.   Paste as plain text instead

  Only 75 emoji are allowed.

×   Your link has been automatically embedded.   Display as a link instead

×   Your previous content has been restored.   Clear editor

×   You cannot paste images directly. Upload or insert images from URL.

Loading...
  • Recently Browsing   0 members

    • No registered users viewing this page.
×
×
  • Create New...