I'd have to sit and think about the sector-boundary issue, since the first half of each sector read is spent populating the buffer, and that buffer is then played back immediately after the read command for the subsequent sector has been issued. The player isn't fully unrolled, either: it was enough to unroll the code which plays the first half dozen bytes from the buffer, so that the next sector read can be issued at the same time.
Here's the player code, which might better illustrate what I'm talking about. I probably will end up unrolling the second half, since the loop is actually a couple of cycles over:
.proc PlayPDM
; ----------------------------------------------------------------------
; PlayPDM - stream sample data from the IDE drive and play it via POKEY.
;
; Every sample is one byte used as an index into the 'hi' and 'lo'
; tables; the table values are written to AUDC3 and AUDC1.  One sample
; is output every 37 CPU cycles.  In the listing below a bare number in
; a comment is that instruction's cycle cost and a number in brackets is
; the running total for the current 37-cycle slot.
;
; Each pass handles one drive sector in two phases:
;   Loop2      - reads the sector from IDE_DATA two bytes at a time: the
;                first byte of each pair is played at once, the second
;                is stored in Buffer (256 bytes end up in Buffer).
;   PlayerLoop - plays those 256 buffered bytes.  While the first few
;                are being played (unrolled slots) the read command for
;                the next sector is written to the drive registers; the
;                rest are played by Loop1.
;
; In:   SectorList        - table of 4-byte entries walked through ptr1
;                           (ptr1 advances by 4 per cluster); bytes 0..2
;                           of an entry supply the sector number.
;       SectorsPerCluster - used as a mask after subtracting 1, so it is
;                           assumed to be a power of two (TODO confirm).
; Out:  does not return; leaves through 'jmp Start' when byte 2 of the
;       current entry is $FF (EOFFlag increments to zero).
; Uses: A, X, Y, ptr1, SectorOff(4 bytes), SectorsPerClusterMask,
;       EOFFlag, Buffer.  NMIs, IRQs and display DMA are switched off.
; ----------------------------------------------------------------------
        mwa     #SectorList ptr1
        ldy     SectorsPerCluster
        dey
        sty     SectorsPerClusterMask   ; mask = SectorsPerCluster - 1

        lda     #0
        sta     nmien                   ; no NMIs
        sei                             ; no IRQs
        sta     DMACTL                  ; no display DMA
        sta     AUDCTL

        jsr     GetClusterLBA           ; get sector number of first cluster
        jsr     SetUpSectorRead         ; issue read command and wait for DRQ

        ldy     #0
        lda     #1
        sta     SectorOff               ; next sector to request is offset 1
        sty     SectorOff+1             ;   (presumably offset 0 is the read
        sty     SectorOff+2             ;   issued just above - confirm)
        sty     SectorOff+3
        sty     EOFFlag                 ; FIX: clear the end-of-data flag before
                                        ; playback.  The original clear sat after
                                        ; 'jmp GetPDMData' and was never executed,
                                        ; so the first 'inc EOFFlag / beq Done'
                                        ; tested an uninitialised byte.

;       mva     #$FE PORTB              ; disable OS
        mva     #$0F COLPM0             ; colours
        mva     #$FF GRAFP0
        mva     #3 SKCTL                ; init POKEY
        sta     wsync
        sta     wsync

FAST1   equ 1<<6                        ; 1.79Mhz for channel 1
FAST3   equ 1<<5                        ; 1.79Mhz for channel 3
HI13    equ 1<<2                        ; HiPass 1+3
KHZ15   equ 1<<0                        ; 15Khz

        mva     #[FAST1|FAST3|HI13] AUDCTL
;       mva     #[HI13|KHZ15] AUDCTL

        lda     #0
        ldx     #7
        sta:rpl AUDF1,x-                ; clear the 8 registers AUDF1..AUDF1+7

        mva     #5 AUDF3                ; Set up 1/16 dutycycle HiPass on 1+3
        mva     #3 AUDF1
        sta     STIMER
        sta     AUDF3                   ; A is still 3 here

        ldy     #0                      ; Buffer fill index for the first sector
        jmp     GetPDMData              ; start playing and filling the buffer

; Disabled alignment padding left by the author:
;       nop                             ; 2
;       bit     0                       ; 3
;       bit     $0100                   ; 4

; Unreachable: the jmp above skips this instruction.  It is kept only so
; that the amount of code ahead of PlayerLoop is the same as before the
; EOFFlag clear was moved into the setup section (the loops below rely
; on staying within one page).
        ldy     #0                      ; 2

; Main Player loop
; Entered from the end of Loop2 with Y = 0.
PlayerLoop
        clc                             ; 2
; issue next sector read and play first half of buffer
; -------------------------------------
; slot 0: sector number low byte = entry byte 0 + SectorOff
        ldx     buffer                  ; 4
        mva     hi,x AUDC3              ; 8
        mva     lo,x AUDC1              ; 8 (20)
        lda     (ptr1),y                ; 5 (no page boundary crossing)
        adc     SectorOff               ; 3
        sta     IDE_SNUM                ; 4 (32)
        iny                             ; 2
        bit     0                       ; 3 (37)
; -------------------------------------
; slot 1: middle byte = entry byte 1 + carry
        ldx     buffer+1                ; 4
        mva     hi,x AUDC3              ; 8
        mva     lo,x AUDC1              ; 8 (20)
        lda     (ptr1),y                ; 5
        adc     #0                      ; 2
        sta     IDE_CYLL                ; 4 (31)
        iny                             ; 2
        bit     $0100                   ; 4 (37)
; -------------------------------------
; slot 2: high byte = entry byte 2 + carry; the raw byte is also kept in
; EOFFlag for the end-of-data test after Loop2.
        ldx     buffer+2                ; 4
        mva     hi,x AUDC3              ; 8
        mva     lo,x AUDC1              ; 8 (20)
        lda     (ptr1),y                ; 5
        sta     EOFFlag                 ; 4
        adc     #0                      ; 2
        sta     IDE_CYLH                ; 4 (35)
        ldy     #5                      ; 2 (37)
                                        ; FIX: this was a filler 'nop'.  Y is no
                                        ; longer needed for (ptr1),y, so preload
                                        ; the index of the first byte Loop1 has
                                        ; to play when 'bne SameCluster' is taken
                                        ; (buffer+0..+4 are played by the
                                        ; unrolled slots).  Previously Y was
                                        ; still 2 there and bytes 2..4 were
                                        ; played twice.  Same size and cycle
                                        ; count as the nop.
; -------------------------------------
; slot 3: head register, sector count and the read command
        ldx     buffer+3                ; 4
        mva     hi,x AUDC3              ; 8
        mva     lo,x AUDC1              ; 8 (20)
        lda     #$E0                    ; 2
        sta     IDE_HEAD                ; 4
        lda     #$01                    ; 2 - one sector
        sta     IDE_SCNT                ; 4 (32)
        lda     #$20                    ; 2 - read command
        sta     IDE_STAT                ; 4 (38) - one over by these counts
;       nop                             ; 2
;       bit     0                       ; 3 (37)
; -------------------------------------
; slot 4: advance the sector offset, wrapping inside the cluster
        ldx     buffer+4                ; 4
        mva     hi,x AUDC3              ; 8
        mva     lo,x AUDC1              ; 8 (20)
        inc     SectorOff               ; 5 - bump sector for next time
        lda     SectorOff               ; 3
        and     SectorsPerClusterMask   ; 3
        sta     SectorOff               ; 3
        bne     SameCluster             ; 3 (37) taken; 2 (36) when not taken
; -------------------------------------
; Next cluster, so bump pointer
; slot 5: ptr1 low byte += 4
        ldx     buffer+5                ; 4
        mva     hi,x AUDC3              ; 8
        mva     lo,x AUDC1              ; 8 (20)
        lda     ptr1                    ; 3
        clc                             ; 2
        adc     #4                      ; 2
        sta     ptr1                    ; 3 (30)
        bit     0                       ; 3
        bit     $0100                   ; 4 (37)
; -------------------------------------
; slot 6: ptr1 high byte += carry (nothing in between alters carry)
        ldx     buffer+6                ; 4
        mva     hi,x AUDC3              ; 8
        mva     lo,x AUDC1              ; 8 (20)
        lda     ptr1+1                  ; 3
        adc     #0                      ; 2
        sta     ptr1+1                  ; 3 (28)
        bit     $0100                   ; 4
        ldy     #7                      ; 2 - buffer+0..+6 already played
        jmp     Loop1                   ; 3 (37)
; -------------------------------------
SameCluster
; play the rest of the buffer
; Y = 5 when arriving via SameCluster, 7 when arriving from slot 6.
Loop1                                   ; keep on one page
        ldx     buffer,y                ; 4
        mva     hi,x AUDC3              ; 8
        mva     lo,x AUDC1              ; 8 (20)
        stx     hposp0                  ; 4 - sample value moves player 0
        bit     $100                    ; 4
        bit     $100                    ; 4
        iny                             ; 2 (34)
        bne     Loop1                   ; 3 (37) (assuming same page)
; -------------------------------------
; Now play the first half of the IDE data while filling the buffer with the second half
; Note: data is interleaved, with the upper half of the sector in the high order bytes
; Entered with Y = 0; each pass plays two bytes and stores two, so Y
; wraps back to 0 after 256 bytes have been stored.
GetPDMData
; Disabled debug check of the drive status:
;       lda     IDE_STAT
;       cmp     #$58
;       beq     Loop2
;       brk
;       ldy     #0                      ; 2
Loop2
        ldx     IDE_DATA                ; 4 - grab a byte from the drive
        mva     hi,x AUDC3              ; 8
        mva     lo,x AUDC1              ; 8 (20)
        lda     IDE_DATA                ; 4
        sta     Buffer,y                ; 5
        iny                             ; 2
        nop                             ; 2
        stx     hposp0                  ; 4 (37)

        ldx     IDE_DATA                ; 4 - grab a byte from the drive
        mva     hi,x AUDC3              ; 8
        mva     lo,x AUDC1              ; 8 (20)
        lda     IDE_DATA                ; 4
        sta     Buffer,y                ; 5
        bit     0                       ; 3
        iny                             ; 2
        bne     Loop2                   ; 3 (37)

        inc     EOFFlag                 ; $FF -> 0 marks the end of the data
        beq     Done
        jmp     PlayerLoop
Done
        jmp     Start
.endp
Edited by flashjazzcat, Thu May 31, 2018 12:59 PM.