;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The X numbers are shifted right by 13 before use
; These numbers are in units of 1/8192
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The upper commented out numbers do this SciAm cover (almost)
; XSTART equ $fffffa86
; XINC equ 1
; XSTART equ ((-1)<<13)
; XINC equ (1<<9)
; XSTART equ 0
; XINC equ (1<<7)
; XSTART equ ((-2)<<13)
; XINC equ ((10<<11)/WIDTH)
; The Y numbers are shifted right by 13 before use
; These numbers are in units of 1/8192
; The upper commented out numbers do this SciAm cover
; YSTART equ $ffffde9a
; YINC equ 1
; YSTART equ ((-1)<<13)
; YINC equ (1<<9)
; YSTART equ ((-2)<<13)
; YINC equ (1<<7)
; YSTART equ ((-19)<<9)
; YINC equ ((6<<12)/WIDTH)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The most important thing in a Mandelbrot program is the inner loop
; The most important thing in a GPU program is to keep as much as possible
; in registers and as much of the rest in internal RAM
; First the inner loop:
; In order to handle both the Mandelbrot and Julia sets we make no assumptions
; about initial conditions.
; The basic loop is: (given xi, yi, cx, cy)
; temp=xi*yi
; sx=xi*xi
; sy=yi*yi
; yi=temp+temp+cy
; xi=sx-sy+cx
; count+=1
; iterate until count>maxcount or sx+sy>4
; Note that the numbers used here are 3.13 fixed point
; For a Mandelbrot set, xi=yi=0 at the start always
; Assume that the following registers are already set up
; movei #MAXCNT,maxcnt
; movei #FOUR,four
; ---------------------------------------------------------------------------
; Assembly-time constants.
; All coordinate values are fixed point in units of 1/8192 (13 fraction bits).
; ---------------------------------------------------------------------------
; Base address of the four-longword parameter block the routine fills in and
; then reads back for each pixel: +0 cx, +4 cy, +8 initial xi, +12 initial yi.
INBUF equ $00f03810
; SEMAPHORE equ $0000bff0
; Pixels per row (the inner pixel loop runs WIDTH times per row).
WIDTH equ 640
; Number of rows (the outer row loop runs HEIGHT times).
HEIGHT equ 480
; cx of the first pixel in every row: -2.0
XSTART equ ((-2)<<13)
; cx step per pixel: 20480/640 = 32 (2.5 units across a full row)
XINC equ ((10<<11)/WIDTH)
; cy of the first row: -9728/8192 = -1.1875
YSTART equ ((-19)<<9)
; cy step per row: 24576/640 (38 if the assembler's division truncates).
; NOTE(review): this divides by WIDTH rather than HEIGHT, as does the
; commented-out variant in the header above - confirm this is intended.
YINC equ ((6<<12)/WIDTH)
; ---------------------------------------------------------------------------
; Register allocation for the GPU routine that follows.
; ---------------------------------------------------------------------------
; Switch the assembler to GPU code for everything that follows.
.gpu
; --- per-point iteration state ---
; current x / y of the iterated point
xi .equr R1
yi .equr R2
; the constant c = (cx, cy) added on every iteration
cx .equr R3
cy .equr R4
; squares of the previous xi / yi; sy also receives sx+sy for the escape test
sx .equr R5
sy .equr R6
; xi*yi, then 2*xi*yi + cy
temp .equr R7
; iterations performed so far for the current point
count .equr R8
; iteration limit
maxcnt .equr R9
; escape threshold (4.0 in 3.13 fixed point)
four .equr R10
; address of the iteration loop, for the register-indirect jump back to it
inloop .equr R11
; only referenced from commented-out code in the routine below
semaphore .equr R12
; read pointer that walks the INBUF parameter block
inbuf .equr R13
; --- registers standing in for the 68000 registers of the original driver ---
; A1 = R14 : output pointer, one byte written per pixel
; d2 = R18 : rows remaining, counts down from HEIGHT-1
; A0 = R19 : address used for the stores into the INBUF parameter block
; d0 = R20 : scratch holding the value about to be stored
; d1 = R21 : pixels remaining in the current row, counts down from WIDTH-1
; initial xi / yi written to INBUF+8 / INBUF+12 (both zero in the routine below)
jx .equr R15
jy .equr R16
; cy of the current row / cx of the current pixel
ypos .equr R17
xpos .equr R23
; addresses of the pixel-loop and row-loop entry points (register-indirect jumps)
rinner .equr R24
router .equr R25
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; mandGPU / start_mandGPU
;
; Renders a WIDTH x HEIGHT map of Mandelbrot iteration counts, one byte per
; pixel, row after row, starting at OUTBUF, then halts the GPU.
;
; For each pixel the routine writes cx (and, per row, cy) into the parameter
; block at INBUF, reloads cx, cy, xi, yi from that block and iterates
;       temp = xi*yi ; sx = xi*xi ; sy = yi*yi
;       yi = 2*temp + cy ; xi = sx - sy + cx ; count += 1
; in 3.13 fixed point (every product is shifted right by 13).  Iteration
; stops when count > MAXCNT or when sx+sy >= FOUR; the byte stored for the
; pixel is the value of count at that moment (MAXCNT+1 for points that never
; reach the threshold).
;
; In:     nothing; every register used is initialised here.
; Out:    WIDTH*HEIGHT bytes at OUTBUF; INBUF+0..+15 overwritten.
; Clobb:  R1-R11, R13-R21, R23-R27, R30, R31, flags.
;
; Every jump/jr is followed by one instruction that executes whether or not
; the jump is taken; those slots are marked "delay slot" below.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
OUTBUF equ $00020000 ; first byte of the iteration-count output buffer
MAXCNT equ 254 ; iteration limit per point
FOUR equ (4<<13) ; escape threshold 4.0 in 3.13 fixed point
GPU_CTRL equ $00f02114 ; control register; writing 0 halts the GPU (see end)

mandGPU::
	.org $f03000

start_mandGPU::
; ---- one-time setup (this part replaces the former 68000 driver code) ----
	moveq	#0,jx			; initial xi = 0
	moveq	#0,jy			; initial yi = 0
	movei	#OUTBUF,R14		; output byte pointer
	movei	#YSTART,ypos		; cy of the first row
	movei	#HEIGHT-1,R18		; rows remaining after the current one
	movei	#inner,rinner		; jump targets are kept in registers
	movei	#outer,router
	; These values never change while rendering, so they are loaded once
	; here instead of once per pixel.
	movei	#loop,inloop
	movei	#FOUR,four
	movei	#MAXCNT,maxcnt
	movei	#XINC,R26		; cx step per pixel
	movei	#YINC,R27		; cy step per row
	; Initial xi / yi are the same for every pixel: store them once.
	movei	#INBUF+8,R19
	move	jx,R20
	store	R20,(R19)		; INBUF+8  = initial xi
	addq	#4,R19
	move	jy,R20
	store	R20,(R19)		; INBUF+12 = initial yi

; ---- per row ----
outer:
	movei	#INBUF+4,R19
	move	ypos,R20
	store	R20,(R19)		; INBUF+4 = cy for this row
	movei	#WIDTH-1,R21		; pixels remaining after the current one
	movei	#XSTART,xpos		; cx of the first pixel in the row

; ---- per pixel ----
inner:
	movei	#INBUF,R19
	move	xpos,R20
	store	R20,(R19)		; INBUF+0 = cx for this pixel
	; (end of the part ported from the 68000 driver)

	; Load the iteration parameters back from the parameter block.
	movei	#INBUF,inbuf
	xor	count,count		; count = 0
	load	(inbuf),cx
	addq	#4,inbuf
	load	(inbuf),cy
	addq	#4,inbuf
	load	(inbuf),xi
	addq	#4,inbuf
	load	(inbuf),yi		; inbuf is reloaded for the next pixel,
					; so no further increment is needed

; ---- iteration loop ----
loop:
	move	xi,temp
	imult	yi,temp			; temp = xi*yi
	imult	xi,xi			; xi = xi*xi
	imult	yi,yi			; yi = yi*yi
	sharq	#13,xi			; renormalise all products to 3.13
	sharq	#13,temp
	sharq	#13,yi
	; The following code has been interleaved
	add	temp,temp		; temp = 2*xi*yi
	move	yi,sy			; sy = yi*yi
	add	cy,temp			; temp = 2*xi*yi + cy
	move	xi,sx			; sx = xi*xi
	move	temp,yi			; new yi
	sub	sy,xi			; xi = sx - sy
	add	cx,xi			; new xi = sx - sy + cx
	addq	#1,count
	cmp	count,maxcnt		; flags from maxcnt - count
	jr	MI,noloop		; negative: count > maxcnt, give up
	add	sx,sy			; delay slot: sy = sx+sy (harmless when
					; the jr is taken, sy is not used after)
	cmp	sy,four			; flags from four - (sx+sy)
	jr	EQ,noloop		; sx+sy == 4.0: stop
	nop				; delay slot
	jump	CC,(inloop)		; no borrow: sx+sy < 4.0, iterate again
	nop				; delay slot

; ---- pixel done: emit the count and advance ----
noloop:
	; (the stand-alone variant stored count to a semaphore word instead:
	;  store count,(semaphore))
	storeb	count,(R14)		; one byte per pixel
	addq	#1,R14
	add	R26,xpos		; cx += XINC
	subq	#1,R21
	jump	PL,(rinner)		; more pixels left in this row
	nop				; delay slot
	add	R27,ypos		; cy += YINC
	subq	#1,R18
	jump	PL,(router)		; more rows left
	nop				; delay slot
	nop

; ---- all rows done ----
; NOTE: This halts the GPU
	movei	#0,R30
	movei	#GPU_CTRL,R31
	store	R30,(R31)
	; Padding after the halting store, kept from the original.
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
end_mandGPU::