;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The X numbers are shifted right by 13 before use
; These numbers are in units of 1/8192
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The upper commented-out numbers reproduce the SciAm cover (almost)
; XSTART equ $fffffa86
; XINC equ 1
; XSTART equ ((-1)<<13)
; XINC equ (1<<9)
; XSTART equ 0
; XINC equ (1<<7)
; XSTART equ ((-2)<<13)
; XINC equ ((10<<11)/WIDTH)
; The Y numbers are shifted right by 13 before use
; These numbers are in units of 1/8192
; The upper commented-out numbers reproduce the SciAm cover
; YSTART equ $ffffde9a
; YINC equ 1
; YSTART equ ((-1)<<13)
; YINC equ (1<<9)
; YSTART equ ((-2)<<13)
; YINC equ (1<<7)
; YSTART equ ((-19)<<9)
; YINC equ ((6<<12)/WIDTH)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The most important thing in a Mandelbrot program is the inner loop
; The most important thing in a GPU program is to keep as much as possible
; in registers and as much of the rest in internal RAM
; First the inner loop:
; In order to handle both the Mandelbrot and Julia sets we make no assumptions
; about initial conditions.
; The basic loop is: (given xi, yi, cx, cy)
;       temp=xi*yi
;       sx=xi*xi
;       sy=yi*yi
;       yi=temp+temp+cy
;       xi=sx-sy+cx
;       count+=1
; iterate until count>maxcount or sx+sy>4
; Note that the numbers used here are 3.13 fixed point
; For a Mandelbrot xi=yi=0 at the start always
; Assume that the following registers are already set up
;       movei #MAXCNT,maxcnt
;       movei #FOUR,four

; Parameter buffer. The setup code below writes four consecutive longs here
; (xpos at +0, ypos at +4, jx at +8, jy at +12) and the iteration code reads
; them back in the same order as cx, cy, xi, yi.
INBUF equ $00f03810
; SEMAPHORE equ $0000bff0

; Image dimensions; WIDTH-1 and HEIGHT-1 are loaded into R21 and R18 below.
WIDTH equ 640
HEIGHT equ 480

; Viewing window, in units of 1/8192 (13 fraction bits).
; XSTART = (-2)<<13 is -2.0; YSTART = (-19)<<9 is -1.1875.
XSTART equ ((-2)<<13)
XINC equ ((10<<11)/WIDTH)
YSTART equ ((-19)<<9)
; NOTE(review): YINC is divided by WIDTH, not HEIGHT -- confirm this is the
; intended aspect-ratio handling.
YINC equ ((6<<12)/WIDTH)

        .gpu

; Register aliases used by the iteration loop
xi .equr R1
yi .equr R2
cx .equr R3
cy .equr R4
sx .equr R5
sy .equr R6
temp .equr R7
count .equr R8
maxcnt .equr R9
four .equr R10
inloop .equr R11
semaphore .equr R12
inbuf .equr R13
; Registers used by raw number below; the names on the left appear to be the
; 68000 registers they replaced (see "former 68000 code") -- TODO confirm
; A1 = R14
; d2 = R18
; A0 = R19
; d0 = R20
; d1 = R21
jx .equr R15
jy .equr R16
ypos .equr R17
xpos .equr R23
rinner .equr R24
router .equr R25

mandGPU::
        .org $f03000

; former 68000 code
; Mandle:
start_mandGPU::
        ; Starting point is (0,0): jx/jy are stored into the buffer and later
        ; loaded as the initial xi/yi.
        moveq #0,jx
        moveq #0,jy
        ; NOTE(review): R14 is not used again in the visible code; presumably
        ; an output address -- confirm against the rest of the routine.
        movei #$20000,R14
        movei #YSTART,ypos ; Initialize y position
        movei #HEIGHT-1,R18 ; presumably the row countdown -- TODO confirm
        movei #INBUF+8,R19
        ; Keep the addresses of the inner/outer labels in registers
        movei #inner,rinner
        movei #outer,router
        ; INBUF+8 <- jx, INBUF+12 <- jy
        move jx,R20
        store R20,(R19)
        addq #4,R19
        move jy,R20
        store R20,(R19)
outer:
        ; INBUF+4 <- ypos
        movei #INBUF+4,R19
        move ypos,R20
        store R20,(R19)
        movei #WIDTH-1,R21 ; presumably the column countdown -- TODO confirm
        movei #XSTART,xpos ; Initialize x position
inner:
        ; INBUF+0 <- xpos
        movei #INBUF,R19
        move xpos,R20
        store R20,(R19)
; end of 68000 code
; start_mandGPU::
        movei #loop,inloop ; address of the iteration loop label
        movei #(4<<13),four ; 4.0 in 3.13 fixed point (escape threshold)
        movei #254,maxcnt ; iteration limit
;       movei #$0000bff0,semaphore
        ; NOTE(review): this literal is the same address as INBUF; the two
        ; must be kept in sync by hand.
        movei #$00f03810,inbuf
        xor count,count ; count = 0
        ; Read cx, cy, xi, yi back from the four longs of the buffer
        load (inbuf),cx
        addq #4,inbuf
        load (inbuf),cy
        addq #4,inbuf
        load (inbuf),xi
        addq #4,inbuf
        load (inbuf),yi
        addq #4,inbuf
loop:
        move xi,temp
        imult yi,temp ; temp=xi*yi
        imult xi,xi ; xi=xi*xi
        imult yi,yi ; yi=yi*yi
        sharq #13,xi ; normalize all mult results
        sharq #13,temp
        sharq #13,yi
; The following code has been interleaved
; (from here until they are overwritten, xi and yi hold the squares)
        add temp,temp ; temp=temp+temp
        move yi,sy ; sy=yi*yi
        add cy,temp ; temp=temp+temp+cy
        move xi,sx ; sx=xi*xi
        move temp,yi ; yi=temp+temp+cy
        sub sy,xi ; xi=sx-sy
        add cx,xi ; xi=sx-sy+cx
        addq #1,count
        ; Iteration-limit test (header: "iterate until count>maxcount")
        cmp count,maxcnt
        jr MI,noloop ; MI is branch count