;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The X numbers are shifted right by 13 before use
; These numbers are in units of 1/8192

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The upper commented out numbers do this SciAm cover (almost)
;	XSTART		equ	$fffffa86
;	XINC		equ	1

;	XSTART		equ	((-1)<<13)
;	XINC		equ	(1<<9)

;	XSTART		equ	0
;	XINC		equ	(1<<7)

;	XSTART		equ	((-2)<<13)
;	XINC		equ	((10<<11)/WIDTH)


; The Y numbers are shifted right by 13 before use
; These numbers are in units of 1/8192

; The upper commented out numbers do this SciAm cover
;	YSTART		equ	$ffffde9a	
;	YINC		equ	1

;	YSTART		equ	((-1)<<13)
;	YINC		equ	(1<<9)

;	YSTART		equ	((-2)<<13)
;	YINC		equ	(1<<7)

;	YSTART		equ	((-19)<<9)
;	YINC		equ	((6<<12)/WIDTH)


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The most important thing in a Mandelbrot program is the inner loop.
; The most important thing in a GPU program is to keep as much as possible
; in registers and as much of the rest as possible in internal RAM.
; First the inner loop:
; In order to handle both the Mandelbrot and Julia sets we make no assumptions
; about initial conditions.
; The basic loop is: (given xi, yi, cx, cy)
; temp=xi*yi
; sx=xi*xi
; sy=yi*yi
; yi=temp+temp+cy
; xi=sx-sy+cx
; count+=1
; iterate until count>maxcount or sx+sy>=4

; Note that the numbers used here are 3.13 fixed point
; For a Mandelbrot set, xi=yi=0 at the start always

; Assume that the following registers are already set up
;       movei   #MAXCNT,maxcnt
;       movei   #FOUR,four

	; --- Image size in pixels ---
	WIDTH		equ	640
	HEIGHT		equ	480

	; --- Region of the complex plane, in units of 1/8192 (3.13 fixed point) ---
	; X axis: start at -2.0, step (10<<11)/640 = 32 per pixel
	XSTART		equ	((-2)<<13)
	XINC		equ	((10<<11)/WIDTH)

	; Y axis: start at -19/16 = -1.1875.  The step is (6<<12)/640, i.e. 38
	; per row after integer division; note it is divided by WIDTH, not HEIGHT.
	YSTART		equ	((-19)<<9)
	YINC		equ	((6<<12)/WIDTH)

	; --- Parameter block: cx, cy, initial xi, initial yi (one long each) ---
	INBUF		equ	$f03810
;	SEMAPHORE	equ	$0000bff0	; disabled, kept for reference

	.gpu				; assemble what follows as GPU code

; ---------------------------------------------------------------------------
; Register aliases
; ---------------------------------------------------------------------------

; Escape-time iteration state
	xi		.equr	R1	; current x (overwritten with xi*xi inside the loop)
	yi		.equr	R2	; current y (overwritten with yi*yi inside the loop)
	cx		.equr	R3	; x constant added on every iteration
	cy		.equr	R4	; y constant added on every iteration
	sx		.equr	R5	; xi squared
	sy		.equr	R6	; yi squared, later sx+sy for the escape test
	temp		.equr	R7	; xi*yi product
	count		.equr	R8	; iteration counter

; Per-pixel constants and pointers
	maxcnt		.equr	R9	; iteration limit
	four		.equr	R10	; escape threshold (4.0 in 3.13 fixed point)
	inloop		.equr	R11	; address of the iteration loop, for jump
	semaphore	.equr	R12	; only appears in commented-out code in this section
	inbuf		.equr	R13	; read pointer into the parameter block

; Image scan state
	jx		.equr	R15	; value stored as the initial xi
	jy		.equr	R16	; value stored as the initial yi
	ypos		.equr	R17	; y coordinate of the current row
	xpos		.equr	R23	; x coordinate of the current pixel
	rinner		.equr	R24	; address of the per-pixel loop, for jump
	router		.equr	R25	; address of the per-row loop, for jump

; The following registers are used by number, not through an alias.  The
; names on the right are presumably the 68000 registers of the driver loop
; this code was ported from.
;   R14 = A1	output pointer for the iteration-count bytes
;   R18 = d2	row counter
;   R19 = A0	pointer used to write the parameter block
;   R20 = d0	staging register for values about to be stored
;   R21 = d1	pixel counter within the row

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; mandGPU - render the complete WIDTH x HEIGHT image on the GPU
;
; For every pixel the escape-time loop is run on 3.13 fixed-point values and
; the resulting iteration count is written as one byte to the output buffer,
; left to right, top row first.  After the last row the routine writes 0 to
; the GPU control register, which halts the GPU.
;
; Parameter block at INBUF (four longs, written and read back by this code):
;   INBUF+0   cx          <- xpos, rewritten for every pixel
;   INBUF+4   cy          <- ypos, rewritten for every row
;   INBUF+8   initial xi  <- jx (0), written once
;   INBUF+12  initial yi  <- jy (0), written once
;
; Registers used by number:
;   R14  output byte pointer          R18  rows still to do, minus 1
;   R19  pointer into INBUF           R20  staging register for stores
;   R21  pixels left in row, minus 1  R26  scratch for XINC / YINC
;   R30 / R31  value / address of the final halt write
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

	MAND_OUTBUF	equ	$20000		; first byte of the iteration-count output
	MAND_MAXCNT	equ	254		; iteration limit; loop stops once count exceeds it
	MAND_ESCAPE	equ	(4<<13)		; 4.0 in 3.13 fixed point
	MAND_G_CTRL	equ	$00f02114	; GPU control register; writing 0 halts the GPU

mandGPU::
	.org     $f03000

; ---------------------------------------------------------------------------
; One-time setup (this part replaces the former 68000 driver code)
; ---------------------------------------------------------------------------
start_mandGPU::
	moveq	#0,jx			; Mandelbrot: iteration starts at xi = 0
	moveq	#0,jy			;             and yi = 0

	movei	#MAND_OUTBUF,R14	; output pointer

	movei	#YSTART,ypos		; Initialize y position
	movei	#HEIGHT-1,R18		; row counter, runs down to -1

	movei	#inner,rinner
	movei	#outer,router

	; Loop-invariant values.  Nothing below writes these three registers,
	; so they are loaded once here instead of once per pixel.
	movei	#loop,inloop
	movei	#MAND_ESCAPE,four
	movei	#MAND_MAXCNT,maxcnt

	movei	#INBUF+8,R19		; publish the initial xi / yi

	move	jx,R20
	store	R20,(R19)
	addq	#4,R19

	move	jy,R20
	store	R20,(R19)

; ---------------------------------------------------------------------------
; Per-row setup
; ---------------------------------------------------------------------------
outer:
	movei	#INBUF+4,R19		; cy = ypos for the whole row

	move	ypos,R20
	store	R20,(R19)

	movei	#WIDTH-1,R21		; pixel counter, runs down to -1
	movei	#XSTART,xpos		; Initialize x position

; ---------------------------------------------------------------------------
; Per-pixel setup
; ---------------------------------------------------------------------------
inner:
	movei	#INBUF,R19		; cx = xpos for this pixel

	move	xpos,R20
	store	R20,(R19)
; end of the former 68000 code

	movei	#INBUF,inbuf		; read the four parameters back

	xor	count,count		; count = 0

	load	(inbuf),cx
	addq	#4,inbuf

	load	(inbuf),cy
	addq	#4,inbuf

	load	(inbuf),xi
	addq	#4,inbuf

	load	(inbuf),yi		; inbuf is reloaded per pixel, no final increment needed

; ---------------------------------------------------------------------------
; Escape-time iteration.  The instruction order is deliberate; do not reorder.
; ---------------------------------------------------------------------------
loop:
	move    xi,temp
	imult   yi,temp         ; temp=xi*yi

	imult   xi,xi           ; xi=xi*xi

	imult   yi,yi           ; yi=yi*yi

	sharq   #13,xi          ; normalize all mult results
	sharq   #13,temp
	sharq   #13,yi

; The following code has been interleaved

	add     temp,temp       ; temp=temp+temp

	move    yi,sy           ; sy=yi*yi

	add     cy,temp         ; temp=temp+temp+cy

	move    xi,sx           ; sx=xi*xi

	move    temp,yi         ; yi=temp+temp+cy

	sub     sy,xi           ; xi=sx-sy

	add     cx,xi           ; xi=sx-sy+cx


	addq    #1,count
	cmp     count,maxcnt    ; flags from maxcnt-count

	jr      MI,noloop       ; MI: count > maxcnt, iteration limit reached
;	nop			; not needed: the add below fills the delay slot and
				; its result (sy) is not read once we are at noloop

	add     sx,sy           ; sy=sx+sy
	cmp     sy,four         ; flags from four-(sx+sy)

	jr      EQ,noloop       ; sx+sy == 4: escaped
	nop
	jump    CC,(inloop)     ; no borrow: sx+sy < 4, keep iterating
	nop
				; fall through: sx+sy > 4, escaped

; ---------------------------------------------------------------------------
; Pixel done: emit the count and advance (former 68000 code again)
; ---------------------------------------------------------------------------
noloop:
;	store   count,(semaphore)	; original hand-off to the host, disabled
	storeb	count,(R14)		; one byte per pixel
	addq	#1,R14

	; addq can only encode 1..32, so the increments go through R26
	movei	#XINC,R26
	add	R26,xpos

	subq	#1,R21
	jump	PL,(rinner)		; more pixels in this row
	nop

	movei	#YINC,R26
	add	R26,ypos

	subq	#1,R18
	jump	PL,(router)		; more rows
	nop
	nop
; end of the former 68000 code

;       NOTE: This halts the GPU
	moveq   #0,R30
	movei   #MAND_G_CTRL,R31
	store   R30,(R31)

	; Padding after the halt write (presumably so that nothing meaningful
	; is fetched while the GPU stops).
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
end_mandGPU::

