+@ one Newton-Raphson refinement step for a fixed-point reciprocal estimate
+@ literally: t1   = zero - (high word of den * rcp) << 2
+@            rcp += high word of (t1 * rcp), signed, plus any carry from
+@                   the low-word add
+@ NOTE(review): read as fixed point this looks like
+@   rcp += rcp * (1 - den * rcp)
+@ with rcp in 2.30 and den in 0.32 format -- scaling inferred from the
+@ "~1.2" initial-estimate comment in udiv_newton, please confirm
+@ in: rcp (updated in place), den, zero (udiv_newton passes a register set to 0)
+@ trash: t1 t2
+.macro newton_step rcp den zero t1 t2
+ umull \t2, \t1, \den, \rcp @ \t1 = high word of den * rcp; \t2 (low word) is dummy
+ sub \t1, \zero, \t1, lsl #2 @ \t1 = \zero - (\t1 << 2); bits shifted out the top are dropped
+ smlal \t2, \rcp, \t1, \rcp @ signed 64-bit accumulate: (\rcp:\t2) += \t1 * \rcp
+.endm
+
+@ unsigned divide rd = (rm << 16) / rs, computed by multiplying the shifted
+@ numerator with a Newton-Raphson reciprocal of the normalized divisor
+@ no div by 0 check
+@ in: rm (numerator), rs (divisor)
+@ out: rd
+@ trash: rm rs t1 t2 t3 t4
+@ NOTE(review): the reciprocal gets a fixed four refinement steps, so the
+@ quotient is an approximation; accuracy of the low bits is not checked here
+.macro udiv_newton rd rm rs t1 t2 t3 t4
+ lsl \rd, \rm, #16 @ rd = numerator << 16; the upper 16 bits of rm are shifted out
+ clz \t1, \rs @ t1 = number of leading zero bits in the divisor
+ mov \t2, #0 @ t2 = 0, passed as the "zero" operand of newton_step
+ lsl \rs, \t1 @ normalize for the algo: shift the divisor left by t1
+ mov \rm, #0x4d000000 @ rm is reused as the reciprocal; initial estimate ~1.2 (0x4d000000 / 2^30)
+
+ @ four refinement steps: rm = reciprocal, rs = normalized divisor,
+ @ t2 = 0, t3/t4 = scratch
+ newton_step \rm, \rs, \t2, \t3, \t4
+ newton_step \rm, \rs, \t2, \t3, \t4
+ newton_step \rm, \rs, \t2, \t3, \t4
+ newton_step \rm, \rs, \t2, \t3, \t4
+
+ umull \t4, \rd, \rm, \rd @ rd = high word of reciprocal * shifted numerator; t4 (low word) is dummy
+ @ NOTE(review): 30 - t1 goes negative once clz(rs) exceeds 30 (rs of 0 or 1);
+ @ callers are expected to stay inside the range stated on the next line
+ rsb \t2, \t1, #30 @ t2 = 30 - t1; here t1 is 1..15
+ mov \rd, \rd, lsr \t2 @ shift right by t2 to undo the normalization scaling
+.endm
+
+@ unsigned divide rd = rm / rs; 16.16 result
+@ no div by 0 check
+@ in: rm, rs
+@ out: rd
+@ trash: rm rs t1 t2 t3 t4
+@ forwards every operand unchanged to udiv_newton
+.macro udiv rd rm rs t1 t2 t3 t4
+ @ disabled alternative, kept for reference (udiv_ is not defined in the
+ @ part of the file visible here -- presumably another divide
+ @ implementation, TODO confirm):
+ @udiv_ \rd, \rm, \rs
+ udiv_newton \rd, \rm, \rs, \t1, \t2, \t3, \t4
+.endm