ar100: Support for ar100 (#6054)

Add files to support AR100 Signed-off-by: Elias Bakken <elias@iagent.no>
2025-08-09 14:55:09 -06:00 · 2023-02-21 02:15:01 +01:00 · 2023-02-21 02:15:01 +01:00 · b7978d37b3
commit b7978d37b3
parent d7bd7f1f4b
25 changed files with 3715 additions and 0 deletions
--- a/lib/ar100/asm/spr.h
+++ b/lib/ar100/asm/spr.h
--- a/lib/ar100/macros.S
+++ b/lib/ar100/macros.S
@ -0,0 +1,38 @@
+/*
+ * Copyright © 2013-2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright © 2017-2020 The Crust Firmware Authors.
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef MACROS_S
+#define MACROS_S
+
+	/* This macro marks a global data declaration. */
+	.macro data name
+	.section .data.\name, "aw", @progbits
+	.global \name
+	.type \name, %object
+	.align 4
+\name:
+	.endm
+
+	/* This macro marks the beginning of a function. */
+	.macro func name
+	.section .text.\name, "ax", @progbits
+	.global \name
+	.type \name, %function
+	.func \name
+	.cfi_sections .debug_frame
+	.cfi_startproc
+	.align 4
+\name:
+	.endm
+
+	/* This macro marks the end of a function. */
+	.macro endfunc name
+	.cfi_endproc
+	.endfunc
+	.size \name, . - \name
+	.endm
+
+#endif /* MACROS_S */
--- a/lib/ar100/runtime.S
+++ b/lib/ar100/runtime.S
@ -0,0 +1,117 @@
+/*
+ * Copyright © 2017-2020 The Crust Firmware Authors.
+ * SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
+ */
+
+#include <macros.S>
+
+func __divsi3
+	l.sflts	r3, r0
+	l.sw	-8(r1), r18
+	l.bnf	1f
+	l.ori	r18, r0, 0
+	l.sub	r3, r0, r3		# Negate x if it is negative
+	l.addi	r18, r18, 1		# Increment the flag if x is negative
+1:	l.sflts	r4, r0
+	l.bnf	2f
+	l.sw	-4(r1), r9
+	l.sub	r4, r0, r4		# Negate y if it is negative
+	l.addi	r18, r18, -1		# Decrement the flag if y is negative
+2:	l.jal	__udivsi3
+	l.addi	r1, r1, -8
+	l.sfne	r18, r0
+	l.bnf	3f
+	l.addi	r1, r1, 8
+	l.sub	r11, r0, r11		# Negate q if the flag is nonzero
+3:	l.lwz	r9, -4(r1)
+	l.jr	r9
+	l.lwz	r18, -8(r1)
+endfunc __divsi3
+
+/*
+ * Of the three ORBIS32 32-bit multiplication instructions (l.mul, l.muli, and
+ * l.mulu), only l.mul works. By passing "-msoft-mul" to the compiler, and
+ * delegating to this function, we can force all multiplication to use l.mul.
+ */
+func __mulsi3
+	l.jr	r9
+	l.mul	r11, r3, r4
+endfunc __mulsi3
+
+/*
+ * Derived from the "best method for counting bits in a 32-bit integer" at
+ * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel.
+ *
+ * Signed multiplication is used because l.mulu is broken in hardware. This is
+ * safe because the previous bit masking ensures neither operand is negative.
+ */
+func __popcountsi2
+	l.movhi	r5, 0x5555		# Statement 1:
+	l.ori	r5, r5, 0x5555		#	r5 = 0x55555555
+	l.srli	r4, r3, 1		#	r4 = v >> 1
+	l.and	r4, r4, r5		#	r4 = (v >> 1) & 0x55555555
+	l.sub	r3, r3, r4		#	v = v - ((v >> 1) & 0x55555555)
+	l.movhi	r5, 0x3333		# Statement 2:
+	l.ori	r5, r5, 0x3333		#	r5 = 0x33333333
+	l.srli	r4, r3, 2		#	r4 = v >> 2
+	l.and	r4, r4, r5		#	r4 = (v >> 2) & 0x33333333
+	l.and	r3, r3, r5		#	v = v & 0x33333333
+	l.add	r3, r3, r4		#	v += ((v >> 2) & 0x33333333)
+	l.movhi	r5, 0x0f0f		# Statement 3:
+	l.ori	r5, r5, 0x0f0f		#	r5 = 0x0f0f0f0f
+	l.srli	r4, r3, 4		#	r4 = v >> 4
+	l.add	r4, r3, r4		#	r4 = v + (v >> 4)
+	l.and	r4, r4, r5		#	r4 = v + (v >> 4) & 0x0f0f0f0f
+	l.movhi	r5, 0x0101
+	l.ori	r5, r5, 0x0101		#	r5 = 0x01010101
+	l.mul	r11, r4, r5		#	c = r4 * 0x01010101
+	l.jr	r9
+	l.srli	r11, r11, 24		#	return c >> 24
+endfunc __popcountsi2
+
+/*
+ * Optimized implementation of the "shift divisor method" algorithm from
+ * T. Rodeheffer. Software Integer Division. Microsoft Research, 2008.
+ *
+ * In addition to returning the quotient in r11, this function also returns
+ * the remainder in r12. __umodsi3 simply copies the remainder into r11.
+ */
+func __udivsi3				# u32 __udivsi3(u32 x, u32 y) {
+	l.sfeqi	r4, 1			#	if (y == 1)
+	l.bf	5f			#		goto identity;
+	l.ori	r12, r3, 0		#	u32 r = x;
+	l.ori	r5, r4, 0		#	u32 y0 = y;
+	l.addi	r11, r0, 0		#	u32 q = 0;
+	l.sfltu	r3, r4			#	if (x >= y) {
+	l.bf	2f
+	l.sub	r3, r3, r4		#		x = x−y;
+1:	l.sfltu	r3, r4			#		while (x >= y) {
+	l.bf	2f
+	l.sub	r3, r3, r4		#			x = x−y;
+	l.add	r4, r4, r4		#			y *= 2;
+	l.j	1b			#		}
+2:	l.sfltu	r12, r4			#	} for (;;) {
+	l.bf	3f			#		if (r >= y) {
+	l.sfeq	r4, r5			#		[if (y == y0)]
+	l.sub	r12, r12, r4		#			r = r−y;
+	l.addi	r11, r11, 1		#			q = q + 1;
+3:	l.bf	4f			#		} if (y == y0) break;
+	l.srli	r4, r4, 1		#		y >>= 1;
+	l.j	2b			#	}
+	l.add	r11, r11, r11		#		q *= 2;
+4:	l.jr	r9			#	return q;
+	l.nop
+5:	l.ori	r11, r3, 0		# identity:
+	l.jr	r9			#	return x;
+	l.ori	r12, r0, 0		#	r = 0;
+endfunc __udivsi3			# }
+
+func __umodsi3
+	l.sw	-4(r1), r9
+	l.jal	__udivsi3
+	l.addi	r1, r1, -4
+	l.addi	r1, r1, 4
+	l.lwz	r9, -4(r1)
+	l.jr	r9
+	l.ori	r11, r12, 0
+endfunc __umodsi3
--- a/lib/ar100/start.S
+++ b/lib/ar100/start.S
@ -0,0 +1,41 @@
+/*
+ * Copyright © 2017-2020 The Crust Firmware Authors.
+ * SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
+ */
+
+#include <macros.S>
+#include <asm/spr.h>
+
+func start
+	l.mfspr	r2, r0, SPR_SYS_PPC_ADDR
+	l.sfltui r2, 0x4000		# Size of exception vector area
+	l.bf	1f
+	l.srli	r2, r2, 8		# Vector address → exception number
+	l.addi	r2, r0, 0		# Set to zero if not an exception
+1:	l.addi	r3, r0, 0		# Invalidate instruction cache
+	l.addi	r4, r0, 4096		# Cache lines (256) * block size (16)
+2:	l.mtspr	r0, r3, SPR_ICACHE_ICBIR_ADDR
+	l.sfltu	r3, r4
+	l.bf	2b
+	l.addi	r3, r3, 16		# Cache block size
+	l.psync				# Flush CPU pipeline
+	l.mfspr	r3, r0, SPR_SYS_SR_ADDR	# Enable instruction cache
+	l.ori	r3, r3, SPR_SYS_SR_ICE_MASK
+	l.mtspr	r0, r3, SPR_SYS_SR_ADDR
+	l.nop				# One cache block of nops
+	l.nop
+	l.nop
+	l.nop
+	l.movhi	r3, hi(__bss_start)	# Clear .bss
+	l.ori	r3, r3, lo(__bss_start)
+	l.movhi	r4, hi(__bss_end)
+	l.ori	r4, r4, lo(__bss_end)
+3:	l.sw	0(r3), r0
+	l.sfltu	r3, r4
+	l.bf	3b
+	l.addi	r3, r3, 4
+	l.movhi	r1, hi(__stack_end)
+	l.ori	r1, r1, lo(__stack_end)	# Initialize stack pointer
+	l.j	main			# Jump to C entry point
+	l.or	r3, r2, r2
+endfunc start