mirror of
https://github.com/Klipper3d/klipper.git
synced 2025-08-09 14:55:09 -06:00
ar100: Support for ar100 (#6054)
Add files to support AR100 Signed-off-by: Elias Bakken <elias@iagent.no>
This commit is contained in:
parent
d7bd7f1f4b
commit
b7978d37b3
25 changed files with 3715 additions and 0 deletions
2667
lib/ar100/asm/spr.h
Normal file
2667
lib/ar100/asm/spr.h
Normal file
File diff suppressed because it is too large
Load diff
38
lib/ar100/macros.S
Normal file
38
lib/ar100/macros.S
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Copyright © 2013-2017, ARM Limited and Contributors. All rights reserved.
|
||||
* Copyright © 2017-2020 The Crust Firmware Authors.
|
||||
* SPDX-License-Identifier: BSD-3-Clause
|
||||
*/
|
||||
|
||||
#ifndef MACROS_S
|
||||
#define MACROS_S
|
||||
|
||||
/* This macro marks a global data declaration. */
|
||||
.macro data name
|
||||
.section .data.\name, "aw", @progbits
|
||||
.global \name
|
||||
.type \name, %object
|
||||
.align 4
|
||||
\name:
|
||||
.endm
|
||||
|
||||
/* This macro marks the beginning of a function. */
|
||||
.macro func name
|
||||
.section .text.\name, "ax", @progbits
|
||||
.global \name
|
||||
.type \name, %function
|
||||
.func \name
|
||||
.cfi_sections .debug_frame
|
||||
.cfi_startproc
|
||||
.align 4
|
||||
\name:
|
||||
.endm
|
||||
|
||||
/* This macro marks the end of a function. */
|
||||
.macro endfunc name
|
||||
.cfi_endproc
|
||||
.endfunc
|
||||
.size \name, . - \name
|
||||
.endm
|
||||
|
||||
#endif /* MACROS_S */
|
117
lib/ar100/runtime.S
Normal file
117
lib/ar100/runtime.S
Normal file
|
@ -0,0 +1,117 @@
|
|||
/*
|
||||
* Copyright © 2017-2020 The Crust Firmware Authors.
|
||||
* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
|
||||
*/
|
||||
|
||||
#include <macros.S>
|
||||
|
||||
func __divsi3
|
||||
l.sflts r3, r0
|
||||
l.sw -8(r1), r18
|
||||
l.bnf 1f
|
||||
l.ori r18, r0, 0
|
||||
l.sub r3, r0, r3 # Negate x if it is negative
|
||||
l.addi r18, r18, 1 # Increment the flag if x is negative
|
||||
1: l.sflts r4, r0
|
||||
l.bnf 2f
|
||||
l.sw -4(r1), r9
|
||||
l.sub r4, r0, r4 # Negate y if it is negative
|
||||
l.addi r18, r18, -1 # Decrement the flag if y is negative
|
||||
2: l.jal __udivsi3
|
||||
l.addi r1, r1, -8
|
||||
l.sfne r18, r0
|
||||
l.bnf 3f
|
||||
l.addi r1, r1, 8
|
||||
l.sub r11, r0, r11 # Negate q if the flag is nonzero
|
||||
3: l.lwz r9, -4(r1)
|
||||
l.jr r9
|
||||
l.lwz r18, -8(r1)
|
||||
endfunc __divsi3
|
||||
|
||||
/*
|
||||
* Of the three ORBIS32 32-bit multiplication instructions (l.mul, l.muli, and
|
||||
* l.mulu), only l.mul works. By passing "-msoft-mul" to the compiler, and
|
||||
* delegating to this function, we can force all multiplication to use l.mul.
|
||||
*/
|
||||
func __mulsi3
|
||||
l.jr r9
|
||||
l.mul r11, r3, r4
|
||||
endfunc __mulsi3
|
||||
|
||||
/*
|
||||
* Derived from the "best method for counting bits in a 32-bit integer" at
|
||||
* https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel.
|
||||
*
|
||||
* Signed multiplication is used because l.mulu is broken in hardware. This is
|
||||
* safe because the previous bit masking ensures neither operand is negative.
|
||||
*/
|
||||
func __popcountsi2
|
||||
l.movhi r5, 0x5555 # Statement 1:
|
||||
l.ori r5, r5, 0x5555 # r5 = 0x55555555
|
||||
l.srli r4, r3, 1 # r4 = v >> 1
|
||||
l.and r4, r4, r5 # r4 = (v >> 1) & 0x55555555
|
||||
l.sub r3, r3, r4 # v = v - ((v >> 1) & 0x55555555)
|
||||
l.movhi r5, 0x3333 # Statement 2:
|
||||
l.ori r5, r5, 0x3333 # r5 = 0x33333333
|
||||
l.srli r4, r3, 2 # r4 = v >> 2
|
||||
l.and r4, r4, r5 # r4 = (v >> 2) & 0x33333333
|
||||
l.and r3, r3, r5 # v = v & 0x33333333
|
||||
l.add r3, r3, r4 # v += ((v >> 2) & 0x33333333)
|
||||
l.movhi r5, 0x0f0f # Statement 3:
|
||||
l.ori r5, r5, 0x0f0f # r5 = 0x0f0f0f0f
|
||||
l.srli r4, r3, 4 # r4 = v >> 4
|
||||
l.add r4, r3, r4 # r4 = v + (v >> 4)
|
||||
l.and r4, r4, r5 # r4 = v + (v >> 4) & 0x0f0f0f0f
|
||||
l.movhi r5, 0x0101
|
||||
l.ori r5, r5, 0x0101 # r5 = 0x01010101
|
||||
l.mul r11, r4, r5 # c = r4 * 0x01010101
|
||||
l.jr r9
|
||||
l.srli r11, r11, 24 # return c >> 24
|
||||
endfunc __popcountsi2
|
||||
|
||||
/*
|
||||
* Optimized implementation of the "shift divisor method" algorithm from
|
||||
* T. Rodeheffer. Software Integer Division. Microsoft Research, 2008.
|
||||
*
|
||||
* In addition to returning the quotient in r11, this function also returns
|
||||
* the remainder in r12. __umodsi3 simply copies the remainder into r11.
|
||||
*/
|
||||
func __udivsi3 # u32 __udivsi3(u32 x, u32 y) {
|
||||
l.sfeqi r4, 1 # if (y == 1)
|
||||
l.bf 5f # goto identity;
|
||||
l.ori r12, r3, 0 # u32 r = x;
|
||||
l.ori r5, r4, 0 # u32 y0 = y;
|
||||
l.addi r11, r0, 0 # u32 q = 0;
|
||||
l.sfltu r3, r4 # if (x >= y) {
|
||||
l.bf 2f
|
||||
l.sub r3, r3, r4 # x = x−y;
|
||||
1: l.sfltu r3, r4 # while (x >= y) {
|
||||
l.bf 2f
|
||||
l.sub r3, r3, r4 # x = x−y;
|
||||
l.add r4, r4, r4 # y *= 2;
|
||||
l.j 1b # }
|
||||
2: l.sfltu r12, r4 # } for (;;) {
|
||||
l.bf 3f # if (r >= y) {
|
||||
l.sfeq r4, r5 # [if (y == y0)]
|
||||
l.sub r12, r12, r4 # r = r−y;
|
||||
l.addi r11, r11, 1 # q = q + 1;
|
||||
3: l.bf 4f # } if (y == y0) break;
|
||||
l.srli r4, r4, 1 # y >>= 1;
|
||||
l.j 2b # }
|
||||
l.add r11, r11, r11 # q *= 2;
|
||||
4: l.jr r9 # return q;
|
||||
l.nop
|
||||
5: l.ori r11, r3, 0 # identity:
|
||||
l.jr r9 # return x;
|
||||
l.ori r12, r0, 0 # r = 0;
|
||||
endfunc __udivsi3 # }
|
||||
|
||||
func __umodsi3
|
||||
l.sw -4(r1), r9
|
||||
l.jal __udivsi3
|
||||
l.addi r1, r1, -4
|
||||
l.addi r1, r1, 4
|
||||
l.lwz r9, -4(r1)
|
||||
l.jr r9
|
||||
l.ori r11, r12, 0
|
||||
endfunc __umodsi3
|
41
lib/ar100/start.S
Normal file
41
lib/ar100/start.S
Normal file
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright © 2017-2020 The Crust Firmware Authors.
|
||||
* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-only
|
||||
*/
|
||||
|
||||
#include <macros.S>
|
||||
#include <asm/spr.h>
|
||||
|
||||
func start
|
||||
l.mfspr r2, r0, SPR_SYS_PPC_ADDR
|
||||
l.sfltui r2, 0x4000 # Size of exception vector area
|
||||
l.bf 1f
|
||||
l.srli r2, r2, 8 # Vector address → exception number
|
||||
l.addi r2, r0, 0 # Set to zero if not an exception
|
||||
1: l.addi r3, r0, 0 # Invalidate instruction cache
|
||||
l.addi r4, r0, 4096 # Cache lines (256) * block size (16)
|
||||
2: l.mtspr r0, r3, SPR_ICACHE_ICBIR_ADDR
|
||||
l.sfltu r3, r4
|
||||
l.bf 2b
|
||||
l.addi r3, r3, 16 # Cache block size
|
||||
l.psync # Flush CPU pipeline
|
||||
l.mfspr r3, r0, SPR_SYS_SR_ADDR # Enable instruction cache
|
||||
l.ori r3, r3, SPR_SYS_SR_ICE_MASK
|
||||
l.mtspr r0, r3, SPR_SYS_SR_ADDR
|
||||
l.nop # One cache block of nops
|
||||
l.nop
|
||||
l.nop
|
||||
l.nop
|
||||
l.movhi r3, hi(__bss_start) # Clear .bss
|
||||
l.ori r3, r3, lo(__bss_start)
|
||||
l.movhi r4, hi(__bss_end)
|
||||
l.ori r4, r4, lo(__bss_end)
|
||||
3: l.sw 0(r3), r0
|
||||
l.sfltu r3, r4
|
||||
l.bf 3b
|
||||
l.addi r3, r3, 4
|
||||
l.movhi r1, hi(__stack_end)
|
||||
l.ori r1, r1, lo(__stack_end) # Initialize stack pointer
|
||||
l.j main # Jump to C entry point
|
||||
l.or r3, r2, r2
|
||||
endfunc start
|
Loading…
Add table
Add a link
Reference in a new issue