️ FTMotion optimized timing (#28187)

Co-authored-by: Scott Lahteine <thinkyhead@users.noreply.github.com>
This commit is contained in:
David Buezas 2025-12-02 22:35:54 +01:00 committed by GitHub
parent 0265975178
commit 14bed5aee8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 249 additions and 232 deletions

View file

@ -1162,7 +1162,10 @@
//#define FT_MOTION_MENU // Provide a MarlinUI menu to set M493 and M494 parameters
//#define FTM_HOME_AND_PROBE // Use FT Motion for homing / probing. Disable if FT Motion breaks these functions.
#define FTM_DEFAULT_DYNFREQ_MODE dynFreqMode_DISABLED // Default mode of dynamic frequency calculation. (DISABLED, Z_BASED, MASS_BASED)
//#define FTM_DYNAMIC_FREQ // Enable for linear adjustment of XY shaping frequency according to Z or E
#if ENABLED(FTM_DYNAMIC_FREQ)
#define FTM_DEFAULT_DYNFREQ_MODE dynFreqMode_DISABLED // Default mode of dynamic frequency calculation. (DISABLED, Z_BASED, MASS_BASED)
#endif
#define FTM_DEFAULT_SHAPER_X ftMotionShaper_NONE // Default shaper mode on X axis (NONE, ZV, ZVD, ZVDD, ZVDDD, EI, 2HEI, 3HEI, MZV)
#define FTM_SHAPING_DEFAULT_FREQ_X 37.0f // (Hz) Default peak frequency used by input shapers
@ -1219,7 +1222,6 @@
#define FTM_BUFFER_SIZE 128 // Window size for trajectory generation, must be a power of 2 (e.g 64, 128, 256, ...)
// The total buffered time in seconds is (FTM_BUFFER_SIZE/FTM_FS)
#define FTM_FS 1000 // (Hz) Frequency for trajectory generation.
#define FTM_STEPPER_FS 2'000'000 // (Hz) Time resolution of stepper I/O update. Shouldn't affect CPU much (slower board testing needed)
#define FTM_MIN_SHAPE_FREQ 20 // (Hz) Minimum shaping frequency, lower consumes more RAM
#endif // FT_MOTION

View file

@ -51,7 +51,7 @@
#define TEMP_TIMER_FREQUENCY 1000 // Temperature::isr() is expected to be called at around 1kHz
// TODO: get rid of manual rate/prescale/ticks/cycles taken for procedures in stepper.cpp
#define STEPPER_TIMER_RATE 2000000 // 2 Mhz
#define STEPPER_TIMER_RATE 2'000'000 // 2 Mhz
extern uint32_t GetStepperTimerClkFreq();
#define STEPPER_TIMER_PRESCALE (GetStepperTimerClkFreq() / (STEPPER_TIMER_RATE))
#define STEPPER_TIMER_TICKS_PER_US ((STEPPER_TIMER_RATE) / 1000000UL) // (MHz) Stepper Timer ticks per µs

View file

@ -3687,9 +3687,7 @@
// Fixed-Time Motion
#if ENABLED(FT_MOTION)
#define FTM_TS (1.0f / FTM_FS) // (s) Time step for trajectory generation. (Reciprocal of FTM_FS)
#define FTM_STEPS_PER_UNIT_TIME (FTM_STEPPER_FS / FTM_FS) // Interpolated stepper commands per unit time
#define FTM_MIN_TICKS ((STEPPER_TIMER_RATE) / (FTM_STEPPER_FS)) // Minimum stepper ticks between steps
#define FTM_TS (1.0f / FTM_FS) // (s) Time step for trajectory generation. (Reciprocal of FTM_FS)
#define FTM_RATIO (FTM_FS / FTM_MIN_SHAPE_FREQ) // Factor for use in FTM_ZMAX. DON'T CHANGE.
#define FTM_SMOOTH_MAX_I uint32_t(TERN0(FTM_SMOOTHING, CEIL(FTM_FS * FTM_MAX_SMOOTHING_TIME))) // Max delays for smoothing
#define FTM_ZMAX (FTM_RATIO * 2 + FTM_SMOOTH_MAX_I) // Maximum delays for shaping functions (even numbers only!)

View file

@ -83,13 +83,6 @@ TrajectoryType FTMotion::trajectoryType = TrajectoryType::FTM_TRAJECTORY_TYPE;
// Resonance Test
TERN_(FTM_RESONANCE_TEST,ResonanceGenerator FTMotion::rtg;) // Resonance trajectory generator instance
// Compact plan buffer
stepper_plan_t FTMotion::stepper_plan_buff[FTM_BUFFER_SIZE];
XYZEval<int64_t> FTMotion::curr_steps_q32_32 = {0};
uint32_t FTMotion::stepper_plan_tail = 0, // The index to consume from
FTMotion::stepper_plan_head = 0; // The index to produce into
#if FTM_HAS_LIN_ADVANCE
bool FTMotion::use_advance_lead;
#endif
@ -191,7 +184,7 @@ void FTMotion::loop() {
// Set busy status for use by planner.busy()
const bool oldBusy = busy;
busy = stepping.bresenham_iterations_pending > 0 || !stepper_plan_is_empty();
busy = stepping.is_busy();
if (oldBusy && !busy) moving_axis_flags.reset();
}
@ -234,15 +227,9 @@ void FTMotion::reset() {
const bool did_suspend = stepper.suspend();
endPos_prevBlock.reset();
tau = 0;
stepper_plan_tail = stepper_plan_head = 0;
stepping.reset();
curr_steps_q32_32.reset();
#if HAS_FTM_SHAPING
#define _RESET_ZI(A) ZERO(shaping.A.d_zi);
SHAPED_MAP(_RESET_ZI);
shaping.zi_idx = 0;
#endif
shaping.reset();
TERN_(FTM_SMOOTHING, smoothing.reset(););
TERN_(HAS_EXTRUDERS, prev_traj_e = 0.0f); // Reset linear advance variables.
TERN_(DISTINCT_E_FACTORS, block_extruder_axis = E_AXIS);
@ -514,49 +501,12 @@ xyze_float_t FTMotion::calc_traj_point(const float dist) {
return traj_coords;
}
stepper_plan_t FTMotion::calc_stepper_plan(xyze_float_t traj_coords) {
// 1) Convert trajectory to step delta
#define _TOSTEPS_q32(A, B) int64_t(traj_coords.A * planner.settings.axis_steps_per_mm[B] * (1ULL << 32))
XYZEval<int64_t> next_steps_q32_32 = LOGICAL_AXIS_ARRAY(
_TOSTEPS_q32(e, block_extruder_axis),
_TOSTEPS_q32(x, X_AXIS), _TOSTEPS_q32(y, Y_AXIS), _TOSTEPS_q32(z, Z_AXIS),
_TOSTEPS_q32(i, I_AXIS), _TOSTEPS_q32(j, J_AXIS), _TOSTEPS_q32(k, K_AXIS),
_TOSTEPS_q32(u, U_AXIS), _TOSTEPS_q32(v, V_AXIS), _TOSTEPS_q32(w, W_AXIS)
);
#undef _TOSTEPS_q32
constexpr uint32_t ITERATIONS_PER_TRAJ_INV_uq0_32 = (1ULL << 32) / ITERATIONS_PER_TRAJ;
stepper_plan_t stepper_plan;
#define _RUN_AXIS(A) do{ \
int64_t delta_q32_32 = (next_steps_q32_32.A - curr_steps_q32_32.A); \
/* 2) Set stepper plan direction bits */ \
int16_t sign = (delta_q32_32 > 0) - (delta_q32_32 < 0); \
stepper_plan.dir_bits.A = delta_q32_32 > 0; \
/* 3) Set per-iteration advance dividend Q0.32 */ \
uint64_t delta_uq32_32 = ABS(delta_q32_32); \
/* dividend = delta_q32_32 / ITERATIONS_PER_TRAJ, but avoiding division and an intermediate int128 */ \
/* Note the integer part would overflow if there is eq or more than 1 steps per isr */ \
uint32_t integer_part = (delta_uq32_32 >> 32) * ITERATIONS_PER_TRAJ_INV_uq0_32; \
uint32_t fractional_part = ((delta_uq32_32 & UINT32_MAX) * ITERATIONS_PER_TRAJ_INV_uq0_32) >> 32; \
stepper_plan.advance_dividend_q0_32.A = integer_part + fractional_part; \
/* 4) Advance curr_steps by the exact integer steps that Bresenham will emit */ \
/* It's like doing current_steps = next_steps, but considering any fractional error */ \
/* from the dividend. This way there can be no drift. */ \
curr_steps_q32_32.A += (int64_t)stepper_plan.advance_dividend_q0_32.A * sign * ITERATIONS_PER_TRAJ; \
} while(0);
LOGICAL_AXIS_MAP(_RUN_AXIS);
#undef _RUN_AXIS
return stepper_plan;
}
/**
* Generate stepper data of the trajectory.
* Called from FTMotion::loop()
*/
void FTMotion::fill_stepper_plan_buffer() {
while (!stepper_plan_is_full()) {
while (!stepping.is_full()) {
float total_duration = currentGenerator->getTotalDuration(); // If the current plan is empty, it will have zero duration.
while (tau + FTM_TS > total_duration) {
/**
@ -581,10 +531,8 @@ void FTMotion::fill_stepper_plan_buffer() {
// Get distance from trajectory generator
xyze_float_t traj_coords = calc_traj_point(currentGenerator->getDistanceAtTime(tau));
stepper_plan_t plan = calc_stepper_plan(traj_coords);
// Store in buffer
enqueue_stepper_plan(plan);
// Calculate and store stepper plan in buffer
stepping_enqueue(traj_coords);
}
}

View file

@ -44,7 +44,7 @@
#define FTM_VERSION 2 // Change version when hosts need to know
#if HAS_X_AXIS && (HAS_Z_AXIS || HAS_EXTRUDERS)
#if ENABLED(FTM_DYNAMIC_FREQ)
#define HAS_DYNAMIC_FREQ 1
#if HAS_Z_AXIS
#define HAS_DYNAMIC_FREQ_MM 1
@ -191,29 +191,20 @@ class FTMotion {
return cfg.active ? axis_move_dir[axis] : stepper.last_direction_bits[axis];
}
// A frame of the stepping plan
static stepping_t stepping;
FORCE_INLINE static bool stepper_plan_is_empty() {
return stepper_plan_head == stepper_plan_tail;
}
FORCE_INLINE static bool stepper_plan_is_full() {
return ((stepper_plan_head + 1) & FTM_BUFFER_MASK) == stepper_plan_tail;
}
FORCE_INLINE static uint32_t stepper_plan_count() {
return (stepper_plan_head - stepper_plan_tail) & FTM_BUFFER_MASK;
}
// Enqueue a plan
FORCE_INLINE static void enqueue_stepper_plan(const stepper_plan_t& d) {
stepper_plan_buff[stepper_plan_head] = d;
stepper_plan_head = (stepper_plan_head + 1u) & FTM_BUFFER_MASK;
}
// Dequeue a plan.
// Zero-copy consume; caller must use it before next dequeue if they keep a ref.
// Done like this to avoid double copy.
// e.g do: stepper_plan_t data = dequeue_stepper_plan(); this is ok
FORCE_INLINE static stepper_plan_t& dequeue_stepper_plan() {
const uint32_t i = stepper_plan_tail;
stepper_plan_tail = (i + 1u) & FTM_BUFFER_MASK;
return stepper_plan_buff[i];
// Add a single set of coordinates in the stepping plan
FORCE_INLINE static void stepping_enqueue(const xyze_float_t traj_coords) {
#define _TOSTEPS_q16(A, B) int64_t(traj_coords.A * planner.settings.axis_steps_per_mm[B] * (1ULL << 16))
XYZEval<int64_t> next_steps_q48_16 = LOGICAL_AXIS_ARRAY(
_TOSTEPS_q16(e, block_extruder_axis),
_TOSTEPS_q16(x, X_AXIS), _TOSTEPS_q16(y, Y_AXIS), _TOSTEPS_q16(z, Z_AXIS),
_TOSTEPS_q16(i, I_AXIS), _TOSTEPS_q16(j, J_AXIS), _TOSTEPS_q16(k, K_AXIS),
_TOSTEPS_q16(u, U_AXIS), _TOSTEPS_q16(v, V_AXIS), _TOSTEPS_q16(w, W_AXIS)
);
#undef _TOSTEPS_q16
stepping.enqueue(next_steps_q48_16);
}
private:
@ -262,12 +253,7 @@ class FTMotion {
static void plan_runout_block();
static void fill_stepper_plan_buffer();
static xyze_float_t calc_traj_point(const float dist);
static stepper_plan_t calc_stepper_plan(xyze_float_t delta);
static bool plan_next_block();
// stepper_plan buffer variables.
static stepper_plan_t stepper_plan_buff[FTM_BUFFER_SIZE];
static uint32_t stepper_plan_tail, stepper_plan_head;
static XYZEval<int64_t> curr_steps_q32_32;
}; // class FTMotion

View file

@ -53,7 +53,7 @@ void ResonanceGenerator::reset() {
void ResonanceGenerator::fill_stepper_plan_buffer() {
xyze_float_t traj_coords = {};
while (!ftMotion.stepper_plan_is_full()) {
while (!ftMotion.stepping.is_full()) {
// Calculate current frequency
// Logarithmic approach with duration per octave
const float freq = rt_params.min_freq * powf(2.0f, rt_time / rt_params.octave_duration);
@ -77,9 +77,8 @@ void ResonanceGenerator::fill_stepper_plan_buffer() {
#define _SET_TRAJ(A) traj_coords.A = rt_params.start_pos.A + (rt_params.axis == A##_AXIS ? pos_offset : 0.0f);
LOGICAL_AXIS_MAP(_SET_TRAJ);
stepper_plan_t plan = ftMotion.calc_stepper_plan(traj_coords);
// Store in buffer
ftMotion.enqueue_stepper_plan(plan);
ftMotion.stepping_enqueue(traj_coords);
// Increment time for the next point
rt_time += FTM_TS;
}

View file

@ -114,4 +114,9 @@ typedef struct Shaping {
// Of course things can't be done in the past, so when shaping is applied, the all axes are delayed by largest_delay_samples
// minus their own centroid delay. This makes them all be equally delayed and therefore in synch.
void refresh_largest_delay_samples() { largest_delay_samples = -_MIN(SHAPED_LIST(X.Ni[0], Y.Ni[0], Z.Ni[0], E.Ni[0])); }
void reset(){
#define _RESET_ZI(A) ZERO(A.d_zi);
SHAPED_MAP(_RESET_ZI);
zi_idx = 0;
}
} shaping_t;

View file

@ -44,4 +44,9 @@ typedef struct Smoothing {
// is done by delaying all axes to match the laggiest one (i.e largest_delay_samples).
void refresh_largest_delay_samples() { largest_delay_samples = _MAX(CARTES_LIST(X.delay_samples, Y.delay_samples, Z.delay_samples, E.delay_samples)); }
// Note: the delay equals smoothing_time iff the input signal frequency is lower than 1/smoothing_time, luckily for us, this holds in this case
void reset() {
#define _CLEAR(A) ZERO(A.smoothing_pass);
LOGICAL_AXIS_MAP(_CLEAR);
#undef _CLEAR
}
} smoothing_t;

View file

@ -1,115 +0,0 @@
/**
* Marlin 3D Printer Firmware
* Copyright (c) 2025 MarlinFirmware [https://github.com/MarlinFirmware/Marlin]
*
* Based on Sprinter and grbl.
* Copyright (c) 2011 Camiel Gubbels / Erik van der Zalm
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
*/
#include "../../inc/MarlinConfigPre.h"
#if ENABLED(FT_MOTION)
#include "stepping.h"
#include "../ft_motion.h"
void Stepping::reset() {
stepper_plan.reset();
delta_error_q32.set(LOGICAL_AXIS_LIST_1(1UL << 31)); // Start as 0.5 in q32 so steps are rounded
step_bits = 0;
bresenham_iterations_pending = 0;
}
uint32_t Stepping::advance_until_step() {
xyze_ulong_t space_q32 = -delta_error_q32 + UINT32_MAX; // How much accumulation until a step in any axis is ALMOST due
// TODO: Add operators to types.h for scalar destination.
xyze_ulong_t advance_q32 = stepper_plan.advance_dividend_q0_32;
uint32_t iterations = bresenham_iterations_pending;
// Per-axis lower-bound approx of floor(space_q32/adv), min across axes (lower bound because this fast division underestimates result by up to 1)
//#define RUN_AXIS(A) if (advance_q32.A > 0) NOMORE(iterations, space_q32.A / advance_q32.A);
#define RUN_AXIS(A) NOMORE(iterations, uint32_t((uint64_t(space_q32.A) * advance_dividend_reciprocal.A) >> 32));
LOGICAL_AXIS_MAP(RUN_AXIS);
#undef RUN_AXIS
#define RUN_AXIS(A) delta_error_q32.A += advance_q32.A * iterations;
LOGICAL_AXIS_MAP(RUN_AXIS);
#undef RUN_AXIS
bresenham_iterations_pending -= iterations;
step_bits = 0;
// iterations may be underestimated by 1 by the cheap division, therefore we may have to do 2 iterations here
while (bresenham_iterations_pending && !(bool)step_bits) {
iterations++;
bresenham_iterations_pending--;
#define RUN_AXIS(A) do{ \
delta_error_q32.A += stepper_plan.advance_dividend_q0_32.A; \
step_bits.A = delta_error_q32.A < stepper_plan.advance_dividend_q0_32.A; \
}while(0);
LOGICAL_AXIS_MAP(RUN_AXIS);
#undef RUN_AXIS
}
return iterations * INTERVAL_PER_ITERATION;
}
uint32_t Stepping::plan() {
uint32_t intervals = 0;
if (bresenham_iterations_pending > 0) {
intervals = advance_until_step();
if (bool(step_bits)) return intervals; // steps to make => return the wait time so it gets done in due time
// Else all Bresenham iterations were advanced without steps => this is just the frame end, so plan the next one directly and accumulate the wait
}
if (ftMotion.stepper_plan_is_empty()) {
bresenham_iterations_pending = 0;
step_bits = 0;
return INTERVAL_PER_TRAJ_POINT;
}
AxisBits old_dir_bits = stepper_plan.dir_bits;
stepper_plan = ftMotion.dequeue_stepper_plan();
const AxisBits dir_flip_mask = old_dir_bits ^ stepper_plan.dir_bits; // axes that must toggle now
if (dir_flip_mask) {
#define _HANDLE_DIR_CHANGES(A) if (dir_flip_mask.A) delta_error_q32.A *= -1;
LOGICAL_AXIS_MAP(_HANDLE_DIR_CHANGES);
#undef _HANDLE_DIR_CHANGES
}
if (stepper_plan.advance_dividend_q0_32 == 0) {
// Don't waste time in zero motion traj points
bresenham_iterations_pending = 0;
step_bits = 0;
return INTERVAL_PER_TRAJ_POINT;
}
// This vector division is unavoidable, but it saves a division per step during Bresenham
// The reciprocal is actually 2^32/dividend, but that requires dividing a uint64_t, which quite expensive
// Since even the real reciprocal may underestimate the quotient by 1 anyway already, this optimisation doesn't
// make things worse. This underestimation is compensated for in advance_until_step.
#define _DIVIDEND_RECIP(A) do{ \
const uint32_t d = stepper_plan.advance_dividend_q0_32.A; \
advance_dividend_reciprocal.A = d ? UINT32_MAX / d : UINT32_MAX; \
}while(0);
LOGICAL_AXIS_MAP(_DIVIDEND_RECIP);
#undef _DIVIDEND_RECIP
bresenham_iterations_pending = ITERATIONS_PER_TRAJ;
return intervals + advance_until_step();
}
#endif // FT_MOTION

View file

@ -23,31 +23,222 @@
#include "../../inc/MarlinConfig.h"
//
// uint64-free equivalent of: ((uint64_t)a * b) >> 16
//
FORCE_INLINE constexpr uint32_t a_times_b_shift_16(uint32_t a, uint32_t b) {
uint32_t hi = a >> 16;
uint32_t lo = a & 0xFFFFu;
return (hi * b) + ((lo * b) >> 16);
}
#define FTM_NEVER (UINT16_MAX) // Reserved number to indicate "no ticks in this frame" (FRAME_TICKS_FP+1 would work too)
constexpr uint32_t FRAME_TICKS = STEPPER_TIMER_RATE / FTM_FS; // Timer ticks in a frame
static_assert(FRAME_TICKS < FTM_NEVER, "(STEPPER_TIMER_RATE / FTM_FS) must be < 2^16 (otherwise fixed-point numbers exceed uint16 vars).");
constexpr uint32_t FTM_Q_INT = 32u - __builtin_clz(FRAME_TICKS + 1); // Bits to represent the max value (duration of a frame, +1 one for FTM_NEVER).
// "clz" counts leading zeroes.
constexpr uint32_t FTM_Q = 16 - FTM_Q_INT; // uint16 interval fractional bits.
// Intervals buffer has fixed point numbers with the point on this position
// The _FP and _fp suffixes mean the number is in fixed point format with the point at the FTM_Q position.
// See: https://en.wikipedia.org/wiki/Fixed-point_arithmetic
// E.g number_fp = number << FTM_Q
// number == (number_fp >> FTM_Q)
constexpr uint32_t ONE_FP = 1 << FTM_Q; // Number 1 in fixed point format
constexpr uint32_t FP_FLOOR_MASK = ~(ONE_FP - 1); // Bit mask to do FLOOR in fixed point
constexpr uint32_t FRAME_TICKS_FP = FRAME_TICKS << FTM_Q; // Ticks in a frame in fixed point
typedef struct stepper_plan {
AxisBits dir_bits;
xyze_ulong_t advance_dividend_q0_32{0};
void reset() { advance_dividend_q0_32.reset(); }
xyze_uint_t first_interval_fp;
xyze_uint_t interval_fp;
} stepper_plan_t;
// Stepping plan handles steps for a whole frame (trajectory point delta)
typedef struct Stepping {
stepper_plan_t stepper_plan;
xyze_ulong_t advance_dividend_reciprocal{0}; // Note this 32 bit reciprocal underestimates quotients by at most one.
xyze_ulong_t delta_error_q32{ LOGICAL_AXIS_LIST_1(_BV32(31)) };
AxisBits step_bits;
uint32_t bresenham_iterations_pending;
void reset();
// Updates error and bresenham_iterations_pending, sets step_bits and returns interval until the next step (or end of frame).
uint32_t advance_until_step();
/**
* If bresenham_iterations_pending, advance to next actual step.
* Else, consume stepper data point
* Then return interval until that next step
*/
uint32_t plan();
#define INTERVAL_PER_ITERATION (STEPPER_TIMER_RATE / FTM_STEPPER_FS)
#define INTERVAL_PER_TRAJ_POINT (STEPPER_TIMER_RATE / FTM_FS)
#define ITERATIONS_PER_TRAJ (FTM_STEPPER_FS * FTM_TS)
//
// ISR part
//
AxisBits dir_bits;
AxisBits step_bits;
xyze_ulong_t axis_interval_fp{ LOGICAL_AXIS_LIST_1(FTM_NEVER) };
xyze_ulong_t ticks_left_per_axis_fp{ LOGICAL_AXIS_LIST_1(FTM_NEVER) };
uint32_t ticks_left_in_frame_fp = 0;
// Return how many full ticks we must wait before
// generating the next step pulse. The call is inexpensive:
// - no heap, no locks pure arithmetic on pre-computed data
FORCE_INLINE uint32_t advance_until_step() {
step_bits = 0;
uint32_t ticks_to_wait_fp = 0;
for (;;) {
// Smallest remaining tick count among all axes next step time
const uint32_t ticks_to_next_step_fp = ticks_left_per_axis_fp.small();
// Does the frame finish before this next step occurs?
if (ticks_to_next_step_fp > ticks_left_in_frame_fp) {
// Frame ends before next step
if (is_empty()) {
ticks_left_in_frame_fp = 0;
ticks_left_per_axis_fp = FTM_NEVER;
return FTM_NEVER;
}
// Consume the rest of this frame's time
const uint32_t wait_floor_fp = ticks_left_in_frame_fp & FP_FLOOR_MASK;
ticks_to_wait_fp += wait_floor_fp;
ticks_left_in_frame_fp -= wait_floor_fp;
//
// Pull the next plan it already contains:
// - direction bits
// - first_interval_fp (time to the first step)
// - interval_fp (repeating step period)
//
const stepper_plan_t next = dequeue();
dir_bits = next.dir_bits;
axis_interval_fp = next.interval_fp.asUInt32();
// Note the frame will actually end a fraction of a tick in the future, and ticks_left_in_frame_fp still has that fraction.
// Instead of discarding that time, we delay both the end of the next frame, and all first steps by that amount.
ticks_left_per_axis_fp = next.first_interval_fp.asUInt32();
ticks_left_per_axis_fp += ticks_left_in_frame_fp;
ticks_left_in_frame_fp += FRAME_TICKS_FP; // Start a fresh frame
}
else {
// Step happens before frame end
// Advance to it
const uint32_t wait_floor_fp = ticks_to_next_step_fp & FP_FLOOR_MASK;
ticks_to_wait_fp += wait_floor_fp;
ticks_left_in_frame_fp -= wait_floor_fp;
ticks_left_per_axis_fp -= wait_floor_fp;
// Build step_bits: any axis whose counter < ONE_FP should step before the next tick, so we tick now
// unless the frame ends earlier.
uint32_t limit_fp = _MIN(ONE_FP - 1, ticks_left_in_frame_fp);
auto _set_step_bit = [&](const AxisEnum A) __attribute__((always_inline)) {
if (ticks_left_per_axis_fp[A] <= limit_fp) {
step_bits[A] = 1;
ticks_left_per_axis_fp[A] += axis_interval_fp[A];
}
};
LOGICAL_AXIS_CALL(_set_step_bit);
return ticks_to_wait_fp >> FTM_Q; // Convert fixed point back to whole ticks
}
} // loop forever
}
FORCE_INLINE void reset() {
step_bits = 0;
axis_interval_fp = FTM_NEVER;
ticks_left_per_axis_fp = FTM_NEVER;
ticks_left_in_frame_fp = 0;
stepper_plan_tail = stepper_plan_head = 0;
curr_steps_q48_16.reset();
}
//
// Buffering part
//
stepper_plan_t stepper_plan_buff[FTM_BUFFER_SIZE];
uint32_t stepper_plan_tail = 0, stepper_plan_head = 0;
XYZEval<int64_t> curr_steps_q48_16{0};
FORCE_INLINE void enqueue(XYZEval<int64_t> next_steps_q48_16) {
stepper_plan_t stepper_plan;
constexpr uint32_t HALF_PHASE_OFFSET = (1UL << 15); // to make steps at .5 crossings instead of integers to center the error
auto _run_axis = [&](const AxisEnum A) __attribute__((always_inline)) {
// Add half-phase offset to keep step error centred
const int64_t offset_curr_q48_16 = curr_steps_q48_16[A] + HALF_PHASE_OFFSET,
offset_next_q48_16 = next_steps_q48_16[A] + HALF_PHASE_OFFSET;
curr_steps_q48_16[A] = next_steps_q48_16[A];
// Determine direction change
const bool new_dir = offset_next_q48_16 >= offset_curr_q48_16;
stepper_plan.dir_bits[A] = new_dir;
// Δsteps in Q16.16 format magnitude only
const uint32_t delta_q16_16 = abs(offset_next_q48_16 - offset_curr_q48_16);
// Current / next phase (fractional part of the position)
uint32_t curr_phase_q1_16 = offset_curr_q48_16 & 0xFFFF,
next_phase_q1_16 = offset_next_q48_16 & 0xFFFF;
if (!new_dir) {
// When going backwards, the phase is 1-phase
curr_phase_q1_16 = (1UL<<16) - curr_phase_q1_16;
next_phase_q1_16 = (1UL<<16) - next_phase_q1_16;
}
// When going, e.g., from 0.6 to 1.0, the delta is not a whole step,
// but the phase overflow indicates a step.
const uint32_t carry = curr_phase_q1_16 > next_phase_q1_16;
// steps_to_make = integer steps + potential fraction crossing an integer
const uint16_t steps_to_make = (delta_q16_16 >> 16) + carry;
if (steps_to_make == 0) { // No steps on this axis
stepper_plan.first_interval_fp[A] = FTM_NEVER;
stepper_plan.interval_fp[A] = FTM_NEVER;
return;
}
// Compute the exact time between steps.
// interval = ticks_per_frame / delta
// current_frame_phase_fp = interval * curr_phase
const uint32_t interval_fp = (FRAME_TICKS_FP << 16) / delta_q16_16,
current_frame_phase_fp = a_times_b_shift_16(interval_fp, curr_phase_q1_16);
uint32_t first_interval_fp = interval_fp - current_frame_phase_fp;
// The calculation of interval_fp may undershoot its value by a fraction
// due to integer (floor) division. This small fractional error can
// occasionally make a spurious step fit inside this frame.
// To avoid that corner case, the first interval is incremented just enough
// for it to not fit.
const uint32_t tick_of_spurious_step_fp = first_interval_fp + interval_fp * steps_to_make;
if (tick_of_spurious_step_fp <= FRAME_TICKS_FP) {
first_interval_fp += FRAME_TICKS_FP - tick_of_spurious_step_fp + 1;
}
stepper_plan.first_interval_fp[A] = _MIN(first_interval_fp, FTM_NEVER);
stepper_plan.interval_fp[A] = _MIN(interval_fp, FTM_NEVER);
};
LOGICAL_AXIS_CALL(_run_axis);
// Store the plan into the circular buffer
stepper_plan_buff[stepper_plan_head] = stepper_plan;
stepper_plan_head = (stepper_plan_head + 1U) & FTM_BUFFER_MASK;
}
// Dequeue a plan.
// Zero-copy consume; caller must use it before next dequeue if they keep a ref.
// Done like this to avoid double copy.
// e.g do: stepper_plan_t data = dequeue(); this is ok
FORCE_INLINE stepper_plan_t& dequeue() {
const uint32_t i = stepper_plan_tail;
stepper_plan_tail = (i + 1U) & FTM_BUFFER_MASK;
return stepper_plan_buff[i];
}
//
// Simple helper predicates
//
FORCE_INLINE bool is_busy() {
return !(is_empty() && ticks_left_in_frame_fp == 0);
}
FORCE_INLINE bool is_empty() {
return stepper_plan_head == stepper_plan_tail;
}
FORCE_INLINE bool is_full() {
return ((stepper_plan_head + 1) & FTM_BUFFER_MASK) == stepper_plan_tail;
}
} stepping_t;

View file

@ -3585,9 +3585,7 @@ void Stepper::report_positions() {
*/
void Stepper::ftMotion_stepper() {
AxisBits &step_bits = ftMotion.stepping.step_bits; // Aliases for prettier code
AxisBits &dir_bits = ftMotion.stepping.stepper_plan.dir_bits;
if (step_bits.bits == 0) return;
AxisBits &dir_bits = ftMotion.stepping.dir_bits;
USING_TIMED_PULSE();