diff --git a/Marlin/Configuration_adv.h b/Marlin/Configuration_adv.h index 410b08ab2f..272d1a09e7 100644 --- a/Marlin/Configuration_adv.h +++ b/Marlin/Configuration_adv.h @@ -1162,7 +1162,10 @@ //#define FT_MOTION_MENU // Provide a MarlinUI menu to set M493 and M494 parameters //#define FTM_HOME_AND_PROBE // Use FT Motion for homing / probing. Disable if FT Motion breaks these functions. - #define FTM_DEFAULT_DYNFREQ_MODE dynFreqMode_DISABLED // Default mode of dynamic frequency calculation. (DISABLED, Z_BASED, MASS_BASED) + //#define FTM_DYNAMIC_FREQ // Enable for linear adjustment of XY shaping frequency according to Z or E + #if ENABLED(FTM_DYNAMIC_FREQ) + #define FTM_DEFAULT_DYNFREQ_MODE dynFreqMode_DISABLED // Default mode of dynamic frequency calculation. (DISABLED, Z_BASED, MASS_BASED) + #endif #define FTM_DEFAULT_SHAPER_X ftMotionShaper_NONE // Default shaper mode on X axis (NONE, ZV, ZVD, ZVDD, ZVDDD, EI, 2HEI, 3HEI, MZV) #define FTM_SHAPING_DEFAULT_FREQ_X 37.0f // (Hz) Default peak frequency used by input shapers @@ -1219,7 +1222,6 @@ #define FTM_BUFFER_SIZE 128 // Window size for trajectory generation, must be a power of 2 (e.g 64, 128, 256, ...) // The total buffered time in seconds is (FTM_BUFFER_SIZE/FTM_FS) #define FTM_FS 1000 // (Hz) Frequency for trajectory generation. - #define FTM_STEPPER_FS 2'000'000 // (Hz) Time resolution of stepper I/O update. Shouldn't affect CPU much (slower board testing needed) #define FTM_MIN_SHAPE_FREQ 20 // (Hz) Minimum shaping frequency, lower consumes more RAM #endif // FT_MOTION diff --git a/Marlin/src/HAL/STM32/timers.h b/Marlin/src/HAL/STM32/timers.h index c8c4845d98..e931daa72e 100644 --- a/Marlin/src/HAL/STM32/timers.h +++ b/Marlin/src/HAL/STM32/timers.h @@ -51,7 +51,7 @@ #define TEMP_TIMER_FREQUENCY 1000 // Temperature::isr() is expected to be called at around 1kHz // TODO: get rid of manual rate/prescale/ticks/cycles taken for procedures in stepper.cpp -#define STEPPER_TIMER_RATE 2000000 // 2 Mhz +#define STEPPER_TIMER_RATE 2'000'000 // 2 Mhz extern uint32_t GetStepperTimerClkFreq(); #define STEPPER_TIMER_PRESCALE (GetStepperTimerClkFreq() / (STEPPER_TIMER_RATE)) #define STEPPER_TIMER_TICKS_PER_US ((STEPPER_TIMER_RATE) / 1000000UL) // (MHz) Stepper Timer ticks per µs diff --git a/Marlin/src/inc/Conditionals-5-post.h b/Marlin/src/inc/Conditionals-5-post.h index f2b3e361c6..e2d2952e1b 100644 --- a/Marlin/src/inc/Conditionals-5-post.h +++ b/Marlin/src/inc/Conditionals-5-post.h @@ -3687,9 +3687,7 @@ // Fixed-Time Motion #if ENABLED(FT_MOTION) - #define FTM_TS (1.0f / FTM_FS) // (s) Time step for trajectory generation. (Reciprocal of FTM_FS) - #define FTM_STEPS_PER_UNIT_TIME (FTM_STEPPER_FS / FTM_FS) // Interpolated stepper commands per unit time - #define FTM_MIN_TICKS ((STEPPER_TIMER_RATE) / (FTM_STEPPER_FS)) // Minimum stepper ticks between steps + #define FTM_TS (1.0f / FTM_FS) // (s) Time step for trajectory generation. (Reciprocal of FTM_FS) #define FTM_RATIO (FTM_FS / FTM_MIN_SHAPE_FREQ) // Factor for use in FTM_ZMAX. DON'T CHANGE. #define FTM_SMOOTH_MAX_I uint32_t(TERN0(FTM_SMOOTHING, CEIL(FTM_FS * FTM_MAX_SMOOTHING_TIME))) // Max delays for smoothing #define FTM_ZMAX (FTM_RATIO * 2 + FTM_SMOOTH_MAX_I) // Maximum delays for shaping functions (even numbers only!) diff --git a/Marlin/src/module/ft_motion.cpp b/Marlin/src/module/ft_motion.cpp index 8c596f1fa3..30c16077c0 100644 --- a/Marlin/src/module/ft_motion.cpp +++ b/Marlin/src/module/ft_motion.cpp @@ -83,13 +83,6 @@ TrajectoryType FTMotion::trajectoryType = TrajectoryType::FTM_TRAJECTORY_TYPE; // Resonance Test TERN_(FTM_RESONANCE_TEST,ResonanceGenerator FTMotion::rtg;) // Resonance trajectory generator instance -// Compact plan buffer -stepper_plan_t FTMotion::stepper_plan_buff[FTM_BUFFER_SIZE]; -XYZEval FTMotion::curr_steps_q32_32 = {0}; - -uint32_t FTMotion::stepper_plan_tail = 0, // The index to consume from - FTMotion::stepper_plan_head = 0; // The index to produce into - #if FTM_HAS_LIN_ADVANCE bool FTMotion::use_advance_lead; #endif @@ -191,7 +184,7 @@ void FTMotion::loop() { // Set busy status for use by planner.busy() const bool oldBusy = busy; - busy = stepping.bresenham_iterations_pending > 0 || !stepper_plan_is_empty(); + busy = stepping.is_busy(); if (oldBusy && !busy) moving_axis_flags.reset(); } @@ -234,15 +227,9 @@ void FTMotion::reset() { const bool did_suspend = stepper.suspend(); endPos_prevBlock.reset(); tau = 0; - stepper_plan_tail = stepper_plan_head = 0; stepping.reset(); - curr_steps_q32_32.reset(); - - #if HAS_FTM_SHAPING - #define _RESET_ZI(A) ZERO(shaping.A.d_zi); - SHAPED_MAP(_RESET_ZI); - shaping.zi_idx = 0; - #endif + shaping.reset(); + TERN_(FTM_SMOOTHING, smoothing.reset();); TERN_(HAS_EXTRUDERS, prev_traj_e = 0.0f); // Reset linear advance variables. TERN_(DISTINCT_E_FACTORS, block_extruder_axis = E_AXIS); @@ -514,49 +501,12 @@ xyze_float_t FTMotion::calc_traj_point(const float dist) { return traj_coords; } -stepper_plan_t FTMotion::calc_stepper_plan(xyze_float_t traj_coords) { - // 1) Convert trajectory to step delta - #define _TOSTEPS_q32(A, B) int64_t(traj_coords.A * planner.settings.axis_steps_per_mm[B] * (1ULL << 32)) - XYZEval next_steps_q32_32 = LOGICAL_AXIS_ARRAY( - _TOSTEPS_q32(e, block_extruder_axis), - _TOSTEPS_q32(x, X_AXIS), _TOSTEPS_q32(y, Y_AXIS), _TOSTEPS_q32(z, Z_AXIS), - _TOSTEPS_q32(i, I_AXIS), _TOSTEPS_q32(j, J_AXIS), _TOSTEPS_q32(k, K_AXIS), - _TOSTEPS_q32(u, U_AXIS), _TOSTEPS_q32(v, V_AXIS), _TOSTEPS_q32(w, W_AXIS) - ); - #undef _TOSTEPS_q32 - - constexpr uint32_t ITERATIONS_PER_TRAJ_INV_uq0_32 = (1ULL << 32) / ITERATIONS_PER_TRAJ; - stepper_plan_t stepper_plan; - - #define _RUN_AXIS(A) do{ \ - int64_t delta_q32_32 = (next_steps_q32_32.A - curr_steps_q32_32.A); \ - /* 2) Set stepper plan direction bits */ \ - int16_t sign = (delta_q32_32 > 0) - (delta_q32_32 < 0); \ - stepper_plan.dir_bits.A = delta_q32_32 > 0; \ - /* 3) Set per-iteration advance dividend Q0.32 */ \ - uint64_t delta_uq32_32 = ABS(delta_q32_32); \ - /* dividend = delta_q32_32 / ITERATIONS_PER_TRAJ, but avoiding division and an intermediate int128 */ \ - /* Note the integer part would overflow if there is eq or more than 1 steps per isr */ \ - uint32_t integer_part = (delta_uq32_32 >> 32) * ITERATIONS_PER_TRAJ_INV_uq0_32; \ - uint32_t fractional_part = ((delta_uq32_32 & UINT32_MAX) * ITERATIONS_PER_TRAJ_INV_uq0_32) >> 32; \ - stepper_plan.advance_dividend_q0_32.A = integer_part + fractional_part; \ - /* 4) Advance curr_steps by the exact integer steps that Bresenham will emit */ \ - /* It's like doing current_steps = next_steps, but considering any fractional error */ \ - /* from the dividend. This way there can be no drift. */ \ - curr_steps_q32_32.A += (int64_t)stepper_plan.advance_dividend_q0_32.A * sign * ITERATIONS_PER_TRAJ; \ - } while(0); - LOGICAL_AXIS_MAP(_RUN_AXIS); - #undef _RUN_AXIS - - return stepper_plan; -} - /** * Generate stepper data of the trajectory. * Called from FTMotion::loop() */ void FTMotion::fill_stepper_plan_buffer() { - while (!stepper_plan_is_full()) { + while (!stepping.is_full()) { float total_duration = currentGenerator->getTotalDuration(); // If the current plan is empty, it will have zero duration. while (tau + FTM_TS > total_duration) { /** @@ -581,10 +531,8 @@ void FTMotion::fill_stepper_plan_buffer() { // Get distance from trajectory generator xyze_float_t traj_coords = calc_traj_point(currentGenerator->getDistanceAtTime(tau)); - stepper_plan_t plan = calc_stepper_plan(traj_coords); - - // Store in buffer - enqueue_stepper_plan(plan); + // Calculate and store stepper plan in buffer + stepping_enqueue(traj_coords); } } diff --git a/Marlin/src/module/ft_motion.h b/Marlin/src/module/ft_motion.h index d26fa8b882..9e78195264 100644 --- a/Marlin/src/module/ft_motion.h +++ b/Marlin/src/module/ft_motion.h @@ -44,7 +44,7 @@ #define FTM_VERSION 2 // Change version when hosts need to know -#if HAS_X_AXIS && (HAS_Z_AXIS || HAS_EXTRUDERS) +#if ENABLED(FTM_DYNAMIC_FREQ) #define HAS_DYNAMIC_FREQ 1 #if HAS_Z_AXIS #define HAS_DYNAMIC_FREQ_MM 1 @@ -191,29 +191,20 @@ class FTMotion { return cfg.active ? axis_move_dir[axis] : stepper.last_direction_bits[axis]; } + // A frame of the stepping plan static stepping_t stepping; - FORCE_INLINE static bool stepper_plan_is_empty() { - return stepper_plan_head == stepper_plan_tail; - } - FORCE_INLINE static bool stepper_plan_is_full() { - return ((stepper_plan_head + 1) & FTM_BUFFER_MASK) == stepper_plan_tail; - } - FORCE_INLINE static uint32_t stepper_plan_count() { - return (stepper_plan_head - stepper_plan_tail) & FTM_BUFFER_MASK; - } - // Enqueue a plan - FORCE_INLINE static void enqueue_stepper_plan(const stepper_plan_t& d) { - stepper_plan_buff[stepper_plan_head] = d; - stepper_plan_head = (stepper_plan_head + 1u) & FTM_BUFFER_MASK; - } - // Dequeue a plan. - // Zero-copy consume; caller must use it before next dequeue if they keep a ref. - // Done like this to avoid double copy. - // e.g do: stepper_plan_t data = dequeue_stepper_plan(); this is ok - FORCE_INLINE static stepper_plan_t& dequeue_stepper_plan() { - const uint32_t i = stepper_plan_tail; - stepper_plan_tail = (i + 1u) & FTM_BUFFER_MASK; - return stepper_plan_buff[i]; + + // Add a single set of coordinates in the stepping plan + FORCE_INLINE static void stepping_enqueue(const xyze_float_t traj_coords) { + #define _TOSTEPS_q16(A, B) int64_t(traj_coords.A * planner.settings.axis_steps_per_mm[B] * (1ULL << 16)) + XYZEval next_steps_q48_16 = LOGICAL_AXIS_ARRAY( + _TOSTEPS_q16(e, block_extruder_axis), + _TOSTEPS_q16(x, X_AXIS), _TOSTEPS_q16(y, Y_AXIS), _TOSTEPS_q16(z, Z_AXIS), + _TOSTEPS_q16(i, I_AXIS), _TOSTEPS_q16(j, J_AXIS), _TOSTEPS_q16(k, K_AXIS), + _TOSTEPS_q16(u, U_AXIS), _TOSTEPS_q16(v, V_AXIS), _TOSTEPS_q16(w, W_AXIS) + ); + #undef _TOSTEPS_q16 + stepping.enqueue(next_steps_q48_16); } private: @@ -262,12 +253,7 @@ class FTMotion { static void plan_runout_block(); static void fill_stepper_plan_buffer(); static xyze_float_t calc_traj_point(const float dist); - static stepper_plan_t calc_stepper_plan(xyze_float_t delta); static bool plan_next_block(); - // stepper_plan buffer variables. - static stepper_plan_t stepper_plan_buff[FTM_BUFFER_SIZE]; - static uint32_t stepper_plan_tail, stepper_plan_head; - static XYZEval curr_steps_q32_32; }; // class FTMotion diff --git a/Marlin/src/module/ft_motion/resonance_generator.cpp b/Marlin/src/module/ft_motion/resonance_generator.cpp index bfc2f9263d..2c2297e312 100644 --- a/Marlin/src/module/ft_motion/resonance_generator.cpp +++ b/Marlin/src/module/ft_motion/resonance_generator.cpp @@ -53,7 +53,7 @@ void ResonanceGenerator::reset() { void ResonanceGenerator::fill_stepper_plan_buffer() { xyze_float_t traj_coords = {}; - while (!ftMotion.stepper_plan_is_full()) { + while (!ftMotion.stepping.is_full()) { // Calculate current frequency // Logarithmic approach with duration per octave const float freq = rt_params.min_freq * powf(2.0f, rt_time / rt_params.octave_duration); @@ -77,9 +77,8 @@ void ResonanceGenerator::fill_stepper_plan_buffer() { #define _SET_TRAJ(A) traj_coords.A = rt_params.start_pos.A + (rt_params.axis == A##_AXIS ? pos_offset : 0.0f); LOGICAL_AXIS_MAP(_SET_TRAJ); - stepper_plan_t plan = ftMotion.calc_stepper_plan(traj_coords); // Store in buffer - ftMotion.enqueue_stepper_plan(plan); + ftMotion.stepping_enqueue(traj_coords); // Increment time for the next point rt_time += FTM_TS; } diff --git a/Marlin/src/module/ft_motion/shaping.h b/Marlin/src/module/ft_motion/shaping.h index 4a62acddcc..113d0fa3ef 100644 --- a/Marlin/src/module/ft_motion/shaping.h +++ b/Marlin/src/module/ft_motion/shaping.h @@ -114,4 +114,9 @@ typedef struct Shaping { // Of course things can't be done in the past, so when shaping is applied, the all axes are delayed by largest_delay_samples // minus their own centroid delay. This makes them all be equally delayed and therefore in synch. void refresh_largest_delay_samples() { largest_delay_samples = -_MIN(SHAPED_LIST(X.Ni[0], Y.Ni[0], Z.Ni[0], E.Ni[0])); } + void reset(){ + #define _RESET_ZI(A) ZERO(A.d_zi); + SHAPED_MAP(_RESET_ZI); + zi_idx = 0; + } } shaping_t; diff --git a/Marlin/src/module/ft_motion/smoothing.h b/Marlin/src/module/ft_motion/smoothing.h index ae8c088b30..9d4043c028 100644 --- a/Marlin/src/module/ft_motion/smoothing.h +++ b/Marlin/src/module/ft_motion/smoothing.h @@ -44,4 +44,9 @@ typedef struct Smoothing { // is done by delaying all axes to match the laggiest one (i.e largest_delay_samples). void refresh_largest_delay_samples() { largest_delay_samples = _MAX(CARTES_LIST(X.delay_samples, Y.delay_samples, Z.delay_samples, E.delay_samples)); } // Note: the delay equals smoothing_time iff the input signal frequency is lower than 1/smoothing_time, luckily for us, this holds in this case + void reset() { + #define _CLEAR(A) ZERO(A.smoothing_pass); + LOGICAL_AXIS_MAP(_CLEAR); + #undef _CLEAR + } } smoothing_t; diff --git a/Marlin/src/module/ft_motion/stepping.cpp b/Marlin/src/module/ft_motion/stepping.cpp deleted file mode 100644 index f86d07cf7c..0000000000 --- a/Marlin/src/module/ft_motion/stepping.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/** - * Marlin 3D Printer Firmware - * Copyright (c) 2025 MarlinFirmware [https://github.com/MarlinFirmware/Marlin] - * - * Based on Sprinter and grbl. - * Copyright (c) 2011 Camiel Gubbels / Erik van der Zalm - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - * - */ - -#include "../../inc/MarlinConfigPre.h" - -#if ENABLED(FT_MOTION) - -#include "stepping.h" -#include "../ft_motion.h" - -void Stepping::reset() { - stepper_plan.reset(); - delta_error_q32.set(LOGICAL_AXIS_LIST_1(1UL << 31)); // Start as 0.5 in q32 so steps are rounded - step_bits = 0; - bresenham_iterations_pending = 0; -} - -uint32_t Stepping::advance_until_step() { - xyze_ulong_t space_q32 = -delta_error_q32 + UINT32_MAX; // How much accumulation until a step in any axis is ALMOST due - // TODO: Add operators to types.h for scalar destination. - - xyze_ulong_t advance_q32 = stepper_plan.advance_dividend_q0_32; - uint32_t iterations = bresenham_iterations_pending; - // Per-axis lower-bound approx of floor(space_q32/adv), min across axes (lower bound because this fast division underestimates result by up to 1) - //#define RUN_AXIS(A) if (advance_q32.A > 0) NOMORE(iterations, space_q32.A / advance_q32.A); - #define RUN_AXIS(A) NOMORE(iterations, uint32_t((uint64_t(space_q32.A) * advance_dividend_reciprocal.A) >> 32)); - LOGICAL_AXIS_MAP(RUN_AXIS); - #undef RUN_AXIS - - #define RUN_AXIS(A) delta_error_q32.A += advance_q32.A * iterations; - LOGICAL_AXIS_MAP(RUN_AXIS); - #undef RUN_AXIS - - bresenham_iterations_pending -= iterations; - step_bits = 0; - // iterations may be underestimated by 1 by the cheap division, therefore we may have to do 2 iterations here - while (bresenham_iterations_pending && !(bool)step_bits) { - iterations++; - bresenham_iterations_pending--; - #define RUN_AXIS(A) do{ \ - delta_error_q32.A += stepper_plan.advance_dividend_q0_32.A; \ - step_bits.A = delta_error_q32.A < stepper_plan.advance_dividend_q0_32.A; \ - }while(0); - LOGICAL_AXIS_MAP(RUN_AXIS); - #undef RUN_AXIS - } - - return iterations * INTERVAL_PER_ITERATION; -} - -uint32_t Stepping::plan() { - uint32_t intervals = 0; - if (bresenham_iterations_pending > 0) { - intervals = advance_until_step(); - if (bool(step_bits)) return intervals; // steps to make => return the wait time so it gets done in due time - // Else all Bresenham iterations were advanced without steps => this is just the frame end, so plan the next one directly and accumulate the wait - } - - if (ftMotion.stepper_plan_is_empty()) { - bresenham_iterations_pending = 0; - step_bits = 0; - return INTERVAL_PER_TRAJ_POINT; - } - - AxisBits old_dir_bits = stepper_plan.dir_bits; - stepper_plan = ftMotion.dequeue_stepper_plan(); - const AxisBits dir_flip_mask = old_dir_bits ^ stepper_plan.dir_bits; // axes that must toggle now - if (dir_flip_mask) { - #define _HANDLE_DIR_CHANGES(A) if (dir_flip_mask.A) delta_error_q32.A *= -1; - LOGICAL_AXIS_MAP(_HANDLE_DIR_CHANGES); - #undef _HANDLE_DIR_CHANGES - } - - if (stepper_plan.advance_dividend_q0_32 == 0) { - // Don't waste time in zero motion traj points - bresenham_iterations_pending = 0; - step_bits = 0; - return INTERVAL_PER_TRAJ_POINT; - } - - // This vector division is unavoidable, but it saves a division per step during Bresenham - // The reciprocal is actually 2^32/dividend, but that requires dividing a uint64_t, which quite expensive - // Since even the real reciprocal may underestimate the quotient by 1 anyway already, this optimisation doesn't - // make things worse. This underestimation is compensated for in advance_until_step. - #define _DIVIDEND_RECIP(A) do{ \ - const uint32_t d = stepper_plan.advance_dividend_q0_32.A; \ - advance_dividend_reciprocal.A = d ? UINT32_MAX / d : UINT32_MAX; \ - }while(0); - LOGICAL_AXIS_MAP(_DIVIDEND_RECIP); - #undef _DIVIDEND_RECIP - - bresenham_iterations_pending = ITERATIONS_PER_TRAJ; - return intervals + advance_until_step(); -} - -#endif // FT_MOTION diff --git a/Marlin/src/module/ft_motion/stepping.h b/Marlin/src/module/ft_motion/stepping.h index fc3a5858fd..cacdd5d69a 100644 --- a/Marlin/src/module/ft_motion/stepping.h +++ b/Marlin/src/module/ft_motion/stepping.h @@ -23,31 +23,222 @@ #include "../../inc/MarlinConfig.h" +// +// uint64-free equivalent of: ((uint64_t)a * b) >> 16 +// +FORCE_INLINE constexpr uint32_t a_times_b_shift_16(uint32_t a, uint32_t b) { + uint32_t hi = a >> 16; + uint32_t lo = a & 0xFFFFu; + return (hi * b) + ((lo * b) >> 16); +} +#define FTM_NEVER (UINT16_MAX) // Reserved number to indicate "no ticks in this frame" (FRAME_TICKS_FP+1 would work too) +constexpr uint32_t FRAME_TICKS = STEPPER_TIMER_RATE / FTM_FS; // Timer ticks in a frame +static_assert(FRAME_TICKS < FTM_NEVER, "(STEPPER_TIMER_RATE / FTM_FS) must be < 2^16 (otherwise fixed-point numbers exceed uint16 vars)."); +constexpr uint32_t FTM_Q_INT = 32u - __builtin_clz(FRAME_TICKS + 1); // Bits to represent the max value (duration of a frame, +1 one for FTM_NEVER). + // "clz" counts leading zeroes. +constexpr uint32_t FTM_Q = 16 - FTM_Q_INT; // uint16 interval fractional bits. + // Intervals buffer has fixed point numbers with the point on this position + +// The _FP and _fp suffixes mean the number is in fixed point format with the point at the FTM_Q position. +// See: https://en.wikipedia.org/wiki/Fixed-point_arithmetic +// E.g number_fp = number << FTM_Q +// number == (number_fp >> FTM_Q) +constexpr uint32_t ONE_FP = 1 << FTM_Q; // Number 1 in fixed point format +constexpr uint32_t FP_FLOOR_MASK = ~(ONE_FP - 1); // Bit mask to do FLOOR in fixed point +constexpr uint32_t FRAME_TICKS_FP = FRAME_TICKS << FTM_Q; // Ticks in a frame in fixed point + typedef struct stepper_plan { AxisBits dir_bits; - xyze_ulong_t advance_dividend_q0_32{0}; - void reset() { advance_dividend_q0_32.reset(); } + xyze_uint_t first_interval_fp; + xyze_uint_t interval_fp; } stepper_plan_t; // Stepping plan handles steps for a whole frame (trajectory point delta) typedef struct Stepping { - stepper_plan_t stepper_plan; - xyze_ulong_t advance_dividend_reciprocal{0}; // Note this 32 bit reciprocal underestimates quotients by at most one. - xyze_ulong_t delta_error_q32{ LOGICAL_AXIS_LIST_1(_BV32(31)) }; - AxisBits step_bits; - uint32_t bresenham_iterations_pending; - void reset(); - // Updates error and bresenham_iterations_pending, sets step_bits and returns interval until the next step (or end of frame). - uint32_t advance_until_step(); - /** - * If bresenham_iterations_pending, advance to next actual step. - * Else, consume stepper data point - * Then return interval until that next step - */ - uint32_t plan(); - #define INTERVAL_PER_ITERATION (STEPPER_TIMER_RATE / FTM_STEPPER_FS) - #define INTERVAL_PER_TRAJ_POINT (STEPPER_TIMER_RATE / FTM_FS) - #define ITERATIONS_PER_TRAJ (FTM_STEPPER_FS * FTM_TS) + // + // ISR part + // + + AxisBits dir_bits; + AxisBits step_bits; + + xyze_ulong_t axis_interval_fp{ LOGICAL_AXIS_LIST_1(FTM_NEVER) }; + xyze_ulong_t ticks_left_per_axis_fp{ LOGICAL_AXIS_LIST_1(FTM_NEVER) }; + uint32_t ticks_left_in_frame_fp = 0; + + // Return how many full ticks we must wait before + // generating the next step pulse. The call is inexpensive: + // - no heap, no locks – pure arithmetic on pre-computed data + FORCE_INLINE uint32_t advance_until_step() { + step_bits = 0; + uint32_t ticks_to_wait_fp = 0; + + for (;;) { + // Smallest remaining tick count among all axes – next step time + const uint32_t ticks_to_next_step_fp = ticks_left_per_axis_fp.small(); + + // Does the frame finish before this next step occurs? + if (ticks_to_next_step_fp > ticks_left_in_frame_fp) { + // Frame ends before next step + if (is_empty()) { + ticks_left_in_frame_fp = 0; + ticks_left_per_axis_fp = FTM_NEVER; + return FTM_NEVER; + } + + // Consume the rest of this frame's time + const uint32_t wait_floor_fp = ticks_left_in_frame_fp & FP_FLOOR_MASK; + ticks_to_wait_fp += wait_floor_fp; + ticks_left_in_frame_fp -= wait_floor_fp; + + // + // Pull the next plan – it already contains: + // - direction bits + // - first_interval_fp (time to the first step) + // - interval_fp (repeating step period) + // + const stepper_plan_t next = dequeue(); + dir_bits = next.dir_bits; + axis_interval_fp = next.interval_fp.asUInt32(); + + // Note the frame will actually end a fraction of a tick in the future, and ticks_left_in_frame_fp still has that fraction. + // Instead of discarding that time, we delay both the end of the next frame, and all first steps by that amount. + ticks_left_per_axis_fp = next.first_interval_fp.asUInt32(); + ticks_left_per_axis_fp += ticks_left_in_frame_fp; + ticks_left_in_frame_fp += FRAME_TICKS_FP; // Start a fresh frame + } + else { + // Step happens before frame end + // Advance to it + const uint32_t wait_floor_fp = ticks_to_next_step_fp & FP_FLOOR_MASK; + ticks_to_wait_fp += wait_floor_fp; + ticks_left_in_frame_fp -= wait_floor_fp; + ticks_left_per_axis_fp -= wait_floor_fp; + + // Build step_bits: any axis whose counter < ONE_FP should step before the next tick, so we tick now + // unless the frame ends earlier. + uint32_t limit_fp = _MIN(ONE_FP - 1, ticks_left_in_frame_fp); + auto _set_step_bit = [&](const AxisEnum A) __attribute__((always_inline)) { + if (ticks_left_per_axis_fp[A] <= limit_fp) { + step_bits[A] = 1; + ticks_left_per_axis_fp[A] += axis_interval_fp[A]; + } + }; + LOGICAL_AXIS_CALL(_set_step_bit); + + return ticks_to_wait_fp >> FTM_Q; // Convert fixed point back to whole ticks + } + } // loop forever + } + + FORCE_INLINE void reset() { + step_bits = 0; + axis_interval_fp = FTM_NEVER; + ticks_left_per_axis_fp = FTM_NEVER; + ticks_left_in_frame_fp = 0; + + stepper_plan_tail = stepper_plan_head = 0; + curr_steps_q48_16.reset(); + } + + // + // Buffering part + // + + stepper_plan_t stepper_plan_buff[FTM_BUFFER_SIZE]; + uint32_t stepper_plan_tail = 0, stepper_plan_head = 0; + XYZEval curr_steps_q48_16{0}; + + FORCE_INLINE void enqueue(XYZEval next_steps_q48_16) { + + stepper_plan_t stepper_plan; + constexpr uint32_t HALF_PHASE_OFFSET = (1UL << 15); // to make steps at .5 crossings instead of integers to center the error + + auto _run_axis = [&](const AxisEnum A) __attribute__((always_inline)) { + // Add half-phase offset to keep step error centred + const int64_t offset_curr_q48_16 = curr_steps_q48_16[A] + HALF_PHASE_OFFSET, + offset_next_q48_16 = next_steps_q48_16[A] + HALF_PHASE_OFFSET; + curr_steps_q48_16[A] = next_steps_q48_16[A]; + + // Determine direction change + const bool new_dir = offset_next_q48_16 >= offset_curr_q48_16; + stepper_plan.dir_bits[A] = new_dir; + + // Δsteps in Q16.16 format – magnitude only + const uint32_t delta_q16_16 = abs(offset_next_q48_16 - offset_curr_q48_16); + + // Current / next phase (fractional part of the position) + uint32_t curr_phase_q1_16 = offset_curr_q48_16 & 0xFFFF, + next_phase_q1_16 = offset_next_q48_16 & 0xFFFF; + if (!new_dir) { + // When going backwards, the phase is 1-phase + curr_phase_q1_16 = (1UL<<16) - curr_phase_q1_16; + next_phase_q1_16 = (1UL<<16) - next_phase_q1_16; + } + // When going, e.g., from 0.6 to 1.0, the delta is not a whole step, + // but the phase overflow indicates a step. + const uint32_t carry = curr_phase_q1_16 > next_phase_q1_16; + + // steps_to_make = integer steps + potential fraction crossing an integer + const uint16_t steps_to_make = (delta_q16_16 >> 16) + carry; + + if (steps_to_make == 0) { // No steps on this axis + stepper_plan.first_interval_fp[A] = FTM_NEVER; + stepper_plan.interval_fp[A] = FTM_NEVER; + return; + } + + // Compute the exact time between steps. + // interval = ticks_per_frame / delta + // current_frame_phase_fp = interval * curr_phase + const uint32_t interval_fp = (FRAME_TICKS_FP << 16) / delta_q16_16, + current_frame_phase_fp = a_times_b_shift_16(interval_fp, curr_phase_q1_16); + uint32_t first_interval_fp = interval_fp - current_frame_phase_fp; + + // The calculation of interval_fp may undershoot its value by a fraction + // due to integer (floor) division. This small fractional error can + // occasionally make a spurious step fit inside this frame. + // To avoid that corner case, the first interval is incremented just enough + // for it to not fit. + const uint32_t tick_of_spurious_step_fp = first_interval_fp + interval_fp * steps_to_make; + if (tick_of_spurious_step_fp <= FRAME_TICKS_FP) { + first_interval_fp += FRAME_TICKS_FP - tick_of_spurious_step_fp + 1; + } + + stepper_plan.first_interval_fp[A] = _MIN(first_interval_fp, FTM_NEVER); + stepper_plan.interval_fp[A] = _MIN(interval_fp, FTM_NEVER); + }; + + LOGICAL_AXIS_CALL(_run_axis); + + // Store the plan into the circular buffer + stepper_plan_buff[stepper_plan_head] = stepper_plan; + stepper_plan_head = (stepper_plan_head + 1U) & FTM_BUFFER_MASK; + } + + // Dequeue a plan. + // Zero-copy consume; caller must use it before next dequeue if they keep a ref. + // Done like this to avoid double copy. + // e.g do: stepper_plan_t data = dequeue(); this is ok + FORCE_INLINE stepper_plan_t& dequeue() { + const uint32_t i = stepper_plan_tail; + stepper_plan_tail = (i + 1U) & FTM_BUFFER_MASK; + return stepper_plan_buff[i]; + } + + // + // Simple helper predicates + // + + FORCE_INLINE bool is_busy() { + return !(is_empty() && ticks_left_in_frame_fp == 0); + } + FORCE_INLINE bool is_empty() { + return stepper_plan_head == stepper_plan_tail; + } + FORCE_INLINE bool is_full() { + return ((stepper_plan_head + 1) & FTM_BUFFER_MASK) == stepper_plan_tail; + } } stepping_t; diff --git a/Marlin/src/module/stepper.cpp b/Marlin/src/module/stepper.cpp index 76efc17e4f..060d51c0cc 100644 --- a/Marlin/src/module/stepper.cpp +++ b/Marlin/src/module/stepper.cpp @@ -3585,9 +3585,7 @@ void Stepper::report_positions() { */ void Stepper::ftMotion_stepper() { AxisBits &step_bits = ftMotion.stepping.step_bits; // Aliases for prettier code - AxisBits &dir_bits = ftMotion.stepping.stepper_plan.dir_bits; - - if (step_bits.bits == 0) return; + AxisBits &dir_bits = ftMotion.stepping.dir_bits; USING_TIMED_PULSE();