瀏覽代碼

🐛 Fix AVR DELAY_US int overflow (#22268)

Skruppy 3 年之前
父節點
當前提交
cc1145302b
沒有連結到貢獻者的電子郵件帳戶。
共有 1 個檔案被更改，包括 51 行新增和 29 行刪除
  1. 51
    29
      Marlin/src/HAL/shared/Delay.h

+ 51
- 29
Marlin/src/HAL/shared/Delay.h 查看文件

@@ -97,43 +97,65 @@ void calibrate_delay_loop();
97 97
   #define DELAY_US(x) DelayCycleFnc((x) * ((F_CPU) / 1000000UL))
98 98
 
99 99
 #elif defined(__AVR__)
100
-
101
-  #define nop() __asm__ __volatile__("nop;\n\t":::)
102
-
103
-  FORCE_INLINE static void __delay_4cycles(uint8_t cy) {
104
-    __asm__ __volatile__(
105
-      L("1")
106
-      A("dec %[cnt]")
107
-      A("nop")
108
-      A("brne 1b")
109
-      : [cnt] "+r"(cy)  // output: +r means input+output
110
-      :                 // input:
111
-      : "cc"            // clobbers:
112
-    );
100
+  FORCE_INLINE static void __delay_up_to_3c(uint8_t cycles) {
101
+    switch (cycles) {
102
+      case 3:
103
+        __asm__ __volatile__(A("RJMP .+0") A("NOP"));
104
+        break;
105
+      case 2:
106
+        __asm__ __volatile__(A("RJMP .+0"));
107
+        break;
108
+      case 1:
109
+        __asm__ __volatile__(A("NOP"));
110
+        break;
111
+    }
113 112
   }
114 113
 
115 114
   // Delay in cycles
116
-  FORCE_INLINE static void DELAY_CYCLES(uint16_t x) {
117
-
118
-    if (__builtin_constant_p(x)) {
119
-      #define MAXNOPS 4
120
-
121
-      if (x <= (MAXNOPS)) {
122
-        switch (x) { case 4: nop(); case 3: nop(); case 2: nop(); case 1: nop(); }
115
+  FORCE_INLINE static void DELAY_CYCLES(uint16_t cycles) {
116
+    if (__builtin_constant_p(cycles)) {
117
+      if (cycles <= 3) {
118
+        __delay_up_to_3c(cycles);
119
+      }
120
+      else if (cycles == 4) {
121
+        __delay_up_to_3c(2);
122
+        __delay_up_to_3c(2);
123 123
       }
124 124
       else {
125
-        const uint32_t rem = (x) % (MAXNOPS);
126
-        switch (rem) { case 3: nop(); case 2: nop(); case 1: nop(); }
127
-        if ((x = (x) / (MAXNOPS)))
128
-          __delay_4cycles(x); // if need more then 4 nop loop is more optimal
125
+        cycles -= 1 + 4; // Compensate for the first LDI (1) and the first round (4)
126
+        __delay_up_to_3c(cycles % 4);
127
+
128
+        cycles /= 4;
129
+        // The following code burns [1 + 4 * (rounds+1)] cycles
130
+        uint16_t dummy;
131
+        __asm__ __volatile__(
132
+          // "manually" load counter from constants, otherwise the compiler may optimize this part away
133
+          A("LDI %A[rounds], %[l]") // 1c
134
+          A("LDI %B[rounds], %[h]") // 1c (compensating the non branching BRCC)
135
+          L("1")
136
+          A("SBIW %[rounds], 1")    // 2c
137
+          A("BRCC 1b")              // 2c when branching, else 1c (end of loop)
138
+          : // Outputs ...
139
+          [rounds] "=w" (dummy) // Restrict to a wo (=) 16 bit register pair (w)
140
+          : // Inputs ...
141
+          [l] "M" (cycles%256), // Restrict to 0..255 constant (M)
142
+          [h] "M" (cycles/256)  // Restrict to 0..255 constant (M)
143
+          :// Clobbers ...
144
+          "cc"                  // Indicate we are modifying flags like Carry (cc)
145
+        );
129 146
       }
130
-
131
-      #undef MAXNOPS
132 147
     }
133
-    else if ((x >>= 2))
134
-      __delay_4cycles(x);
148
+    else {
149
+      __asm__ __volatile__(
150
+        L("1")
151
+        A("SBIW %[cycles], 4")   // 2c
152
+        A("BRCC 1b")             // 2c when branching, else 1c (end of loop)
153
+        : [cycles] "+w" (cycles) // output: Restrict to a rw (+) 16 bit register pair (w)
154
+        :                        // input: -
155
+        : "cc"                   // clobbers: We are modifying flags like Carry (cc)
156
+      );
157
+    }
135 158
   }
136
-  #undef nop
137 159
 
138 160
   // Delay in microseconds
139 161
   #define DELAY_US(x) DELAY_CYCLES((x) * ((F_CPU) / 1000000UL))

Loading…
取消
儲存