Przeglądaj źródła

Implement DELAY_NS with CYCCNT on Cortex-M7 (#12283)

Nils Hasenbanck 6 lat temu
rodzic
commit
cafabf2055

+ 6
- 0
Marlin/src/HAL/HAL_STM32/HAL.cpp Wyświetl plik

@@ -30,6 +30,7 @@
30 30
 #include "HAL.h"
31 31
 
32 32
 #include "../../inc/MarlinConfig.h"
33
+#include "../shared/Delay.h"
33 34
 
34 35
 #if ENABLED(EEPROM_EMULATED_WITH_SRAM)
35 36
   #if STM32F7xx
@@ -80,6 +81,11 @@ uint16_t HAL_adc_result;
80 81
 // HAL initialization task
81 82
 void HAL_init(void) {
82 83
 
84
+  // Needed for DELAY_NS() / DELAY_US() on CORTEX-M7
85
+  #if (defined(__arm__) || defined(__thumb__)) && __CORTEX_M == 7
86
+    enableCycleCounter();
87
+  #endif
88
+
83 89
   FastIO_init();
84 90
 
85 91
   #if ENABLED(SDSUPPORT)

+ 0
- 2
Marlin/src/HAL/HAL_STM32F7/HAL.h Wyświetl plik

@@ -153,8 +153,6 @@ extern uint16_t HAL_adc_result;
153 153
 // Public functions
154 154
 // --------------------------------------------------------------------------
155 155
 
156
-
157
-
158 156
 // Memory related
159 157
 #define __bss_end __bss_end__
160 158
 

+ 1
- 1
Marlin/src/HAL/HAL_TEENSY31_32/HAL.cpp Wyświetl plik

@@ -26,7 +26,7 @@
26 26
 #ifdef __MK20DX256__
27 27
 
28 28
 #include "HAL.h"
29
-#include "../Delay.h"
29
+#include "../shared/Delay.h"
30 30
 
31 31
 #include <Wire.h>
32 32
 

+ 64
- 41
Marlin/src/HAL/shared/Delay.h Wyświetl plik

@@ -19,6 +19,7 @@
19 19
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
20 20
  *
21 21
  */
22
+#pragma once
22 23
 
23 24
 /**
24 25
  * Busy wait delay cycles routines:
@@ -28,57 +29,81 @@
28 29
  *  DELAY_US(count): Delay execution in microseconds
29 30
  */
30 31
 
31
-#ifndef MARLIN_DELAY_H
32
-#define MARLIN_DELAY_H
33
-
34 32
 #include "../../core/macros.h"
33
+#include "../../core/millis_t.h"
35 34
 
36 35
 #if defined(__arm__) || defined(__thumb__)
37 36
 
38
-  // https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles
37
+  #if __CORTEX_M == 7
39 38
 
40
-  #define nop() __asm__ __volatile__("nop;\n\t":::)
39
+    // Cortex-M7 can use the cycle counter of the DWT unit
40
+    // http://www.anthonyvh.com/2017/05/18/cortex_m-cycle_counter/
41 41
 
42
-  FORCE_INLINE static void __delay_4cycles(uint32_t cy) { // +1 cycle
43
-    #if ARCH_PIPELINE_RELOAD_CYCLES < 2
44
-      #define EXTRA_NOP_CYCLES A("nop")
45
-    #else
46
-      #define EXTRA_NOP_CYCLES ""
47
-    #endif
42
+    FORCE_INLINE static void enableCycleCounter() {
43
+      CoreDebug->DEMCR |= CoreDebug_DEMCR_TRCENA_Msk;
48 44
 
49
-    __asm__ __volatile__(
50
-      A(".syntax unified") // prevents non-unified syntax on CM0, CM1
51
-      L("1")
52
-      A("subs %[cnt],#1")
53
-      EXTRA_NOP_CYCLES
54
-      A("bne 1b")
55
-      : [cnt]"+r"(cy)   // output: +r means input+output
56
-      :                 // input:
57
-      : "cc"            // clobbers:
58
-    );
59
-  }
45
+      // Unlock DWT.
46
+      DWT->LAR = 0xC5ACCE55;
60 47
 
61
-  // Delay in cycles
62
-  FORCE_INLINE static void DELAY_CYCLES(uint32_t x) {
48
+      DWT->CYCCNT = 0;
49
+      DWT->CTRL |= DWT_CTRL_CYCCNTENA_Msk;
50
+    }
63 51
 
64
-    if (__builtin_constant_p(x)) {
65
-      #define MAXNOPS 4
52
+    FORCE_INLINE volatile uint32_t getCycleCount() { return DWT->CYCCNT; }
66 53
 
67
-      if (x <= (MAXNOPS)) {
68
-        switch (x) { case 4: nop(); case 3: nop(); case 2: nop(); case 1: nop(); }
69
-      }
70
-      else { // because of +1 cycle inside delay_4cycles
71
-        const uint32_t rem = (x - 1) % (MAXNOPS);
72
-        switch (rem) { case 3: nop(); case 2: nop(); case 1: nop(); }
73
-        if ((x = (x - 1) / (MAXNOPS)))
74
-          __delay_4cycles(x); // if more than 4 nops are needed, a loop is more optimal
54
+    FORCE_INLINE static void DELAY_CYCLES(const uint32_t x) {
55
+      const uint32_t endCycles = getCycleCount() + x;
56
+      while (PENDING(getCycleCount(), endCycles)) { }
57
+    }
58
+
59
+  #else
60
+
61
+    // https://blueprints.launchpad.net/gcc-arm-embedded/+spec/delay-cycles
62
+
63
+    #define nop() __asm__ __volatile__("nop;\n\t":::)
64
+
65
+    FORCE_INLINE static void __delay_4cycles(uint32_t cy) { // +1 cycle
66
+      #if ARCH_PIPELINE_RELOAD_CYCLES < 2
67
+        #define EXTRA_NOP_CYCLES A("nop")
68
+      #else
69
+        #define EXTRA_NOP_CYCLES ""
70
+      #endif
71
+
72
+      __asm__ __volatile__(
73
+        A(".syntax unified") // prevents non-unified syntax on CM0, CM1
74
+        L("1")
75
+        A("subs %[cnt],#1")
76
+        EXTRA_NOP_CYCLES
77
+        A("bne 1b")
78
+        : [cnt]"+r"(cy)   // output: +r means input+output
79
+        :                 // input:
80
+        : "cc"            // clobbers:
81
+      );
82
+    }
83
+
84
+    // Delay in cycles
85
+    FORCE_INLINE static void DELAY_CYCLES(uint32_t x) {
86
+
87
+      if (__builtin_constant_p(x)) {
88
+        #define MAXNOPS 4
89
+
90
+        if (x <= (MAXNOPS)) {
91
+          switch (x) { case 4: nop(); case 3: nop(); case 2: nop(); case 1: nop(); }
92
+        }
93
+        else { // because of +1 cycle inside delay_4cycles
94
+          const uint32_t rem = (x - 1) % (MAXNOPS);
95
+          switch (rem) { case 3: nop(); case 2: nop(); case 1: nop(); }
96
+          if ((x = (x - 1) / (MAXNOPS)))
97
+            __delay_4cycles(x); // if more than 4 nops are needed, a loop is more optimal
98
+        }
99
+        #undef MAXNOPS
75 100
       }
76
-      #undef MAXNOPS
101
+      else if ((x >>= 2))
102
+        __delay_4cycles(x);
77 103
     }
78
-    else if ((x >>= 2))
79
-      __delay_4cycles(x);
80
-  }
81
-  #undef nop
104
+    #undef nop
105
+
106
+  #endif
82 107
 
83 108
 #elif defined(__AVR__)
84 109
 
@@ -144,5 +169,3 @@
144 169
 
145 170
 // Delay in microseconds
146 171
 #define DELAY_US(x) DELAY_CYCLES( (x) * (F_CPU / 1000000UL) )
147
-
148
-#endif // MARLIN_DELAY_H

Ładowanie…
Anuluj
Zapisz