Browse Source

Handle UTF in long filenames (#20087)

Co-authored-by: Scott Lahteine <thinkyhead@users.noreply.github.com>
LinFor 4 years ago
parent
commit
5b9aeb2e5f
No account linked to committer's email address

+ 4
- 0
Marlin/Configuration_adv.h View File

@@ -1256,6 +1256,10 @@
1256 1256
                                       // Note: Only affects SCROLL_LONG_FILENAMES with SDSORT_CACHE_NAMES but not SDSORT_DYNAMIC_RAM.
1257 1257
   #endif
1258 1258
 
1259
+  // Allow international symbols in long filenames. To display correctly, the
1260
+  // LCD's font must contain the characters. Check your selected LCD language.
1261
+  #define UTF_FILENAME_SUPPORT
1262
+
1259 1263
   // This allows hosts to request long names for files and folders with M33
1260 1264
   //#define LONG_FILENAME_HOST_SUPPORT
1261 1265
 

+ 44
- 63
Marlin/src/lcd/fontutils.cpp View File

@@ -9,6 +9,8 @@
9 9
 
10 10
 #include "../inc/MarlinConfig.h"
11 11
 
12
+#define MAX_UTF8_CHAR_SIZE 4
13
+
12 14
 #if HAS_WIRED_LCD
13 15
   #include "marlinui.h"
14 16
   #include "../MarlinCore.h"
@@ -79,6 +81,8 @@ uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t
79 81
   uint32_t val = 0;
80 82
   uint8_t *p = pstart;
81 83
 
84
+  #define NEXT_6_BITS() do{ val <<= 6; p++; valcur = cb_read_byte(p); val |= (valcur & 0x3F); }while(0)
85
+
82 86
   uint8_t valcur = cb_read_byte(p);
83 87
   if (0 == (0x80 & valcur)) {
84 88
     val = valcur;
@@ -86,74 +90,51 @@ uint8_t* get_utf8_value_cb(uint8_t *pstart, read_byte_cb_t cb_read_byte, wchar_t
86 90
   }
87 91
   else if (0xC0 == (0xE0 & valcur)) {
88 92
     val = valcur & 0x1F;
89
-    val <<= 6;
90
-    p++;
91
-    valcur = cb_read_byte(p);
92
-    val |= (valcur & 0x3F);
93
-    p++;
94
-  }
95
-  else if (0xE0 == (0xF0 & valcur)) {
96
-    val = valcur & 0x0F;
97
-    val <<= 6; p++;
98
-    valcur = cb_read_byte(p);
99
-    val |= (valcur & 0x3F);
100
-    val <<= 6; p++;
101
-    valcur = cb_read_byte(p);
102
-    val |= (valcur & 0x3F);
103
-    p++;
104
-  }
105
-  else if (0xF0 == (0xF8 & valcur)) {
106
-    val = valcur & 0x07;
107
-    val <<= 6; p++;
108
-    valcur = cb_read_byte(p);
109
-    val |= (valcur & 0x3F);
110
-    val <<= 6; p++;
111
-    valcur = cb_read_byte(p);
112
-    val |= (valcur & 0x3F);
113
-    val <<= 6; p++;
114
-    valcur = cb_read_byte(p);
115
-    val |= (valcur & 0x3F);
116
-    p++;
117
-  }
118
-  else if (0xF8 == (0xFC & valcur)) {
119
-    val = valcur & 0x03;
120
-    val <<= 6; p++;
121
-    valcur = cb_read_byte(p);
122
-    val |= (valcur & 0x3F);
123
-    val <<= 6; p++;
124
-    valcur = cb_read_byte(p);
125
-    val |= (valcur & 0x3F);
126
-    val <<= 6; p++;
127
-    valcur = cb_read_byte(p);
128
-    val |= (valcur & 0x3F);
129
-    val <<= 6; p++;
130
-    valcur = cb_read_byte(p);
131
-    val |= (valcur & 0x3F);
132
-    p++;
133
-  }
134
-  else if (0xFC == (0xFE & valcur)) {
135
-    val = valcur & 0x01;
136
-    val <<= 6; p++;
137
-    valcur = cb_read_byte(p);
138
-    val |= (valcur & 0x3F);
139
-    val <<= 6; p++;
140
-    valcur = cb_read_byte(p);
141
-    val |= (valcur & 0x3F);
142
-    val <<= 6; p++;
143
-    valcur = cb_read_byte(p);
144
-    val |= (valcur & 0x3F);
145
-    val <<= 6; p++;
146
-    valcur = cb_read_byte(p);
147
-    val |= (valcur & 0x3F);
148
-    val <<= 6; p++;
149
-    valcur = cb_read_byte(p);
150
-    val |= (valcur & 0x3F);
93
+    NEXT_6_BITS();
151 94
     p++;
152 95
   }
96
+  #if MAX_UTF8_CHAR_SIZE >= 3
97
+    else if (0xE0 == (0xF0 & valcur)) {
98
+      val = valcur & 0x0F;
99
+      NEXT_6_BITS();
100
+      NEXT_6_BITS();
101
+      p++;
102
+    }
103
+  #endif
104
+  #if MAX_UTF8_CHAR_SIZE >= 4
105
+    else if (0xF0 == (0xF8 & valcur)) {
106
+      val = valcur & 0x07;
107
+      NEXT_6_BITS();
108
+      NEXT_6_BITS();
109
+      NEXT_6_BITS();
110
+      p++;
111
+    }
112
+  #endif
113
+  #if MAX_UTF8_CHAR_SIZE >= 5
114
+    else if (0xF8 == (0xFC & valcur)) {
115
+      val = valcur & 0x03;
116
+      NEXT_6_BITS();
117
+      NEXT_6_BITS();
118
+      NEXT_6_BITS();
119
+      NEXT_6_BITS();
120
+      p++;
121
+    }
122
+  #endif
123
+  #if MAX_UTF8_CHAR_SIZE >= 6
124
+    else if (0xFC == (0xFE & valcur)) {
125
+      val = valcur & 0x01;
126
+      NEXT_6_BITS();
127
+      NEXT_6_BITS();
128
+      NEXT_6_BITS();
129
+      NEXT_6_BITS();
130
+      NEXT_6_BITS();
131
+      p++;
132
+    }
133
+  #endif
153 134
   else if (0x80 == (0xC0 & valcur))
154 135
     for (; 0x80 == (0xC0 & valcur); ) { p++; valcur = cb_read_byte(p); }
155 136
   else
156
-    for (; ((0xFE & valcur) > 0xFC); ) { p++; valcur = cb_read_byte(p); }
137
+    for (; 0xFC < (0xFE & valcur); ) { p++; valcur = cb_read_byte(p); }
157 138
 
158 139
   if (pval) *pval = val;
159 140
 

+ 53
- 5
Marlin/src/sd/SdBaseFile.cpp View File

@@ -1103,19 +1103,67 @@ int8_t SdBaseFile::readDir(dir_t* dir, char* longFilename) {
1103 1103
         if (WITHIN(seq, 1, MAX_VFAT_ENTRIES)) {
1104 1104
           // TODO: Store the filename checksum to verify if a long-filename-unaware system modified the file table.
1105 1105
           n = (seq - 1) * (FILENAME_LENGTH);
1106
-          LOOP_L_N(i, FILENAME_LENGTH)
1107
-            longFilename[n + i] = (i < 5) ? VFAT->name1[i] : (i < 11) ? VFAT->name2[i - 5] : VFAT->name3[i - 11];
1106
+          LOOP_L_N(i, FILENAME_LENGTH) {
1107
+            uint16_t utf16_ch = (i < 5) ? VFAT->name1[i] : (i < 11) ? VFAT->name2[i - 5] : VFAT->name3[i - 11];
1108
+            #if ENABLED(UTF_FILENAME_SUPPORT)
1109
+              // We can't convert to UTF-8 here because UTF-8 is a variable-size encoding, while joining LFN blocks
1110
+              // needs fixed byte offsets. So just store the full UTF-16LE words here and convert them later.
1111
+              uint16_t idx = (n + i) * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding
1112
+              longFilename[idx] = utf16_ch & 0xFF;
1113
+              longFilename[idx+1] = (utf16_ch >> 8) & 0xFF;
1114
+            #else
1115
+              // Replace every character that does not fit in a single byte with '_'
1116
+              longFilename[n + i] = (utf16_ch > 0xFF) ? '_' : (utf16_ch & 0xFF);
1117
+            #endif
1118
+          }
1108 1119
           // If this VFAT entry is the last one, add a NUL terminator at the end of the string
1109
-          if (VFAT->sequenceNumber & 0x40) longFilename[n + FILENAME_LENGTH] = '\0';
1120
+          if (VFAT->sequenceNumber & 0x40) longFilename[(n + FILENAME_LENGTH) * LONG_FILENAME_CHARSIZE] = '\0';
1110 1121
         }
1111 1122
       }
1112 1123
     }
1124
+
1113 1125
     // Return if normal file or subdirectory
1114
-    if (DIR_IS_FILE_OR_SUBDIR(dir)) return n;
1126
+    if (DIR_IS_FILE_OR_SUBDIR(dir)) {
1127
+      #if ENABLED(UTF_FILENAME_SUPPORT)
1128
+        // Convert filename from utf-16 to utf-8 as Marlin expects
1129
+        #if LONG_FILENAME_CHARSIZE > 2
1130
+          // Warn developers that 3-byte UTF-8 sequences are not supported yet: expanding a run of 2-byte
1131
+          // UTF-16 words into 3-byte sequences in place would overwrite the rest of the filename.
1132
+          #error "Currently filename re-encoding is done in-place. It may break the remaining chars to use 3-byte codepoints."
1133
+        #endif
1134
+        uint16_t currentPos = 0;
1135
+        LOOP_L_N(i, (LONG_FILENAME_LENGTH / 2)) {
1136
+          uint16_t idx = i * 2; // This is fixed as FAT LFN always contain UTF-16LE encoding
1137
+
1138
+          uint16_t utf16_ch = longFilename[idx] | (longFilename[idx + 1] << 8);
1139
+          if (0xD800 == (utf16_ch & 0xF800))                                    // Surrogate pair - encode as '_'
1140
+            longFilename[currentPos++] = '_';
1141
+          else if (0 == (utf16_ch & 0xFF80))                                    // Encode as 1-byte utf-8 char
1142
+            longFilename[currentPos++] = utf16_ch & 0x007F;
1143
+          else if (0 == (utf16_ch & 0xF800)) {                                  // Encode as 2-byte utf-8 char
1144
+            longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x1F);
1145
+            longFilename[currentPos++] = 0x80 | (utf16_ch & 0x3F);
1146
+          }
1147
+          else {
1148
+            #if LONG_FILENAME_CHARSIZE > 2                                      // Encode as 3-byte utf-8 char
1149
+              longFilename[currentPos++] = 0xE0 | ((utf16_ch >> 12) & 0x0F);
1150
+              longFilename[currentPos++] = 0xC0 | ((utf16_ch >> 6) & 0x3F);
1151
+              longFilename[currentPos++] = 0xC0 | (utf16_ch & 0x3F);
1152
+            #else                                                               // Encode as '_'
1153
+              longFilename[currentPos++] = '_';
1154
+            #endif
1155
+          }
1156
+
1157
+          if (0 == utf16_ch) break; // End of filename
1158
+        }
1159
+        return currentPos;
1160
+      #else
1161
+        return n;
1162
+      #endif
1163
+    }
1115 1164
   }
1116 1165
 }
1117 1166
 
1118
-
1119 1167
 // Read next directory entry into the cache
1120 1168
 // Assumes file is correctly positioned
1121 1169
 dir_t* SdBaseFile::readDirCache() {

+ 6
- 1
Marlin/src/sd/SdFatConfig.h View File

@@ -103,5 +103,10 @@
103 103
 
104 104
 #define FILENAME_LENGTH 13 // Number of UTF-16 characters per entry
105 105
 
106
+// UTF-8 may use up to 3 bytes to represent a single UTF-16 code unit.
107
+// Characters that would need 3 bytes are replaced by '_', so allow only 2 bytes
108
+// per character, or 1 byte if UTF_FILENAME_SUPPORT is disabled.
109
+#define LONG_FILENAME_CHARSIZE TERN(UTF_FILENAME_SUPPORT, 2, 1)
110
+
106 111
 // Total bytes needed to store a single long filename
107
-#define LONG_FILENAME_LENGTH (FILENAME_LENGTH * MAX_VFAT_ENTRIES + 1)
112
+#define LONG_FILENAME_LENGTH (FILENAME_LENGTH * LONG_FILENAME_CHARSIZE * MAX_VFAT_ENTRIES + 1)

+ 17
- 31
buildroot/share/fonts/genpages.c View File

@@ -71,63 +71,49 @@ uint8_t* get_utf8_value(uint8_t *pstart, wchar_t *pval) {
71 71
 
72 72
   assert(NULL != pstart);
73 73
 
74
+  #define NEXT_6_BITS() do{ val <<= 6; p++; val |= (*p & 0x3F); }while(0)
75
+
74 76
   if (0 == (0x80 & *p)) {
75 77
     val = (size_t)*p;
76 78
     p++;
77 79
   }
78 80
   else if (0xC0 == (0xE0 & *p)) {
79 81
     val = *p & 0x1F;
80
-    val <<= 6;
81
-    p++;
82
-    val |= (*p & 0x3F);
82
+    NEXT_6_BITS();
83 83
     p++;
84 84
     assert((wchar_t)val == get_val_utf82uni(pstart));
85 85
   }
86 86
   else if (0xE0 == (0xF0 & *p)) {
87 87
     val = *p & 0x0F;
88
-    val <<= 6; p++;
89
-    val |= (*p & 0x3F);
90
-    val <<= 6; p++;
91
-    val |= (*p & 0x3F);
88
+    NEXT_6_BITS();
89
+    NEXT_6_BITS();
92 90
     p++;
93 91
     assert((wchar_t)val == get_val_utf82uni(pstart));
94 92
   }
95 93
   else if (0xF0 == (0xF8 & *p)) {
96 94
     val = *p & 0x07;
97
-    val <<= 6; p++;
98
-    val |= (*p & 0x3F);
99
-    val <<= 6; p++;
100
-    val |= (*p & 0x3F);
101
-    val <<= 6; p++;
102
-    val |= (*p & 0x3F);
95
+    NEXT_6_BITS();
96
+    NEXT_6_BITS();
97
+    NEXT_6_BITS();
103 98
     p++;
104 99
     assert((wchar_t)val == get_val_utf82uni(pstart));
105 100
   }
106 101
   else if (0xF8 == (0xFC & *p)) {
107 102
     val = *p & 0x03;
108
-    val <<= 6; p++;
109
-    val |= (*p & 0x3F);
110
-    val <<= 6; p++;
111
-    val |= (*p & 0x3F);
112
-    val <<= 6; p++;
113
-    val |= (*p & 0x3F);
114
-    val <<= 6; p++;
115
-    val |= (*p & 0x3F);
103
+    NEXT_6_BITS();
104
+    NEXT_6_BITS();
105
+    NEXT_6_BITS();
106
+    NEXT_6_BITS();
116 107
     p++;
117 108
     assert((wchar_t)val == get_val_utf82uni(pstart));
118 109
   }
119 110
   else if (0xFC == (0xFE & *p)) {
120 111
     val = *p & 0x01;
121
-    val <<= 6; p++;
122
-    val |= (*p & 0x3F);
123
-    val <<= 6; p++;
124
-    val |= (*p & 0x3F);
125
-    val <<= 6; p++;
126
-    val |= (*p & 0x3F);
127
-    val <<= 6; p++;
128
-    val |= (*p & 0x3F);
129
-    val <<= 6; p++;
130
-    val |= (*p & 0x3F);
112
+    NEXT_6_BITS();
113
+    NEXT_6_BITS();
114
+    NEXT_6_BITS();
115
+    NEXT_6_BITS();
116
+    NEXT_6_BITS();
131 117
     p++;
132 118
     assert((wchar_t)val == get_val_utf82uni(pstart));
133 119
   }

Loading…
Cancel
Save