git.ozlabs.org Git - petitboot/blobdiff - lib/fold/fold.c
lib/fold: Catch error case from mbrtowc()
[petitboot] / lib / fold / fold.c
index ec10c8c240388af62981d582eaac401d19ae28d7..812a324839223df6fd43af74f5549754fb6a0146 100644 (file)
@@ -1,4 +1,12 @@
 
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <wchar.h>
+#include <wctype.h>
+
 #include "fold/fold.h"
 
 void fold_text(const char *text,
@@ -7,38 +15,80 @@ void fold_text(const char *text,
                void *arg)
 {
        const char *start, *end, *sep;
-       int rc = 0;
+       size_t sep_bytes, len;
+       int col, rc = 0;
+       mbstate_t ps;
 
+       /* start, end and sep are byte-positions in the string, and should always
+        * lie on the start of a multibyte sequence */
        start = end = sep = text;
+       sep_bytes = 0;
+       col = 0;
+       len = strlen(text);
+       memset(&ps, 0, sizeof(ps));
 
        while (!rc) {
+               size_t bytes;
+               wchar_t wc;
+               int width;
+
+               bytes = mbrtowc(&wc, end, len - (end - text), &ps);
+
+               assert(bytes != (size_t)-1);
+
+               /* we'll get a zero size for the nul terminator, (size_t) -2
+                * if we've reached the end of the buffer, or (size_t) -1 on
+                * error */
+               if (!bytes || bytes == (size_t) -2 || bytes == (size_t) -1) {
+                       line_cb(arg, start, end - start);
+                       break;
+               }
 
-               if (*end == '\n') {
+               if (wc == L'\n') {
                        rc = line_cb(arg, start, end - start);
-                       start = sep = ++end;
+                       start = sep = end += bytes;
+                       sep_bytes = 0;
+                       col = 0;
+                       continue;
+               }
+
+               width = wcwidth(wc);
 
-               } else if (*end == '\0') {
+               /* we should have caught this in the !bytes check... */
+               if (width == 0) {
                        line_cb(arg, start, end - start);
-                       rc = 1;
+                       break;
+               }
 
-               } else if (end - start >= linelen - 1) {
+               /* unprintable character? just add it to the current line */
+               if (width < 0) {
+                       end += bytes;
+                       continue;
+               }
+
+               col += width;
+
+               if (col > linelen) {
                        if (sep != start) {
                                /* split on a previous word boundary, if
                                 * possible */
                                rc = line_cb(arg, start, sep - start);
-                               start = end = ++sep;
+                               end = sep + sep_bytes;
                        } else {
                                /* otherwise, break the word */
-                               end++;
                                rc = line_cb(arg, start, end - start);
-                               start = sep = end;
                        }
+                       sep_bytes = 0;
+                       start = sep = end;
+                       col = 0;
 
                } else {
-                       end++;
                        /* record our last separator */
-                       if (*end == ' ')
+                       if (wc == L' ') {
                                sep = end;
+                               sep_bytes = bytes;
+                       }
+                       end += bytes;
                }
        }
 }