Changeset 1423442 in serd


Ignore:
Timestamp:
Aug 29, 2017, 3:01:09 AM (3 months ago)
Author:
David Robillard <d@…>
Branches:
master, serd1
Children:
b5875a4
Parents:
2976016
git-author:
David Robillard <d@…> (08/29/17 02:51:37)
git-committer:
David Robillard <d@…> (08/29/17 03:01:09)
Message:

Fix writing of corrupt UTF-8

Files:
3 edited

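Legend:

Each diff line below is prefixed with its line number in the old revision (r2976016), followed by its line number in the new revision (r1423442).
Unmodified: both line numbers are present.
Added: only the new line number is present.
Removed: only the old line number is present.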
  • src/serd_internal.h

    r2976016 r1423442  
    341 341       if ((c & 0x80) == 0) {  // Starts with `0'
    342 342           return 1;
    343     -     }
    344     -
    345     - #ifdef HAVE_BUILTIN_CLZ
    346     -     return __builtin_clz(~c << 24);
    347     - #else
    348     -     if ((c & 0xE0) == 0xC0) {  // Starts with `110'
        343 +     } else if ((c & 0xE0) == 0xC0) {  // Starts with `110'
    349 344           return 2;
    350 345       } else if ((c & 0xF0) == 0xE0) {  // Starts with `1110'
      …   …
    354 349       }
    355 350       return 0;
    356     - #endif
    357 351   }
    358 352
  • src/writer.c

    r2976016 r1423442  
    242 242           i   += size;
    243 243           if (size == 0) {
    244     -             // Corrupt input, write replacement char and scan to next start
    245     -             sink(replacement_char, sizeof(replacement_char), writer);
    246     -             for (; i < n_bytes && (utf8[i] & 0x80); ++i) {}
        244 +             // Corrupt input, scan to start of next character
        245 +             for (++i; i < n_bytes && (utf8[i] & 0x80); ++i) {}
    247 246           }
    248 247       }
      …   …
    316 315           }
    317 316
    318     -         uint8_t in = utf8[i++];
        317 +         const uint8_t in = utf8[i++];
    319 318           if (ctx == WRITE_LONG_STRING) {
    320 319               switch (in) {
      …   …
    350 349           }
    351 350
        351 +         // Write UTF-8 character
    352 352           size_t size = 0;
    353 353           len += write_character(writer, utf8 + i - 1, &size);
    354 354           if (size == 0) {
    355     -             // Corrupt input, write replacement char and scan to next start
    356     -             sink(replacement_char, sizeof(replacement_char), writer);
        355 +             // Corrupt input, scan to start of next character
    357 356               for (; i < n_bytes && (utf8[i] & 0x80); ++i) {}
    358     -         }
    359     -
    360     -         i += size - 1;
        357 +         } else {
        358 +             i += size - 1;
        359 +         }
    361 360       }
    362 361       return len;
  • wscript

    r2976016 r1423442  
    79 79                      mandatory     = False)
    80 80
    81    -     conf.check(fragment      = 'int main() { return __builtin_clz(1); }',
    82    -                function_name = '__builtin_clz',
    83    -                define_name   = 'HAVE_BUILTIN_CLZ',
    84    -                mandatory     = False)
    85    -
    86 81       autowaf.define(conf, 'SERD_VERSION', SERD_VERSION)
    87 82       autowaf.set_lib_env(conf, 'serd', SERD_VERSION)
Note: See TracChangeset for help on using the changeset viewer.