From 2d4ac18bc2061b2330fe886691af75bf86ca21db Mon Sep 17 00:00:00 2001 From: divverent Date: Wed, 23 Dec 2009 12:15:47 +0000 Subject: [PATCH] make utf8 functions safer: give them a size limit in memory, to allow working with non-NUL-terminated strings git-svn-id: svn://svn.icculus.org/twilight/trunk/darkplaces@9646 d7cf8633-e32d-0410-b094-e92efae38249 --- console.c | 2 +- gl_draw.c | 11 +++++++---- utf8lib.c | 54 +++++++++++++++++++++++++++++++++++++++++++++--------- utf8lib.h | 1 + 4 files changed, 54 insertions(+), 14 deletions(-) diff --git a/console.c b/console.c index 4d279b96..6a59b970 100644 --- a/console.c +++ b/console.c @@ -1464,7 +1464,7 @@ int Con_DisplayLineFunc(void *passthrough, const char *line, size_t length, floa if(isContinuation && *ti->continuationString) x += (int) DrawQ_String_Font(x, ti->y, ti->continuationString, strlen(ti->continuationString), ti->fontsize, ti->fontsize, 1.0, 1.0, 1.0, 1.0, 0, NULL, false, ti->font); if(length > 0) - DrawQ_String_Font(x, ti->y, line, u8_strnlen(line, length), ti->fontsize, ti->fontsize, 1.0, 1.0, 1.0, 1.0, 0, &(ti->colorindex), false, ti->font); + DrawQ_String_Font(x, ti->y, line, length, ti->fontsize, ti->fontsize, 1.0, 1.0, 1.0, 1.0, 0, &(ti->colorindex), false, ti->font); } ti->y += ti->fontsize; diff --git a/gl_draw.c b/gl_draw.c index 66b2b89e..7f33797d 100644 --- a/gl_draw.c +++ b/gl_draw.c @@ -1075,6 +1075,7 @@ static void DrawQ_GetTextColor(float color[4], int colorindex, float r, float g, float DrawQ_TextWidth_Font_UntilWidth_TrackColors_Size(const char *text, float w, float h, size_t *maxlen, int *outcolor, qboolean ignorecolorcodes, const dp_font_t *fnt, float maxwidth) { + const char *text_start = text; int colorindex = STRING_COLOR_DEFAULT; size_t i; float x = 0; @@ -1083,6 +1084,7 @@ float DrawQ_TextWidth_Font_UntilWidth_TrackColors_Size(const char *text, float w int tempcolorindex; float kx; int map_index = 0; + size_t bytes_left; ft2_font_map_t *fontmap = NULL; ft2_font_map_t *map = NULL; 
ft2_font_map_t *prevmap = NULL; @@ -1120,9 +1122,9 @@ float DrawQ_TextWidth_Font_UntilWidth_TrackColors_Size(const char *text, float w // maxwidth /= fnt->scale; // w and h are multiplied by it already // ftbase_x = snap_to_pixel_x(0); - for (i = 0;i < *maxlen && *text;) + for (i = 0;((bytes_left = *maxlen - (text - text_start)) > 0) && *text;) { - nextch = ch = u8_getchar(text, &text); + nextch = ch = u8_getnchar(text, &text, bytes_left); //i = text - text_start; if (!ch) break; @@ -1253,6 +1255,7 @@ float DrawQ_String_Font(float startx, float starty, const char *text, size_t max ft2_font_t *ft2 = fnt->ft2; qboolean snap = true; float pix_x, pix_y; + size_t bytes_left; int tw, th; tw = R_TextureWidth(fnt->tex); @@ -1324,9 +1327,9 @@ float DrawQ_String_Font(float startx, float starty, const char *text, size_t max y += r_textshadow.value * vid.height / vid_conheight.value; } */ - for (i = 0;i < maxlen && *text;) + for (i = 0;((bytes_left = maxlen - (text - text_start)) > 0) && *text;) { - nextch = ch = u8_getchar(text, &text); + nextch = ch = u8_getnchar(text, &text, bytes_left); //i = text - text_start; if (!ch) break; diff --git a/utf8lib.c b/utf8lib.c index 8b50e753..bc3ce39a 100644 --- a/utf8lib.c +++ b/utf8lib.c @@ -25,9 +25,11 @@ UTF-8 encoding and decoding functions follow. 
* @param _start Filled with the start byte-offset of the next valid character * @param _len Fileed with the length of the next valid character * @param _ch Filled with the unicode value of the next character + * @param _maxlen Maximum number of bytes to read from _s * @return Whether or not another valid character is in the string */ -static qboolean u8_analyze(const char *_s, size_t *_start, size_t *_len, Uchar *_ch) +#define U8_ANALYZE_INFINITY 7 +static qboolean u8_analyze(const char *_s, size_t *_start, size_t *_len, Uchar *_ch, size_t _maxlen) { const unsigned char *s = (const unsigned char*)_s; unsigned char bt, bc; @@ -39,10 +41,12 @@ static qboolean u8_analyze(const char *_s, size_t *_start, size_t *_len, Uchar * findchar: // <0xC2 is always an overlong encoding, they're invalid, thus skipped - while (s[i] && s[i] >= 0x80 && s[i] <= 0xC2) { + while (i < _maxlen && s[i] && s[i] >= 0x80 && s[i] <= 0xC2) { //fprintf(stderr, "skipping\n"); ++i; } + if(i >= _maxlen) + return false; //fprintf(stderr, "checking\n"); // If we hit the end, well, we're out and invalid @@ -72,6 +76,8 @@ findchar: ++i; goto findchar; } + if(i + bits > _maxlen) + return false; // turn bt into a mask and give ch a starting value --bt; ch = (s[i] & bt); @@ -144,7 +150,7 @@ size_t u8_strlen(const char *_s) continue; } - if (!u8_analyze((const char*)s, &st, &ln, NULL)) + if (!u8_analyze((const char*)s, &st, &ln, NULL, U8_ANALYZE_INFINITY)) break; // valid character, skip after it s += st + ln; @@ -189,7 +195,7 @@ size_t u8_strnlen(const char *_s, size_t n) continue; } - if (!u8_analyze((const char*)s, &st, &ln, NULL)) + if (!u8_analyze((const char*)s, &st, &ln, NULL, n)) break; // valid character, see if it's still inside the range specified by n: if (n < st + ln) @@ -234,7 +240,7 @@ size_t u8_bytelen(const char *_s, size_t n) continue; } - if (!u8_analyze((const char*)s, &st, &ln, NULL)) + if (!u8_analyze((const char*)s, &st, &ln, NULL, U8_ANALYZE_INFINITY)) break; --n; s += st + ln; @@ 
-265,7 +271,7 @@ int u8_byteofs(const char *_s, size_t i, size_t *len) do { ofs += ln; - if (!u8_analyze((const char*)s + ofs, &st, &ln, NULL)) + if (!u8_analyze((const char*)s + ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY)) return -1; ofs += st; } while(i-- > 0); @@ -312,7 +318,7 @@ int u8_charidx(const char *_s, size_t i, size_t *len) continue; } - if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL)) + if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY)) return -1; // see if next char is after the bytemark if (ofs + st > i) @@ -373,7 +379,7 @@ size_t u8_prevbyte(const char *_s, size_t i) continue; } - if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL)) + if (!u8_analyze((const char*)s+ofs, &st, &ln, NULL, U8_ANALYZE_INFINITY)) return lastofs; if (ofs + st > i) return lastofs; @@ -429,7 +435,37 @@ Uchar u8_getchar(const char *_s, const char **_end) return (Uchar)*(const unsigned char*)_s; } - if (!u8_analyze(_s, &st, &ln, &ch)) + if (!u8_analyze(_s, &st, &ln, &ch, U8_ANALYZE_INFINITY)) + return 0; + if (_end) + *_end = _s + st + ln; + return ch; +} + +/** Fetch a character from an utf-8 encoded string, passing the byte limit _maxlen on to u8_analyze (size-limited counterpart of u8_getchar). + * @param _s The start of an utf-8 encoded multi-byte character; _maxlen is the maximum number of bytes u8_analyze may read from it. When utf8_enable is off, _maxlen is not consulted and exactly one byte is taken. + * @param _end If non-NULL, will point to after the first multi-byte character; it is left untouched when u8_analyze reports no valid character. + * @return The 32-bit integer representation of the first multi-byte character, or 0 when u8_analyze finds no valid character within the limit. With utf8_enable off: the byte value itself, or 0xE000 + byte when char_usefont[] is zero for that byte. + */ +Uchar u8_getnchar(const char *_s, const char **_end, size_t _maxlen) +{ + size_t st, ln; + Uchar ch; + + if (!utf8_enable.integer) + { + if (_end) + *_end = _s + 1; + /* Careful: if we disable utf8 but not freetype, we wish to see freetype chars + * for normal letters. 
So use E000+x for special chars, but leave the freetype stuff for the + * rest: + */ + if (!char_usefont[(unsigned int)*(const unsigned char*)_s]) + return 0xE000 + (Uchar)*(const unsigned char*)_s; + return (Uchar)*(const unsigned char*)_s; + } + + if (!u8_analyze(_s, &st, &ln, &ch, _maxlen)) return 0; if (_end) *_end = _s + st + ln; diff --git a/utf8lib.h b/utf8lib.h index f435bbdf..4133908a 100644 --- a/utf8lib.h +++ b/utf8lib.h @@ -36,6 +36,7 @@ int u8_charidx(const char*, size_t, size_t*); size_t u8_bytelen(const char*, size_t); size_t u8_prevbyte(const char*, size_t); Uchar u8_getchar(const char*, const char**); +Uchar u8_getnchar(const char*, const char**, size_t); int u8_fromchar(Uchar, char*, size_t); size_t u8_wcstombs(char*, const Uchar*, size_t); size_t u8_COM_StringLengthNoColors(const char *s, size_t size_s, qboolean *valid); -- 2.39.5