From a327a6465c19b19fb0172d0583da6d94a655fedf Mon Sep 17 00:00:00 2001
From: divverent <divverent@d7cf8633-e32d-0410-b094-e92efae38249>
Date: Mon, 12 Dec 2011 06:13:32 +0000
Subject: [PATCH] VM_sprintf: support color codes in %s

git-svn-id: svn://svn.icculus.org/twilight/trunk/darkplaces@11612 d7cf8633-e32d-0410-b094-e92efae38249
---
 dpdefs/dpextensions.qc |   4 +-
 prvm_cmds.c            |   5 +-
 utf8lib.c              | 117 +++++++++++++++++++++++++++++++++++++++++
 utf8lib.h              |   1 +
 4 files changed, 125 insertions(+), 2 deletions(-)

diff --git a/dpdefs/dpextensions.qc b/dpdefs/dpextensions.qc
index 9f2911a1..caa9b628 100644
--- a/dpdefs/dpextensions.qc
+++ b/dpdefs/dpextensions.qc
@@ -1027,7 +1027,9 @@ string(string format, ...) sprintf = #627;
 //    For conversions s and c, the flag # makes precision and width interpreted
 //      as byte count, by default it is interpreted as character count in UTF-8
 //      enabled engines. No other conversions can create wide characters, and #
-//      has another meaning in these.
+//      has another meaning in these. When in character count mode, color codes
+//      do not count toward width or precision. To get UTF-8 semantics WITHOUT
+//      color code parsing, use the + flag.
 
 //DP_QC_STRFTIME
 //idea: LordHavoc
diff --git a/prvm_cmds.c b/prvm_cmds.c
index 7f1900d9..217d897d 100644
--- a/prvm_cmds.c
+++ b/prvm_cmds.c
@@ -6326,7 +6326,10 @@ nolength:
 							{
 								if(precision < 0) // not set
 									precision = end - o - 1;
-								o += u8_strpad(o, end - o, GETARG_STRING(thisarg), (flags & PRINTF_LEFT) != 0, width, precision);
+								if(flags & PRINTF_SIGNPOSITIVE)
+									o += u8_strpad(o, end - o, GETARG_STRING(thisarg), (flags & PRINTF_LEFT) != 0, width, precision);
+								else
+									o += u8_strpad_colorcodes(o, end - o, GETARG_STRING(thisarg), (flags & PRINTF_LEFT) != 0, width, precision);
 							}
 							break;
 						default:
diff --git a/utf8lib.c b/utf8lib.c
index 8994bd7a..76cc8137 100644
--- a/utf8lib.c
+++ b/utf8lib.c
@@ -226,6 +226,29 @@ size_t u8_strlen(const char *_s)
 	return len;
 }
 
+/** Get the byte length of the color code at the start of s.
+ * @param s     A position in a null-terminated string.
+ * @return      2 for ^[0-9], 5 for the RGB tag followed by three hex digits,
+ *              1 for a doubled color tag (only its first byte), otherwise 0.
+ */
+static int colorcode_skipwidth(const unsigned char *s)
+{
+	int i;
+	if(s[0] != STRING_COLOR_TAG)
+		return 0;
+	if(s[1] >= '0' && s[1] <= '9') // ^[0-9] found
+		return 2;
+	if(s[1] == STRING_COLOR_RGB_TAG_CHAR)
+	{
+		for(i = 2; i < 5; ++i) // stops at the first non-hex byte, so never reads past the terminator
+			if(!((s[i] >= '0' && s[i] <= '9') || (s[i] >= 'a' && s[i] <= 'f') || (s[i] >= 'A' && s[i] <= 'F')))
+				break;
+		if(i == 5)
+			return 5;
+	}
+	return (s[1] == STRING_COLOR_TAG) ? 1 : 0; // 1: special case, do NOT call colorcode_skipwidth for next char
+}
+
 /** Get the number of characters in a part of an UTF-8 string.
  * @param _s    An utf-8 encoded null-terminated string.
  * @param n     The maximum number of bytes.
@@ -274,6 +297,49 @@ size_t u8_strnlen(const char *_s, size_t n)
 	return len;
 }
 
+/** Get the number of characters in a part of an UTF-8 string, skipping color codes.
+ * @param _s    A null-terminated string that may contain color codes.
+ * @param n     The maximum number of bytes.
+ * @return      The number of characters in the first n bytes; ^^ counts as one.
+ */
+static size_t u8_strnlen_colorcodes(const char *_s, size_t n)
+{
+	size_t st, ln;
+	size_t len = 0;
+	const unsigned char *s = (const unsigned char*)_s;
+
+	while (*s && n)
+	{
+		size_t w = colorcode_skipwidth(s);
+		// n is unsigned: stop before a sequence that crosses the byte limit could
+		// wrap it around; ^^ (w == 1) also needs room for its second caret
+		if (n < (w == 1 ? 2 : w))
+			break;
+		n -= w;
+		s += w;
+		if (w > 1) // == 1 means single caret
+			continue;
+		// single byte: ascii (or utf8 disabled) is a character, an invalid utf8 byte is not
+		if (*s < 0xC2 || !utf8_enable.integer)
+		{
+			if (*s < 0x80 || !utf8_enable.integer)
+				++len;
+			++s;
+			--n;
+			continue;
+		}
+		if (!u8_analyze((const char*)s, &st, &ln, NULL, n))
+			break;
+		// valid character, see if it's still inside the range specified by n:
+		if (n < st + ln)
+			return len;
+		++len;
+		n -= st + ln;
+		s += st + ln;
+	}
+	return len;
+}
+
 /** Get the number of bytes used in a string to represent an amount of characters.
  * @param _s    An utf-8 encoded null-terminated string.
  * @param n     The number of characters we want to know the byte-size for.
@@ -318,6 +384,46 @@ size_t u8_bytelen(const char *_s, size_t n)
 	return len;
 }
 
+/** Get the number of bytes used in a string to represent an amount of
+ * characters, where color codes use up bytes but do not count as characters.
+ * @param _s    A null-terminated string that may contain color codes.
+ * @param n     The number of characters we want to know the byte-size for.
+ * @return      The byte count; color codes after the n-th character are left out.
+ */
+static size_t u8_bytelen_colorcodes(const char *_s, size_t n)
+{
+	size_t st_bytes, ln_bytes;
+	const unsigned char *begin = (const unsigned char*)_s;
+	const unsigned char *cur = begin;
+	while (*cur && n)
+	{
+		int skip = colorcode_skipwidth(cur);
+		cur += skip;
+		if (skip > 1) // a whole color code; == 1 means single caret
+			continue;
+		if (*cur < 0x80 || !utf8_enable.integer)
+		{
+			// ascii char (or utf8 disabled): one byte, one character
+			++cur;
+			--n;
+		}
+		else if (*cur < 0xC2)
+		{
+			// invalid: takes up a byte but is not counted as a character
+			++cur;
+		}
+		else if (u8_analyze((const char*)cur, &st_bytes, &ln_bytes, NULL, U8_ANALYZE_INFINITY))
+		{
+			cur += st_bytes + ln_bytes;
+			--n;
+		}
+		else
+			break;
+	}
+	// cur only ever advances over accepted bytes, so the distance is the length
+	return (size_t)(cur - begin);
+}
+
 /** Get the byte-index for a character-index.
  * @param _s      An utf-8 encoded string.
  * @param i       The character-index for which you want the byte offset.
@@ -798,6 +904,17 @@ size_t u8_strpad(char *out, size_t outsize, const char *in, qboolean leftalign,
 	}
 }
 
+/** Write in to out via dpsnprintf (whose result is returned), padded to at least
+ * minwidth and cut to at most maxwidth characters; color codes are not counted.
+ */
+size_t u8_strpad_colorcodes(char *out, size_t outsize, const char *in, qboolean leftalign, size_t minwidth, size_t maxwidth)
+{
+	size_t bytes = u8_bytelen_colorcodes(in, maxwidth);
+	size_t shown = u8_strnlen_colorcodes(in, bytes);
+	int fill = (shown < minwidth) ? (int)(minwidth - shown) : 0;
+	return dpsnprintf(out, outsize, "%*s%.*s%*s", leftalign ? 0 : fill, "", (int)bytes, in, leftalign ? fill : 0, "");
+}
+
 
 /*
 The two following functions (u8_toupper, u8_tolower) are derived from
diff --git a/utf8lib.h b/utf8lib.h
index 36c16fe1..543fbfc0 100644
--- a/utf8lib.h
+++ b/utf8lib.h
@@ -46,6 +46,7 @@ size_t u8_COM_StringLengthNoColors(const char *s, size_t size_s, qboolean *valid
 char  *u8_encodech(Uchar ch, size_t*, char*buf16);
 
 size_t u8_strpad(char *out, size_t outsize, const char *in, qboolean leftalign, size_t minwidth, size_t maxwidth);
+size_t u8_strpad_colorcodes(char *out, size_t outsize, const char *in, qboolean leftalign, size_t minwidth, size_t maxwidth);
 
 /* Careful: if we disable utf8 but not freetype, we wish to see freetype chars
  * for normal letters. So use E000+x for special chars, but leave the freetype stuff for the
-- 
2.39.5