00001
00002
00003
00004
00005
00006
00007
00008
00009
00012 #include "stdafx.h"
00013 #include "debug.h"
00014 #include "core/alloc_func.hpp"
00015 #include "core/math_func.hpp"
00016 #include "string_func.h"
00017
00018 #include "table/control_codes.h"
00019
00020 #include <stdarg.h>
00021 #include <ctype.h>
00022
00023 #ifdef _MSC_VER
00024 #include <errno.h>
00025 #endif
00026
00027 #ifdef WITH_ICU
00028
00029 #include <unicode/ustring.h>
00030 #include "language.h"
00031 #include "gfx_func.h"
00032 #endif
00033
00044 static int CDECL vseprintf(char *str, const char *last, const char *format, va_list ap)
00045 {
00046 ptrdiff_t diff = last - str;
00047 if (diff < 0) return 0;
00048 return min((int)diff, vsnprintf(str, diff + 1, format, ap));
00049 }
00050
00065 void ttd_strlcat(char *dst, const char *src, size_t size)
00066 {
00067 assert(size > 0);
00068 while (size > 0 && *dst != '\0') {
00069 size--;
00070 dst++;
00071 }
00072
00073 ttd_strlcpy(dst, src, size);
00074 }
00075
00076
/**
 * Copy \a src into \a dst, writing at most \a size bytes including the
 * terminating NUL. The result is always NUL-terminated.
 * @param dst  Destination buffer.
 * @param src  String to copy.
 * @param size Size of the destination buffer in bytes; must be > 0.
 */
void ttd_strlcpy(char *dst, const char *src, size_t size)
{
	assert(size > 0);

	/* One byte is reserved for the terminator. */
	size_t room = size - 1;
	while (room != 0 && *src != '\0') {
		*dst = *src;
		dst++;
		src++;
		room--;
	}
	*dst = '\0';
}
00099
00100
00117 char *strecat(char *dst, const char *src, const char *last)
00118 {
00119 assert(dst <= last);
00120 while (*dst != '\0') {
00121 if (dst == last) return dst;
00122 dst++;
00123 }
00124
00125 return strecpy(dst, src, last);
00126 }
00127
00128
00145 char *strecpy(char *dst, const char *src, const char *last)
00146 {
00147 assert(dst <= last);
00148 while (dst != last && *src != '\0') {
00149 *dst++ = *src++;
00150 }
00151 *dst = '\0';
00152
00153 if (dst == last && *src != '\0') {
00154 #if defined(STRGEN) || defined(SETTINGSGEN)
00155 error("String too long for destination buffer");
00156 #else
00157 DEBUG(misc, 0, "String too long for destination buffer");
00158 #endif
00159 }
00160 return dst;
00161 }
00162
00168 char *CDECL str_fmt(const char *str, ...)
00169 {
00170 char buf[4096];
00171 va_list va;
00172
00173 va_start(va, str);
00174 int len = vseprintf(buf, lastof(buf), str, va);
00175 va_end(va);
00176 char *p = MallocT<char>(len + 1);
00177 memcpy(p, buf, len + 1);
00178 return p;
00179 }
00180
00187 void str_fix_scc_encoded(char *str, const char *last)
00188 {
00189 while (str <= last && *str != '\0') {
00190 size_t len = Utf8EncodedCharLen(*str);
00191 if ((len == 0 && str + 4 > last) || str + len > last) break;
00192
00193 WChar c;
00194 len = Utf8Decode(&c, str);
00195 if (c == '\0') break;
00196
00197 if (c == 0xE028 || c == 0xE02A) {
00198 c = SCC_ENCODED;
00199 }
00200 str += Utf8Encode(str, c);
00201 }
00202 *str = '\0';
00203 }
00204
00205
00213 void str_validate(char *str, const char *last, StringValidationSettings settings)
00214 {
00215
00216
00217 char *dst = str;
00218 while (str <= last && *str != '\0') {
00219 size_t len = Utf8EncodedCharLen(*str);
00220
00221
00222
00223
00224
00225 if ((len == 0 && str + 4 > last) || str + len > last) break;
00226
00227 WChar c;
00228 len = Utf8Decode(&c, str);
00229
00230
00231
00232 if (c == '\0') break;
00233
00234 if ((IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END)) || ((settings & SVS_ALLOW_CONTROL_CODE) != 0 && c == SCC_ENCODED)) {
00235
00236
00237
00238 do {
00239 *dst++ = *str++;
00240 } while (--len != 0);
00241 } else if ((settings & SVS_ALLOW_NEWLINE) != 0 && c == '\n') {
00242 *dst++ = *str++;
00243 } else {
00244 if ((settings & SVS_ALLOW_NEWLINE) != 0 && c == '\r' && str[1] == '\n') {
00245 str += len;
00246 continue;
00247 }
00248
00249 str += len;
00250 if ((settings & SVS_REPLACE_WITH_QUESTION_MARK) != 0) *dst++ = '?';
00251 }
00252 }
00253
00254 *dst = '\0';
00255 }
00256
00262 void ValidateString(const char *str)
00263 {
00264
00265 str_validate(const_cast<char *>(str), str + strlen(str) + 1);
00266 }
00267
00268
00276 bool StrValid(const char *str, const char *last)
00277 {
00278
00279
00280 while (str <= last && *str != '\0') {
00281 size_t len = Utf8EncodedCharLen(*str);
00282
00283
00284
00285
00286 if (len == 0 || str + len > last) return false;
00287
00288 WChar c;
00289 len = Utf8Decode(&c, str);
00290 if (!IsPrintable(c) || (c >= SCC_SPRITE_START && c <= SCC_SPRITE_END)) {
00291 return false;
00292 }
00293
00294 str += len;
00295 }
00296
00297 return *str == '\0';
00298 }
00299
00301 void str_strip_colours(char *str)
00302 {
00303 char *dst = str;
00304 WChar c;
00305 size_t len;
00306
00307 for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {
00308 if (c < SCC_BLUE || c > SCC_BLACK) {
00309
00310
00311
00312 do {
00313 *dst++ = *str++;
00314 } while (--len != 0);
00315 } else {
00316
00317 str += len;
00318 }
00319 }
00320 *dst = '\0';
00321 }
00322
00329 size_t Utf8StringLength(const char *s)
00330 {
00331 size_t len = 0;
00332 const char *t = s;
00333 while (Utf8Consume(&t) != 0) len++;
00334 return len;
00335 }
00336
00337
/**
 * Convert a string to lower case, in place, using tolower().
 * @param str NUL-terminated string to convert; modified in place.
 * @return true when at least one byte was changed.
 */
bool strtolower(char *str)
{
	bool changed = false;
	for (; *str != '\0'; str++) {
		/* tolower() is only defined for values representable as unsigned
		 * char (or EOF); a plain char may be negative for bytes >= 0x80. */
		char new_str = (char)tolower((unsigned char)*str);
		changed |= new_str != *str;
		*str = new_str;
	}
	return changed;
}
00359
00367 bool IsValidChar(WChar key, CharSetFilter afilter)
00368 {
00369 switch (afilter) {
00370 case CS_ALPHANUMERAL: return IsPrintable(key);
00371 case CS_NUMERAL: return (key >= '0' && key <= '9');
00372 case CS_NUMERAL_SPACE: return (key >= '0' && key <= '9') || key == ' ';
00373 case CS_ALPHA: return IsPrintable(key) && !(key >= '0' && key <= '9');
00374 case CS_HEXADECIMAL: return (key >= '0' && key <= '9') || (key >= 'a' && key <= 'f') || (key >= 'A' && key <= 'F');
00375 }
00376
00377 return false;
00378 }
00379
00380 #ifdef WIN32
00381
00382 #if (__MINGW32_MAJOR_VERSION < 3) || ((__MINGW32_MAJOR_VERSION == 3) && (__MINGW32_MINOR_VERSION < 14))
00383 int CDECL snprintf(char *str, size_t size, const char *format, ...)
00384 {
00385 va_list ap;
00386 int ret;
00387
00388 va_start(ap, format);
00389 ret = vsnprintf(str, size, format, ap);
00390 va_end(ap);
00391 return ret;
00392 }
00393 #endif
00394
00395 #ifdef _MSC_VER
00396
00403 int CDECL vsnprintf(char *str, size_t size, const char *format, va_list ap)
00404 {
00405 if (size == 0) return 0;
00406
00407 errno = 0;
00408 int ret = _vsnprintf(str, size, format, ap);
00409
00410 if (ret < 0) {
00411 if (errno != ERANGE) {
00412
00413
00414 NOT_REACHED();
00415 }
00416 } else if ((size_t)ret < size) {
00417
00418
00419
00420 return ret;
00421 }
00422
00423
00424
00425 str[size - 1] = '\0';
00426 return (int)size;
00427 }
00428 #endif
00429
00430 #endif
00431
00441 int CDECL seprintf(char *str, const char *last, const char *format, ...)
00442 {
00443 va_list ap;
00444
00445 va_start(ap, format);
00446 int ret = vseprintf(str, last, format, ap);
00447 va_end(ap);
00448 return ret;
00449 }
00450
00451
00459 char *md5sumToString(char *buf, const char *last, const uint8 md5sum[16])
00460 {
00461 char *p = buf;
00462
00463 for (uint i = 0; i < 16; i++) {
00464 p += seprintf(p, last, "%02X", md5sum[i]);
00465 }
00466
00467 return p;
00468 }
00469
00470
00471
00472
00473
00480 size_t Utf8Decode(WChar *c, const char *s)
00481 {
00482 assert(c != NULL);
00483
00484 if (!HasBit(s[0], 7)) {
00485
00486 *c = s[0];
00487 return 1;
00488 } else if (GB(s[0], 5, 3) == 6) {
00489 if (IsUtf8Part(s[1])) {
00490
00491 *c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
00492 if (*c >= 0x80) return 2;
00493 }
00494 } else if (GB(s[0], 4, 4) == 14) {
00495 if (IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
00496
00497 *c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
00498 if (*c >= 0x800) return 3;
00499 }
00500 } else if (GB(s[0], 3, 5) == 30) {
00501 if (IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
00502
00503 *c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
00504 if (*c >= 0x10000 && *c <= 0x10FFFF) return 4;
00505 }
00506 }
00507
00508
00509 *c = '?';
00510 return 1;
00511 }
00512
00513
00520 size_t Utf8Encode(char *buf, WChar c)
00521 {
00522 if (c < 0x80) {
00523 *buf = c;
00524 return 1;
00525 } else if (c < 0x800) {
00526 *buf++ = 0xC0 + GB(c, 6, 5);
00527 *buf = 0x80 + GB(c, 0, 6);
00528 return 2;
00529 } else if (c < 0x10000) {
00530 *buf++ = 0xE0 + GB(c, 12, 4);
00531 *buf++ = 0x80 + GB(c, 6, 6);
00532 *buf = 0x80 + GB(c, 0, 6);
00533 return 3;
00534 } else if (c < 0x110000) {
00535 *buf++ = 0xF0 + GB(c, 18, 3);
00536 *buf++ = 0x80 + GB(c, 12, 6);
00537 *buf++ = 0x80 + GB(c, 6, 6);
00538 *buf = 0x80 + GB(c, 0, 6);
00539 return 4;
00540 }
00541
00542
00543 *buf = '?';
00544 return 1;
00545 }
00546
00554 size_t Utf8TrimString(char *s, size_t maxlen)
00555 {
00556 size_t length = 0;
00557
00558 for (const char *ptr = strchr(s, '\0'); *s != '\0';) {
00559 size_t len = Utf8EncodedCharLen(*s);
00560
00561 if (len == 0) len = 1;
00562
00563
00564
00565 if (length + len >= maxlen || (s + len > ptr)) break;
00566 s += len;
00567 length += len;
00568 }
00569
00570 *s = '\0';
00571 return length;
00572 }
00573
00574 #ifdef DEFINE_STRNDUP
00575 char *strndup(const char *s, size_t len)
00576 {
00577 len = ttd_strnlen(s, len);
00578 char *tmp = CallocT<char>(len + 1);
00579 memcpy(tmp, s, len);
00580 return tmp;
00581 }
00582 #endif
00583
00584 #ifdef DEFINE_STRCASESTR
00585 char *strcasestr(const char *haystack, const char *needle)
00586 {
00587 size_t hay_len = strlen(haystack);
00588 size_t needle_len = strlen(needle);
00589 while (hay_len >= needle_len) {
00590 if (strncasecmp(haystack, needle, needle_len) == 0) return const_cast<char *>(haystack);
00591
00592 haystack++;
00593 hay_len--;
00594 }
00595
00596 return NULL;
00597 }
00598 #endif
00599
00608 static const char *SkipGarbage(const char *str)
00609 {
00610 while (*str != '\0' && (*str < 'A' || IsInsideMM(*str, '[', '`' + 1) || IsInsideMM(*str, '{', '~' + 1))) str++;
00611 return str;
00612 }
00613
00622 int strnatcmp(const char *s1, const char *s2, bool ignore_garbage_at_front)
00623 {
00624 if (ignore_garbage_at_front) {
00625 s1 = SkipGarbage(s1);
00626 s2 = SkipGarbage(s2);
00627 }
00628 #ifdef WITH_ICU
00629 if (_current_collator != NULL) {
00630 UErrorCode status = U_ZERO_ERROR;
00631 int result;
00632
00633
00634 #if U_ICU_VERSION_MAJOR_NUM > 4 || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM >= 2)
00635
00636 result = _current_collator->compareUTF8(s1, s2, status);
00637 #else
00638 UChar buffer1[DRAW_STRING_BUFFER];
00639 u_strFromUTF8Lenient(buffer1, lengthof(buffer1), NULL, s1, -1, &status);
00640 UChar buffer2[DRAW_STRING_BUFFER];
00641 u_strFromUTF8Lenient(buffer2, lengthof(buffer2), NULL, s2, -1, &status);
00642
00643 result = _current_collator->compare(buffer1, buffer2, status);
00644 #endif
00645 if (U_SUCCESS(status)) return result;
00646 }
00647
00648 #endif
00649
00650
00651 return strcasecmp(s1, s2);
00652 }