00001
00002
00003
00004
00005
00006
00007
00008
00009
00012 #include "../stdafx.h"
00013 #include "../core/alloc_func.hpp"
00014 #include "../core/endian_func.hpp"
00015 #include "../core/math_func.hpp"
00016 #include "../string_func.h"
00017 #include "../strings_type.h"
00018 #include "strgen.h"
00019 #include "../table/control_codes.h"
00020
00021 #include <stdarg.h>
00022
00023 #if (!defined(WIN32) && !defined(WIN64)) || defined(__CYGWIN__)
00024 #include <unistd.h>
00025 #include <sys/stat.h>
00026 #endif
00027
00028 #if defined WIN32 || defined __WATCOMC__
00029 #include <direct.h>
00030 #endif
00031
00032 #ifdef __MORPHOS__
00033 #ifdef stderr
00034 #undef stderr
00035 #endif
00036 #define stderr stdout
00037 #endif
00038
00039 #include "../table/strgen_tables.h"
00040
00041
00042
/** One case-specific variant of a string; the variants of a string form a singly linked list. */
struct Case {
	int caseidx;        ///< Case number as returned by ResolveCaseName().
	char *string;       ///< Text of this variant.
	struct Case *next;  ///< Next variant in the list, or NULL at the end.
};
00048
/* Parser and output state shared by the whole tool. */
static bool _masterlang = false;              ///< When set, WriteLangfile() treats every string as translated.
static bool _translated = false;              ///< Set per string by WriteLangfile(); read by EmitPlural().
static bool _translation = false;             ///< Set by ParseFile(); CheckCommandsMatch() only validates when this is true.
static const char *_file = "(unknown file)";  ///< File name shown in diagnostics.
static FILE *_output_file = NULL;             ///< Output file currently open, if any; error() closes it.
static const char *_output_filename = NULL;   ///< Name of the open output file; error() unlinks it.
static int _cur_line = 0;                     ///< Line number shown in diagnostics.
static int _errors = 0;                       ///< Number of messages reported via strgen_error().
static int _warnings = 0;                     ///< Number of messages reported via strgen_warning().
static int _show_todo = 0;                    ///< Bit 0: prefix untranslated strings with "<TODO> "; bit 1: warn about untranslated strings.
00057
00058 struct LangString {
00059 char *name;
00060 char *english;
00061 char *translated;
00062 uint16 hash_next;
00063 uint16 index;
00064 int line;
00065 Case *english_case;
00066 Case *translated_case;
00067 };
00068
00069 static LangString *_strings[65536];
00070
00071
00072 #define HASH_SIZE 32767
00073 static uint16 _hash_head[HASH_SIZE];
00074
00075 static byte _put_buf[4096];
00076 static int _put_pos;
00077 static int _next_string_id;
00078
00079 static uint32 _hash;
00080 static char _lang_name[32], _lang_ownname[32], _lang_isocode[16];
00081 static char _lang_digit_group_separator[8];
00082 static char _lang_digit_group_separator_currency[8];
00083 static char _lang_digit_decimal_separator[8];
00084 static byte _lang_pluralform;
00085 static byte _lang_textdir;
00086 static uint16 _lang_winlangid;
00087 static uint8 _lang_newgrflangid;
00088 #define MAX_NUM_GENDER 8
00089 static char _genders[MAX_NUM_GENDER][16];
00090 static uint _numgenders;
00091
00092
00093 #define MAX_NUM_CASES 50
00094 static char _cases[MAX_NUM_CASES][16];
00095 static uint _numcases;
00096
00097 static const char *_cur_ident;
00098
00099 struct CmdPair {
00100 const CmdStruct *a;
00101 const char *v;
00102 };
00103
00104 struct ParsedCommandStruct {
00105 uint np;
00106 CmdPair pairs[32];
00107 const CmdStruct *cmd[32];
00108 };
00109
00110
00111 static ParsedCommandStruct _cur_pcs;
00112 static int _cur_argidx;
00113
00114 static uint HashStr(const char *s)
00115 {
00116 uint hash = 0;
00117 for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
00118 return hash % HASH_SIZE;
00119 }
00120
00121 static void HashAdd(const char *s, LangString *ls)
00122 {
00123 uint hash = HashStr(s);
00124 ls->hash_next = _hash_head[hash];
00125 _hash_head[hash] = ls->index + 1;
00126 }
00127
00128 static LangString *HashFind(const char *s)
00129 {
00130 int idx = _hash_head[HashStr(s)];
00131
00132 while (--idx >= 0) {
00133 LangString *ls = _strings[idx];
00134
00135 if (strcmp(ls->name, s) == 0) return ls;
00136 idx = ls->hash_next;
00137 }
00138 return NULL;
00139 }
00140
/* Layout of a diagnostic line. The arguments are: file name, line number, message text. */
#if defined(_MSC_VER)
#	define LINE_NUM_FMT(s) "%s (%d): warning: %s (" s ")\n"
#else
#	define LINE_NUM_FMT(s) "%s:%d: " s ": %s\n"
#endif
00146
00147 static void CDECL strgen_warning(const char *s, ...) WARN_FORMAT(1, 2);
00148
00149 static void CDECL strgen_warning(const char *s, ...)
00150 {
00151 char buf[1024];
00152 va_list va;
00153 va_start(va, s);
00154 vsnprintf(buf, lengthof(buf), s, va);
00155 va_end(va);
00156 fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, buf);
00157 _warnings++;
00158 }
00159
00160 static void CDECL strgen_error(const char *s, ...) WARN_FORMAT(1, 2);
00161
00162 static void CDECL strgen_error(const char *s, ...)
00163 {
00164 char buf[1024];
00165 va_list va;
00166 va_start(va, s);
00167 vsnprintf(buf, lengthof(buf), s, va);
00168 va_end(va);
00169 fprintf(stderr, LINE_NUM_FMT("error"), _file, _cur_line, buf);
00170 _errors++;
00171 }
00172
00173 void NORETURN CDECL error(const char *s, ...)
00174 {
00175 char buf[1024];
00176 va_list va;
00177 va_start(va, s);
00178 vsnprintf(buf, lengthof(buf), s, va);
00179 va_end(va);
00180 fprintf(stderr, LINE_NUM_FMT("FATAL"), _file, _cur_line, buf);
00181 #ifdef _MSC_VER
00182 fprintf(stderr, LINE_NUM_FMT("warning"), _file, _cur_line, "language is not compiled");
00183 #endif
00184
00185 if (_output_file != NULL) {
00186 fclose(_output_file);
00187 unlink(_output_filename);
00188 }
00189 exit(1);
00190 }
00191
00192 static void PutByte(byte c)
00193 {
00194 if (_put_pos == lengthof(_put_buf)) error("Put buffer too small");
00195 _put_buf[_put_pos++] = c;
00196 }
00197
00198
00199 static void PutUtf8(uint32 value)
00200 {
00201 if (value < 0x80) {
00202 PutByte(value);
00203 } else if (value < 0x800) {
00204 PutByte(0xC0 + GB(value, 6, 5));
00205 PutByte(0x80 + GB(value, 0, 6));
00206 } else if (value < 0x10000) {
00207 PutByte(0xE0 + GB(value, 12, 4));
00208 PutByte(0x80 + GB(value, 6, 6));
00209 PutByte(0x80 + GB(value, 0, 6));
00210 } else if (value < 0x110000) {
00211 PutByte(0xF0 + GB(value, 18, 3));
00212 PutByte(0x80 + GB(value, 12, 6));
00213 PutByte(0x80 + GB(value, 6, 6));
00214 PutByte(0x80 + GB(value, 0, 6));
00215 } else {
00216 strgen_warning("Invalid unicode value U+0x%X", value);
00217 }
00218 }
00219
00220
00221 size_t Utf8Validate(const char *s)
00222 {
00223 uint32 c;
00224
00225 if (!HasBit(s[0], 7)) {
00226
00227 return 1;
00228 } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
00229
00230 c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
00231 if (c >= 0x80) return 2;
00232 } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
00233
00234 c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
00235 if (c >= 0x800) return 3;
00236 } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
00237
00238 c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
00239 if (c >= 0x10000 && c <= 0x10FFFF) return 4;
00240 }
00241
00242 return 0;
00243 }
00244
00245
00246 static void EmitSingleChar(char *buf, int value)
00247 {
00248 if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
00249 PutUtf8(value);
00250 }
00251
00252
00253
00254
00255
00256
00257
00258
/**
 * Parse an absolute or relative number, optionally followed by ":<offset>".
 *
 * Leading spaces and tabs are skipped. A number with a '+' prefix, or a
 * negative number, is added to \a *value; any other number replaces it.
 *
 * @param buf    In: text to parse. Out: position just after the parsed text;
 *               left untouched when false is returned.
 * @param value  In: base for relative numbers. Out: the resulting number.
 * @param offset When not NULL and the number is followed by ':', receives the
 *               number after the colon.
 * @return false when no number (or no number after the ':') could be read.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
	const char *cursor = *buf;
	char *stop;

	while (*cursor == ' ' || *cursor == '\t') cursor++;

	bool relative = (*cursor == '+');
	if (relative) cursor++;

	int number = strtol(cursor, &stop, 0);
	if (stop == cursor) return false;

	*value = (relative || number < 0) ? *value + number : number;

	if (offset != NULL && *stop == ':') {
		/* Take the offset from after the colon. */
		cursor = stop + 1;
		*offset = strtol(cursor, &stop, 0);
		if (stop == cursor) return false;
	}

	*buf = stop;
	return true;
}
00286
00287
/**
 * Split the next word off a buffer, modifying the buffer in place.
 *
 * Leading spaces and tabs are skipped. A word that starts with a double quote
 * runs up to the closing quote (or the end of the text); any other word runs
 * up to the next space or tab. The delimiter is overwritten with a NUL.
 *
 * @param buf In: text to parse. Out: position after the word and its
 *            delimiter; left untouched when NULL is returned.
 * @return Start of the word, or NULL when only whitespace was left.
 */
char *ParseWord(char **buf)
{
	char *cursor = *buf;

	while (*cursor == ' ' || *cursor == '\t') cursor++;
	if (*cursor == '\0') return NULL;

	bool quoted = (*cursor == '"');
	if (quoted) cursor++;

	char *word = cursor;
	for (; *cursor != '\0'; cursor++) {
		bool at_delimiter = quoted ? (*cursor == '"') : (*cursor == ' ' || *cursor == '\t');
		if (at_delimiter) {
			*cursor++ = '\0';
			break;
		}
	}

	*buf = cursor;
	return word;
}
00321
00322
00323 static int TranslateArgumentIdx(int arg, int offset = 0);
00324
00325 static void EmitWordList(const char * const *words, uint nw)
00326 {
00327 PutByte(nw);
00328 for (uint i = 0; i < nw; i++) PutByte(strlen(words[i]) + 1);
00329 for (uint i = 0; i < nw; i++) {
00330 for (uint j = 0; words[i][j] != '\0'; j++) PutByte(words[i][j]);
00331 PutByte(0);
00332 }
00333 }
00334
00335 static void EmitPlural(char *buf, int value)
00336 {
00337 int argidx = _cur_argidx;
00338 int offset = 0;
00339 const char *words[5];
00340 int nw = 0;
00341
00342
00343 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
00344
00345
00346 for (nw = 0; nw < 5; nw++) {
00347 words[nw] = ParseWord(&buf);
00348 if (words[nw] == NULL) break;
00349 }
00350
00351 if (nw == 0) {
00352 error("%s: No plural words", _cur_ident);
00353 }
00354
00355 if (_plural_forms[_lang_pluralform].plural_count != nw) {
00356 if (_translated) {
00357 error("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
00358 _plural_forms[_lang_pluralform].plural_count, nw);
00359 } else {
00360 if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
00361 if (nw > _plural_forms[_lang_pluralform].plural_count) {
00362 nw = _plural_forms[_lang_pluralform].plural_count;
00363 } else {
00364 for (; nw < _plural_forms[_lang_pluralform].plural_count; nw++) {
00365 words[nw] = words[nw - 1];
00366 }
00367 }
00368 }
00369 }
00370
00371 PutUtf8(SCC_PLURAL_LIST);
00372 PutByte(TranslateArgumentIdx(argidx, offset));
00373 EmitWordList(words, nw);
00374 }
00375
00376
00377 static void EmitGender(char *buf, int value)
00378 {
00379 int argidx = _cur_argidx;
00380 int offset = 0;
00381 uint nw;
00382
00383 if (buf[0] == '=') {
00384 buf++;
00385
00386
00387 for (nw = 0; ; nw++) {
00388 if (nw >= MAX_NUM_GENDER) error("G argument '%s' invalid", buf);
00389 if (strcmp(buf, _genders[nw]) == 0) break;
00390 }
00391
00392 PutUtf8(SCC_GENDER_INDEX);
00393 PutByte(nw);
00394 } else {
00395 const char *words[MAX_NUM_GENDER];
00396
00397
00398
00399 if (!ParseRelNum(&buf, &argidx, &offset)) {}
00400
00401 const CmdStruct *cmd = _cur_pcs.cmd[argidx];
00402 if (cmd == NULL || (cmd->flags & C_GENDER) == 0) {
00403 error("Command '%s' can't have a gender", cmd == NULL ? "<empty>" : cmd->cmd);
00404 }
00405
00406 for (nw = 0; nw < MAX_NUM_GENDER; nw++) {
00407 words[nw] = ParseWord(&buf);
00408 if (words[nw] == NULL) break;
00409 }
00410 if (nw != _numgenders) error("Bad # of arguments for gender command");
00411
00412 assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
00413 PutUtf8(SCC_GENDER_LIST);
00414 PutByte(cmd->value - SCC_CONTROL_START);
00415 PutByte(TranslateArgumentIdx(argidx, offset));
00416 EmitWordList(words, nw);
00417 }
00418 }
00419
00420 static const CmdStruct *FindCmd(const char *s, int len)
00421 {
00422 for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
00423 if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
00424 }
00425 return NULL;
00426 }
00427
00428 static uint ResolveCaseName(const char *str, uint len)
00429 {
00430 for (uint i = 0; i < MAX_NUM_CASES; i++) {
00431 if (memcmp(_cases[i], str, len) == 0 && _cases[i][len] == 0) return i + 1;
00432 }
00433 error("Invalid case-name '%s'", str);
00434 }
00435
00436
00437
00438
00439 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
00440 {
00441 const char *s = *str, *start;
00442 char c;
00443
00444 *argno = -1;
00445 *casei = -1;
00446
00447
00448 for (; *s != '{'; s++) {
00449 if (*s == '\0') return NULL;
00450 }
00451 s++;
00452
00453 if (*s >= '0' && *s <= '9') {
00454 char *end;
00455
00456 *argno = strtoul(s, &end, 0);
00457 if (*end != ':') error("missing arg #");
00458 s = end + 1;
00459 }
00460
00461
00462 start = s;
00463 do {
00464 c = *s++;
00465 } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
00466
00467 const CmdStruct *cmd = FindCmd(start, s - start - 1);
00468 if (cmd == NULL) {
00469 strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
00470 return NULL;
00471 }
00472
00473 if (c == '.') {
00474 const char *casep = s;
00475
00476 if (!(cmd->flags & C_CASE)) {
00477 error("Command '%s' can't have a case", cmd->cmd);
00478 }
00479
00480 do {
00481 c = *s++;
00482 } while (c != '}' && c != ' ' && c != '\0');
00483 *casei = ResolveCaseName(casep, s - casep - 1);
00484 }
00485
00486 if (c == '\0') {
00487 strgen_error("Missing } from command '%s'", start);
00488 return NULL;
00489 }
00490
00491
00492 if (c != '}') {
00493 if (c == '=') s--;
00494
00495 start = s;
00496 for (;;) {
00497 c = *s++;
00498 if (c == '}') break;
00499 if (c == '\0') {
00500 strgen_error("Missing } from command '%s'", start);
00501 return NULL;
00502 }
00503 if (s - start == 250) error("param command too long");
00504 *param++ = c;
00505 }
00506 }
00507 *param = '\0';
00508
00509 *str = s;
00510
00511 return cmd;
00512 }
00513
00514
00515 static void HandlePragma(char *str)
00516 {
00517 if (!memcmp(str, "id ", 3)) {
00518 _next_string_id = strtoul(str + 3, NULL, 0);
00519 } else if (!memcmp(str, "name ", 5)) {
00520 strecpy(_lang_name, str + 5, lastof(_lang_name));
00521 } else if (!memcmp(str, "ownname ", 8)) {
00522 strecpy(_lang_ownname, str + 8, lastof(_lang_ownname));
00523 } else if (!memcmp(str, "isocode ", 8)) {
00524 strecpy(_lang_isocode, str + 8, lastof(_lang_isocode));
00525 } else if (!memcmp(str, "plural ", 7)) {
00526 _lang_pluralform = atoi(str + 7);
00527 if (_lang_pluralform >= lengthof(_plural_forms))
00528 error("Invalid pluralform %d", _lang_pluralform);
00529 } else if (!memcmp(str, "textdir ", 8)) {
00530 if (!memcmp(str + 8, "ltr", 3)) {
00531 _lang_textdir = TD_LTR;
00532 } else if (!memcmp(str + 8, "rtl", 3)) {
00533 _lang_textdir = TD_RTL;
00534 } else {
00535 error("Invalid textdir %s", str + 8);
00536 }
00537 } else if (!memcmp(str, "digitsep ", 9)) {
00538 str += 9;
00539 strecpy(_lang_digit_group_separator, strcmp(str, "{NBSP}") == 0 ? "\xC2\xA0" : str, lastof(_lang_digit_group_separator));
00540 } else if (!memcmp(str, "digitsepcur ", 12)) {
00541 str += 12;
00542 strecpy(_lang_digit_group_separator_currency, strcmp(str, "{NBSP}") == 0 ? "\xC2\xA0" : str, lastof(_lang_digit_group_separator_currency));
00543 } else if (!memcmp(str, "decimalsep ", 11)) {
00544 str += 11;
00545 strecpy(_lang_digit_decimal_separator, strcmp(str, "{NBSP}") == 0 ? "\xC2\xA0" : str, lastof(_lang_digit_decimal_separator));
00546 } else if (!memcmp(str, "winlangid ", 10)) {
00547 const char *buf = str + 10;
00548 long langid = strtol(buf, NULL, 16);
00549 if (langid > (long)UINT16_MAX || langid < 0) {
00550 error("Invalid winlangid %s", buf);
00551 }
00552 _lang_winlangid = (uint16)langid;
00553 } else if (!memcmp(str, "grflangid ", 10)) {
00554 const char *buf = str + 10;
00555 long langid = strtol(buf, NULL, 16);
00556 if (langid >= 0x7F || langid < 0) {
00557 error("Invalid grflangid %s", buf);
00558 }
00559 _lang_newgrflangid = (uint8)langid;
00560 } else if (!memcmp(str, "gender ", 7)) {
00561 char *buf = str + 7;
00562
00563 for (;;) {
00564 const char *s = ParseWord(&buf);
00565
00566 if (s == NULL) break;
00567 if (_numgenders >= MAX_NUM_GENDER) error("Too many genders, max %d", MAX_NUM_GENDER);
00568 strecpy(_genders[_numgenders], s, lastof(_genders[_numgenders]));
00569 _numgenders++;
00570 }
00571 } else if (!memcmp(str, "case ", 5)) {
00572 char *buf = str + 5;
00573
00574 for (;;) {
00575 const char *s = ParseWord(&buf);
00576
00577 if (s == NULL) break;
00578 if (_numcases >= MAX_NUM_CASES) error("Too many cases, max %d", MAX_NUM_CASES);
00579 strecpy(_cases[_numcases], s, lastof(_cases[_numcases]));
00580 _numcases++;
00581 }
00582 } else {
00583 error("unknown pragma '%s'", str);
00584 }
00585 }
00586
00587 static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
00588 {
00589 char param[100];
00590 int argno;
00591 int argidx = 0;
00592 int casei;
00593
00594 memset(p, 0, sizeof(*p));
00595
00596 for (;;) {
00597
00598 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
00599
00600 if (ar == NULL) break;
00601
00602
00603 if (argno != -1 && ar->consumes == 0) error("Non consumer param can't have a paramindex");
00604
00605 if (ar->consumes) {
00606 if (argno != -1) argidx = argno;
00607 if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) error("invalid param idx %d", argidx);
00608 if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) error("duplicate param idx %d", argidx);
00609
00610 p->cmd[argidx++] = ar;
00611 } else if (!(ar->flags & C_DONTCOUNT)) {
00612 if (p->np >= lengthof(p->pairs)) error("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
00613 p->pairs[p->np].a = ar;
00614 p->pairs[p->np].v = param[0] != '\0' ? strdup(param) : "";
00615 p->np++;
00616 }
00617 }
00618 }
00619
00620
00621 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
00622 {
00623 if (a == NULL) return NULL;
00624
00625 if (strcmp(a->cmd, "STRING1") == 0 ||
00626 strcmp(a->cmd, "STRING2") == 0 ||
00627 strcmp(a->cmd, "STRING3") == 0 ||
00628 strcmp(a->cmd, "STRING4") == 0 ||
00629 strcmp(a->cmd, "STRING5") == 0 ||
00630 strcmp(a->cmd, "RAW_STRING") == 0) {
00631 return FindCmd("STRING", 6);
00632 }
00633
00634 return a;
00635 }
00636
00637
00638 static bool CheckCommandsMatch(char *a, char *b, const char *name)
00639 {
00640
00641
00642
00643
00644 if (!_translation) return true;
00645
00646 ParsedCommandStruct templ;
00647 ParsedCommandStruct lang;
00648 bool result = true;
00649
00650 ExtractCommandString(&templ, b, true);
00651 ExtractCommandString(&lang, a, true);
00652
00653
00654 if (templ.np != lang.np) {
00655 strgen_warning("%s: template string and language string have a different # of commands", name);
00656 result = false;
00657 }
00658
00659 for (uint i = 0; i < templ.np; i++) {
00660
00661 bool found = false;
00662 for (uint j = 0; j < lang.np; j++) {
00663 if (templ.pairs[i].a == lang.pairs[j].a &&
00664 strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
00665
00666 lang.pairs[j].a = NULL;
00667 found = true;
00668 break;
00669 }
00670 }
00671
00672 if (!found) {
00673 strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
00674 result = false;
00675 }
00676 }
00677
00678
00679
00680 for (uint i = 0; i < lengthof(templ.cmd); i++) {
00681 if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
00682 strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
00683 lang.cmd[i] == NULL ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
00684 templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
00685 result = false;
00686 }
00687 }
00688
00689 return result;
00690 }
00691
00692 static void HandleString(char *str, bool master)
00693 {
00694 if (*str == '#') {
00695 if (str[1] == '#' && str[2] != '#') HandlePragma(str + 2);
00696 return;
00697 }
00698
00699
00700 if (*str == ';' || *str == ' ' || *str == '\0') return;
00701
00702 char *s = strchr(str, ':');
00703 if (s == NULL) {
00704 strgen_error("Line has no ':' delimiter");
00705 return;
00706 }
00707
00708 char *t;
00709
00710
00711 for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
00712 *t = 0;
00713 s++;
00714
00715
00716 const char *tmp;
00717 for (tmp = s; *tmp != '\0';) {
00718 size_t len = Utf8Validate(tmp);
00719 if (len == 0) error("Invalid UTF-8 sequence in '%s'", s);
00720 tmp += len;
00721 }
00722
00723
00724
00725 char *casep = strchr(str, '.');
00726 if (casep) *casep++ = '\0';
00727
00728
00729 LangString *ent = HashFind(str);
00730
00731 if (master) {
00732 if (ent != NULL && casep == NULL) {
00733 strgen_error("String name '%s' is used multiple times", str);
00734 return;
00735 }
00736
00737 if (ent == NULL && casep != NULL) {
00738 strgen_error("Base string name '%s' doesn't exist yet. Define it before defining a case.", str);
00739 return;
00740 }
00741
00742 if (ent == NULL) {
00743 if (_strings[_next_string_id]) {
00744 strgen_error("String ID 0x%X for '%s' already in use by '%s'", _next_string_id, str, _strings[_next_string_id]->name);
00745 return;
00746 }
00747
00748
00749 ent = CallocT<LangString>(1);
00750 _strings[_next_string_id] = ent;
00751 ent->index = _next_string_id++;
00752 ent->name = strdup(str);
00753 ent->line = _cur_line;
00754
00755 HashAdd(str, ent);
00756 }
00757
00758 if (casep != NULL) {
00759 Case *c = MallocT<Case>(1);
00760
00761 c->caseidx = ResolveCaseName(casep, strlen(casep));
00762 c->string = strdup(s);
00763 c->next = ent->english_case;
00764 ent->english_case = c;
00765 } else {
00766 ent->english = strdup(s);
00767 }
00768
00769 } else {
00770 if (ent == NULL) {
00771 strgen_warning("String name '%s' does not exist in master file", str);
00772 return;
00773 }
00774
00775 if (ent->translated && casep == NULL) {
00776 strgen_error("String name '%s' is used multiple times", str);
00777 return;
00778 }
00779
00780 if (s[0] == ':' && s[1] == '\0' && casep == NULL) {
00781
00782 ent->translated = strdup(ent->english);
00783 } else {
00784
00785 if (!CheckCommandsMatch(s, ent->english, str)) return;
00786
00787 if (casep != NULL) {
00788 Case *c = MallocT<Case>(1);
00789
00790 c->caseidx = ResolveCaseName(casep, strlen(casep));
00791 c->string = strdup(s);
00792 c->next = ent->translated_case;
00793 ent->translated_case = c;
00794 } else {
00795 ent->translated = strdup(s);
00796
00797
00798
00799 ent->line = _cur_line;
00800 }
00801 }
00802 }
00803 }
00804
00805
/**
 * Remove trailing carriage returns, newlines and spaces from a string, in place.
 * @param buf String to trim.
 */
static void rstrip(char *buf)
{
	char *end = buf + strlen(buf);

	while (end > buf && (end[-1] == '\r' || end[-1] == '\n' || end[-1] == ' ')) {
		end--;
	}
	*end = '\0';
}
00812
00813
00814 static void ParseFile(const char *file, bool english)
00815 {
00816 FILE *in;
00817 char buf[2048];
00818
00819
00820 const char *cur_file = strrchr(_file, PATHSEPCHAR);
00821 const char *next_file = strrchr(file, PATHSEPCHAR);
00822 _translation = next_file != NULL && cur_file != NULL && strcmp(cur_file, next_file) != 0;
00823 _file = file;
00824
00825
00826 _numgenders = 0;
00827 _lang_name[0] = _lang_ownname[0] = _lang_isocode[0] = '\0';
00828 strecpy(_lang_digit_group_separator, ",", lastof(_lang_digit_group_separator));
00829 strecpy(_lang_digit_group_separator_currency, ",", lastof(_lang_digit_group_separator_currency));
00830 strecpy(_lang_digit_decimal_separator, ".", lastof(_lang_digit_decimal_separator));
00831 _lang_textdir = TD_LTR;
00832 _lang_winlangid = 0x0000;
00833 _lang_newgrflangid = 0;
00834
00835
00836
00837 in = fopen(file, "r");
00838 if (in == NULL) error("Cannot open file");
00839 _cur_line = 1;
00840 while (fgets(buf, sizeof(buf), in) != NULL) {
00841 rstrip(buf);
00842 HandleString(buf, english);
00843 _cur_line++;
00844 }
00845 fclose(in);
00846
00847 if (StrEmpty(_lang_name) || StrEmpty(_lang_ownname) || StrEmpty(_lang_isocode)) {
00848 error("Language must include ##name, ##ownname and ##isocode");
00849 }
00850 }
00851
00852
00853 static uint32 MyHashStr(uint32 hash, const char *s)
00854 {
00855 for (; *s != '\0'; s++) {
00856 hash = ROL(hash, 3) ^ *s;
00857 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00858 }
00859 return hash;
00860 }
00861
00862
00863
00864 static void MakeHashOfStrings()
00865 {
00866 uint32 hash = 0;
00867 uint i;
00868
00869 for (i = 0; i != lengthof(_strings); i++) {
00870 const LangString *ls = _strings[i];
00871
00872 if (ls != NULL) {
00873 const CmdStruct *cs;
00874 const char *s;
00875 char buf[256];
00876 int argno;
00877 int casei;
00878
00879 s = ls->name;
00880 hash ^= i * 0x717239;
00881 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00882 hash = MyHashStr(hash, s + 1);
00883
00884 s = ls->english;
00885 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
00886 if (cs->flags & C_DONTCOUNT) continue;
00887
00888 hash ^= (cs - _cmd_structs) * 0x1234567;
00889 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
00890 }
00891 }
00892 }
00893 _hash = hash;
00894 }
00895
00896
00897 static uint CountInUse(uint grp)
00898 {
00899 int i;
00900
00901 for (i = 0x800; --i >= 0;) if (_strings[(grp << 11) + i] != NULL) break;
00902 return i + 1;
00903 }
00904
00905
00906 bool CompareFiles(const char *n1, const char *n2)
00907 {
00908 FILE *f2 = fopen(n2, "rb");
00909 if (f2 == NULL) return false;
00910
00911 FILE *f1 = fopen(n1, "rb");
00912 if (f1 == NULL) error("can't open %s", n1);
00913
00914 size_t l1, l2;
00915 do {
00916 char b1[4096];
00917 char b2[4096];
00918 l1 = fread(b1, 1, sizeof(b1), f1);
00919 l2 = fread(b2, 1, sizeof(b2), f2);
00920
00921 if (l1 != l2 || memcmp(b1, b2, l1)) {
00922 fclose(f2);
00923 fclose(f1);
00924 return false;
00925 }
00926 } while (l1);
00927
00928 fclose(f2);
00929 fclose(f1);
00930 return true;
00931 }
00932
00933
00934 static void WriteStringsH(const char *filename)
00935 {
00936 int next = -1;
00937
00938 _output_filename = "tmp.xxx";
00939 _output_file = fopen(_output_filename, "w");
00940 if (_output_file == NULL) error("can't open tmp.xxx");
00941
00942 fprintf(_output_file, "/* This file is automatically generated. Do not modify */\n\n");
00943 fprintf(_output_file, "#ifndef TABLE_STRINGS_H\n");
00944 fprintf(_output_file, "#define TABLE_STRINGS_H\n");
00945
00946 for (int i = 0; i != lengthof(_strings); i++) {
00947 if (_strings[i] != NULL) {
00948 if (next != i) fprintf(_output_file, "\n");
00949 fprintf(_output_file, "static const StringID %s = 0x%X;\n", _strings[i]->name, i);
00950 next = i + 1;
00951 }
00952 }
00953
00954 fprintf(_output_file, "\nstatic const StringID STR_LAST_STRINGID = 0x%X;\n", next - 1);
00955
00956 fprintf(_output_file,
00957 "\nenum {\n"
00958 "\tLANGUAGE_PACK_IDENT = 0x474E414C, // Big Endian value for 'LANG' (LE is 0x 4C 41 4E 47)\n"
00959 "\tLANGUAGE_PACK_VERSION = 0x%X,\n"
00960 "};\n", (uint)_hash
00961 );
00962
00963 fprintf(_output_file, "\n#endif /* TABLE_STRINGS_H */\n");
00964
00965 fclose(_output_file);
00966 _output_file = NULL;
00967
00968 if (CompareFiles(_output_filename, filename)) {
00969
00970 unlink(_output_filename);
00971 } else {
00972
00973 #if defined(WIN32) || defined(WIN64)
00974 unlink(filename);
00975 #endif
00976 if (rename(_output_filename, filename) == -1) error("rename() failed");
00977 }
00978 _output_filename = NULL;
00979 }
00980
00981 static int TranslateArgumentIdx(int argidx, int offset)
00982 {
00983 int sum;
00984
00985 if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
00986 error("invalid argidx %d", argidx);
00987 }
00988 const CmdStruct *cs = _cur_pcs.cmd[argidx];
00989 if (cs != NULL && cs->consumes <= offset) {
00990 error("invalid argidx offset %d:%d", argidx, offset);
00991 }
00992
00993 if (_cur_pcs.cmd[argidx] == NULL) {
00994 error("no command for this argidx %d", argidx);
00995 }
00996
00997 for (int i = sum = 0; i < argidx; i++) {
00998 const CmdStruct *cs = _cur_pcs.cmd[i];
00999
01000 sum += (cs != NULL) ? cs->consumes : 1;
01001 }
01002
01003 return sum + offset;
01004 }
01005
01006 static void PutArgidxCommand()
01007 {
01008 PutUtf8(SCC_ARG_INDEX);
01009 PutByte(TranslateArgumentIdx(_cur_argidx));
01010 }
01011
01012
01013 static void PutCommandString(const char *str)
01014 {
01015 _cur_argidx = 0;
01016
01017 while (*str != '\0') {
01018
01019 if (*str != '{') {
01020 PutByte(*str++);
01021 continue;
01022 }
01023
01024 char param[256];
01025 int argno;
01026 int casei;
01027 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
01028 if (cs == NULL) break;
01029
01030 if (casei != -1) {
01031 PutUtf8(SCC_SETCASE);
01032 PutByte(casei);
01033 }
01034
01035
01036 if (cs->consumes > 0) {
01037
01038 if (argno != -1 && argno != _cur_argidx) {
01039 _cur_argidx = argno;
01040 PutArgidxCommand();
01041 }
01042
01043
01044 cs = _cur_pcs.cmd[_cur_argidx++];
01045 if (cs == NULL) {
01046 error("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
01047 }
01048 }
01049
01050 cs->proc(param, cs->value);
01051 }
01052 }
01053
01054 static void WriteLength(FILE *f, uint length)
01055 {
01056 if (length < 0xC0) {
01057 fputc(length, f);
01058 } else if (length < 0x4000) {
01059 fputc((length >> 8) | 0xC0, f);
01060 fputc(length & 0xFF, f);
01061 } else {
01062 error("string too long");
01063 }
01064 }
01065
01066
01067 static void WriteLangfile(const char *filename)
01068 {
01069 uint in_use[32];
01070 LanguagePackHeader hdr;
01071
01072 _output_filename = filename;
01073 _output_file = fopen(filename, "wb");
01074 if (_output_file == NULL) error("can't open %s", filename);
01075
01076 memset(&hdr, 0, sizeof(hdr));
01077 for (int i = 0; i != 32; i++) {
01078 uint n = CountInUse(i);
01079
01080 in_use[i] = n;
01081 hdr.offsets[i] = TO_LE16(n);
01082 }
01083
01084
01085 hdr.ident = TO_LE32(0x474E414C);
01086 hdr.version = TO_LE32(_hash);
01087 hdr.plural_form = _lang_pluralform;
01088 hdr.text_dir = _lang_textdir;
01089 hdr.winlangid = TO_LE16(_lang_winlangid);
01090 hdr.newgrflangid = _lang_newgrflangid;
01091 strecpy(hdr.name, _lang_name, lastof(hdr.name));
01092 strecpy(hdr.own_name, _lang_ownname, lastof(hdr.own_name));
01093 strecpy(hdr.isocode, _lang_isocode, lastof(hdr.isocode));
01094 strecpy(hdr.digit_group_separator, _lang_digit_group_separator, lastof(hdr.digit_group_separator));
01095 strecpy(hdr.digit_group_separator_currency, _lang_digit_group_separator_currency, lastof(hdr.digit_group_separator_currency));
01096 strecpy(hdr.digit_decimal_separator, _lang_digit_decimal_separator, lastof(hdr.digit_decimal_separator));
01097
01098 fwrite(&hdr, sizeof(hdr), 1, _output_file);
01099
01100 for (int i = 0; i != 32; i++) {
01101 for (uint j = 0; j != in_use[i]; j++) {
01102 const LangString *ls = _strings[(i << 11) + j];
01103 const Case *casep;
01104 const char *cmdp;
01105
01106
01107 if (ls == NULL) {
01108 WriteLength(_output_file, 0);
01109 continue;
01110 }
01111
01112 _cur_ident = ls->name;
01113 _cur_line = ls->line;
01114
01115
01116 if (_show_todo > 0 && ls->translated == NULL) {
01117 if ((_show_todo & 2) != 0) {
01118 strgen_warning("'%s' is untranslated", ls->name);
01119 }
01120 if ((_show_todo & 1) != 0) {
01121 const char *s = "<TODO> ";
01122 while (*s != '\0') PutByte(*s++);
01123 }
01124 }
01125
01126
01127 ExtractCommandString(&_cur_pcs, ls->english, false);
01128
01129 if (ls->translated_case != NULL || ls->translated != NULL) {
01130 casep = ls->translated_case;
01131 cmdp = ls->translated;
01132 } else {
01133 casep = ls->english_case;
01134 cmdp = ls->english;
01135 }
01136
01137 _translated = _masterlang || (cmdp != ls->english);
01138
01139 if (casep != NULL) {
01140 const Case *c;
01141 uint num;
01142
01143
01144
01145
01146
01147 PutUtf8(SCC_SWITCH_CASE);
01148
01149 for (num = 0, c = casep; c; c = c->next) num++;
01150 PutByte(num);
01151
01152
01153 for (c = casep; c != NULL; c = c->next) {
01154 int pos;
01155
01156 PutByte(c->caseidx);
01157
01158 pos = _put_pos;
01159 PutByte(0);
01160 PutByte(0);
01161
01162 PutCommandString(c->string);
01163 PutByte(0);
01164
01165 _put_buf[pos + 0] = GB(_put_pos - (pos + 2), 8, 8);
01166 _put_buf[pos + 1] = GB(_put_pos - (pos + 2), 0, 8);
01167 }
01168 }
01169
01170 if (cmdp != NULL) PutCommandString(cmdp);
01171
01172 WriteLength(_output_file, _put_pos);
01173 fwrite(_put_buf, 1, _put_pos, _output_file);
01174 _put_pos = 0;
01175 }
01176 }
01177
01178 fputc(0, _output_file);
01179 fclose(_output_file);
01180
01181 _output_file = NULL;
01182 _output_filename = NULL;
01183 }
01184
01186 static inline void ottd_mkdir(const char *directory)
01187 {
01188 #if defined(WIN32) || defined(__WATCOMC__)
01189 mkdir(directory);
01190 #else
01191 mkdir(directory, 0755);
01192 #endif
01193 }
01194
01198 static inline char *mkpath(char *buf, size_t buflen, const char *path, const char *file)
01199 {
01200 ttd_strlcpy(buf, path, buflen);
01201
01202 char *p = strchr(buf, '\0');
01203 if (p[-1] != PATHSEPCHAR && (size_t)(p - buf) + 1 < buflen) *p++ = PATHSEPCHAR;
01204 ttd_strlcpy(p, file, buflen - (size_t)(p - buf));
01205 return buf;
01206 }
01207
#ifdef __MINGW32__
/**
 * Replace every '/' in a string by '\\', in place.
 * @param s String to convert.
 * @return \a s.
 */
static inline char *replace_pathsep(char *s)
{
	char *c = s;

	while (*c != '\0') {
		if (*c == '/') *c = '\\';
		c++;
	}
	return s;
}
#else
/**
 * Nothing to convert in this configuration.
 * @param s String to pass through.
 * @return \a s, unchanged.
 */
static inline char *replace_pathsep(char *s)
{
	return s;
}
#endif
01222
01223 int CDECL main(int argc, char *argv[])
01224 {
01225 char pathbuf[MAX_PATH];
01226 const char *src_dir = ".";
01227 const char *dest_dir = NULL;
01228
01229 while (argc > 1 && *argv[1] == '-') {
01230 if (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0) {
01231 puts("$Revision: 19278 $");
01232 return 0;
01233 }
01234
01235 if (strcmp(argv[1], "-export-commands") == 0) {
01236 printf("args\tflags\tcommand\treplacement\n");
01237 for (const CmdStruct *cs = _cmd_structs; cs < endof(_cmd_structs); cs++) {
01238 char flags;
01239 switch (cs->value) {
01240 case 0x200E: case 0x200F:
01241 case 0x202A: case 0x202B: case 0x202C: case 0x202D: case 0x202E:
01242 case 0xA0:
01243 case '\n':
01244 case '{':
01245
01246 flags = 'i';
01247 break;
01248
01249 default:
01250 if (cs->proc == EmitGender) {
01251 flags = 'g';
01252 } else if (cs->proc == EmitPlural) {
01253 flags = 'p';
01254 } else {
01255 flags = '0';
01256 }
01257 }
01258 printf("%i\t%c\t\"%s\"\t\"%s\"\n", cs->consumes, flags, cs->cmd, strstr(cs->cmd, "STRING") ? "STRING" : cs->cmd);
01259 }
01260 return 0;
01261 }
01262
01263 if (strcmp(argv[1], "-export-plurals") == 0) {
01264 printf("count\tdescription\n");
01265 for (const PluralForm *pf = _plural_forms; pf < endof(_plural_forms); pf++) {
01266 printf("%i\t\"%s\"\n", pf->plural_count, pf->description);
01267 }
01268 return 0;
01269 }
01270
01271 if (strcmp(argv[1], "-export-pragmas") == 0) {
01272 printf("name\tflags\tdefault\tdescription\n");
01273 for (size_t i = 0; i < lengthof(_pragmas); i++) {
01274 printf("\"%s\"\t%s\t\"%s\"\t\"%s\"\n",
01275 _pragmas[i][0], _pragmas[i][1], _pragmas[i][2], _pragmas[i][3]);
01276 }
01277 return 0;
01278 }
01279
01280 if (strcmp(argv[1], "-t") == 0 || strcmp(argv[1], "--todo") == 0) {
01281 _show_todo |= 1;
01282 argc--, argv++;
01283 continue;
01284 }
01285
01286 if (strcmp(argv[1], "-w") == 0 || strcmp(argv[1], "--warning") == 0) {
01287 _show_todo |= 2;
01288 argc--, argv++;
01289 continue;
01290 }
01291
01292 if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) {
01293 puts(
01294 "strgen - $Revision: 19278 $\n"
01295 " -v | --version print version information and exit\n"
01296 " -t | --todo replace any untranslated strings with '<TODO>'\n"
01297 " -w | --warning print a warning for any untranslated strings\n"
01298 " -h | -? | --help print this help message and exit\n"
01299 " -s | --source_dir search for english.txt in the specified directory\n"
01300 " -d | --dest_dir put output file in the specified directory, create if needed\n"
01301 " -export-commands export all commands and exit\n"
01302 " -export-plurals export all plural forms and exit\n"
01303 " -export-pragmas export all pragmas and exit\n"
01304 " Run without parameters and strgen will search for english.txt and parse it,\n"
01305 " creating strings.h. Passing an argument, strgen will translate that language\n"
01306 " file using english.txt as a reference and output <language>.lng."
01307 );
01308 return 0;
01309 }
01310
01311 if (argc > 2 && (strcmp(argv[1], "-s") == 0 || strcmp(argv[1], "--source_dir") == 0)) {
01312 src_dir = replace_pathsep(argv[2]);
01313 argc -= 2, argv += 2;
01314 continue;
01315 }
01316
01317 if (argc > 2 && (strcmp(argv[1], "-d") == 0 || strcmp(argv[1], "--dest_dir") == 0)) {
01318 dest_dir = replace_pathsep(argv[2]);
01319 argc -= 2, argv += 2;
01320 continue;
01321 }
01322
01323 fprintf(stderr, "Invalid arguments\n");
01324 return 0;
01325 }
01326
01327 if (dest_dir == NULL) dest_dir = src_dir;
01328
01329
01330
01331
01332
01333 if (argc == 1) {
01334 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01335
01336
01337 _masterlang = true;
01338 ParseFile(pathbuf, true);
01339 MakeHashOfStrings();
01340 if (_errors) return 1;
01341
01342
01343 ottd_mkdir(dest_dir);
01344 mkpath(pathbuf, lengthof(pathbuf), dest_dir, "strings.h");
01345 WriteStringsH(pathbuf);
01346 } else if (argc == 2) {
01347 char *r;
01348
01349 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01350
01351
01352 _masterlang = false;
01353 ParseFile(pathbuf, true);
01354 MakeHashOfStrings();
01355 ParseFile(replace_pathsep(argv[1]), false);
01356 if (_errors) return 1;
01357
01358
01359 r = strrchr(argv[1], PATHSEPCHAR);
01360 mkpath(pathbuf, lengthof(pathbuf), dest_dir, (r != NULL) ? &r[1] : argv[1]);
01361
01362
01363 r = strrchr(pathbuf, '.');
01364 if (r == NULL || strcmp(r, ".txt") != 0) r = strchr(pathbuf, '\0');
01365 ttd_strlcpy(r, ".lng", (size_t)(r - pathbuf));
01366 WriteLangfile(pathbuf);
01367
01368
01369 if ((_show_todo & 2) != 0) {
01370 fprintf(stdout, "%d warnings and %d errors for %s\n", _warnings, _errors, pathbuf);
01371 }
01372 } else {
01373 fprintf(stderr, "Invalid arguments\n");
01374 }
01375
01376 return 0;
01377 }