00001
00002
00003 #include "../stdafx.h"
00004 #include "../core/alloc_func.hpp"
00005 #include "../core/endian_func.hpp"
00006 #include "../string_func.h"
00007 #include "../table/control_codes.h"
00008
00009 #include <stdio.h>
00010 #include <string.h>
00011 #include <stdlib.h>
00012 #include <stdarg.h>
00013
00014 #if (!defined(WIN32) && !defined(WIN64)) || defined(__CYGWIN__)
00015 #include <unistd.h>
00016 #include <sys/types.h>
00017 #include <sys/stat.h>
00018 #endif
00019
00020 #if defined WIN32 || defined __WATCOMC__
00021 #include <direct.h>
00022 #endif
00023
00024 #ifdef __MORPHOS__
00025 #ifdef stderr
00026 #undef stderr
00027 #endif
00028 #define stderr stdout
00029 #endif
00030
00031
00032
/**
 * Signature of the emitter callbacks stored in the command table.
 * @param buf   Parameter text collected for the command.
 * @param value The \c value member of the command table entry being emitted.
 */
typedef void (*ParseCmdProc)(char *buf, int value);
00034
00035 struct LanguagePackHeader {
00036 uint32 ident;
00037 uint32 version;
00038 char name[32];
00039 char own_name[32];
00040 char isocode[16];
00041 uint16 offsets[32];
00042 byte plural_form;
00043 byte pad[3];
00044 };
00045
00046 struct CmdStruct {
00047 const char *cmd;
00048 ParseCmdProc proc;
00049 long value;
00050 int8 consumes;
00051 byte flags;
00052 };
00053
/** Bits for CmdStruct::flags. */
enum {
	C_DONTCOUNT = 1 << 0, ///< Command is left out of the command-pair comparison and of the version hash
	C_CASE      = 1 << 1, ///< Command may be followed by a ".casename" suffix
};
00058
00059
/** A case specific variant of a string; variants form a singly linked list. */
struct Case {
	int caseidx;  ///< Case number as returned by ResolveCaseName() (1-based)
	char *string; ///< Text of this variant
	Case *next;   ///< Next variant, or NULL at the end of the list
};
00065
static bool _masterlang;                     ///< When set, every string is treated as translated while writing
static bool _translated;                     ///< Whether the string currently being written counts as translated
static const char* _file = "(unknown file)"; ///< File name used as prefix of diagnostics
static int _cur_line;                        ///< Line number used in diagnostics
static int _errors;                          ///< Number of error() calls so far
static int _warnings;                        ///< Number of warning() calls so far
static int _show_todo;                       ///< Bit 0: prefix untranslated strings with "<TODO> "; bit 1: warn about them
00071
00072 struct LangString {
00073 char *name;
00074 char *english;
00075 char *translated;
00076 uint16 hash_next;
00077 uint16 index;
00078 int line;
00079 Case *english_case;
00080 Case *translated_case;
00081 };
00082
00083 static LangString *_strings[65536];
00084
00085
00086 #define HASH_SIZE 32767
00087 static uint16 _hash_head[HASH_SIZE];
00088
00089 static byte _put_buf[4096];
00090 static int _put_pos;
00091 static int _next_string_id;
00092
00093 static uint32 _hash;
00094 static char _lang_name[32], _lang_ownname[32], _lang_isocode[16];
00095 static byte _lang_pluralform;
00096 #define MAX_NUM_GENDER 8
00097 static char _genders[MAX_NUM_GENDER][8];
00098 static int _numgenders;
00099
00100
00101 #define MAX_NUM_CASES 50
00102 static char _cases[MAX_NUM_CASES][16];
00103 static int _numcases;
00104
00105
00106 static const byte _plural_form_counts[] = { 2, 1, 2, 3, 3, 3, 3, 3, 4, 2 };
00107
00108 static const char *_cur_ident;
00109
00110 struct CmdPair {
00111 const CmdStruct *a;
00112 const char *v;
00113 };
00114
00115 struct ParsedCommandStruct {
00116 int np;
00117 CmdPair pairs[32];
00118 const CmdStruct *cmd[32];
00119 };
00120
00121
00122 static ParsedCommandStruct _cur_pcs;
00123 static int _cur_argidx;
00124
00125 static uint HashStr(const char *s)
00126 {
00127 uint hash = 0;
00128 for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
00129 return hash % HASH_SIZE;
00130 }
00131
00132 static void HashAdd(const char *s, LangString *ls)
00133 {
00134 uint hash = HashStr(s);
00135 ls->hash_next = _hash_head[hash];
00136 _hash_head[hash] = ls->index + 1;
00137 }
00138
00139 static LangString *HashFind(const char *s)
00140 {
00141 int idx = _hash_head[HashStr(s)];
00142
00143 while (--idx >= 0) {
00144 LangString* ls = _strings[idx];
00145
00146 if (strcmp(ls->name, s) == 0) return ls;
00147 idx = ls->hash_next;
00148 }
00149 return NULL;
00150 }
00151
00152 #ifdef _MSC_VER
00153 # define LINE_NUM_FMT "(%d)"
00154 #else
00155 # define LINE_NUM_FMT ":%d"
00156 #endif
00157
00158 static void CDECL warning(const char *s, ...)
00159 {
00160 char buf[1024];
00161 va_list va;
00162 va_start(va, s);
00163 vsnprintf(buf, lengthof(buf), s, va);
00164 va_end(va);
00165 fprintf(stderr, "%s" LINE_NUM_FMT ": warning: %s\n", _file, _cur_line, buf);
00166 _warnings++;
00167 }
00168
00169 void CDECL error(const char *s, ...)
00170 {
00171 char buf[1024];
00172 va_list va;
00173 va_start(va, s);
00174 vsnprintf(buf, lengthof(buf), s, va);
00175 va_end(va);
00176 fprintf(stderr, "%s" LINE_NUM_FMT ": error: %s\n", _file, _cur_line, buf);
00177 _errors++;
00178 }
00179
00180
00181 static void NORETURN CDECL fatal(const char *s, ...)
00182 {
00183 char buf[1024];
00184 va_list va;
00185 va_start(va, s);
00186 vsnprintf(buf, lengthof(buf), s, va);
00187 va_end(va);
00188 fprintf(stderr, "%s" LINE_NUM_FMT ": FATAL: %s\n", _file, _cur_line, buf);
00189 exit(1);
00190 }
00191
00192 static void PutByte(byte c)
00193 {
00194 if (_put_pos == lengthof(_put_buf)) fatal("Put buffer too small");
00195 _put_buf[_put_pos++] = c;
00196 }
00197
00198
00199 static void PutUtf8(uint32 value)
00200 {
00201 if (value < 0x80) {
00202 PutByte(value);
00203 } else if (value < 0x800) {
00204 PutByte(0xC0 + GB(value, 6, 5));
00205 PutByte(0x80 + GB(value, 0, 6));
00206 } else if (value < 0x10000) {
00207 PutByte(0xE0 + GB(value, 12, 4));
00208 PutByte(0x80 + GB(value, 6, 6));
00209 PutByte(0x80 + GB(value, 0, 6));
00210 } else if (value < 0x110000) {
00211 PutByte(0xF0 + GB(value, 18, 3));
00212 PutByte(0x80 + GB(value, 12, 6));
00213 PutByte(0x80 + GB(value, 6, 6));
00214 PutByte(0x80 + GB(value, 0, 6));
00215 } else {
00216 warning("Invalid unicode value U+0x%X", value);
00217 }
00218 }
00219
00220
00221 size_t Utf8Validate(const char *s)
00222 {
00223 uint32 c;
00224
00225 if (!HasBit(s[0], 7)) {
00226
00227 return 1;
00228 } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
00229
00230 c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
00231 if (c >= 0x80) return 2;
00232 } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
00233
00234 c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
00235 if (c >= 0x800) return 3;
00236 } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
00237
00238 c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
00239 if (c >= 0x10000 && c <= 0x10FFFF) return 4;
00240 }
00241
00242 return 0;
00243 }
00244
00245
00246 static void EmitSingleChar(char *buf, int value)
00247 {
00248 if (*buf != '\0') warning("Ignoring trailing letters in command");
00249 PutUtf8(value);
00250 }
00251
00252
00253 static void EmitSetX(char *buf, int value)
00254 {
00255 char *err;
00256 int x = strtol(buf, &err, 0);
00257 if (*err != 0) fatal("SetX param invalid");
00258 PutUtf8(SCC_SETX);
00259 PutByte((byte)x);
00260 }
00261
00262
00263 static void EmitSetXY(char *buf, int value)
00264 {
00265 char *err;
00266 int x;
00267 int y;
00268
00269 x = strtol(buf, &err, 0);
00270 if (*err != ' ') fatal("SetXY param invalid");
00271 y = strtol(err + 1, &err, 0);
00272 if (*err != 0) fatal("SetXY param invalid");
00273
00274 PutUtf8(SCC_SETXY);
00275 PutByte((byte)x);
00276 PutByte((byte)y);
00277 }
00278
00279
00280
00281
00282
00283
00284
/**
 * Parse an optionally relative number from the front of a buffer.
 * Leading spaces and tabs are skipped. A number with a leading '+' or a
 * negative number is added to \a *value; any other number replaces it.
 * @param buf   In: text to parse. Out: advanced past the number on success, untouched on failure.
 * @param value In: base for relative numbers. Out: the resulting number; untouched on failure.
 * @return true when a number was parsed, false when there was none.
 */
bool ParseRelNum(char **buf, int *value)
{
	const char *cursor = *buf;
	while (*cursor == ' ' || *cursor == '\t') cursor++;

	bool relative = (*cursor == '+');
	if (relative) cursor++;

	char *end;
	int parsed = strtol(cursor, &end, 0);
	if (end == cursor) return false;

	if (relative || parsed < 0) {
		*value += parsed;
	} else {
		*value = parsed;
	}

	*buf = end;
	return true;
}
00307
00308
/**
 * Split the next word off the front of a buffer, modifying the buffer in place.
 * A word is either delimited by spaces/tabs, or enclosed in double quotes (the
 * quotes are not part of the result). A missing closing quote ends the word at
 * the end of the buffer.
 * @param buf In: text to parse. Out: position after the word; untouched when there is no word.
 * @return The NUL-terminated word, or NULL when only blanks were left.
 */
char *ParseWord(char **buf)
{
	char *s = *buf;

	while (*s == ' ' || *s == '\t') s++;
	if (*s == '\0') return NULL;

	bool quoted = (*s == '"');
	if (quoted) s++;

	char *word = s;
	for (; *s != '\0'; s++) {
		bool at_end = quoted ? (*s == '"') : (*s == ' ' || *s == '\t');
		if (at_end) {
			/* Terminate the word and continue behind the delimiter. */
			*s++ = '\0';
			break;
		}
	}

	*buf = s;
	return word;
}
00342
00343
00344 static int TranslateArgumentIdx(int arg);
00345
00346 static void EmitWordList(const char* const* words, uint nw)
00347 {
00348 uint i;
00349 uint j;
00350
00351 PutByte(nw);
00352 for (i = 0; i < nw; i++) PutByte(strlen(words[i]));
00353 for (i = 0; i < nw; i++) {
00354 for (j = 0; words[i][j] != '\0'; j++) PutByte(words[i][j]);
00355 }
00356 }
00357
00358 static void EmitPlural(char *buf, int value)
00359 {
00360 int argidx = _cur_argidx;
00361 const char* words[5];
00362 int nw = 0;
00363
00364
00365 if (!ParseRelNum(&buf, &argidx)) argidx--;
00366
00367
00368 for (nw = 0; nw < 5; nw++) {
00369 words[nw] = ParseWord(&buf);
00370 if (words[nw] == NULL) break;
00371 }
00372
00373 if (nw == 0)
00374 fatal("%s: No plural words", _cur_ident);
00375
00376 if (_plural_form_counts[_lang_pluralform] != nw) {
00377 if (_translated) {
00378 fatal("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
00379 _plural_form_counts[_lang_pluralform], nw);
00380 } else {
00381 if ((_show_todo & 2) != 0) warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
00382 if (nw > _plural_form_counts[_lang_pluralform]) {
00383 nw = _plural_form_counts[_lang_pluralform];
00384 } else {
00385 for (; nw < _plural_form_counts[_lang_pluralform]; nw++) {
00386 words[nw] = words[nw - 1];
00387 }
00388 }
00389 }
00390 }
00391
00392 PutUtf8(SCC_PLURAL_LIST);
00393 PutByte(TranslateArgumentIdx(argidx));
00394 EmitWordList(words, nw);
00395 }
00396
00397
00398 static void EmitGender(char *buf, int value)
00399 {
00400 int argidx = _cur_argidx;
00401 uint nw;
00402
00403 if (buf[0] == '=') {
00404 buf++;
00405
00406
00407 for (nw = 0; ; nw++) {
00408 if (nw >= 8) fatal("G argument '%s' invalid", buf);
00409 if (strcmp(buf, _genders[nw]) == 0) break;
00410 }
00411
00412 PutUtf8(SCC_GENDER_INDEX);
00413 PutByte(nw);
00414 } else {
00415 const char* words[8];
00416
00417
00418
00419 if (!ParseRelNum(&buf, &argidx)) {}
00420
00421 for (nw = 0; nw < 8; nw++) {
00422 words[nw] = ParseWord(&buf);
00423 if (words[nw] == NULL) break;
00424 }
00425 if (nw != _numgenders) fatal("Bad # of arguments for gender command");
00426 PutUtf8(SCC_GENDER_LIST);
00427 PutByte(TranslateArgumentIdx(argidx));
00428 EmitWordList(words, nw);
00429 }
00430 }
00431
00432
00433 static const CmdStruct _cmd_structs[] = {
00434
00435 {"SETX", EmitSetX, SCC_SETX, 0, 0},
00436 {"SETXY", EmitSetXY, SCC_SETXY, 0, 0},
00437
00438
00439 {"TINYFONT", EmitSingleChar, SCC_TINYFONT, 0, 0},
00440 {"BIGFONT", EmitSingleChar, SCC_BIGFONT, 0, 0},
00441
00442
00443 {"BLUE", EmitSingleChar, SCC_BLUE, 0, 0},
00444 {"SILVER", EmitSingleChar, SCC_SILVER, 0, 0},
00445 {"GOLD", EmitSingleChar, SCC_GOLD, 0, 0},
00446 {"RED", EmitSingleChar, SCC_RED, 0, 0},
00447 {"PURPLE", EmitSingleChar, SCC_PURPLE, 0, 0},
00448 {"LTBROWN", EmitSingleChar, SCC_LTBROWN, 0, 0},
00449 {"ORANGE", EmitSingleChar, SCC_ORANGE, 0, 0},
00450 {"GREEN", EmitSingleChar, SCC_GREEN, 0, 0},
00451 {"YELLOW", EmitSingleChar, SCC_YELLOW, 0, 0},
00452 {"DKGREEN", EmitSingleChar, SCC_DKGREEN, 0, 0},
00453 {"CREAM", EmitSingleChar, SCC_CREAM, 0, 0},
00454 {"BROWN", EmitSingleChar, SCC_BROWN, 0, 0},
00455 {"WHITE", EmitSingleChar, SCC_WHITE, 0, 0},
00456 {"LTBLUE", EmitSingleChar, SCC_LTBLUE, 0, 0},
00457 {"GRAY", EmitSingleChar, SCC_GRAY, 0, 0},
00458 {"DKBLUE", EmitSingleChar, SCC_DKBLUE, 0, 0},
00459 {"BLACK", EmitSingleChar, SCC_BLACK, 0, 0},
00460
00461 {"CURRCOMPACT", EmitSingleChar, SCC_CURRENCY_COMPACT, 1, 0},
00462 {"REV", EmitSingleChar, SCC_REVISION, 0, 0},
00463 {"SHORTCARGO", EmitSingleChar, SCC_CARGO_SHORT, 2, 0},
00464
00465 {"STRING1", EmitSingleChar, SCC_STRING1, 2, C_CASE},
00466 {"STRING2", EmitSingleChar, SCC_STRING2, 3, C_CASE},
00467 {"STRING3", EmitSingleChar, SCC_STRING3, 4, C_CASE},
00468 {"STRING4", EmitSingleChar, SCC_STRING4, 5, C_CASE},
00469 {"STRING5", EmitSingleChar, SCC_STRING5, 6, C_CASE},
00470
00471 {"STATIONFEATURES", EmitSingleChar, SCC_STATION_FEATURES, 1, 0},
00472 {"INDUSTRY", EmitSingleChar, SCC_INDUSTRY_NAME, 1, 0},
00473 {"CARGO", EmitSingleChar, SCC_CARGO, 2, 0},
00474 {"POWER", EmitSingleChar, SCC_POWER, 1, 0},
00475 {"VOLUME", EmitSingleChar, SCC_VOLUME, 1, 0},
00476 {"VOLUME_S", EmitSingleChar, SCC_VOLUME_SHORT, 1, 0},
00477 {"WEIGHT", EmitSingleChar, SCC_WEIGHT, 1, 0},
00478 {"WEIGHT_S", EmitSingleChar, SCC_WEIGHT_SHORT, 1, 0},
00479 {"FORCE", EmitSingleChar, SCC_FORCE, 1, 0},
00480 {"VELOCITY", EmitSingleChar, SCC_VELOCITY, 1, 0},
00481
00482 {"P", EmitPlural, 0, 0, C_DONTCOUNT},
00483 {"G", EmitGender, 0, 0, C_DONTCOUNT},
00484
00485 {"DATE_TINY", EmitSingleChar, SCC_DATE_TINY, 1, 0},
00486 {"DATE_SHORT", EmitSingleChar, SCC_DATE_SHORT, 1, 0},
00487 {"DATE_LONG", EmitSingleChar, SCC_DATE_LONG, 1, 0},
00488
00489 {"SKIP", EmitSingleChar, SCC_SKIP, 1, 0},
00490
00491 {"STRING", EmitSingleChar, SCC_STRING, 1, C_CASE},
00492
00493
00494 {"COMMA", EmitSingleChar, SCC_COMMA, 1, 0},
00495 {"NUM", EmitSingleChar, SCC_NUM, 1, 0},
00496
00497 {"CURRENCY", EmitSingleChar, SCC_CURRENCY, 1, 0},
00498
00499 {"WAYPOINT", EmitSingleChar, SCC_WAYPOINT_NAME, 1, 0},
00500 {"STATION", EmitSingleChar, SCC_STATION_NAME, 1, 0},
00501 {"TOWN", EmitSingleChar, SCC_TOWN_NAME, 1, 0},
00502 {"GROUP", EmitSingleChar, SCC_GROUP_NAME, 1, 0},
00503 {"SIGN", EmitSingleChar, SCC_SIGN_NAME, 1, 0},
00504 {"ENGINE", EmitSingleChar, SCC_ENGINE_NAME, 1, 0},
00505 {"VEHICLE", EmitSingleChar, SCC_VEHICLE_NAME, 1, 0},
00506 {"COMPANY", EmitSingleChar, SCC_COMPANY_NAME, 1, 0},
00507 {"COMPANYNUM", EmitSingleChar, SCC_COMPANY_NUM, 1, 0},
00508 {"PLAYERNAME", EmitSingleChar, SCC_PLAYER_NAME, 1, 0},
00509
00510
00511
00512
00513 {"", EmitSingleChar, '\n', 0, C_DONTCOUNT},
00514 {"{", EmitSingleChar, '{', 0, C_DONTCOUNT},
00515 {"UPARROW", EmitSingleChar, SCC_UPARROW, 0, 0},
00516 {"SMALLUPARROW", EmitSingleChar, SCC_SMALLUPARROW, 0, 0},
00517 {"SMALLDOWNARROW", EmitSingleChar, SCC_SMALLDOWNARROW, 0, 0},
00518 {"TRAIN", EmitSingleChar, SCC_TRAIN, 0, 0},
00519 {"LORRY", EmitSingleChar, SCC_LORRY, 0, 0},
00520 {"BUS", EmitSingleChar, SCC_BUS, 0, 0},
00521 {"PLANE", EmitSingleChar, SCC_PLANE, 0, 0},
00522 {"SHIP", EmitSingleChar, SCC_SHIP, 0, 0},
00523 {"NBSP", EmitSingleChar, 0xA0, 0, C_DONTCOUNT},
00524 {"CENT", EmitSingleChar, 0xA2, 0, C_DONTCOUNT},
00525 {"POUNDSIGN", EmitSingleChar, 0xA3, 0, C_DONTCOUNT},
00526 {"EURO", EmitSingleChar, 0x20AC, 0, C_DONTCOUNT},
00527 {"YENSIGN", EmitSingleChar, 0xA5, 0, C_DONTCOUNT},
00528 {"COPYRIGHT", EmitSingleChar, 0xA9, 0, C_DONTCOUNT},
00529 {"DOWNARROW", EmitSingleChar, SCC_DOWNARROW, 0, C_DONTCOUNT},
00530 {"CHECKMARK", EmitSingleChar, SCC_CHECKMARK, 0, C_DONTCOUNT},
00531 {"CROSS", EmitSingleChar, SCC_CROSS, 0, C_DONTCOUNT},
00532 {"REGISTERED", EmitSingleChar, 0xAE, 0, C_DONTCOUNT},
00533 {"RIGHTARROW", EmitSingleChar, SCC_RIGHTARROW, 0, C_DONTCOUNT},
00534 {"SMALLLEFTARROW", EmitSingleChar, SCC_LESSTHAN, 0, C_DONTCOUNT},
00535 {"SMALLRIGHTARROW",EmitSingleChar, SCC_GREATERTHAN, 0, C_DONTCOUNT},
00536 };
00537
00538
00539 static const CmdStruct *FindCmd(const char *s, int len)
00540 {
00541 const CmdStruct* cs;
00542
00543 for (cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
00544 if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
00545 }
00546 return NULL;
00547 }
00548
00549 static uint ResolveCaseName(const char *str, uint len)
00550 {
00551 uint i;
00552
00553 for (i = 0; i < MAX_NUM_CASES; i++) {
00554 if (memcmp(_cases[i], str, len) == 0 && _cases[i][len] == 0) return i + 1;
00555 }
00556 fatal("Invalid case-name '%s'", str);
00557 }
00558
00559
00560
00561
00562 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
00563 {
00564 const char *s = *str, *start;
00565 const CmdStruct *cmd;
00566 byte c;
00567
00568 *argno = -1;
00569 *casei = -1;
00570
00571
00572 for (; *s != '{'; s++) {
00573 if (*s == '\0') return NULL;
00574 }
00575 s++;
00576
00577 if (*s >= '0' && *s <= '9') {
00578 char *end;
00579
00580 *argno = strtoul(s, &end, 0);
00581 if (*end != ':') fatal("missing arg #");
00582 s = end + 1;
00583 }
00584
00585
00586 start = s;
00587 do {
00588 c = *s++;
00589 } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
00590
00591 cmd = FindCmd(start, s - start - 1);
00592 if (cmd == NULL) {
00593 error("Undefined command '%.*s'", s - start - 1, start);
00594 return NULL;
00595 }
00596
00597 if (c == '.') {
00598 const char *casep = s;
00599
00600 if (!(cmd->flags & C_CASE))
00601 fatal("Command '%s' can't have a case", cmd->cmd);
00602
00603 do c = *s++; while (c != '}' && c != ' ' && c != '\0');
00604 *casei = ResolveCaseName(casep, s - casep - 1);
00605 }
00606
00607 if (c == '\0') {
00608 error("Missing } from command '%s'", start);
00609 return NULL;
00610 }
00611
00612
00613 if (c != '}') {
00614 if (c == '=') s--;
00615
00616 start = s;
00617 for (;;) {
00618 c = *s++;
00619 if (c == '}') break;
00620 if (c == '\0') {
00621 error("Missing } from command '%s'", start);
00622 return NULL;
00623 }
00624 if (s - start == 250) fatal("param command too long");
00625 *param++ = c;
00626 }
00627 }
00628 *param = '\0';
00629
00630 *str = s;
00631
00632 return cmd;
00633 }
00634
00635
00636 static void HandlePragma(char *str)
00637 {
00638 if (!memcmp(str, "id ", 3)) {
00639 _next_string_id = strtoul(str + 3, NULL, 0);
00640 } else if (!memcmp(str, "name ", 5)) {
00641 ttd_strlcpy(_lang_name, str + 5, sizeof(_lang_name));
00642 } else if (!memcmp(str, "ownname ", 8)) {
00643 ttd_strlcpy(_lang_ownname, str + 8, sizeof(_lang_ownname));
00644 } else if (!memcmp(str, "isocode ", 8)) {
00645 ttd_strlcpy(_lang_isocode, str + 8, sizeof(_lang_isocode));
00646 } else if (!memcmp(str, "plural ", 7)) {
00647 _lang_pluralform = atoi(str + 7);
00648 if (_lang_pluralform >= lengthof(_plural_form_counts))
00649 fatal("Invalid pluralform %d", _lang_pluralform);
00650 } else if (!memcmp(str, "gender ", 7)) {
00651 char* buf = str + 7;
00652
00653 for (;;) {
00654 const char* s = ParseWord(&buf);
00655
00656 if (s == NULL) break;
00657 if (_numgenders >= MAX_NUM_GENDER) fatal("Too many genders, max %d", MAX_NUM_GENDER);
00658 ttd_strlcpy(_genders[_numgenders], s, sizeof(_genders[_numgenders]));
00659 _numgenders++;
00660 }
00661 } else if (!memcmp(str, "case ", 5)) {
00662 char* buf = str + 5;
00663
00664 for (;;) {
00665 const char* s = ParseWord(&buf);
00666
00667 if (s == NULL) break;
00668 if (_numcases >= MAX_NUM_CASES) fatal("Too many cases, max %d", MAX_NUM_CASES);
00669 ttd_strlcpy(_cases[_numcases], s, sizeof(_cases[_numcases]));
00670 _numcases++;
00671 }
00672 } else {
00673 fatal("unknown pragma '%s'", str);
00674 }
00675 }
00676
00677 static void ExtractCommandString(ParsedCommandStruct* p, const char* s, bool warnings)
00678 {
00679 char param[100];
00680 int argno;
00681 int argidx = 0;
00682 int casei;
00683
00684 memset(p, 0, sizeof(*p));
00685
00686 for (;;) {
00687
00688 const CmdStruct* ar = ParseCommandString(&s, param, &argno, &casei);
00689
00690 if (ar == NULL) break;
00691
00692
00693 if (argno != -1 && ar->consumes == 0) fatal("Non consumer param can't have a paramindex");
00694
00695 if (ar->consumes) {
00696 if (argno != -1) argidx = argno;
00697 if (argidx < 0 || argidx >= lengthof(p->cmd)) fatal("invalid param idx %d", argidx);
00698 if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) fatal("duplicate param idx %d", argidx);
00699
00700 p->cmd[argidx++] = ar;
00701 } else if (!(ar->flags & C_DONTCOUNT)) {
00702 if (p->np >= lengthof(p->pairs)) fatal("too many commands in string, max %d", lengthof(p->pairs));
00703 p->pairs[p->np].a = ar;
00704 p->pairs[p->np].v = param[0] != '\0' ? strdup(param) : "";
00705 p->np++;
00706 }
00707 }
00708 }
00709
00710
00711 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
00712 {
00713 if (a == NULL) return NULL;
00714
00715 if (strcmp(a->cmd, "STRING1") == 0 ||
00716 strcmp(a->cmd, "STRING2") == 0 ||
00717 strcmp(a->cmd, "STRING3") == 0 ||
00718 strcmp(a->cmd, "STRING4") == 0 ||
00719 strcmp(a->cmd, "STRING5") == 0) {
00720 return FindCmd("STRING", 6);
00721 }
00722
00723 if (strcmp(a->cmd, "SKIP") == 0) return NULL;
00724
00725 return a;
00726 }
00727
00728
00729 static bool CheckCommandsMatch(char *a, char *b, const char *name)
00730 {
00731 ParsedCommandStruct templ;
00732 ParsedCommandStruct lang;
00733 int i, j;
00734 bool result = true;
00735
00736 ExtractCommandString(&templ, b, true);
00737 ExtractCommandString(&lang, a, true);
00738
00739
00740 if (templ.np != lang.np) {
00741 warning("%s: template string and language string have a different # of commands", name);
00742 result = false;
00743 }
00744
00745 for (i = 0; i < templ.np; i++) {
00746
00747 bool found = false;
00748 for (j = 0; j < lang.np; j++) {
00749 if (templ.pairs[i].a == lang.pairs[j].a &&
00750 strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
00751
00752 lang.pairs[j].a = NULL;
00753 found = true;
00754 break;
00755 }
00756 }
00757
00758 if (!found) {
00759 warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
00760 result = false;
00761 }
00762 }
00763
00764
00765
00766 for (i = 0; i < lengthof(templ.cmd); i++) {
00767 if (TranslateCmdForCompare(templ.cmd[i]) != TranslateCmdForCompare(lang.cmd[i])) {
00768 warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
00769 lang.cmd[i] == NULL ? "<empty>" : lang.cmd[i]->cmd,
00770 templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
00771 result = false;
00772 }
00773 }
00774
00775 return result;
00776 }
00777
00778 static void HandleString(char *str, bool master)
00779 {
00780 char *s,*t;
00781 LangString *ent;
00782 char *casep;
00783
00784 if (*str == '#') {
00785 if (str[1] == '#' && str[2] != '#') HandlePragma(str + 2);
00786 return;
00787 }
00788
00789
00790 if (*str == ';' || *str == ' ' || *str == '\0') return;
00791
00792 s = strchr(str, ':');
00793 if (s == NULL) {
00794 error("Line has no ':' delimiter");
00795 return;
00796 }
00797
00798
00799
00800 for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--);
00801 *t = 0;
00802 s++;
00803
00804
00805 {
00806 const char *tmp;
00807 for (tmp = s; *tmp != '\0';) {
00808 size_t len = Utf8Validate(tmp);
00809 if (len == 0) fatal("Invalid UTF-8 sequence in '%s'", s);
00810 tmp += len;
00811 }
00812 }
00813
00814
00815
00816 casep = strchr(str, '.');
00817 if (casep) *casep++ = 0;
00818
00819
00820 ent = HashFind(str);
00821
00822 if (master) {
00823 if (ent != NULL && casep == NULL) {
00824 error("String name '%s' is used multiple times", str);
00825 return;
00826 }
00827
00828 if (ent == NULL && casep != NULL) {
00829 error("Base string name '%s' doesn't exist yet. Define it before defining a case.", str);
00830 return;
00831 }
00832
00833 if (ent == NULL) {
00834 if (_strings[_next_string_id]) {
00835 error("String ID 0x%X for '%s' already in use by '%s'", ent, str, _strings[_next_string_id]->name);
00836 return;
00837 }
00838
00839
00840 ent = CallocT<LangString>(1);
00841 _strings[_next_string_id] = ent;
00842 ent->index = _next_string_id++;
00843 ent->name = strdup(str);
00844 ent->line = _cur_line;
00845
00846 HashAdd(str, ent);
00847 }
00848
00849 if (casep != NULL) {
00850 Case* c = MallocT<Case>(1);
00851
00852 c->caseidx = ResolveCaseName(casep, strlen(casep));
00853 c->string = strdup(s);
00854 c->next = ent->english_case;
00855 ent->english_case = c;
00856 } else {
00857 ent->english = strdup(s);
00858 }
00859
00860 } else {
00861 if (ent == NULL) {
00862 warning("String name '%s' does not exist in master file", str);
00863 return;
00864 }
00865
00866 if (ent->translated && casep == NULL) {
00867 error("String name '%s' is used multiple times", str);
00868 return;
00869 }
00870
00871 if (s[0] == ':' && s[1] == '\0' && casep == NULL) {
00872
00873 ent->translated = strdup(ent->english);
00874 } else {
00875
00876 if (!CheckCommandsMatch(s, ent->english, str)) return;
00877
00878 if (casep != NULL) {
00879 Case* c = MallocT<Case>(1);
00880
00881 c->caseidx = ResolveCaseName(casep, strlen(casep));
00882 c->string = strdup(s);
00883 c->next = ent->translated_case;
00884 ent->translated_case = c;
00885 } else {
00886 ent->translated = strdup(s);
00887 }
00888 }
00889 }
00890 }
00891
00892
/**
 * Remove trailing carriage returns, newlines and spaces from a string, in place.
 * Tabs are left alone.
 * @param buf The string to trim.
 */
static void rstrip(char *buf)
{
	char *end = buf + strlen(buf);

	while (end > buf) {
		char last = end[-1];
		if (last != '\r' && last != '\n' && last != ' ') break;
		end--;
	}
	*end = '\0';
}
00899
00900
00901 static void ParseFile(const char *file, bool english)
00902 {
00903 FILE *in;
00904 char buf[2048];
00905
00906 _file = file;
00907
00908
00909 _numgenders = 0;
00910 _lang_name[0] = _lang_ownname[0] = _lang_isocode[0] = '\0';
00911
00912
00913
00914 in = fopen(file, "r");
00915 if (in == NULL) fatal("Cannot open file");
00916 _cur_line = 1;
00917 while (fgets(buf, sizeof(buf), in) != NULL) {
00918 rstrip(buf);
00919 HandleString(buf, english);
00920 _cur_line++;
00921 }
00922 fclose(in);
00923
00924 if (StrEmpty(_lang_name) || StrEmpty(_lang_ownname) || StrEmpty(_lang_isocode)) {
00925 fatal("Language must include ##name, ##ownname and ##isocode");
00926 }
00927 }
00928
00929
00930 static uint32 MyHashStr(uint32 hash, const char *s)
00931 {
00932 for (; *s != '\0'; s++) {
00933 hash = ROL(hash, 3) ^ *s;
00934 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00935 }
00936 return hash;
00937 }
00938
00939
00940
00941 static void MakeHashOfStrings()
00942 {
00943 uint32 hash = 0;
00944 uint i;
00945
00946 for (i = 0; i != lengthof(_strings); i++) {
00947 const LangString* ls = _strings[i];
00948
00949 if (ls != NULL) {
00950 const CmdStruct* cs;
00951 const char* s;
00952 char buf[256];
00953 int argno;
00954 int casei;
00955
00956 s = ls->name;
00957 hash ^= i * 0x717239;
00958 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00959 hash = MyHashStr(hash, s + 1);
00960
00961 s = ls->english;
00962 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
00963 if (cs->flags & C_DONTCOUNT) continue;
00964
00965 hash ^= (cs - _cmd_structs) * 0x1234567;
00966 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
00967 }
00968 }
00969 }
00970 _hash = hash;
00971 }
00972
00973
00974 static uint CountInUse(uint grp)
00975 {
00976 int i;
00977
00978 for (i = 0x800; --i >= 0;) if (_strings[(grp << 11) + i] != NULL) break;
00979 return i + 1;
00980 }
00981
00982
00983 bool CompareFiles(const char *n1, const char *n2)
00984 {
00985 FILE *f1, *f2;
00986 char b1[4096];
00987 char b2[4096];
00988 size_t l1, l2;
00989
00990 f2 = fopen(n2, "rb");
00991 if (f2 == NULL) return false;
00992
00993 f1 = fopen(n1, "rb");
00994 if (f1 == NULL) fatal("can't open %s", n1);
00995
00996 do {
00997 l1 = fread(b1, 1, sizeof(b1), f1);
00998 l2 = fread(b2, 1, sizeof(b2), f2);
00999
01000 if (l1 != l2 || memcmp(b1, b2, l1)) {
01001 fclose(f2);
01002 fclose(f1);
01003 return false;
01004 }
01005 } while (l1);
01006
01007 fclose(f2);
01008 fclose(f1);
01009 return true;
01010 }
01011
01012
01013 static void WriteStringsH(const char *filename)
01014 {
01015 FILE *out;
01016 int i;
01017 int next = -1;
01018
01019 out = fopen("tmp.xxx", "w");
01020 if (out == NULL) fatal("can't open tmp.xxx");
01021
01022 fprintf(out, "/* This file is automatically generated. Do not modify */\n\n");
01023 fprintf(out, "#ifndef TABLE_STRINGS_H\n");
01024 fprintf(out, "#define TABLE_STRINGS_H\n");
01025
01026 for (i = 0; i != lengthof(_strings); i++) {
01027 if (_strings[i] != NULL) {
01028 if (next != i) fprintf(out, "\n");
01029 fprintf(out, "static const StringID %s = 0x%X;\n", _strings[i]->name, i);
01030 next = i + 1;
01031 }
01032 }
01033
01034 fprintf(out, "\nstatic const StringID STR_LAST_STRINGID = 0x%X;\n", next - 1);
01035
01036 fprintf(out,
01037 "\nenum {\n"
01038 "\tLANGUAGE_PACK_IDENT = 0x474E414C, // Big Endian value for 'LANG' (LE is 0x 4C 41 4E 47)\n"
01039 "\tLANGUAGE_PACK_VERSION = 0x%X,\n"
01040 "};\n", (uint)_hash
01041 );
01042
01043 fprintf(out, "\n#endif /* TABLE_STRINGS_H */\n");
01044
01045 fclose(out);
01046
01047 if (CompareFiles("tmp.xxx", filename)) {
01048
01049 unlink("tmp.xxx");
01050 } else {
01051
01052 #if defined(WIN32) || defined(WIN64)
01053 unlink(filename);
01054 #endif
01055 if (rename("tmp.xxx", filename) == -1) fatal("rename() failed");
01056 }
01057 }
01058
01059 static int TranslateArgumentIdx(int argidx)
01060 {
01061 int i, sum;
01062
01063 if (argidx < 0 || argidx >= lengthof(_cur_pcs.cmd))
01064 fatal("invalid argidx %d", argidx);
01065
01066 for (i = sum = 0; i < argidx; i++) {
01067 const CmdStruct *cs = _cur_pcs.cmd[i];
01068 sum += (cs != NULL) ? cs->consumes : 1;
01069 }
01070
01071 return sum;
01072 }
01073
01074 static void PutArgidxCommand()
01075 {
01076 PutUtf8(SCC_ARG_INDEX);
01077 PutByte(TranslateArgumentIdx(_cur_argidx));
01078 }
01079
01080
01081 static void PutCommandString(const char *str)
01082 {
01083 const CmdStruct *cs;
01084 char param[256];
01085 int argno;
01086 int casei;
01087
01088 _cur_argidx = 0;
01089
01090 while (*str != '\0') {
01091
01092 if (*str != '{') {
01093 PutByte(*str++);
01094 continue;
01095 }
01096 cs = ParseCommandString(&str, param, &argno, &casei);
01097 if (cs == NULL) break;
01098
01099 if (casei != -1) {
01100 PutUtf8(SCC_SETCASE);
01101 PutByte(casei);
01102 }
01103
01104
01105 if (cs->consumes > 0) {
01106
01107 if (argno != -1 && argno != _cur_argidx) {
01108 _cur_argidx = argno;
01109 PutArgidxCommand();
01110 }
01111
01112
01113 cs = _cur_pcs.cmd[_cur_argidx++];
01114 if (cs == NULL) {
01115 fatal("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
01116 }
01117 }
01118
01119 cs->proc(param, cs->value);
01120 }
01121 }
01122
01123 static void WriteLength(FILE *f, uint length)
01124 {
01125 if (length < 0xC0) {
01126 fputc(length, f);
01127 } else if (length < 0x4000) {
01128 fputc((length >> 8) | 0xC0, f);
01129 fputc(length & 0xFF, f);
01130 } else {
01131 fatal("string too long");
01132 }
01133 }
01134
01135
01136 static void WriteLangfile(const char *filename)
01137 {
01138 FILE *f;
01139 uint in_use[32];
01140 LanguagePackHeader hdr;
01141 uint i;
01142 uint j;
01143
01144 f = fopen(filename, "wb");
01145 if (f == NULL) fatal("can't open %s", filename);
01146
01147 memset(&hdr, 0, sizeof(hdr));
01148 for (i = 0; i != 32; i++) {
01149 uint n = CountInUse(i);
01150
01151 in_use[i] = n;
01152 hdr.offsets[i] = TO_LE16(n);
01153 }
01154
01155
01156 hdr.ident = TO_LE32(0x474E414C);
01157 hdr.version = TO_LE32(_hash);
01158 hdr.plural_form = _lang_pluralform;
01159 strcpy(hdr.name, _lang_name);
01160 strcpy(hdr.own_name, _lang_ownname);
01161 strcpy(hdr.isocode, _lang_isocode);
01162
01163 fwrite(&hdr, sizeof(hdr), 1, f);
01164
01165 for (i = 0; i != 32; i++) {
01166 for (j = 0; j != in_use[i]; j++) {
01167 const LangString* ls = _strings[(i << 11) + j];
01168 const Case* casep;
01169 const char* cmdp;
01170
01171
01172 if (ls == NULL) {
01173 WriteLength(f, 0);
01174 continue;
01175 }
01176
01177 _cur_ident = ls->name;
01178 _cur_line = ls->line;
01179
01180
01181 if (_show_todo > 0 && ls->translated == NULL) {
01182 if ((_show_todo & 2) != 0) {
01183 warning("'%s' is untranslated", ls->name);
01184 }
01185 if ((_show_todo & 1) != 0) {
01186 const char *s = "<TODO> ";
01187 while (*s != '\0') PutByte(*s++);
01188 }
01189 }
01190
01191
01192 ExtractCommandString(&_cur_pcs, ls->english, false);
01193
01194 if (ls->translated_case != NULL || ls->translated != NULL) {
01195 casep = ls->translated_case;
01196 cmdp = ls->translated;
01197 } else {
01198 casep = ls->english_case;
01199 cmdp = ls->english;
01200 }
01201
01202 _translated = _masterlang || (cmdp != ls->english);
01203
01204 if (casep != NULL) {
01205 const Case* c;
01206 uint num;
01207
01208
01209
01210
01211
01212 PutUtf8(SCC_SWITCH_CASE);
01213
01214 for (num = 0, c = casep; c; c = c->next) num++;
01215 PutByte(num);
01216
01217
01218 for (c = casep; c != NULL; c = c->next) {
01219 int pos;
01220
01221 PutByte(c->caseidx);
01222
01223 pos = _put_pos;
01224 PutByte(0);
01225 PutByte(0);
01226
01227 PutCommandString(c->string);
01228 PutByte(0);
01229
01230 _put_buf[pos + 0] = GB(_put_pos - (pos + 2), 8, 8);
01231 _put_buf[pos + 1] = GB(_put_pos - (pos + 2), 0, 8);
01232 }
01233 }
01234
01235 if (cmdp != NULL) PutCommandString(cmdp);
01236
01237 WriteLength(f, _put_pos);
01238 fwrite(_put_buf, 1, _put_pos, f);
01239 _put_pos = 0;
01240 }
01241 }
01242
01243 fputc(0, f);
01244 fclose(f);
01245 }
01246
/**
 * Create a directory, using whichever mkdir() signature the build provides.
 * The result of mkdir() is not inspected.
 * @param directory path of the directory to create
 */
static inline void ottd_mkdir(const char *directory)
{
#if !defined(WIN32) && !defined(__WATCOMC__)
	/* Two-argument mkdir(): the mode 0755 is passed explicitly. */
	mkdir(directory, 0755);
#else
	/* WIN32 and Watcom builds use the one-argument mkdir(). */
	mkdir(directory);
#endif
}
01256
01260 static inline char *mkpath(char *buf, size_t buflen, const char *path, const char *file)
01261 {
01262 char *p;
01263 ttd_strlcpy(buf, path, buflen);
01264
01265 p = strchr(buf, '\0');
01266 if (p[-1] != PATHSEPCHAR && (size_t)(p - buf) + 1 < buflen) *p++ = PATHSEPCHAR;
01267 ttd_strlcpy(p, file, buflen - (size_t)(p - buf));
01268 return buf;
01269 }
01270
#if defined(__MINGW32__)
/**
 * Turn every forward slash of a path into a backslash, in place.
 * This variant is only compiled for MinGW builds.
 * @param s path to convert; it is modified in place
 * @return s
 */
static inline char *replace_pathsep(char *s)
{
	char *slash = strchr(s, '/');

	while (slash != NULL) {
		*slash = '\\';
		slash = strchr(slash + 1, '/');
	}
	return s;
}
#else
/**
 * No separator conversion is done in this build; the path is handed back untouched.
 * @param s path
 * @return s
 */
static inline char *replace_pathsep(char *s)
{
	return s;
}
#endif
01287
01288 int CDECL main(int argc, char* argv[])
01289 {
01290 char pathbuf[256];
01291 const char *src_dir = ".";
01292 const char *dest_dir = NULL;
01293
01294 while (argc > 1 && *argv[1] == '-') {
01295 if (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0) {
01296 puts("$Revision: 14268 $");
01297 return 0;
01298 }
01299
01300 if (strcmp(argv[1], "-t") == 0 || strcmp(argv[1], "--todo") == 0) {
01301 _show_todo |= 1;
01302 argc--, argv++;
01303 continue;
01304 }
01305
01306 if (strcmp(argv[1], "-w") == 0 || strcmp(argv[1], "--warning") == 0) {
01307 _show_todo |= 2;
01308 argc--, argv++;
01309 continue;
01310 }
01311
01312 if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) {
01313 puts(
01314 "strgen - $Revision: 14268 $\n"
01315 " -v | --version print version information and exit\n"
01316 " -t | --todo replace any untranslated strings with '<TODO>'\n"
01317 " -w | --warning print a warning for any untranslated strings\n"
01318 " -h | -? | --help print this help message and exit\n"
01319 " -s | --source_dir search for english.txt in the specified directory\n"
01320 " -d | --dest_dir put output file in the specified directory, create if needed\n"
01321 " Run without parameters and strgen will search for english.txt and parse it,\n"
01322 " creating strings.h. Passing an argument, strgen will translate that language\n"
01323 " file using english.txt as a reference and output <language>.lng."
01324 );
01325 return 0;
01326 }
01327
01328 if (argc > 2 && (strcmp(argv[1], "-s") == 0 || strcmp(argv[1], "--source_dir") == 0)) {
01329 src_dir = replace_pathsep(argv[2]);
01330 argc -= 2, argv += 2;
01331 continue;
01332 }
01333
01334 if (argc > 2 && (strcmp(argv[1], "-d") == 0 || strcmp(argv[1], "--dest_dir") == 0)) {
01335 dest_dir = replace_pathsep(argv[2]);
01336 argc -= 2, argv += 2;
01337 continue;
01338 }
01339
01340 fprintf(stderr, "Invalid arguments\n");
01341 return 0;
01342 }
01343
01344 if (dest_dir == NULL) dest_dir = src_dir;
01345
01346
01347
01348
01349
01350 if (argc == 1) {
01351 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01352
01353
01354 _masterlang = true;
01355 ParseFile(pathbuf, true);
01356 MakeHashOfStrings();
01357 if (_errors) return 1;
01358
01359
01360 ottd_mkdir(dest_dir);
01361 mkpath(pathbuf, lengthof(pathbuf), dest_dir, "strings.h");
01362 WriteStringsH(pathbuf);
01363 } else if (argc == 2) {
01364 char *r;
01365
01366 mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01367
01368
01369 _masterlang = false;
01370 ParseFile(pathbuf, true);
01371 MakeHashOfStrings();
01372 ParseFile(replace_pathsep(argv[1]), false);
01373 if (_errors) return 1;
01374
01375
01376 r = strrchr(argv[1], PATHSEPCHAR);
01377 mkpath(pathbuf, lengthof(pathbuf), dest_dir, (r != NULL) ? &r[1] : argv[1]);
01378
01379
01380 r = strrchr(pathbuf, '.');
01381 if (r == NULL || strcmp(r, ".txt") != 0) r = strchr(pathbuf, '\0');
01382 ttd_strlcpy(r, ".lng", (size_t)(r - pathbuf));
01383 WriteLangfile(pathbuf);
01384
01385
01386 if ((_show_todo & 2) != 0) {
01387 fprintf(stdout, "%d warnings and %d errors for %s\n", _warnings, _errors, pathbuf);
01388 }
01389 } else {
01390 fprintf(stderr, "Invalid arguments\n");
01391 }
01392
01393 return 0;
01394 }