strgen.cpp

Go to the documentation of this file.
00001 /* $Id: strgen.cpp 15456 2009-02-12 10:42:22Z rubidium $ */
00002 
00005 #include "../stdafx.h"
00006 #include "../core/alloc_func.hpp"
00007 #include "../core/endian_func.hpp"
00008 #include "../string_func.h"
00009 #include "../strings_type.h"
00010 #include "strgen.h"
00011 #include "../table/control_codes.h"
00012 
00013 #include <stdio.h>
00014 #include <string.h>
00015 #include <stdlib.h>
00016 #include <stdarg.h>
00017 
00018 #if (!defined(WIN32) && !defined(WIN64)) || defined(__CYGWIN__)
00019 #include <unistd.h>
00020 #include <sys/types.h>
00021 #include <sys/stat.h>
00022 #endif
00023 
00024 #if defined WIN32 || defined __WATCOMC__
00025 #include <direct.h>
00026 #endif /* WIN32 || __WATCOMC__ */
00027 
00028 #ifdef __MORPHOS__
00029 #ifdef stderr
00030 #undef stderr
00031 #endif
00032 #define stderr stdout
00033 #endif /* __MORPHOS__ */
00034 
00035 /* Compiles a list of strings into a compiled string list */
00036 
/* Signature of the emitter callbacks stored in CmdStruct::proc.
 * The emitters in this file treat `buf` as the parameter text of the command.
 * NOTE(review): `value` is presumably CmdStruct::value of the matched entry;
 * the call site is outside this part of the file -- confirm. */
00037 typedef void (*ParseCmdProc)(char *buf, int value);
00038 
/* Description of one command that may appear between '{' and '}' in a string. */
00039 struct CmdStruct {
00040   const char *cmd; // command name as written in the language files (matched by FindCmd)
00041   ParseCmdProc proc; // emitter that writes the encoded form of the command
00042   long value; // per-command constant; the table stores a control code or a literal character here
00043   int8 consumes; // number of string arguments the command consumes, 0 for none
00044   byte flags; // bitmask of C_DONTCOUNT / C_CASE
00045 };
00046 
/* Bits for CmdStruct::flags. */
00047 enum {
00048   C_DONTCOUNT = 1, // command is skipped when comparing template/translation commands and when hashing
00049   C_CASE      = 2, // command may carry a ".casename" suffix
00050 };
00051 
00052 
/* One case variant of a string; variants of the same string form a singly linked list. */
00053 struct Case {
00054   int caseidx; // value returned by ResolveCaseName for the case name
00055   char *string; // text of this case variant (allocated with strdup)
00056   Case *next; // next variant, or NULL at the end of the list
00057 };
00058 
/* Global parser state. */
00059 static bool _masterlang; // NOTE(review): not referenced in this part of the file
00060 static bool _translated; // read by EmitPlural: when set, a wrong number of plural forms is fatal
00061 static const char *_file = "(unknown file)"; // file name printed in warnings and errors
00062 static int _cur_line; // line number printed in warnings and errors
00063 static int _errors, _warnings, _show_todo; // message counters; bit 2 of _show_todo enables the "untranslated" plural warning
00064 
/* Everything known about one string: its name, its texts and its case variants. */
00065 struct LangString {
00066   char *name;            // Name of the string
00067   char *english;         // English text
00068   char *translated;      // Translated text
00069   uint16 hash_next;      // next hash entry
00070   uint16 index;
00071   int line;              // line of string in source-file
00072   Case *english_case;    // cases for english
00073   Case *translated_case; // cases for foreign
00074 };
/* hash_next holds (index + 1) of the next string in the same hash bucket, 0 ends the chain
 * (see HashAdd/HashFind). index is the slot of this string in _strings. */
00075 
/* All known strings, indexed by string id; unused slots are NULL. */
00076 static LangString *_strings[65536];
00077 
00078 
/* Number of buckets of the name hash. Each bucket head stores (index + 1)
 * of the most recently added string, or 0 when the bucket is empty. */
00079 #define HASH_SIZE 32767
00080 static uint16 _hash_head[HASH_SIZE];
00081 
/* Output buffer filled by PutByte, and the number of bytes currently in it. */
00082 static byte _put_buf[4096];
00083 static int _put_pos;
/* Id given to the next new string in the master file; changed by the "##id" pragma. */
00084 static int _next_string_id;
00085 
/* Result of MakeHashOfStrings. */
00086 static uint32 _hash;
/* Language metadata, filled in by HandlePragma. */
00087 static char _lang_name[32], _lang_ownname[32], _lang_isocode[16];
00088 static byte _lang_pluralform;
00089 static byte _lang_textdir;
00090 static uint16 _lang_winlangid;
/* Gender names of the current language ("##gender" pragma) and how many are defined. */
00091 #define MAX_NUM_GENDER 8
00092 static char _genders[MAX_NUM_GENDER][16];
00093 static uint _numgenders;
00094 
00095 // contains the name of all cases.
00096 #define MAX_NUM_CASES 50
00097 static char _cases[MAX_NUM_CASES][16];
00098 static uint _numcases;
00099 
00100 // for each plural value, this is the number of plural forms.
00101 static const byte _plural_form_counts[] = { 2, 1, 2, 3, 3, 3, 3, 3, 4, 2 };
00102 
/* Name of the string being processed; used in the error messages of EmitPlural. */
00103 static const char *_cur_ident;
00104 
/* A non-consuming command together with its parameter text. */
00105 struct CmdPair {
00106   const CmdStruct *a; // the command
00107   const char *v; // its parameter text ("" when the command had none)
00108 };
00109 
/* The commands found in one string, as collected by ExtractCommandString. */
00110 struct ParsedCommandStruct {
00111   uint np; // number of used entries in pairs
00112   CmdPair pairs[32]; // non-consuming commands that are not flagged C_DONTCOUNT
00113   const CmdStruct *cmd[32]; // ordered by param #
00114 };
00115 
00116 // Used when generating some advanced commands.
00117 static ParsedCommandStruct _cur_pcs;
00118 static int _cur_argidx; // default argument index read by EmitPlural and EmitGender
00119 
00120 static uint HashStr(const char *s)
00121 {
00122   uint hash = 0;
00123   for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
00124   return hash % HASH_SIZE;
00125 }
00126 
00127 static void HashAdd(const char *s, LangString *ls)
00128 {
00129   uint hash = HashStr(s);
00130   ls->hash_next = _hash_head[hash];
00131   _hash_head[hash] = ls->index + 1;
00132 }
00133 
00134 static LangString *HashFind(const char *s)
00135 {
00136   int idx = _hash_head[HashStr(s)];
00137 
00138   while (--idx >= 0) {
00139     LangString *ls = _strings[idx];
00140 
00141     if (strcmp(ls->name, s) == 0) return ls;
00142     idx = ls->hash_next;
00143   }
00144   return NULL;
00145 }
00146 
00147 #ifdef _MSC_VER
00148 # define LINE_NUM_FMT "(%d)"
00149 #else
00150 # define LINE_NUM_FMT ":%d"
00151 #endif
00152 
00153 static void CDECL strgen_warning(const char *s, ...)
00154 {
00155   char buf[1024];
00156   va_list va;
00157   va_start(va, s);
00158   vsnprintf(buf, lengthof(buf), s, va);
00159   va_end(va);
00160   fprintf(stderr, "%s" LINE_NUM_FMT ": warning: %s\n", _file, _cur_line, buf);
00161   _warnings++;
00162 }
00163 
00164 static void CDECL strgen_error(const char *s, ...)
00165 {
00166   char buf[1024];
00167   va_list va;
00168   va_start(va, s);
00169   vsnprintf(buf, lengthof(buf), s, va);
00170   va_end(va);
00171   fprintf(stderr, "%s" LINE_NUM_FMT ": error: %s\n", _file, _cur_line, buf);
00172   _errors++;
00173 }
00174 
00175 void NORETURN CDECL error(const char *s, ...)
00176 {
00177   char buf[1024];
00178   va_list va;
00179   va_start(va, s);
00180   vsnprintf(buf, lengthof(buf), s, va);
00181   va_end(va);
00182   fprintf(stderr, "%s" LINE_NUM_FMT ": FATAL: %s\n", _file, _cur_line, buf);
00183   exit(1);
00184 }
00185 
00186 static void PutByte(byte c)
00187 {
00188   if (_put_pos == lengthof(_put_buf)) error("Put buffer too small");
00189   _put_buf[_put_pos++] = c;
00190 }
00191 
00192 
00193 static void PutUtf8(uint32 value)
00194 {
00195   if (value < 0x80) {
00196     PutByte(value);
00197   } else if (value < 0x800) {
00198     PutByte(0xC0 + GB(value,  6, 5));
00199     PutByte(0x80 + GB(value,  0, 6));
00200   } else if (value < 0x10000) {
00201     PutByte(0xE0 + GB(value, 12, 4));
00202     PutByte(0x80 + GB(value,  6, 6));
00203     PutByte(0x80 + GB(value,  0, 6));
00204   } else if (value < 0x110000) {
00205     PutByte(0xF0 + GB(value, 18, 3));
00206     PutByte(0x80 + GB(value, 12, 6));
00207     PutByte(0x80 + GB(value,  6, 6));
00208     PutByte(0x80 + GB(value,  0, 6));
00209   } else {
00210     strgen_warning("Invalid unicode value U+0x%X", value);
00211   }
00212 }
00213 
00214 
00215 size_t Utf8Validate(const char *s)
00216 {
00217   uint32 c;
00218 
00219   if (!HasBit(s[0], 7)) {
00220     /* 1 byte */
00221     return 1;
00222   } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
00223     /* 2 bytes */
00224     c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
00225     if (c >= 0x80) return 2;
00226   } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
00227     /* 3 bytes */
00228     c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
00229     if (c >= 0x800) return 3;
00230   } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
00231     /* 4 bytes */
00232     c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
00233     if (c >= 0x10000 && c <= 0x10FFFF) return 4;
00234   }
00235 
00236   return 0;
00237 }
00238 
00239 
00240 static void EmitSingleChar(char *buf, int value)
00241 {
00242   if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
00243   PutUtf8(value);
00244 }
00245 
00246 
00247 static void EmitSetX(char *buf, int value)
00248 {
00249   char *err;
00250   int x = strtol(buf, &err, 0);
00251   if (*err != 0) error("SetX param invalid");
00252   PutUtf8(SCC_SETX);
00253   PutByte((byte)x);
00254 }
00255 
00256 
00257 static void EmitSetXY(char *buf, int value)
00258 {
00259   char *err;
00260   int x;
00261   int y;
00262 
00263   x = strtol(buf, &err, 0);
00264   if (*err != ' ') error("SetXY param invalid");
00265   y = strtol(err + 1, &err, 0);
00266   if (*err != 0) error("SetXY param invalid");
00267 
00268   PutUtf8(SCC_SETXY);
00269   PutByte((byte)x);
00270   PutByte((byte)y);
00271 }
00272 
00273 // The plural specifier looks like
00274 // {NUM} {P -1 passenger passengers}; it picks either "passenger" or "passengers" depending on the count in NUM.
00275 
00276 // This is encoded like
00277 //  CommandByte <ARG#> <NUM> {Length of each string} {each string}
00278 
/**
 * Parse an optional argument number from the start of a parameter string.
 * A number written with a leading '+', and any negative number, is added to
 * *value; any other number replaces *value.
 * @param buf   In: text to parse. Out: advanced past the number on success,
 *              untouched on failure.
 * @param value In: current argument index. Out: updated index on success.
 * @return true when a number was parsed, false when there was none.
 */
bool ParseRelNum(char **buf, int *value)
{
  const char *p = *buf;

  /* Skip leading blanks. */
  while (*p == ' ' || *p == '\t') p++;

  const bool relative = (*p == '+');
  if (relative) p++;

  char *stop;
  int num = strtol(p, &stop, 0);
  if (stop == p) return false;

  if (relative || num < 0) {
    *value += num;
  } else {
    *value = num;
  }

  *buf = stop;
  return true;
}
00301 
00302 // Parse out the next word, or NULL
/**
 * Cut the next word out of a parameter string.
 * A word is either a run of characters without space/tab, or the text
 * between a double quote and the next double quote (or the end of the
 * string). The word is terminated in place by overwriting its delimiter.
 * @param buf In: text to parse. Out: position right after the word; left
 *            untouched when no word was found.
 * @return Start of the word, or NULL when only blanks remain.
 */
char *ParseWord(char **buf)
{
  char *p = *buf;

  while (*p == ' ' || *p == '\t') p++;
  if (*p == '\0') return NULL;

  char *word;
  if (*p == '"') {
    /* Quoted word: runs until the closing quote or the end of the string. */
    word = ++p;
    while (*p != '\0' && *p != '"') p++;
  } else {
    /* Plain word: runs until whitespace or the end of the string. */
    word = p;
    while (*p != '\0' && *p != ' ' && *p != '\t') p++;
  }

  /* Replace the delimiter, if any, by the terminator and step over it. */
  if (*p != '\0') *p++ = '\0';

  *buf = p;
  return word;
}
00336 
00337 // Forward declaration
00338 static int TranslateArgumentIdx(int arg);
00339 
00340 static void EmitWordList(const char * const *words, uint nw)
00341 {
00342   uint i;
00343   uint j;
00344 
00345   PutByte(nw);
00346   for (i = 0; i < nw; i++) PutByte(strlen(words[i]));
00347   for (i = 0; i < nw; i++) {
00348     for (j = 0; words[i][j] != '\0'; j++) PutByte(words[i][j]);
00349   }
00350 }
00351 
00352 static void EmitPlural(char *buf, int value)
00353 {
00354   int argidx = _cur_argidx;
00355   const char *words[5];
00356   int nw = 0;
00357 
00358   // Parse out the number, if one exists. Otherwise default to prev arg.
00359   if (!ParseRelNum(&buf, &argidx)) argidx--;
00360 
00361   // Parse each string
00362   for (nw = 0; nw < 5; nw++) {
00363     words[nw] = ParseWord(&buf);
00364     if (words[nw] == NULL) break;
00365   }
00366 
00367   if (nw == 0)
00368     error("%s: No plural words", _cur_ident);
00369 
00370   if (_plural_form_counts[_lang_pluralform] != nw) {
00371     if (_translated) {
00372       error("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
00373         _plural_form_counts[_lang_pluralform], nw);
00374     } else {
00375       if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
00376       if (nw > _plural_form_counts[_lang_pluralform]) {
00377         nw = _plural_form_counts[_lang_pluralform];
00378       } else {
00379         for (; nw < _plural_form_counts[_lang_pluralform]; nw++) {
00380           words[nw] = words[nw - 1];
00381         }
00382       }
00383     }
00384   }
00385 
00386   PutUtf8(SCC_PLURAL_LIST);
00387   PutByte(TranslateArgumentIdx(argidx));
00388   EmitWordList(words, nw);
00389 }
00390 
00391 
00392 static void EmitGender(char *buf, int value)
00393 {
00394   int argidx = _cur_argidx;
00395   uint nw;
00396 
00397   if (buf[0] == '=') {
00398     buf++;
00399 
00400     // This is a {G=DER} command
00401     for (nw = 0; ; nw++) {
00402       if (nw >= 8) error("G argument '%s' invalid", buf);
00403       if (strcmp(buf, _genders[nw]) == 0) break;
00404     }
00405     // now nw contains the gender index
00406     PutUtf8(SCC_GENDER_INDEX);
00407     PutByte(nw);
00408   } else {
00409     const char *words[8];
00410 
00411     // This is a {G 0 foo bar two} command.
00412     // If no relative number exists, default to +0
00413     if (!ParseRelNum(&buf, &argidx)) {}
00414 
00415     for (nw = 0; nw < 8; nw++) {
00416       words[nw] = ParseWord(&buf);
00417       if (words[nw] == NULL) break;
00418     }
00419     if (nw != _numgenders) error("Bad # of arguments for gender command");
00420     PutUtf8(SCC_GENDER_LIST);
00421     PutByte(TranslateArgumentIdx(argidx));
00422     EmitWordList(words, nw);
00423   }
00424 }
00425 
00426 
/* Table of all commands that may appear between '{' and '}' in a string.
 * Columns: command name, emitter, value, number of consumed arguments, flags.
 * FindCmd searches the table linearly by name.
 * MakeHashOfStrings mixes the position of an entry (cs - _cmd_structs) into
 * the computed hash, so inserting or reordering entries changes that hash. */
00427 static const CmdStruct _cmd_structs[] = {
00428   // Update position
00429   {"SETX",  EmitSetX,  SCC_SETX,  0, 0},
00430   {"SETXY", EmitSetXY, SCC_SETXY, 0, 0},
00431 
00432   // Font size
00433   {"TINYFONT", EmitSingleChar, SCC_TINYFONT, 0, 0},
00434   {"BIGFONT",  EmitSingleChar, SCC_BIGFONT,  0, 0},
00435 
00436   // Colors
00437   {"BLUE",    EmitSingleChar, SCC_BLUE,    0, 0},
00438   {"SILVER",  EmitSingleChar, SCC_SILVER,  0, 0},
00439   {"GOLD",    EmitSingleChar, SCC_GOLD,    0, 0},
00440   {"RED",     EmitSingleChar, SCC_RED,     0, 0},
00441   {"PURPLE",  EmitSingleChar, SCC_PURPLE,  0, 0},
00442   {"LTBROWN", EmitSingleChar, SCC_LTBROWN, 0, 0},
00443   {"ORANGE",  EmitSingleChar, SCC_ORANGE,  0, 0},
00444   {"GREEN",   EmitSingleChar, SCC_GREEN,   0, 0},
00445   {"YELLOW",  EmitSingleChar, SCC_YELLOW,  0, 0},
00446   {"DKGREEN", EmitSingleChar, SCC_DKGREEN, 0, 0},
00447   {"CREAM",   EmitSingleChar, SCC_CREAM,   0, 0},
00448   {"BROWN",   EmitSingleChar, SCC_BROWN,   0, 0},
00449   {"WHITE",   EmitSingleChar, SCC_WHITE,   0, 0},
00450   {"LTBLUE",  EmitSingleChar, SCC_LTBLUE,  0, 0},
00451   {"GRAY",    EmitSingleChar, SCC_GRAY,    0, 0},
00452   {"DKBLUE",  EmitSingleChar, SCC_DKBLUE,  0, 0},
00453   {"BLACK",   EmitSingleChar, SCC_BLACK,   0, 0},
00454 
00455   {"CURRCOMPACT",   EmitSingleChar, SCC_CURRENCY_COMPACT,    1, 0}, // compact currency
00456   {"REV",           EmitSingleChar, SCC_REVISION,            0, 0}, // openttd revision string
00457   {"SHORTCARGO",    EmitSingleChar, SCC_CARGO_SHORT,         2, 0}, // short cargo description, only ### tons, or ### litres
00458 
00459   {"STRING1", EmitSingleChar, SCC_STRING1, 2, C_CASE}, // included string that consumes the string id and ONE argument
00460   {"STRING2", EmitSingleChar, SCC_STRING2, 3, C_CASE}, // included string that consumes the string id and TWO arguments
00461   {"STRING3", EmitSingleChar, SCC_STRING3, 4, C_CASE}, // included string that consumes the string id and THREE arguments
00462   {"STRING4", EmitSingleChar, SCC_STRING4, 5, C_CASE}, // included string that consumes the string id and FOUR arguments
00463   {"STRING5", EmitSingleChar, SCC_STRING5, 6, C_CASE}, // included string that consumes the string id and FIVE arguments
00464 
00465   {"STATIONFEATURES", EmitSingleChar, SCC_STATION_FEATURES, 1, 0}, // station features string, icons of the features
00466   {"INDUSTRY",        EmitSingleChar, SCC_INDUSTRY_NAME,    1, 0}, // industry, takes an industry #
00467   {"CARGO",           EmitSingleChar, SCC_CARGO,            2, 0},
00468   {"POWER",           EmitSingleChar, SCC_POWER,            1, 0},
00469   {"VOLUME",          EmitSingleChar, SCC_VOLUME,           1, 0},
00470   {"VOLUME_S",        EmitSingleChar, SCC_VOLUME_SHORT,     1, 0},
00471   {"WEIGHT",          EmitSingleChar, SCC_WEIGHT,           1, 0},
00472   {"WEIGHT_S",        EmitSingleChar, SCC_WEIGHT_SHORT,     1, 0},
00473   {"FORCE",           EmitSingleChar, SCC_FORCE,            1, 0},
00474   {"VELOCITY",        EmitSingleChar, SCC_VELOCITY,         1, 0},
00475 
00476   {"P", EmitPlural, 0, 0, C_DONTCOUNT}, // plural specifier
00477   {"G", EmitGender, 0, 0, C_DONTCOUNT}, // gender specifier
00478 
00479   {"DATE_TINY",  EmitSingleChar, SCC_DATE_TINY, 1, 0},
00480   {"DATE_SHORT", EmitSingleChar, SCC_DATE_SHORT, 1, 0},
00481   {"DATE_LONG",  EmitSingleChar, SCC_DATE_LONG, 1, 0},
00482   {"DATE_ISO",   EmitSingleChar, SCC_DATE_ISO, 1, 0},
00483 
00484   {"SKIP", EmitSingleChar, SCC_SKIP, 1, 0},
00485 
00486   {"STRING", EmitSingleChar, SCC_STRING, 1, C_CASE},
00487   {"RAW_STRING", EmitSingleChar, SCC_RAW_STRING_POINTER, 1, 0},
00488 
00489   // Numbers
00490   {"COMMA", EmitSingleChar, SCC_COMMA, 1, 0}, // Number with comma
00491   {"NUM",   EmitSingleChar, SCC_NUM,   1, 0}, // Signed number
00492   {"BYTES", EmitSingleChar, SCC_BYTES, 1, 0}, // Unsigned number with "bytes", i.e. "1.02 MiB or 123 KiB"
00493 
00494   {"CURRENCY",   EmitSingleChar, SCC_CURRENCY,    1, 0},
00495 
00496   {"WAYPOINT", EmitSingleChar, SCC_WAYPOINT_NAME, 1, 0}, // waypoint name
00497   {"STATION",  EmitSingleChar, SCC_STATION_NAME,  1, 0},
00498   {"TOWN",     EmitSingleChar, SCC_TOWN_NAME,     1, 0},
00499   {"GROUP",    EmitSingleChar, SCC_GROUP_NAME,    1, 0},
00500   {"SIGN",     EmitSingleChar, SCC_SIGN_NAME,     1, 0},
00501   {"ENGINE",   EmitSingleChar, SCC_ENGINE_NAME,   1, 0},
00502   {"VEHICLE",  EmitSingleChar, SCC_VEHICLE_NAME,  1, 0},
00503   {"COMPANY",  EmitSingleChar, SCC_COMPANY_NAME,  1, 0},
00504   {"COMPANYNUM", EmitSingleChar, SCC_COMPANY_NUM, 1, 0},
00505   {"PRESIDENTNAME", EmitSingleChar, SCC_PRESIDENT_NAME, 1, 0},
00506 
00507   // 0x9D is used for the pseudo command SETCASE
00508   // 0x9E is used for case switching
00509 
00510   {"",               EmitSingleChar, '\n',               0, C_DONTCOUNT},
00511   {"{",              EmitSingleChar, '{',                0, C_DONTCOUNT},
00512   {"UPARROW",        EmitSingleChar, SCC_UPARROW,        0, 0},
00513   {"SMALLUPARROW",   EmitSingleChar, SCC_SMALLUPARROW,   0, 0},
00514   {"SMALLDOWNARROW", EmitSingleChar, SCC_SMALLDOWNARROW, 0, 0},
00515   {"TRAIN",          EmitSingleChar, SCC_TRAIN,          0, 0},
00516   {"LORRY",          EmitSingleChar, SCC_LORRY,          0, 0},
00517   {"BUS",            EmitSingleChar, SCC_BUS,            0, 0},
00518   {"PLANE",          EmitSingleChar, SCC_PLANE,          0, 0},
00519   {"SHIP",           EmitSingleChar, SCC_SHIP,           0, 0},
00520   {"NBSP",           EmitSingleChar, 0xA0,               0, C_DONTCOUNT},
00521   {"CENT",           EmitSingleChar, 0xA2,               0, C_DONTCOUNT},
00522   {"POUNDSIGN",      EmitSingleChar, 0xA3,               0, C_DONTCOUNT},
00523   {"EURO",           EmitSingleChar, 0x20AC,             0, C_DONTCOUNT},
00524   {"YENSIGN",        EmitSingleChar, 0xA5,               0, C_DONTCOUNT},
00525   {"COPYRIGHT",      EmitSingleChar, 0xA9,               0, C_DONTCOUNT},
00526   {"DOWNARROW",      EmitSingleChar, SCC_DOWNARROW,      0, C_DONTCOUNT},
00527   {"CHECKMARK",      EmitSingleChar, SCC_CHECKMARK,      0, C_DONTCOUNT},
00528   {"CROSS",          EmitSingleChar, SCC_CROSS,          0, C_DONTCOUNT},
00529   {"REGISTERED",     EmitSingleChar, 0xAE,               0, C_DONTCOUNT},
00530   {"RIGHTARROW",     EmitSingleChar, SCC_RIGHTARROW,     0, C_DONTCOUNT},
00531   {"SMALLLEFTARROW", EmitSingleChar, SCC_LESSTHAN,       0, C_DONTCOUNT},
00532   {"SMALLRIGHTARROW",EmitSingleChar, SCC_GREATERTHAN,    0, C_DONTCOUNT},
00533 
00534   /* The following are directional formatting codes used to get the RTL strings right:
00535    * http://www.unicode.org/unicode/reports/tr9/#Directional_Formatting_Codes */
00536   {"LRM",            EmitSingleChar, 0x200E,             0, C_DONTCOUNT},
00537   {"RLM",            EmitSingleChar, 0x200F,             0, C_DONTCOUNT},
00538   {"LRE",            EmitSingleChar, 0x202A,             0, C_DONTCOUNT},
00539   {"RLE",            EmitSingleChar, 0x202B,             0, C_DONTCOUNT},
00540   {"LRO",            EmitSingleChar, 0x202D,             0, C_DONTCOUNT},
00541   {"RLO",            EmitSingleChar, 0x202E,             0, C_DONTCOUNT},
00542   {"PDF",            EmitSingleChar, 0x202C,             0, C_DONTCOUNT},
00543 };
00544 
00545 
00546 static const CmdStruct *FindCmd(const char *s, int len)
00547 {
00548   const CmdStruct *cs;
00549 
00550   for (cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
00551     if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
00552   }
00553   return NULL;
00554 }
00555 
00556 static uint ResolveCaseName(const char *str, uint len)
00557 {
00558   uint i;
00559 
00560   for (i = 0; i < MAX_NUM_CASES; i++) {
00561     if (memcmp(_cases[i], str, len) == 0 && _cases[i][len] == 0) return i + 1;
00562   }
00563   error("Invalid case-name '%s'", str);
00564 }
00565 
00566 
00567 // returns NULL on eof
00568 // else returns command struct
00569 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
00570 {
00571   const char *s = *str, *start;
00572   const CmdStruct *cmd;
00573   byte c;
00574 
00575   *argno = -1;
00576   *casei = -1;
00577 
00578   // Scan to the next command, exit if there's no next command.
00579   for (; *s != '{'; s++) {
00580     if (*s == '\0') return NULL;
00581   }
00582   s++; // Skip past the {
00583 
00584   if (*s >= '0' && *s <= '9') {
00585     char *end;
00586 
00587     *argno = strtoul(s, &end, 0);
00588     if (*end != ':') error("missing arg #");
00589     s = end + 1;
00590   }
00591 
00592   // parse command name
00593   start = s;
00594   do {
00595     c = *s++;
00596   } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
00597 
00598   cmd = FindCmd(start, s - start - 1);
00599   if (cmd == NULL) {
00600     strgen_error("Undefined command '%.*s'", s - start - 1, start);
00601     return NULL;
00602   }
00603 
00604   if (c == '.') {
00605     const char *casep = s;
00606 
00607     if (!(cmd->flags & C_CASE))
00608       error("Command '%s' can't have a case", cmd->cmd);
00609 
00610     do c = *s++; while (c != '}' && c != ' ' && c != '\0');
00611     *casei = ResolveCaseName(casep, s - casep - 1);
00612   }
00613 
00614   if (c == '\0') {
00615     strgen_error("Missing } from command '%s'", start);
00616     return NULL;
00617   }
00618 
00619 
00620   if (c != '}') {
00621     if (c == '=') s--;
00622     // copy params
00623     start = s;
00624     for (;;) {
00625       c = *s++;
00626       if (c == '}') break;
00627       if (c == '\0') {
00628         strgen_error("Missing } from command '%s'", start);
00629         return NULL;
00630       }
00631       if (s - start == 250) error("param command too long");
00632       *param++ = c;
00633     }
00634   }
00635   *param = '\0';
00636 
00637   *str = s;
00638 
00639   return cmd;
00640 }
00641 
00642 
00643 static void HandlePragma(char *str)
00644 {
00645   if (!memcmp(str, "id ", 3)) {
00646     _next_string_id = strtoul(str + 3, NULL, 0);
00647   } else if (!memcmp(str, "name ", 5)) {
00648     strecpy(_lang_name, str + 5, lastof(_lang_name));
00649   } else if (!memcmp(str, "ownname ", 8)) {
00650     strecpy(_lang_ownname, str + 8, lastof(_lang_ownname));
00651   } else if (!memcmp(str, "isocode ", 8)) {
00652     strecpy(_lang_isocode, str + 8, lastof(_lang_isocode));
00653   } else if (!memcmp(str, "plural ", 7)) {
00654     _lang_pluralform = atoi(str + 7);
00655     if (_lang_pluralform >= lengthof(_plural_form_counts))
00656       error("Invalid pluralform %d", _lang_pluralform);
00657   } else if (!memcmp(str, "textdir ", 8)) {
00658     if (!memcmp(str + 8, "ltr", 3)) {
00659       _lang_textdir = TD_LTR;
00660     } else if (!memcmp(str + 8, "rtl", 3)) {
00661       _lang_textdir = TD_RTL;
00662     } else {
00663       error("Invalid textdir %s", str + 8);
00664     }
00665   } else if (!memcmp(str, "winlangid ", 10)) {
00666     char *buf = str + 10;
00667     long langid = strtol(buf, NULL, 16);
00668     if (langid > UINT16_MAX || langid < 0) {
00669       error("Invalid winlangid %s", buf);
00670     }
00671     _lang_winlangid = (uint16)langid;
00672   } else if (!memcmp(str, "gender ", 7)) {
00673     char *buf = str + 7;
00674 
00675     for (;;) {
00676       const char *s = ParseWord(&buf);
00677 
00678       if (s == NULL) break;
00679       if (_numgenders >= MAX_NUM_GENDER) error("Too many genders, max %d", MAX_NUM_GENDER);
00680       strecpy(_genders[_numgenders], s, lastof(_genders[_numgenders]));
00681       _numgenders++;
00682     }
00683   } else if (!memcmp(str, "case ", 5)) {
00684     char *buf = str + 5;
00685 
00686     for (;;) {
00687       const char *s = ParseWord(&buf);
00688 
00689       if (s == NULL) break;
00690       if (_numcases >= MAX_NUM_CASES) error("Too many cases, max %d", MAX_NUM_CASES);
00691       strecpy(_cases[_numcases], s, lastof(_cases[_numcases]));
00692       _numcases++;
00693     }
00694   } else {
00695     error("unknown pragma '%s'", str);
00696   }
00697 }
00698 
00699 static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
00700 {
00701   char param[100];
00702   int argno;
00703   int argidx = 0;
00704   int casei;
00705 
00706   memset(p, 0, sizeof(*p));
00707 
00708   for (;;) {
00709     // read until next command from a.
00710     const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
00711 
00712     if (ar == NULL) break;
00713 
00714     // Sanity checking
00715     if (argno != -1 && ar->consumes == 0) error("Non consumer param can't have a paramindex");
00716 
00717     if (ar->consumes) {
00718       if (argno != -1) argidx = argno;
00719       if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) error("invalid param idx %d", argidx);
00720       if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) error("duplicate param idx %d", argidx);
00721 
00722       p->cmd[argidx++] = ar;
00723     } else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them
00724       if (p->np >= lengthof(p->pairs)) error("too many commands in string, max %d", lengthof(p->pairs));
00725       p->pairs[p->np].a = ar;
00726       p->pairs[p->np].v = param[0] != '\0' ? strdup(param) : "";
00727       p->np++;
00728     }
00729   }
00730 }
00731 
00732 
00733 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
00734 {
00735   if (a == NULL) return NULL;
00736 
00737   if (strcmp(a->cmd, "STRING1") == 0 ||
00738       strcmp(a->cmd, "STRING2") == 0 ||
00739       strcmp(a->cmd, "STRING3") == 0 ||
00740       strcmp(a->cmd, "STRING4") == 0 ||
00741       strcmp(a->cmd, "STRING5") == 0 ||
00742       strcmp(a->cmd, "RAW_STRING") == 0){
00743     return FindCmd("STRING", 6);
00744   }
00745 
00746   if (strcmp(a->cmd, "SKIP") == 0) return NULL;
00747 
00748   return a;
00749 }
00750 
00751 
00752 static bool CheckCommandsMatch(char *a, char *b, const char *name)
00753 {
00754   ParsedCommandStruct templ;
00755   ParsedCommandStruct lang;
00756   uint i, j;
00757   bool result = true;
00758 
00759   ExtractCommandString(&templ, b, true);
00760   ExtractCommandString(&lang, a, true);
00761 
00762   // For each string in templ, see if we find it in lang
00763   if (templ.np != lang.np) {
00764     strgen_warning("%s: template string and language string have a different # of commands", name);
00765     result = false;
00766   }
00767 
00768   for (i = 0; i < templ.np; i++) {
00769     // see if we find it in lang, and zero it out
00770     bool found = false;
00771     for (j = 0; j < lang.np; j++) {
00772       if (templ.pairs[i].a == lang.pairs[j].a &&
00773           strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
00774         // it was found in both. zero it out from lang so we don't find it again
00775         lang.pairs[j].a = NULL;
00776         found = true;
00777         break;
00778       }
00779     }
00780 
00781     if (!found) {
00782       strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
00783       result = false;
00784     }
00785   }
00786 
00787   // if we reach here, all non consumer commands match up.
00788   // Check if the non consumer commands match up also.
00789   for (i = 0; i < lengthof(templ.cmd); i++) {
00790     if (TranslateCmdForCompare(templ.cmd[i]) != TranslateCmdForCompare(lang.cmd[i])) {
00791       strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
00792         lang.cmd[i]  == NULL ? "<empty>" : lang.cmd[i]->cmd,
00793         templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
00794       result = false;
00795     }
00796   }
00797 
00798   return result;
00799 }
00800 
00801 static void HandleString(char *str, bool master)
00802 {
00803   char *s, *t;
00804   LangString *ent;
00805   char *casep;
00806 
00807   if (*str == '#') {
00808     if (str[1] == '#' && str[2] != '#') HandlePragma(str + 2);
00809     return;
00810   }
00811 
00812   // Ignore comments & blank lines
00813   if (*str == ';' || *str == ' ' || *str == '\0') return;
00814 
00815   s = strchr(str, ':');
00816   if (s == NULL) {
00817     strgen_error("Line has no ':' delimiter");
00818     return;
00819   }
00820 
00821   // Trim spaces.
00822   // After this str points to the command name, and s points to the command contents
00823   for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--);
00824   *t = 0;
00825   s++;
00826 
00827   /* Check string is valid UTF-8 */
00828   {
00829     const char *tmp;
00830     for (tmp = s; *tmp != '\0';) {
00831       size_t len = Utf8Validate(tmp);
00832       if (len == 0) error("Invalid UTF-8 sequence in '%s'", s);
00833       tmp += len;
00834     }
00835   }
00836 
00837   // Check if the string has a case..
00838   // The syntax for cases is IDENTNAME.case
00839   casep = strchr(str, '.');
00840   if (casep) *casep++ = 0;
00841 
00842   // Check if this string already exists..
00843   ent = HashFind(str);
00844 
00845   if (master) {
00846     if (ent != NULL && casep == NULL) {
00847       strgen_error("String name '%s' is used multiple times", str);
00848       return;
00849     }
00850 
00851     if (ent == NULL && casep != NULL) {
00852       strgen_error("Base string name '%s' doesn't exist yet. Define it before defining a case.", str);
00853       return;
00854     }
00855 
00856     if (ent == NULL) {
00857       if (_strings[_next_string_id]) {
00858         strgen_error("String ID 0x%X for '%s' already in use by '%s'", ent, str, _strings[_next_string_id]->name);
00859         return;
00860       }
00861 
00862       // Allocate a new LangString
00863       ent = CallocT<LangString>(1);
00864       _strings[_next_string_id] = ent;
00865       ent->index = _next_string_id++;
00866       ent->name = strdup(str);
00867       ent->line = _cur_line;
00868 
00869       HashAdd(str, ent);
00870     }
00871 
00872     if (casep != NULL) {
00873       Case *c = MallocT<Case>(1);
00874 
00875       c->caseidx = ResolveCaseName(casep, strlen(casep));
00876       c->string = strdup(s);
00877       c->next = ent->english_case;
00878       ent->english_case = c;
00879     } else {
00880       ent->english = strdup(s);
00881     }
00882 
00883   } else {
00884     if (ent == NULL) {
00885       strgen_warning("String name '%s' does not exist in master file", str);
00886       return;
00887     }
00888 
00889     if (ent->translated && casep == NULL) {
00890       strgen_error("String name '%s' is used multiple times", str);
00891       return;
00892     }
00893 
00894     if (s[0] == ':' && s[1] == '\0' && casep == NULL) {
00895       // Special syntax :: means we should just inherit the master string
00896       ent->translated = strdup(ent->english);
00897     } else {
00898       // make sure that the commands match
00899       if (!CheckCommandsMatch(s, ent->english, str)) return;
00900 
00901       if (casep != NULL) {
00902         Case *c = MallocT<Case>(1);
00903 
00904         c->caseidx = ResolveCaseName(casep, strlen(casep));
00905         c->string = strdup(s);
00906         c->next = ent->translated_case;
00907         ent->translated_case = c;
00908       } else {
00909         ent->translated = strdup(s);
00910       }
00911     }
00912   }
00913 }
00914 
00915 
/**
 * Remove trailing carriage returns, line feeds and spaces from a string,
 * in place. Tabs are kept.
 * @param buf String to trim.
 */
static void rstrip(char *buf)
{
  char *end = buf + strlen(buf);

  while (end > buf) {
    const char last = end[-1];
    if (last != '\r' && last != '\n' && last != ' ') break;
    end--;
  }
  *end = '\0';
}
00922 
00923 
00924 static void ParseFile(const char *file, bool english)
00925 {
00926   FILE *in;
00927   char buf[2048];
00928 
00929   _file = file;
00930 
00931   /* For each new file we parse, reset the genders, and language codes */
00932   _numgenders = 0;
00933   _lang_name[0] = _lang_ownname[0] = _lang_isocode[0] = '\0';
00934   _lang_textdir = TD_LTR;
00935   _lang_winlangid = 0x0000; // neutral language code
00936   // TODO:!! We can't reset the cases. In case the translated strings
00937   // derive some strings from english....
00938 
00939   in = fopen(file, "r");
00940   if (in == NULL) error("Cannot open file");
00941   _cur_line = 1;
00942   while (fgets(buf, sizeof(buf), in) != NULL) {
00943     rstrip(buf);
00944     HandleString(buf, english);
00945     _cur_line++;
00946   }
00947   fclose(in);
00948 
00949   if (StrEmpty(_lang_name) || StrEmpty(_lang_ownname) || StrEmpty(_lang_isocode)) {
00950     error("Language must include ##name, ##ownname and ##isocode");
00951   }
00952 }
00953 
00954 
00955 static uint32 MyHashStr(uint32 hash, const char *s)
00956 {
00957   for (; *s != '\0'; s++) {
00958     hash = ROL(hash, 3) ^ *s;
00959     hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00960   }
00961   return hash;
00962 }
00963 
00964 
00965 // make a hash of the file to get a unique "version number"
00966 static void MakeHashOfStrings()
00967 {
00968   uint32 hash = 0;
00969   uint i;
00970 
00971   for (i = 0; i != lengthof(_strings); i++) {
00972     const LangString *ls = _strings[i];
00973 
00974     if (ls != NULL) {
00975       const CmdStruct *cs;
00976       const char *s;
00977       char buf[256];
00978       int argno;
00979       int casei;
00980 
00981       s = ls->name;
00982       hash ^= i * 0x717239;
00983       hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00984       hash = MyHashStr(hash, s + 1);
00985 
00986       s = ls->english;
00987       while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
00988         if (cs->flags & C_DONTCOUNT) continue;
00989 
00990         hash ^= (cs - _cmd_structs) * 0x1234567;
00991         hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
00992       }
00993     }
00994   }
00995   _hash = hash;
00996 }
00997 
00998 
00999 static uint CountInUse(uint grp)
01000 {
01001   int i;
01002 
01003   for (i = 0x800; --i >= 0;) if (_strings[(grp << 11) + i] != NULL) break;
01004   return i + 1;
01005 }
01006 
01007 
01008 bool CompareFiles(const char *n1, const char *n2)
01009 {
01010   FILE *f1, *f2;
01011   char b1[4096];
01012   char b2[4096];
01013   size_t l1, l2;
01014 
01015   f2 = fopen(n2, "rb");
01016   if (f2 == NULL) return false;
01017 
01018   f1 = fopen(n1, "rb");
01019   if (f1 == NULL) error("can't open %s", n1);
01020 
01021   do {
01022     l1 = fread(b1, 1, sizeof(b1), f1);
01023     l2 = fread(b2, 1, sizeof(b2), f2);
01024 
01025     if (l1 != l2 || memcmp(b1, b2, l1)) {
01026       fclose(f2);
01027       fclose(f1);
01028       return false;
01029     }
01030   } while (l1);
01031 
01032   fclose(f2);
01033   fclose(f1);
01034   return true;
01035 }
01036 
01037 
01038 static void WriteStringsH(const char *filename)
01039 {
01040   FILE *out;
01041   int i;
01042   int next = -1;
01043 
01044   out = fopen("tmp.xxx", "w");
01045   if (out == NULL) error("can't open tmp.xxx");
01046 
01047   fprintf(out, "/* This file is automatically generated. Do not modify */\n\n");
01048   fprintf(out, "#ifndef TABLE_STRINGS_H\n");
01049   fprintf(out, "#define TABLE_STRINGS_H\n");
01050 
01051   for (i = 0; i != lengthof(_strings); i++) {
01052     if (_strings[i] != NULL) {
01053       if (next != i) fprintf(out, "\n");
01054       fprintf(out, "static const StringID %s = 0x%X;\n", _strings[i]->name, i);
01055       next = i + 1;
01056     }
01057   }
01058 
01059   fprintf(out, "\nstatic const StringID STR_LAST_STRINGID = 0x%X;\n", next - 1);
01060 
01061   fprintf(out,
01062     "\nenum {\n"
01063     "\tLANGUAGE_PACK_IDENT = 0x474E414C, // Big Endian value for 'LANG' (LE is 0x 4C 41 4E 47)\n"
01064     "\tLANGUAGE_PACK_VERSION = 0x%X,\n"
01065     "};\n", (uint)_hash
01066   );
01067 
01068   fprintf(out, "\n#endif /* TABLE_STRINGS_H */\n");
01069 
01070   fclose(out);
01071 
01072   if (CompareFiles("tmp.xxx", filename)) {
01073     // files are equal. tmp.xxx is not needed
01074     unlink("tmp.xxx");
01075   } else {
01076     // else rename tmp.xxx into filename
01077 #if defined(WIN32) || defined(WIN64)
01078     unlink(filename);
01079 #endif
01080     if (rename("tmp.xxx", filename) == -1) error("rename() failed");
01081   }
01082 }
01083 
01084 static int TranslateArgumentIdx(int argidx)
01085 {
01086   int i, sum;
01087 
01088   if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd))
01089     error("invalid argidx %d", argidx);
01090 
01091   for (i = sum = 0; i < argidx; i++) {
01092     const CmdStruct *cs = _cur_pcs.cmd[i];
01093     sum += (cs != NULL) ? cs->consumes : 1;
01094   }
01095 
01096   return sum;
01097 }
01098 
01099 static void PutArgidxCommand()
01100 {
01101   PutUtf8(SCC_ARG_INDEX);
01102   PutByte(TranslateArgumentIdx(_cur_argidx));
01103 }
01104 
01105 
01106 static void PutCommandString(const char *str)
01107 {
01108   const CmdStruct *cs;
01109   char param[256];
01110   int argno;
01111   int casei;
01112 
01113   _cur_argidx = 0;
01114 
01115   while (*str != '\0') {
01116     // Process characters as they are until we encounter a {
01117     if (*str != '{') {
01118       PutByte(*str++);
01119       continue;
01120     }
01121     cs = ParseCommandString(&str, param, &argno, &casei);
01122     if (cs == NULL) break;
01123 
01124     if (casei != -1) {
01125       PutUtf8(SCC_SETCASE); // {SETCASE}
01126       PutByte(casei);
01127     }
01128 
01129     // For params that consume values, we need to handle the argindex properly
01130     if (cs->consumes > 0) {
01131       // Check if we need to output a move-param command
01132       if (argno != -1 && argno != _cur_argidx) {
01133         _cur_argidx = argno;
01134         PutArgidxCommand();
01135       }
01136 
01137       // Output the one from the master string... it's always accurate.
01138       cs = _cur_pcs.cmd[_cur_argidx++];
01139       if (cs == NULL) {
01140         error("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
01141       }
01142     }
01143 
01144     cs->proc(param, cs->value);
01145   }
01146 }
01147 
01148 static void WriteLength(FILE *f, uint length)
01149 {
01150   if (length < 0xC0) {
01151     fputc(length, f);
01152   } else if (length < 0x4000) {
01153     fputc((length >> 8) | 0xC0, f);
01154     fputc(length & 0xFF, f);
01155   } else {
01156     error("string too long");
01157   }
01158 }
01159 
01160 
01161 static void WriteLangfile(const char *filename)
01162 {
01163   FILE *f;
01164   uint in_use[32];
01165   LanguagePackHeader hdr;
01166   uint i;
01167   uint j;
01168 
01169   f = fopen(filename, "wb");
01170   if (f == NULL) error("can't open %s", filename);
01171 
01172   memset(&hdr, 0, sizeof(hdr));
01173   for (i = 0; i != 32; i++) {
01174     uint n = CountInUse(i);
01175 
01176     in_use[i] = n;
01177     hdr.offsets[i] = TO_LE16(n);
01178   }
01179 
01180   // see line 655: fprintf(..."\tLANGUAGE_PACK_IDENT = 0x474E414C,...)
01181   hdr.ident = TO_LE32(0x474E414C); // Big Endian value for 'LANG'
01182   hdr.version = TO_LE32(_hash);
01183   hdr.plural_form = _lang_pluralform;
01184   hdr.text_dir = _lang_textdir;
01185   hdr.winlangid = TO_LE16(_lang_winlangid);
01186   strcpy(hdr.name, _lang_name);
01187   strcpy(hdr.own_name, _lang_ownname);
01188   strcpy(hdr.isocode, _lang_isocode);
01189 
01190   fwrite(&hdr, sizeof(hdr), 1, f);
01191 
01192   for (i = 0; i != 32; i++) {
01193     for (j = 0; j != in_use[i]; j++) {
01194       const LangString *ls = _strings[(i << 11) + j];
01195       const Case *casep;
01196       const char *cmdp;
01197 
01198       // For undefined strings, just set that it's an empty string
01199       if (ls == NULL) {
01200         WriteLength(f, 0);
01201         continue;
01202       }
01203 
01204       _cur_ident = ls->name;
01205       _cur_line = ls->line;
01206 
01207       // Produce a message if a string doesn't have a translation.
01208       if (_show_todo > 0 && ls->translated == NULL) {
01209         if ((_show_todo & 2) != 0) {
01210           strgen_warning("'%s' is untranslated", ls->name);
01211         }
01212         if ((_show_todo & 1) != 0) {
01213           const char *s = "<TODO> ";
01214           while (*s != '\0') PutByte(*s++);
01215         }
01216       }
01217 
01218       // Extract the strings and stuff from the english command string
01219       ExtractCommandString(&_cur_pcs, ls->english, false);
01220 
01221       if (ls->translated_case != NULL || ls->translated != NULL) {
01222         casep = ls->translated_case;
01223         cmdp = ls->translated;
01224       } else {
01225         casep = ls->english_case;
01226         cmdp = ls->english;
01227       }
01228 
01229       _translated = _masterlang || (cmdp != ls->english);
01230 
01231       if (casep != NULL) {
01232         const Case *c;
01233         uint num;
01234 
01235         // Need to output a case-switch.
01236         // It has this format
01237         // <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
01238         // Each LEN is printed using 2 bytes in big endian order.
01239         PutUtf8(SCC_SWITCH_CASE);
01240         // Count the number of cases
01241         for (num = 0, c = casep; c; c = c->next) num++;
01242         PutByte(num);
01243 
01244         // Write each case
01245         for (c = casep; c != NULL; c = c->next) {
01246           int pos;
01247 
01248           PutByte(c->caseidx);
01249           // Make some space for the 16-bit length
01250           pos = _put_pos;
01251           PutByte(0);
01252           PutByte(0);
01253           // Write string
01254           PutCommandString(c->string);
01255           PutByte(0); // terminate with a zero
01256           // Fill in the length
01257           _put_buf[pos + 0] = GB(_put_pos - (pos + 2), 8, 8);
01258           _put_buf[pos + 1] = GB(_put_pos - (pos + 2), 0, 8);
01259         }
01260       }
01261 
01262       if (cmdp != NULL) PutCommandString(cmdp);
01263 
01264       WriteLength(f, _put_pos);
01265       fwrite(_put_buf, 1, _put_pos, f);
01266       _put_pos = 0;
01267     }
01268   }
01269 
01270   fputc(0, f);
01271   fclose(f);
01272 }
01273 
/**
 * Create a directory, hiding the difference between the one-argument
 * mkdir() of WIN32/Watcom and the two-argument one of other platforms.
 * The result of mkdir() is not looked at.
 * @param directory path of the directory to create
 */
static inline void ottd_mkdir(const char *directory)
{
#if !defined(WIN32) && !defined(__WATCOMC__)
  /* Two-argument mkdir(): pass the access mode explicitly */
  mkdir(directory, 0755);
#else
  mkdir(directory);
#endif
}
01283 
01287 static inline char *mkpath(char *buf, size_t buflen, const char *path, const char *file)
01288 {
01289   char *p;
01290   ttd_strlcpy(buf, path, buflen); // copy directory into buffer
01291 
01292   p = strchr(buf, '\0'); // add path seperator if necessary
01293   if (p[-1] != PATHSEPCHAR && (size_t)(p - buf) + 1 < buflen) *p++ = PATHSEPCHAR;
01294   ttd_strlcpy(p, file, buflen - (size_t)(p - buf)); // catenate filename at end of buffer
01295   return buf;
01296 }
01297 
/**
 * Convert the path separators of a path to the ones the platform expects.
 * For MinGW builds every '/' is replaced by '\\' in place; on all other
 * platforms the string is left alone.
 * @param s path to convert
 * @return s
 */
static inline char *replace_pathsep(char *s)
{
#if defined(__MINGW32__)
  for (char *sep = strchr(s, '/'); sep != NULL; sep = strchr(sep + 1, '/')) {
    *sep = '\\';
  }
#endif
  return s;
}
01314 
01315 int CDECL main(int argc, char *argv[])
01316 {
01317   char pathbuf[MAX_PATH];
01318   const char *src_dir = ".";
01319   const char *dest_dir = NULL;
01320 
01321   while (argc > 1 && *argv[1] == '-') {
01322     if (strcmp(argv[1], "-v") == 0 || strcmp(argv[1], "--version") == 0) {
01323       puts("$Revision: 15456 $");
01324       return 0;
01325     }
01326 
01327     if (strcmp(argv[1], "-t") == 0 || strcmp(argv[1], "--todo") == 0) {
01328       _show_todo |= 1;
01329       argc--, argv++;
01330       continue;
01331     }
01332 
01333     if (strcmp(argv[1], "-w") == 0 || strcmp(argv[1], "--warning") == 0) {
01334       _show_todo |= 2;
01335       argc--, argv++;
01336       continue;
01337     }
01338 
01339     if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) {
01340       puts(
01341         "strgen - $Revision: 15456 $\n"
01342         " -v | --version    print version information and exit\n"
01343         " -t | --todo       replace any untranslated strings with '<TODO>'\n"
01344         " -w | --warning    print a warning for any untranslated strings\n"
01345         " -h | -? | --help  print this help message and exit\n"
01346         " -s | --source_dir search for english.txt in the specified directory\n"
01347         " -d | --dest_dir   put output file in the specified directory, create if needed\n"
01348         " Run without parameters and strgen will search for english.txt and parse it,\n"
01349         " creating strings.h. Passing an argument, strgen will translate that language\n"
01350         " file using english.txt as a reference and output <language>.lng."
01351       );
01352       return 0;
01353     }
01354 
01355     if (argc > 2 && (strcmp(argv[1], "-s") == 0 || strcmp(argv[1], "--source_dir") == 0)) {
01356       src_dir = replace_pathsep(argv[2]);
01357       argc -= 2, argv += 2;
01358       continue;
01359     }
01360 
01361     if (argc > 2 && (strcmp(argv[1], "-d") == 0 || strcmp(argv[1], "--dest_dir") == 0)) {
01362       dest_dir = replace_pathsep(argv[2]);
01363       argc -= 2, argv += 2;
01364       continue;
01365     }
01366 
01367     fprintf(stderr, "Invalid arguments\n");
01368     return 0;
01369   }
01370 
01371   if (dest_dir == NULL) dest_dir = src_dir; // if dest_dir is not specified, it equals src_dir
01372 
01373   /* strgen has two modes of operation. If no (free) arguments are passed
01374    * strgen generates strings.h to the destination directory. If it is supplied
01375    * with a (free) parameter the program will translate that language to destination
01376    * directory. As input english.txt is parsed from the source directory */
01377   if (argc == 1) {
01378     mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01379 
01380     /* parse master file */
01381     _masterlang = true;
01382     ParseFile(pathbuf, true);
01383     MakeHashOfStrings();
01384     if (_errors) return 1;
01385 
01386     /* write strings.h */
01387     ottd_mkdir(dest_dir);
01388     mkpath(pathbuf, lengthof(pathbuf), dest_dir, "strings.h");
01389     WriteStringsH(pathbuf);
01390   } else if (argc == 2) {
01391     char *r;
01392 
01393     mkpath(pathbuf, lengthof(pathbuf), src_dir, "english.txt");
01394 
01395     /* parse master file and check if target file is correct */
01396     _masterlang = false;
01397     ParseFile(pathbuf, true);
01398     MakeHashOfStrings();
01399     ParseFile(replace_pathsep(argv[1]), false); // target file
01400     if (_errors) return 1;
01401 
01402     /* get the targetfile, strip any directories and append to destination path */
01403     r = strrchr(argv[1], PATHSEPCHAR);
01404     mkpath(pathbuf, lengthof(pathbuf), dest_dir, (r != NULL) ? &r[1] : argv[1]);
01405 
01406     /* rename the .txt (input-extension) to .lng */
01407     r = strrchr(pathbuf, '.');
01408     if (r == NULL || strcmp(r, ".txt") != 0) r = strchr(pathbuf, '\0');
01409     ttd_strlcpy(r, ".lng", (size_t)(r - pathbuf));
01410     WriteLangfile(pathbuf);
01411 
01412     /* if showing warnings, print a summary of the language */
01413     if ((_show_todo & 2) != 0) {
01414       fprintf(stdout, "%d warnings and %d errors for %s\n", _warnings, _errors, pathbuf);
01415     }
01416   } else {
01417     fprintf(stderr, "Invalid arguments\n");
01418   }
01419 
01420   return 0;
01421 }

Generated on Mon Mar 9 23:33:51 2009 for openttd by  doxygen 1.5.6