strgen_base.cpp

Go to the documentation of this file.
00001 /* $Id: strgen_base.cpp 24603 2012-10-17 18:53:35Z rubidium $ */
00002 
00003 /*
00004  * This file is part of OpenTTD.
00005  * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
00006  * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00007  * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
00008  */
00009 
00012 #include "../stdafx.h"
00013 #include "../core/endian_func.hpp"
00014 #include "../string_func.h"
00015 #include "../table/control_codes.h"
00016 
00017 #include "strgen.h"
00018 
00019 
00020 #include "../table/strgen_tables.h"
00021 
00022 /* Compiles a list of strings into a compiled string list */
00023 
00024 static bool _translated;              
00025 static bool _translation;             
00026 const char *_file = "(unknown file)"; 
00027 int _cur_line;                        
00028 int _errors, _warnings, _show_todo;
00029 LanguagePackHeader _lang;             
00030 
00031 static const ptrdiff_t MAX_COMMAND_PARAM_SIZE = 100; 
00032 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei);
00033 
00040 Case::Case(int caseidx, const char *string, Case *next) :
00041     caseidx(caseidx), string(strdup(string)), next(next)
00042 {
00043 }
00044 
00046 Case::~Case()
00047 {
00048   free(this->string);
00049   delete this->next;
00050 }
00051 
00059 LangString::LangString(const char *name, const char *english, int index, int line) :
00060     name(strdup(name)), english(strdup(english)), translated(NULL),
00061     hash_next(0), index(index), line(line), translated_case(NULL)
00062 {
00063 }
00064 
00066 LangString::~LangString()
00067 {
00068   free(this->name);
00069   free(this->english);
00070   free(this->translated);
00071   delete this->translated_case;
00072 }
00073 
00075 void LangString::FreeTranslation()
00076 {
00077   free(this->translated);
00078   this->translated = NULL;
00079 
00080   delete this->translated_case;
00081   this->translated_case = NULL;
00082 }
00083 
00088 StringData::StringData(size_t tabs) : tabs(tabs), max_strings(tabs * TAB_SIZE)
00089 {
00090   this->strings = CallocT<LangString *>(max_strings);
00091   this->hash_heads = CallocT<uint16>(max_strings);
00092   this->next_string_id = 0;
00093 }
00094 
00096 StringData::~StringData()
00097 {
00098   for (size_t i = 0; i < this->max_strings; i++) delete this->strings[i];
00099   free(this->strings);
00100   free(this->hash_heads);
00101 }
00102 
00104 void StringData::FreeTranslation()
00105 {
00106   for (size_t i = 0; i < this->max_strings; i++) {
00107     LangString *ls = this->strings[i];
00108     if (ls != NULL) ls->FreeTranslation();
00109   }
00110 }
00111 
00117 uint StringData::HashStr(const char *s) const
00118 {
00119   uint hash = 0;
00120   for (; *s != '\0'; s++) hash = ROL(hash, 3) ^ *s;
00121   return hash % this->max_strings;
00122 }
00123 
00129 void StringData::Add(const char *s, LangString *ls)
00130 {
00131   uint hash = this->HashStr(s);
00132   ls->hash_next = this->hash_heads[hash];
00133   /* Off-by-one for hash find. */
00134   this->hash_heads[hash] = ls->index + 1;
00135   this->strings[ls->index] = ls;
00136 }
00137 
00143 LangString *StringData::Find(const char *s)
00144 {
00145   int idx = this->hash_heads[this->HashStr(s)];
00146 
00147   while (--idx >= 0) {
00148     LangString *ls = this->strings[idx];
00149 
00150     if (strcmp(ls->name, s) == 0) return ls;
00151     idx = ls->hash_next;
00152   }
00153   return NULL;
00154 }
00155 
00162 uint StringData::VersionHashStr(uint hash, const char *s) const
00163 {
00164   for (; *s != '\0'; s++) {
00165     hash = ROL(hash, 3) ^ *s;
00166     hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00167   }
00168   return hash;
00169 }
00170 
00175 uint StringData::Version() const
00176 {
00177   uint hash = 0;
00178 
00179   for (size_t i = 0; i < this->max_strings; i++) {
00180     const LangString *ls = this->strings[i];
00181 
00182     if (ls != NULL) {
00183       const CmdStruct *cs;
00184       const char *s;
00185       char buf[MAX_COMMAND_PARAM_SIZE];
00186       int argno;
00187       int casei;
00188 
00189       s = ls->name;
00190       hash ^= i * 0x717239;
00191       hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
00192       hash = this->VersionHashStr(hash, s + 1);
00193 
00194       s = ls->english;
00195       while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
00196         if (cs->flags & C_DONTCOUNT) continue;
00197 
00198         hash ^= (cs - _cmd_structs) * 0x1234567;
00199         hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
00200       }
00201     }
00202   }
00203 
00204   return hash;
00205 }
00206 
00211 uint StringData::CountInUse(uint tab) const
00212 {
00213   int i;
00214   for (i = TAB_SIZE; --i >= 0;) if (this->strings[(tab * TAB_SIZE) + i] != NULL) break;
00215   return i + 1;
00216 }
00217 
00218 static const char *_cur_ident;
00219 
00220 struct CmdPair {
00221   const CmdStruct *a;
00222   const char *v;
00223 };
00224 
00225 struct ParsedCommandStruct {
00226   uint np;
00227   CmdPair pairs[32];
00228   const CmdStruct *cmd[32]; // ordered by param #
00229 };
00230 
00231 /* Used when generating some advanced commands. */
00232 static ParsedCommandStruct _cur_pcs;
00233 static int _cur_argidx;
00234 
00236 struct Buffer : SmallVector<byte, 256> {
00241   void AppendByte(byte value)
00242   {
00243     *this->Append() = value;
00244   }
00245 
00250   void AppendUtf8(uint32 value)
00251   {
00252     if (value < 0x80) {
00253       *this->Append() = value;
00254     } else if (value < 0x800) {
00255       *this->Append() = 0xC0 + GB(value,  6, 5);
00256       *this->Append() = 0x80 + GB(value,  0, 6);
00257     } else if (value < 0x10000) {
00258       *this->Append() = 0xE0 + GB(value, 12, 4);
00259       *this->Append() = 0x80 + GB(value,  6, 6);
00260       *this->Append() = 0x80 + GB(value,  0, 6);
00261     } else if (value < 0x110000) {
00262       *this->Append() = 0xF0 + GB(value, 18, 3);
00263       *this->Append() = 0x80 + GB(value, 12, 6);
00264       *this->Append() = 0x80 + GB(value,  6, 6);
00265       *this->Append() = 0x80 + GB(value,  0, 6);
00266     } else {
00267       strgen_warning("Invalid unicode value U+0x%X", value);
00268     }
00269   }
00270 };
00271 
00272 size_t Utf8Validate(const char *s)
00273 {
00274   uint32 c;
00275 
00276   if (!HasBit(s[0], 7)) {
00277     /* 1 byte */
00278     return 1;
00279   } else if (GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
00280     /* 2 bytes */
00281     c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
00282     if (c >= 0x80) return 2;
00283   } else if (GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
00284     /* 3 bytes */
00285     c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
00286     if (c >= 0x800) return 3;
00287   } else if (GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
00288     /* 4 bytes */
00289     c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
00290     if (c >= 0x10000 && c <= 0x10FFFF) return 4;
00291   }
00292 
00293   return 0;
00294 }
00295 
00296 
00297 void EmitSingleChar(Buffer *buffer, char *buf, int value)
00298 {
00299   if (*buf != '\0') strgen_warning("Ignoring trailing letters in command");
00300   buffer->AppendUtf8(value);
00301 }
00302 
00303 
00304 /* The plural specifier looks like
00305  * {NUM} {PLURAL -1 passenger passengers} then it picks either passenger/passengers depending on the count in NUM */
00306 
00307 /* This is encoded like
00308  *  CommandByte <ARG#> <NUM> {Length of each string} {each string} */
00309 
/**
 * Parse an argument reference of the form "[+]number[:offset]".
 * A number with a '+' prefix or a negative number is added to \a *value, any other
 * number replaces it.
 * @param buf    Text to parse; advanced past the parsed part on success only.
 * @param value  The argument index to update.
 * @param offset Receives the number after the ':'; pass NULL to leave a ':' part unparsed.
 * @return False when no number could be parsed; \a *value may already have been updated
 *         when it is the offset that is missing.
 */
bool ParseRelNum(char **buf, int *value, int *offset)
{
  const char *cursor = *buf;
  while (*cursor == ' ' || *cursor == '\t') cursor++;

  const bool relative = (*cursor == '+');
  if (relative) cursor++;

  char *end;
  const int number = strtol(cursor, &end, 0);
  if (end == cursor) return false;

  if (relative || number < 0) {
    *value += number;
  } else {
    *value = number;
  }

  if (offset != NULL && *end == ':') {
    /* Take the Nth within */
    cursor = end + 1;
    *offset = strtol(cursor, &end, 0);
    if (end == cursor) return false;
  }

  *buf = end;
  return true;
}
00337 
/**
 * Cut the next word out of a text. A word is either enclosed in double quotes or
 * runs up to the next space or tab. The text is modified: the character that ends
 * the word is overwritten with a NUL.
 * @param buf Text to parse; advanced past the word, but left untouched when there is no word.
 * @return The word, or NULL when only whitespace is left.
 */
char *ParseWord(char **buf)
{
  char *s = *buf;

  while (*s == ' ' || *s == '\t') s++;
  if (*s == '\0') return NULL;

  const bool quoted = (*s == '"');
  if (quoted) s++;
  char *word = s;

  /* A quoted word ends at the closing quote, any other word at whitespace; both end at NUL. */
  while (*s != '\0') {
    const bool terminator = quoted ? (*s == '"') : (*s == ' ' || *s == '\t');
    if (terminator) {
      *s++ = '\0';
      break;
    }
    s++;
  }

  *buf = s;
  return word;
}
00372 
00373 /* Forward declaration */
00374 static int TranslateArgumentIdx(int arg, int offset = 0);
00375 
00376 static void EmitWordList(Buffer *buffer, const char * const *words, uint nw)
00377 {
00378   buffer->AppendByte(nw);
00379   for (uint i = 0; i < nw; i++) buffer->AppendByte((uint)strlen(words[i]) + 1);
00380   for (uint i = 0; i < nw; i++) {
00381     for (uint j = 0; words[i][j] != '\0'; j++) buffer->AppendByte(words[i][j]);
00382     buffer->AppendByte(0);
00383   }
00384 }
00385 
00386 void EmitPlural(Buffer *buffer, char *buf, int value)
00387 {
00388   int argidx = _cur_argidx;
00389   int offset = 0;
00390   const char *words[5];
00391   int nw = 0;
00392 
00393   /* Parse out the number, if one exists. Otherwise default to prev arg. */
00394   if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
00395 
00396   /* Parse each string */
00397   for (nw = 0; nw < 5; nw++) {
00398     words[nw] = ParseWord(&buf);
00399     if (words[nw] == NULL) break;
00400   }
00401 
00402   if (nw == 0) {
00403     strgen_fatal("%s: No plural words", _cur_ident);
00404   }
00405 
00406   if (_plural_forms[_lang.plural_form].plural_count != nw) {
00407     if (_translated) {
00408       strgen_fatal("%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
00409         _plural_forms[_lang.plural_form].plural_count, nw);
00410     } else {
00411       if ((_show_todo & 2) != 0) strgen_warning("'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
00412       if (nw > _plural_forms[_lang.plural_form].plural_count) {
00413         nw = _plural_forms[_lang.plural_form].plural_count;
00414       } else {
00415         for (; nw < _plural_forms[_lang.plural_form].plural_count; nw++) {
00416           words[nw] = words[nw - 1];
00417         }
00418       }
00419     }
00420   }
00421 
00422   buffer->AppendUtf8(SCC_PLURAL_LIST);
00423   buffer->AppendByte(_lang.plural_form);
00424   buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
00425   EmitWordList(buffer, words, nw);
00426 }
00427 
00428 
00429 void EmitGender(Buffer *buffer, char *buf, int value)
00430 {
00431   int argidx = _cur_argidx;
00432   int offset = 0;
00433   uint nw;
00434 
00435   if (buf[0] == '=') {
00436     buf++;
00437 
00438     /* This is a {G=DER} command */
00439     nw = _lang.GetGenderIndex(buf);
00440     if (nw >= MAX_NUM_GENDERS) strgen_fatal("G argument '%s' invalid", buf);
00441 
00442     /* now nw contains the gender index */
00443     buffer->AppendUtf8(SCC_GENDER_INDEX);
00444     buffer->AppendByte(nw);
00445   } else {
00446     const char *words[MAX_NUM_GENDERS];
00447 
00448     /* This is a {G 0 foo bar two} command.
00449      * If no relative number exists, default to +0 */
00450     if (!ParseRelNum(&buf, &argidx, &offset)) {}
00451 
00452     const CmdStruct *cmd = _cur_pcs.cmd[argidx];
00453     if (cmd == NULL || (cmd->flags & C_GENDER) == 0) {
00454       strgen_fatal("Command '%s' can't have a gender", cmd == NULL ? "<empty>" : cmd->cmd);
00455     }
00456 
00457     for (nw = 0; nw < MAX_NUM_GENDERS; nw++) {
00458       words[nw] = ParseWord(&buf);
00459       if (words[nw] == NULL) break;
00460     }
00461     if (nw != _lang.num_genders) strgen_fatal("Bad # of arguments for gender command");
00462 
00463     assert(IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
00464     buffer->AppendUtf8(SCC_GENDER_LIST);
00465     buffer->AppendByte(TranslateArgumentIdx(argidx, offset));
00466     EmitWordList(buffer, words, nw);
00467   }
00468 }
00469 
00470 static const CmdStruct *FindCmd(const char *s, int len)
00471 {
00472   for (const CmdStruct *cs = _cmd_structs; cs != endof(_cmd_structs); cs++) {
00473     if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] == '\0') return cs;
00474   }
00475   return NULL;
00476 }
00477 
00478 static uint ResolveCaseName(const char *str, size_t len)
00479 {
00480   /* First get a clean copy of only the case name, then resolve it. */
00481   char case_str[CASE_GENDER_LEN];
00482   len = min(lengthof(case_str) - 1, len);
00483   memcpy(case_str, str, len);
00484   case_str[len] = '\0';
00485 
00486   uint8 case_idx = _lang.GetCaseIndex(case_str);
00487   if (case_idx >= MAX_NUM_CASES) strgen_fatal("Invalid case-name '%s'", case_str);
00488   return case_idx + 1;
00489 }
00490 
00491 
00492 /* returns NULL on eof
00493  * else returns command struct */
00494 static const CmdStruct *ParseCommandString(const char **str, char *param, int *argno, int *casei)
00495 {
00496   const char *s = *str, *start;
00497   char c;
00498 
00499   *argno = -1;
00500   *casei = -1;
00501 
00502   /* Scan to the next command, exit if there's no next command. */
00503   for (; *s != '{'; s++) {
00504     if (*s == '\0') return NULL;
00505   }
00506   s++; // Skip past the {
00507 
00508   if (*s >= '0' && *s <= '9') {
00509     char *end;
00510 
00511     *argno = strtoul(s, &end, 0);
00512     if (*end != ':') strgen_fatal("missing arg #");
00513     s = end + 1;
00514   }
00515 
00516   /* parse command name */
00517   start = s;
00518   do {
00519     c = *s++;
00520   } while (c != '}' && c != ' ' && c != '=' && c != '.' && c != 0);
00521 
00522   const CmdStruct *cmd = FindCmd(start, s - start - 1);
00523   if (cmd == NULL) {
00524     strgen_error("Undefined command '%.*s'", (int)(s - start - 1), start);
00525     return NULL;
00526   }
00527 
00528   if (c == '.') {
00529     const char *casep = s;
00530 
00531     if (!(cmd->flags & C_CASE)) {
00532       strgen_fatal("Command '%s' can't have a case", cmd->cmd);
00533     }
00534 
00535     do {
00536       c = *s++;
00537     } while (c != '}' && c != ' ' && c != '\0');
00538     *casei = ResolveCaseName(casep, s - casep - 1);
00539   }
00540 
00541   if (c == '\0') {
00542     strgen_error("Missing } from command '%s'", start);
00543     return NULL;
00544   }
00545 
00546 
00547   if (c != '}') {
00548     if (c == '=') s--;
00549     /* copy params */
00550     start = s;
00551     for (;;) {
00552       c = *s++;
00553       if (c == '}') break;
00554       if (c == '\0') {
00555         strgen_error("Missing } from command '%s'", start);
00556         return NULL;
00557       }
00558       if (s - start == MAX_COMMAND_PARAM_SIZE) error("param command too long");
00559       *param++ = c;
00560     }
00561   }
00562   *param = '\0';
00563 
00564   *str = s;
00565 
00566   return cmd;
00567 }
00568 
00576 StringReader::StringReader(StringData &data, const char *file, bool master, bool translation) :
00577     data(data), file(strdup(file)), master(master), translation(translation)
00578 {
00579 }
00580 
00582 StringReader::~StringReader()
00583 {
00584   free(file);
00585 }
00586 
00587 static void ExtractCommandString(ParsedCommandStruct *p, const char *s, bool warnings)
00588 {
00589   char param[MAX_COMMAND_PARAM_SIZE];
00590   int argno;
00591   int argidx = 0;
00592   int casei;
00593 
00594   memset(p, 0, sizeof(*p));
00595 
00596   for (;;) {
00597     /* read until next command from a. */
00598     const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
00599 
00600     if (ar == NULL) break;
00601 
00602     /* Sanity checking */
00603     if (argno != -1 && ar->consumes == 0) strgen_fatal("Non consumer param can't have a paramindex");
00604 
00605     if (ar->consumes) {
00606       if (argno != -1) argidx = argno;
00607       if (argidx < 0 || (uint)argidx >= lengthof(p->cmd)) strgen_fatal("invalid param idx %d", argidx);
00608       if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) strgen_fatal("duplicate param idx %d", argidx);
00609 
00610       p->cmd[argidx++] = ar;
00611     } else if (!(ar->flags & C_DONTCOUNT)) { // Ignore some of them
00612       if (p->np >= lengthof(p->pairs)) strgen_fatal("too many commands in string, max " PRINTF_SIZE, lengthof(p->pairs));
00613       p->pairs[p->np].a = ar;
00614       p->pairs[p->np].v = param[0] != '\0' ? strdup(param) : "";
00615       p->np++;
00616     }
00617   }
00618 }
00619 
00620 
00621 static const CmdStruct *TranslateCmdForCompare(const CmdStruct *a)
00622 {
00623   if (a == NULL) return NULL;
00624 
00625   if (strcmp(a->cmd, "STRING1") == 0 ||
00626       strcmp(a->cmd, "STRING2") == 0 ||
00627       strcmp(a->cmd, "STRING3") == 0 ||
00628       strcmp(a->cmd, "STRING4") == 0 ||
00629       strcmp(a->cmd, "STRING5") == 0 ||
00630       strcmp(a->cmd, "STRING6") == 0 ||
00631       strcmp(a->cmd, "STRING7") == 0 ||
00632       strcmp(a->cmd, "RAW_STRING") == 0) {
00633     return FindCmd("STRING", 6);
00634   }
00635 
00636   return a;
00637 }
00638 
00639 
00640 static bool CheckCommandsMatch(char *a, char *b, const char *name)
00641 {
00642   /* If we're not translating, i.e. we're compiling the base language,
00643    * it is pointless to do all these checks as it'll always be correct.
00644    * After all, all checks are based on the base language.
00645    */
00646   if (!_translation) return true;
00647 
00648   ParsedCommandStruct templ;
00649   ParsedCommandStruct lang;
00650   bool result = true;
00651 
00652   ExtractCommandString(&templ, b, true);
00653   ExtractCommandString(&lang, a, true);
00654 
00655   /* For each string in templ, see if we find it in lang */
00656   if (templ.np != lang.np) {
00657     strgen_warning("%s: template string and language string have a different # of commands", name);
00658     result = false;
00659   }
00660 
00661   for (uint i = 0; i < templ.np; i++) {
00662     /* see if we find it in lang, and zero it out */
00663     bool found = false;
00664     for (uint j = 0; j < lang.np; j++) {
00665       if (templ.pairs[i].a == lang.pairs[j].a &&
00666           strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
00667         /* it was found in both. zero it out from lang so we don't find it again */
00668         lang.pairs[j].a = NULL;
00669         found = true;
00670         break;
00671       }
00672     }
00673 
00674     if (!found) {
00675       strgen_warning("%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
00676       result = false;
00677     }
00678   }
00679 
00680   /* if we reach here, all non consumer commands match up.
00681    * Check if the non consumer commands match up also. */
00682   for (uint i = 0; i < lengthof(templ.cmd); i++) {
00683     if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
00684       strgen_warning("%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
00685         lang.cmd[i]  == NULL ? "<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
00686         templ.cmd[i] == NULL ? "<empty>" : templ.cmd[i]->cmd);
00687       result = false;
00688     }
00689   }
00690 
00691   return result;
00692 }
00693 
00694 void StringReader::HandleString(char *str)
00695 {
00696   if (*str == '#') {
00697     if (str[1] == '#' && str[2] != '#') this->HandlePragma(str + 2);
00698     return;
00699   }
00700 
00701   /* Ignore comments & blank lines */
00702   if (*str == ';' || *str == ' ' || *str == '\0') return;
00703 
00704   char *s = strchr(str, ':');
00705   if (s == NULL) {
00706     strgen_error("Line has no ':' delimiter");
00707     return;
00708   }
00709 
00710   char *t;
00711   /* Trim spaces.
00712    * After this str points to the command name, and s points to the command contents */
00713   for (t = s; t > str && (t[-1] == ' ' || t[-1] == '\t'); t--) {}
00714   *t = 0;
00715   s++;
00716 
00717   /* Check string is valid UTF-8 */
00718   const char *tmp;
00719   for (tmp = s; *tmp != '\0';) {
00720     size_t len = Utf8Validate(tmp);
00721     if (len == 0) strgen_fatal("Invalid UTF-8 sequence in '%s'", s);
00722 
00723     WChar c;
00724     Utf8Decode(&c, tmp);
00725     if (c <= 0x001F || // ASCII control character range
00726         (c >= 0xE000 && c <= 0xF8FF) || // Private range
00727         (c >= 0xFFF0 && c <= 0xFFFF)) { // Specials range
00728       strgen_fatal("Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
00729     }
00730 
00731     tmp += len;
00732   }
00733 
00734   /* Check if the string has a case..
00735    * The syntax for cases is IDENTNAME.case */
00736   char *casep = strchr(str, '.');
00737   if (casep != NULL) *casep++ = '\0';
00738 
00739   /* Check if this string already exists.. */
00740   LangString *ent = this->data.Find(str);
00741 
00742   if (this->master) {
00743     if (casep != NULL) {
00744       strgen_error("Cases in the base translation are not supported.");
00745       return;
00746     }
00747 
00748     if (ent != NULL) {
00749       strgen_error("String name '%s' is used multiple times", str);
00750       return;
00751     }
00752 
00753     if (this->data.strings[this->data.next_string_id] != NULL) {
00754       strgen_error("String ID 0x%X for '%s' already in use by '%s'", this->data.next_string_id, str, this->data.strings[this->data.next_string_id]->name);
00755       return;
00756     }
00757 
00758     /* Allocate a new LangString */
00759     this->data.Add(str, new LangString(str, s, this->data.next_string_id++, _cur_line));
00760   } else {
00761     if (ent == NULL) {
00762       strgen_warning("String name '%s' does not exist in master file", str);
00763       return;
00764     }
00765 
00766     if (ent->translated && casep == NULL) {
00767       strgen_error("String name '%s' is used multiple times", str);
00768       return;
00769     }
00770 
00771     /* make sure that the commands match */
00772     if (!CheckCommandsMatch(s, ent->english, str)) return;
00773 
00774     if (casep != NULL) {
00775       ent->translated_case = new Case(ResolveCaseName(casep, strlen(casep)), s, ent->translated_case);
00776     } else {
00777       ent->translated = strdup(s);
00778       /* If the string was translated, use the line from the
00779        * translated language so errors in the translated file
00780        * are properly referenced to. */
00781       ent->line = _cur_line;
00782     }
00783   }
00784 }
00785 
00786 void StringReader::HandlePragma(char *str)
00787 {
00788   if (!memcmp(str, "plural ", 7)) {
00789     _lang.plural_form = atoi(str + 7);
00790     if (_lang.plural_form >= lengthof(_plural_forms)) {
00791       strgen_fatal("Invalid pluralform %d", _lang.plural_form);
00792     }
00793   } else {
00794     strgen_fatal("unknown pragma '%s'", str);
00795   }
00796 }
00797 
/**
 * Remove trailing carriage returns, newlines and spaces from a string, in place.
 * @param buf The string to strip.
 */
static void rstrip(char *buf)
{
  char *end = buf + strlen(buf);
  while (end != buf) {
    const char last = end[-1];
    if (last != '\r' && last != '\n' && last != ' ') break;
    end--;
  }
  *end = '\0';
}
00804 
00805 void StringReader::ParseFile()
00806 {
00807   char buf[2048];
00808   _warnings = _errors = 0;
00809 
00810   _translation = this->master || this->translation;
00811   _file = this->file;
00812 
00813   /* For each new file we parse, reset the genders, and language codes. */
00814   MemSetT(&_lang, 0);
00815   strecpy(_lang.digit_group_separator, ",", lastof(_lang.digit_group_separator));
00816   strecpy(_lang.digit_group_separator_currency, ",", lastof(_lang.digit_group_separator_currency));
00817   strecpy(_lang.digit_decimal_separator, ".", lastof(_lang.digit_decimal_separator));
00818 
00819   _cur_line = 1;
00820   while (this->ReadLine(buf, sizeof(buf)) != NULL) {
00821     rstrip(buf);
00822     this->HandleString(buf);
00823     _cur_line++;
00824   }
00825 }
00826 
00831 void HeaderWriter::WriteHeader(const StringData &data)
00832 {
00833   int last = 0;
00834   for (size_t i = 0; i < data.max_strings; i++) {
00835     if (data.strings[i] != NULL) {
00836       this->WriteStringID(data.strings[i]->name, (int)i);
00837       last = (int)i;
00838     }
00839   }
00840 
00841   this->WriteStringID("STR_LAST_STRINGID", last);
00842 }
00843 
00844 static int TranslateArgumentIdx(int argidx, int offset)
00845 {
00846   int sum;
00847 
00848   if (argidx < 0 || (uint)argidx >= lengthof(_cur_pcs.cmd)) {
00849     strgen_fatal("invalid argidx %d", argidx);
00850   }
00851   const CmdStruct *cs = _cur_pcs.cmd[argidx];
00852   if (cs != NULL && cs->consumes <= offset) {
00853     strgen_fatal("invalid argidx offset %d:%d", argidx, offset);
00854   }
00855 
00856   if (_cur_pcs.cmd[argidx] == NULL) {
00857     strgen_fatal("no command for this argidx %d", argidx);
00858   }
00859 
00860   for (int i = sum = 0; i < argidx; i++) {
00861     const CmdStruct *cs = _cur_pcs.cmd[i];
00862 
00863     sum += (cs != NULL) ? cs->consumes : 1;
00864   }
00865 
00866   return sum + offset;
00867 }
00868 
00869 static void PutArgidxCommand(Buffer *buffer)
00870 {
00871   buffer->AppendUtf8(SCC_ARG_INDEX);
00872   buffer->AppendByte(TranslateArgumentIdx(_cur_argidx));
00873 }
00874 
00875 
00876 static void PutCommandString(Buffer *buffer, const char *str)
00877 {
00878   _cur_argidx = 0;
00879 
00880   while (*str != '\0') {
00881     /* Process characters as they are until we encounter a { */
00882     if (*str != '{') {
00883       buffer->AppendByte(*str++);
00884       continue;
00885     }
00886 
00887     char param[MAX_COMMAND_PARAM_SIZE];
00888     int argno;
00889     int casei;
00890     const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
00891     if (cs == NULL) break;
00892 
00893     if (casei != -1) {
00894       buffer->AppendUtf8(SCC_SET_CASE); // {SET_CASE}
00895       buffer->AppendByte(casei);
00896     }
00897 
00898     /* For params that consume values, we need to handle the argindex properly */
00899     if (cs->consumes > 0) {
00900       /* Check if we need to output a move-param command */
00901       if (argno != -1 && argno != _cur_argidx) {
00902         _cur_argidx = argno;
00903         PutArgidxCommand(buffer);
00904       }
00905 
00906       /* Output the one from the master string... it's always accurate. */
00907       cs = _cur_pcs.cmd[_cur_argidx++];
00908       if (cs == NULL) {
00909         strgen_fatal("%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
00910       }
00911     }
00912 
00913     cs->proc(buffer, param, cs->value);
00914   }
00915 }
00916 
00921 void LanguageWriter::WriteLength(uint length)
00922 {
00923   char buffer[2];
00924   int offs = 0;
00925   if (length >= 0x4000) {
00926     strgen_fatal("string too long");
00927   }
00928 
00929   if (length >= 0xC0) {
00930     buffer[offs++] = (length >> 8) | 0xC0;
00931   }
00932   buffer[offs++] = length & 0xFF;
00933   this->Write((byte*)buffer, offs);
00934 }
00935 
00940 void LanguageWriter::WriteLang(const StringData &data)
00941 {
00942   uint *in_use = AllocaM(uint, data.tabs);
00943   for (size_t tab = 0; tab < data.tabs; tab++) {
00944     uint n = data.CountInUse((uint)tab);
00945 
00946     in_use[tab] = n;
00947     _lang.offsets[tab] = TO_LE16(n);
00948 
00949     for (uint j = 0; j != in_use[tab]; j++) {
00950       const LangString *ls = data.strings[(tab * TAB_SIZE) + j];
00951       if (ls != NULL && ls->translated == NULL) _lang.missing++;
00952     }
00953   }
00954 
00955   _lang.ident = TO_LE32(LanguagePackHeader::IDENT);
00956   _lang.version = TO_LE32(data.Version());
00957   _lang.missing = TO_LE16(_lang.missing);
00958   _lang.winlangid = TO_LE16(_lang.winlangid);
00959 
00960   this->WriteHeader(&_lang);
00961   Buffer buffer;
00962 
00963   for (size_t tab = 0; tab < data.tabs; tab++) {
00964     for (uint j = 0; j != in_use[tab]; j++) {
00965       const LangString *ls = data.strings[(tab * TAB_SIZE) + j];
00966       const Case *casep;
00967       const char *cmdp;
00968 
00969       /* For undefined strings, just set that it's an empty string */
00970       if (ls == NULL) {
00971         this->WriteLength(0);
00972         continue;
00973       }
00974 
00975       _cur_ident = ls->name;
00976       _cur_line = ls->line;
00977 
00978       /* Produce a message if a string doesn't have a translation. */
00979       if (_show_todo > 0 && ls->translated == NULL) {
00980         if ((_show_todo & 2) != 0) {
00981           strgen_warning("'%s' is untranslated", ls->name);
00982         }
00983         if ((_show_todo & 1) != 0) {
00984           const char *s = "<TODO> ";
00985           while (*s != '\0') buffer.AppendByte(*s++);
00986         }
00987       }
00988 
00989       /* Extract the strings and stuff from the english command string */
00990       ExtractCommandString(&_cur_pcs, ls->english, false);
00991 
00992       if (ls->translated_case != NULL || ls->translated != NULL) {
00993         casep = ls->translated_case;
00994         cmdp = ls->translated;
00995       } else {
00996         casep = NULL;
00997         cmdp = ls->english;
00998       }
00999 
01000       _translated = cmdp != ls->english;
01001 
01002       if (casep != NULL) {
01003         const Case *c;
01004         uint num;
01005 
01006         /* Need to output a case-switch.
01007          * It has this format
01008          * <0x9E> <NUM CASES> <CASE1> <LEN1> <STRING1> <CASE2> <LEN2> <STRING2> <CASE3> <LEN3> <STRING3> <STRINGDEFAULT>
01009          * Each LEN is printed using 2 bytes in big endian order. */
01010         buffer.AppendUtf8(SCC_SWITCH_CASE);
01011         /* Count the number of cases */
01012         for (num = 0, c = casep; c; c = c->next) num++;
01013         buffer.AppendByte(num);
01014 
01015         /* Write each case */
01016         for (c = casep; c != NULL; c = c->next) {
01017           buffer.AppendByte(c->caseidx);
01018           /* Make some space for the 16-bit length */
01019           uint pos = buffer.Length();
01020           buffer.AppendByte(0);
01021           buffer.AppendByte(0);
01022           /* Write string */
01023           PutCommandString(&buffer, c->string);
01024           buffer.AppendByte(0); // terminate with a zero
01025           /* Fill in the length */
01026           uint size = buffer.Length() - (pos + 2);
01027           buffer[pos + 0] = GB(size, 8, 8);
01028           buffer[pos + 1] = GB(size, 0, 8);
01029         }
01030       }
01031 
01032       if (cmdp != NULL) PutCommandString(&buffer, cmdp);
01033 
01034       this->WriteLength(buffer.Length());
01035       this->Write(buffer.Begin(), buffer.Length());
01036       buffer.Clear();
01037     }
01038   }
01039 }