12 #include "../stdafx.h" 13 #include "../core/endian_func.hpp" 14 #include "../string_func.h" 15 #include "../table/control_codes.h" 20 #include "../table/strgen_tables.h" 22 #include "../safeguards.h" 28 const char *
_file =
"(unknown file)";
30 int _errors, _warnings, _show_todo;
34 static const CmdStruct *ParseCommandString(
const char **str,
char *param,
int *argno,
int *casei);
43 caseidx(caseidx), string(
stredup(string)), next(next)
63 hash_next(0), index(index), line(line), translated_case(NULL)
122 for (; *s !=
'\0'; s++) hash =
ROL(hash, 3) ^ *s;
152 if (strcmp(ls->
name, s) == 0)
return ls;
166 for (; *s !=
'\0'; s++) {
167 hash =
ROL(hash, 3) ^ *s;
168 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
192 hash ^= i * 0x717239;
193 hash = (hash & 1 ? hash >> 1 ^ 0xDEADBEEF : hash >> 1);
197 while ((cs = ParseCommandString(&s, buf, &argno, &casei)) != NULL) {
200 hash ^= (cs - _cmd_structs) * 0x1234567;
201 hash = (hash & 1 ? hash >> 1 ^ 0xF00BAA4 : hash >> 1);
220 static const char *_cur_ident;
235 static int _cur_argidx;
245 *this->Append() = value;
255 *this->Append() = value;
256 }
else if (value < 0x800) {
257 *this->Append() = 0xC0 +
GB(value, 6, 5);
258 *this->Append() = 0x80 +
GB(value, 0, 6);
259 }
else if (value < 0x10000) {
260 *this->Append() = 0xE0 +
GB(value, 12, 4);
261 *this->Append() = 0x80 +
GB(value, 6, 6);
262 *this->Append() = 0x80 +
GB(value, 0, 6);
263 }
else if (value < 0x110000) {
264 *this->Append() = 0xF0 +
GB(value, 18, 3);
265 *this->Append() = 0x80 +
GB(value, 12, 6);
266 *this->Append() = 0x80 +
GB(value, 6, 6);
267 *this->Append() = 0x80 +
GB(value, 0, 6);
269 strgen_warning(
"Invalid unicode value U+0x%X", value);
274 size_t Utf8Validate(
const char *s)
281 }
else if (
GB(s[0], 5, 3) == 6 && IsUtf8Part(s[1])) {
283 c =
GB(s[0], 0, 5) << 6 |
GB(s[1], 0, 6);
284 if (c >= 0x80)
return 2;
285 }
else if (
GB(s[0], 4, 4) == 14 && IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
287 c =
GB(s[0], 0, 4) << 12 |
GB(s[1], 0, 6) << 6 |
GB(s[2], 0, 6);
288 if (c >= 0x800)
return 3;
289 }
else if (
GB(s[0], 3, 5) == 30 && IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
291 c =
GB(s[0], 0, 3) << 18 |
GB(s[1], 0, 6) << 12 |
GB(s[2], 0, 6) << 6 |
GB(s[3], 0, 6);
292 if (c >= 0x10000 && c <= 0x10FFFF)
return 4;
299 void EmitSingleChar(
Buffer *buffer,
char *buf,
int value)
301 if (*buf !=
'\0') strgen_warning(
"Ignoring trailing letters in command");
312 bool ParseRelNum(
char **buf,
int *value,
int *offset)
314 const char *s = *buf;
318 while (*s ==
' ' || *s ==
'\t') s++;
323 int v = strtol(s, &end, 0);
324 if (end == s)
return false;
330 if (offset != NULL && *end ==
':') {
333 *offset = strtol(s, &end, 0);
334 if (end == s)
return false;
341 char *ParseWord(
char **buf)
345 while (*s ==
' ' || *s ==
'\t') s++;
346 if (*s ==
'\0')
return NULL;
352 if (*s ==
'\0')
break;
363 if (*s ==
'\0')
break;
364 if (*s ==
' ' || *s ==
'\t') {
376 static int TranslateArgumentIdx(
int arg,
int offset = 0);
378 static void EmitWordList(
Buffer *buffer,
const char *
const *words, uint nw)
381 for (uint i = 0; i < nw; i++) buffer->
AppendByte((byte)strlen(words[i]) + 1);
382 for (uint i = 0; i < nw; i++) {
383 for (uint j = 0; words[i][j] !=
'\0'; j++) buffer->
AppendByte(words[i][j]);
388 void EmitPlural(
Buffer *buffer,
char *buf,
int value)
390 int argidx = _cur_argidx;
397 if (!ParseRelNum(&buf, &argidx, &offset)) argidx--;
399 const CmdStruct *cmd = _cur_pcs.cmd[argidx];
402 if (cmd == NULL || cmd->default_plural_offset < 0) {
403 strgen_fatal(
"Command '%s' has no (default) plural position", cmd == NULL ?
"<empty>" : cmd->cmd);
405 offset = cmd->default_plural_offset;
410 words[nw] = ParseWord(&buf);
411 if (words[nw] == NULL)
break;
415 strgen_fatal(
"%s: No plural words", _cur_ident);
418 if (expected != nw) {
420 strgen_fatal(
"%s: Invalid number of plural forms. Expecting %d, found %d.", _cur_ident,
423 if ((_show_todo & 2) != 0) strgen_warning(
"'%s' is untranslated. Tweaking english string to allow compilation for plural forms", _cur_ident);
427 for (; nw < expected; nw++) {
428 words[nw] = words[nw - 1];
436 buffer->
AppendByte(TranslateArgumentIdx(argidx, offset));
437 EmitWordList(buffer, words, nw);
441 void EmitGender(
Buffer *buffer,
char *buf,
int value)
443 int argidx = _cur_argidx;
452 if (nw >=
MAX_NUM_GENDERS) strgen_fatal(
"G argument '%s' invalid", buf);
462 if (!ParseRelNum(&buf, &argidx, &offset)) {}
464 const CmdStruct *cmd = _cur_pcs.cmd[argidx];
465 if (cmd == NULL || (cmd->flags &
C_GENDER) == 0) {
466 strgen_fatal(
"Command '%s' can't have a gender", cmd == NULL ?
"<empty>" : cmd->cmd);
470 words[nw] = ParseWord(&buf);
471 if (words[nw] == NULL)
break;
473 if (nw != _lang.
num_genders) strgen_fatal(
"Bad # of arguments for gender command");
475 assert(
IsInsideBS(cmd->value, SCC_CONTROL_START, UINT8_MAX));
477 buffer->
AppendByte(TranslateArgumentIdx(argidx, offset));
478 EmitWordList(buffer, words, nw);
482 static const CmdStruct *FindCmd(
const char *s,
int len)
484 for (
const CmdStruct *cs = _cmd_structs; cs !=
endof(_cmd_structs); cs++) {
485 if (strncmp(cs->cmd, s, len) == 0 && cs->cmd[len] ==
'\0')
return cs;
490 static uint ResolveCaseName(
const char *str,
size_t len)
495 memcpy(case_str, str, len);
496 case_str[len] =
'\0';
499 if (case_idx >=
MAX_NUM_CASES) strgen_fatal(
"Invalid case-name '%s'", case_str);
506 static const CmdStruct *ParseCommandString(
const char **str,
char *param,
int *argno,
int *casei)
508 const char *s = *str, *start;
515 for (; *s !=
'{'; s++) {
516 if (*s ==
'\0')
return NULL;
520 if (*s >=
'0' && *s <=
'9') {
523 *argno = strtoul(s, &end, 0);
524 if (*end !=
':') strgen_fatal(
"missing arg #");
532 }
while (c !=
'}' && c !=
' ' && c !=
'=' && c !=
'.' && c != 0);
534 const CmdStruct *cmd = FindCmd(start, s - start - 1);
536 strgen_error(
"Undefined command '%.*s'", (
int)(s - start - 1), start);
541 const char *casep = s;
543 if (!(cmd->flags &
C_CASE)) {
544 strgen_fatal(
"Command '%s' can't have a case", cmd->cmd);
549 }
while (c !=
'}' && c !=
' ' && c !=
'\0');
550 *casei = ResolveCaseName(casep, s - casep - 1);
554 strgen_error(
"Missing } from command '%s'", start);
567 strgen_error(
"Missing } from command '%s'", start);
589 data(data), file(
stredup(file)), master(master), translation(translation)
606 memset(p, 0,
sizeof(*p));
610 const CmdStruct *ar = ParseCommandString(&s, param, &argno, &casei);
612 if (ar == NULL)
break;
615 if (argno != -1 && ar->consumes == 0) strgen_fatal(
"Non consumer param can't have a paramindex");
618 if (argno != -1) argidx = argno;
619 if (argidx < 0 || (uint)argidx >=
lengthof(p->cmd)) strgen_fatal(
"invalid param idx %d", argidx);
620 if (p->cmd[argidx] != NULL && p->cmd[argidx] != ar) strgen_fatal(
"duplicate param idx %d", argidx);
622 p->cmd[argidx++] = ar;
624 if (p->np >=
lengthof(p->pairs)) strgen_fatal(
"too many commands in string, max " PRINTF_SIZE,
lengthof(p->pairs));
625 p->pairs[p->np].a = ar;
626 p->pairs[p->np].v = param[0] !=
'\0' ?
stredup(param) :
"";
635 if (a == NULL)
return NULL;
637 if (strcmp(a->cmd,
"STRING1") == 0 ||
638 strcmp(a->cmd,
"STRING2") == 0 ||
639 strcmp(a->cmd,
"STRING3") == 0 ||
640 strcmp(a->cmd,
"STRING4") == 0 ||
641 strcmp(a->cmd,
"STRING5") == 0 ||
642 strcmp(a->cmd,
"STRING6") == 0 ||
643 strcmp(a->cmd,
"STRING7") == 0 ||
644 strcmp(a->cmd,
"RAW_STRING") == 0) {
645 return FindCmd(
"STRING", 6);
652 static bool CheckCommandsMatch(
char *a,
char *b,
const char *name)
664 ExtractCommandString(&templ, b,
true);
665 ExtractCommandString(&lang, a,
true);
668 if (templ.np != lang.np) {
669 strgen_warning(
"%s: template string and language string have a different # of commands", name);
673 for (uint i = 0; i < templ.np; i++) {
676 for (uint j = 0; j < lang.np; j++) {
677 if (templ.pairs[i].a == lang.pairs[j].a &&
678 strcmp(templ.pairs[i].v, lang.pairs[j].v) == 0) {
680 lang.pairs[j].a = NULL;
687 strgen_warning(
"%s: command '%s' exists in template file but not in language file", name, templ.pairs[i].a->cmd);
694 for (uint i = 0; i <
lengthof(templ.cmd); i++) {
695 if (TranslateCmdForCompare(templ.cmd[i]) != lang.cmd[i]) {
696 strgen_warning(
"%s: Param idx #%d '%s' doesn't match with template command '%s'", name, i,
697 lang.cmd[i] == NULL ?
"<empty>" : TranslateCmdForCompare(lang.cmd[i])->cmd,
698 templ.cmd[i] == NULL ?
"<empty>" : templ.cmd[i]->cmd);
706 void StringReader::HandleString(
char *str)
709 if (str[1] ==
'#' && str[2] !=
'#') this->
HandlePragma(str + 2);
714 if (*str ==
';' || *str ==
' ' || *str ==
'\0')
return;
716 char *s = strchr(str,
':');
718 strgen_error(
"Line has no ':' delimiter");
725 for (t = s; t > str && (t[-1] ==
' ' || t[-1] ==
'\t'); t--) {}
731 for (tmp = s; *tmp !=
'\0';) {
732 size_t len = Utf8Validate(tmp);
733 if (len == 0) strgen_fatal(
"Invalid UTF-8 sequence in '%s'", s);
739 (c >= 0xE000 && c <= 0xF8FF) ||
740 (c >= 0xFFF0 && c <= 0xFFFF)) {
741 strgen_fatal(
"Unwanted UTF-8 character U+%04X in sequence '%s'", c, s);
749 char *casep = strchr(str,
'.');
750 if (casep != NULL) *casep++ =
'\0';
757 strgen_error(
"Cases in the base translation are not supported.");
762 strgen_error(
"String name '%s' is used multiple times", str);
766 if (this->
data.
strings[this->data.next_string_id] != NULL) {
767 strgen_error(
"String ID 0x%X for '%s' already in use by '%s'", this->
data.
next_string_id, str, this->data.strings[this->data.next_string_id]->name);
775 strgen_warning(
"String name '%s' does not exist in master file", str);
780 strgen_error(
"String name '%s' is used multiple times", str);
785 if (!CheckCommandsMatch(s, ent->
english, str))
return;
801 if (!memcmp(str,
"plural ", 7)) {
804 strgen_fatal(
"Invalid pluralform %d", _lang.
plural_form);
807 strgen_fatal(
"unknown pragma '%s'", str);
811 static void rstrip(
char *buf)
813 size_t i = strlen(buf);
814 while (i > 0 && (buf[i - 1] ==
'\r' || buf[i - 1] ==
'\n' || buf[i - 1] ==
' ')) i--;
821 _warnings = _errors = 0;
835 this->HandleString(buf);
849 this->WriteStringID(data.
strings[i]->
name, (
int)i);
854 this->WriteStringID(
"STR_LAST_STRINGID", last);
857 static int TranslateArgumentIdx(
int argidx,
int offset)
861 if (argidx < 0 || (uint)argidx >=
lengthof(_cur_pcs.cmd)) {
862 strgen_fatal(
"invalid argidx %d", argidx);
864 const CmdStruct *cs = _cur_pcs.cmd[argidx];
865 if (cs != NULL && cs->consumes <= offset) {
866 strgen_fatal(
"invalid argidx offset %d:%d", argidx, offset);
869 if (_cur_pcs.cmd[argidx] == NULL) {
870 strgen_fatal(
"no command for this argidx %d", argidx);
873 for (
int i = sum = 0; i < argidx; i++) {
876 sum += (cs != NULL) ? cs->consumes : 1;
882 static void PutArgidxCommand(
Buffer *buffer)
885 buffer->
AppendByte(TranslateArgumentIdx(_cur_argidx));
889 static void PutCommandString(
Buffer *buffer,
const char *str)
893 while (*str !=
'\0') {
903 const CmdStruct *cs = ParseCommandString(&str, param, &argno, &casei);
904 if (cs == NULL)
break;
912 if (cs->consumes > 0) {
914 if (argno != -1 && argno != _cur_argidx) {
916 PutArgidxCommand(buffer);
920 cs = _cur_pcs.cmd[_cur_argidx++];
922 strgen_fatal(
"%s: No argument exists at position %d", _cur_ident, _cur_argidx - 1);
926 cs->proc(buffer, param, cs->value);
938 if (length >= 0x4000) {
939 strgen_fatal(
"string too long");
942 if (length >= 0xC0) {
943 buffer[offs++] = (length >> 8) | 0xC0;
945 buffer[offs++] = length & 0xFF;
946 this->Write((byte*)buffer, offs);
956 for (
size_t tab = 0; tab < data.
tabs; tab++) {
960 _lang.
offsets[tab] = TO_LE16(n);
962 for (uint j = 0; j != in_use[tab]; j++) {
973 this->WriteHeader(&_lang);
976 for (
size_t tab = 0; tab < data.
tabs; tab++) {
977 for (uint j = 0; j != in_use[tab]; j++) {
984 this->WriteLength(0);
988 _cur_ident = ls->
name;
992 if (_show_todo > 0 && ls->
translated == NULL) {
993 if ((_show_todo & 2) != 0) {
994 strgen_warning(
"'%s' is untranslated", ls->
name);
996 if ((_show_todo & 1) != 0) {
997 const char *s =
"<TODO> ";
1003 ExtractCommandString(&_cur_pcs, ls->
english,
false);
1015 if (casep != NULL) {
1025 for (num = 0, c = casep; c; c = c->
next) num++;
1029 for (c = casep; c != NULL; c = c->
next) {
1032 uint pos = buffer.
Length();
1036 PutCommandString(&buffer, c->
string);
1039 uint size = buffer.
Length() - (pos + 2);
1040 buffer[pos + 0] =
GB(size, 8, 8);
1041 buffer[pos + 1] =
GB(size, 0, 8);
1045 if (cmdp != NULL) PutCommandString(&buffer, cmdp);
1047 this->WriteLength(buffer.
Length());
const char * _file
The filename of the input, so we can refer to it in errors/warnings.
void AppendByte(byte value)
Convenience method for adding a byte.
size_t max_strings
The maximum number of strings.
static T ROL(const T x, const uint8 n)
ROtate x Left by n.
static bool _translation
Is the current file actually a translation or not.
uint VersionHashStr(uint hash, const char *s) const
Create a compound hash.
Container for the different cases of a string.
uint HashStr(const char *s) const
Create a hash of the string for finding them back quickly.
virtual void WriteLang(const StringData &data)
Actually write the language.
StringReader(StringData &data, const char *file, bool master, bool translation)
Prepare reading.
virtual char * ReadLine(char *buffer, const char *last)=0
Read a single line from the source of strings.
bool master
Are we reading the master file?
LanguagePackHeader _lang
Header information about a language.
static bool IsInsideBS(const T x, const uint base, const uint size)
Checks if a value is inside a window starting at some base point.
void Clear()
Remove all items from the list.
The buffer for writing a single string.
const T * Begin() const
Get the pointer to the first item (const)
LangString(const char *name, const char *english, int index, int line)
Create a new string.
size_t Utf8Decode(WChar *c, const char *s)
Decode and consume the next UTF-8 encoded character.
char * translated
Translated text.
#define lastof(x)
Get the last element of a fixed-size array.
Simple vector template class.
virtual void WriteLength(uint length)
Write the length as a simple gamma.
#define AllocaM(T, num_elements)
alloca() has to be called in the parent function, so define AllocaM() as a macro
void Add(const char *s, LangString *ls)
Add a newly created LangString.
uint CountInUse(uint tab) const
Count the number of tab elements that are in use.
static T max(const T a, const T b)
Returns the maximum of two values.
StringData(size_t tabs)
Create a new string data container.
~LangString()
Free everything we allocated.
LangString * Find(const char *s)
Find a LangString based on the string name.
Information about the currently known strings.
virtual ~StringReader()
Make sure the right reader gets freed.
~Case()
Free everything we allocated.
char * english
English text.
bool translation
Are we reading a translation? Implies !master. However, the base translation will have this false...
uint Length() const
Get the number of items in the list.
LangString ** strings
Array of all known strings.
int caseidx
The index of the case.
These commands support genders.
static const int MAX_PLURALS
The maximum number of plurals.
static const uint TAB_SIZE
Number of strings per StringTab.
static const ptrdiff_t MAX_COMMAND_PARAM_SIZE
Maximum size of every command block, not counting the name of the command itself. ...
Information about a single string.
~StringData()
Free everything we allocated.
Structures related to strgen.
uint Version() const
Make a hash of the file to get a unique "version number".
char * stredup(const char *s, const char *last)
Create a duplicate of the given string.
char * name
Name of the string.
Case * next
The next, chained, case.
static const uint8 MAX_NUM_GENDERS
Maximum number of supported genders.
const char * file
The file we are reading.
#define lengthof(x)
Return the length of a fixed-size array.
static const uint8 MAX_NUM_CASES
Maximum number of supported cases.
static T min(const T a, const T b)
Returns the minimum of two values.
void FreeTranslation()
Free all data related to the translation.
virtual void HandlePragma(char *str)
Handle the pragma of the file.
char * string
The translation of the case.
virtual void ParseFile()
Start parsing the file.
int _cur_line
The current line we're parsing in the input file.
Case * translated_case
Cases of the translation.
StringData & data
The data to fill during reading.
static const uint8 CASE_GENDER_LEN
The (maximum) length of a case/gender string.
size_t tabs
The number of 'tabs' of strings.
void CDECL error(const char *s,...)
Error handling for fatal non-user errors.
static uint GB(const T x, const uint8 s, const uint8 n)
Fetch n bits from x, starting at bit s.
char * strecpy(char *dst, const char *src, const char *last)
Copies characters from one buffer to another.
uint16 * hash_heads
Hash table for the strings.
#define endof(x)
Get the end element of a fixed-size array.
These commands aren't counted for comparison.
void FreeTranslation()
Free all data related to the translation.
int next_string_id
The next string ID to allocate.
static bool _translated
Whether the current language is not the master language.
static void free(const void *ptr)
Version of the standard free that accepts const pointers.
uint16 hash_next
Next hash entry.
static bool HasBit(const T x, const uint8 y)
Checks if a bit in a value is set.
int line
Line of string in source-file.
void AppendUtf8(uint32 value)
Add a Unicode character encoded in UTF-8 to the buffer.
uint16 index
The index in the language file.
uint32 WChar
Type for wide characters, i.e.
static const PluralForm _plural_forms[]
All plural forms used.
static void MemSetT(T *ptr, byte value, size_t num=1)
Type-safe version of memset().
Case(int caseidx, const char *string, Case *next)
Create a new case.
These commands support cases.