Complete.Org: Mailing Lists: Archives: freeciv-dev: January 2003:
[Freeciv-Dev] (PR#1824) ruleset data is in incompatible charsets
Home

[Freeciv-Dev] (PR#1824) ruleset data is in incompatible charsets

[Top] [All Lists]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index] [Thread Index]
To: jrg45@xxxxxxxxxxxxxxxxx, Kenn.Munro@xxxxxxxxxxxxxx
Cc: jlangley@xxxxxxx, mrproper@xxxxxxxxxx, freeciv-dev@xxxxxxxxxxx
Subject: [Freeciv-Dev] (PR#1824) ruleset data is in incompatible charsets
From: "Jason Short via RT" <rt@xxxxxxxxxxxxxx>
Date: Sun, 26 Jan 2003 14:13:44 -0800
Reply-to: rt@xxxxxxxxxxxxxx

[jrg45@xxxxxxxxxxxxxxxxx - Thu Jul 25 15:41:34 2002]:

> In the GTK+ version of the client (both players using Trident
> tileset), city
> names containing "special" characters (i.e., "high" ASCII), like the
> 'a' with
> tilde above it in Sao Paulo (Brazil) or the u in Rhun (Mordor) get cut
> off
> before this character.  So Sao Paulo comes out as "S".  Reproducing
> this should
> be easy -- just play a game with one or more players playing as races
> with these
> characters in their city names.

The attached patch should "fix" the problem.  It uses iconv to convert
the city and leader names from latin1 (ISO-8859-1) into the local
encoding when they are loaded from the ruleset.

This is just a "quick" solution.  It will work so long as you always use
the same encoding: i.e., it won't work well over network connections and
savegames won't be fully portable.

Although the patch is generated for CVS HEAD, I would prefer to see this
go into 1.14 and do a "real" solution in main cvs.  This new solution
should probably start by converting the ruleset files into UTF-8.

Question: are there any other strings that are "nontranslated" in the
rulesets?

jason

Index: configure.ac
===================================================================
RCS file: /home/freeciv/CVS/freeciv/configure.ac,v
retrieving revision 1.35
diff -u -r1.35 configure.ac
--- configure.ac        2003/01/16 22:14:13     1.35
+++ configure.ac        2003/01/26 22:11:20
@@ -295,6 +295,10 @@
   SERVER_LIBS="-lwsock32"
 fi
 
+dnl Check for libiconv (which is usually included in glibc, but may be
+dnl distributed separately).
+AM_ICONV
+
 dnl Check and choose clients
 if test x$client != xno; then
 
Index: configure.in
===================================================================
RCS file: /home/freeciv/CVS/freeciv/configure.in,v
retrieving revision 1.213
diff -u -r1.213 configure.in
--- configure.in        2003/01/16 22:14:13     1.213
+++ configure.in        2003/01/26 22:11:20
@@ -289,6 +289,10 @@
   SERVER_LIBS="-lwsock32"
 fi
 
+dnl Check for libiconv (which is usually included in glibc, but may be
+dnl distributed separately).
+AM_ICONV
+
 dnl Check and choose clients
 if test x$client != xno; then
 
Index: common/shared.c
===================================================================
RCS file: /home/freeciv/CVS/freeciv/common/shared.c,v
retrieving revision 1.98
diff -u -r1.98 shared.c
--- common/shared.c     2002/12/18 17:36:19     1.98
+++ common/shared.c     2003/01/26 22:11:21
@@ -22,6 +22,7 @@
 #include <assert.h>
 #include <dirent.h>
 #include <errno.h>
+#include <iconv.h>
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -1243,4 +1244,68 @@
     vec2++;
   }
   return FALSE;
+}
+
+/***************************************************************************
+  Convert the text.  This assumes 'from' is an 8-bit charset.
+***************************************************************************/
+static char *convert_string_malloc(const char *text,
+                                  const char *from, const char *to)
+{
+  iconv_t cd = iconv_open(to, from);
+  size_t from_len = strlen(text) + 1, to_len = from_len;
+  char *result;
+
+  if (cd == (iconv_t) (-1)) {
+    freelog(LOG_ERROR,
+           _("Could not convert text from %s to %s: %s"),
+           from, to, strerror(errno));
+    return mystrdup(text); /* The best we can do? */
+  }
+
+  do {
+    size_t flen = from_len, tlen = to_len, res;
+    const char *mytext = text;
+    char *myresult;
+
+    result = fc_malloc(to_len);
+
+    myresult = result;
+
+    /* Since we may do multiple translations, we may need to reset iconv
+     * in between. */
+    iconv(cd, NULL, NULL, NULL, NULL);
+
+    res = iconv(cd, (char**)&mytext, &flen, &myresult, &tlen);
+    if (res == (size_t) (-1)) {
+      if (errno != E2BIG) {
+       /* Invalid input. */
+       freelog(LOG_ERROR, _("The string '%s' is not valid in %s: %s"),
+               text, from, strerror(errno));
+       free(result);
+       iconv_close(cd);
+       return mystrdup(text); /* The best we can do? */
+      }
+    } else {
+      /* Success. */
+      iconv_close(cd);
+
+      /* There may be wasted space here.  But we don't want to call
+       * mystrdup on result since it might not be in an 8-bit charset. */
+      return result;
+    }
+
+    /* Not enough space; try again. */
+    free(result);
+    to_len *= 2;
+  } while (TRUE);
+}
+
+/***************************************************************************
+  We convert from latin1 (the charset currently used by rulesets)
+  into the local encoding.
+***************************************************************************/
+char *convert_data_string_malloc(const char *text)
+{
+  return convert_string_malloc(text, "ISO-8859-1", "//TRANSLIT");
 }
Index: common/shared.h
===================================================================
RCS file: /home/freeciv/CVS/freeciv/common/shared.h,v
retrieving revision 1.110
diff -u -r1.110 shared.h
--- common/shared.h     2002/12/18 19:05:22     1.110
+++ common/shared.h     2003/01/26 22:11:22
@@ -204,4 +204,6 @@
 
 const char *freeciv_motto(void);
 
+char *convert_data_string_malloc(const char *text);
+
 #endif  /* FC__SHARED_H */
Index: server/ruleset.c
===================================================================
RCS file: /home/freeciv/CVS/freeciv/server/ruleset.c,v
retrieving revision 1.130
diff -u -r1.130 ruleset.c
--- server/ruleset.c    2003/01/13 23:27:12     1.130
+++ server/ruleset.c    2003/01/26 22:11:23
@@ -1948,7 +1948,7 @@
       } /* if (!next) */
     } /* if (name) */
     remove_leading_trailing_spaces(cities[j]);
-    city_names[j].name = mystrdup(cities[j]);
+    city_names[j].name = convert_data_string_malloc(cities[j]);
     if (check_name(city_names[j].name)) {
       /* The ruleset contains a name that is too long.  This shouldn't
         happen - if it does, the author should get immediate feedback */
@@ -2001,7 +2001,7 @@
     pl->leader_count = dim;
     pl->leaders = fc_malloc(sizeof(*pl->leaders) * pl->leader_count);
     for(j = 0; j < dim; j++) {
-      pl->leaders[j].name = mystrdup(leaders[j]);
+      pl->leaders[j].name = convert_data_string_malloc(leaders[j]);
       if (check_name(leaders[j])) {
        pl->leaders[j].name[MAX_LEN_NAME - 1] = '\0';
       }

[Prev in Thread] Current Thread [Next in Thread]