Complete.Org: Mailing Lists: Archives: freeciv-dev: January 2003:
[Freeciv-Dev] (PR#1824) ruleset data is in incompatible charsets
Home

[Freeciv-Dev] (PR#1824) ruleset data is in incompatible charsets

[Top] [All Lists]

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index] [Thread Index]
To: jrg45@xxxxxxxxxxxxxxxxx, Kenn.Munro@xxxxxxxxxxxxxx
Cc: jlangley@xxxxxxx, mrproper@xxxxxxxxxx, freeciv-dev@xxxxxxxxxxx
Subject: [Freeciv-Dev] (PR#1824) ruleset data is in incompatible charsets
From: "Jason Short via RT" <rt@xxxxxxxxxxxxxx>
Date: Thu, 30 Jan 2003 23:41:31 -0800
Reply-to: rt@xxxxxxxxxxxxxx

[jdorje - Fri Jan 31 00:09:41 2003]:

> [rfalke - Wed Jan 29 17:37:32 2003]:
> 
> > I'm against adding this patch to CVS HEAD. Stable is ok and useful.
> 
> I have updated the patch for S1_14, and made a couple of slight changes
> based on feedback from Davide (whose system behaves very differently
> from mine).

Ho hum...

I contacted the glibc people to ask why "" is supported while "char"
isn't.  The answer was: use nl_langinfo, idiot.  So apparently, although
this function claims not to be "portable", it is what we are supposed to
use.

In fact, it seems HAVE_LANGINFO_CODESET is already defined in config.h
(probably from the iconv checks).  So that makes things easy...I guess
looking in the glib sources to see all the crazy checks they do made me
think things were more complicated than they are :-).

Thus the patch becomes what it is here.  Note that transliteration is
necessary, since the latin1 characters may very well not be available in
the local encoding.

jason

Index: acconfig.old
===================================================================
RCS file: /home/freeciv/CVS/freeciv/acconfig.old,v
retrieving revision 1.4
diff -u -r1.4 acconfig.old
--- acconfig.old        2003/01/13 21:24:05     1.4
+++ acconfig.old        2003/01/29 04:21:07
@@ -28,6 +28,8 @@
 #undef VERSION_STRING
 #undef DEBUG
 #undef NDEBUG
+#undef HAVE_ICONV
+#undef ICONV_CONST
 #undef HAVE_LIBICE
 #undef HAVE_LIBSM
 #undef HAVE_LIBX11
Index: configure.ac
===================================================================
RCS file: /home/freeciv/CVS/freeciv/configure.ac,v
retrieving revision 1.35
diff -u -r1.35 configure.ac
--- configure.ac        2003/01/16 22:14:13     1.35
+++ configure.ac        2003/01/29 04:21:07
@@ -295,6 +295,10 @@
   SERVER_LIBS="-lwsock32"
 fi
 
+dnl Check for libiconv (which is usually included in glibc, but may be
+dnl distributed separately).
+AM_ICONV
+
 dnl Check and choose clients
 if test x$client != xno; then
 
Index: configure.in
===================================================================
RCS file: /home/freeciv/CVS/freeciv/configure.in,v
retrieving revision 1.213
diff -u -r1.213 configure.in
--- configure.in        2003/01/16 22:14:13     1.213
+++ configure.in        2003/01/29 04:21:07
@@ -289,6 +289,10 @@
   SERVER_LIBS="-lwsock32"
 fi
 
+dnl Check for libiconv (which is usually included in glibc, but may be
+dnl distributed separately).
+AM_ICONV
+
 dnl Check and choose clients
 if test x$client != xno; then
 
Index: common/shared.c
===================================================================
RCS file: /home/freeciv/CVS/freeciv/common/shared.c,v
retrieving revision 1.98
diff -u -r1.98 shared.c
--- common/shared.c     2002/12/18 17:36:19     1.98
+++ common/shared.c     2003/01/29 04:21:08
@@ -29,6 +29,9 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#ifdef HAVE_ICONV
+#include <iconv.h>
+#endif
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
@@ -1243,4 +1246,100 @@
     vec2++;
   }
   return FALSE;
+}
+
+#ifdef HAVE_ICONV
+/***************************************************************************
+  Convert the text.  This assumes 'from' is an 8-bit charset.
+***************************************************************************/
+static char *convert_string_malloc(const char *text,
+                                  const char *from, const char *to)
+{
+  iconv_t cd = iconv_open(to, from);
+  size_t from_len = strlen(text) + 1, to_len = from_len;
+  char *result;
+
+  if (cd == (iconv_t) (-1)) {
+    freelog(LOG_ERROR,
+           _("Could not convert text from %s to %s: %s.\n"
+             "You may want to change your local encoding by setting\n"
+             "$FREECIV_LOCAL_ENCODING or change the data encoding\n"
+             "by setting $FREECIV_DATA_ENCODING."),
+           from, to, strerror(errno));
+    return mystrdup(text); /* The best we can do? */
+  }
+
+  do {
+    size_t flen = from_len, tlen = to_len, res;
+    const char *mytext = text;
+    char *myresult;
+
+    result = fc_malloc(to_len);
+
+    myresult = result;
+
+    /* Since we may do multiple translations, we may need to reset iconv
+     * in between. */
+    iconv(cd, NULL, NULL, NULL, NULL);
+
+    res = iconv(cd, (ICONV_CONST char **)&mytext, &flen, &myresult, &tlen);
+    if (res == (size_t) (-1)) {
+      if (errno != E2BIG) {
+       /* Invalid input. */
+       freelog(LOG_ERROR,
+               _("The string '%s' is not valid: %s. Ruleset files must\n"
+                 "be encoded as %s; you can change this by setting\n"
+                 "$FREECIV_DATA_ENCODING."),
+               text, strerror(errno), from);
+       free(result);
+       iconv_close(cd);
+       return mystrdup(text); /* The best we can do? */
+      }
+    } else {
+      /* Success. */
+      iconv_close(cd);
+
+      /* There may be wasted space here.  But we don't want to call
+       * mystrdup on result since it might not be in an 8-bit charset. */
+      return result;
+    }
+
+    /* Not enough space; try again. */
+    free(result);
+    to_len *= 2;
+  } while (TRUE);
+}
+#endif
+
+/***************************************************************************
+  We convert from the charset used by the rulesets into the local encoding.
+***************************************************************************/
+char *convert_data_string_malloc(const char *text)
+{
+#ifdef HAVE_ICONV
+  char *local_encoding;
+  char *data_encoding;
+
+  local_encoding = getenv("FREECIV_LOCAL_ENCODING");
+  if (!local_encoding) {
+    /* This converts into the current 8-bit encoding,
+     * with transliteration. */
+    local_encoding = "//TRANSLIT";
+  }
+
+  data_encoding = getenv("FREECIV_DATA_ENCODING");
+  if (!data_encoding) {
+    /* Currently the rulesets are in latin1 (ISO-8859-1). */
+    data_encoding = "ISO-8859-1";
+  }
+
+  return convert_string_malloc(text, data_encoding, local_encoding);
+#else
+  freelog(LOG_ERROR,
+         _("You are running Freeciv without using iconv.  Unless\n"
+           "you are using the latin1 character set, some characters\n"
+           "may not be displayed properly.  You can download iconv\n"
+           "at http://gnu.org/."));
+  return mystrdup(text);
+#endif
 }
Index: common/shared.h
===================================================================
RCS file: /home/freeciv/CVS/freeciv/common/shared.h,v
retrieving revision 1.110
diff -u -r1.110 shared.h
--- common/shared.h     2002/12/18 19:05:22     1.110
+++ common/shared.h     2003/01/29 04:21:09
@@ -204,4 +204,6 @@
 
 const char *freeciv_motto(void);
 
+char *convert_data_string_malloc(const char *text);
+
 #endif  /* FC__SHARED_H */
Index: server/ruleset.c
===================================================================
RCS file: /home/freeciv/CVS/freeciv/server/ruleset.c,v
retrieving revision 1.130
diff -u -r1.130 ruleset.c
--- server/ruleset.c    2003/01/13 23:27:12     1.130
+++ server/ruleset.c    2003/01/29 04:21:10
@@ -1948,7 +1948,7 @@
       } /* if (!next) */
     } /* if (name) */
     remove_leading_trailing_spaces(cities[j]);
-    city_names[j].name = mystrdup(cities[j]);
+    city_names[j].name = convert_data_string_malloc(cities[j]);
     if (check_name(city_names[j].name)) {
       /* The ruleset contains a name that is too long.  This shouldn't
         happen - if it does, the author should get immediate feedback */
@@ -2001,7 +2001,7 @@
     pl->leader_count = dim;
     pl->leaders = fc_malloc(sizeof(*pl->leaders) * pl->leader_count);
     for(j = 0; j < dim; j++) {
-      pl->leaders[j].name = mystrdup(leaders[j]);
+      pl->leaders[j].name = convert_data_string_malloc(leaders[j]);
       if (check_name(leaders[j])) {
        pl->leaders[j].name[MAX_LEN_NAME - 1] = '\0';
       }

[Prev in Thread] Current Thread [Next in Thread]