[Freeciv-Dev] converting between different character encodings (PR#1824)
[Top] [All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index] [Thread Index]
See earlier discussions for a full explanation.
This is the beginning of a patch to do "proper" conversion of character
encodings.
It defines three types of encoding: the data encoding, the local
encoding, and the display encoding.
The data encoding is defined to be UTF-8, which means all data must be
converted to this encoding. See the attached script which can be used
to convert the rulesets. Run it in data/nations as "convert.pl
*.ruleset". Also, all non-ASCII strings sent over the network should be
in this encoding: that way, when Chris, using German with a latin1
encoding, sends an accented message to me, using English with UTF-8, the
string will be converted properly and will show up properly on my
chatline (not that I'd be able to read it).
The local encoding is the encoding gettext translates into. This isn't
used in the current patch since we don't actually do much conversion yet
(all I convert is the city and leader names).
The display encoding is the encoding used by the GUI. For instance
gui-gtk-2.0 uses UTF-8 and gui-sdl uses UTF-16. Although not
implemented, this is where the bulk of the conversions need to be done
since all strings need to be converted from either the local or the data
encoding into the display encoding before being given to the GUI. This
means particular changes for gui-gtk-2.0 and gui-sdl, which already do
the conversions themselves.
jason
? common/fciconv.c
? common/fciconv.h
Index: configure.ac
===================================================================
RCS file: /home/freeciv/CVS/freeciv/configure.ac,v
retrieving revision 1.49
diff -u -r1.49 configure.ac
--- configure.ac 2003/07/28 20:16:17 1.49
+++ configure.ac 2003/07/30 13:27:42
@@ -301,6 +301,11 @@
LIBS="$LIBS -lwsock32"
fi
+dnl Check for libiconv (which is usually included in glibc, but may be
+dnl distributed separately).
+AM_ICONV
+LIBS="$LIBS $LIBICONV"
+
dnl Check and choose clients
if test x$client != xno; then
Index: configure.in
===================================================================
RCS file: /home/freeciv/CVS/freeciv/configure.in,v
retrieving revision 1.225
diff -u -r1.225 configure.in
--- configure.in 2003/07/28 20:16:17 1.225
+++ configure.in 2003/07/30 13:27:42
@@ -295,6 +295,11 @@
LIBS="$LIBS -lwsock32"
fi
+dnl Check for libiconv (which is usually included in glibc, but may be
+dnl distributed separately).
+AM_ICONV
+LIBS="$LIBS $LIBICONV"
+
dnl Check and choose clients
if test x$client != xno; then
Index: client/packhand.c
===================================================================
RCS file: /home/freeciv/CVS/freeciv/client/packhand.c,v
retrieving revision 1.322
diff -u -r1.322 packhand.c
--- client/packhand.c 2003/07/26 15:07:58 1.322
+++ client/packhand.c 2003/07/30 13:27:42
@@ -26,6 +26,7 @@
#include "capability.h"
#include "capstr.h"
#include "events.h"
+#include "fciconv.h"
#include "fcintl.h"
#include "game.h"
#include "government.h"
@@ -372,7 +373,8 @@
pcity->owner=packet->owner;
pcity->x=packet->x;
pcity->y=packet->y;
- sz_strlcpy(pcity->name, packet->name);
+ data_to_display_string_buffer(packet->name,
+ pcity->name, sizeof(pcity->name));
pcity->size=packet->size;
for (i=0;i<5;i++) {
@@ -589,7 +591,8 @@
pcity->owner=packet->owner;
pcity->x=packet->x;
pcity->y=packet->y;
- sz_strlcpy(pcity->name, packet->name);
+ data_to_display_string_buffer(packet->name,
+ pcity->name, sizeof(pcity->name));
pcity->size=packet->size;
pcity->tile_trade = packet->tile_trade;
@@ -2341,7 +2344,7 @@
pl->leader_count = p->leader_count;
pl->leaders = fc_malloc(sizeof(*pl->leaders) * pl->leader_count);
for (i = 0; i < pl->leader_count; i++) {
- pl->leaders[i].name = mystrdup(p->leader_name[i]);
+ pl->leaders[i].name = data_to_display_string_malloc(p->leader_name[i]);
pl->leaders[i].is_male = p->leader_sex[i];
}
pl->city_style = p->city_style;
Index: client/gui-gtk/gui_main.c
===================================================================
RCS file: /home/freeciv/CVS/freeciv/client/gui-gtk/gui_main.c,v
retrieving revision 1.138
diff -u -r1.138 gui_main.c
--- client/gui-gtk/gui_main.c 2003/07/23 13:46:01 1.138
+++ client/gui-gtk/gui_main.c 2003/07/30 13:27:42
@@ -31,6 +31,7 @@
#include <unistd.h>
#endif
+#include "fciconv.h"
#include "fcintl.h"
#include "game.h"
#include "government.h"
@@ -788,6 +789,7 @@
**************************************************************************/
void ui_init(void)
{
+ init_character_encodings(NULL, 1);
}
/**************************************************************************
Index: client/gui-gtk-2.0/gui_main.c
===================================================================
RCS file: /home/freeciv/CVS/freeciv/client/gui-gtk-2.0/gui_main.c,v
retrieving revision 1.60
diff -u -r1.60 gui_main.c
--- client/gui-gtk-2.0/gui_main.c 2003/07/23 13:46:02 1.60
+++ client/gui-gtk-2.0/gui_main.c 2003/07/30 13:27:42
@@ -33,6 +33,7 @@
#include <gdk/gdkkeysyms.h>
#include "dataio.h"
+#include "fciconv.h"
#include "fcintl.h"
#include "game.h"
#include "government.h"
@@ -166,7 +167,6 @@
static gint timer_callback(gpointer data);
static gboolean show_conn_popup(GtkWidget *view, GdkEventButton *ev,
gpointer data);
-static char *network_charset = NULL;
/**************************************************************************
@@ -997,6 +997,7 @@
#ifdef ENABLE_NLS
bind_textdomain_codeset(PACKAGE, "UTF-8");
#endif
+ init_character_encodings("UTF-8", 1);
log_set_callback(log_callback_utf8);
Index: client/gui-sdl/gui_iconv.c
===================================================================
RCS file: /home/freeciv/CVS/freeciv/client/gui-sdl/gui_iconv.c,v
retrieving revision 1.7
diff -u -r1.7 gui_iconv.c
--- client/gui-sdl/gui_iconv.c 2003/06/27 16:57:04 1.7
+++ client/gui-sdl/gui_iconv.c 2003/07/30 13:27:42
@@ -52,36 +52,6 @@
#include "gui_iconv.h"
/**************************************************************************
- Return the display charset encoding (which is always a variant of
- UTF-16, but must be adjusted for byteorder since SDL_ttf is not
- byteorder-clean).
-**************************************************************************/
-static const char *get_display_encoding(void)
-{
-#if SDL_BYTEORDER == SDL_LIL_ENDIAN
- return "UTF-16LE";
-#else
- return "UTF-16BE";
-#endif
-}
-
-/**************************************************************************
- Return the local charset encoding (which will be passed to iconv).
-**************************************************************************/
-static const char *get_local_encoding(void)
-{
-#ifdef HAVE_LIBCHARSET
- return locale_charset();
-#else
-# ifdef HAVE_LANGINFO_CODESET
- return nl_langinfo(CODESET);
-# else
- return "";
-# endif
-#endif
-}
-
-/**************************************************************************
Convert string from local encoding (8 bit char) to
display encoding (16 bit unicode) and resut put in pToUniString.
if pToUniString == NULL then resulting string will be allocate automaticaly.
@@ -91,78 +61,8 @@
**************************************************************************/
Uint16 *convertcopy_to_utf16(Uint16 * pToUniString, const char *pFromString)
{
- /* Start Parametrs */
- const char *pTocode = get_display_encoding();
- const char *pFromcode = get_local_encoding();
- const char *pStart = pFromString;
- size_t length = strlen(pFromString);
- const char *pEnd = pFromString + length;
- char *pResult = (char *) pToUniString;
- /* ===== */
-
- iconv_t cd = iconv_open(pTocode, pFromcode);
- if (cd == (iconv_t) (-1)) {
- if (errno != EINVAL) {
- return pToUniString;
- }
- }
-
- /* From 8 bit code to UTF-16 (16 bit code) */
- length = (length + 1) * 2;
-
- if (!pResult) {
- pResult = MALLOC(length);
- }
-
- iconv(cd, NULL, NULL, NULL, NULL); /* return to the initial state */
-
- /* Do the conversion for real. */
- {
- const char *pInptr = pStart;
- size_t Insize = pEnd - pStart + 1;
-
- char *pOutptr = pResult;
- size_t Outsize = length;
-
- while (Insize > 0) {
- size_t Res =
- iconv(cd, (ICONV_CONST char **) &pInptr, &Insize, &pOutptr, &Outsize);
- if (Res == (size_t) (-1)) {
- if (errno == EINVAL) {
- break;
- } else {
- int saved_errno = errno;
- iconv_close(cd);
- errno = saved_errno;
- if(!pToUniString) {
- FREE(pResult);
- }
- return pToUniString;
- }
- }
- }
-
- {
- size_t Res = iconv(cd, NULL, NULL, &pOutptr, &Outsize);
- if (Res == (size_t) (-1)) {
- int saved_errno = errno;
- iconv_close(cd);
- errno = saved_errno;
- if(!pToUniString) {
- FREE(pResult);
- }
- return pToUniString;
- }
- }
-
- if (Outsize != 0) {
- abort();
- }
- }
-
- iconv_close(cd);
-
- return (Uint16 *) pResult;
+ return data_to_display_string_buffer(pFromString, pToUniString,
+ strlen(pFromString) * 2 + 2);
}
/**************************************************************************
@@ -175,86 +75,10 @@
**************************************************************************/
char *convertcopy_to_chars(char *pToString, const Uint16 * pFromUniString)
{
- /* Start Parametrs */
- const char *pFromcode = get_display_encoding();
- const char *pTocode = get_local_encoding();
- const char *pStart = (char *) pFromUniString;
- size_t length = unistrlen(pFromUniString);
- const char *pEnd = (char *) pFromUniString + (length * 2) + 2;
- /* ===== */
-
- char *pResult;
- iconv_t cd;
-
- /* ===== */
-
- if (!pStart) {
- return pToString;
- }
-
- /* From 16 bit code to 8 bit code */
- length++;
-
- cd = iconv_open(pTocode, pFromcode);
- if (cd == (iconv_t) (-1)) {
- if (errno != EINVAL) {
- return pToString;
- }
- }
+ size_t length = unistrlen(pFromUniString) + 1;
- if(pToString) {
- pResult = pToString;
- } else {
- pResult = MALLOC(length);
+ if (!pToString) {
+ pToString = malloc(length);
}
-
- iconv(cd, NULL, NULL, NULL, NULL); /* return to the initial state */
-
- /* Do the conversion for real. */
- {
- const char *pInptr = pStart;
- size_t Insize = pEnd - pStart;
-
- char *pOutptr = pResult;
- size_t Outsize = length;
-
- while (Insize > 0) {
- size_t Res =
- iconv(cd, (ICONV_CONST char **) &pInptr, &Insize, &pOutptr, &Outsize);
- if (Res == (size_t) (-1)) {
- if (errno == EINVAL) {
- break;
- } else {
- int saved_errno = errno;
- iconv_close(cd);
- errno = saved_errno;
- if(!pToString) {
- FREE(pResult);
- }
- return pToString;
- }
- }
- }
-
- {
- size_t Res = iconv(cd, NULL, NULL, &pOutptr, &Outsize);
- if (Res == (size_t) (-1)) {
- int saved_errno = errno;
- iconv_close(cd);
- errno = saved_errno;
- if(!pToString) {
- FREE(pResult);
- }
- return pToString;
- }
- }
-
- if (Outsize != 0) {
- abort();
- }
- }
-
- iconv_close(cd);
-
- return pResult;
+ return display_to_data_string_buffer(pFromUniString, pToString, length);
}
Index: client/gui-sdl/gui_main.c
===================================================================
RCS file: /home/freeciv/CVS/freeciv/client/gui-sdl/gui_main.c,v
retrieving revision 1.36
diff -u -r1.36 gui_main.c
--- client/gui-sdl/gui_main.c 2003/07/21 16:09:43 1.36
+++ client/gui-sdl/gui_main.c 2003/07/30 13:27:42
@@ -42,6 +42,7 @@
#include <SDL/SDL.h>
+#include "fciconv.h"
#include "fcintl.h"
#include "log.h"
#include "game.h"
@@ -540,6 +541,12 @@
struct GUI *pInit_String = NULL;
SDL_Surface *pBgd, *pTmp;
Uint32 iSDL_Flags;
+
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+ init_character_encodings("UTF-16LE", 2);
+#else
+ init_character_encodings("UTF-16BE", 2);
+#endif
SDL_Client_Flags = 0;
iSDL_Flags = SDL_INIT_VIDEO | SDL_INIT_NOPARACHUTE;
Index: client/gui-xaw/gui_main.c
===================================================================
RCS file: /home/freeciv/CVS/freeciv/client/gui-xaw/gui_main.c,v
retrieving revision 1.82
diff -u -r1.82 gui_main.c
--- client/gui-xaw/gui_main.c 2003/07/23 13:46:03 1.82
+++ client/gui-xaw/gui_main.c 2003/07/30 13:27:42
@@ -37,6 +37,7 @@
#include "canvas.h"
#include "pixcomm.h"
+#include "fciconv.h"
#include "fcintl.h"
#include "game.h"
#include "government.h"
@@ -266,6 +267,7 @@
**************************************************************************/
void ui_init(void)
{
+ init_character_encodings(NULL, 1);
}
/**************************************************************************
Index: common/Makefile.am
===================================================================
RCS file: /home/freeciv/CVS/freeciv/common/Makefile.am,v
retrieving revision 1.43
diff -u -r1.43 Makefile.am
--- common/Makefile.am 2003/02/21 13:23:09 1.43
+++ common/Makefile.am 2003/07/30 13:27:42
@@ -27,6 +27,8 @@
diptreaty.c \
diptreaty.h \
events.h \
+ fciconv.c \
+ fciconv.h \
fcintl.c \
fcintl.h \
game.c \
Index: server/srv_main.c
===================================================================
RCS file: /home/freeciv/CVS/freeciv/server/srv_main.c,v
retrieving revision 1.132
diff -u -r1.132 srv_main.c
--- server/srv_main.c 2003/07/21 01:23:27 1.132
+++ server/srv_main.c 2003/07/30 13:27:42
@@ -48,6 +48,7 @@
#include "capstr.h"
#include "city.h"
#include "events.h"
+#include "fciconv.h"
#include "fcintl.h"
#include "game.h"
#include "log.h"
@@ -179,6 +180,9 @@
/* mark as initialized */
has_been_srv_init = TRUE;
+
+ /* init character encodings. */
+ init_character_encodings(NULL, 1);
/* done */
return;
/**********************************************************************
Freeciv - Copyright (C) 2003 - The Freeciv Project
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
***********************************************************************/
#ifndef FC__FCICONV_H
#define FC__FCICONV_H

#include <stddef.h>		/* size_t */

/*
 * Select the character encodings.  Must be called once during startup,
 * before any of the conversion functions below.
 *
 *   my_display_encoding - name of the encoding the GUI expects, or NULL
 *                         to use the local encoding.  The environment
 *                         variable FREECIV_DISPLAY_ENCODING, if set,
 *                         takes precedence over this argument.
 *   encoding_size       - size in bytes of one code unit of the display
 *                         encoding (1 for 8-bit encodings, 2 for UTF-16).
 */
void init_character_encodings(char *my_display_encoding,
			      size_t encoding_size);

/*
 * Conversion from the data encoding to the display encoding.
 *
 *   _malloc - returns a newly allocated string owned by the caller.
 *   _static - returns a pointer to one shared internal buffer; the result
 *             is overwritten by the next call to any *_string_static()
 *             function.
 *   _buffer - writes the result into buf (bufsz bytes) and returns buf.
 */
char *data_to_display_string_malloc(const char *text);
char *data_to_display_string_static(const char *text);
char *data_to_display_string_buffer(const char *text,
				    char *buf, size_t bufsz);

/*
 * Conversion from the display encoding to the data encoding; the three
 * variants differ in the same way as the data_to_display ones above.
 */
char *display_to_data_string_malloc(const char *text);
char *display_to_data_string_static(const char *text);
char *display_to_data_string_buffer(const char *text,
				    char *buf, size_t bufsz);

#endif /* FC__FCICONV_H */
/**********************************************************************
Freeciv - Copyright (C) 2003 - The Freeciv Project
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_ICONV
#include <iconv.h>
#endif
#ifdef HAVE_LANGINFO_CODESET
#include <langinfo.h>
#endif
#include "fciconv.h"
#include "fcintl.h"
#include "mem.h"
#include "support.h"
#ifdef HAVE_ICONV
#include "log.h"
#endif
/* Name of the default data encoding. */
#define DEFAULT_DATA_ENCODING "UTF-8"
/* Set to TRUE at the end of init_character_encodings(); convert_string()
 * asserts that it is set before converting anything. */
static bool is_init = FALSE;
/* Shared result buffer handed out by the *_string_static() functions;
 * every such call overwrites the previous result. */
static char convert_buffer[4096];
#ifdef HAVE_ICONV
/* Encoding names selected by init_character_encodings(). */
static char *local_encoding, *data_encoding, *display_encoding;
/* Size in bytes of one code unit of each encoding; these are what gets
 * passed to convert_string() as from_sz.  Only the display size varies
 * (it is set from the encoding_size argument of
 * init_character_encodings()). */
static const size_t local_encoding_size = 1, data_encoding_size = 1;
static size_t display_encoding_size = 1;
#endif
/***************************************************************************
  Must be called during the initialization phase of server and client to
  initialize the character encodings to be used.

  my_display_encoding - encoding name wanted by the GUI, or NULL to use
                        the local encoding.
  encoding_size       - size in bytes of one code unit of the display
                        encoding.  Without iconv support only 1 is
                        accepted (asserted).

  Each encoding can be overridden through the environment variables
  FREECIV_DATA_ENCODING, FREECIV_LOCAL_ENCODING and
  FREECIV_DISPLAY_ENCODING.
***************************************************************************/
void init_character_encodings(char *my_display_encoding,
			      size_t encoding_size)
{
#ifdef HAVE_ICONV
  /* Backing storage for the system-derived local encoding name.  It is
   * static because local_encoding keeps pointing at it after we return. */
  static char local[128];

  /* Set the data encoding - first check $FREECIV_DATA_ENCODING,
   * then fall back to the default. */
  data_encoding = getenv("FREECIV_DATA_ENCODING");
  if (!data_encoding) {
    data_encoding = DEFAULT_DATA_ENCODING;
  }

  /* Set the local encoding - first check $FREECIV_LOCAL_ENCODING,
   * then ask the system. */
  local_encoding = getenv("FREECIV_LOCAL_ENCODING");
  if (!local_encoding) {
    /* Name reported by the system; only read, then copied into local[]. */
    const char *system_encoding;

#ifdef HAVE_LIBCHARSET
    system_encoding = locale_charset();
#else
#ifdef HAVE_LANGINFO_CODESET
    system_encoding = nl_langinfo(CODESET);
#else
    system_encoding = "";
#endif
#endif
    if (strcasecmp(system_encoding, "ANSI_X3.4-1968") == 0
	|| strcasecmp(system_encoding, "ASCII") == 0) {
      /* HACK: use latin1 instead of ascii in typical cases when the
       * encoding is unconfigured. */
      system_encoding = "ISO-8859-1";
    }

    /* Ask iconv to transliterate characters the local encoding lacks
     * instead of failing the conversion. */
    my_snprintf(local, sizeof(local), "%s//TRANSLIT", system_encoding);
    local_encoding = local;
  }

  /* Set the display encoding - first check $FREECIV_DISPLAY_ENCODING,
   * then check the passed-in default value, then fall back to the local
   * encoding. */
  display_encoding = getenv("FREECIV_DISPLAY_ENCODING");
  if (!display_encoding) {
    display_encoding = my_display_encoding;
    if (!display_encoding) {
      display_encoding = local_encoding;
    }
  }
  display_encoding_size = encoding_size;

  fprintf(stderr, "Data=%s, Local=%s, Display=%s\n",
	  data_encoding, local_encoding, display_encoding);
#else
  /* freelog may not work at this point. */
  fprintf(stderr,
	  _("You are running Freeciv without using iconv. Unless\n"
	    "you are using the latin1 character set, some characters\n"
	    "may not be displayed properly. You can download iconv\n"
	    "at http://gnu.org/.\n"));
  /* Without iconv strings are passed through untouched, so a multi-byte
   * display encoding cannot be supported. */
  assert(encoding_size == 1);
#endif

  is_init = TRUE;
}
#ifdef HAVE_ICONV
/***************************************************************************
Return the number of characters in the string. from_sz is the character
encoding size (currently 1 or 2).
***************************************************************************/
static size_t char_strlen(const char *text, size_t from_sz)
{
size_t length = 0;
do {
size_t i;
for (i = 0; i < from_sz; i++) {
if (text[length * from_sz + i] != 0) {
break;
}
}
if (i == from_sz) {
return length;
}
length++;
} while (TRUE);
}
/***************************************************************************
  Convert 'text' from the 'from' encoding into the 'to' encoding.

  text    - input string, terminated by one all-zero code unit.
  from    - iconv name of the input encoding.
  from_sz - size in bytes of one code unit of 'from' (currently 1 or 2).
  to      - iconv name of the output encoding.
  buf     - output buffer of bufsz bytes, or NULL to have the result
            allocated on demand (the caller then owns and must free it).

  Returns buf, or the newly allocated result when buf is NULL.  If the
  conversion cannot be set up or the input is not valid in 'from', an
  error is logged and the text is copied through unconverted as a best
  effort.  If a caller-supplied buffer is too small, the output is
  truncated and terminated.
***************************************************************************/
static char *convert_string(const char *text,
			    const char *from, size_t from_sz,
			    const char *to,
			    char *buf, size_t bufsz)
{
  iconv_t cd;
  size_t from_len, to_len;
  bool alloc = (buf == NULL);

  /* Validate the arguments before anything dereferences them. */
  assert(is_init && from != NULL && to != NULL);
  assert(text != NULL);
  assert(from_sz > 0);

  cd = iconv_open(to, from);
  if (cd == (iconv_t) (-1)) {
    freelog(LOG_ERROR,
	    _("Could not convert text from %s to %s: %s"),
	    from, to, strerror(errno));
    /* The best we can do? */
    if (alloc) {
      return mystrdup(text);
    } else {
      my_snprintf(buf, bufsz, "%s", text);
      return buf;
    }
  }

  /* iconv wants the input length in BYTES.  char_strlen() counts code
   * units, so scale by the unit size; the "+ 1" makes the terminator part
   * of the input so that it gets converted (and the output terminated)
   * too. */
  from_len = (char_strlen(text, from_sz) + 1) * from_sz;

  /* When allocating, start with a guess and grow it on demand below. */
  to_len = alloc ? from_len : bufsz;

  for (;;) {
    size_t flen = from_len, tlen = to_len, res;
    const char *mytext = text;
    char *myresult;

    if (alloc) {
      buf = fc_malloc(to_len);
    }
    myresult = buf;

    /* Since we may do multiple translations, we may need to reset iconv
     * in between. */
    iconv(cd, NULL, NULL, NULL, NULL);

    res = iconv(cd, (char **) &mytext, &flen, &myresult, &tlen);
    if (res != (size_t) (-1)) {
      /* Success.  There may be wasted space in an allocated result, but
       * there's nothing we can do about it. */
      iconv_close(cd);
      return buf;
    }

    if (errno != E2BIG) {
      /* Invalid input. */
      freelog(LOG_ERROR, _("The string '%s' is not valid in %s: %s"),
	      text, from, strerror(errno));
      iconv_close(cd);
      if (alloc) {
	free(buf);
	return mystrdup(text); /* The best we can do? */
      } else {
	my_snprintf(buf, bufsz, "%s", text);
	return buf;
      }
    }

    /* E2BIG: the output did not fit. */
    if (!alloc) {
      /* The caller's buffer is too small, so the result is truncated.
       * Clear the unused tail, then force the last bytes to zero so the
       * buffer is always terminated.  The code unit size of 'to' is not
       * known here; 4 bytes is enough for 1-, 2- and 4-byte units. */
      size_t pad = (bufsz < 4) ? bufsz : 4;

      memset(myresult, 0, tlen);
      memset(buf + bufsz - pad, 0, pad);
      iconv_close(cd);
      return buf;
    }

    /* Not enough space; try again with a buffer twice as large. */
    free(buf);
    to_len *= 2;
  }
}
#endif
/*
 * Generators for the public conversion functions.  Each macro expands to
 * the definition of <from_enc>_to_<to_enc>_string_<kind>(), where from_enc
 * and to_enc are the prefixes of the file-scope *_encoding and
 * *_encoding_size variables (e.g. "data", "display").
 */
#ifndef HAVE_ICONV

/* No iconv available: "conversion" is a plain copy of the text. */
#define CONV_FUNC_MALLOC(from_enc, to_enc) \
  char *from_enc ## _to_ ## to_enc ## _string_malloc(const char *text) \
  { \
    return mystrdup(text); \
  }

#define CONV_FUNC_BUFFER(from_enc, to_enc) \
  char *from_enc ## _to_ ## to_enc ## _string_buffer(const char *text, \
						     char *buf, \
						     size_t bufsz) \
  { \
    my_snprintf(buf, bufsz, "%s", text); \
    return buf; \
  }

#else  /* HAVE_ICONV */

/* Result is allocated by convert_string(); the caller owns it. */
#define CONV_FUNC_MALLOC(from_enc, to_enc) \
  char *from_enc ## _to_ ## to_enc ## _string_malloc(const char *text) \
  { \
    return convert_string(text, \
			  (from_enc ## _encoding), \
			  (from_enc ## _encoding_size), \
			  (to_enc ## _encoding), \
			  NULL, 0); \
  }

/* Result is written into the caller-supplied buffer. */
#define CONV_FUNC_BUFFER(from_enc, to_enc) \
  char *from_enc ## _to_ ## to_enc ## _string_buffer(const char *text, \
						     char *buf, \
						     size_t bufsz) \
  { \
    return convert_string(text, \
			  (from_enc ## _encoding), \
			  (from_enc ## _encoding_size), \
			  (to_enc ## _encoding), \
			  buf, bufsz); \
  }

#endif /* HAVE_ICONV */

/* Result is written into the shared convert_buffer, so it only stays
 * valid until the next *_string_static() call. */
#define CONV_FUNC_STATIC(from_enc, to_enc) \
  char *from_enc ## _to_ ## to_enc ## _string_static(const char *text) \
  { \
    (from_enc ## _to_ ## to_enc ## _string_buffer)(text, \
						   convert_buffer, \
						   sizeof(convert_buffer)); \
    return convert_buffer; \
  }

CONV_FUNC_BUFFER(data, display)
CONV_FUNC_BUFFER(display, data)

CONV_FUNC_STATIC(data, display)
CONV_FUNC_STATIC(display, data)

CONV_FUNC_MALLOC(data, display)
CONV_FUNC_MALLOC(display, data)
#!/usr/bin/perl
#
# Convert each file named on the command line from latin1 to UTF-8, in
# place (for example, run "convert.pl *.ruleset" in data/nations).
#
# A file is only replaced when iconv converted it successfully; on any
# failure the original is left untouched and the script exits non-zero.
#
# NOTE: run this only once per file - a file that is already UTF-8 would
# be read as latin1 and encoded a second time.

use strict;
use warnings;

my $failures = 0;

foreach my $file (@ARGV) {
  print "Converting $file.\n";

  my $tmp = "$file.tmp";

  # List-form pipe open runs iconv directly, without a shell, so file
  # names containing spaces or shell metacharacters are handled safely.
  my $in;
  if (!open($in, '-|', 'iconv', '-f', 'latin1', '-t', 'UTF-8', $file)) {
    warn "Cannot run iconv on $file: $!\n";
    $failures++;
    next;
  }

  my $out;
  if (!open($out, '>', $tmp)) {
    warn "Cannot write $tmp: $!\n";
    close($in);
    $failures++;
    next;
  }

  # Copy bytes verbatim; iconv has already done the conversion.
  binmode($in);
  binmode($out);

  my $ok = 1;
  while (my $chunk = <$in>) {
    $ok = 0 unless print {$out} $chunk;
  }
  $ok = 0 unless close($out);
  # close() on a pipe returns false when the command exited non-zero.
  $ok = 0 unless close($in);

  if ($ok && rename($tmp, $file)) {
    next;
  }

  warn "Failed to convert $file; leaving it unchanged.\n";
  unlink($tmp);
  $failures++;
}

exit($failures ? 1 : 0);
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Freeciv-Dev] converting between different character encodings (PR#1824),
Jason Short <=
|
|