You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
282 lines
6.0 KiB
282 lines
6.0 KiB
From: Tim Waugh <twaugh@redhat.com>
|
|
To: bug-bash@gnu.org
|
|
Subject: [patch] multibyte IFS values
|
|
Date: Tue, 24 Aug 2004 13:34:59 +0100
|
|
|
|
Hi,
|
|
|
|
Here is a patch to address these problems:
|
|
|
|
http://lists.gnu.org/archive/html/bug-bash/2004-07/msg00294.html
|
|
http://lists.gnu.org/archive/html/bug-bash/2004-07/msg00296.html
|
|
|
|
It works well for me, at least.
|
|
|
|
Tim.
|
|
|
|
--- bash-3.0/subst.c.multibyteifs 2004-08-20 15:22:48.366497771 +0100
|
|
+++ bash-3.0/subst.c 2004-08-20 18:13:30.833624616 +0100
|
|
@@ -124,7 +124,12 @@
|
|
SHELL_VAR *ifs_var;
|
|
char *ifs_value;
|
|
unsigned char ifs_cmap[UCHAR_MAX + 1];
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+unsigned char ifs_firstc[MB_LEN_MAX];
|
|
+size_t ifs_firstc_len;
|
|
+#else
|
|
unsigned char ifs_firstc;
|
|
+#endif
|
|
|
|
/* Extern functions and variables from different files. */
|
|
extern int last_command_exit_value, last_command_exit_signal;
|
|
@@ -862,8 +867,14 @@
|
|
char *charlist;
|
|
{
|
|
register int i = *sindex;
|
|
+ size_t slen;
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+ size_t clen;
|
|
+ wchar_t *wcharlist = NULL;
|
|
+#endif
|
|
int c;
|
|
char *temp;
|
|
+ DECLARE_MBSTATE;
|
|
|
|
if (charlist[0] == '\'' && charlist[1] == '\0')
|
|
{
|
|
@@ -872,18 +883,65 @@
|
|
return temp;
|
|
}
|
|
|
|
- for (i = *sindex; c = string[i]; i++)
|
|
+ slen = strlen (string + *sindex) + *sindex;
|
|
+ i = *sindex;
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+ clen = strlen (charlist);
|
|
+#endif
|
|
+ while ((c = string[i]))
|
|
{
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+ size_t mblength;
|
|
+#endif
|
|
+
|
|
if (c == CTLESC)
|
|
{
|
|
- i++;
|
|
+ i += 2;
|
|
continue;
|
|
}
|
|
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+ mblength = mblen (string + i, slen - i);
|
|
+ if (mblength > 1)
|
|
+ {
|
|
+ wchar_t wc;
|
|
+ size_t mblength = mbtowc (&wc, string + i, slen - i);
|
|
+ if (MB_INVALIDCH (mblength))
|
|
+ {
|
|
+ if (MEMBER (c, charlist))
|
|
+ break;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ if (!wcharlist)
|
|
+ {
|
|
+ size_t len = mbstowcs (wcharlist, charlist, 0);
|
|
+ if (len == -1)
|
|
+ len = 0;
|
|
+ wcharlist = xmalloc (sizeof (wchar_t) * (len + 1));
|
|
+ mbstowcs (wcharlist, charlist, 1 + len);
|
|
+ }
|
|
+
|
|
+ if (wcschr (wcharlist, wc))
|
|
+ {
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ else
|
|
+#endif
|
|
+
|
|
if (MEMBER (c, charlist))
|
|
break;
|
|
+
|
|
+ ADVANCE_CHAR (string, slen, i);
|
|
}
|
|
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+ if (wcharlist)
|
|
+ free (wcharlist);
|
|
+#endif
|
|
+
|
|
temp = substring (string, *sindex, i);
|
|
*sindex = i;
|
|
|
|
@@ -1456,11 +1514,36 @@
|
|
d2 = 0;
|
|
if (delims)
|
|
{
|
|
- d2 = (char *)xmalloc (strlen (delims) + 1);
|
|
- for (i = ts = 0; delims[i]; i++)
|
|
+ size_t slength = strlen (delims);
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+ size_t mblength = 1;
|
|
+ DECLARE_MBSTATE;
|
|
+#endif
|
|
+
|
|
+ d2 = (char *)xmalloc (slength + 1);
|
|
+ i = ts = 0;
|
|
+ while (delims[i])
|
|
{
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+ mbstate_t state_bak = state;
|
|
+ mblength = mbrlen (delims + i, slength, &state);
|
|
+
|
|
+ if (MB_INVALIDCH (mblength))
|
|
+ state = state_bak;
|
|
+ else if (mblength != 1)
|
|
+ {
|
|
+ memcpy (d2 + ts, delims + i, mblength);
|
|
+ ts += mblength;
|
|
+ i += mblength;
|
|
+ slength -= mblength;
|
|
+ continue;
|
|
+ }
|
|
+#endif
|
|
+
|
|
if (whitespace(delims[i]) == 0)
|
|
d2[ts++] = delims[i];
|
|
+ i++;
|
|
+ slength--;
|
|
}
|
|
d2[ts] = '\0';
|
|
}
|
|
@@ -1654,10 +1737,19 @@
|
|
string_list_dollar_star (list)
|
|
WORD_LIST *list;
|
|
{
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+ char sep[MB_CUR_MAX + 1];
|
|
+#else
|
|
char sep[2];
|
|
+#endif
|
|
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+ memcpy (sep, ifs_firstc, ifs_firstc_len);
|
|
+ sep[ifs_firstc_len] = '\0';
|
|
+#else
|
|
sep[0] = ifs_firstc;
|
|
sep[1] = '\0';
|
|
+#endif
|
|
|
|
return (string_list_internal (list, sep));
|
|
}
|
|
@@ -1676,14 +1768,41 @@
|
|
WORD_LIST *list;
|
|
int quoted;
|
|
{
|
|
- char *ifs, sep[2];
|
|
+ char *ifs;
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+ char sep[MB_CUR_MAX + 1];
|
|
+#else
|
|
+ char sep[2];
|
|
+#endif
|
|
WORD_LIST *tlist;
|
|
|
|
/* XXX this could just be ifs = ifs_value; */
|
|
ifs = ifs_var ? value_cell (ifs_var) : (char *)0;
|
|
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+ if (ifs && *ifs)
|
|
+ {
|
|
+ size_t mblength = mblen (ifs, strnlen (ifs, MB_CUR_MAX));
|
|
+ if (MB_INVALIDCH (mblength))
|
|
+ {
|
|
+ sep[0] = *ifs;
|
|
+ sep[1] = '\0';
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ memcpy (sep, ifs, mblength);
|
|
+ sep[mblength] = '\0';
|
|
+ }
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ sep[0] = ' ';
|
|
+ sep[1] = '\0';
|
|
+ }
|
|
+#else
|
|
sep[0] = (ifs == 0 || *ifs == 0) ? ' ' : *ifs;
|
|
sep[1] = '\0';
|
|
+#endif
|
|
|
|
tlist = ((quoted & (Q_HERE_DOCUMENT|Q_DOUBLE_QUOTES)) || (ifs && *ifs == 0))
|
|
? quote_list (list)
|
|
@@ -1732,6 +1851,7 @@
|
|
WORD_DESC *t;
|
|
char *current_word, *s;
|
|
int sindex, sh_style_split, whitesep;
|
|
+ size_t slen = 0;
|
|
|
|
if (!string || !*string)
|
|
return ((WORD_LIST *)NULL);
|
|
@@ -1805,7 +1925,12 @@
|
|
|
|
/* Move past the current separator character. */
|
|
if (string[sindex])
|
|
- sindex++;
|
|
+ {
|
|
+ DECLARE_MBSTATE;
|
|
+ if (!slen)
|
|
+ slen = strlen (string);
|
|
+ ADVANCE_CHAR (string, slen, sindex);
|
|
+ }
|
|
|
|
/* Now skip sequences of space, tab, or newline characters if they are
|
|
in the list of separators. */
|
|
@@ -6796,7 +6921,27 @@
|
|
ifs_cmap[uc] = 1;
|
|
}
|
|
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+ if (!ifs_value)
|
|
+ {
|
|
+ ifs_firstc[0] = '\0';
|
|
+ ifs_firstc_len = 1;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ size_t ifs_len = strnlen (ifs_value, MB_CUR_MAX);
|
|
+ ifs_firstc_len = mblen (ifs_value, ifs_len);
|
|
+ if (MB_INVALIDCH (ifs_firstc_len))
|
|
+ {
|
|
+ ifs_firstc[0] = '\0';
|
|
+ ifs_firstc_len = 1;
|
|
+ }
|
|
+ else
|
|
+ memcpy (ifs_firstc, ifs_value, ifs_firstc_len);
|
|
+ }
|
|
+#else
|
|
ifs_firstc = ifs_value ? *ifs_value : 0;
|
|
+#endif
|
|
}
|
|
|
|
char *
|
|
--- bash-3.0/subst.h.multibyteifs 2004-08-20 15:51:08.301074583 +0100
|
|
+++ bash-3.0/subst.h 2004-08-20 15:51:39.070206473 +0100
|
|
@@ -231,7 +231,12 @@
|
|
extern SHELL_VAR *ifs_var;
|
|
extern char *ifs_value;
|
|
extern unsigned char ifs_cmap[];
|
|
+#if defined (HANDLE_MULTIBYTE)
|
|
+extern unsigned char ifs_firstc[];
|
|
+extern size_t ifs_firstc_len;
|
|
+#else
|
|
extern unsigned char ifs_firstc;
|
|
+#endif
|
|
|
|
/* Evaluates to 1 if C is a character in $IFS. */
|
|
#define isifs(c) (ifs_cmap[(unsigned char)(c)] != 0)
|