aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 7
-rw-r--r-- libsylph/codeconv.c 73
-rw-r--r-- libsylph/codeconv.h 8
-rw-r--r-- src/compose.c 126
4 files changed, 186 insertions, 28 deletions
diff --git a/ChangeLog b/ChangeLog
index 36aa71ea..58f4881b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2017-11-16
+
+ * libsylph/codeconv.[ch]: conv_check_file_encoding(): added detection
+ of UTF-16/UTF-16BE/UTF-16LE.
+ src/compose.c: automatically convert attached UTF-16 text file to
+ UTF-8.
+
2017-11-15
* src/printing.c: use GTK_UNIT_PIXEL instead of GTK_UNIT_POINTS
diff --git a/libsylph/codeconv.c b/libsylph/codeconv.c
index 8781f446..1ba52ac3 100644
--- a/libsylph/codeconv.c
+++ b/libsylph/codeconv.c
@@ -1681,6 +1681,9 @@ static const struct {
{C_GEORGIAN_PS, CS_GEORGIAN_PS},
{C_TCVN5712_1, CS_TCVN5712_1},
{C_ISO_8859_16, CS_ISO_8859_16},
+ {C_UTF_16, CS_UTF_16},
+ {C_UTF_16BE, CS_UTF_16BE},
+ {C_UTF_16LE, CS_UTF_16LE},
};
static const struct {
@@ -2656,6 +2659,7 @@ CharSet conv_check_file_encoding(const gchar *file)
CharSet enc;
const gchar *enc_str;
gboolean is_locale = TRUE, is_utf8 = TRUE;
+ size_t size;
g_return_val_if_fail(file != NULL, C_AUTO);
@@ -2669,6 +2673,75 @@ CharSet conv_check_file_encoding(const gchar *file)
return C_AUTO;
}
+ /* UTF-16 check */
+ if ((size = fread(buf, 2, BUFFSIZE / 2, fp)) > 0) {
+ CharSet guess_enc = C_AUTO;
+
+ debug_print("conv_check_file_encoding: check first %d bytes of file %s\n", size * 2, file);
+
+ /* BOM check */
+ if ((buf[0] & 0xff) == 0xfe && (buf[1] & 0xff) == 0xff) {
+ debug_print("conv_check_file_encoding: UTF-16 BOM (BE) found\n");
+ guess_enc = C_UTF_16; /* UTF-16BE */
+ } else if ((buf[0] & 0xff) == 0xff && (buf[1] & 0xff) == 0xfe) {
+ debug_print("conv_check_file_encoding: UTF-16 BOM (LE) found\n");
+ guess_enc = C_UTF_16; /* UTF-16LE */
+ }
+ if (guess_enc != C_AUTO) {
+ fclose(fp);
+ return guess_enc;
+ }
+
+ /* search UTF-16 CR/LF */
+ if (memchr(buf, 0x00, size * 2) != NULL) {
+ gint i;
+ guchar c1, c2;
+
+ for (i = 0; i < size; i++) {
+ c1 = buf[i * 2] & 0xff;
+ c2 = buf[i * 2 + 1] & 0xff;
+ if (c1 == 0x00 && c2 == 0x0d) { /* UTF-16BE CR */
+ i++;
+ if (i >= size) {
+ break;
+ }
+ c1 = buf[i * 2] & 0xff;
+ c2 = buf[i * 2 + 1] & 0xff;
+ if (c1 == 0x00 && c2 == 0x0a) { /* UTF-16BE LF */
+ guess_enc = C_UTF_16BE;
+ break;
+ }
+ } else if (c1 == 0x0d && c2 == 0x00) { /* UTF-16LE CR */
+ i++;
+ if (i >= size) {
+ break;
+ }
+ c1 = buf[i * 2] & 0xff;
+ c2 = buf[i * 2 + 1] & 0xff;
+ if (c1 == 0x0a && c2 == 0x00) { /* UTF-16LE LF */
+ guess_enc = C_UTF_16LE;
+ break;
+ }
+ } else if (c1 == 0x00 && c2 == 0x0a) { /* UTF-16BE LF */
+ guess_enc = C_UTF_16BE;
+ break;
+ } else if (c1 == 0x0a && c2 == 0x00) { /* UTF-16LE LF */
+ guess_enc = C_UTF_16LE;
+ break;
+ }
+ }
+
+ if (guess_enc != C_AUTO) {
+ debug_print("conv_check_file_encoding: %s detected\n",
+ conv_get_charset_str(guess_enc));
+ fclose(fp);
+ return guess_enc;
+ }
+ }
+ }
+
+ rewind(fp);
+
while (fgets(buf, sizeof(buf), fp) != NULL) {
gchar *str;
gint error = 0;
diff --git a/libsylph/codeconv.h b/libsylph/codeconv.h
index 121de0da..6f6fc365 100644
--- a/libsylph/codeconv.h
+++ b/libsylph/codeconv.h
@@ -92,7 +92,10 @@ typedef enum
C_WINDOWS_874,
C_GEORGIAN_PS,
C_TCVN5712_1,
- C_ISO_8859_16
+ C_ISO_8859_16,
+ C_UTF_16,
+ C_UTF_16BE,
+ C_UTF_16LE
} CharSet;
typedef enum
@@ -180,6 +183,9 @@ struct _CodeConverter
#define CS_GEORGIAN_PS "GEORGIAN-PS"
#define CS_TCVN5712_1 "TCVN5712-1"
#define CS_ISO_8859_16 "ISO-8859-16"
+#define CS_UTF_16 "UTF-16"
+#define CS_UTF_16BE "UTF-16BE"
+#define CS_UTF_16LE "UTF-16LE"
#define C_INTERNAL C_UTF_8
#define CS_INTERNAL CS_UTF_8
diff --git a/src/compose.c b/src/compose.c
index fceb8ddb..418c85d5 100644
--- a/src/compose.c
+++ b/src/compose.c
@@ -4554,6 +4554,11 @@ static gint compose_write_attach(Compose *compose, FILE *fp,
FILE *attach_fp;
gint len;
EncodingType encoding;
+ ContentType content_type;
+ gchar *tmp_file = NULL;
+ const gchar *src_file;
+ FILE *src_fp;
+ FILE *tmp_fp = NULL;
for (valid = gtk_tree_model_get_iter_first(model, &iter); valid;
valid = gtk_tree_model_iter_next(model, &iter)) {
@@ -4631,67 +4636,134 @@ static gint compose_write_attach(Compose *compose, FILE *fp,
fprintf(fp, "Content-Transfer-Encoding: %s\n\n",
procmime_get_encoding_str(encoding));
+ content_type = procmime_scan_mime_type(ainfo->content_type);
+
+ if (content_type == MIME_TEXT || content_type == MIME_TEXT_HTML) {
+ CharSet enc;
+ gchar *src = NULL;
+ gsize len = 0, dlen = 0;
+ gchar *dest;
+
+ enc = conv_check_file_encoding(ainfo->file);
+ if (enc == C_UTF_16 || enc == C_UTF_16BE || enc == C_UTF_16LE) {
+ g_file_get_contents(ainfo->file, &src, &len, NULL);
+ dest = g_convert(src, len, CS_UTF_8, conv_get_charset_str(enc), NULL, &dlen, NULL);
+ tmp_file = get_tmp_file();
+ if (g_file_set_contents(tmp_file, dest, dlen, NULL) == FALSE) {
+ g_warning("Cannot convert UTF-16 file %s to UTF-8\n", ainfo->file);
+ g_free(tmp_file);
+ tmp_file = NULL;
+ }
+ g_free(dest);
+ g_free(src);
+ }
+ }
+
+ if (tmp_file) {
+ src_file = tmp_file;
+ } else {
+ src_file = ainfo->file;
+ }
+
if (encoding == ENC_BASE64) {
gchar inbuf[B64_LINE_SIZE], outbuf[B64_BUFFSIZE];
- FILE *tmp_fp = attach_fp;
- gchar *tmp_file = NULL;
- ContentType content_type;
+ gchar *canon_file = NULL;
- content_type =
- procmime_scan_mime_type(ainfo->content_type);
if (content_type == MIME_TEXT ||
content_type == MIME_TEXT_HTML ||
content_type == MIME_MESSAGE_RFC822) {
- tmp_file = get_tmp_file();
- if (canonicalize_file(ainfo->file, tmp_file) < 0) {
- g_free(tmp_file);
+ canon_file = get_tmp_file();
+ if (canonicalize_file(src_file, canon_file) < 0) {
+ g_free(canon_file);
+ if (tmp_file) {
+ g_unlink(tmp_file);
+ g_free(tmp_file);
+ }
fclose(attach_fp);
return -1;
}
- if ((tmp_fp = g_fopen(tmp_file, "rb")) == NULL) {
- FILE_OP_ERROR(tmp_file, "fopen");
- g_unlink(tmp_file);
- g_free(tmp_file);
+ if ((tmp_fp = g_fopen(canon_file, "rb")) == NULL) {
+ FILE_OP_ERROR(canon_file, "fopen");
+ g_unlink(canon_file);
+ g_free(canon_file);
+ if (tmp_file) {
+ g_unlink(tmp_file);
+ g_free(tmp_file);
+ }
fclose(attach_fp);
return -1;
}
}
+ if (tmp_fp) {
+ src_fp = tmp_fp;
+ } else {
+ src_fp = attach_fp;
+ }
+
while ((len = fread(inbuf, sizeof(gchar),
- B64_LINE_SIZE, tmp_fp))
+ B64_LINE_SIZE, src_fp))
== B64_LINE_SIZE) {
base64_encode(outbuf, (guchar *)inbuf,
B64_LINE_SIZE);
fputs(outbuf, fp);
fputc('\n', fp);
}
- if (len > 0 && feof(tmp_fp)) {
+ if (len > 0 && feof(src_fp)) {
base64_encode(outbuf, (guchar *)inbuf, len);
fputs(outbuf, fp);
fputc('\n', fp);
}
- if (tmp_file) {
+ if (tmp_fp) {
fclose(tmp_fp);
- g_unlink(tmp_file);
- g_free(tmp_file);
+ tmp_fp = NULL;
}
- } else if (encoding == ENC_QUOTED_PRINTABLE) {
- gchar inbuf[BUFFSIZE], outbuf[BUFFSIZE * 4];
-
- while (fgets(inbuf, sizeof(inbuf), attach_fp) != NULL) {
- qp_encode_line(outbuf, (guchar *)inbuf);
- fputs(outbuf, fp);
+ if (canon_file) {
+ g_unlink(canon_file);
+ g_free(canon_file);
}
} else {
- gchar buf[BUFFSIZE];
+ if (tmp_file) {
+ if ((tmp_fp = g_fopen(tmp_file, "rb")) == NULL) {
+ FILE_OP_ERROR(tmp_file, "fopen");
+ g_unlink(tmp_file);
+ g_free(tmp_file);
+ fclose(attach_fp);
+ return -1;
+ }
+ src_fp = tmp_fp;
+ } else {
+ src_fp = attach_fp;
+ }
+
+ if (encoding == ENC_QUOTED_PRINTABLE) {
+ gchar inbuf[BUFFSIZE], outbuf[BUFFSIZE * 4];
+
+ while (fgets(inbuf, sizeof(inbuf), src_fp) != NULL) {
+ qp_encode_line(outbuf, (guchar *)inbuf);
+ fputs(outbuf, fp);
+ }
+ } else {
+ gchar buf[BUFFSIZE];
- while (fgets(buf, sizeof(buf), attach_fp) != NULL) {
- strcrchomp(buf);
- fputs(buf, fp);
+ while (fgets(buf, sizeof(buf), src_fp) != NULL) {
+ strcrchomp(buf);
+ fputs(buf, fp);
+ }
+ }
+
+ if (tmp_fp) {
+ fclose(tmp_fp);
+ tmp_fp = NULL;
}
}
+ if (tmp_file) {
+ g_unlink(tmp_file);
+ g_free(tmp_file);
+ tmp_file = NULL;
+ }
fclose(attach_fp);
}