diff options
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | libsylph/codeconv.c | 73 | ||||
-rw-r--r-- | libsylph/codeconv.h | 8 | ||||
-rw-r--r-- | src/compose.c | 126 |
4 files changed, 186 insertions, 28 deletions
@@ -1,3 +1,10 @@ +2017-11-16 + + * libsylph/codeconv.[ch]: conv_check_file_encoding(): added detection + of UTF-16/UTF-16BE/UTF-16LE. + src/compose.c: automatically convert attached UTF-16 text file to + UTF-8. + 2017-11-15 * src/printing.c: use GTK_UNIT_PIXEL instead of GTK_UNIT_POINTS diff --git a/libsylph/codeconv.c b/libsylph/codeconv.c index 8781f446..1ba52ac3 100644 --- a/libsylph/codeconv.c +++ b/libsylph/codeconv.c @@ -1681,6 +1681,9 @@ static const struct { {C_GEORGIAN_PS, CS_GEORGIAN_PS}, {C_TCVN5712_1, CS_TCVN5712_1}, {C_ISO_8859_16, CS_ISO_8859_16}, + {C_UTF_16, CS_UTF_16}, + {C_UTF_16BE, CS_UTF_16BE}, + {C_UTF_16LE, CS_UTF_16LE}, }; static const struct { @@ -2656,6 +2659,7 @@ CharSet conv_check_file_encoding(const gchar *file) CharSet enc; const gchar *enc_str; gboolean is_locale = TRUE, is_utf8 = TRUE; + size_t size; g_return_val_if_fail(file != NULL, C_AUTO); @@ -2669,6 +2673,75 @@ CharSet conv_check_file_encoding(const gchar *file) return C_AUTO; } + /* UTF-16 check */ + if ((size = fread(buf, 2, BUFFSIZE / 2, fp)) > 0) { + CharSet guess_enc = C_AUTO; + + debug_print("conv_check_file_encoding: check first %d bytes of file %s\n", size * 2, file); + + /* BOM check */ + if ((buf[0] & 0xff) == 0xfe && (buf[1] & 0xff) == 0xff) { + debug_print("conv_check_file_encoding: UTF-16 BOM (BE) found\n"); + guess_enc = C_UTF_16; /* UTF-16BE */ + } else if ((buf[0] & 0xff) == 0xff && (buf[1] & 0xff) == 0xfe) { + debug_print("conv_check_file_encoding: UTF-16 BOM (LE) found\n"); + guess_enc = C_UTF_16; /* UTF-16LE */ + } + if (guess_enc != C_AUTO) { + fclose(fp); + return guess_enc; + } + + /* search UTF-16 CR/LF */ + if (memchr(buf, 0x00, size * 2) != NULL) { + gint i; + guchar c1, c2; + + for (i = 0; i < size; i++) { + c1 = buf[i * 2] & 0xff; + c2 = buf[i * 2 + 1] & 0xff; + if (c1 == 0x00 && c2 == 0x0d) { /* UTF-16BE CR */ + i++; + if (i >= size) { + break; + } + c1 = buf[i * 2] & 0xff; + c2 = buf[i * 2 + 1] & 0xff; + if (c1 == 0x00 && c2 == 0x0a) { /* UTF-16BE LF */ + guess_enc = C_UTF_16BE; + break; + } + } else if (c1 == 0x0d && c2 == 0x00) { /* UTF-16LE CR */ + i++; + if (i >= size) { + break; + } + c1 = buf[i * 2] & 0xff; + c2 = buf[i * 2 + 1] & 0xff; + if (c1 == 0x0a && c2 == 0x00) { /* UTF-16LE LF */ + guess_enc = C_UTF_16LE; + break; + } + } else if (c1 == 0x00 && c2 == 0x0a) { /* UTF-16BE LF */ + guess_enc = C_UTF_16BE; + break; + } else if (c1 == 0x0a && c2 == 0x00) { /* UTF-16LE LF */ + guess_enc = C_UTF_16LE; + break; + } + } + + if (guess_enc != C_AUTO) { + debug_print("conv_check_file_encoding: %s detected\n", + conv_get_charset_str(guess_enc)); + fclose(fp); + return guess_enc; + } + } + } + + rewind(fp); + while (fgets(buf, sizeof(buf), fp) != NULL) { gchar *str; gint error = 0; diff --git a/libsylph/codeconv.h b/libsylph/codeconv.h index 121de0da..6f6fc365 100644 --- a/libsylph/codeconv.h +++ b/libsylph/codeconv.h @@ -92,7 +92,10 @@ typedef enum C_WINDOWS_874, C_GEORGIAN_PS, C_TCVN5712_1, - C_ISO_8859_16 + C_ISO_8859_16, + C_UTF_16, + C_UTF_16BE, + C_UTF_16LE } CharSet; typedef enum @@ -180,6 +183,9 @@ struct _CodeConverter #define CS_GEORGIAN_PS "GEORGIAN-PS" #define CS_TCVN5712_1 "TCVN5712-1" #define CS_ISO_8859_16 "ISO-8859-16" +#define CS_UTF_16 "UTF-16" +#define CS_UTF_16BE "UTF-16BE" +#define CS_UTF_16LE "UTF-16LE" #define C_INTERNAL C_UTF_8 #define CS_INTERNAL CS_UTF_8 diff --git a/src/compose.c b/src/compose.c index fceb8ddb..418c85d5 100644 --- a/src/compose.c +++ b/src/compose.c @@ -4554,6 +4554,11 @@ static gint compose_write_attach(Compose *compose, FILE *fp, FILE *attach_fp; gint len; EncodingType encoding; + ContentType content_type; + gchar *tmp_file = NULL; + const gchar *src_file; + FILE *src_fp; + FILE *tmp_fp = NULL; for (valid = gtk_tree_model_get_iter_first(model, &iter); valid; valid = gtk_tree_model_iter_next(model, &iter)) { @@ -4631,67 +4636,134 @@ static gint compose_write_attach(Compose *compose, FILE *fp, fprintf(fp, "Content-Transfer-Encoding: %s\n\n", procmime_get_encoding_str(encoding)); + content_type = procmime_scan_mime_type(ainfo->content_type); + + if (content_type == MIME_TEXT || content_type == MIME_TEXT_HTML) { + CharSet enc; + gchar *src = NULL; + gsize len = 0, dlen = 0; + gchar *dest; + + enc = conv_check_file_encoding(ainfo->file); + if (enc == C_UTF_16 || enc == C_UTF_16BE || enc == C_UTF_16LE) { + g_file_get_contents(ainfo->file, &src, &len, NULL); + dest = g_convert(src, len, CS_UTF_8, conv_get_charset_str(enc), NULL, &dlen, NULL); + tmp_file = get_tmp_file(); + if (g_file_set_contents(tmp_file, dest, dlen, NULL) == FALSE) { + g_warning("Cannot convert UTF-16 file %s to UTF-8\n", ainfo->file); + g_free(tmp_file); + tmp_file = NULL; + } + g_free(dest); + g_free(src); + } + } + + if (tmp_file) { + src_file = tmp_file; + } else { + src_file = ainfo->file; + } + if (encoding == ENC_BASE64) { gchar inbuf[B64_LINE_SIZE], outbuf[B64_BUFFSIZE]; - FILE *tmp_fp = attach_fp; - gchar *tmp_file = NULL; - ContentType content_type; + gchar *canon_file = NULL; - content_type = - procmime_scan_mime_type(ainfo->content_type); if (content_type == MIME_TEXT || content_type == MIME_TEXT_HTML || content_type == MIME_MESSAGE_RFC822) { - tmp_file = get_tmp_file(); - if (canonicalize_file(ainfo->file, tmp_file) < 0) { - g_free(tmp_file); + canon_file = get_tmp_file(); + if (canonicalize_file(src_file, canon_file) < 0) { + g_free(canon_file); + if (tmp_file) { + g_unlink(tmp_file); + g_free(tmp_file); + } fclose(attach_fp); return -1; } - if ((tmp_fp = g_fopen(tmp_file, "rb")) == NULL) { - FILE_OP_ERROR(tmp_file, "fopen"); - g_unlink(tmp_file); - g_free(tmp_file); + if ((tmp_fp = g_fopen(canon_file, "rb")) == NULL) { + FILE_OP_ERROR(canon_file, "fopen"); + g_unlink(canon_file); + g_free(canon_file); + if (tmp_file) { + g_unlink(tmp_file); + g_free(tmp_file); + } fclose(attach_fp); return -1; } } + if (tmp_fp) { + src_fp = tmp_fp; + } else { + src_fp = attach_fp; + } + while ((len = fread(inbuf, sizeof(gchar), - B64_LINE_SIZE, tmp_fp)) + B64_LINE_SIZE, src_fp)) == B64_LINE_SIZE) { base64_encode(outbuf, (guchar *)inbuf, B64_LINE_SIZE); fputs(outbuf, fp); fputc('\n', fp); } - if (len > 0 && feof(tmp_fp)) { + if (len > 0 && feof(src_fp)) { base64_encode(outbuf, (guchar *)inbuf, len); fputs(outbuf, fp); fputc('\n', fp); } - if (tmp_file) { + if (tmp_fp) { fclose(tmp_fp); - g_unlink(tmp_file); - g_free(tmp_file); + tmp_fp = NULL; } - } else if (encoding == ENC_QUOTED_PRINTABLE) { - gchar inbuf[BUFFSIZE], outbuf[BUFFSIZE * 4]; - - while (fgets(inbuf, sizeof(inbuf), attach_fp) != NULL) { - qp_encode_line(outbuf, (guchar *)inbuf); - fputs(outbuf, fp); + if (canon_file) { + g_unlink(canon_file); + g_free(canon_file); } } else { - gchar buf[BUFFSIZE]; + if (tmp_file) { + if ((tmp_fp = g_fopen(tmp_file, "rb")) == NULL) { + FILE_OP_ERROR(tmp_file, "fopen"); + g_unlink(tmp_file); + g_free(tmp_file); + fclose(attach_fp); + return -1; + } + src_fp = tmp_fp; + } else { + src_fp = attach_fp; + } + + if (encoding == ENC_QUOTED_PRINTABLE) { + gchar inbuf[BUFFSIZE], outbuf[BUFFSIZE * 4]; + + while (fgets(inbuf, sizeof(inbuf), src_fp) != NULL) { + qp_encode_line(outbuf, (guchar *)inbuf); + fputs(outbuf, fp); + } + } else { + gchar buf[BUFFSIZE]; - while (fgets(buf, sizeof(buf), attach_fp) != NULL) { - strcrchomp(buf); - fputs(buf, fp); + while (fgets(buf, sizeof(buf), src_fp) != NULL) { + strcrchomp(buf); + fputs(buf, fp); + } + } + + if (tmp_fp) { + fclose(tmp_fp); + tmp_fp = NULL; } } + if (tmp_file) { + g_unlink(tmp_file); + g_free(tmp_file); + tmp_file = NULL; + } fclose(attach_fp); } |