diff options
author | hiro <hiro@ee746299-78ed-0310-b773-934348b2243d> | 2013-08-26 06:35:49 +0000 |
---|---|---|
committer | hiro <hiro@ee746299-78ed-0310-b773-934348b2243d> | 2013-08-26 06:35:49 +0000 |
commit | 4f43482ff38ff553470b40889d5a68a14b656a3e (patch) | |
tree | 602721cb13f2320b33812d79dea0c42428f2382d | |
parent | e21d94c6fcb2112906f15be5b73864ea989358e7 (diff) |
properly unescape special characters in HTML links (#120).
git-svn-id: svn://sylpheed.sraoss.jp/sylpheed/trunk@3273 ee746299-78ed-0310-b773-934348b2243d
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | libsylph/html.c | 72 |
2 files changed, 76 insertions, 1 deletions
@@ -1,3 +1,8 @@
+2013-08-26
+
+	* libsylph/html.c: properly unescape special characters in links
+	  (#120).
+
 2013-08-16
 
 	* libsylph/imap.c: imap_status(): fixed the parse failure of STATUS response
diff --git a/libsylph/html.c b/libsylph/html.c
index 8ae3e780..9241793c 100644
--- a/libsylph/html.c
+++ b/libsylph/html.c
@@ -191,6 +191,9 @@ static void html_get_parenthesis (HTMLParser *parser,
 					 gchar *buf,
 					 gint len);
 
+static gchar *html_unescape_str (HTMLParser *parser,
+					 const gchar *str);
+
 
 HTMLParser *html_parser_new(FILE *fp, CodeConverter *conv)
 {
@@ -553,7 +556,7 @@ static HTMLState html_parse_tag(HTMLParser *parser)
 
 			if (attr && !strcmp(attr->name, "href")) {
 				g_free(parser->href);
-				parser->href = g_strdup(attr->value);
+				parser->href = html_unescape_str(parser, attr->value);
 				parser->state = HTML_HREF;
 				break;
 			}
@@ -719,3 +722,70 @@ static void html_get_parenthesis(HTMLParser *parser, gchar *buf, gint len)
 	g_strstrip(buf);
 	parser->bufp = p + 1;
 }
+
+static gchar *html_unescape_str(HTMLParser *parser, const gchar *str)
+{
+	const gchar *p = str;
+	gchar symbol_name[9];
+	gint n;
+	const gchar *val;
+	gchar *unescape_str;
+	gchar *up;
+
+	if (!str)
+		return NULL;
+
+	up = unescape_str = g_malloc(strlen(str) + 1);
+
+	while (*p != '\0') {
+		switch (*p) {
+		case '&':
+			for (n = 0; p[n] != '\0' && p[n] != ';'; n++)
+				;
+			if (n > 7 || p[n] != ';') {
+				*up++ = *p++;
+				break;
+			}
+			strncpy2(symbol_name, p, n + 2);
+			p += n + 1;
+
+			if ((val = g_hash_table_lookup(parser->symbol_table, symbol_name)) != NULL) {
+				gint len = strlen(val);
+				if (len <= n + 1) {
+					strcpy(up, val);
+					up += len;
+				} else {
+					strcpy(up, symbol_name);
+					up += n + 1;
+				}
+			} else if (symbol_name[1] == '#' && g_ascii_isdigit(symbol_name[2])) {
+				gint ch;
+
+				ch = atoi(symbol_name + 2);
+				if (ch < 128 && g_ascii_isprint(ch)) {
+					*up++ = ch;
+				} else {
+					/* ISO 10646 to UTF-8 */
+					gchar buf[6];
+					gint len;
+
+					len = g_unichar_to_utf8((gunichar)ch, buf);
+					if (len > 0 && len <= n + 1) {
+						memcpy(up, buf, len);
+						up += len;
+					} else {
+						strcpy(up, symbol_name);
+						up += n + 1;
+					}
+				}
+			}
+
+			break;
+		default:
+			*up++ = *p++;
+		}
+	}
+
+	*up = '\0';
+	return unescape_str;
+}