diff options
author | hiro <hiro@ee746299-78ed-0310-b773-934348b2243d> | 2005-08-31 06:10:31 +0000 |
---|---|---|
committer | hiro <hiro@ee746299-78ed-0310-b773-934348b2243d> | 2005-08-31 06:10:31 +0000 |
commit | f36577b27b6f352f140cf1f25755d39661bd4072 (patch) | |
tree | 664d196337dc86ddafc6218c8c9f19055e22e155 /libsylph/xml.c | |
parent | 6ae811ae5e6a0463dadc9ebb6f833dc5154700bd (diff) |
made some core modules library (libsylph).
git-svn-id: svn://sylpheed.sraoss.jp/sylpheed/trunk@528 ee746299-78ed-0310-b773-934348b2243d
Diffstat (limited to 'libsylph/xml.c')
-rw-r--r-- | libsylph/xml.c | 655 |
1 files changed, 655 insertions, 0 deletions
diff --git a/libsylph/xml.c b/libsylph/xml.c new file mode 100644 index 00000000..62a04829 --- /dev/null +++ b/libsylph/xml.c @@ -0,0 +1,655 @@ +/* + * Sylpheed -- a GTK+ based, lightweight, and fast e-mail client + * Copyright (C) 1999-2005 Hiroyuki Yamamoto + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +#include <glib.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> + +#include "xml.h" +#include "utils.h" +#include "codeconv.h" + +#define SPARSE_MEMORY +/* if this is defined all attr.names and tag.names are stored + * in a hash table */ +#if defined(SPARSE_MEMORY) +#include "stringtable.h" + +static StringTable *xml_string_table; + +static void xml_string_table_create(void) +{ + if (xml_string_table == NULL) + xml_string_table = string_table_new(); +} +#define XML_STRING_ADD(str) \ + string_table_insert_string(xml_string_table, (str)) +#define XML_STRING_FREE(str) \ + string_table_free_string(xml_string_table, (str)) + +#define XML_STRING_TABLE_CREATE() \ + xml_string_table_create() + +#else /* !SPARSE_MEMORY */ + +#define XML_STRING_ADD(str) \ + g_strdup(str) +#define XML_STRING_FREE(str) \ + g_free(str) + +#define XML_STRING_TABLE_CREATE() + +#endif /* SPARSE_MEMORY */ + +static void xml_free_tag (XMLTag *tag); +static gint xml_get_parenthesis (XMLFile *file, + gchar *buf, + gint len); + +XMLFile *xml_open_file(const gchar *path) +{ + XMLFile *newfile; + + g_return_val_if_fail(path != NULL, NULL); + + XML_STRING_TABLE_CREATE(); + + newfile = g_new(XMLFile, 1); + + newfile->fp = g_fopen(path, "rb"); + if (!newfile->fp) { + g_free(newfile); + return NULL; + } + + newfile->buf = g_string_new(NULL); + newfile->bufp = newfile->buf->str; + + newfile->dtd = NULL; + newfile->encoding = NULL; + newfile->tag_stack = NULL; + newfile->level = 0; + newfile->is_empty_element = FALSE; + + return newfile; +} + +void xml_close_file(XMLFile *file) +{ + g_return_if_fail(file != NULL); + + if (file->fp) fclose(file->fp); + + g_string_free(file->buf, TRUE); + + g_free(file->dtd); + g_free(file->encoding); + + while (file->tag_stack != NULL) + xml_pop_tag(file); + + g_free(file); +} + +static GNode *xml_build_tree(XMLFile *file, GNode *parent, guint level) +{ + GNode *node = NULL; + XMLNode *xmlnode; + XMLTag *tag; + + while (xml_parse_next_tag(file) == 0) { + if (file->level < level) break; + if (file->level == level) { + g_warning("xml_build_tree(): Parse error\n"); + break; + } + + tag = xml_get_current_tag(file); + if (!tag) break; + xmlnode = xml_node_new(xml_copy_tag(tag), NULL); + xmlnode->element = xml_get_element(file); + if (!parent) + node = g_node_new(xmlnode); + else + node = g_node_append_data(parent, xmlnode); + + xml_build_tree(file, node, file->level); + if (file->level == 0) break; + } + + return node; +} + +GNode *xml_parse_file(const gchar *path) +{ + XMLFile *file; + GNode *node; + + file = xml_open_file(path); + g_return_val_if_fail(file != NULL, NULL); + + xml_get_dtd(file); + + node = xml_build_tree(file, NULL, file->level); + + xml_close_file(file); + +#if defined(SPARSE_MEMORY) + if (get_debug_mode()) + string_table_get_stats(xml_string_table); +#endif + + return node; +} + +gint xml_get_dtd(XMLFile *file) +{ + gchar buf[XMLBUFSIZE]; + gchar *bufp = buf; + + if (xml_get_parenthesis(file, buf, sizeof(buf)) < 0) return -1; + + if ((*bufp++ == '?') && + (bufp = strcasestr(bufp, "xml")) && + (bufp = strcasestr(bufp + 3, "version")) && + (bufp = strchr(bufp + 7, '?'))) { + file->dtd = g_strdup(buf); + if ((bufp = strcasestr(buf, "encoding=\""))) { + bufp += 9; + extract_quote(bufp, '"'); + file->encoding = g_strdup(bufp); + } else + file->encoding = g_strdup(CS_INTERNAL); + } else { + g_warning("Can't get xml dtd\n"); + return -1; + } + + return 0; +} + +gint xml_parse_next_tag(XMLFile *file) +{ + gchar buf[XMLBUFSIZE]; + gchar *bufp = buf; + gchar *tag_str; + XMLTag *tag; + gint len; + + if (file->is_empty_element == TRUE) { + file->is_empty_element = FALSE; + xml_pop_tag(file); + return 0; + } + + if (xml_get_parenthesis(file, buf, sizeof(buf)) < 0) { + g_warning("xml_parse_next_tag(): Can't parse next tag\n"); + return -1; + } + + /* end-tag */ + if (buf[0] == '/') { + if (strcmp(xml_get_current_tag(file)->tag, buf + 1) != 0) { + g_warning("xml_parse_next_tag(): Tag name mismatch: %s\n", buf); + return -1; + } + xml_pop_tag(file); + return 0; + } + + tag = xml_tag_new(NULL); + xml_push_tag(file, tag); + + len = strlen(buf); + if (len > 0 && buf[len - 1] == '/') { + file->is_empty_element = TRUE; + buf[len - 1] = '\0'; + g_strchomp(buf); + } + if (strlen(buf) == 0) { + g_warning("xml_parse_next_tag(): Tag name is empty\n"); + return -1; + } + + while (*bufp != '\0' && !g_ascii_isspace(*bufp)) bufp++; + if (*bufp == '\0') { + tag_str = conv_codeset_strdup(buf, file->encoding, CS_INTERNAL); + if (tag_str) { + tag->tag = XML_STRING_ADD(tag_str); + g_free(tag_str); + } else + tag->tag = XML_STRING_ADD(buf); + return 0; + } else { + *bufp++ = '\0'; + tag_str = conv_codeset_strdup(buf, file->encoding, CS_INTERNAL); + if (tag_str) { + tag->tag = XML_STRING_ADD(tag_str); + g_free(tag_str); + } else + tag->tag = XML_STRING_ADD(buf); + } + + /* parse attributes ( name=value ) */ + while (*bufp) { + XMLAttr *attr; + gchar *attr_name; + gchar *attr_value; + gchar *utf8_attr_name; + gchar *utf8_attr_value; + gchar *p; + gchar quote; + + while (g_ascii_isspace(*bufp)) bufp++; + attr_name = bufp; + if ((p = strchr(attr_name, '=')) == NULL) { + g_warning("xml_parse_next_tag(): Syntax error in tag\n"); + return -1; + } + bufp = p; + *bufp++ = '\0'; + while (g_ascii_isspace(*bufp)) bufp++; + + if (*bufp != '"' && *bufp != '\'') { + g_warning("xml_parse_next_tag(): Syntax error in tag\n"); + return -1; + } + quote = *bufp; + bufp++; + attr_value = bufp; + if ((p = strchr(attr_value, quote)) == NULL) { + g_warning("xml_parse_next_tag(): Syntax error in tag\n"); + return -1; + } + bufp = p; + *bufp++ = '\0'; + + g_strchomp(attr_name); + xml_unescape_str(attr_value); + utf8_attr_name = conv_codeset_strdup + (attr_name, file->encoding, CS_INTERNAL); + utf8_attr_value = conv_codeset_strdup + (attr_value, file->encoding, CS_INTERNAL); + if (!utf8_attr_name) + utf8_attr_name = g_strdup(attr_name); + if (!utf8_attr_value) + utf8_attr_value = g_strdup(attr_value); + + attr = xml_attr_new(utf8_attr_name, utf8_attr_value); + xml_tag_add_attr(tag, attr); + + g_free(utf8_attr_value); + g_free(utf8_attr_name); + } + + return 0; +} + +void xml_push_tag(XMLFile *file, XMLTag *tag) +{ + g_return_if_fail(tag != NULL); + + file->tag_stack = g_list_prepend(file->tag_stack, tag); + file->level++; +} + +void xml_pop_tag(XMLFile *file) +{ + XMLTag *tag; + + if (!file->tag_stack) return; + + tag = (XMLTag *)file->tag_stack->data; + + xml_free_tag(tag); + file->tag_stack = g_list_remove(file->tag_stack, tag); + file->level--; +} + +XMLTag *xml_get_current_tag(XMLFile *file) +{ + if (file->tag_stack) + return (XMLTag *)file->tag_stack->data; + else + return NULL; +} + +GList *xml_get_current_tag_attr(XMLFile *file) +{ + XMLTag *tag; + + tag = xml_get_current_tag(file); + if (!tag) return NULL; + + return tag->attr; +} + +gchar *xml_get_element(XMLFile *file) +{ + gchar *str; + gchar *new_str; + gchar *end; + + while ((end = strchr(file->bufp, '<')) == NULL) + if (xml_read_line(file) < 0) return NULL; + + if (end == file->bufp) + return NULL; + + str = g_strndup(file->bufp, end - file->bufp); + /* this is not XML1.0 strict */ + g_strstrip(str); + xml_unescape_str(str); + + file->bufp = end; + xml_truncate_buf(file); + + if (str[0] == '\0') { + g_free(str); + return NULL; + } + + new_str = conv_codeset_strdup(str, file->encoding, CS_INTERNAL); + if (!new_str) + new_str = g_strdup(str); + g_free(str); + + return new_str; +} + +gint xml_read_line(XMLFile *file) +{ + gchar buf[XMLBUFSIZE]; + gint index; + + if (fgets(buf, sizeof(buf), file->fp) == NULL) + return -1; + + index = file->bufp - file->buf->str; + + g_string_append(file->buf, buf); + + file->bufp = file->buf->str + index; + + return 0; +} + +void xml_truncate_buf(XMLFile *file) +{ + gint len; + + len = file->bufp - file->buf->str; + if (len > 0) { + g_string_erase(file->buf, 0, len); + file->bufp = file->buf->str; + } +} + +gboolean xml_compare_tag(XMLFile *file, const gchar *name) +{ + XMLTag *tag; + + tag = xml_get_current_tag(file); + + if (tag && strcmp(tag->tag, name) == 0) + return TRUE; + else + return FALSE; +} + +XMLNode *xml_node_new(XMLTag *tag, const gchar *text) +{ + XMLNode *node; + + node = g_new(XMLNode, 1); + node->tag = tag; + node->element = g_strdup(text); + + return node; +} + +XMLTag *xml_tag_new(const gchar *tag) +{ + XMLTag *new_tag; + + new_tag = g_new(XMLTag, 1); + if (tag) + new_tag->tag = XML_STRING_ADD(tag); + else + new_tag->tag = NULL; + new_tag->attr = NULL; + + return new_tag; +} + +XMLAttr *xml_attr_new(const gchar *name, const gchar *value) +{ + XMLAttr *new_attr; + + new_attr = g_new(XMLAttr, 1); + new_attr->name = XML_STRING_ADD(name); + new_attr->value = g_strdup(value); + + return new_attr; +} + +void xml_tag_add_attr(XMLTag *tag, XMLAttr *attr) +{ + tag->attr = g_list_append(tag->attr, attr); +} + +XMLTag *xml_copy_tag(XMLTag *tag) +{ + XMLTag *new_tag; + XMLAttr *attr; + GList *list; + + new_tag = xml_tag_new(tag->tag); + for (list = tag->attr; list != NULL; list = list->next) { + attr = xml_copy_attr((XMLAttr *)list->data); + xml_tag_add_attr(new_tag, attr); + } + + return new_tag; +} + +XMLAttr *xml_copy_attr(XMLAttr *attr) +{ + return xml_attr_new(attr->name, attr->value); +} + +gint xml_unescape_str(gchar *str) +{ + gchar *start; + gchar *end; + gchar *p = str; + gchar *esc_str; + gchar ch; + gint len; + + while ((start = strchr(p, '&')) != NULL) { + if ((end = strchr(start + 1, ';')) == NULL) { + g_warning("Unescaped `&' appeared\n"); + p = start + 1; + continue; + } + len = end - start + 1; + if (len < 3) { + p = end + 1; + continue; + } + + Xstrndup_a(esc_str, start, len, return -1); + if (!strcmp(esc_str, "<")) + ch = '<'; + else if (!strcmp(esc_str, ">")) + ch = '>'; + else if (!strcmp(esc_str, "&")) + ch = '&'; + else if (!strcmp(esc_str, "'")) + ch = '\''; + else if (!strcmp(esc_str, """)) + ch = '\"'; + else { + p = end + 1; + continue; + } + + *start = ch; + memmove(start + 1, end + 1, strlen(end + 1) + 1); + p = start + 1; + } + + return 0; +} + +gint xml_file_put_escape_str(FILE *fp, const gchar *str) +{ + const gchar *p; + + g_return_val_if_fail(fp != NULL, -1); + + if (!str) return 0; + + for (p = str; *p != '\0'; p++) { + switch (*p) { + case '<': + fputs("<", fp); + break; + case '>': + fputs(">", fp); + break; + case '&': + fputs("&", fp); + break; + case '\'': + fputs("'", fp); + break; + case '\"': + fputs(""", fp); + break; + default: + fputc(*p, fp); + } + } + + return 0; +} + +gint xml_file_put_xml_decl(FILE *fp) +{ + g_return_val_if_fail(fp != NULL, -1); + + fprintf(fp, "<?xml version=\"1.0\" encoding=\"%s\"?>\n", CS_INTERNAL); + return 0; +} + +gint xml_file_put_node(FILE *fp, XMLNode *node) +{ + GList *cur; + + g_return_val_if_fail(fp != NULL, -1); + g_return_val_if_fail(node != NULL, -1); + + fprintf(fp, "<%s", node->tag->tag); + + for (cur = node->tag->attr; cur != NULL; cur = cur->next) { + XMLAttr *attr = (XMLAttr *)cur->data; + fprintf(fp, " %s=\"", attr->name); + xml_file_put_escape_str(fp, attr->value); + fputs("\"", fp); + } + + if (node->element) { + fputs(">", fp); + xml_file_put_escape_str(fp, node->element); + fprintf(fp, "</%s>\n", node->tag->tag); + } else { + fputs(" />\n", fp); + } + + return 0; +} + +void xml_free_node(XMLNode *node) +{ + if (!node) return; + + xml_free_tag(node->tag); + g_free(node->element); + g_free(node); +} + +static gboolean xml_free_func(GNode *node, gpointer data) +{ + XMLNode *xmlnode = node->data; + + xml_free_node(xmlnode); + return FALSE; +} + +void xml_free_tree(GNode *node) +{ + g_return_if_fail(node != NULL); + + g_node_traverse(node, G_PRE_ORDER, G_TRAVERSE_ALL, -1, xml_free_func, + NULL); + + g_node_destroy(node); +} + +static void xml_free_tag(XMLTag *tag) +{ + if (!tag) return; + + XML_STRING_FREE(tag->tag); + while (tag->attr != NULL) { + XMLAttr *attr = (XMLAttr *)tag->attr->data; + XML_STRING_FREE(attr->name); + g_free(attr->value); + g_free(attr); + tag->attr = g_list_remove(tag->attr, tag->attr->data); + } + g_free(tag); +} + +static gint xml_get_parenthesis(XMLFile *file, gchar *buf, gint len) +{ + gchar *start; + gchar *end; + + buf[0] = '\0'; + + while ((start = strchr(file->bufp, '<')) == NULL) + if (xml_read_line(file) < 0) return -1; + + start++; + file->bufp = start; + + while ((end = strchr(file->bufp, '>')) == NULL) + if (xml_read_line(file) < 0) return -1; + + strncpy2(buf, file->bufp, MIN(end - file->bufp + 1, len)); + g_strstrip(buf); + file->bufp = end + 1; + xml_truncate_buf(file); + + return 0; +} |