#include #include #include #include #include #include #include #include #include "xslt.h" #include "s1kd_tools.h" #define PROG_NAME "s1kd-index" #define VERSION "1.10.0" /* Path to text nodes where indexFlags may occur */ #define ELEMENTS_XPATH BAD_CAST "//para/text()" #define PRE_TERM_DELIM BAD_CAST " " #define POST_TERM_DELIM BAD_CAST " .," #define ERR_PREFIX PROG_NAME ": ERROR: " #define INF_PREFIX PROG_NAME ": INFO: " #define E_NO_LIST ERR_PREFIX "Could not read index flags from %s\n" #define E_BAD_LIST ERR_PREFIX "Could not read list: %s\n" #define E_NO_FILE ERR_PREFIX "Could not read file: %s\n" #define I_MARKUP INF_PREFIX "Adding index flags to %s...\n" #define I_DELETE INF_PREFIX "Deleting index flags from %s...\n" #define EXIT_NO_LIST 1 static enum verbosity { QUIET, NORMAL, VERBOSE } verbosity = NORMAL; /* Help/usage message */ static void show_help(void) { puts("Usage:"); puts(" " PROG_NAME " -h?"); puts(" " PROG_NAME " [-I ] [-filqv] [...]"); puts(" " PROG_NAME " -D [-filqv] [...]"); puts(""); puts("Options:"); puts(" -D, --delete Delete current index flags."); puts(" -f, --overwrite Overwrite input module(s)."); puts(" -h, -?, --help Show help/usage message."); puts(" -I, --indexflags Specify a custom .indexflags file"); puts(" -i, --ignore-case Ignore case when flagging terms."); puts(" -l, --list Input is a list of file names."); puts(" -q, --quiet Quiet mode."); puts(" -v, --verbose Verbose output."); puts(" --version Show version information."); LIBXML2_PARSE_LONGOPT_HELP } static void show_version(void) { printf("%s (s1kd-tools) %s\n", PROG_NAME, VERSION); printf("Using libxml %s and libxslt %s\n", xmlParserVersion, xsltEngineVersion); } /* Return the lowest level in an indexFlag. This is matched against the text * to determine where to insert the flag. */ static xmlChar *last_level(xmlNodePtr flag) { xmlChar *lvl; if ((lvl = xmlGetProp(flag, BAD_CAST "indexLevelFour"))) { return lvl; } else if ((lvl = xmlGetProp(flag, BAD_CAST "indexLevelThree"))) { return lvl; } else if ((lvl = xmlGetProp(flag, BAD_CAST "indexLevelTwo"))) { return lvl; } else if ((lvl = xmlGetProp(flag, BAD_CAST "indexLevelOne"))) { return lvl; } return NULL; } static bool is_term(xmlChar *content, int content_len, int i, xmlChar *term, int term_len, bool ignorecase) { bool is; xmlChar s, e; s = i == 0 ? ' ' : content[i - 1]; e = i + term_len >= content_len - 1 ? ' ' : content[i + term_len]; is = xmlStrchr(PRE_TERM_DELIM, s) && (ignorecase ? xmlStrncasecmp(content + i, term, term_len) : xmlStrncmp(content + i, term, term_len)) == 0 && xmlStrchr(POST_TERM_DELIM, e); return is; } /* Insert indexFlag elements after matched terms. */ static void gen_index_node(xmlNodePtr node, xmlNodePtr flag, bool ignorecase) { xmlChar *content; xmlChar *term; int term_len, content_len; int i; content = xmlNodeGetContent(node); content_len = xmlStrlen(content); term = last_level(flag); term_len = xmlStrlen(term); i = 0; while (i + term_len <= content_len) { if (is_term(content, content_len, i, term, term_len, ignorecase)) { xmlChar *s1 = xmlStrndup(content, i + term_len); xmlChar *s2 = xmlStrsub(content, i + term_len, content_len - (i + term_len)); xmlNodePtr acr; xmlFree(content); xmlNodeSetContent(node, s1); xmlFree(s1); acr = xmlAddNextSibling(node, xmlCopyNode(flag, 1)); node = xmlAddNextSibling(acr, xmlNewText(s2)); content = s2; content_len = xmlStrlen(s2); i = 0; } else { ++i; } } xmlFree(term); xmlFree(content); } /* Flag an individual term in all applicable elements in a module. */ static void gen_index_flag(xmlNodePtr flag, xmlXPathContextPtr ctx, bool ignorecase) { xmlXPathObjectPtr obj; obj = xmlXPathEvalExpression(ELEMENTS_XPATH, ctx); if (!xmlXPathNodeSetIsEmpty(obj->nodesetval)) { int i; for (i = 0; i < obj->nodesetval->nodeNr; ++i) { gen_index_node(obj->nodesetval->nodeTab[i], flag, ignorecase); } } xmlXPathFreeObject(obj); } /* Insert indexFlags for each term included in the specified index file. */ static void gen_index_flags(xmlNodeSetPtr flags, xmlXPathContextPtr ctx, bool ignorecase) { int i; for (i = 0; i < flags->nodeNr; ++i) { gen_index_flag(flags->nodeTab[i], ctx, ignorecase); } } /* Apply a built-in XSLT transform to a doc in place. */ static void transform_doc(xmlDocPtr doc, unsigned char *xsl, unsigned int len) { xmlDocPtr styledoc, src, res; xsltStylesheetPtr style; xmlNodePtr old; src = xmlCopyDoc(doc, 1); styledoc = read_xml_mem((const char *) xsl, len); style = xsltParseStylesheetDoc(styledoc); res = xsltApplyStylesheet(style, src, NULL); old = xmlDocSetRootElement(doc, xmlCopyNode(xmlDocGetRootElement(res), 1)); xmlFreeNode(old); xmlFreeDoc(src); xmlFreeDoc(res); xsltFreeStylesheet(style); } /* Convert index flags for older issues. */ static void convert_to_iss_30(xmlDocPtr doc) { transform_doc(doc, iss30_xsl, iss30_xsl_len); } static void delete_index_flags(const char *path, bool overwrite) { xmlDocPtr doc; if (verbosity >= VERBOSE) { fprintf(stderr, I_DELETE, path); } doc = read_xml_doc(path); transform_doc(doc, delete_xsl, delete_xsl_len); if (overwrite) { save_xml_doc(doc, path); } else { save_xml_doc(doc, "-"); } } /* Insert indexFlag elements after matched terms in a document. */ static void gen_index(const char *path, xmlDocPtr index_doc, bool overwrite, bool ignorecase) { xmlDocPtr doc; xmlXPathContextPtr doc_ctx, index_ctx; xmlXPathObjectPtr index_obj; xmlNodeSetPtr flags; if (verbosity >= VERBOSE) { fprintf(stderr, I_MARKUP, path); } if (!(doc = read_xml_doc(path))) { if (verbosity >= NORMAL) { fprintf(stderr, E_NO_FILE, path); } return; } index_ctx = xmlXPathNewContext(index_doc); index_obj = xmlXPathEvalExpression(BAD_CAST "//indexFlag", index_ctx); flags = index_obj->nodesetval; doc_ctx = xmlXPathNewContext(doc); if (!xmlXPathNodeSetIsEmpty(flags)) { gen_index_flags(flags, doc_ctx, ignorecase); } xmlXPathFreeContext(doc_ctx); xmlXPathFreeObject(index_obj); xmlXPathFreeContext(index_ctx); if (xmlStrcmp(xmlFirstElementChild(xmlDocGetRootElement(doc))->name, BAD_CAST "idstatus") == 0) { convert_to_iss_30(doc); } if (overwrite) { save_xml_doc(doc, path); } else { save_xml_doc(doc, "-"); } xmlFreeDoc(doc); } static xmlDocPtr read_index_flags(const char *fname) { xmlDocPtr index_doc; if (!(index_doc = read_xml_doc(fname))) { if (verbosity >= NORMAL) { fprintf(stderr, E_NO_LIST, fname); } exit(EXIT_NO_LIST); } return index_doc; } static void handle_list(const char *path, bool delflags, xmlDocPtr index_doc, bool overwrite, bool ignorecase) { FILE *f; char line[PATH_MAX]; if (path) { f = fopen(path, "r"); } else { f = stdin; } if (!f) { if (verbosity >= NORMAL) { fprintf(stderr, E_BAD_LIST, path); } return; } while (fgets(line, PATH_MAX, f)) { strtok(line, "\t\r\n"); if (delflags) { delete_index_flags(line, overwrite); } else { gen_index(line, index_doc, overwrite, ignorecase); } } fclose(f); } int main(int argc, char **argv) { int i; bool overwrite = false; bool ignorecase = false; bool delflags = false; bool list = false; xmlDocPtr index_doc = NULL; const char *sopts = "DfI:liqvh?"; struct option lopts[] = { {"version" , no_argument , 0, 0}, {"help" , no_argument , 0, 'h'}, {"delete" , no_argument , 0, 'D'}, {"overwrite" , no_argument , 0, 'f'}, {"indexflags" , required_argument, 0, 'I'}, {"ignore-case", no_argument , 0, 'i'}, {"list" , no_argument , 0, 'l'}, {"quiet" , no_argument , 0, 'q'}, {"verbose" , no_argument , 0, 'v'}, LIBXML2_PARSE_LONGOPT_DEFS {0, 0, 0, 0} }; int loptind = 0; while ((i = getopt_long(argc, argv, sopts, lopts, &loptind)) != -1) { switch (i) { case 0: if (strcmp(lopts[loptind].name, "version") == 0) { show_version(); return 0; } LIBXML2_PARSE_LONGOPT_HANDLE(lopts, loptind, optarg) break; case 'D': delflags = true; break; case 'f': overwrite = true; break; case 'I': if (!index_doc) { index_doc = read_index_flags(optarg); } break; case 'i': ignorecase = true; break; case 'l': list = true; break; case 'q': --verbosity; break; case 'v': ++verbosity; break; case 'h': case '?': show_help(); return 0; } } if (!index_doc && !delflags) { char fname[PATH_MAX]; find_config(fname, DEFAULT_INDEXFLAGS_FNAME); index_doc = read_index_flags(fname); } if (optind < argc) { for (i = optind; i < argc; ++i) { if (list) { handle_list(argv[i], delflags, index_doc, overwrite, ignorecase); } else if (delflags) { delete_index_flags(argv[i], overwrite); } else { gen_index(argv[i], index_doc, overwrite, ignorecase); } } } else if (list) { handle_list(NULL, delflags, index_doc, overwrite, ignorecase); } else if (delflags) { delete_index_flags("-", false); } else { gen_index("-", index_doc, false, ignorecase); } xmlFreeDoc(index_doc); xsltCleanupGlobals(); xmlCleanupParser(); return 0; }