..
/
download
#include <unistd.h>
#include <getopt.h>
#include <string.h>
#include <libxml/tree.h>
#include <libxml/xmlschemas.h>
#include <libxml/debugXML.h>
#include "s1kd_tools.h"
#define PROG_NAME "s1kd-validate"
#define VERSION "3.1.1"
#define ERR_PREFIX PROG_NAME ": ERROR: "
#define SUCCESS_PREFIX PROG_NAME ": SUCCESS: "
#define FAILED_PREFIX PROG_NAME ": FAILED: "
#define E_BAD_LIST ERR_PREFIX "Could not read list file: %s\n"
#define E_MAX_SCHEMA_PARSERS ERR_PREFIX "Maximum number of schemas reached: %d\n"
#define E_BAD_IDREF ERR_PREFIX "%s (%ld): No matching ID for '%s'.\n"
#define EXIT_MAX_SCHEMAS 2
#define EXIT_MISSING_SCHEMA 3
/* FIXME:
*
* libxml2 XML schema validation does not currently check IDREF/IDREFS, so this
* tool implements its own checks. If libxml2 ever expands its XML schema
* validation to include IDREF/IDREFS checking, this can probably be removed.
*
* Because this tool is designed for use only with S1000D CSDB objects, the
* check can be done more efficiently by hardcoding all attributes that are
* type IDREF/IDREFS.
*
* xml-utils/xml-validate has a more general-purpose implementation which
* parses the XML schema for ID, IDS, IDREF and IDREFS types, but it is
* noticeably slower.
*/
/* XPath to match all invalid IDREF attributes. */
#define INVALID_ID_XPATH BAD_CAST \
"//@applicMapRefId[not(//@id=.)]|" \
"//@applicRefId[not(//@id=.)]|" \
"//@condRefId[not(//@id=.)]|" \
"//@condTypeRefId[not(//@id=.)]|" \
"//@conditionidref[not(//@id=.)]|" \
"//@condtyperef[not(//@id=.)]|" \
"//@dependencyTest[not(//@id=.)]|" \
"//@derivativeClassificationRefId[not(//@id = .)]|" \
"//@internalRefId[not(//@id=.)]|" \
"//@nextActionRefId[not(//@id=.)]|" \
"//@refapplic[not(//@id=.)]|" \
"//@refid[not(//@id=.)]|" \
"//@xrefid[not(//@id=.)]"
/* XPath to match all IDREFS attributes to check their validity. */
#define INVALID_IDS_XPATH BAD_CAST \
"//@reasonForUpdateRefIds|//@warningRefs|//@cautionRefs|//@controlAuthorityRefs"
/* URI for XML schema instance namespace. */
#define XSI_URI BAD_CAST "http://www.w3.org/2001/XMLSchema-instance"
static enum verbosity_level {SILENT, NORMAL, VERBOSE} verbosity = NORMAL;
enum show_fnames { SHOW_NONE, SHOW_INVALID, SHOW_VALID };
/* Cache schemas to prevent parsing them twice (mainly needed when accessing
* the schema over a network)
*/
struct s1kd_schema_parser {
char *url;
xmlSchemaParserCtxtPtr ctxt;
xmlSchemaPtr schema;
xmlSchemaValidCtxtPtr valid_ctxt;
};
/* Initial max schema parsers. */
static unsigned SCHEMA_PARSERS_MAX = 1;
static struct s1kd_schema_parser *schema_parsers;
static int schema_parser_count = 0;
/* The signature of xmlStructuredErrorFunc is different from v2.12.0 and up. */
#if LIBXML_VERSION < 21200
static void print_error(void *userData, xmlErrorPtr error)
#else
static void print_error(void *userData, const xmlError *error)
#endif
{
if (error->file) {
fprintf(userData, ERR_PREFIX "%s (%d): %s", error->file, error->line, error->message);
} else {
fprintf(userData, ERR_PREFIX "%s\n", error->message);
}
}
#if LIBXML_VERSION < 21200
static void print_non_parser_error(void *userData, xmlErrorPtr error)
#else
static void print_non_parser_error(void *userData, const xmlError *error)
#endif
{
if (error->domain != XML_FROM_PARSER) {
if (error->file) {
fprintf(userData, ERR_PREFIX "%s (%d): %s", error->file, error->line, error->message);
} else {
fprintf(userData, ERR_PREFIX "%s\n", error->message);
}
}
}
#if LIBXML_VERSION < 21200
static void suppress_error(void *userData, xmlErrorPtr error)
#else
static void suppress_error(void *userData, const xmlError *error)
#endif
{
}
xmlStructuredErrorFunc schema_errfunc = print_error;
/* Print the XML tree to stdout if it is valid. */
static int output_tree = 0;
static struct s1kd_schema_parser *get_schema_parser(const char *url)
{
int i;
for (i = 0; i < schema_parser_count; ++i) {
if (strcmp(schema_parsers[i].url, url) == 0) {
return &(schema_parsers[i]);
}
}
return NULL;
}
static struct s1kd_schema_parser *add_schema_parser(char *url)
{
struct s1kd_schema_parser *parser;
xmlSchemaParserCtxtPtr ctxt;
xmlSchemaPtr schema;
xmlSchemaValidCtxtPtr valid_ctxt;
ctxt = xmlSchemaNewParserCtxt(url);
schema = xmlSchemaParse(ctxt);
valid_ctxt = xmlSchemaNewValidCtxt(schema);
xmlSchemaSetParserStructuredErrors(ctxt, schema_errfunc, stderr);
xmlSchemaSetValidStructuredErrors(valid_ctxt, schema_errfunc, stderr);
schema_parsers[schema_parser_count].url = url;
schema_parsers[schema_parser_count].ctxt = ctxt;
schema_parsers[schema_parser_count].schema = schema;
schema_parsers[schema_parser_count].valid_ctxt = valid_ctxt;
parser = &schema_parsers[schema_parser_count];
++schema_parser_count;
return parser;
}
static void show_help(void)
{
puts("Usage: " PROG_NAME " [-s <path>] [-x <URI>] [-efloqv^h?] [<object>...]");
puts("");
puts("Options:");
puts(" -e, --ignore-empty Ignore empty/non-XML documents.");
puts(" -f, --filenames List invalid files.");
puts(" -h, -?, --help Show help/usage message.");
puts(" -l, --list Treat input as list of filenames.");
puts(" -o, --output-valid Output valid CSDB objects to stdout.");
puts(" -q, --quiet Silent (no output).");
puts(" -s, --schema <path> Validate against the given schema.");
puts(" -v, --verbose Verbose output.");
puts(" -x, --exclude <URI> Exclude namespace from validation by URI.");
puts(" -^, --remove-deleted Validate with elements marked as \"delete\" removed.");
puts(" --version Show version information.");
puts(" <object> Any number of CSDB objects to validate.");
LIBXML2_PARSE_LONGOPT_HELP
}
static void show_version(void)
{
printf("%s (s1kd-tools) %s\n", PROG_NAME, VERSION);
printf("Using libxml %s\n", xmlParserVersion);
}
static void add_ignore_ns(xmlNodePtr ignore_ns, const char *arg)
{
xmlNewChild(ignore_ns, NULL, BAD_CAST "ignore", BAD_CAST arg);
}
static void strip_ns(xmlDocPtr doc, xmlNodePtr ignore)
{
xmlXPathContextPtr ctxt;
xmlXPathObjectPtr results;
char xpath[256];
char *uri;
int i;
uri = (char *) xmlNodeGetContent(ignore);
snprintf(xpath, 256, "//*[namespace-uri() = '%s']", uri);
xmlFree(uri);
ctxt = xmlXPathNewContext(doc);
results = xmlXPathEvalExpression(BAD_CAST xpath, ctxt);
for (i = results->nodesetval->nodeNr - 1; i >= 0; --i) {
xmlUnlinkNode(results->nodesetval->nodeTab[i]);
xmlFreeNode(results->nodesetval->nodeTab[i]);
}
xmlXPathFreeObject(results);
xmlXPathFreeContext(ctxt);
}
/* Check that certain attributes of type xs:IDREF and xs:IDREFS have a matching
* xs:ID attribute.
*/
static int check_idrefs(xmlDocPtr doc, const char *fname)
{
xmlXPathContextPtr ctx;
xmlXPathObjectPtr obj;
int err = 0;
ctx = xmlXPathNewContext(doc);
/* Check xs:IDREF */
obj = xmlXPathEvalExpression(INVALID_ID_XPATH, ctx);
if (!xmlXPathNodeSetIsEmpty(obj->nodesetval)) {
if (verbosity > SILENT) {
int i;
for (i = 0; i < obj->nodesetval->nodeNr; ++i) {
xmlChar *id = xmlNodeGetContent(obj->nodesetval->nodeTab[i]);
fprintf(stderr,
E_BAD_IDREF,
fname,
xmlGetLineNo(obj->nodesetval->nodeTab[i]->parent),
(char *) id);
xmlFree(id);
}
}
++err;
}
xmlXPathFreeObject(obj);
/* Check xs:IDREFS */
obj = xmlXPathEvalExpression(INVALID_IDS_XPATH, ctx);
if (!xmlXPathNodeSetIsEmpty(obj->nodesetval)) {
int i;
for (i = 0; i < obj->nodesetval->nodeNr; ++i) {
char *ids, *id = NULL;
ids = (char *) xmlNodeGetContent(obj->nodesetval->nodeTab[i]);
while ((id = strtok(id ? NULL : ids, " "))) {
xmlChar xpath[256];
xmlXPathObjectPtr res;
xmlStrPrintf(xpath, 256, "//*[@id='%s']", id);
res = xmlXPathEvalExpression(xpath, ctx);
if (xmlXPathNodeSetIsEmpty(res->nodesetval)) {
if (verbosity > SILENT) {
fprintf(stderr,
E_BAD_IDREF,
fname,
xmlGetLineNo(obj->nodesetval->nodeTab[i]->parent),
(char *) id);
}
++err;
}
xmlXPathFreeObject(res);
}
xmlFree(ids);
}
}
xmlXPathFreeObject(obj);
xmlXPathFreeContext(ctx);
return err;
}
static void resize_schema_parsers(void)
{
if (!(schema_parsers = realloc(schema_parsers, (SCHEMA_PARSERS_MAX *= 2) * sizeof(struct s1kd_schema_parser)))) {
fprintf(stderr, E_MAX_SCHEMA_PARSERS, schema_parser_count);
exit(EXIT_MAX_SCHEMAS);
}
}
static int validate_file(const char *fname, const char *schema, xmlNodePtr ignore_ns, enum show_fnames show_fnames, int ignore_empty, int rem_del)
{
xmlDocPtr doc;
xmlDocPtr validtree = NULL;
xmlNodePtr dmodule;
char *url;
struct s1kd_schema_parser *parser;
int err = 0;
if (!(doc = read_xml_doc(fname))) {
return !ignore_empty;
}
/* Make a copy of the original XML tree before performing extra
* processing on it. */
if (output_tree) {
validtree = xmlCopyDoc(doc, 1);
}
if (ignore_ns->children) {
xmlNodePtr cur;
for (cur = ignore_ns->children; cur; cur = cur->next) {
strip_ns(doc, cur);
}
}
/* Remove elements marked as "delete". */
if (rem_del) {
rem_delete_elems(doc);
}
/* This shouldn't be needed because the xs:ID, xs:IDREF and xs:IDREFS
* are defined in the schema, but at this time libxml2 does not check
* these when validating.
*/
err += check_idrefs(doc, fname);
dmodule = xmlDocGetRootElement(doc);
if (schema) {
url = strdup(schema);
} else {
url = (char *) xmlGetNsProp(dmodule, BAD_CAST "noNamespaceSchemaLocation", XSI_URI);
}
if (!url) {
if (verbosity > SILENT) {
fprintf(stderr, ERR_PREFIX "%s has no schema.\n", fname);
}
return 1;
}
if ((parser = get_schema_parser(url))) {
xmlFree(url);
} else {
if (schema_parser_count == SCHEMA_PARSERS_MAX) {
resize_schema_parsers();
}
parser = add_schema_parser(url);
}
if (xmlSchemaValidateDoc(parser->valid_ctxt, doc)) {
++err;
}
/* Write the original XML tree to stdout if determined to be valid. */
if (output_tree) {
if (err == 0) {
save_xml_doc(validtree, "-");
}
xmlFreeDoc(validtree);
}
if (verbosity >= VERBOSE) {
if (err) {
fprintf(stderr, FAILED_PREFIX "%s fails to validate against schema %s\n", fname, parser->url);
} else {
fprintf(stderr, SUCCESS_PREFIX "%s validates against schema %s\n", fname, parser->url);
}
}
if ((show_fnames == SHOW_INVALID && err != 0) || (show_fnames == SHOW_VALID && err == 0)) {
printf("%s\n", fname);
}
xmlFreeDoc(doc);
return err;
}
static int validate_file_list(const char *fname, const char *schema, xmlNodePtr ignore_ns, enum show_fnames show_fnames, int ignore_empty, int rem_del)
{
FILE *f;
char path[PATH_MAX];
int err;
if (fname) {
if (!(f = fopen(fname, "r"))) {
fprintf(stderr, E_BAD_LIST, fname);
return 0;
}
} else {
f = stdin;
}
err = 0;
while (fgets(path, PATH_MAX, f)) {
strtok(path, "\t\r\n");
err += validate_file(path, schema, ignore_ns, show_fnames, ignore_empty, rem_del);
}
if (fname) {
fclose(f);
}
return err;
}
int main(int argc, char *argv[])
{
int c, i;
int err = 0;
enum show_fnames show_fnames = SHOW_NONE;
int is_list = 0;
int ignore_empty = 0;
int rem_del = 0;
char *schema = NULL;
xmlNodePtr ignore_ns;
const char *sopts = "vqx:Ffloes:^h?";
struct option lopts[] = {
{"version" , no_argument , 0, 0},
{"help" , no_argument , 0, 'h'},
{"valid-filenames", no_argument , 0, 'F'},
{"filenames" , no_argument , 0, 'f'},
{"list" , no_argument , 0, 'l'},
{"output-valid" , no_argument , 0, 'o'},
{"quiet" , no_argument , 0, 'q'},
{"verbose" , no_argument , 0, 'v'},
{"exclude" , required_argument, 0, 'x'},
{"ignore-empty" , no_argument , 0, 'e'},
{"schema" , required_argument, 0, 's'},
{"remove-deleted" , no_argument , 0, '^'},
LIBXML2_PARSE_LONGOPT_DEFS
{0, 0, 0, 0}
};
int loptind = 0;
schema_parsers = malloc(SCHEMA_PARSERS_MAX * sizeof(struct s1kd_schema_parser));
ignore_ns = xmlNewNode(NULL, BAD_CAST "ignorens");
while ((c = getopt_long(argc, argv, sopts, lopts, &loptind)) != -1) {
switch (c) {
case 0:
if (strcmp(lopts[loptind].name, "version") == 0) {
show_version();
return EXIT_SUCCESS;
}
LIBXML2_PARSE_LONGOPT_HANDLE(lopts, loptind, optarg)
break;
case 'q': verbosity = SILENT; break;
case 'v': verbosity = VERBOSE; break;
case 'x': add_ignore_ns(ignore_ns, optarg); break;
case 'F': show_fnames = SHOW_VALID; break;
case 'f': show_fnames = SHOW_INVALID; break;
case 'l': is_list = 1; break;
case 'o': output_tree = 1; break;
case 'e': ignore_empty = 1; break;
case 's': schema = strdup(optarg); break;
case '^': rem_del = 1; break;
case 'h':
case '?': show_help(); return EXIT_SUCCESS;
}
}
LIBXML2_PARSE_INIT
if (verbosity == SILENT) {
schema_errfunc = suppress_error;
} else if (ignore_empty) {
schema_errfunc = print_non_parser_error;
}
xmlSetStructuredErrorFunc(stderr, schema_errfunc);
if (optind < argc) {
for (i = optind; i < argc; ++i) {
if (is_list) {
err += validate_file_list(argv[i], schema, ignore_ns, show_fnames, ignore_empty, rem_del);
} else {
err += validate_file(argv[i], schema, ignore_ns, show_fnames, ignore_empty, rem_del);
}
}
} else if (is_list) {
err = validate_file_list(NULL, schema, ignore_ns, show_fnames, ignore_empty, rem_del);
} else {
err = validate_file("-", schema, ignore_ns, show_fnames, ignore_empty, rem_del);
}
for (i = 0; i < schema_parser_count; ++i) {
xmlFree(schema_parsers[i].url);
xmlSchemaFreeValidCtxt(schema_parsers[i].valid_ctxt);
xmlSchemaFree(schema_parsers[i].schema);
xmlSchemaFreeParserCtxt(schema_parsers[i].ctxt);
}
free(schema_parsers);
xmlFreeNode(ignore_ns);
free(schema);
xmlCleanupParser();
return err ? EXIT_FAILURE : EXIT_SUCCESS;
}
gopher://khzae.net/0/s1kd/s1kd-tools/src/tools/s1kd-validate/s1kd-validate.c