summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
inline | side by side (from parent 1:
7102443)
When users don't specify the input format, applications can try to have
the format auto-detected. Some of the tests were weak and could result
in false positives.
Add a 'confidence' parameter to the input modules' format_match()
method. Claim high confidence (1) for those formats which can check
magic strings or the presence of essential keywords (vcd, wav). Claim
medium confidence (10) for those formats which happened to mis-detect
inputs in the past (trace32_ad). Claim weak confidence (100) for those
formats which totally lack reliable conditions and rather guess than
detect (chronovu_la8).
Prefer the best match in public scan routines. Return at most one module
to callers even if multiple modules matched, to keep the current API.
This addresses part of bug #1200.
-static int format_match(GHashTable *metadata)
+static int format_match(GHashTable *metadata, unsigned int *confidence)
+ /*
+ * In the absence of a reliable condition like magic strings,
+ * we can only guess based on the file size. Since this is a
+ * rather weak condition, signal "little confidence" and
+ * optionally give precedence to better matches.
+ */
size = GPOINTER_TO_INT(g_hash_table_lookup(metadata,
GINT_TO_POINTER(SR_INPUT_META_FILESIZE)));
size = GPOINTER_TO_INT(g_hash_table_lookup(metadata,
GINT_TO_POINTER(SR_INPUT_META_FILESIZE)));
- if (size == CHRONOVU_LA8_FILESIZE)
- return SR_OK;
+ if (size != CHRONOVU_LA8_FILESIZE)
+ return SR_ERR;
+ *confidence = 100;
}
static int init(struct sr_input *in, GHashTable *options)
}
static int init(struct sr_input *in, GHashTable *options)
* 128 bytes is normally enough.
*
* If an input module is found, an instance is created into *in.
* 128 bytes is normally enough.
*
* If an input module is found, an instance is created into *in.
- * Otherwise, *in contains NULL.
+ * Otherwise, *in contains NULL. When multiple input modules claim
+ * support for the format, the one with highest confidence takes
+ * precedence. Applications will see at most one input module spec.
*
* If an instance is created, it has the given buffer used for scanning
* already submitted to it, to be processed before more data is sent.
*
* If an instance is created, it has the given buffer used for scanning
* already submitted to it, to be processed before more data is sent.
*/
SR_API int sr_input_scan_buffer(GString *buf, const struct sr_input **in)
{
*/
SR_API int sr_input_scan_buffer(GString *buf, const struct sr_input **in)
{
- const struct sr_input_module *imod;
+ const struct sr_input_module *imod, *best_imod;
GHashTable *meta;
unsigned int m, i;
GHashTable *meta;
unsigned int m, i;
+ unsigned int conf, best_conf;
int ret;
uint8_t mitem, avail_metadata[8];
int ret;
uint8_t mitem, avail_metadata[8];
avail_metadata[1] = 0;
*in = NULL;
avail_metadata[1] = 0;
*in = NULL;
+ best_imod = NULL;
+ best_conf = ~0;
for (i = 0; input_module_list[i]; i++) {
imod = input_module_list[i];
if (!imod->metadata[0]) {
for (i = 0; input_module_list[i]; i++) {
imod = input_module_list[i];
if (!imod->metadata[0]) {
continue;
}
sr_spew("Trying module %s.", imod->id);
continue;
}
sr_spew("Trying module %s.", imod->id);
- ret = imod->format_match(meta);
+ ret = imod->format_match(meta, &conf);
g_hash_table_destroy(meta);
if (ret == SR_ERR_DATA) {
/* Module recognized this buffer, but cannot handle it. */
g_hash_table_destroy(meta);
if (ret == SR_ERR_DATA) {
/* Module recognized this buffer, but cannot handle it. */
} else if (ret == SR_ERR) {
/* Module didn't recognize this buffer. */
continue;
} else if (ret != SR_OK) {
/* Can be SR_ERR_NA. */
} else if (ret == SR_ERR) {
/* Module didn't recognize this buffer. */
continue;
} else if (ret != SR_OK) {
/* Can be SR_ERR_NA. */
}
/* Found a matching module. */
}
/* Found a matching module. */
- sr_spew("Module %s matched.", imod->id);
- *in = sr_input_new(imod, NULL);
+ sr_spew("Module %s matched, confidence %u.", imod->id, conf);
+ if (conf >= best_conf)
+ continue;
+ best_imod = imod;
+ best_conf = conf;
+ }
+
+ if (best_imod) {
+ *in = sr_input_new(best_imod, NULL);
g_string_insert_len((*in)->buf, 0, buf->str, buf->len);
g_string_insert_len((*in)->buf, 0, buf->str, buf->len);
}
/**
* Try to find an input module that can parse the given file.
*
* If an input module is found, an instance is created into *in.
}
/**
* Try to find an input module that can parse the given file.
*
* If an input module is found, an instance is created into *in.
- * Otherwise, *in contains NULL.
+ * Otherwise, *in contains NULL. When multiple input modules claim
+ * support for the format, the one with highest confidence takes
+ * precedence. Applications will see at most one input module spec.
*
*/
SR_API int sr_input_scan_file(const char *filename, const struct sr_input **in)
{
int64_t filesize;
FILE *stream;
*
*/
SR_API int sr_input_scan_file(const char *filename, const struct sr_input **in)
{
int64_t filesize;
FILE *stream;
- const struct sr_input_module *imod;
+ const struct sr_input_module *imod, *best_imod;
GHashTable *meta;
GString *header;
size_t count;
unsigned int midx, i;
GHashTable *meta;
GString *header;
size_t count;
unsigned int midx, i;
+ unsigned int conf, best_conf;
int ret;
uint8_t avail_metadata[8];
int ret;
uint8_t avail_metadata[8];
avail_metadata[midx] = 0;
/* TODO: MIME type */
avail_metadata[midx] = 0;
/* TODO: MIME type */
+ best_imod = NULL;
+ best_conf = ~0;
for (i = 0; input_module_list[i]; i++) {
imod = input_module_list[i];
if (!imod->metadata[0]) {
for (i = 0; input_module_list[i]; i++) {
imod = input_module_list[i];
if (!imod->metadata[0]) {
sr_dbg("Trying module %s.", imod->id);
sr_dbg("Trying module %s.", imod->id);
- ret = imod->format_match(meta);
+ ret = imod->format_match(meta, &conf);
if (ret == SR_ERR) {
/* Module didn't recognize this buffer. */
continue;
} else if (ret != SR_OK) {
/* Module recognized this buffer, but cannot handle it. */
if (ret == SR_ERR) {
/* Module didn't recognize this buffer. */
continue;
} else if (ret != SR_OK) {
/* Module recognized this buffer, but cannot handle it. */
}
/* Found a matching module. */
}
/* Found a matching module. */
- sr_dbg("Module %s matched.", imod->id);
-
- *in = sr_input_new(imod, NULL);
- break;
+ sr_dbg("Module %s matched, confidence %u.", imod->id, conf);
+ if (conf >= best_conf)
+ continue;
+ best_imod = imod;
+ best_conf = conf;
}
g_hash_table_destroy(meta);
g_string_free(header, TRUE);
}
g_hash_table_destroy(meta);
g_string_free(header, TRUE);
+ if (best_imod) {
+ *in = sr_input_new(best_imod, NULL);
+ return SR_OK;
+ }
+
+ return SR_ERR;
-static int format_match(GHashTable *metadata)
+static int format_match(GHashTable *metadata, unsigned int *confidence)
buf = g_hash_table_lookup(metadata, GINT_TO_POINTER(SR_INPUT_META_HEADER));
buf = g_hash_table_lookup(metadata, GINT_TO_POINTER(SR_INPUT_META_HEADER));
+ rc = process_header(buf, NULL);
- return process_header(buf, NULL);
+ if (rc != SR_OK)
+ return rc;
+ *confidence = 10;
+
+ return SR_OK;
}
static int process_header(GString *buf, struct context *inc)
}
static int process_header(GString *buf, struct context *inc)
-static int format_match(GHashTable *metadata)
+static int format_match(GHashTable *metadata, unsigned int *confidence)
{
GString *buf, *tmpbuf;
gboolean status;
{
GString *buf, *tmpbuf;
gboolean status;
g_free(name);
g_free(contents);
g_free(name);
g_free(contents);
- return status ? SR_OK : SR_ERR;
+ if (!status)
+ return SR_ERR;
+ *confidence = 1;
+
+ return SR_OK;
}
/* Send all accumulated bytes from inc->buffer. */
}
/* Send all accumulated bytes from inc->buffer. */
-static int format_match(GHashTable *metadata)
+static int format_match(GHashTable *metadata, unsigned int *confidence)
if ((ret = parse_wav_header(buf, NULL)) != SR_OK)
return ret;
if ((ret = parse_wav_header(buf, NULL)) != SR_OK)
return ret;
* Check if this input module can load and parse the specified stream.
*
* @param[in] metadata Metadata the module can use to identify the stream.
* Check if this input module can load and parse the specified stream.
*
* @param[in] metadata Metadata the module can use to identify the stream.
+ * @param[out] confidence "Strength" of the detection.
+ * Specialized handlers can take precedence over generic/basic support.
*
* @retval SR_OK This module knows the format.
* @retval SR_ERR_NA There wasn't enough data for this module to
*
* @retval SR_OK This module knows the format.
* @retval SR_ERR_NA There wasn't enough data for this module to
* it. This means the stream is either corrupt, or indicates a
* feature that the module does not support.
* @retval SR_ERR This module does not know the format.
* it. This means the stream is either corrupt, or indicates a
* feature that the module does not support.
* @retval SR_ERR This module does not know the format.
+ *
+ * Lower numeric values of 'confidence' mean that the input module
+ * more strongly believes in its capability to handle this specific format.
+ * This way, multiple input modules can claim support for a format,
+ * and the application can pick the best match, or try fallbacks
+ * in case of errors. This approach also copes with formats that
+ * are unreliable to detect in the absence of magic signatures.
- int (*format_match) (GHashTable *metadata);
+ int (*format_match) (GHashTable *metadata, unsigned int *confidence);
/**
* Initialize the input module.
/**
* Initialize the input module.