X-Git-Url: http://sigrok.org/gitweb/?a=blobdiff_plain;f=src%2Finput%2Fvcd.c;h=413712301318787f7201f9389de5748418dbfbeb;hb=c1310f7deba7ac1f4ce75fbb5cc9b498ee66aa49;hp=59015055e5e600c69deb4c6b6377d63308afddc8;hpb=358105152aaceb9065e4a18b2b0e0a1fc118eecb;p=libsigrok.git

diff --git a/src/input/vcd.c b/src/input/vcd.c
index 59015055..41371230 100644
--- a/src/input/vcd.c
+++ b/src/input/vcd.c
@@ -75,6 +75,17 @@
  * - Check VCD input to VCD output behaviour. Verify that export and
  *   re-import results in identical data (well, VCD's constraints on
  *   timescale values is known to result in differences).
+ * - Check the minimum timestamp delta in the input data set, suggest
+ *   the downsample=N option to users for reduced resource consumption.
+ *   Popular VCD file creation utilities love to specify insanely tiny
+ *   timescale values in the pico or even femto seconds range. Which
+ *   results in huge sample counts after import, and potentially even
+ *   terminates the application due to resource exhaustion. This issue
+ *   only will vanish when common libsigrok infrastructure no longer
+ *   depends on constant rate streams of samples at discrete points
+ *   in time. The current input module implementation has code in place
+ *   to gather timestamp statistics, but the most appropriate condition
+ *   when to notify users is yet to be found.
  * - Cleanup the implementation.
  *   - Consistent use of the glib API (where appropriate).
  *   - More appropriate variable/function identifiers.
@@ -146,6 +157,18 @@ struct context {
 		char **words;
 		gboolean in_use;
 	} split;
+	struct ts_stats {
+		size_t total_ts_seen;
+		uint64_t last_ts_value;
+		uint64_t last_ts_delta;
+		size_t min_count;
+		struct {
+			uint64_t delta;
+			size_t count;
+		} min_items[2];
+		uint32_t early_check_shift;
+		size_t early_last_emitted;
+	} ts_stats;
 	struct vcd_prev {
 		GSList *sr_channels;
 		GSList *sr_groups;
@@ -191,6 +214,244 @@ static void cg_free(void *p)
 	sr_channel_group_free(p);
 }
 
+/*
+ * Another timestamp delta was observed, update statistics: Update the
+ * sorted list of minimum values, and increment the occurance counter.
+ * Returns the position of the item's statistics slot, or returns a huge
+ * invalid index when the current delta is larger than previously found
+ * values.
+ */
+static size_t ts_stats_update_min(struct ts_stats *stats, uint64_t delta)
+{
+	size_t idx, copy_idx;
+
+	/* Advance over previously recorded values which are smaller. */
+	idx = 0;
+	while (idx < stats->min_count && stats->min_items[idx].delta < delta)
+		idx++;
+	if (idx == ARRAY_SIZE(stats->min_items))
+		return idx;
+
+	/* Found the exact value that previously was registered? */
+	if (stats->min_items[idx].delta == delta) {
+		stats->min_items[idx].count++;
+		return idx;
+	}
+
+	/* Allocate another slot, bubble up larger values as needed. */
+	if (stats->min_count < ARRAY_SIZE(stats->min_items))
+		stats->min_count++;
+	for (copy_idx = stats->min_count - 1; copy_idx > idx; copy_idx--)
+		stats->min_items[copy_idx] = stats->min_items[copy_idx - 1];
+
+	/* Start tracking this value in the found or freed slot. */
+	memset(&stats->min_items[idx], 0, sizeof(stats->min_items[idx]));
+	stats->min_items[idx].delta = delta;
+	stats->min_items[idx].count++;
+
+	return idx;
+}
+
+/*
+ * Intermediate check for extreme oversampling in the input data. Rate
+ * limited emission of warnings to avoid noise, "late" emission of the
+ * first potential message to avoid false positives, yet need to  emit
+ * the messages early (*way* before EOF) to raise awareness.
+ *
+ * TODO
+ * Tune the limits, improve perception and usefulness of these checks.
+ * Need to start emitting messages soon enough to be seen by users. Yet
+ * avoid unnecessary messages for valid input's idle/quiet phases. Slow
+ * input transitions are perfectly legal before bursty phases are seen
+ * in the input data. Needs the check become an option, on by default,
+ * but suppressable by users?
+ */
+static void ts_stats_check_early(struct ts_stats *stats)
+{
+	static const struct {
+		uint64_t delta;
+		size_t count;
+	} *cp, check_points[] = {
+		{     100, 1000000, }, /* Still x100 after 1mio transitions. */
+		{    1000,  100000, }, /* Still x1k after 100k transitions. */
+		{   10000,   10000, }, /* Still x10k after 10k transitions. */
+		{ 1000000,    2500, }, /* Still x1m after 2.5k transitions. */
+	};
+
+	size_t cp_idx;
+	uint64_t seen_delta, check_delta;
+	size_t seen_count;
+
+	/* Get the current minimum's value and count. */
+	if (!stats->min_count)
+		return;
+	seen_delta = stats->min_items[0].delta;
+	seen_count = stats->min_items[0].count;
+
+	/* Emit at most one weak message per import. */
+	if (stats->early_last_emitted)
+		return;
+
+	/* Check arbitrary marks, emit rate limited warnings. */
+	(void)seen_count;
+	check_delta = seen_delta >> stats->early_check_shift;
+	for (cp_idx = 0; cp_idx < ARRAY_SIZE(check_points); cp_idx++) {
+		cp = &check_points[cp_idx];
+		/* No other match can happen below. Done iterating. */
+		if (stats->total_ts_seen > cp->count)
+			return;
+		/* Advance to the next checkpoint description. */
+		if (stats->total_ts_seen != cp->count)
+			continue;
+		/* First occurance of that timestamp count. Check the value. */
+		sr_dbg("TS early chk: total %zu, min delta %" PRIu64 " / %" PRIu64 ".",
+			cp->count, seen_delta, check_delta);
+		if (check_delta < cp->delta)
+			return;
+		sr_warn("Low change rate? (weak estimate, min TS delta %" PRIu64 " after %zu timestamps)",
+			seen_delta, stats->total_ts_seen);
+		sr_warn("Consider using the downsample=N option, or increasing its value.");
+		stats->early_last_emitted = stats->total_ts_seen;
+		return;
+	}
+}
+
+/* Reset the internal state of the timestamp tracker. */
+static int ts_stats_prep(struct context *inc)
+{
+	struct ts_stats *stats;
+	uint64_t down_sample_value;
+	uint32_t down_sample_shift;
+
+	stats = &inc->ts_stats;
+	memset(stats, 0, sizeof(*stats));
+
+	down_sample_value = inc->options.downsample;
+	down_sample_shift = 0;
+	while (down_sample_value >= 2) {
+		down_sample_shift++;
+		down_sample_value /= 2;
+	}
+	stats->early_check_shift = down_sample_shift;
+
+	return SR_OK;
+}
+
+/* Inspect another timestamp that was received. */
+static int ts_stats_check(struct ts_stats *stats, uint64_t curr_ts)
+{
+	uint64_t last_ts, delta;
+
+	last_ts = stats->last_ts_value;
+	stats->last_ts_value = curr_ts;
+	stats->total_ts_seen++;
+	if (stats->total_ts_seen < 2)
+		return SR_OK;
+
+	delta = curr_ts - last_ts;
+	stats->last_ts_delta = delta;
+	(void)ts_stats_update_min(stats, delta);
+
+	ts_stats_check_early(stats);
+
+	return SR_OK;
+}
+
+/* Postprocess internal timestamp tracker state. */
+static int ts_stats_post(struct context *inc, gboolean ignore_terminal)
+{
+	struct ts_stats *stats;
+	size_t min_idx;
+	uint64_t delta, over_sample, over_sample_scaled, suggest_factor;
+	enum sr_loglevel log_level;
+	gboolean is_suspicious, has_downsample;
+
+	stats = &inc->ts_stats;
+
+	/*
+	 * Lookup the smallest timestamp delta which was found during
+	 * data import. Ignore the last delta if its timestamp was never
+	 * followed by data, and this was the only occurance. Absence of
+	 * result data is non-fatal here -- this code exclusively serves
+	 * to raise users' awareness of potential pitfalls, but does not
+	 * change behaviour of data processing.
+	 *
+	 * TODO Also filter by occurance count? To not emit warnings when
+	 * captured signals only change slowly by design. Only warn when
+	 * the sample rate and samples count product exceeds a threshold?
+	 * See below for the necessity (and potential) to adjust the log
+	 * message's severity and content.
+	 */
+	min_idx = 0;
+	if (ignore_terminal) do {
+		if (min_idx >= stats->min_count)
+			break;
+		delta = stats->last_ts_delta;
+		if (stats->min_items[min_idx].delta != delta)
+			break;
+		if (stats->min_items[min_idx].count != 1)
+			break;
+		min_idx++;
+	} while (0);
+	if (min_idx >= stats->min_count)
+		return SR_OK;
+
+	/*
+	 * TODO Refine the condition whether to notify the user, and
+	 * which severity to use after having inspected all input data.
+	 * Any detail could get involved which previously was gathered
+	 * during data processing: total sample count, channel count
+	 * including their data type and bits width, the oversampling
+	 * factor (minimum observed "change rate"), or any combination
+	 * thereof. The current check is rather simple (unconditional
+	 * warning for ratios starting at 100, regardless of sample or
+	 * channel count).
+	 */
+	over_sample = stats->min_items[min_idx].delta;
+	over_sample_scaled = over_sample / inc->options.downsample;
+	sr_dbg("TS post stats: oversample unscaled %" PRIu64 ", scaled %" PRIu64,
+		over_sample, over_sample_scaled);
+	if (over_sample_scaled < 10) {
+		sr_dbg("TS post stats: Low oversampling ratio, good.");
+		return SR_OK;
+	}
+
+	/*
+	 * Avoid constructing the message from several tiny pieces by
+	 * design, because this would be hard on translators. Stick with
+	 * complete sentences instead, and accept the redundancy in the
+	 * user's interest.
+	 */
+	log_level = (over_sample_scaled > 20) ? SR_LOG_WARN : SR_LOG_INFO;
+	is_suspicious = over_sample_scaled > 20;
+	if (is_suspicious) {
+		sr_log(log_level, LOG_PREFIX ": "
+			"Suspiciously low overall change rate (total min TS delta %" PRIu64 ").",
+			over_sample_scaled);
+	} else {
+		sr_log(log_level, LOG_PREFIX ": "
+			"Low overall change rate (total min TS delta %" PRIu64 ").",
+			over_sample_scaled);
+	}
+	has_downsample = inc->options.downsample > 1;
+	suggest_factor = inc->options.downsample;
+	while (over_sample_scaled >= 10) {
+		suggest_factor *= 10;
+		over_sample_scaled /= 10;
+	}
+	if (has_downsample) {
+		sr_log(log_level, LOG_PREFIX ": "
+			"Suggest higher downsample value, like %" PRIu64 ".",
+			suggest_factor);
+	} else {
+		sr_log(log_level, LOG_PREFIX ": "
+			"Suggest to downsample, value like %" PRIu64 ".",
+			suggest_factor);
+	}
+
+	return SR_OK;
+}
+
 static void check_remove_bom(GString *buf)
 {
 	static const char *bom_text = "\xef\xbb\xbf";
@@ -560,6 +821,7 @@ static int parse_header_var(struct context *inc, char *contents)
 	size_t length;
 	char *type, *size_txt, *id, *ref, *idx;
 	gboolean is_reg, is_wire, is_real, is_int;
+	gboolean is_str;
 	enum sr_channeltype ch_type;
 	size_t size, next_size;
 	struct vcd_channel *vcd_ch;
@@ -588,13 +850,21 @@ static int parse_header_var(struct context *inc, char *contents)
 	is_wire = g_strcmp0(type, "wire") == 0;
 	is_real = g_strcmp0(type, "real") == 0;
 	is_int = g_strcmp0(type, "integer") == 0;
+	is_str = g_strcmp0(type, "string") == 0;
 
 	if (is_reg || is_wire) {
 		ch_type = SR_CHANNEL_LOGIC;
 	} else if (is_real || is_int) {
 		ch_type = SR_CHANNEL_ANALOG;
+	} else if (is_str) {
+		sr_warn("Skipping id %s, name '%s%s', unsupported type '%s'.",
+			id, ref, idx ? idx : "", type);
+		inc->ignored_signals = g_slist_append(inc->ignored_signals,
+			g_strdup(id));
+		g_strfreev(parts);
+		return SR_OK;
 	} else {
-		sr_info("Unsupported signal type: '%s'", type);
+		sr_err("Unsupported signal type: '%s'", type);
 		g_strfreev(parts);
 		return SR_ERR_DATA;
 	}
@@ -835,9 +1105,11 @@ static void create_feeds(const struct sr_input *in)
 	inc = in->priv;
 
 	/* Create one feed for logic data. */
-	inc->unit_size = (inc->logic_count + 7) / 8;
-	inc->feed_logic = feed_queue_logic_alloc(in->sdi,
-		CHUNK_SIZE / inc->unit_size, inc->unit_size);
+	if (inc->logic_count) {
+		inc->unit_size = (inc->logic_count + 7) / 8;
+		inc->feed_logic = feed_queue_logic_alloc(in->sdi,
+			CHUNK_SIZE / inc->unit_size, inc->unit_size);
+	}
 
 	/* Create one feed per analog channel. */
 	for (l = inc->channels; l; l = l->next) {
@@ -920,6 +1192,7 @@ static int parse_header(const struct sr_input *in, GString *buf)
 	gboolean status;
 	char *name, *contents;
 	size_t size;
+	int ret;
 
 	inc = in->priv;
 
@@ -1008,6 +1281,10 @@ done_section:
 	for (size = 0; size < inc->analog_count; size++)
 		inc->current_floats[size] = 0.;
 
+	ret = ts_stats_prep(inc);
+	if (ret != SR_OK)
+		return ret;
+
 	return SR_OK;
 }
 
@@ -1242,6 +1519,95 @@ static uint8_t vcd_char_to_value(char bit_char, int *warn)
 	return ~0;
 }
 
+/*
+ * Check the validity of a VCD string value. It's essential to reliably
+ * accept valid data which the community uses in the field, yet robustly
+ * reject invalid data for users' awareness. Since IEEE 1800-2017 would
+ * not discuss the representation of this data type, it's assumed to not
+ * be an official feature of the VCD file format. This implementation is
+ * an educated guess after inspection of other arbitrary implementations,
+ * not backed by any specification or public documentation.
+ *
+ * A quick summary of the implemented assumptions: Must be a sequence of
+ * ASCII printables. Must not contain whitespace. Might contain escape
+ * sequences: A backslash followed by a single character, like '\n' or
+ * '\\'. Or a backslash and the letter x followed by two hex digits,
+ * like '\x20'. Or a backslash followed by three octal digits, like
+ * '\007'. As an exception also accepts a single digit '\0' but only at
+ * the text end. The string value may be empty, but must not be NULL.
+ *
+ * This implementation assumes an ASCII based platform for simplicity
+ * and readability. Should be a given on sigrok supported platforms.
+ */
+static gboolean vcd_string_valid(const char *s)
+{
+	char c;
+
+	if (!s)
+		return FALSE;
+
+	while (*s) {
+		c = *s++;
+		/* Reject non-printable ASCII chars including DEL. */
+		if (c < ' ')
+			return FALSE;
+		if (c > '~')
+			return FALSE;
+		/* Deeper inspection of escape sequences. */
+		if (c == '\\') {
+			c = *s++;
+			switch (c) {
+			case 'a': /* BEL, bell aka "alarm" */
+			case 'b': /* BS, back space */
+			case 't': /* TAB, tabulator */
+			case 'n': /* NL, newline */
+			case 'v': /* VT, vertical tabulator */
+			case 'f': /* FF, form feed */
+			case 'r': /* CR, carriage return */
+			case '"': /* double quotes */
+			case '\'': /* tick, single quote */
+			case '?': /* question mark */
+			case '\\': /* backslash */
+				continue;
+			case 'x': /* \xNN two hex digits */
+				c = *s++;
+				if (!g_ascii_isxdigit(c))
+					return FALSE;
+				c = *s++;
+				if (!g_ascii_isxdigit(c))
+					return FALSE;
+				continue;
+			case '0': /* \NNN three octal digits */
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+				/* Special case '\0' at end of text. */
+				if (c == '0' && !*s)
+					return TRUE;
+				/*
+				 * First digit was covered by the outer
+				 * switch(). Two more digits to check.
+				 */
+				c = *s++;
+				if (!g_ascii_isdigit(c) || c > '7')
+					return FALSE;
+				c = *s++;
+				if (!g_ascii_isdigit(c) || c > '7')
+					return FALSE;
+				continue;
+			default:
+				return FALSE;
+			}
+		}
+	}
+
+	return TRUE;
+}
+
 /* Parse one text line of the data section. */
 static int parse_textline(const struct sr_input *in, char *lines)
 {
@@ -1250,7 +1616,8 @@ static int parse_textline(const struct sr_input *in, char *lines)
 	char **words;
 	size_t word_count, word_idx;
 	char *curr_word, *next_word, curr_first;
-	gboolean is_timestamp, is_section, is_real, is_multibit, is_singlebit;
+	gboolean is_timestamp, is_section;
+	gboolean is_real, is_multibit, is_singlebit, is_string;
 	uint64_t timestamp;
 	char *identifier, *endptr;
 	size_t count;
@@ -1354,6 +1721,9 @@ static int parse_textline(const struct sr_input *in, char *lines)
 				break;
 			}
 			sr_spew("Got timestamp: %" PRIu64, timestamp);
+			ret = ts_stats_check(&inc->ts_stats, timestamp);
+			if (ret != SR_OK)
+				break;
 			if (inc->options.downsample > 1) {
 				timestamp /= inc->options.downsample;
 				sr_spew("Downsampled timestamp: %" PRIu64, timestamp);
@@ -1426,6 +1796,7 @@ static int parse_textline(const struct sr_input *in, char *lines)
 		 * timestamp.
 		 *
 		 * Supported input data formats are:
+		 * - S<value> <sep> <id> (value not used, VCD type 'string').
 		 * - R<value> <sep> <id> (analog channel, VCD type 'real').
 		 * - B<value> <sep> <id> (analog channel, VCD type 'integer').
 		 * - B<value> <sep> <id> (logic channels, VCD bit vectors).
@@ -1454,6 +1825,7 @@ static int parse_textline(const struct sr_input *in, char *lines)
 		is_singlebit |= curr_first == 'l' || curr_first == 'h';
 		is_singlebit |= curr_first == 'x' || curr_first == 'z';
 		is_singlebit |= curr_first == 'u' || curr_first == '-';
+		is_string = curr_first == 's';
 		if (is_real) {
 			char *real_text;
 			float real_val;
@@ -1601,6 +1973,32 @@ static int parse_textline(const struct sr_input *in, char *lines)
 			process_bits(inc, identifier, inc->conv_bits.value, 1);
 			continue;
 		}
+		if (is_string) {
+			const char *str_value;
+
+			str_value = &curr_word[1];
+			identifier = next_word;
+			word_idx++;
+			if (!vcd_string_valid(str_value)) {
+				sr_err("Invalid string data: %s", str_value);
+				ret = SR_ERR_DATA;
+				break;
+			}
+			if (!identifier || !*identifier) {
+				sr_err("String value without identifier.");
+				ret = SR_ERR_DATA;
+				break;
+			}
+			sr_spew("Got string data, id '%s', value \"%s\".",
+				identifier, str_value);
+			if (!is_ignored(inc, identifier)) {
+				sr_err("String value for identifier '%s'.",
+					identifier);
+				ret = SR_ERR_DATA;
+				break;
+			}
+			continue;
+		}
 
 		/* Design choice: Consider unsupported input fatal. */
 		sr_err("Unknown token '%s'.", curr_word);
@@ -1786,6 +2184,9 @@ static int end(struct sr_input *in)
 	count = inc->data_after_timestamp ? 1 : 0;
 	add_samples(in, count, TRUE);
 
+	/* Optionally suggest downsampling after all input data was seen. */
+	(void)ts_stats_post(inc, !inc->data_after_timestamp);
+
 	/* Must send DF_END when DF_HEADER was sent before. */
 	if (inc->started)
 		std_session_send_df_end(in->sdi);