kingst-la2016: address endianess issue in data feed submission

[libsigrok.git] / src / input / vcd.c
diff --git a/src/input/vcd.c b/src/input/vcd.c

index 59015055e5e600c69deb4c6b6377d63308afddc8..413712301318787f7201f9389de5748418dbfbeb 100644 (file)
--- a/src/input/vcd.c
+++ b/src/input/vcd.c
@@ -75,6 +75,17 @@
   * - Check VCD input to VCD output behaviour. Verify that export and
   *   re-import results in identical data (well, VCD's constraints on
   *   timescale values is known to result in differences).
+ * - Check the minimum timestamp delta in the input data set, suggest
+ *   the downsample=N option to users for reduced resource consumption.
+ *   Popular VCD file creation utilities love to specify insanely tiny
+ *   timescale values in the pico or even femto seconds range. Which
+ *   results in huge sample counts after import, and potentially even
+ *   terminates the application due to resource exhaustion. This issue
+ *   only will vanish when common libsigrok infrastructure no longer
+ *   depends on constant rate streams of samples at discrete points
+ *   in time. The current input module implementation has code in place
+ *   to gather timestamp statistics, but the most appropriate condition
+ *   when to notify users is yet to be found.
   * - Cleanup the implementation.
   *   - Consistent use of the glib API (where appropriate).
   *   - More appropriate variable/function identifiers.
@@ -146,6 +157,18 @@ struct context {
                 char **words;
                 gboolean in_use;
         } split;
+       struct ts_stats {
+               size_t total_ts_seen;
+               uint64_t last_ts_value;
+               uint64_t last_ts_delta;
+               size_t min_count;
+               struct {
+                       uint64_t delta;
+                       size_t count;
+               } min_items[2];
+               uint32_t early_check_shift;
+               size_t early_last_emitted;
+       } ts_stats;
         struct vcd_prev {
                 GSList *sr_channels;
                 GSList *sr_groups;
@@ -191,6 +214,244 @@ static void cg_free(void *p)
         sr_channel_group_free(p);
  }
  
+/*
+ * Another timestamp delta was observed, update statistics: Update the
+ * sorted list of minimum values, and increment the occurance counter.
+ * Returns the position of the item's statistics slot, or returns a huge
+ * invalid index when the current delta is larger than previously found
+ * values.
+ */
+static size_t ts_stats_update_min(struct ts_stats *stats, uint64_t delta)
+{
+       size_t idx, copy_idx;
+
+       /* Advance over previously recorded values which are smaller. */
+       idx = 0;
+       while (idx < stats->min_count && stats->min_items[idx].delta < delta)
+               idx++;
+       if (idx == ARRAY_SIZE(stats->min_items))
+               return idx;
+
+       /* Found the exact value that previously was registered? */
+       if (stats->min_items[idx].delta == delta) {
+               stats->min_items[idx].count++;
+               return idx;
+       }
+
+       /* Allocate another slot, bubble up larger values as needed. */
+       if (stats->min_count < ARRAY_SIZE(stats->min_items))
+               stats->min_count++;
+       for (copy_idx = stats->min_count - 1; copy_idx > idx; copy_idx--)
+               stats->min_items[copy_idx] = stats->min_items[copy_idx - 1];
+
+       /* Start tracking this value in the found or freed slot. */
+       memset(&stats->min_items[idx], 0, sizeof(stats->min_items[idx]));
+       stats->min_items[idx].delta = delta;
+       stats->min_items[idx].count++;
+
+       return idx;
+}
+
+/*
+ * Intermediate check for extreme oversampling in the input data. Rate
+ * limited emission of warnings to avoid noise, "late" emission of the
+ * first potential message to avoid false positives, yet need to  emit
+ * the messages early (*way* before EOF) to raise awareness.
+ *
+ * TODO
+ * Tune the limits, improve perception and usefulness of these checks.
+ * Need to start emitting messages soon enough to be seen by users. Yet
+ * avoid unnecessary messages for valid input's idle/quiet phases. Slow
+ * input transitions are perfectly legal before bursty phases are seen
+ * in the input data. Needs the check become an option, on by default,
+ * but suppressable by users?
+ */
+static void ts_stats_check_early(struct ts_stats *stats)
+{
+       static const struct {
+               uint64_t delta;
+               size_t count;
+       } *cp, check_points[] = {
+               {     100, 1000000, }, /* Still x100 after 1mio transitions. */
+               {    1000,  100000, }, /* Still x1k after 100k transitions. */
+               {   10000,   10000, }, /* Still x10k after 10k transitions. */
+               { 1000000,    2500, }, /* Still x1m after 2.5k transitions. */
+       };
+
+       size_t cp_idx;
+       uint64_t seen_delta, check_delta;
+       size_t seen_count;
+
+       /* Get the current minimum's value and count. */
+       if (!stats->min_count)
+               return;
+       seen_delta = stats->min_items[0].delta;
+       seen_count = stats->min_items[0].count;
+
+       /* Emit at most one weak message per import. */
+       if (stats->early_last_emitted)
+               return;
+
+       /* Check arbitrary marks, emit rate limited warnings. */
+       (void)seen_count;
+       check_delta = seen_delta >> stats->early_check_shift;
+       for (cp_idx = 0; cp_idx < ARRAY_SIZE(check_points); cp_idx++) {
+               cp = &check_points[cp_idx];
+               /* No other match can happen below. Done iterating. */
+               if (stats->total_ts_seen > cp->count)
+                       return;
+               /* Advance to the next checkpoint description. */
+               if (stats->total_ts_seen != cp->count)
+                       continue;
+               /* First occurance of that timestamp count. Check the value. */
+               sr_dbg("TS early chk: total %zu, min delta %" PRIu64 " / %" PRIu64 ".",
+                       cp->count, seen_delta, check_delta);
+               if (check_delta < cp->delta)
+                       return;
+               sr_warn("Low change rate? (weak estimate, min TS delta %" PRIu64 " after %zu timestamps)",
+                       seen_delta, stats->total_ts_seen);
+               sr_warn("Consider using the downsample=N option, or increasing its value.");
+               stats->early_last_emitted = stats->total_ts_seen;
+               return;
+       }
+}
+
+/* Reset the internal state of the timestamp tracker. */
+static int ts_stats_prep(struct context *inc)
+{
+       struct ts_stats *stats;
+       uint64_t down_sample_value;
+       uint32_t down_sample_shift;
+
+       stats = &inc->ts_stats;
+       memset(stats, 0, sizeof(*stats));
+
+       down_sample_value = inc->options.downsample;
+       down_sample_shift = 0;
+       while (down_sample_value >= 2) {
+               down_sample_shift++;
+               down_sample_value /= 2;
+       }
+       stats->early_check_shift = down_sample_shift;
+
+       return SR_OK;
+}
+
+/* Inspect another timestamp that was received. */
+static int ts_stats_check(struct ts_stats *stats, uint64_t curr_ts)
+{
+       uint64_t last_ts, delta;
+
+       last_ts = stats->last_ts_value;
+       stats->last_ts_value = curr_ts;
+       stats->total_ts_seen++;
+       if (stats->total_ts_seen < 2)
+               return SR_OK;
+
+       delta = curr_ts - last_ts;
+       stats->last_ts_delta = delta;
+       (void)ts_stats_update_min(stats, delta);
+
+       ts_stats_check_early(stats);
+
+       return SR_OK;
+}
+
+/* Postprocess internal timestamp tracker state. */
+static int ts_stats_post(struct context *inc, gboolean ignore_terminal)
+{
+       struct ts_stats *stats;
+       size_t min_idx;
+       uint64_t delta, over_sample, over_sample_scaled, suggest_factor;
+       enum sr_loglevel log_level;
+       gboolean is_suspicious, has_downsample;
+
+       stats = &inc->ts_stats;
+
+       /*
+        * Lookup the smallest timestamp delta which was found during
+        * data import. Ignore the last delta if its timestamp was never
+        * followed by data, and this was the only occurance. Absence of
+        * result data is non-fatal here -- this code exclusively serves
+        * to raise users' awareness of potential pitfalls, but does not
+        * change behaviour of data processing.
+        *
+        * TODO Also filter by occurance count? To not emit warnings when
+        * captured signals only change slowly by design. Only warn when
+        * the sample rate and samples count product exceeds a threshold?
+        * See below for the necessity (and potential) to adjust the log
+        * message's severity and content.
+        */
+       min_idx = 0;
+       if (ignore_terminal) do {
+               if (min_idx >= stats->min_count)
+                       break;
+               delta = stats->last_ts_delta;
+               if (stats->min_items[min_idx].delta != delta)
+                       break;
+               if (stats->min_items[min_idx].count != 1)
+                       break;
+               min_idx++;
+       } while (0);
+       if (min_idx >= stats->min_count)
+               return SR_OK;
+
+       /*
+        * TODO Refine the condition whether to notify the user, and
+        * which severity to use after having inspected all input data.
+        * Any detail could get involved which previously was gathered
+        * during data processing: total sample count, channel count
+        * including their data type and bits width, the oversampling
+        * factor (minimum observed "change rate"), or any combination
+        * thereof. The current check is rather simple (unconditional
+        * warning for ratios starting at 100, regardless of sample or
+        * channel count).
+        */
+       over_sample = stats->min_items[min_idx].delta;
+       over_sample_scaled = over_sample / inc->options.downsample;
+       sr_dbg("TS post stats: oversample unscaled %" PRIu64 ", scaled %" PRIu64,
+               over_sample, over_sample_scaled);
+       if (over_sample_scaled < 10) {
+               sr_dbg("TS post stats: Low oversampling ratio, good.");
+               return SR_OK;
+       }
+
+       /*
+        * Avoid constructing the message from several tiny pieces by
+        * design, because this would be hard on translators. Stick with
+        * complete sentences instead, and accept the redundancy in the
+        * user's interest.
+        */
+       log_level = (over_sample_scaled > 20) ? SR_LOG_WARN : SR_LOG_INFO;
+       is_suspicious = over_sample_scaled > 20;
+       if (is_suspicious) {
+               sr_log(log_level, LOG_PREFIX ": "
+                       "Suspiciously low overall change rate (total min TS delta %" PRIu64 ").",
+                       over_sample_scaled);
+       } else {
+               sr_log(log_level, LOG_PREFIX ": "
+                       "Low overall change rate (total min TS delta %" PRIu64 ").",
+                       over_sample_scaled);
+       }
+       has_downsample = inc->options.downsample > 1;
+       suggest_factor = inc->options.downsample;
+       while (over_sample_scaled >= 10) {
+               suggest_factor *= 10;
+               over_sample_scaled /= 10;
+       }
+       if (has_downsample) {
+               sr_log(log_level, LOG_PREFIX ": "
+                       "Suggest higher downsample value, like %" PRIu64 ".",
+                       suggest_factor);
+       } else {
+               sr_log(log_level, LOG_PREFIX ": "
+                       "Suggest to downsample, value like %" PRIu64 ".",
+                       suggest_factor);
+       }
+
+       return SR_OK;
+}
+
  static void check_remove_bom(GString *buf)
  {
         static const char *bom_text = "\xef\xbb\xbf";
@@ -560,6 +821,7 @@ static int parse_header_var(struct context *inc, char *contents)
         size_t length;
         char *type, *size_txt, *id, *ref, *idx;
         gboolean is_reg, is_wire, is_real, is_int;
+       gboolean is_str;
         enum sr_channeltype ch_type;
         size_t size, next_size;
         struct vcd_channel *vcd_ch;
@@ -588,13 +850,21 @@ static int parse_header_var(struct context *inc, char *contents)
         is_wire = g_strcmp0(type, "wire") == 0;
         is_real = g_strcmp0(type, "real") == 0;
         is_int = g_strcmp0(type, "integer") == 0;
+       is_str = g_strcmp0(type, "string") == 0;
  
         if (is_reg || is_wire) {
                 ch_type = SR_CHANNEL_LOGIC;
         } else if (is_real || is_int) {
                 ch_type = SR_CHANNEL_ANALOG;
+       } else if (is_str) {
+               sr_warn("Skipping id %s, name '%s%s', unsupported type '%s'.",
+                       id, ref, idx ? idx : "", type);
+               inc->ignored_signals = g_slist_append(inc->ignored_signals,
+                       g_strdup(id));
+               g_strfreev(parts);
+               return SR_OK;
         } else {
-               sr_info("Unsupported signal type: '%s'", type);
+               sr_err("Unsupported signal type: '%s'", type);
                 g_strfreev(parts);
                 return SR_ERR_DATA;
         }
@@ -835,9 +1105,11 @@ static void create_feeds(const struct sr_input *in)
         inc = in->priv;
  
         /* Create one feed for logic data. */
-       inc->unit_size = (inc->logic_count + 7) / 8;
-       inc->feed_logic = feed_queue_logic_alloc(in->sdi,
-               CHUNK_SIZE / inc->unit_size, inc->unit_size);
+       if (inc->logic_count) {
+               inc->unit_size = (inc->logic_count + 7) / 8;
+               inc->feed_logic = feed_queue_logic_alloc(in->sdi,
+                       CHUNK_SIZE / inc->unit_size, inc->unit_size);
+       }
  
         /* Create one feed per analog channel. */
         for (l = inc->channels; l; l = l->next) {
@@ -920,6 +1192,7 @@ static int parse_header(const struct sr_input *in, GString *buf)
         gboolean status;
         char *name, *contents;
         size_t size;
+       int ret;
  
         inc = in->priv;
  
@@ -1008,6 +1281,10 @@ done_section:
         for (size = 0; size < inc->analog_count; size++)
                 inc->current_floats[size] = 0.;
  
+       ret = ts_stats_prep(inc);
+       if (ret != SR_OK)
+               return ret;
+
         return SR_OK;
  }
  
@@ -1242,6 +1519,95 @@ static uint8_t vcd_char_to_value(char bit_char, int *warn)
         return ~0;
  }
  
+/*
+ * Check the validity of a VCD string value. It's essential to reliably
+ * accept valid data which the community uses in the field, yet robustly
+ * reject invalid data for users' awareness. Since IEEE 1800-2017 would
+ * not discuss the representation of this data type, it's assumed to not
+ * be an official feature of the VCD file format. This implementation is
+ * an educated guess after inspection of other arbitrary implementations,
+ * not backed by any specification or public documentation.
+ *
+ * A quick summary of the implemented assumptions: Must be a sequence of
+ * ASCII printables. Must not contain whitespace. Might contain escape
+ * sequences: A backslash followed by a single character, like '\n' or
+ * '\\'. Or a backslash and the letter x followed by two hex digits,
+ * like '\x20'. Or a backslash followed by three octal digits, like
+ * '\007'. As an exception also accepts a single digit '\0' but only at
+ * the text end. The string value may be empty, but must not be NULL.
+ *
+ * This implementation assumes an ASCII based platform for simplicity
+ * and readability. Should be a given on sigrok supported platforms.
+ */
+static gboolean vcd_string_valid(const char *s)
+{
+       char c;
+
+       if (!s)
+               return FALSE;
+
+       while (*s) {
+               c = *s++;
+               /* Reject non-printable ASCII chars including DEL. */
+               if (c < ' ')
+                       return FALSE;
+               if (c > '~')
+                       return FALSE;
+               /* Deeper inspection of escape sequences. */
+               if (c == '\\') {
+                       c = *s++;
+                       switch (c) {
+                       case 'a': /* BEL, bell aka "alarm" */
+                       case 'b': /* BS, back space */
+                       case 't': /* TAB, tabulator */
+                       case 'n': /* NL, newline */
+                       case 'v': /* VT, vertical tabulator */
+                       case 'f': /* FF, form feed */
+                       case 'r': /* CR, carriage return */
+                       case '"': /* double quotes */
+                       case '\'': /* tick, single quote */
+                       case '?': /* question mark */
+                       case '\\': /* backslash */
+                               continue;
+                       case 'x': /* \xNN two hex digits */
+                               c = *s++;
+                               if (!g_ascii_isxdigit(c))
+                                       return FALSE;
+                               c = *s++;
+                               if (!g_ascii_isxdigit(c))
+                                       return FALSE;
+                               continue;
+                       case '0': /* \NNN three octal digits */
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                               /* Special case '\0' at end of text. */
+                               if (c == '0' && !*s)
+                                       return TRUE;
+                               /*
+                                * First digit was covered by the outer
+                                * switch(). Two more digits to check.
+                                */
+                               c = *s++;
+                               if (!g_ascii_isdigit(c) || c > '7')
+                                       return FALSE;
+                               c = *s++;
+                               if (!g_ascii_isdigit(c) || c > '7')
+                                       return FALSE;
+                               continue;
+                       default:
+                               return FALSE;
+                       }
+               }
+       }
+
+       return TRUE;
+}
+
  /* Parse one text line of the data section. */
  static int parse_textline(const struct sr_input *in, char *lines)
  {
@@ -1250,7 +1616,8 @@ static int parse_textline(const struct sr_input *in, char *lines)
         char **words;
         size_t word_count, word_idx;
         char *curr_word, *next_word, curr_first;
-       gboolean is_timestamp, is_section, is_real, is_multibit, is_singlebit;
+       gboolean is_timestamp, is_section;
+       gboolean is_real, is_multibit, is_singlebit, is_string;
         uint64_t timestamp;
         char *identifier, *endptr;
         size_t count;
@@ -1354,6 +1721,9 @@ static int parse_textline(const struct sr_input *in, char *lines)
                                 break;
                         }
                         sr_spew("Got timestamp: %" PRIu64, timestamp);
+                       ret = ts_stats_check(&inc->ts_stats, timestamp);
+                       if (ret != SR_OK)
+                               break;
                         if (inc->options.downsample > 1) {
                                 timestamp /= inc->options.downsample;
                                 sr_spew("Downsampled timestamp: %" PRIu64, timestamp);
@@ -1426,6 +1796,7 @@ static int parse_textline(const struct sr_input *in, char *lines)
                  * timestamp.
                  *
                  * Supported input data formats are:
+                * - S<value> <sep> <id> (value not used, VCD type 'string').
                  * - R<value> <sep> <id> (analog channel, VCD type 'real').
                  * - B<value> <sep> <id> (analog channel, VCD type 'integer').
                  * - B<value> <sep> <id> (logic channels, VCD bit vectors).
@@ -1454,6 +1825,7 @@ static int parse_textline(const struct sr_input *in, char *lines)
                 is_singlebit |= curr_first == 'l' || curr_first == 'h';
                 is_singlebit |= curr_first == 'x' || curr_first == 'z';
                 is_singlebit |= curr_first == 'u' || curr_first == '-';
+               is_string = curr_first == 's';
                 if (is_real) {
                         char *real_text;
                         float real_val;
@@ -1601,6 +1973,32 @@ static int parse_textline(const struct sr_input *in, char *lines)
                         process_bits(inc, identifier, inc->conv_bits.value, 1);
                         continue;
                 }
+               if (is_string) {
+                       const char *str_value;
+
+                       str_value = &curr_word[1];
+                       identifier = next_word;
+                       word_idx++;
+                       if (!vcd_string_valid(str_value)) {
+                               sr_err("Invalid string data: %s", str_value);
+                               ret = SR_ERR_DATA;
+                               break;
+                       }
+                       if (!identifier || !*identifier) {
+                               sr_err("String value without identifier.");
+                               ret = SR_ERR_DATA;
+                               break;
+                       }
+                       sr_spew("Got string data, id '%s', value \"%s\".",
+                               identifier, str_value);
+                       if (!is_ignored(inc, identifier)) {
+                               sr_err("String value for identifier '%s'.",
+                                       identifier);
+                               ret = SR_ERR_DATA;
+                               break;
+                       }
+                       continue;
+               }
  
                 /* Design choice: Consider unsupported input fatal. */
                 sr_err("Unknown token '%s'.", curr_word);
@@ -1786,6 +2184,9 @@ static int end(struct sr_input *in)
         count = inc->data_after_timestamp ? 1 : 0;
         add_samples(in, count, TRUE);
  
+       /* Optionally suggest downsampling after all input data was seen. */
+       (void)ts_stats_post(inc, !inc->data_after_timestamp);
+
         /* Must send DF_END when DF_HEADER was sent before. */
         if (inc->started)
                 std_session_send_df_end(in->sdi);