input/vcd: detect and skip string data types (value not used)

author Gerhard Sittig <redacted>

Sun, 5 Sep 2021 10:45:26 +0000 (12:45 +0200)

committer Gerhard Sittig <redacted>

Sat, 8 Jan 2022 09:03:47 +0000 (10:03 +0100)
author Gerhard Sittig <redacted>
Sun, 5 Sep 2021 10:45:26 +0000 (12:45 +0200)
committer Gerhard Sittig <redacted>
Sat, 8 Jan 2022 09:03:47 +0000 (10:03 +0100)
diff --git a/src/input/vcd.c b/src/input/vcd.c

index 2cb4eeed66518f128e19b277f2a86060c56e31ab..413712301318787f7201f9389de5748418dbfbeb 100644 (file)
--- a/src/input/vcd.c
+++ b/src/input/vcd.c
@@ -821,6 +821,7 @@ static int parse_header_var(struct context *inc, char *contents)
         size_t length;
         char *type, *size_txt, *id, *ref, *idx;
         gboolean is_reg, is_wire, is_real, is_int;
         size_t length;
         char *type, *size_txt, *id, *ref, *idx;
         gboolean is_reg, is_wire, is_real, is_int;
+       gboolean is_str;
         enum sr_channeltype ch_type;
         size_t size, next_size;
         struct vcd_channel *vcd_ch;
         enum sr_channeltype ch_type;
         size_t size, next_size;
         struct vcd_channel *vcd_ch;
@@ -849,11 +850,19 @@ static int parse_header_var(struct context *inc, char *contents)
         is_wire = g_strcmp0(type, "wire") == 0;
         is_real = g_strcmp0(type, "real") == 0;
         is_int = g_strcmp0(type, "integer") == 0;
         is_wire = g_strcmp0(type, "wire") == 0;
         is_real = g_strcmp0(type, "real") == 0;
         is_int = g_strcmp0(type, "integer") == 0;
+       is_str = g_strcmp0(type, "string") == 0;
  
         if (is_reg || is_wire) {
                 ch_type = SR_CHANNEL_LOGIC;
         } else if (is_real || is_int) {
                 ch_type = SR_CHANNEL_ANALOG;
  
         if (is_reg || is_wire) {
                 ch_type = SR_CHANNEL_LOGIC;
         } else if (is_real || is_int) {
                 ch_type = SR_CHANNEL_ANALOG;
+       } else if (is_str) {
+               sr_warn("Skipping id %s, name '%s%s', unsupported type '%s'.",
+                       id, ref, idx ? idx : "", type);
+               inc->ignored_signals = g_slist_append(inc->ignored_signals,
+                       g_strdup(id));
+               g_strfreev(parts);
+               return SR_OK;
         } else {
                 sr_err("Unsupported signal type: '%s'", type);
                 g_strfreev(parts);
         } else {
                 sr_err("Unsupported signal type: '%s'", type);
                 g_strfreev(parts);
@@ -1510,6 +1519,95 @@ static uint8_t vcd_char_to_value(char bit_char, int *warn)
         return ~0;
  }
  
         return ~0;
  }
  
+/*
+ * Check the validity of a VCD string value. It's essential to reliably
+ * accept valid data which the community uses in the field, yet robustly
+ * reject invalid data for users' awareness. Since IEEE 1800-2017 would
+ * not discuss the representation of this data type, it's assumed to not
+ * be an official feature of the VCD file format. This implementation is
+ * an educated guess after inspection of other arbitrary implementations,
+ * not backed by any specification or public documentation.
+ *
+ * A quick summary of the implemented assumptions: Must be a sequence of
+ * ASCII printables. Must not contain whitespace. Might contain escape
+ * sequences: A backslash followed by a single character, like '\n' or
+ * '\\'. Or a backslash and the letter x followed by two hex digits,
+ * like '\x20'. Or a backslash followed by three octal digits, like
+ * '\007'. As an exception also accepts a single digit '\0' but only at
+ * the text end. The string value may be empty, but must not be NULL.
+ *
+ * This implementation assumes an ASCII based platform for simplicity
+ * and readability. Should be a given on sigrok supported platforms.
+ */
+static gboolean vcd_string_valid(const char *s)
+{
+       char c;
+
+       if (!s)
+               return FALSE;
+
+       while (*s) {
+               c = *s++;
+               /* Reject non-printable ASCII chars including DEL. */
+               if (c < ' ')
+                       return FALSE;
+               if (c > '~')
+                       return FALSE;
+               /* Deeper inspection of escape sequences. */
+               if (c == '\\') {
+                       c = *s++;
+                       switch (c) {
+                       case 'a': /* BEL, bell aka "alarm" */
+                       case 'b': /* BS, back space */
+                       case 't': /* TAB, tabulator */
+                       case 'n': /* NL, newline */
+                       case 'v': /* VT, vertical tabulator */
+                       case 'f': /* FF, form feed */
+                       case 'r': /* CR, carriage return */
+                       case '"': /* double quotes */
+                       case '\'': /* tick, single quote */
+                       case '?': /* question mark */
+                       case '\\': /* backslash */
+                               continue;
+                       case 'x': /* \xNN two hex digits */
+                               c = *s++;
+                               if (!g_ascii_isxdigit(c))
+                                       return FALSE;
+                               c = *s++;
+                               if (!g_ascii_isxdigit(c))
+                                       return FALSE;
+                               continue;
+                       case '0': /* \NNN three octal digits */
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                               /* Special case '\0' at end of text. */
+                               if (c == '0' && !*s)
+                                       return TRUE;
+                               /*
+                                * First digit was covered by the outer
+                                * switch(). Two more digits to check.
+                                */
+                               c = *s++;
+                               if (!g_ascii_isdigit(c) || c > '7')
+                                       return FALSE;
+                               c = *s++;
+                               if (!g_ascii_isdigit(c) || c > '7')
+                                       return FALSE;
+                               continue;
+                       default:
+                               return FALSE;
+                       }
+               }
+       }
+
+       return TRUE;
+}
+
  /* Parse one text line of the data section. */
  static int parse_textline(const struct sr_input *in, char *lines)
  {
  /* Parse one text line of the data section. */
  static int parse_textline(const struct sr_input *in, char *lines)
  {
@@ -1518,7 +1616,8 @@ static int parse_textline(const struct sr_input *in, char *lines)
         char **words;
         size_t word_count, word_idx;
         char *curr_word, *next_word, curr_first;
         char **words;
         size_t word_count, word_idx;
         char *curr_word, *next_word, curr_first;
-       gboolean is_timestamp, is_section, is_real, is_multibit, is_singlebit;
+       gboolean is_timestamp, is_section;
+       gboolean is_real, is_multibit, is_singlebit, is_string;
         uint64_t timestamp;
         char *identifier, *endptr;
         size_t count;
         uint64_t timestamp;
         char *identifier, *endptr;
         size_t count;
@@ -1697,6 +1796,7 @@ static int parse_textline(const struct sr_input *in, char *lines)
                  * timestamp.
                  *
                  * Supported input data formats are:
                  * timestamp.
                  *
                  * Supported input data formats are:
+                * - S<value> <sep> <id> (value not used, VCD type 'string').
                  * - R<value> <sep> <id> (analog channel, VCD type 'real').
                  * - B<value> <sep> <id> (analog channel, VCD type 'integer').
                  * - B<value> <sep> <id> (logic channels, VCD bit vectors).
                  * - R<value> <sep> <id> (analog channel, VCD type 'real').
                  * - B<value> <sep> <id> (analog channel, VCD type 'integer').
                  * - B<value> <sep> <id> (logic channels, VCD bit vectors).
@@ -1725,6 +1825,7 @@ static int parse_textline(const struct sr_input *in, char *lines)
                 is_singlebit |= curr_first == 'l' || curr_first == 'h';
                 is_singlebit |= curr_first == 'x' || curr_first == 'z';
                 is_singlebit |= curr_first == 'u' || curr_first == '-';
                 is_singlebit |= curr_first == 'l' || curr_first == 'h';
                 is_singlebit |= curr_first == 'x' || curr_first == 'z';
                 is_singlebit |= curr_first == 'u' || curr_first == '-';
+               is_string = curr_first == 's';
                 if (is_real) {
                         char *real_text;
                         float real_val;
                 if (is_real) {
                         char *real_text;
                         float real_val;
@@ -1872,6 +1973,32 @@ static int parse_textline(const struct sr_input *in, char *lines)
                         process_bits(inc, identifier, inc->conv_bits.value, 1);
                         continue;
                 }
                         process_bits(inc, identifier, inc->conv_bits.value, 1);
                         continue;
                 }
+               if (is_string) {
+                       const char *str_value;
+
+                       str_value = &curr_word[1];
+                       identifier = next_word;
+                       word_idx++;
+                       if (!vcd_string_valid(str_value)) {
+                               sr_err("Invalid string data: %s", str_value);
+                               ret = SR_ERR_DATA;
+                               break;
+                       }
+                       if (!identifier || !*identifier) {
+                               sr_err("String value without identifier.");
+                               ret = SR_ERR_DATA;
+                               break;
+                       }
+                       sr_spew("Got string data, id '%s', value \"%s\".",
+                               identifier, str_value);
+                       if (!is_ignored(inc, identifier)) {
+                               sr_err("String value for identifier '%s'.",
+                                       identifier);
+                               ret = SR_ERR_DATA;
+                               break;
+                       }
+                       continue;
+               }
  
                 /* Design choice: Consider unsupported input fatal. */
                 sr_err("Unknown token '%s'.", curr_word);
  
                 /* Design choice: Consider unsupported input fatal. */
                 sr_err("Unknown token '%s'.", curr_word);
author	Gerhard Sittig <redacted>
	Sun, 5 Sep 2021 10:45:26 +0000 (12:45 +0200)
committer	Gerhard Sittig <redacted>
	Sat, 8 Jan 2022 09:03:47 +0000 (10:03 +0100)