X-Git-Url: https://sigrok.org/gitweb/?p=libsigrok.git;a=blobdiff_plain;f=src%2Finput%2Fvcd.c;h=d5471a47163236f239fb7bef16d5fc9abcdf620b;hp=e751e885617471366a97c16671d58558c1305872;hb=HEAD;hpb=968b1a23f23e1f22f7e438a39e3235b4f21b8b14 diff --git a/src/input/vcd.c b/src/input/vcd.c index e751e885..d5471a47 100644 --- a/src/input/vcd.c +++ b/src/input/vcd.c @@ -152,11 +152,6 @@ struct context { } conv_bits; GString *scope_prefix; struct feed_queue_logic *feed_logic; - struct split_state { - size_t alloced; - char **words; - gboolean in_use; - } split; struct ts_stats { size_t total_ts_seen; uint64_t last_ts_value; @@ -205,15 +200,6 @@ static void free_channel(void *data) g_free(vcd_ch); } -/* TODO Drop the local decl when this has become a common helper. */ -void sr_channel_group_free(struct sr_channel_group *cg); - -/* Wrapper for GDestroyNotify compatibility. */ -static void cg_free(void *p) -{ - sr_channel_group_free(p); -} - /* * Another timestamp delta was observed, update statistics: Update the * sorted list of minimum values, and increment the occurance counter. @@ -466,6 +452,11 @@ static void check_remove_bom(GString *buf) /* * Reads a single VCD section from input file and parses it to name/contents. * e.g. $timescale 1ps $end => "timescale" "1ps" + * + * The section (its content and its opening/closing markers) can span + * multiple text lines. This routine must not modify the caller's input + * buffer. Executes potentially multiple times on the same input data, + * and executes outside of the processing of the file's data section. */ static gboolean parse_section(GString *buf, char **name, char **contents) { @@ -530,142 +521,6 @@ static gboolean parse_section(GString *buf, char **name, char **contents) return status; } -/* - * The glib routine which splits an input text into a list of words also - * "provides empty strings" which application code then needs to remove. - * And copies of the input text get allocated for all words. - * - * The repeated memory allocation is acceptable for small workloads like - * parsing the header sections. But the heavy lifting for sample data is - * done by DIY code to speedup execution. The use of glib routines would - * severely hurt throughput. Allocated memory gets re-used while a strict - * ping-pong pattern is assumed (each text line of input data enters and - * leaves in a strict symmetrical manner, due to the organization of the - * receive() routine and parse calls). - */ - -/* Remove empty parts from an array returned by g_strsplit(). */ -static void remove_empty_parts(gchar **parts) -{ - gchar **src, **dest; - - src = dest = parts; - while (*src) { - if (!**src) { - g_free(*src); - } else { - if (dest != src) - *dest = *src; - dest++; - } - src++; - } - *dest = NULL; -} - -static char **split_text_line(struct context *inc, char *text, size_t *count) -{ - struct split_state *state; - size_t counted, alloced, wanted; - char **words, *p, **new_words; - - state = &inc->split; - - if (count) - *count = 0; - - if (state->in_use) { - sr_dbg("coding error, split() called while \"in use\"."); - return NULL; - } - - /* - * Seed allocation when invoked for the first time. Assume - * simple logic data, start with a few words per line. Will - * automatically adjust with subsequent use. - */ - if (!state->alloced) { - alloced = 20; - words = g_malloc(sizeof(words[0]) * alloced); - if (!words) - return NULL; - state->alloced = alloced; - state->words = words; - } - - /* Start with most recently allocated word list space. */ - alloced = state->alloced; - words = state->words; - counted = 0; - - /* As long as more input text remains ... */ - p = text; - while (*p) { - /* Resize word list if needed. Just double the size. */ - if (counted + 1 >= alloced) { - wanted = 2 * alloced; - new_words = g_realloc(words, sizeof(words[0]) * wanted); - if (!new_words) { - return NULL; - } - words = new_words; - alloced = wanted; - state->words = words; - state->alloced = alloced; - } - - /* Skip leading spaces. */ - while (g_ascii_isspace(*p)) - p++; - if (!*p) - break; - - /* Add found word to word list. */ - words[counted++] = p; - - /* Find end of the word. Terminate loop upon EOS. */ - while (*p && !g_ascii_isspace(*p)) - p++; - if (!*p) - break; - - /* More text follows. Terminate the word. */ - *p++ = '\0'; - } - - /* - * NULL terminate the word list. Provide its length so that - * calling code need not re-iterate the list to get the count. - */ - words[counted] = NULL; - if (count) - *count = counted; - state->in_use = TRUE; - - return words; -} - -static void free_text_split(struct context *inc, char **words) -{ - struct split_state *state; - - state = &inc->split; - - if (words && words != state->words) { - sr_dbg("coding error, free() arg differs from split() result."); - } - - /* "Double free" finally releases the memory. */ - if (!state->in_use) { - g_free(state->words); - state->words = NULL; - state->alloced = 0; - } - - /* Mark as no longer in use. */ - state->in_use = FALSE; -} - static gboolean have_header(GString *buf) { static const char *enddef_txt = "$enddefinitions"; @@ -679,7 +534,14 @@ static gboolean have_header(GString *buf) return FALSE; p += strlen(enddef_txt); - /* Search for end of section (content expected to be empty). */ + /* + * Search for end of section (content expected to be empty). + * Uses DIY logic to scan for the literals' presence including + * empty space between keywords. MUST NOT modify the caller's + * input data, potentially executes several times on the same + * receive buffer, and executes outside of the processing the + * file's data section. + */ p_stop = &buf->str[buf->len]; p_stop -= strlen(end_txt); while (p < p_stop && g_ascii_isspace(*p)) @@ -742,8 +604,7 @@ static int parse_timescale(struct context *inc, char *contents) */ static int parse_scope(struct context *inc, char *contents, gboolean is_up) { - char *sep_pos, *name_pos; - char **parts; + char *sep_pos, *name_pos, *type_pos; size_t length; /* @@ -783,15 +644,17 @@ static int parse_scope(struct context *inc, char *contents, gboolean is_up) * was emitted by libsigrok's VCD output module. */ sr_spew("$scope, got: \"%s\"", contents); - parts = g_strsplit_set(contents, " \r\n\t", 0); - remove_empty_parts(parts); - length = g_strv_length(parts); - if (length != 2) { - sr_err("Unsupported 'scope' syntax: %s", contents); - g_strfreev(parts); + type_pos = sr_text_next_word(contents, &contents); + if (!type_pos) { + sr_err("Cannot parse 'scope' directive"); return SR_ERR_DATA; } - name_pos = parts[1]; + name_pos = sr_text_next_word(contents, &contents); + if (!name_pos || contents) { + sr_err("Cannot parse 'scope' directive"); + return SR_ERR_DATA; + } + if (strcmp(name_pos, PACKAGE_NAME) == 0) { sr_info("Skipping scope with application's package name: %s", name_pos); @@ -803,7 +666,6 @@ static int parse_scope(struct context *inc, char *contents, gboolean is_up) g_string_append_printf(inc->scope_prefix, "%s%c%c", name_pos, SCOPE_SEP, '\0'); } - g_strfreev(parts); sr_dbg("$scope, prefix now: \"%s\"", inc->scope_prefix->str); return SR_OK; @@ -817,10 +679,9 @@ static int parse_scope(struct context *inc, char *contents, gboolean is_up) */ static int parse_header_var(struct context *inc, char *contents) { - char **parts; - size_t length; char *type, *size_txt, *id, *ref, *idx; gboolean is_reg, is_wire, is_real, is_int; + gboolean is_str; enum sr_channeltype ch_type; size_t size, next_size; struct vcd_channel *vcd_ch; @@ -829,34 +690,36 @@ static int parse_header_var(struct context *inc, char *contents) * Format of $var or $reg header specs: * $var type size identifier reference [opt-index] $end */ - parts = g_strsplit_set(contents, " \r\n\t", 0); - remove_empty_parts(parts); - length = g_strv_length(parts); - if (length != 4 && length != 5) { + type = sr_text_next_word(contents, &contents); + size_txt = sr_text_next_word(contents, &contents); + id = sr_text_next_word(contents, &contents); + ref = sr_text_next_word(contents, &contents); + idx = sr_text_next_word(contents, &contents); + if (idx && !*idx) + idx = NULL; + if (!type || !size_txt || !id || !ref || contents) { sr_warn("$var section should have 4 or 5 items"); - g_strfreev(parts); return SR_ERR_DATA; } - type = parts[0]; - size_txt = parts[1]; - id = parts[2]; - ref = parts[3]; - idx = parts[4]; - if (idx && !*idx) - idx = NULL; is_reg = g_strcmp0(type, "reg") == 0; is_wire = g_strcmp0(type, "wire") == 0; is_real = g_strcmp0(type, "real") == 0; is_int = g_strcmp0(type, "integer") == 0; + is_str = g_strcmp0(type, "string") == 0; if (is_reg || is_wire) { ch_type = SR_CHANNEL_LOGIC; } else if (is_real || is_int) { ch_type = SR_CHANNEL_ANALOG; + } else if (is_str) { + sr_warn("Skipping id %s, name '%s%s', unsupported type '%s'.", + id, ref, idx ? idx : "", type); + inc->ignored_signals = g_slist_append(inc->ignored_signals, + g_strdup(id)); + return SR_OK; } else { - sr_info("Unsupported signal type: '%s'", type); - g_strfreev(parts); + sr_err("Unsupported signal type: '%s'", type); return SR_ERR_DATA; } @@ -882,7 +745,6 @@ static int parse_header_var(struct context *inc, char *contents) } if (!size) { sr_warn("Unsupported signal size: '%s'", size_txt); - g_strfreev(parts); return SR_ERR_DATA; } if (inc->conv_bits.max_bits < size) @@ -893,7 +755,6 @@ static int parse_header_var(struct context *inc, char *contents) ref, idx ? idx : "", inc->options.maxchannels); inc->ignored_signals = g_slist_append(inc->ignored_signals, g_strdup(id)); - g_strfreev(parts); return SR_OK; } @@ -922,7 +783,6 @@ static int parse_header_var(struct context *inc, char *contents) vcd_ch->type == SR_CHANNEL_ANALOG ? "A" : "L", vcd_ch->array_index); inc->channels = g_slist_append(inc->channels, vcd_ch); - g_strfreev(parts); return SR_OK; } @@ -1065,10 +925,8 @@ static void create_channels(const struct sr_input *in, if (vcd_ch->type != ch_type) continue; cg = NULL; - if (vcd_ch->size != 1) { - cg = g_malloc0(sizeof(*cg)); - cg->name = g_strdup(vcd_ch->name); - } + if (vcd_ch->size != 1) + cg = sr_channel_group_new(sdi, vcd_ch->name, NULL); for (size_idx = 0; size_idx < vcd_ch->size; size_idx++) { ch_name = get_channel_name(vcd_ch, size_idx); sr_dbg("sigrok channel idx %zu, name %s, type %s, en %d.", @@ -1080,8 +938,6 @@ static void create_channels(const struct sr_input *in, if (cg) cg->channels = g_slist_append(cg->channels, ch); } - if (cg) - sdi->channel_groups = g_slist_append(sdi->channel_groups, cg); } } @@ -1126,7 +982,7 @@ static void keep_header_for_reread(const struct sr_input *in) inc = in->priv; - g_slist_free_full(inc->prev.sr_groups, cg_free); + g_slist_free_full(inc->prev.sr_groups, sr_channel_group_free_cb); inc->prev.sr_groups = in->sdi->channel_groups; in->sdi->channel_groups = NULL; @@ -1165,7 +1021,7 @@ static gboolean check_header_in_reread(const struct sr_input *in) return FALSE; } - g_slist_free_full(in->sdi->channel_groups, cg_free); + g_slist_free_full(in->sdi->channel_groups, sr_channel_group_free_cb); in->sdi->channel_groups = inc->prev.sr_groups; inc->prev.sr_groups = NULL; @@ -1180,7 +1036,7 @@ static gboolean check_header_in_reread(const struct sr_input *in) static int parse_header(const struct sr_input *in, GString *buf) { struct context *inc; - gboolean status; + gboolean enddef_seen, header_valid; char *name, *contents; size_t size; int ret; @@ -1188,34 +1044,35 @@ static int parse_header(const struct sr_input *in, GString *buf) inc = in->priv; /* Parse sections until complete header was seen. */ - status = FALSE; + enddef_seen = FALSE; + header_valid = TRUE; name = contents = NULL; inc->conv_bits.max_bits = 1; while (parse_section(buf, &name, &contents)) { sr_dbg("Section '%s', contents '%s'.", name, contents); if (g_strcmp0(name, "enddefinitions") == 0) { - status = TRUE; + enddef_seen = TRUE; goto done_section; } if (g_strcmp0(name, "timescale") == 0) { if (parse_timescale(inc, contents) != SR_OK) - status = FALSE; + header_valid = FALSE; goto done_section; } if (g_strcmp0(name, "scope") == 0) { if (parse_scope(inc, contents, FALSE) != SR_OK) - status = FALSE; + header_valid = FALSE; goto done_section; } if (g_strcmp0(name, "upscope") == 0) { if (parse_scope(inc, NULL, TRUE) != SR_OK) - status = FALSE; + header_valid = FALSE; goto done_section; } if (g_strcmp0(name, "var") == 0) { if (parse_header_var(inc, contents) != SR_OK) - status = FALSE; + header_valid = FALSE; goto done_section; } @@ -1225,14 +1082,14 @@ done_section: g_free(contents); contents = NULL; - if (status) + if (enddef_seen) break; } g_free(name); g_free(contents); - inc->got_header = status; - if (!status) + inc->got_header = enddef_seen && header_valid; + if (!inc->got_header) return SR_ERR_DATA; /* Create sigrok channels here, late, logic before analog. */ @@ -1295,7 +1152,7 @@ static void add_samples(const struct sr_input *in, size_t count, gboolean flush) inc = in->priv; if (inc->logic_count) { - feed_queue_logic_submit(inc->feed_logic, + feed_queue_logic_submit_one(inc->feed_logic, inc->current_logic, count); if (flush) feed_queue_logic_flush(inc->feed_logic); @@ -1308,7 +1165,7 @@ static void add_samples(const struct sr_input *in, size_t count, gboolean flush) if (!q) continue; value = inc->current_floats[vcd_ch->array_index]; - feed_queue_analog_submit(q, value, count); + feed_queue_analog_submit_one(q, value, count); if (flush) feed_queue_analog_flush(q); } @@ -1510,15 +1367,103 @@ static uint8_t vcd_char_to_value(char bit_char, int *warn) return ~0; } +/* + * Check the validity of a VCD string value. It's essential to reliably + * accept valid data which the community uses in the field, yet robustly + * reject invalid data for users' awareness. Since IEEE 1800-2017 would + * not discuss the representation of this data type, it's assumed to not + * be an official feature of the VCD file format. This implementation is + * an educated guess after inspection of other arbitrary implementations, + * not backed by any specification or public documentation. + * + * A quick summary of the implemented assumptions: Must be a sequence of + * ASCII printables. Must not contain whitespace. Might contain escape + * sequences: A backslash followed by a single character, like '\n' or + * '\\'. Or a backslash and the letter x followed by two hex digits, + * like '\x20'. Or a backslash followed by three octal digits, like + * '\007'. As an exception also accepts a single digit '\0' but only at + * the text end. The string value may be empty, but must not be NULL. + * + * This implementation assumes an ASCII based platform for simplicity + * and readability. Should be a given on sigrok supported platforms. + */ +static gboolean vcd_string_valid(const char *s) +{ + char c; + + if (!s) + return FALSE; + + while (*s) { + c = *s++; + /* Reject non-printable ASCII chars including DEL. */ + if (c < ' ') + return FALSE; + if (c > '~') + return FALSE; + /* Deeper inspection of escape sequences. */ + if (c == '\\') { + c = *s++; + switch (c) { + case 'a': /* BEL, bell aka "alarm" */ + case 'b': /* BS, back space */ + case 't': /* TAB, tabulator */ + case 'n': /* NL, newline */ + case 'v': /* VT, vertical tabulator */ + case 'f': /* FF, form feed */ + case 'r': /* CR, carriage return */ + case '"': /* double quotes */ + case '\'': /* tick, single quote */ + case '?': /* question mark */ + case '\\': /* backslash */ + continue; + case 'x': /* \xNN two hex digits */ + c = *s++; + if (!g_ascii_isxdigit(c)) + return FALSE; + c = *s++; + if (!g_ascii_isxdigit(c)) + return FALSE; + continue; + case '0': /* \NNN three octal digits */ + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + /* Special case '\0' at end of text. */ + if (c == '0' && !*s) + return TRUE; + /* + * First digit was covered by the outer + * switch(). Two more digits to check. + */ + c = *s++; + if (!g_ascii_isdigit(c) || c > '7') + return FALSE; + c = *s++; + if (!g_ascii_isdigit(c) || c > '7') + return FALSE; + continue; + default: + return FALSE; + } + } + } + + return TRUE; +} + /* Parse one text line of the data section. */ -static int parse_textline(const struct sr_input *in, char *lines) +static int parse_textline(const struct sr_input *in, char *line) { struct context *inc; int ret; - char **words; - size_t word_count, word_idx; - char *curr_word, *next_word, curr_first; - gboolean is_timestamp, is_section, is_real, is_multibit, is_singlebit; + char *curr_word, curr_first; + gboolean is_timestamp, is_section; + gboolean is_real, is_multibit, is_singlebit, is_string; uint64_t timestamp; char *identifier, *endptr; size_t count; @@ -1526,30 +1471,33 @@ static int parse_textline(const struct sr_input *in, char *lines) inc = in->priv; /* - * Split the caller's text lines into a list of space separated - * words. Note that some of the branches consume the very next - * words as well, and assume that both adjacent words will be - * available when the first word is seen. This constraint applies - * to bit vector data, multi-bit integers and real (float) data, - * as well as single-bit data with whitespace before its - * identifier (if that's valid in VCD, we'd accept it here). + * Consume space separated words from a caller's text line. Note + * that many words are self contained, but some require another + * word to follow. This implementation assumes that both words + * (when involved) become available in the same invocation, that + * is that both words reside on the same text line of the file. * The fact that callers always pass complete text lines should - * make this assumption acceptable. + * make this assumption acceptable. No generator is known to + * split two corresponding words across text lines. + * + * This constraint applies to bit vector data, multi-bit integer + * and real (float) values, text strings, as well as single-bit + * values with whitespace before their identifiers (if that is + * valid in VCD, we'd accept it here; if generators don't create + * such input, then support for it does not harm). */ ret = SR_OK; - words = split_text_line(inc, lines, &word_count); - for (word_idx = 0; word_idx < word_count; word_idx++) { + while (line) { /* - * Make the next two words available, to simpilify code - * paths below. The second word is optional here. + * Lookup one word here which is mandatory. Locations + * below conditionally lookup another word as needed. */ - curr_word = words[word_idx]; - if (!curr_word && !curr_word[0]) + curr_word = sr_text_next_word(line, &line); + if (!curr_word) + break; + if (!*curr_word) continue; curr_first = g_ascii_tolower(curr_word[0]); - next_word = words[word_idx + 1]; - if (next_word && !next_word[0]) - next_word = NULL; /* * Optionally skip some sections that can be interleaved @@ -1697,6 +1645,7 @@ static int parse_textline(const struct sr_input *in, char *lines) * timestamp. * * Supported input data formats are: + * - S (value not used, VCD type 'string'). * - R (analog channel, VCD type 'real'). * - B (analog channel, VCD type 'integer'). * - B (logic channels, VCD bit vectors). @@ -1725,13 +1674,13 @@ static int parse_textline(const struct sr_input *in, char *lines) is_singlebit |= curr_first == 'l' || curr_first == 'h'; is_singlebit |= curr_first == 'x' || curr_first == 'z'; is_singlebit |= curr_first == 'u' || curr_first == '-'; + is_string = curr_first == 's'; if (is_real) { char *real_text; float real_val; real_text = &curr_word[1]; - identifier = next_word; - word_idx++; + identifier = sr_text_next_word(line, &line); if (!*real_text || !identifier || !*identifier) { sr_err("Unexpected real format."); ret = SR_ERR_DATA; @@ -1765,8 +1714,7 @@ static int parse_textline(const struct sr_input *in, char *lines) * we may never unify code paths at all here. */ bits_text = &curr_word[1]; - identifier = next_word; - word_idx++; + identifier = sr_text_next_word(line, &line); if (!*bits_text || !identifier || !*identifier) { sr_err("Unexpected integer/vector format."); @@ -1851,10 +1799,8 @@ static int parse_textline(const struct sr_input *in, char *lines) break; } identifier = ++bits_text; - if (!*identifier) { - identifier = next_word; - word_idx++; - } + if (!*identifier) + identifier = sr_text_next_word(line, &line); if (!identifier || !*identifier) { sr_err("Identifier missing."); ret = SR_ERR_DATA; @@ -1872,13 +1818,37 @@ static int parse_textline(const struct sr_input *in, char *lines) process_bits(inc, identifier, inc->conv_bits.value, 1); continue; } + if (is_string) { + const char *str_value; + + str_value = &curr_word[1]; + identifier = sr_text_next_word(line, &line); + if (!vcd_string_valid(str_value)) { + sr_err("Invalid string data: %s", str_value); + ret = SR_ERR_DATA; + break; + } + if (!identifier || !*identifier) { + sr_err("String value without identifier."); + ret = SR_ERR_DATA; + break; + } + sr_spew("Got string data, id '%s', value \"%s\".", + identifier, str_value); + if (!is_ignored(inc, identifier)) { + sr_err("String value for identifier '%s'.", + identifier); + ret = SR_ERR_DATA; + break; + } + continue; + } /* Design choice: Consider unsupported input fatal. */ sr_err("Unknown token '%s'.", curr_word); ret = SR_ERR_DATA; break; } - free_text_split(inc, words); return ret; } @@ -1889,11 +1859,14 @@ static int process_buffer(struct sr_input *in, gboolean is_eof) uint64_t samplerate; GVariant *gvar; int ret; - char *rdptr, *endptr, *trimptr; - size_t rdlen; + char *rdptr, *line; + size_t taken, rdlen; inc = in->priv; + if (!inc->got_header) + return SR_ERR_DATA; + /* Send feed header and samplerate (once) before sample data. */ if (!inc->started) { std_session_send_df_header(in->sdi); @@ -1918,28 +1891,19 @@ static int process_buffer(struct sr_input *in, gboolean is_eof) /* Find and process complete text lines in the input data. */ ret = SR_OK; rdptr = in->buf->str; - while (TRUE) { + taken = 0; + while (rdptr) { rdlen = &in->buf->str[in->buf->len] - rdptr; - endptr = g_strstr_len(rdptr, rdlen, "\n"); - if (!endptr) + line = sr_text_next_line(rdptr, rdlen, &rdptr, &taken); + if (!line) break; - trimptr = endptr; - *endptr++ = '\0'; - while (g_ascii_isspace(*rdptr)) - rdptr++; - while (trimptr > rdptr && g_ascii_isspace(trimptr[-1])) - *(--trimptr) = '\0'; - if (!*rdptr) { - rdptr = endptr; + if (!*line) continue; - } - ret = parse_textline(in, rdptr); - rdptr = endptr; + ret = parse_textline(in, line); if (ret != SR_OK) break; } - rdlen = rdptr - in->buf->str; - g_string_erase(in->buf, 0, rdlen); + g_string_erase(in->buf, 0, taken); return ret; } @@ -2054,11 +2018,14 @@ static int end(struct sr_input *in) ret = SR_OK; /* Flush most recently queued sample data when EOF is seen. */ - count = inc->data_after_timestamp ? 1 : 0; - add_samples(in, count, TRUE); + if (inc->got_header && ret == SR_OK) { + count = inc->data_after_timestamp ? 1 : 0; + add_samples(in, count, TRUE); + } /* Optionally suggest downsampling after all input data was seen. */ - (void)ts_stats_post(inc, !inc->data_after_timestamp); + if (inc->got_header) + (void)ts_stats_post(inc, !inc->data_after_timestamp); /* Must send DF_END when DF_HEADER was sent before. */ if (inc->started) @@ -2089,7 +2056,6 @@ static void cleanup(struct sr_input *in) inc->scope_prefix = NULL; g_slist_free_full(inc->ignored_signals, g_free); inc->ignored_signals = NULL; - free_text_split(inc, NULL); } static int reset(struct sr_input *in)