src/input/vcd.c

   1 /*
   2  * This file is part of the libsigrok project.
   3  *
   4  * Copyright (C) 2012 Petteri Aimonen <jpa@sr.mail.kapsi.fi>
   5  * Copyright (C) 2014 Bert Vermeulen <bert@biot.com>
   6  * Copyright (C) 2017-2020 Gerhard Sittig <gerhard.sittig@gmx.net>
   7  *
   8  * This program is free software: you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation, either version 3 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  20  */
  21
  22 /*
  23  * The VCD input module has the following options. See the options[]
  24  * declaration near the bottom of the input module's source file.
  25  *
  26  * numchannels: Maximum number of sigrok channels to create. VCD signals
  27  *   are detected in their order of declaration in the VCD file header,
  28  *   and mapped to sigrok channels.
  29  *
 * skip: Allows skipping data at the start of the input file. This can
  31  *   speed up operation on long captures.
  32  *   Value < 0: Skip until first timestamp that is listed in the file.
  33  *     (This is the default behaviour.)
  34  *   Value = 0: Do not skip, instead generate samples beginning from
  35  *     timestamp 0.
  36  *   Value > 0: Start at the given timestamp.
  37  *
  38  * downsample: Divide the samplerate by the given factor. This can
  39  *   speed up operation on long captures.
  40  *
  41  * compress: Trim idle periods which are longer than this value to span
  42  *   only this many timescale ticks. This can speed up operation on long
  43  *   captures (default 0, don't compress).
  44  *
  45  * Based on Verilog standard IEEE Std 1364-2001 Version C
  46  *
  47  * Supported features:
  48  * - $var with 'wire' and 'reg' types of scalar variables
  49  * - $timescale definition for samplerate
  50  * - multiple character variable identifiers
 * - same identifier used for multiple signals (identical values)
  52  * - vector variables (bit vectors)
  53  * - integer variables (analog signals with 0 digits, passed as single
  54  *   precision float number)
  55  * - real variables (analog signals, passed on with single precision,
  56  *   arbitrary digits value, not user adjustable)
  57  * - nested $scope, results in prefixed sigrok channel names
  58  *
  59  * Most important unsupported features:
  60  * - $dumpvars initial value declaration (is not an issue if generators
  61  *   provide sample data for the #0 timestamp, otherwise session data
  62  *   starts from zero values, and catches up when the signal changes its
  63  *   state to a supported value)
  64  *
  65  * Implementor's note: This input module specifically does _not_ use
  66  * glib routines where they would hurt performance. Lots of memory
  67  * allocations increase execution time not by percents but by huge
  68  * factors. This motivated this module's custom code for splitting
  69  * words on text lines, and pooling previously allocated buffers.
  70  *
  71  * TODO (in arbitrary order)
  72  * - Map VCD scopes to sigrok channel groups?
  73  *   - Does libsigrok support nested channel groups? Or is this feature
  74  *     exclusive to Pulseview?
  75  * - Check VCD input to VCD output behaviour. Verify that export and
  76  *   re-import results in identical data (well, VCD's constraints on
  77  *   timescale values is known to result in differences).
  78  * - Cleanup the implementation.
  79  *   - Consistent use of the glib API (where appropriate).
  80  *   - More appropriate variable/function identifiers.
  81  *   - More robust handling of multi-word input phrases and chunked
  82  *     input buffers? This implementation assumes that e.g. b[01]+
  83  *     patterns are complete when they start, and the signal identifier
  84  *     is available as well. Which may be true assuming that input data
  85  *     comes in complete text lines.
  86  *   - See if other input modules have learned lessons that we could
  87  *     benefit from here as well? Pointless BOM (done), line oriented
  88  *     processing with EOL variants and with optional last EOL, module
  89  *     state reset and file re-read (stable channels list), buffered
  90  *     session feed, synchronized feed for mixed signal sources, digits
  91  *     or formats support for analog input, single vs double precision,
  92  *     etc.
  93  *   - Re-consider logging. Verbosity levels should be acceptable,
  94  *     but volume is an issue. Drop duplicates, and drop messages from
  95  *     known good code paths.
  96  */
  97
  98 #include <config.h>
  99
 100 #include <glib.h>
 101 #include <libsigrok/libsigrok.h>
 102 #include "libsigrok-internal.h"
 103 #include <stdio.h>
 104 #include <stdlib.h>
 105 #include <string.h>
 106
 107 #define LOG_PREFIX "input/vcd"
 108
 109 #define CHUNK_SIZE (4 * 1024 * 1024)
 110 #define SCOPE_SEP '.'
 111
/*
 * State of one VCD input module instance. create_channels() gets it
 * from the input's 'priv' member.
 *
 * NOTE(review): Several members are only referenced by code which is
 * outside of the part of the file that was reviewed here. Their comments
 * are marked as assumptions and should be confirmed against their users.
 */
struct context {
        /* User provided options, see the description at the top of the file. */
        struct vcd_user_opt {
                /* Upper limit for created channels, 0 means "no limit". */
                size_t maxchannels; /* sigrok channels (output) */
                /* Presumably the 'downsample' option's factor -- confirm. */
                uint64_t downsample;
                /* Presumably the 'compress' option's tick count -- confirm. */
                uint64_t compress;
                /* Presumably the 'skip' option's timestamp -- confirm. */
                uint64_t skip_starttime;
                /* Presumably tells whether 'skip' was given at all -- confirm. */
                gboolean skip_specified;
        } options;
        /* Parse progress flags and timestamp tracking, users not visible here. */
        gboolean use_skip;
        gboolean started;
        gboolean got_header;
        uint64_t prev_timestamp;
        /* Derived from the $timescale section by parse_timescale(). */
        uint64_t samplerate;
        /* Number of $var declarations which were accepted. */
        size_t vcdsignals; /* VCD signals (input) */
        /* Identifiers (allocated strings) of signals beyond 'maxchannels'. */
        GSList *ignored_signals;
        /* Data section parse state, users not visible here. */
        gboolean data_after_timestamp;
        gboolean ignore_end_keyword;
        gboolean skip_until_end;
        /* List of struct vcd_channel items, in order of declaration. */
        GSList *channels;
        /* Presumably the byte count of one logic sample -- confirm. */
        size_t unit_size;
        /* Sum of the bit widths of all accepted logic signals. */
        size_t logic_count;
        /* Number of accepted analog signals. */
        size_t analog_count;
        /* Presumably the most recent logic/analog sample values -- confirm. */
        uint8_t *current_logic;
        float *current_floats;
        /* Scratch data for the conversion of multi-bit values. */
        struct {
                /* Largest signal size seen in any $var declaration. */
                size_t max_bits;
                /* Remaining members' users are not visible here. */
                size_t unit_size;
                uint8_t *value;
                size_t sig_count;
        } conv_bits;
        /*
         * Currently effective scope names, each followed by SCOPE_SEP.
         * Carries an extra trailing NUL which is included in its length,
         * see parse_scope().
         */
        GString *scope_prefix;
        /* Presumably the submission queue for logic samples -- confirm. */
        struct feed_queue_logic *feed_logic;
        /* Re-used word list of split_text_line() and free_text_split(). */
        struct split_state {
                /* Number of entries that 'words' can hold, 0 before first use. */
                size_t alloced;
                /* Pointers into the most recently split text line. */
                char **words;
                /* Set by split_text_line(), cleared by free_text_split(). */
                gboolean in_use;
        } split;
        /* Presumably channels/groups kept from a previous read -- confirm. */
        struct vcd_prev {
                GSList *sr_channels;
                GSList *sr_groups;
        } prev;
};
 154
/*
 * Description of one VCD signal ("variable") as declared in a $var
 * section. Instances get created by parse_header_var() and are released
 * by free_channel().
 */
struct vcd_channel {
        /* Scope prefix, reference name, and optional index text (allocated). */
        char *name;
        /* The signal's identifier from the $var section (allocated). */
        char *identifier;
        /* Width in bits for logic signals, always 1 for analog signals. */
        size_t size;
        /* SR_CHANNEL_LOGIC for reg/wire, SR_CHANNEL_ANALOG for real/integer. */
        enum sr_channeltype type;
        /* First logic bit position, or index among the analog signals. */
        size_t array_index;
        /* Logic signals only: array_index / 8. */
        size_t byte_idx;
        /* Logic signals only: 1 << (array_index % 8). */
        uint8_t bit_mask;
        /* Name without the trailing "[upper:lower]" spec, set on demand. */
        char *base_name;
        /* Verified range limits (lower < upper), both 0 when not available. */
        size_t range_lower, range_upper;
        /* Analog signals only: 2 for 'real', 0 for 'integer'. */
        int submit_digits;
        /* Released by feed_queue_analog_free() when the channel gets freed. */
        struct feed_queue_analog *feed_analog;
};
 168
 169 static void free_channel(void *data)
 170 {
 171         struct vcd_channel *vcd_ch;
 172
 173         vcd_ch = data;
 174         if (!vcd_ch)
 175                 return;
 176
 177         g_free(vcd_ch->name);
 178         g_free(vcd_ch->identifier);
 179         g_free(vcd_ch->base_name);
 180         feed_queue_analog_free(vcd_ch->feed_analog);
 181
 182         g_free(vcd_ch);
 183 }
 184
/* TODO Drop the local decl when this has become a common helper. */
void sr_channel_group_free(struct sr_channel_group *cg);

/*
 * Wrapper for GDestroyNotify compatibility. Takes an untyped pointer
 * and passes it on to sr_channel_group_free() without any checks.
 */
static void cg_free(void *p)
{
        sr_channel_group_free(p);
}
 193
 194 static void check_remove_bom(GString *buf)
 195 {
 196         static const char *bom_text = "\xef\xbb\xbf";
 197
 198         if (buf->len < strlen(bom_text))
 199                 return;
 200         if (strncmp(buf->str, bom_text, strlen(bom_text)) != 0)
 201                 return;
 202         g_string_erase(buf, 0, strlen(bom_text));
 203 }
 204
 205 /*
 206  * Reads a single VCD section from input file and parses it to name/contents.
 207  * e.g. $timescale 1ps $end => "timescale" "1ps"
 208  */
 209 static gboolean parse_section(GString *buf, char **name, char **contents)
 210 {
 211         static const char *end_text = "$end";
 212
 213         gboolean status;
 214         size_t pos, len;
 215         const char *grab_start, *grab_end;
 216         GString *sname, *scontent;
 217
 218         /* Preset falsy return values. Gets updated below. */
 219         *name = *contents = NULL;
 220         status = FALSE;
 221
 222         /* Skip any initial white-space. */
 223         pos = 0;
 224         while (pos < buf->len && g_ascii_isspace(buf->str[pos]))
 225                 pos++;
 226
 227         /* Section tag should start with $. */
 228         if (buf->str[pos++] != '$')
 229                 return FALSE;
 230
 231         /* Read the section tag. */
 232         grab_start = &buf->str[pos];
 233         while (pos < buf->len && !g_ascii_isspace(buf->str[pos]))
 234                 pos++;
 235         grab_end = &buf->str[pos];
 236         sname = g_string_new_len(grab_start, grab_end - grab_start);
 237
 238         /* Skip whitespace before content. */
 239         while (pos < buf->len && g_ascii_isspace(buf->str[pos]))
 240                 pos++;
 241
 242         /* Read the content up to the '$end' marker. */
 243         scontent = g_string_sized_new(128);
 244         grab_start = &buf->str[pos];
 245         grab_end = g_strstr_len(grab_start, buf->len - pos, end_text);
 246         if (grab_end) {
 247                 /* Advance 'pos' to after '$end' and more whitespace. */
 248                 pos = grab_end - buf->str;
 249                 pos += strlen(end_text);
 250                 while (pos < buf->len && g_ascii_isspace(buf->str[pos]))
 251                         pos++;
 252
 253                 /* Grab the (trimmed) content text. */
 254                 while (grab_end > grab_start && g_ascii_isspace(grab_end[-1]))
 255                         grab_end--;
 256                 len = grab_end - grab_start;
 257                 g_string_append_len(scontent, grab_start, len);
 258                 if (sname->len)
 259                         status = TRUE;
 260
 261                 /* Consume the input text which just was taken. */
 262                 g_string_erase(buf, 0, pos);
 263         }
 264
 265         /* Return section name and content if a section was seen. */
 266         *name = g_string_free(sname, !status);
 267         *contents = g_string_free(scontent, !status);
 268
 269         return status;
 270 }
 271
 272 /*
 273  * The glib routine which splits an input text into a list of words also
 274  * "provides empty strings" which application code then needs to remove.
 275  * And copies of the input text get allocated for all words.
 276  *
 277  * The repeated memory allocation is acceptable for small workloads like
 278  * parsing the header sections. But the heavy lifting for sample data is
 * done by DIY code to speed up execution. The use of glib routines would
 280  * severely hurt throughput. Allocated memory gets re-used while a strict
 281  * ping-pong pattern is assumed (each text line of input data enters and
 282  * leaves in a strict symmetrical manner, due to the organization of the
 283  * receive() routine and parse calls).
 284  */
 285
 286 /* Remove empty parts from an array returned by g_strsplit(). */
 287 static void remove_empty_parts(gchar **parts)
 288 {
 289         gchar **src, **dest;
 290
 291         src = dest = parts;
 292         while (*src) {
 293                 if (!**src) {
 294                         g_free(*src);
 295                 } else {
 296                         if (dest != src)
 297                                 *dest = *src;
 298                         dest++;
 299                 }
 300                 src++;
 301         }
 302         *dest = NULL;
 303 }
 304
 305 static char **split_text_line(struct context *inc, char *text, size_t *count)
 306 {
 307         struct split_state *state;
 308         size_t counted, alloced, wanted;
 309         char **words, *p, **new_words;
 310
 311         state = &inc->split;
 312
 313         if (count)
 314                 *count = 0;
 315
 316         if (state->in_use) {
 317                 sr_dbg("coding error, split() called while \"in use\".");
 318                 return NULL;
 319         }
 320
 321         /*
 322          * Seed allocation when invoked for the first time. Assume
 323          * simple logic data, start with a few words per line. Will
 324          * automatically adjust with subsequent use.
 325          */
 326         if (!state->alloced) {
 327                 alloced = 20;
 328                 words = g_malloc(sizeof(words[0]) * alloced);
 329                 if (!words)
 330                         return NULL;
 331                 state->alloced = alloced;
 332                 state->words = words;
 333         }
 334
 335         /* Start with most recently allocated word list space. */
 336         alloced = state->alloced;
 337         words = state->words;
 338         counted = 0;
 339
 340         /* As long as more input text remains ... */
 341         p = text;
 342         while (*p) {
 343                 /* Resize word list if needed. Just double the size. */
 344                 if (counted + 1 >= alloced) {
 345                         wanted = 2 * alloced;
 346                         new_words = g_realloc(words, sizeof(words[0]) * wanted);
 347                         if (!new_words) {
 348                                 return NULL;
 349                         }
 350                         words = new_words;
 351                         alloced = wanted;
 352                         state->words = words;
 353                         state->alloced = alloced;
 354                 }
 355
 356                 /* Skip leading spaces. */
 357                 while (g_ascii_isspace(*p))
 358                         p++;
 359                 if (!*p)
 360                         break;
 361
 362                 /* Add found word to word list. */
 363                 words[counted++] = p;
 364
 365                 /* Find end of the word. Terminate loop upon EOS. */
 366                 while (*p && !g_ascii_isspace(*p))
 367                         p++;
 368                 if (!*p)
 369                         break;
 370
 371                 /* More text follows. Terminate the word. */
 372                 *p++ = '\0';
 373         }
 374
 375         /*
 376          * NULL terminate the word list. Provide its length so that
 377          * calling code need not re-iterate the list to get the count.
 378          */
 379         words[counted] = NULL;
 380         if (count)
 381                 *count = counted;
 382         state->in_use = TRUE;
 383
 384         return words;
 385 }
 386
 387 static void free_text_split(struct context *inc, char **words)
 388 {
 389         struct split_state *state;
 390
 391         state = &inc->split;
 392
 393         if (words && words != state->words) {
 394                 sr_dbg("coding error, free() arg differs from split() result.");
 395         }
 396
 397         /* "Double free" finally releases the memory. */
 398         if (!state->in_use) {
 399                 g_free(state->words);
 400                 state->words = NULL;
 401                 state->alloced = 0;
 402         }
 403
 404         /* Mark as no longer in use. */
 405         state->in_use = FALSE;
 406 }
 407
 408 static gboolean have_header(GString *buf)
 409 {
 410         static const char *enddef_txt = "$enddefinitions";
 411         static const char *end_txt = "$end";
 412
 413         char *p, *p_stop;
 414
 415         /* Search for "end of definitions" section keyword. */
 416         p = g_strstr_len(buf->str, buf->len, enddef_txt);
 417         if (!p)
 418                 return FALSE;
 419         p += strlen(enddef_txt);
 420
 421         /* Search for end of section (content expected to be empty). */
 422         p_stop = &buf->str[buf->len];
 423         p_stop -= strlen(end_txt);
 424         while (p < p_stop && g_ascii_isspace(*p))
 425                 p++;
 426         if (strncmp(p, end_txt, strlen(end_txt)) != 0)
 427                 return FALSE;
 428         p += strlen(end_txt);
 429
 430         return TRUE;
 431 }
 432
 433 static int parse_timescale(struct context *inc, char *contents)
 434 {
 435         uint64_t p, q;
 436
 437         /*
 438          * The standard allows for values 1, 10 or 100
 439          * and units s, ms, us, ns, ps and fs.
 440          */
 441         if (sr_parse_period(contents, &p, &q) != SR_OK) {
 442                 sr_err("Parsing $timescale failed.");
 443                 return SR_ERR_DATA;
 444         }
 445
 446         inc->samplerate = q / p;
 447         sr_dbg("Samplerate: %" PRIu64, inc->samplerate);
 448         if (q % p != 0) {
 449                 /* Does not happen unless time value is non-standard */
 450                 sr_warn("Inexact rounding of samplerate, %" PRIu64 " / %" PRIu64 " to %" PRIu64 " Hz.",
 451                         q, p, inc->samplerate);
 452         }
 453
 454         return SR_OK;
 455 }
 456
 457 /*
 458  * Handle '$scope' and '$upscope' sections in the input file. Assume that
 459  * input signals have a "base name", which may be ambiguous within the
 460  * file. These names get declared within potentially nested scopes, which
 461  * this implementation uses to create longer but hopefully unique and
 462  * thus more usable sigrok channel names.
 463  *
 464  * Track the currently effective scopes in a string variable to simplify
 465  * the channel name creation. Start from an empty string, then append the
 466  * scope name and a separator when a new scope opens, and remove the last
 467  * scope name when a scope closes. This allows to simply prefix basenames
 468  * with the current scope to get a full name.
 469  *
 470  * It's an implementation detail to keep the trailing NUL here in the
 471  * GString member, to simplify the g_strconcat() call in the channel name
 472  * creation.
 473  *
 474  * TODO
 475  * - Check whether scope types must get supported, this implementation
 476  *   does not distinguish between 'module' and 'begin' and what else
 477  *   may be seen. The first word simply gets ignored.
 478  * - Check the allowed alphabet for scope names. This implementation
 479  *   assumes "programming language identifier" style (alphanumeric with
 480  *   underscores, plus brackets since we've seen them in example files).
 481  */
 482 static int parse_scope(struct context *inc, char *contents, gboolean is_up)
 483 {
 484         char *sep_pos, *name_pos;
 485         char **parts;
 486         size_t length;
 487
 488         /*
 489          * The 'upscope' case, drop one scope level (if available). Accept
 490          * excess 'upscope' calls, assume that a previous 'scope' section
 491          * was ignored because it referenced our software package's name.
 492          */
 493         if (is_up) {
 494                 /*
 495                  * Check for a second right-most separator (and position
 496                  * right behind that, which is the start of the last
 497                  * scope component), or fallback to the start of string.
 498                  * g_string_erase() from that positon to the end to drop
 499                  * the last component.
 500                  */
 501                 name_pos = inc->scope_prefix->str;
 502                 do {
 503                         sep_pos = strrchr(name_pos, SCOPE_SEP);
 504                         if (!sep_pos)
 505                                 break;
 506                         *sep_pos = '\0';
 507                         sep_pos = strrchr(name_pos, SCOPE_SEP);
 508                         if (!sep_pos)
 509                                 break;
 510                         name_pos = ++sep_pos;
 511                 } while (0);
 512                 length = name_pos - inc->scope_prefix->str;
 513                 g_string_truncate(inc->scope_prefix, length);
 514                 g_string_append_c(inc->scope_prefix, '\0');
 515                 sr_dbg("$upscope, prefix now: \"%s\"", inc->scope_prefix->str);
 516                 return SR_OK;
 517         }
 518
 519         /*
 520          * The 'scope' case, add another scope level. But skip our own
 521          * package name, assuming that this is an artificial node which
 522          * was emitted by libsigrok's VCD output module.
 523          */
 524         sr_spew("$scope, got: \"%s\"", contents);
 525         parts = g_strsplit_set(contents, " \r\n\t", 0);
 526         remove_empty_parts(parts);
 527         length = g_strv_length(parts);
 528         if (length != 2) {
 529                 sr_err("Unsupported 'scope' syntax: %s", contents);
 530                 g_strfreev(parts);
 531                 return SR_ERR_DATA;
 532         }
 533         name_pos = parts[1];
 534         if (strcmp(name_pos, PACKAGE_NAME) == 0) {
 535                 sr_info("Skipping scope with application's package name: %s",
 536                         name_pos);
 537                 *name_pos = '\0';
 538         }
 539         if (*name_pos) {
 540                 /* Drop NUL, append scope name and separator, and re-add NUL. */
 541                 g_string_truncate(inc->scope_prefix, inc->scope_prefix->len - 1);
 542                 g_string_append_printf(inc->scope_prefix,
 543                         "%s%c%c", name_pos, SCOPE_SEP, '\0');
 544         }
 545         g_strfreev(parts);
 546         sr_dbg("$scope, prefix now: \"%s\"", inc->scope_prefix->str);
 547
 548         return SR_OK;
 549 }
 550
 551 /**
 552  * Parse a $var section which describes a VCD signal ("variable").
 553  *
 554  * @param[in] inc Input module context.
 555  * @param[in] contents Input text, content of $var section.
 556  */
 557 static int parse_header_var(struct context *inc, char *contents)
 558 {
 559         char **parts;
 560         size_t length;
 561         char *type, *size_txt, *id, *ref, *idx;
 562         gboolean is_reg, is_wire, is_real, is_int;
 563         enum sr_channeltype ch_type;
 564         size_t size, next_size;
 565         struct vcd_channel *vcd_ch;
 566
 567         /*
 568          * Format of $var or $reg header specs:
 569          * $var type size identifier reference [opt-index] $end
 570          */
 571         parts = g_strsplit_set(contents, " \r\n\t", 0);
 572         remove_empty_parts(parts);
 573         length = g_strv_length(parts);
 574         if (length != 4 && length != 5) {
 575                 sr_warn("$var section should have 4 or 5 items");
 576                 g_strfreev(parts);
 577                 return SR_ERR_DATA;
 578         }
 579
 580         type = parts[0];
 581         size_txt = parts[1];
 582         id = parts[2];
 583         ref = parts[3];
 584         idx = parts[4];
 585         if (idx && !*idx)
 586                 idx = NULL;
 587         is_reg = g_strcmp0(type, "reg") == 0;
 588         is_wire = g_strcmp0(type, "wire") == 0;
 589         is_real = g_strcmp0(type, "real") == 0;
 590         is_int = g_strcmp0(type, "integer") == 0;
 591
 592         if (is_reg || is_wire) {
 593                 ch_type = SR_CHANNEL_LOGIC;
 594         } else if (is_real || is_int) {
 595                 ch_type = SR_CHANNEL_ANALOG;
 596         } else {
 597                 sr_info("Unsupported signal type: '%s'", type);
 598                 g_strfreev(parts);
 599                 return SR_ERR_DATA;
 600         }
 601
 602         size = strtol(size_txt, NULL, 10);
 603         if (ch_type == SR_CHANNEL_ANALOG) {
 604                 if (is_real && size != 32 && size != 64) {
 605                         /*
 606                          * The VCD input module does not depend on the
 607                          * specific width of the floating point value.
 608                          * This is just for information. Upon value
 609                          * changes, a mere string gets converted to
 610                          * float, so we may not care at all.
 611                          *
 612                          * Strictly speaking we might warn for 64bit
 613                          * (double precision) declarations, because
 614                          * sigrok internally uses single precision
 615                          * (32bit) only.
 616                          */
 617                         sr_info("Unexpected real width: '%s'", size_txt);
 618                 }
 619                 /* Simplify code paths below, by assuming size 1. */
 620                 size = 1;
 621         }
 622         if (!size) {
 623                 sr_warn("Unsupported signal size: '%s'", size_txt);
 624                 g_strfreev(parts);
 625                 return SR_ERR_DATA;
 626         }
 627         if (inc->conv_bits.max_bits < size)
 628                 inc->conv_bits.max_bits = size;
 629         next_size = inc->logic_count + inc->analog_count + size;
 630         if (inc->options.maxchannels && next_size > inc->options.maxchannels) {
 631                 sr_warn("Skipping '%s%s', exceeds requested channel count %zu.",
 632                         ref, idx ? idx : "", inc->options.maxchannels);
 633                 inc->ignored_signals = g_slist_append(inc->ignored_signals,
 634                         g_strdup(id));
 635                 g_strfreev(parts);
 636                 return SR_OK;
 637         }
 638
 639         vcd_ch = g_malloc0(sizeof(*vcd_ch));
 640         vcd_ch->identifier = g_strdup(id);
 641         vcd_ch->name = g_strconcat(inc->scope_prefix->str, ref, idx, NULL);
 642         vcd_ch->size = size;
 643         vcd_ch->type = ch_type;
 644         switch (ch_type) {
 645         case SR_CHANNEL_LOGIC:
 646                 vcd_ch->array_index = inc->logic_count;
 647                 vcd_ch->byte_idx = vcd_ch->array_index / 8;
 648                 vcd_ch->bit_mask = 1 << (vcd_ch->array_index % 8);
 649                 inc->logic_count += size;
 650                 break;
 651         case SR_CHANNEL_ANALOG:
 652                 vcd_ch->array_index = inc->analog_count++;
 653                 /* TODO: Use proper 'digits' value for this input module. */
 654                 vcd_ch->submit_digits = is_real ? 2 : 0;
 655                 break;
 656         }
 657         inc->vcdsignals++;
 658         sr_spew("VCD signal %zu '%s' ID '%s' (size %zu), sr type %s, idx %zu.",
 659                 inc->vcdsignals, vcd_ch->name,
 660                 vcd_ch->identifier, vcd_ch->size,
 661                 vcd_ch->type == SR_CHANNEL_ANALOG ? "A" : "L",
 662                 vcd_ch->array_index);
 663         inc->channels = g_slist_append(inc->channels, vcd_ch);
 664         g_strfreev(parts);
 665
 666         return SR_OK;
 667 }
 668
/**
 * Construct the name of the nth sigrok channel for a VCD signal.
 *
 * Uses the VCD signal name for scalar types and single-bit signals.
 * Uses "signal.idx" for multi-bit VCD signals without a range spec in
 * their declaration. Uses "signal[idx]" when a range is known and was
 * verified.
 *
 * The first call for a signal with a valid range spec stores the base
 * name and the range limits in the signal's description, subsequent
 * calls re-use these details. Signals without a valid range spec are
 * re-inspected in every call.
 *
 * @param[in] vcd_ch The VCD signal's description.
 * @param[in] idx The sigrok channel's index within the VCD signal's group.
 *
 * @return An allocated text buffer which callers need to release, #NULL
 *   upon failure to create a sigrok channel name.
 */
static char *get_channel_name(struct vcd_channel *vcd_ch, size_t idx)
{
        char *open_pos, *close_pos, *check_pos, *endptr;
        gboolean has_brackets, has_range;
        size_t upper, lower, tmp;
        char *ch_name;

        /* Handle simple scalar types, and single-bit logic first. */
        if (vcd_ch->size <= 1)
                return g_strdup(vcd_ch->name);

        /*
         * If not done before: Search for a matching pair of brackets in
         * the right-most position at the very end of the string. Get the
         * two colon separated numbers between the brackets, which are
         * the range limits for array indices into the multi-bit signal.
         * Grab the "base name" of the VCD signal.
         *
         * Notice that arrays can get nested. Earlier path components can
         * be indexed as well, that's why we need the right-most range.
         * This implementation does not handle bit vectors of size 1 here
         * by explicit logic. The check for a [0:0] range would even fail.
         * But the case of size 1 is handled above, and "happens to" give
         * the expected result (just the VCD signal name).
         *
         * This implementation also deals with range limits in the reverse
         * order, as well as ranges which are not 0-based (like "[4:7]").
         *
         * The checks below strictly depend on their order. The 'upper'
         * and 'lower' variables only hold valid values while 'has_range'
         * still is set, each step is guarded by the previous result.
         */
        if (!vcd_ch->base_name) {
                has_range = TRUE;
                open_pos = strrchr(vcd_ch->name, '[');
                close_pos = strrchr(vcd_ch->name, ']');
                /* The closing bracket must be the very last character. */
                if (close_pos && close_pos[1])
                        close_pos = NULL;
                has_brackets = open_pos && close_pos && close_pos > open_pos;
                if (!has_brackets)
                        has_range = FALSE;
                if (has_range) {
                        /* First number, must be followed by a colon. */
                        check_pos = &open_pos[1];
                        endptr = NULL;
                        upper = strtoul(check_pos, &endptr, 10);
                        if (!endptr || *endptr != ':')
                                has_range = FALSE;
                }
                if (has_range) {
                        /* Second number, must extend up to the bracket. */
                        check_pos = &endptr[1];
                        endptr = NULL;
                        lower = strtoul(check_pos, &endptr, 10);
                        if (!endptr || endptr != close_pos)
                                has_range = FALSE;
                }
                /* Normalize the order, accept "[0:7]" as well as "[7:0]". */
                if (has_range && lower > upper) {
                        tmp = lower;
                        lower = upper;
                        upper = tmp;
                }
                if (has_range) {
                        /* The range must match the declared signal size. */
                        if (lower >= upper)
                                has_range = FALSE;
                        if (upper + 1 - lower != vcd_ch->size)
                                has_range = FALSE;
                }
                if (has_range) {
                        /* Temporarily patch the VCD channel's name. */
                        *open_pos = '\0';
                        vcd_ch->base_name = g_strdup(vcd_ch->name);
                        *open_pos = '[';
                        vcd_ch->range_lower = lower;
                        vcd_ch->range_upper = upper;
                }
        }
        /*
         * The range limits are only assigned above after verification,
         * which required lower < upper. So a non-zero sum tells that a
         * verified range is available for this signal.
         */
        has_range = vcd_ch->range_lower + vcd_ch->range_upper;
        /* Fall back to the "signal.idx" format for unexpected indices. */
        if (has_range && idx >= vcd_ch->size)
                has_range = FALSE;
        if (!has_range)
                return g_strdup_printf("%s.%zu", vcd_ch->name, idx);

        /*
         * Create a sigrok channel name with just the bit's index in
         * brackets. This avoids "name[7:0].3" results, instead results
         * in "name[3]".
         */
        ch_name = g_strdup_printf("%s[%zu]",
                vcd_ch->base_name, vcd_ch->range_lower + idx);
        return ch_name;
}
 769
 770 /*
 771  * Create (analog or logic) sigrok channels for the VCD signals. Create
 772  * multiple sigrok channels for vector input since sigrok has no concept
 773  * of multi-bit signals. Create a channel group for the vector's bits
 774  * though to reflect that they form a unit. This is beneficial when UIs
 775  * support optional "collapsed" displays of channel groups (like
 776  * "parallel bus, hex output").
 777  *
 778  * Defer channel creation until after completion of parsing the input
 779  * file header. Make sure to create all logic channels first before the
 780  * analog channels get created. This avoids issues with the mapping of
 781  * channel indices to bitmap positions in the sample buffer.
 782  */
 783 static void create_channels(const struct sr_input *in,
 784         struct sr_dev_inst *sdi, enum sr_channeltype ch_type)
 785 {
 786         struct context *inc;
 787         size_t ch_idx;
 788         GSList *l;
 789         struct vcd_channel *vcd_ch;
 790         size_t size_idx;
 791         char *ch_name;
 792         struct sr_channel_group *cg;
 793         struct sr_channel *ch;
 794
 795         inc = in->priv;
 796
 797         ch_idx = 0;
 798         if (ch_type > SR_CHANNEL_LOGIC)
 799                 ch_idx += inc->logic_count;
 800         if (ch_type > SR_CHANNEL_ANALOG)
 801                 ch_idx += inc->analog_count;
 802         for (l = inc->channels; l; l = l->next) {
 803                 vcd_ch = l->data;
 804                 if (vcd_ch->type != ch_type)
 805                         continue;
 806                 cg = NULL;
 807                 if (vcd_ch->size != 1) {
 808                         cg = g_malloc0(sizeof(*cg));
 809                         cg->name = g_strdup(vcd_ch->name);
 810                 }
 811                 for (size_idx = 0; size_idx < vcd_ch->size; size_idx++) {
 812                         ch_name = get_channel_name(vcd_ch, size_idx);
 813                         sr_dbg("sigrok channel idx %zu, name %s, type %s, en %d.",
 814                                 ch_idx, ch_name,
 815                                 ch_type == SR_CHANNEL_ANALOG ? "A" : "L", TRUE);
 816                         ch = sr_channel_new(sdi, ch_idx, ch_type, TRUE, ch_name);
 817                         g_free(ch_name);
 818                         ch_idx++;
 819                         if (cg)
 820                                 cg->channels = g_slist_append(cg->channels, ch);
 821                 }
 822                 if (cg)
 823                         sdi->channel_groups = g_slist_append(sdi->channel_groups, cg);
 824         }
 825 }
 826
 827 static void create_feeds(const struct sr_input *in)
 828 {
 829         struct context *inc;
 830         GSList *l;
 831         struct vcd_channel *vcd_ch;
 832         size_t ch_idx;
 833         struct sr_channel *ch;
 834
 835         inc = in->priv;
 836
 837         /* Create one feed for logic data. */
 838         inc->unit_size = (inc->logic_count + 7) / 8;
 839         inc->feed_logic = feed_queue_logic_alloc(in->sdi,
 840                 CHUNK_SIZE / inc->unit_size, inc->unit_size);
 841
 842         /* Create one feed per analog channel. */
 843         for (l = inc->channels; l; l = l->next) {
 844                 vcd_ch = l->data;
 845                 if (vcd_ch->type != SR_CHANNEL_ANALOG)
 846                         continue;
 847                 ch_idx = vcd_ch->array_index;
 848                 ch_idx += inc->logic_count;
 849                 ch = g_slist_nth_data(in->sdi->channels, ch_idx);
 850                 vcd_ch->feed_analog = feed_queue_analog_alloc(in->sdi,
 851                         CHUNK_SIZE / sizeof(float),
 852                         vcd_ch->submit_digits, ch);
 853         }
 854 }
 855
 856 /*
 857  * Keep track of a previously created channel list, in preparation of
 858  * re-reading the input file. Gets called from reset()/cleanup() paths.
 859  */
 860 static void keep_header_for_reread(const struct sr_input *in)
 861 {
 862         struct context *inc;
 863
 864         inc = in->priv;
 865
 866         g_slist_free_full(inc->prev.sr_groups, cg_free);
 867         inc->prev.sr_groups = in->sdi->channel_groups;
 868         in->sdi->channel_groups = NULL;
 869
 870         g_slist_free_full(inc->prev.sr_channels, sr_channel_free_cb);
 871         inc->prev.sr_channels = in->sdi->channels;
 872         in->sdi->channels = NULL;
 873 }
 874
 875 /*
 876  * Check whether the input file is being re-read, and refuse operation
 877  * when essential parameters of the acquisition have changed in ways
 878  * that are unexpected to calling applications. Gets called after the
 879  * file header got parsed (again).
 880  *
 881  * Changing the channel list across re-imports of the same file is not
 882  * supported, by design and for valid reasons, see bug #1215 for details.
 883  * Users are expected to start new sessions when they change these
 884  * essential parameters in the acquisition's setup. When we accept the
 885  * re-read file, then make sure to keep using the previous channel list,
 886  * applications may still reference them.
 887  */
 888 static gboolean check_header_in_reread(const struct sr_input *in)
 889 {
 890         struct context *inc;
 891
 892         if (!in)
 893                 return FALSE;
 894         inc = in->priv;
 895         if (!inc)
 896                 return FALSE;
 897         if (!inc->prev.sr_channels)
 898                 return TRUE;
 899
 900         if (sr_channel_lists_differ(inc->prev.sr_channels, in->sdi->channels)) {
 901                 sr_err("Channel list change not supported for file re-read.");
 902                 return FALSE;
 903         }
 904
 905         g_slist_free_full(in->sdi->channel_groups, cg_free);
 906         in->sdi->channel_groups = inc->prev.sr_groups;
 907         inc->prev.sr_groups = NULL;
 908
 909         g_slist_free_full(in->sdi->channels, sr_channel_free_cb);
 910         in->sdi->channels = inc->prev.sr_channels;
 911         inc->prev.sr_channels = NULL;
 912
 913         return TRUE;
 914 }
 915
 916 /* Parse VCD file header sections (rate and variables declarations). */
 917 static int parse_header(const struct sr_input *in, GString *buf)
 918 {
 919         struct context *inc;
 920         gboolean status;
 921         char *name, *contents;
 922         size_t size;
 923
 924         inc = in->priv;
 925
 926         /* Parse sections until complete header was seen. */
 927         status = FALSE;
 928         name = contents = NULL;
 929         inc->conv_bits.max_bits = 1;
 930         while (parse_section(buf, &name, &contents)) {
 931                 sr_dbg("Section '%s', contents '%s'.", name, contents);
 932
 933                 if (g_strcmp0(name, "enddefinitions") == 0) {
 934                         status = TRUE;
 935                         goto done_section;
 936                 }
 937                 if (g_strcmp0(name, "timescale") == 0) {
 938                         if (parse_timescale(inc, contents) != SR_OK)
 939                                 status = FALSE;
 940                         goto done_section;
 941                 }
 942                 if (g_strcmp0(name, "scope") == 0) {
 943                         if (parse_scope(inc, contents, FALSE) != SR_OK)
 944                                 status = FALSE;
 945                         goto done_section;
 946                 }
 947                 if (g_strcmp0(name, "upscope") == 0) {
 948                         if (parse_scope(inc, NULL, TRUE) != SR_OK)
 949                                 status = FALSE;
 950                         goto done_section;
 951                 }
 952                 if (g_strcmp0(name, "var") == 0) {
 953                         if (parse_header_var(inc, contents) != SR_OK)
 954                                 status = FALSE;
 955                         goto done_section;
 956                 }
 957
 958 done_section:
 959                 g_free(name);
 960                 name = NULL;
 961                 g_free(contents);
 962                 contents = NULL;
 963
 964                 if (status)
 965                         break;
 966         }
 967         g_free(name);
 968         g_free(contents);
 969
 970         inc->got_header = status;
 971         if (!status)
 972                 return SR_ERR_DATA;
 973
 974         /* Create sigrok channels here, late, logic before analog. */
 975         create_channels(in, in->sdi, SR_CHANNEL_LOGIC);
 976         create_channels(in, in->sdi, SR_CHANNEL_ANALOG);
 977         if (!check_header_in_reread(in))
 978                 return SR_ERR_DATA;
 979         create_feeds(in);
 980
 981         /*
 982          * Allocate space for text to number conversion, and buffers to
 983          * hold current sample values before submission to the session
 984          * feed. Allocate one buffer for all logic bits, and another for
 985          * all floating point values of all analog channels.
 986          *
 987          * The buffers get updated when the VCD input stream communicates
 988          * value changes. Upon reception of VCD timestamps, the buffer can
 989          * provide the previously received values, to "fill in the gaps"
 990          * in the generation of a continuous stream of samples for the
 991          * sigrok session.
 992          */
 993         size = (inc->conv_bits.max_bits + 7) / 8;
 994         inc->conv_bits.unit_size = size;
 995         inc->conv_bits.value = g_malloc0(size);
 996         if (!inc->conv_bits.value)
 997                 return SR_ERR_MALLOC;
 998
 999         size = (inc->logic_count + 7) / 8;
1000         inc->unit_size = size;
1001         inc->current_logic = g_malloc0(size);
1002         if (inc->unit_size && !inc->current_logic)
1003                 return SR_ERR_MALLOC;
1004         size = sizeof(inc->current_floats[0]) * inc->analog_count;
1005         inc->current_floats = g_malloc0(size);
1006         if (size && !inc->current_floats)
1007                 return SR_ERR_MALLOC;
1008         for (size = 0; size < inc->analog_count; size++)
1009                 inc->current_floats[size] = 0.;
1010
1011         return SR_OK;
1012 }
1013
1014 /*
1015  * Add N copies of previously received values to the session, before
1016  * subsequent value changes will update the data buffer. Locally buffer
1017  * sample data to minimize the number of send() calls.
1018  */
1019 static void add_samples(const struct sr_input *in, size_t count, gboolean flush)
1020 {
1021         struct context *inc;
1022         GSList *ch_list;
1023         struct vcd_channel *vcd_ch;
1024         struct feed_queue_analog *q;
1025         float value;
1026
1027         inc = in->priv;
1028
1029         if (inc->logic_count) {
1030                 feed_queue_logic_submit(inc->feed_logic,
1031                         inc->current_logic, count);
1032                 if (flush)
1033                         feed_queue_logic_flush(inc->feed_logic);
1034         }
1035         for (ch_list = inc->channels; ch_list; ch_list = ch_list->next) {
1036                 vcd_ch = ch_list->data;
1037                 if (vcd_ch->type != SR_CHANNEL_ANALOG)
1038                         continue;
1039                 q = vcd_ch->feed_analog;
1040                 if (!q)
1041                         continue;
1042                 value = inc->current_floats[vcd_ch->array_index];
1043                 feed_queue_analog_submit(q, value, count);
1044                 if (flush)
1045                         feed_queue_analog_flush(q);
1046         }
1047 }
1048
1049 static gint vcd_compare_id(gconstpointer a, gconstpointer b)
1050 {
1051         return strcmp((const char *)a, (const char *)b);
1052 }
1053
1054 static gboolean is_ignored(struct context *inc, const char *id)
1055 {
1056         GSList *ignored;
1057
1058         ignored = g_slist_find_custom(inc->ignored_signals, id, vcd_compare_id);
1059         return ignored != NULL;
1060 }
1061
/*
 * Get an analog channel's value from a bit pattern (VCD 'integer' type).
 *
 * The input bytes are combined least significant byte first. The
 * implementation assumes a maximum integer width (64bit), input bytes
 * beyond that width are ignored. The API doesn't assume a width (beyond
 * the return data type). The use of SR_CHANNEL_ANALOG channels may
 * further constrain the number of significant digits (current
 * assumption: float -> 23bit).
 *
 * in_bits_data: The bit pattern, must hold at least
 *   (in_bits_count + 7) / 8 bytes.
 * in_bits_count: The number of bits in the pattern.
 *
 * Returns the unsigned integer value of the bit pattern as a float.
 */
static float get_int_val(uint8_t *in_bits_data, size_t in_bits_count)
{
        uint64_t int_value;
        size_t byte_count, byte_idx;
        float flt_value; /* typeof(inc->current_floats[0]) */

        /* Convert bit pattern to integer number (limited range). */
        int_value = 0;
        byte_count = (in_bits_count + 7) / 8;
        if (byte_count > sizeof(int_value))
                byte_count = sizeof(int_value);
        for (byte_idx = 0; byte_idx < byte_count; byte_idx++) {
                /*
                 * Widen the byte before shifting. Shifting the promoted
                 * 'int' by 32 or more bits is undefined, and a set bit 31
                 * would sign extend into the upper half of the result.
                 */
                int_value |= (uint64_t)in_bits_data[byte_idx] << (byte_idx * 8);
        }
        flt_value = int_value;

        return flt_value;
}
1087
1088 /*
1089  * Set a logic channel's level depending on the VCD signal's identifier
1090  * and parsed value. Multi-bit VCD values will affect several sigrok
1091  * channels. One VCD signal name can translate to several sigrok channels.
1092  */
1093 static void process_bits(struct context *inc, char *identifier,
1094         uint8_t *in_bits_data, size_t in_bits_count)
1095 {
1096         size_t size;
1097         gboolean have_int;
1098         GSList *l;
1099         struct vcd_channel *vcd_ch;
1100         float int_val;
1101         size_t bit_idx;
1102         uint8_t *in_bit_ptr, in_bit_mask;
1103         uint8_t *out_bit_ptr, out_bit_mask;
1104         uint8_t bit_val;
1105
1106         size = 0;
1107         have_int = FALSE;
1108         int_val = 0;
1109         for (l = inc->channels; l; l = l->next) {
1110                 vcd_ch = l->data;
1111                 if (g_strcmp0(identifier, vcd_ch->identifier) != 0)
1112                         continue;
1113                 if (vcd_ch->type == SR_CHANNEL_ANALOG) {
1114                         /* Special case for 'integer' VCD signal types. */
1115                         size = vcd_ch->size; /* Flag for "VCD signal found". */
1116                         if (!have_int) {
1117                                 int_val = get_int_val(in_bits_data, in_bits_count);
1118                                 have_int = TRUE;
1119                         }
1120                         inc->current_floats[vcd_ch->array_index] = int_val;
1121                         continue;
1122                 }
1123                 if (vcd_ch->type != SR_CHANNEL_LOGIC)
1124                         continue;
1125                 sr_spew("Processing %s data, id '%s', ch %zu sz %zu",
1126                         (size == 1) ? "bit" : "vector",
1127                         identifier, vcd_ch->array_index, vcd_ch->size);
1128
1129                 /* Found our (logic) channel. Setup in/out bit positions. */
1130                 size = vcd_ch->size;
1131                 in_bit_ptr = in_bits_data;
1132                 in_bit_mask = 1 << 0;
1133                 out_bit_ptr = &inc->current_logic[vcd_ch->byte_idx];
1134                 out_bit_mask = vcd_ch->bit_mask;
1135
1136                 /*
1137                  * Pass VCD input bit(s) to sigrok logic bits. Conversion
1138                  * must be done repeatedly because one VCD signal name
1139                  * can translate to several sigrok channels, and shifting
1140                  * a previously computed bit field to another channel's
1141                  * position in the buffer would be nearly as expensive,
1142                  * and certain would increase complexity of the code.
1143                  */
1144                 for (bit_idx = 0; bit_idx < size; bit_idx++) {
1145                         /* Get the bit value from input data. */
1146                         bit_val = 0;
1147                         if (bit_idx < in_bits_count) {
1148                                 bit_val = *in_bit_ptr & in_bit_mask;
1149                                 in_bit_mask <<= 1;
1150                                 if (!in_bit_mask) {
1151                                         in_bit_mask = 1 << 0;
1152                                         in_bit_ptr++;
1153                                 }
1154                         }
1155                         /* Manipulate the sample buffer data image. */
1156                         if (bit_val)
1157                                 *out_bit_ptr |= out_bit_mask;
1158                         else
1159                                 *out_bit_ptr &= ~out_bit_mask;
1160                         /* Update output position after bitmap update. */
1161                         out_bit_mask <<= 1;
1162                         if (!out_bit_mask) {
1163                                 out_bit_mask = 1 << 0;
1164                                 out_bit_ptr++;
1165                         }
1166                 }
1167         }
1168         if (!size && !is_ignored(inc, identifier))
1169                 sr_warn("VCD signal not found for ID '%s'.", identifier);
1170 }
1171
1172 /*
1173  * Set an analog channel's value from a floating point number. One
1174  * VCD signal name can translate to several sigrok channels.
1175  */
1176 static void process_real(struct context *inc, char *identifier, float real_val)
1177 {
1178         gboolean found;
1179         GSList *l;
1180         struct vcd_channel *vcd_ch;
1181
1182         found = FALSE;
1183         for (l = inc->channels; l; l = l->next) {
1184                 vcd_ch = l->data;
1185                 if (vcd_ch->type != SR_CHANNEL_ANALOG)
1186                         continue;
1187                 if (g_strcmp0(identifier, vcd_ch->identifier) != 0)
1188                         continue;
1189
1190                 /* Found our (analog) channel. */
1191                 found = TRUE;
1192                 sr_spew("Processing real data, id '%s', ch %zu, val %.16g",
1193                         identifier, vcd_ch->array_index, real_val);
1194                 inc->current_floats[vcd_ch->array_index] = real_val;
1195         }
1196         if (!found && !is_ignored(inc, identifier))
1197                 sr_warn("VCD signal not found for ID '%s'.", identifier);
1198 }
1199
/*
 * Converts a bit position's text character to a number value. The
 * input character is handled case insensitively.
 *
 * TODO Check for complete coverage of Verilog's standard logic values
 * (IEEE-1364). The set is said to be "01XZHUWL-", only a part of which
 * is handled here. What would be the complete mapping?
 * - 0/L -> bit value 0
 * - 1/H -> bit value 1
 * - X "don't care" -> TODO
 * - Z "high impedance" -> TODO
 * - W "weak(?)" -> TODO
 * - U "undefined" -> TODO
 * - '-' "TODO" -> TODO
 *
 * For simplicity, this input module implementation maps "known low"
 * values to 0, and "known high" values to 1. The X, Z, U, and '-'
 * values end up assuming "low" (return number 0) and set the caller's
 * optional 'warn' flag, so that callers might warn. Any other input
 * text also sets the flag, and returns a value with all bits set.
 * It's up to users to provide compatible input data, or accept the
 * warnings. Silently accepting unknown input data is not desirable.
 */
static uint8_t vcd_char_to_value(char bit_char, int *warn)
{
        char lower;

        lower = g_ascii_tolower(bit_char);

        /* Convert the "undisputed" variants. */
        switch (lower) {
        case '0':
        case 'l':
                return 0;
        case '1':
        case 'h':
                return 1;
        default:
                break;
        }

        /* Everything else is "uncertain", or not handled at all. */
        if (warn)
                *warn = 1;

        switch (lower) {
        case 'x':
        case 'z':
        case 'u':
        case '-':
                /* Convert the "uncertain" variants. */
                return 0;
        default:
                /* Unhandled input text. */
                return ~0;
        }
}
1244
1245 /* Parse one text line of the data section. */
1246 static int parse_textline(const struct sr_input *in, char *lines)
1247 {
1248         struct context *inc;
1249         int ret;
1250         char **words;
1251         size_t word_count, word_idx;
1252         char *curr_word, *next_word, curr_first;
1253         gboolean is_timestamp, is_section, is_real, is_multibit, is_singlebit;
1254         uint64_t timestamp;
1255         char *identifier, *endptr;
1256         size_t count;
1257
1258         inc = in->priv;
1259
1260         /*
1261          * Split the caller's text lines into a list of space separated
1262          * words. Note that some of the branches consume the very next
1263          * words as well, and assume that both adjacent words will be
1264          * available when the first word is seen. This constraint applies
1265          * to bit vector data, multi-bit integers and real (float) data,
1266          * as well as single-bit data with whitespace before its
1267          * identifier (if that's valid in VCD, we'd accept it here).
1268          * The fact that callers always pass complete text lines should
1269          * make this assumption acceptable.
1270          */
1271         ret = SR_OK;
1272         words = split_text_line(inc, lines, &word_count);
1273         for (word_idx = 0; word_idx < word_count; word_idx++) {
1274                 /*
                 * Make the next two words available, to simplify code
1276                  * paths below. The second word is optional here.
1277                  */
1278                 curr_word = words[word_idx];
1279                 if (!curr_word && !curr_word[0])
1280                         continue;
1281                 curr_first = g_ascii_tolower(curr_word[0]);
1282                 next_word = words[word_idx + 1];
1283                 if (next_word && !next_word[0])
1284                         next_word = NULL;
1285
1286                 /*
1287                  * Optionally skip some sections that can be interleaved
1288                  * with data (and may or may not be supported by this
1289                  * input module). If the section is not skipped but the
1290                  * $end keyword needs to get tracked, specifically handle
1291                  * this case, for improved robustness (still reject files
1292                  * which happen to use invalid syntax).
1293                  */
1294                 if (inc->skip_until_end) {
1295                         if (strcmp(curr_word, "$end") == 0) {
1296                                 /* Done with unhandled/unknown section. */
1297                                 sr_dbg("done skipping until $end");
1298                                 inc->skip_until_end = FALSE;
1299                         } else {
1300                                 sr_spew("skipping word: %s", curr_word);
1301                         }
1302                         continue;
1303                 }
1304                 if (inc->ignore_end_keyword) {
1305                         if (strcmp(curr_word, "$end") == 0) {
1306                                 sr_dbg("done ignoring $end keyword");
1307                                 inc->ignore_end_keyword = FALSE;
1308                                 continue;
1309                         }
1310                 }
1311
1312                 /*
1313                  * There may be $keyword sections inside the data part of
1314                  * the input file. Do inspect some of the sections' content
1315                  * but ignore their surrounding keywords. Silently skip
1316                  * unsupported section types (which transparently covers
1317                  * $comment sections).
1318                  */
1319                 is_section = curr_first == '$' && curr_word[1];
1320                 if (is_section) {
1321                         gboolean inspect_data;
1322
1323                         inspect_data = FALSE;
1324                         inspect_data |= g_strcmp0(curr_word, "$dumpvars") == 0;
1325                         inspect_data |= g_strcmp0(curr_word, "$dumpon") == 0;
1326                         inspect_data |= g_strcmp0(curr_word, "$dumpoff") == 0;
1327                         if (inspect_data) {
1328                                 /* Ignore keywords, yet parse contents. */
1329                                 sr_dbg("%s section, will parse content", curr_word);
1330                                 inc->ignore_end_keyword = TRUE;
1331                         } else {
1332                                 /* Ignore section from here up to $end. */
1333                                 sr_dbg("%s section, will skip until $end", curr_word);
1334                                 inc->skip_until_end = TRUE;
1335                         }
1336                         continue;
1337                 }
1338
1339                 /*
1340                  * Numbers prefixed by '#' are timestamps, which translate
1341                  * to sigrok sample numbers. Apply optional downsampling,
1342                  * and apply the 'skip' logic. Check the recent timestamp
1343                  * for plausibility. Submit the corresponding number of
1344                  * samples of previously accumulated data values to the
1345                  * session feed.
1346                  */
1347                 is_timestamp = curr_first == '#' && g_ascii_isdigit(curr_word[1]);
1348                 if (is_timestamp) {
1349                         endptr = NULL;
1350                         timestamp = strtoull(&curr_word[1], &endptr, 10);
1351                         if (!endptr || *endptr) {
1352                                 sr_err("Invalid timestamp: %s.", curr_word);
1353                                 ret = SR_ERR_DATA;
1354                                 break;
1355                         }
1356                         sr_spew("Got timestamp: %" PRIu64, timestamp);
1357                         if (inc->options.downsample > 1) {
1358                                 timestamp /= inc->options.downsample;
1359                                 sr_spew("Downsampled timestamp: %" PRIu64, timestamp);
1360                         }
1361
1362                         /*
1363                          * Skip < 0 => skip until first timestamp.
1364                          * Skip = 0 => don't skip
1365                          * Skip > 0 => skip until timestamp >= skip.
1366                          */
1367                         if (inc->options.skip_specified && !inc->use_skip) {
1368                                 sr_dbg("Seeding skip from user spec %" PRIu64,
1369                                         inc->options.skip_starttime);
1370                                 inc->prev_timestamp = inc->options.skip_starttime;
1371                                 inc->use_skip = TRUE;
1372                         }
1373                         if (!inc->use_skip) {
1374                                 sr_dbg("Seeding skip from first timestamp");
1375                                 inc->options.skip_starttime = timestamp;
1376                                 inc->prev_timestamp = timestamp;
1377                                 inc->use_skip = TRUE;
1378                                 continue;
1379                         }
1380                         if (inc->options.skip_starttime && timestamp < inc->options.skip_starttime) {
1381                                 sr_spew("Timestamp skipped, before user spec");
1382                                 inc->prev_timestamp = inc->options.skip_starttime;
1383                                 continue;
1384                         }
1385                         if (timestamp == inc->prev_timestamp) {
1386                                 /*
1387                                  * Ignore repeated timestamps (e.g. sigrok
1388                                  * outputs these). Can also happen when
1389                                  * downsampling makes distinct input values
1390                                  * end up at the same scaled down value.
1391                                  * Also transparently covers the initial
1392                                  * timestamp.
1393                                  */
1394                                 sr_spew("Timestamp is identical to previous timestamp");
1395                                 continue;
1396                         }
1397                         if (timestamp < inc->prev_timestamp) {
1398                                 sr_err("Invalid timestamp: %" PRIu64 " (leap backwards).", timestamp);
1399                                 ret = SR_ERR_DATA;
1400                                 break;
1401                         }
1402                         if (inc->options.compress) {
1403                                 /* Compress long idle periods */
1404                                 count = timestamp - inc->prev_timestamp;
1405                                 if (count > inc->options.compress) {
1406                                         sr_dbg("Long idle period, compressing");
1407                                         count = timestamp - inc->options.compress;
1408                                         inc->prev_timestamp = count;
1409                                 }
1410                         }
1411
1412                         /* Generate samples from prev_timestamp up to timestamp - 1. */
1413                         count = timestamp - inc->prev_timestamp;
1414                         sr_spew("Got a new timestamp, feeding %zu samples", count);
1415                         add_samples(in, count, FALSE);
1416                         inc->prev_timestamp = timestamp;
1417                         inc->data_after_timestamp = FALSE;
1418                         continue;
1419                 }
1420                 inc->data_after_timestamp = TRUE;
1421
1422                 /*
1423                  * Data values come in different formats, are associated
1424                  * with channel identifiers, and correspond to the period
1425                  * of time from the most recent timestamp to the next
1426                  * timestamp.
1427                  *
1428                  * Supported input data formats are:
1429                  * - R<value> <sep> <id> (analog channel, VCD type 'real').
1430                  * - B<value> <sep> <id> (analog channel, VCD type 'integer').
1431                  * - B<value> <sep> <id> (logic channels, VCD bit vectors).
1432                  * - <value> <id> (logic channel, VCD single-bit values).
1433                  *
1434                  * Input values can be:
1435                  * - Floating point numbers.
1436                  * - Bit strings (which covers multi-bit aka integers
1437                  *   as well as vectors).
1438                  * - Single bits.
1439                  *
1440                  * Things to note:
1441                  * - Individual bits can be 0/1 which is supported by
1442                  *   libsigrok, or x or z which is treated like 0 here
1443                  *   (sigrok lacks support for ternary logic, neither is
1444                  *   there support for the full IEEE set of values).
1445                  * - Single-bit values typically won't be separated from
                 *   the signal identifier, multi-bit values and floats
1447                  *   are separated (will reference the next word). This
1448                  *   implementation silently accepts separators for
1449                  *   single-bit values, too.
1450                  */
1451                 is_real = curr_first == 'r' && curr_word[1];
1452                 is_multibit = curr_first == 'b' && curr_word[1];
1453                 is_singlebit = curr_first == '0' || curr_first == '1';
1454                 is_singlebit |= curr_first == 'l' || curr_first == 'h';
1455                 is_singlebit |= curr_first == 'x' || curr_first == 'z';
1456                 is_singlebit |= curr_first == 'u' || curr_first == '-';
1457                 if (is_real) {
1458                         char *real_text;
1459                         float real_val;
1460
1461                         real_text = &curr_word[1];
1462                         identifier = next_word;
1463                         word_idx++;
1464                         if (!*real_text || !identifier || !*identifier) {
1465                                 sr_err("Unexpected real format.");
1466                                 ret = SR_ERR_DATA;
1467                                 break;
1468                         }
1469                         sr_spew("Got real data %s for id '%s'.",
1470                                 real_text, identifier);
1471                         if (sr_atof_ascii(real_text, &real_val) != SR_OK) {
1472                                 sr_err("Cannot convert value: %s.", real_text);
1473                                 ret = SR_ERR_DATA;
1474                                 break;
1475                         }
1476                         process_real(inc, identifier, real_val);
1477                         continue;
1478                 }
1479                 if (is_multibit) {
1480                         char *bits_text_start;
1481                         size_t bit_count;
1482                         char *bits_text, bit_char;
1483                         uint8_t bit_value;
1484                         uint8_t *value_ptr, value_mask;
1485                         GString *bits_val_text;
1486
1487                         /* TODO
1488                          * Fold in single-bit code path here? To re-use
1489                          * the X/Z support. Current redundancy is few so
1490                          * there is little pressure to unify code paths.
1491                          * Also multi-bit handling is often different
1492                          * from single-bit handling, so the "unified"
1493                          * path would often check for special cases. So
1494                          * we may never unify code paths at all here.
1495                          */
1496                         bits_text = &curr_word[1];
1497                         identifier = next_word;
1498                         word_idx++;
1499
1500                         if (!*bits_text || !identifier || !*identifier) {
1501                                 sr_err("Unexpected integer/vector format.");
1502                                 ret = SR_ERR_DATA;
1503                                 break;
1504                         }
1505                         sr_spew("Got integer/vector data %s for id '%s'.",
1506                                 bits_text, identifier);
1507
1508                         /*
1509                          * Accept a bit string of arbitrary length (sort
1510                          * of, within the limits of the previously setup
1511                          * conversion buffer). The input text omits the
1512                          * leading zeroes, hence we convert from end to
1513                          * the start, to get the significant bits. There
1514                          * should only be errors for invalid input, or
1515                          * for input that is rather strange (data holds
1516                          * more bits than the signal's declaration in
1517                          * the header suggested). Silently accept data
1518                          * that fits in the conversion buffer, and has
1519                          * more significant bits than the signal's type
1520                          * (that'd be nonsense yet acceptable input).
1521                          */
1522                         bits_text_start = bits_text;
1523                         bits_text += strlen(bits_text);
1524                         bit_count = bits_text - bits_text_start;
1525                         if (bit_count > inc->conv_bits.max_bits) {
1526                                 sr_err("Value exceeds conversion buffer: %s",
1527                                         bits_text_start);
1528                                 ret = SR_ERR_DATA;
1529                                 break;
1530                         }
1531                         memset(inc->conv_bits.value, 0, inc->conv_bits.unit_size);
1532                         value_ptr = &inc->conv_bits.value[0];
1533                         value_mask = 1 << 0;
1534                         inc->conv_bits.sig_count = 0;
1535                         while (bits_text > bits_text_start) {
1536                                 inc->conv_bits.sig_count++;
1537                                 bit_char = *(--bits_text);
1538                                 bit_value = vcd_char_to_value(bit_char, NULL);
1539                                 if (bit_value == 0) {
1540                                         /* EMPTY */
1541                                 } else if (bit_value == 1) {
1542                                         *value_ptr |= value_mask;
1543                                 } else {
1544                                         inc->conv_bits.sig_count = 0;
1545                                         break;
1546                                 }
1547                                 value_mask <<= 1;
1548                                 if (!value_mask) {
1549                                         value_ptr++;
1550                                         value_mask = 1 << 0;
1551                                 }
1552                         }
1553                         if (!inc->conv_bits.sig_count) {
1554                                 sr_err("Unexpected vector format: %s",
1555                                         bits_text_start);
1556                                 ret = SR_ERR_DATA;
1557                                 break;
1558                         }
1559                         if (sr_log_loglevel_get() >= SR_LOG_SPEW) {
1560                                 bits_val_text = sr_hexdump_new(inc->conv_bits.value,
1561                                         value_ptr - inc->conv_bits.value + 1);
1562                                 sr_spew("Vector value: %s.", bits_val_text->str);
1563                                 sr_hexdump_free(bits_val_text);
1564                         }
1565
1566                         process_bits(inc, identifier,
1567                                 inc->conv_bits.value, inc->conv_bits.sig_count);
1568                         continue;
1569                 }
1570                 if (is_singlebit) {
1571                         char *bits_text, bit_char;
1572                         uint8_t bit_value;
1573
1574                         /* Get the value text, and signal identifier. */
1575                         bits_text = &curr_word[0];
1576                         bit_char = *bits_text;
1577                         if (!bit_char) {
1578                                 sr_err("Bit value missing.");
1579                                 ret = SR_ERR_DATA;
1580                                 break;
1581                         }
1582                         identifier = ++bits_text;
1583                         if (!*identifier) {
1584                                 identifier = next_word;
1585                                 word_idx++;
1586                         }
1587                         if (!identifier || !*identifier) {
1588                                 sr_err("Identifier missing.");
1589                                 ret = SR_ERR_DATA;
1590                                 break;
1591                         }
1592
1593                         /* Convert value text to single-bit number. */
1594                         bit_value = vcd_char_to_value(bit_char, NULL);
1595                         if (bit_value != 0 && bit_value != 1) {
1596                                 sr_err("Unsupported bit value '%c'.", bit_char);
1597                                 ret = SR_ERR_DATA;
1598                                 break;
1599                         }
1600                         inc->conv_bits.value[0] = bit_value;
1601                         process_bits(inc, identifier, inc->conv_bits.value, 1);
1602                         continue;
1603                 }
1604
1605                 /* Design choice: Consider unsupported input fatal. */
1606                 sr_err("Unknown token '%s'.", curr_word);
1607                 ret = SR_ERR_DATA;
1608                 break;
1609         }
1610         free_text_split(inc, words);
1611
1612         return ret;
1613 }
1614
1615 static int process_buffer(struct sr_input *in, gboolean is_eof)
1616 {
1617         struct context *inc;
1618         uint64_t samplerate;
1619         GVariant *gvar;
1620         int ret;
1621         char *rdptr, *endptr, *trimptr;
1622         size_t rdlen;
1623
1624         inc = in->priv;
1625
1626         /* Send feed header and samplerate (once) before sample data. */
1627         if (!inc->started) {
1628                 std_session_send_df_header(in->sdi);
1629
1630                 samplerate = inc->samplerate / inc->options.downsample;
1631                 if (samplerate) {
1632                         gvar = g_variant_new_uint64(samplerate);
1633                         sr_session_send_meta(in->sdi, SR_CONF_SAMPLERATE, gvar);
1634                 }
1635
1636                 inc->started = TRUE;
1637         }
1638
1639         /*
1640          * Workaround broken generators which output incomplete text
1641          * lines. Enforce the trailing line feed. Proper input is not
1642          * harmed by another empty line of input data.
1643          */
1644         if (is_eof)
1645                 g_string_append_c(in->buf, '\n');
1646
1647         /* Find and process complete text lines in the input data. */
1648         ret = SR_OK;
1649         rdptr = in->buf->str;
1650         while (TRUE) {
1651                 rdlen = &in->buf->str[in->buf->len] - rdptr;
1652                 endptr = g_strstr_len(rdptr, rdlen, "\n");
1653                 if (!endptr)
1654                         break;
1655                 trimptr = endptr;
1656                 *endptr++ = '\0';
1657                 while (g_ascii_isspace(*rdptr))
1658                         rdptr++;
1659                 while (trimptr > rdptr && g_ascii_isspace(trimptr[-1]))
1660                         *(--trimptr) = '\0';
1661                 if (!*rdptr) {
1662                         rdptr = endptr;
1663                         continue;
1664                 }
1665                 ret = parse_textline(in, rdptr);
1666                 rdptr = endptr;
1667                 if (ret != SR_OK)
1668                         break;
1669         }
1670         rdlen = rdptr - in->buf->str;
1671         g_string_erase(in->buf, 0, rdlen);
1672
1673         return ret;
1674 }
1675
1676 static int format_match(GHashTable *metadata, unsigned int *confidence)
1677 {
1678         GString *buf, *tmpbuf;
1679         gboolean status;
1680         char *name, *contents;
1681
1682         buf = g_hash_table_lookup(metadata,
1683                 GINT_TO_POINTER(SR_INPUT_META_HEADER));
1684         tmpbuf = g_string_new_len(buf->str, buf->len);
1685
1686         /*
1687          * If we can parse the first section correctly, then it is
1688          * assumed that the input is in VCD format.
1689          */
1690         check_remove_bom(tmpbuf);
1691         status = parse_section(tmpbuf, &name, &contents);
1692         g_string_free(tmpbuf, TRUE);
1693         g_free(name);
1694         g_free(contents);
1695
1696         if (!status)
1697                 return SR_ERR;
1698
1699         *confidence = 1;
1700         return SR_OK;
1701 }
1702
1703 static int init(struct sr_input *in, GHashTable *options)
1704 {
1705         struct context *inc;
1706         GVariant *data;
1707
1708         inc = g_malloc0(sizeof(*inc));
1709
1710         data = g_hash_table_lookup(options, "numchannels");
1711         inc->options.maxchannels = g_variant_get_uint32(data);
1712
1713         data = g_hash_table_lookup(options, "downsample");
1714         inc->options.downsample = g_variant_get_uint64(data);
1715         if (inc->options.downsample < 1)
1716                 inc->options.downsample = 1;
1717
1718         data = g_hash_table_lookup(options, "compress");
1719         inc->options.compress = g_variant_get_uint64(data);
1720         inc->options.compress /= inc->options.downsample;
1721
1722         data = g_hash_table_lookup(options, "skip");
1723         if (data) {
1724                 inc->options.skip_specified = TRUE;
1725                 inc->options.skip_starttime = g_variant_get_uint64(data);
1726                 if (inc->options.skip_starttime == ~UINT64_C(0)) {
1727                         inc->options.skip_specified = FALSE;
1728                         inc->options.skip_starttime = 0;
1729                 }
1730                 inc->options.skip_starttime /= inc->options.downsample;
1731         }
1732
1733         in->sdi = g_malloc0(sizeof(*in->sdi));
1734         in->priv = inc;
1735
1736         inc->scope_prefix = g_string_new("\0");
1737
1738         return SR_OK;
1739 }
1740
1741 static int receive(struct sr_input *in, GString *buf)
1742 {
1743         struct context *inc;
1744         int ret;
1745
1746         inc = in->priv;
1747
1748         /* Collect all input chunks, potential deferred processing. */
1749         g_string_append_len(in->buf, buf->str, buf->len);
1750         if (!inc->got_header && in->buf->len == buf->len)
1751                 check_remove_bom(in->buf);
1752
1753         /* Must complete reception of the VCD header first. */
1754         if (!inc->got_header) {
1755                 if (!have_header(in->buf))
1756                         return SR_OK;
1757                 ret = parse_header(in, in->buf);
1758                 if (ret != SR_OK)
1759                         return ret;
1760                 /* sdi is ready, notify frontend. */
1761                 in->sdi_ready = TRUE;
1762                 return SR_OK;
1763         }
1764
1765         /* Process sample data. */
1766         ret = process_buffer(in, FALSE);
1767
1768         return ret;
1769 }
1770
1771 static int end(struct sr_input *in)
1772 {
1773         struct context *inc;
1774         int ret;
1775         size_t count;
1776
1777         inc = in->priv;
1778
1779         /* Must complete processing of previously received chunks. */
1780         if (in->sdi_ready)
1781                 ret = process_buffer(in, TRUE);
1782         else
1783                 ret = SR_OK;
1784
1785         /* Flush most recently queued sample data when EOF is seen. */
1786         count = inc->data_after_timestamp ? 1 : 0;
1787         add_samples(in, count, TRUE);
1788
1789         /* Must send DF_END when DF_HEADER was sent before. */
1790         if (inc->started)
1791                 std_session_send_df_end(in->sdi);
1792
1793         return ret;
1794 }
1795
1796 static void cleanup(struct sr_input *in)
1797 {
1798         struct context *inc;
1799
1800         inc = in->priv;
1801
1802         keep_header_for_reread(in);
1803
1804         g_slist_free_full(inc->channels, free_channel);
1805         inc->channels = NULL;
1806         feed_queue_logic_free(inc->feed_logic);
1807         inc->feed_logic = NULL;
1808         g_free(inc->conv_bits.value);
1809         inc->conv_bits.value = NULL;
1810         g_free(inc->current_logic);
1811         inc->current_logic = NULL;
1812         g_free(inc->current_floats);
1813         inc->current_floats = NULL;
1814         g_string_free(inc->scope_prefix, TRUE);
1815         inc->scope_prefix = NULL;
1816         g_slist_free_full(inc->ignored_signals, g_free);
1817         inc->ignored_signals = NULL;
1818         free_text_split(inc, NULL);
1819 }
1820
1821 static int reset(struct sr_input *in)
1822 {
1823         struct context *inc;
1824         struct vcd_user_opt save;
1825         struct vcd_prev prev;
1826
1827         inc = in->priv;
1828
1829         /* Relase previously allocated resources. */
1830         cleanup(in);
1831         g_string_truncate(in->buf, 0);
1832
1833         /* Restore part of the context, init() won't run again. */
1834         save = inc->options;
1835         prev = inc->prev;
1836         memset(inc, 0, sizeof(*inc));
1837         inc->options = save;
1838         inc->prev = prev;
1839         inc->scope_prefix = g_string_new("\0");
1840
1841         return SR_OK;
1842 }
1843
/* Positions of the module's options within the options[] table. */
enum vcd_option_t {
	OPT_NUM_CHANS = 0,	/* "numchannels" */
	OPT_DOWN_SAMPLE,	/* "downsample" */
	OPT_SKIP_COUNT,		/* "skip" */
	OPT_COMPRESS,		/* "compress" */
	OPT_MAX,		/* Number of options, table terminator. */
};
1851
1852 static struct sr_option options[] = {
1853         [OPT_NUM_CHANS] = {
1854                 "numchannels", "Max number of sigrok channels",
1855                 "The maximum number of sigrok channels to create for VCD input signals.",
1856                 NULL, NULL,
1857         },
1858         [OPT_DOWN_SAMPLE] = {
1859                 "downsample", "Downsampling factor",
1860                 "Downsample the input file's samplerate, i.e. divide by the specified factor.",
1861                 NULL, NULL,
1862         },
1863         [OPT_SKIP_COUNT] = {
1864                 "skip", "Skip this many initial samples",
1865                 "Skip samples until the specified timestamp. "
1866                 "By default samples start at the first timestamp in the file. "
1867                 "Value 0 creates samples starting at timestamp 0. "
1868                 "Values above 0 only start processing at the given timestamp.",
1869                 NULL, NULL,
1870         },
1871         [OPT_COMPRESS] = {
1872                 "compress", "Compress idle periods",
1873                 "Compress idle periods which are longer than the specified number of timescale ticks.",
1874                 NULL, NULL,
1875         },
1876         [OPT_MAX] = ALL_ZERO,
1877 };
1878
1879 static const struct sr_option *get_options(void)
1880 {
1881         if (!options[0].def) {
1882                 options[OPT_NUM_CHANS].def = g_variant_ref_sink(g_variant_new_uint32(0));
1883                 options[OPT_DOWN_SAMPLE].def = g_variant_ref_sink(g_variant_new_uint64(1));
1884                 options[OPT_SKIP_COUNT].def = g_variant_ref_sink(g_variant_new_uint64(~UINT64_C(0)));
1885                 options[OPT_COMPRESS].def = g_variant_ref_sink(g_variant_new_uint64(0));
1886         }
1887
1888         return options;
1889 }
1890
1891 SR_PRIV struct sr_input_module input_vcd = {
1892         .id = "vcd",
1893         .name = "VCD",
1894         .desc = "Value Change Dump data",
1895         .exts = (const char*[]){"vcd", NULL},
1896         .metadata = { SR_INPUT_META_HEADER | SR_INPUT_META_REQUIRED },
1897         .options = get_options,
1898         .format_match = format_match,
1899         .init = init,
1900         .receive = receive,
1901         .end = end,
1902         .cleanup = cleanup,
1903         .reset = reset,
1904 };