2 * This file is part of the libsigrok project.
4 * Copyright (C) 2013 Marc Schink <sigrok-dev@marcschink.de>
5 * Copyright (C) 2019 Gerhard Sittig <gerhard.sittig@gmx.net>
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
27 #include <libsigrok/libsigrok.h>
28 #include "libsigrok-internal.h"
29 #include "scpi.h" /* String un-quote for channel name from header line. */
31 #define LOG_PREFIX "input/csv"
33 #define CHUNK_SIZE (4 * 1024 * 1024)
36 * The CSV input module has the following options:
38 * column_formats: Specifies the data formats and channel counts for the
39 * input file's text columns. Accepts a comma separated list of tuples
40 * with: an optional column repeat count ('*' as a wildcard meaning
41 * "all remaining columns", only applicable to the last field), a format
42 * specifying character ('x' hexadecimal, 'o' octal, 'b' binary, 'l'
43 * single-bit logic), and an optional bit count (translating to: logic
44 * channels communicated in that column). The 'a' format marks analog
45 * data, an optionally following number is the digits count (resolution).
46 * The 't' format marks timestamp values, which could help in automatic
47 * determination of the input stream's samplerate. This "column_formats"
48 * option is most versatile, other forms of specifying the column layout
49 * only exist for backwards compatibility, and are rather limited. They
50 * exclusively support logic input data in strictly adjacent columns,
51 * with further constraints on column layout for multi-bit data.
53 * single_column: Specifies the column number which contains the logic data
54 * for single-column mode. All logic data is taken from several bits
55 * which all are kept within that one column. Only exists for backwards
56 * compatibility, see "column_formats" for more flexibility.
58 * first_column: Specifies the number of the first column with logic data
59 * in simple multi-column mode. Only exists for backwards compatibility,
60 * see "column_formats" for more flexibility.
62 * logic_channels: Specifies the number of logic channels. Is required in
63 * simple single-column mode. Is optional in simple multi-column mode
64 * (and defaults to all remaining columns). Only exists for backwards
65 * compatibility, see "column_formats" for more flexibility.
67 * single_format: Specifies the format of the input text in simple single-
68 * column mode. Available formats are: 'bin' (default), 'hex' and 'oct'.
69 * Simple multi-column mode always uses single-bit data per column.
70 * Only exists for backwards compatibility, see "column_formats" for
73 * start_line: Specifies at which line to start processing the input file.
74 * Allows to skip leading lines which neither are header nor data lines.
75 * By default all of the input file gets processed.
77 * header: Boolean option, controls whether the first processed line is used
78 * to determine channel names. Off by default. Generic channel names are
79 * used in the absence of header line content.
81 * samplerate: Specifies the samplerate of the input data. Defaults to 0.
82 * User specs take precedence over data which optionally gets derived
85 * column_separator: Specifies the sequence which separates the text file
86 * columns. Cannot be empty. Defaults to comma.
88 * comment_leader: Specifies the sequence which starts comments that run
89 * up to the end of the current text line. Can be empty to disable
90 * comment support. Defaults to semicolon.
92 * Typical examples of using these options:
93 * - ... -I csv:column_formats=*l ...
94 * All columns are single-bit logic data. Identical to the previous
95 * multi-column mode (the default when no options were given at all).
96 * - ... -I csv:column_formats=3-,*l ...
97 * Ignore the first three columns, get single-bit logic data from all
98 * remaining columns (multi-column mode with first-column above 1).
99 * - ... -I csv:column_formats=3-,4l,x8 ...
100 * Ignore the first three columns, get single-bit logic data from the
101 * next four columns, then eight-bit data in hex format from the next
102 * column. More columns may follow in the input text but won't get
103 * processed. (Mix of previous multi-column as well as single-column
105 * - ... -I csv:column_formats=4x8,b16,5l ...
106 * Get eight-bit data in hex format from the first four columns, then
107 * sixteen-bit data in binary format, then five times single-bit data.
108 * - ... -I csv:single_column=2:single_format=bin:logic_channels=8 ...
109 * Get eight logic bits in binary format from column 2. (Simple
110 * single-column mode, corresponds to the "-,b8" format.)
111 * - ... -I csv:first_column=6:logic_channels=4 ...
112 * Get four single-bit logic channels from columns 6 to 9 respectively.
113 * (Simple multi-column mode, corresponds to the "5-,4l" format.)
114 * - ... -I csv:start_line=20:header=yes:...
115 * Skip the first 19 text lines. Use line 20 to derive channel names.
116 * Data starts at line 21.
117 * - ... -I csv:column_formats=*a6 ...
118 * Each column contains an analog value with six significant digits
119 * after the decimal period.
120 * - ... -I csv:column_formats=t,2a ...
121 * The first column contains timestamps, the next two columns contain
122 * analog values. The capture's samplerate could get determined from
123 * the timestamp values if not provided by the user by means of the
124 * 'samplerate' option. This assumes a mere number in units of seconds,
125 * and equidistant rows, there is no fancy support for textual unit
126 * suffixes nor gaps in the stream of samples nor other non-linearity,
127 * just use '-' to ignore the column if its format is not supported.
133 * - Extend support for analog input data.
134 * - Determine why analog samples of 'double' data type get scrambled
135 * in sigrok-cli screen output. Is analog.encoding->unitsize not
136 * handled properly? A sigrok-cli or libsigrok (src/output) issue?
137 * - Reconsider the channel creation after format processing. Current
138 * logic may "bleed" channel names into the analog group when logic
139 * channels' columns follow analog columns (seen with "-,2a,x8").
140 * Trying to sort it out, a naive change used to map logic channels'
141 * data to incorrect bitmap positions. The whole channel numbering
142 * needs reconsideration. Probably it's easiest to first create _all_
143 * logic channels so that they have adjacent numbers starting at 0
144 * (addressing logic bits), then all analog channels (again adjacent)
145 * to simplify the calculation of their index in the sample set as
146 * well as their sdi channel index from the "analog column index".
147 * - Optionally get sample rate from timestamp column. Just best-effort
148 * approach, not necessarily reliable. Users can always specify rates.
149 * - Add a test suite for input modules in general, and CSV in specific?
150 * Becomes more important with the multitude of options and their
151 * interaction. Could cover edge cases (BOM presence, line termination
152 * absence, etc) and auto-stuff as well (channel names, channel counts,
/*
 * Storage type for a single analog sample value. The analog datafeed
 * code announces sizeof() of this type as the samples' unit size.
 */
156 typedef float csv_analog_t; /* 'double' currently is flawed. */
158 /* Single column formats. */
/*
 * The order of the enumerators matters: format_is_logic() tests the
 * range FORMAT_BIN..FORMAT_OCT, and the values are used as indices
 * into lookup tables (format names, format characters, parse routines).
 */
159 enum single_col_format {
160 FORMAT_NONE, /* Ignore this column. */
161 FORMAT_BIN, /* Bin digits for a set of bits (or just one bit). */
162 FORMAT_HEX, /* Hex digits for a set of bits. */
163 FORMAT_OCT, /* Oct digits for a set of bits. */
164 FORMAT_ANALOG, /* Floating point number for an analog channel. */
165 FORMAT_TIME, /* Timestamps. */
/*
 * Human readable names of the column formats, indexed by
 * enum single_col_format. Used in debug and error messages.
 */
168 static const char *col_format_text[] = {
169 [FORMAT_NONE] = "unknown",
170 [FORMAT_BIN] = "binary",
171 [FORMAT_HEX] = "hexadecimal",
172 [FORMAT_OCT] = "octal",
173 [FORMAT_ANALOG] = "analog",
174 [FORMAT_TIME] = "timestamp",
/*
 * Format spec characters, indexed by enum single_col_format. init()
 * looks up the character for the "single_format" option when it
 * composes a column_formats spec from backwards compatible options.
 * NOTE(review): only the analog entry is visible in this view.
 */
177 static const char col_format_char[] = {
182 [FORMAT_ANALOG] = 'a',
/* Check whether a column's format says to ignore the column (FORMAT_NONE). */
186 static gboolean format_is_ignore(enum single_col_format fmt)
188 return fmt == FORMAT_NONE;
/*
 * Check whether a column's format carries logic data (bin, hex, oct).
 * Depends on these enumerators being adjacent in the enum declaration.
 */
191 static gboolean format_is_logic(enum single_col_format fmt)
193 return fmt >= FORMAT_BIN && fmt <= FORMAT_OCT;
/* Check whether a column's format carries an analog value. */
196 static gboolean format_is_analog(enum single_col_format fmt)
198 return fmt == FORMAT_ANALOG;
/* Check whether a column's format carries timestamp values. */
201 static gboolean format_is_timestamp(enum single_col_format fmt)
203 return fmt == FORMAT_TIME;
/* Processing details for one column of the input text. */
206 struct column_details {
/* How to interpret the column's text. */
208 enum single_col_format text_format;
/*
 * Number of the first channel which is fed from this column. Logic
 * and analog channels are counted separately, each starting at 0.
 */
209 size_t channel_offset;
/* Number of channels fed from this column (bit count for logic, 1 for analog). */
210 size_t channel_count;
/* For analog columns: length of the sdi channel list at the time of channel creation. */
211 size_t channel_index;
/*
 * Members of the input module's private context ("inc" in the routines
 * of this file). NOTE(review): the struct's opening line and several
 * member declarations are not visible in this view.
 */
218 /* Current samplerate, optionally determined from input data. */
/* Timestamp of an earlier row; 0.0 is used as the "none seen yet" marker. */
220 double prev_timestamp;
/* Set once the samplerate was sent to the session feed. */
221 gboolean samplerate_sent;
223 /* Number of channels. */
224 size_t logic_channels;
225 size_t analog_channels;
227 /* Column delimiter (actually separator), comment leader, EOL sequence. */
232 /* Format specs for input columns, and processing state. */
/* Column count of the first processed text line. */
233 size_t column_seen_count;
/* Format spec string, a copy which init() allocates. */
234 const char *column_formats;
/* Column count which the format spec covers. */
235 size_t column_want_count;
/* Array of column_want_count items. */
236 struct column_details *column_details;
238 /* Line number to start processing. */
242 * Determines if the first line should be treated as header and used for
243 * channel names in multi column mode.
246 gboolean header_seen;
248 size_t sample_unit_size; /**!< Byte count for a single sample. */
249 uint8_t *sample_buffer; /**!< Buffer for a single sample. */
250 csv_analog_t *analog_sample_buffer; /**!< Buffer for one set of analog values. */
252 uint8_t *datafeed_buffer; /**!< Queue for datafeed submission. */
/* Logic queue capacity and fill level, both in bytes. */
253 size_t datafeed_buf_size;
254 size_t datafeed_buf_fill;
255 /* "Striped" layout, M samples for N channels each. */
256 csv_analog_t *analog_datafeed_buffer; /**!< Queue for analog datafeed. */
/* Analog queue capacity and fill level, both in samples per channel. */
257 size_t analog_datafeed_buf_size;
258 size_t analog_datafeed_buf_fill;
/* Per analog channel: the channel list and digits count to put into analog packets. */
259 GSList **analog_datafeed_channels;
260 int *analog_datafeed_digits;
262 /* Current line number. */
265 /* List of previously created sigrok channels. */
266 GSList *prev_sr_channels;
270 * Primitive operations to handle sample sets:
271 * - Keep a buffer for datafeed submission, capable of holding many
272 * samples (reduces call overhead, improves throughput).
273 * - Have a "current sample set" pointer reference one position in that
274 * large samples buffer.
275 * - Clear the current sample set before text line inspection, then set
276 * the bits which are found active in the current line of text input.
277 * Phrase the API such that call sites can be kept simple. Advance to
278 * the next sample set between lines, flush the larger buffer as needed
279 * (when it is full, or upon EOF).
/*
 * Send the samplerate to the session feed in an SR_DF_META packet.
 * Only acts when a non-zero samplerate is known and was not sent
 * before, and remembers that the rate was sent.
 */
282 static int flush_samplerate(const struct sr_input *in)
285 struct sr_datafeed_packet packet;
286 struct sr_datafeed_meta meta;
287 struct sr_config *src;
290 if (inc->samplerate && !inc->samplerate_sent) {
291 packet.type = SR_DF_META;
292 packet.payload = &meta;
293 src = sr_config_new(SR_CONF_SAMPLERATE, g_variant_new_uint64(inc->samplerate));
294 meta.config = g_slist_append(NULL, src);
/* The result of the send operation is not checked here. */
295 sr_session_send(in->sdi, &packet);
/*
 * g_slist_free() releases the list cell only, not the config item.
 * NOTE(review): confirm that 'src' gets released as well, the
 * statement is not visible in this view.
 */
296 g_slist_free(meta.config);
298 inc->samplerate_sent = TRUE;
/*
 * Start a new logic sample set: Make the "current sample" reference
 * the next free position in the datafeed queue, and zero all its bits.
 */
304 static void clear_logic_samples(struct context *inc)
/* Guard for input data without logic channels. */
306 if (!inc->logic_channels)
308 inc->sample_buffer = &inc->datafeed_buffer[inc->datafeed_buf_fill];
309 memset(inc->sample_buffer, 0, inc->sample_unit_size);
/*
 * Set the bit of logic channel 'ch_idx' in the current sample set.
 * Channels are packed eight per byte, lowest channel in the LSB.
 * NOTE(review): the handling of the 'on' parameter is not visible in
 * this view. The visible code only ever sets bits, which matches a
 * sample set that was zeroed by clear_logic_samples() before.
 */
312 static void set_logic_level(struct context *inc, size_t ch_idx, int on)
314 size_t byte_idx, bit_idx;
/* Guard against channel numbers beyond the logic channel count. */
317 if (ch_idx >= inc->logic_channels)
322 byte_idx = ch_idx / 8;
323 bit_idx = ch_idx % 8;
324 bit_mask = 1 << bit_idx;
325 inc->sample_buffer[byte_idx] |= bit_mask;
/*
 * Submit all queued logic samples to the session feed in a single
 * SR_DF_LOGIC packet, then mark the queue as empty. Sends the
 * samplerate first (when pending).
 */
328 static int flush_logic_samples(const struct sr_input *in)
331 struct sr_datafeed_packet packet;
332 struct sr_datafeed_logic logic;
/* Guard for an empty queue. */
336 if (!inc->datafeed_buf_fill)
339 rc = flush_samplerate(in);
343 memset(&packet, 0, sizeof(packet));
344 memset(&logic, 0, sizeof(logic));
345 packet.type = SR_DF_LOGIC;
346 packet.payload = &logic;
347 logic.unitsize = inc->sample_unit_size;
/* The fill level is kept in bytes, and so is the packet's length. */
348 logic.length = inc->datafeed_buf_fill;
349 logic.data = inc->datafeed_buffer;
351 rc = sr_session_send(in->sdi, &packet);
355 inc->datafeed_buf_fill = 0;
/*
 * Commit the current logic sample set to the datafeed queue. Flushes
 * the queue when it became full.
 */
359 static int queue_logic_samples(const struct sr_input *in)
/* Guard for input data without logic channels. */
365 if (!inc->logic_channels)
368 inc->datafeed_buf_fill += inc->sample_unit_size;
/*
 * NOTE(review): the equality test assumes that the queue size is a
 * whole multiple of the sample unit size -- confirm at the place
 * where the buffer gets allocated.
 */
369 if (inc->datafeed_buf_fill == inc->datafeed_buf_size) {
370 rc = flush_logic_samples(in);
/* Forward declaration, clear_analog_samples() uses the setter below. */
377 static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value);
/*
 * Start a new analog sample set: Make the "current sample" reference
 * the next free position in the analog queue, and assign the value 0.0
 * to every analog channel.
 */
379 static void clear_analog_samples(struct context *inc)
/* Guard for input data without analog channels. */
383 if (!inc->analog_channels)
385 inc->analog_sample_buffer = &inc->analog_datafeed_buffer[inc->analog_datafeed_buf_fill];
386 for (idx = 0; idx < inc->analog_channels; idx++)
387 set_analog_value(inc, idx, 0.0);
/*
 * Store the value of analog channel 'ch_idx' in the current sample set.
 * The queue keeps each channel's samples in a run of its own, runs are
 * analog_datafeed_buf_size items apart ("striped" layout).
 */
390 static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value)
/* Guard against channel numbers beyond the analog channel count. */
392 if (ch_idx >= inc->analog_channels)
396 inc->analog_sample_buffer[ch_idx * inc->analog_datafeed_buf_size] = value;
/*
 * Submit all queued analog samples to the session feed, then mark the
 * queue as empty. One SR_DF_ANALOG packet is sent per analog channel.
 * Sends the samplerate first (when pending).
 */
399 static int flush_analog_samples(const struct sr_input *in)
402 struct sr_datafeed_packet packet;
403 struct sr_datafeed_analog analog;
404 struct sr_analog_encoding encoding;
405 struct sr_analog_meaning meaning;
406 struct sr_analog_spec spec;
407 csv_analog_t *samples;
/* Guard for an empty queue. */
413 if (!inc->analog_datafeed_buf_fill)
416 rc = flush_samplerate(in);
420 samples = inc->analog_datafeed_buffer;
421 for (ch_idx = 0; ch_idx < inc->analog_channels; ch_idx++) {
422 digits = inc->analog_datafeed_digits[ch_idx];
423 sr_analog_init(&analog, &encoding, &meaning, &spec, digits);
424 memset(&packet, 0, sizeof(packet));
425 packet.type = SR_DF_ANALOG;
426 packet.payload = &analog;
427 analog.num_samples = inc->analog_datafeed_buf_fill;
428 analog.data = samples;
429 analog.meaning->channels = inc->analog_datafeed_channels[ch_idx];
/* The input text carries no quantity or unit information. */
430 analog.meaning->mq = 0;
431 analog.meaning->mqflags = 0;
432 analog.meaning->unit = 0;
/* Samples are passed in the host's native floating point representation. */
433 analog.encoding->unitsize = sizeof(samples[0]);
434 analog.encoding->is_signed = TRUE;
435 analog.encoding->is_float = TRUE;
436 #ifdef WORDS_BIGENDIAN
437 analog.encoding->is_bigendian = TRUE;
439 analog.encoding->is_bigendian = FALSE;
441 analog.encoding->digits = spec.spec_digits;
442 rc = sr_session_send(in->sdi, &packet);
/* Advance to the next channel's run of samples in the striped queue. */
445 samples += inc->analog_datafeed_buf_size;
448 inc->analog_datafeed_buf_fill = 0;
/*
 * Commit the current analog sample set to the datafeed queue. Flushes
 * the queue when it became full. The fill level counts sample sets.
 */
452 static int queue_analog_samples(const struct sr_input *in)
/* Guard for input data without analog channels. */
458 if (!inc->analog_channels)
461 inc->analog_datafeed_buf_fill++;
462 if (inc->analog_datafeed_buf_fill == inc->analog_datafeed_buf_size) {
463 rc = flush_analog_samples(in);
470 /* Helpers for "column processing". */
/*
 * Split one field of the column_formats spec into its components:
 * an optional column count (or '*'), a mandatory format character,
 * and an optional bit count (digits count for analog data).
 *
 * spec: Text of a single format field.
 * column_count: Output, number of columns which the field spans.
 * format: Output, the format code. Only gets assigned after the format
 *   character was accepted.
 * bit_count: Output, the bit count (or analog digits count).
 *
 * NOTE(review): return statements and several conditions are not
 * visible in this view. Callers compare the result against SR_OK.
 */
472 static int split_column_format(const char *spec,
473 size_t *column_count, enum single_col_format *format, size_t *bit_count)
476 char *endp, format_char;
477 enum single_col_format format_code;
482 /* Get the (optional, decimal, default 1) column count. Accept '*'. */
485 /* Workaround, strtoul("*") won't always yield expected endp. */
487 endp = (char *)&spec[1];
489 count = strtoul(spec, &endp, 10);
496 *column_count = count;
499 /* Get the (mandatory, single letter) type spec (-/xob/l). */
500 format_char = *spec++;
501 switch (format_char) {
505 format_code = FORMAT_NONE;
508 format_code = FORMAT_HEX;
511 format_code = FORMAT_OCT;
515 format_code = FORMAT_BIN;
518 format_code = FORMAT_ANALOG;
521 format_code = FORMAT_TIME;
523 default: /* includes NUL */
527 *format = format_code;
529 /* Get the (optional, decimal, default 1) bit count. */
531 count = strtoul(spec, &endp, 10);
/* Default when no number was given: 3 digits for analog data, 1 bit otherwise. */
535 count = format_is_analog(format_code) ? 3 : 1;
/* Ignored columns and the 'l' format get special treatment of the count. */
536 if (format_is_ignore(format_code))
538 if (format_char == 'l')
544 /* Input spec must have been exhausted. */
/*
 * Create the column processing details and the sigrok channels from
 * the column_formats spec and the first processed line of input text.
 *
 * in: The input module instance.
 * column_format: The format spec, a comma separated list of fields.
 * column_texts: Column texts of the first processed line. The number
 *   of items determines the "auto" column count, the texts optionally
 *   provide channel names (header line).
 *
 * Works in two passes over the format fields. The first pass counts
 * columns as well as logic and analog channels. The second pass fills
 * in the details of each column and creates the channels.
 */
551 static int make_column_details_from_format(const struct sr_input *in,
552 const char *column_format, char **column_texts)
555 char **formats, *format;
556 size_t format_count, column_count, logic_count, analog_count;
557 size_t auto_column_count;
558 size_t format_idx, c, b, column_idx, channel_idx, analog_idx;
559 enum single_col_format f;
560 struct column_details *detail;
561 GString *channel_name;
565 int channel_type, channel_sdi_nr;
569 inc->column_seen_count = g_strv_length(column_texts);
571 /* Split the input spec, count involved columns and bits. */
572 formats = g_strsplit(column_format, ",", 0);
574 sr_err("Cannot parse columns format %s (comma split).", column_format);
577 format_count = g_strv_length(formats);
579 sr_err("Cannot parse columns format %s (field count).", column_format);
583 column_count = logic_count = analog_count = 0;
584 auto_column_count = 0;
585 for (format_idx = 0; format_idx < format_count; format_idx++) {
586 format = formats[format_idx];
587 ret = split_column_format(format, &c, &f, &b);
/*
 * NOTE(review): this message reads c, f, and b before the return code
 * gets checked. These may be unassigned when the split failed, and
 * 'f' is used as an array index -- confirm and consider reordering.
 */
588 sr_dbg("fmt %s -> %zu cols, %s fmt, %zu bits, rc %d", format, c, col_format_text[f], b, ret);
590 sr_err("Cannot parse columns format %s (field split, %s).", column_format, format);
595 /* User requested "auto-count", must be last format. */
596 if (formats[format_idx + 1]) {
597 sr_err("Auto column count must be last format field.");
/*
 * NOTE(review): this is a size_t subtraction. It wraps around when the
 * preceding fields already cover more columns than the text line has.
 * The width check further down only runs after this loop -- confirm.
 */
601 auto_column_count = inc->column_seen_count - column_count;
602 c = auto_column_count;
605 if (format_is_analog(f))
607 else if (format_is_logic(f))
608 logic_count += c * b;
610 sr_dbg("Column format %s -> %zu columns, %zu logic, %zu analog channels.",
611 column_format, column_count, logic_count, analog_count);
613 /* Allocate and fill in "column processing" details. Create channels. */
614 inc->column_want_count = column_count;
615 if (inc->column_seen_count < inc->column_want_count) {
616 sr_err("Insufficient input text width for desired data amount, got %zu but want %zu columns.",
617 inc->column_seen_count, inc->column_want_count);
621 inc->column_details = g_malloc0_n(column_count, sizeof(inc->column_details[0]));
622 column_idx = channel_idx = analog_idx = 0;
623 channel_name = g_string_sized_new(64);
624 for (format_idx = 0; format_idx < format_count; format_idx++) {
625 /* Process a format field, which can span multiple columns. */
626 format = formats[format_idx];
/* The field passed the check in the first pass, the result is not checked again. */
627 (void)split_column_format(format, &c, &f, &b);
629 c = auto_column_count;
631 /* Fill in a column's processing details. */
632 detail = &inc->column_details[column_idx++];
/* Column numbers are 1-based, the index was incremented already. */
633 detail->col_nr = column_idx;
634 detail->text_format = f;
635 if (format_is_analog(detail->text_format)) {
636 detail->channel_offset = analog_idx;
637 detail->channel_count = 1;
638 detail->analog_digits = b;
639 analog_idx += detail->channel_count;
640 } else if (format_is_logic(detail->text_format)) {
641 detail->channel_offset = channel_idx;
642 detail->channel_count = b;
643 channel_idx += detail->channel_count;
644 } else if (format_is_ignore(detail->text_format)) {
649 * Neither logic nor analog data, nor ignore.
650 * Format was noted. No channel creation involved.
655 * Pick most appropriate channel names. Optionally
656 * use text from a header line (when requested by the
657 * user). In the absence of header text, channels are
658 * assigned rather generic names.
660 * Manipulation of the column's caption (when a header
661 * line is seen) is acceptable, because this header
662 * line won't get processed another time.
664 column = column_texts[detail->col_nr - 1];
665 if (inc->use_header && column && *column)
666 caption = sr_scpi_unquote_string(column);
669 if (!caption || !*caption)
672 * TODO Need we first create _all_ logic channels,
673 * before creating analog channels? Just store the
674 * parameters here (index, type, name) and have the
675 * creation sequence done outside of the format
678 for (create_idx = 0; create_idx < detail->channel_count; create_idx++) {
/*
 * Channel name: the caption as is for single-channel columns, the
 * caption with an index suffix for multi-bit columns, or the mere
 * channel number when no caption is available.
 */
679 if (caption && detail->channel_count == 1) {
680 g_string_assign(channel_name, caption);
681 } else if (caption) {
682 g_string_printf(channel_name, "%s[%zu]",
683 caption, create_idx);
685 g_string_printf(channel_name, "%zu",
686 detail->channel_offset + create_idx);
/* Analog channels' sdi numbers follow those of all logic channels. */
688 if (format_is_analog(detail->text_format)) {
689 channel_sdi_nr = logic_count + detail->channel_offset + create_idx;
690 channel_type = SR_CHANNEL_ANALOG;
691 detail->channel_index = g_slist_length(in->sdi->channels);
692 } else if (format_is_logic(detail->text_format)) {
693 channel_sdi_nr = detail->channel_offset + create_idx;
694 channel_type = SR_CHANNEL_LOGIC;
698 sr_channel_new(in->sdi, channel_sdi_nr,
699 channel_type, TRUE, channel_name->str);
703 inc->logic_channels = channel_idx;
704 inc->analog_channels = analog_idx;
705 g_string_free(channel_name, TRUE);
/*
 * Get the processing details for a column of the input text.
 *
 * nr: The column number, 1-based. Valid numbers are in the range of
 *   1 up to and including the wanted column count.
 *
 * NOTE(review): the results of the two guards are not visible in this
 * view, presumably NULL -- confirm.
 */
711 static const struct column_details *lookup_column_details(struct context *inc, size_t nr)
713 if (!inc || !inc->column_details)
715 if (!nr || nr > inc->column_want_count)
717 return &inc->column_details[nr - 1];
721 * Primitive operations for text input: Strip comments off text lines.
722 * Split text lines into columns. Process input text for individual
/*
 * Strip a comment off a text line.
 *
 * buf: The text line, gets searched for the comment leader.
 * prefix: The comment leader sequence.
 *
 * NOTE(review): only the search for the first occurrence of the comment
 * leader is visible in this view, not what is done with the match.
 */
726 static void strip_comment(char *buf, const GString *prefix)
733 if ((ptr = strstr(buf, prefix->str))) {
740 * @brief Splits a text line into a set of columns.
742 * @param[in] buf The input text line to split.
743 * @param[in] inc The input module's context.
745 * @returns An array of strings, representing the columns' text.
747 * This routine splits a text line on previously determined separators.
/*
 * Splits on every occurrence of the column separator sequence, without
 * a limit for the number of columns. The result is a newly allocated
 * NULL terminated string vector (see g_strsplit()).
 */
749 static char **split_line(char *buf, struct context *inc)
751 return g_strsplit(buf, inc->delimiter->str, 0);
755 * @brief Parse a multi-bit field into several logic channels.
757 * @param[in] column The input text, a run of bin/hex/oct digits.
758 * @param[in] inc The input module's context.
759 * @param[in] details The column processing details.
761 * @retval SR_OK Success.
762 * @retval SR_ERR Invalid input data (empty, or format error).
764 * This routine modifies the logic levels in the current sample set,
765 * based on the text input and a user provided format spec.
/*
 * Implementation notes: The read position starts at the end of the
 * text, the channel number starts at the column's first channel. The
 * loop runs while text is left before the read position and while
 * the column has channels left to assign.
 */
767 static int parse_logic(const char *column, struct context *inc,
768 const struct column_details *details)
770 size_t length, ch_rem, ch_idx, ch_inc;
774 const char *type_text;
778 * Prepare to read the digits from the text end towards the start.
779 * A digit corresponds to a variable number of channels (depending
780 * on the value's radix). Prepare the mapping of text digits to
781 * (a number of) logic channels.
783 length = strlen(column);
785 sr_err("Column %zu in line %zu is empty.", details->col_nr,
789 rdptr = &column[length];
790 ch_idx = details->channel_offset;
791 ch_rem = details->channel_count;
794 * Get another digit and derive up to four logic channels' state from
795 * it. Make sure to not process more bits than the column has channels
796 * associated with it.
798 while (rdptr > column && ch_rem) {
799 /* Check for valid digits according to the input radix. */
801 switch (details->text_format) {
/* Accepts '0' and '1' only. */
803 valid = g_ascii_isxdigit(c) && c < '2';
/* Accepts '0' to '7' only. */
807 valid = g_ascii_isxdigit(c) && c < '8';
/* Accepts all hex digits, in either case. */
811 valid = g_ascii_isxdigit(c);
819 type_text = col_format_text[details->text_format];
820 sr_err("Invalid text '%s' in %s type column %zu in line %zu.",
821 column, type_text, details->col_nr, inc->line_number);
824 /* Use the digit's bits for logic channels' data. */
825 bits = g_ascii_xdigit_value(c);
/*
 * Bit N of the digit's value goes to channel number ch_idx + N.
 * NOTE(review): the case labels are not visible in this view. The
 * order of statements suggests fall-through from wider to narrower
 * formats -- confirm.
 */
826 switch (details->text_format) {
830 set_logic_level(inc, ch_idx + 3, bits & (1 << 3));
836 set_logic_level(inc, ch_idx + 2, bits & (1 << 2));
840 set_logic_level(inc, ch_idx + 1, bits & (1 << 1));
845 set_logic_level(inc, ch_idx + 0, bits & (1 << 0));
848 /* ShouldNotHappen(TM), but silences compiler warning. */
854 * TODO Determine whether the availability of extra input data
855 * for unhandled logic channels is worth warning here. In this
856 * implementation users are in control, and can have the more
857 * significant bits ignored (which can be considered a feature
858 * and not really a limitation).
865 * @brief Parse a floating point text into an analog value.
867 * @param[in] column The input text, a floating point number.
868 * @param[in] inc The input module's context.
869 * @param[in] details The column processing details.
871 * @retval SR_OK Success.
872 * @retval SR_ERR Invalid input data (empty, or format error).
874 * This routine modifies the analog values in the current sample set,
875 * based on the text input and a user provided format spec.
/*
 * Implementation notes: The sizeof() comparisons pick the text
 * conversion routine (double or float precision) which matches the
 * width of the 'value' variable. The result is stored for the column's
 * first (and only) analog channel.
 */
877 static int parse_analog(const char *column, struct context *inc,
878 const struct column_details *details)
881 double dvalue; float fvalue;
/* Guard against calls for columns of other formats. */
885 if (!format_is_analog(details->text_format))
888 length = strlen(column);
890 sr_err("Column %zu in line %zu is empty.", details->col_nr,
894 if (sizeof(value) == sizeof(double)) {
895 ret = sr_atod_ascii(column, &dvalue);
897 } else if (sizeof(value) == sizeof(float)) {
898 ret = sr_atof_ascii(column, &fvalue);
904 sr_err("Cannot parse analog text %s in column %zu in line %zu.",
905 column, details->col_nr, inc->line_number);
908 set_analog_value(inc, details->channel_offset, value);
914 * @brief Parse a timestamp text, auto-determine samplerate.
916 * @param[in] column The input text, a floating point number.
917 * @param[in] inc The input module's context.
918 * @param[in] details The column processing details.
920 * @retval SR_OK Success.
921 * @retval SR_ERR Invalid input data (empty, or format error).
923 * This routine attempts to automatically determine the input data's
924 * samplerate from text rows' timestamp values. Only simple formats are
925 * supported, user provided values always take precedence.
/*
 * Implementation notes: The context's prev_timestamp member doubles as
 * the "have seen an earlier timestamp" flag, the value 0.0 means that
 * none was seen. The derived rate is stored in the context's
 * samplerate member.
 */
927 static int parse_timestamp(const char *column, struct context *inc,
928 const struct column_details *details)
/* Guard against calls for columns of other formats. */
933 if (!format_is_timestamp(details->text_format))
937 * Implementor's notes on timestamp interpretation. Use a simple
938 * approach for improved maintainability which covers most cases
939 * of input data. There is not much gain in adding complexity,
940 * users can easily provide the rate when auto-detection fails.
941 * - Bail out if samplerate is known already.
942 * - Try to interpret the timestamp (simple float conversion).
943 * If conversion fails then clear all previous knowledge and
944 * bail out (non-fatal, perhaps warn). Silently ignore values
945 * of zero since those could be silent fails -- assume that
946 * genuine data contains at least two adjacent rows with useful
947 * timestamps for the feature to work reliably. Annoying users
948 * with "failed to detect" messages is acceptable here, since
949 * users expecting the feature to work should provide useful
950 * data, and there are easy ways to disable the detection or
952 * - If there is no previous timestamp, keep the current value
953 * for later reference and bail out.
954 * - If a previous timestamp was seen, determine the difference
955 * between them, and derive the samplerate. Update internal
956 * state (the value automatically gets sent to the datafeed),
957 * and clear previous knowledge. Subsequent calls will ignore
958 * following input data (see above, rate is known).
960 * TODO Potential future improvements:
961 * - Prefer rationals over floats for improved precision and
962 * reduced rounding errors which result in odd rates.
963 * - Support other formats ("2 ms" or similar)?
967 ret = sr_atod_ascii(column, &ts);
971 sr_warn("Cannot convert timestamp text %s in line %zu (or zero value).",
972 column, inc->line_number);
973 inc->prev_timestamp = 0.0;
976 if (!inc->prev_timestamp) {
977 sr_dbg("First timestamp value %g in line %zu.",
978 ts, inc->line_number);
979 inc->prev_timestamp = ts;
982 sr_dbg("Second timestamp value %g in line %zu.", ts, inc->line_number);
/* From here on 'ts' holds the difference, no longer the timestamp. */
983 ts -= inc->prev_timestamp;
984 sr_dbg("Timestamp difference %g in line %zu.",
985 ts, inc->line_number);
987 sr_warn("Zero timestamp difference in line %zu.",
/*
 * NOTE(review): 'ts' is the difference at this point, not the row's
 * timestamp. Storing it presumably resets the previous knowledge
 * (difference is zero) -- confirm that this is intended.
 */
989 inc->prev_timestamp = ts;
/*
 * Truncate the rate to an integer value.
 * NOTE(review): no rejection of negative differences (decreasing
 * timestamps) is visible in this view. The conversion of a negative
 * floating point value to an unsigned type is undefined -- confirm.
 */
994 rate = (uint64_t)rate;
995 sr_dbg("Rate from timestamp %g in line %zu.", rate, inc->line_number);
996 inc->samplerate = rate;
997 inc->prev_timestamp = 0.0;
1003 * @brief Parse routine which ignores the input text.
1005 * This routine exists to unify dispatch code paths, mapping input file
1006 * columns' data types to their respective parse routines.
/* Shares the signature of the other column parse routines (see col_parse_cb). */
1008 static int parse_ignore(const char *column, struct context *inc,
1009 const struct column_details *details)
/*
 * Signature of column parse routines: the column's text, the input
 * module's context, and the column's processing details.
 */
1017 typedef int (*col_parse_cb)(const char *column, struct context *inc,
1018 const struct column_details *details);
/*
 * Parse routines for column text, indexed by enum single_col_format.
 * All logic formats share one routine, which inspects the format.
 */
1020 static const col_parse_cb col_parse_funcs[] = {
1021 [FORMAT_NONE] = parse_ignore,
1022 [FORMAT_BIN] = parse_logic,
1023 [FORMAT_OCT] = parse_logic,
1024 [FORMAT_HEX] = parse_logic,
1025 [FORMAT_ANALOG] = parse_analog,
1026 [FORMAT_TIME] = parse_timestamp,
/*
 * Initialize the input module: allocate the device instance and the
 * private context, get the user's options, and settle on a format
 * spec for the input columns.
 *
 * in: The input module instance, receives 'sdi' and 'priv'.
 * options: The options table, expected to hold a value for every
 *   key which is looked up here (results are used without a check).
 */
1029 static int init(struct sr_input *in, GHashTable *options)
1031 struct context *inc;
1032 size_t single_column, first_column, logic_channels;
1034 enum single_col_format format;
1037 in->sdi = g_malloc0(sizeof(*in->sdi));
1038 in->priv = inc = g_malloc0(sizeof(*inc));
1040 single_column = g_variant_get_uint32(g_hash_table_lookup(options, "single_column"));
1041 logic_channels = g_variant_get_uint32(g_hash_table_lookup(options, "logic_channels"));
1042 inc->delimiter = g_string_new(g_variant_get_string(
1043 g_hash_table_lookup(options, "column_separator"), NULL));
1044 if (!inc->delimiter->len) {
1045 sr_err("Column separator cannot be empty.");
1048 s = g_variant_get_string(g_hash_table_lookup(options, "single_format"), NULL);
/* Only the first three characters are compared, case insensitive. */
1049 if (g_ascii_strncasecmp(s, "bin", 3) == 0) {
1050 format = FORMAT_BIN;
1051 } else if (g_ascii_strncasecmp(s, "hex", 3) == 0) {
1052 format = FORMAT_HEX;
1053 } else if (g_ascii_strncasecmp(s, "oct", 3) == 0) {
1054 format = FORMAT_OCT;
1056 sr_err("Invalid single-column format: '%s'", s);
1059 inc->comment = g_string_new(g_variant_get_string(
1060 g_hash_table_lookup(options, "comment_leader"), NULL));
1061 if (g_string_equal(inc->comment, inc->delimiter)) {
1063 * Using the same sequence as comment leader and column
1064 * separator won't work. The user probably specified ';'
1065 * as the column separator but did not adjust the comment
1066 * leader. Try DWIM, drop comment strippin support here.
1068 sr_warn("Comment leader and column separator conflict, disabling comment support.");
1069 g_string_truncate(inc->comment, 0);
1071 inc->samplerate = g_variant_get_uint64(g_hash_table_lookup(options, "samplerate"));
1072 first_column = g_variant_get_uint32(g_hash_table_lookup(options, "first_column"));
1073 inc->use_header = g_variant_get_boolean(g_hash_table_lookup(options, "header"));
1074 inc->start_line = g_variant_get_uint32(g_hash_table_lookup(options, "start_line"));
/* Line numbers are 1-based. */
1075 if (inc->start_line < 1) {
1076 sr_err("Invalid start line %zu.", inc->start_line);
1081 * Scan flexible, to get prefered format specs which describe
1082 * the input file's data formats. As well as some simple specs
1083 * for backwards compatibility and user convenience.
1085 * This logic ends up with a copy of the format string, either
1086 * user provided or internally derived. Actual creation of the
1087 * column processing details gets deferred until the first line
1088 * of input data was seen. To support automatic determination of
1089 * e.g. channel counts from column counts.
1091 s = g_variant_get_string(g_hash_table_lookup(options, "column_formats"), NULL);
1093 inc->column_formats = g_strdup(s);
1094 sr_dbg("User specified column_formats: %s.", s);
/* Simple single-column mode: optionally skip columns, then one multi-bit column. */
1095 } else if (single_column && logic_channels) {
1096 format_char = col_format_char[format];
1097 if (single_column == 1) {
1098 inc->column_formats = g_strdup_printf("%c%zu",
1099 format_char, logic_channels);
1101 inc->column_formats = g_strdup_printf("%zu-,%c%zu",
1103 format_char, logic_channels);
1105 sr_dbg("Backwards compat single_column, col %zu, fmt %s, bits %zu -> %s.",
1106 single_column, col_format_text[format], logic_channels,
1107 inc->column_formats);
/* Simple multi-column mode: optionally skip columns, then single-bit columns. */
1108 } else if (!single_column) {
1109 if (first_column > 1) {
1110 inc->column_formats = g_strdup_printf("%zu-,%zul",
1111 first_column - 1, logic_channels);
1113 inc->column_formats = g_strdup_printf("%zul",
1116 sr_dbg("Backwards compat multi-column, col %zu, chans %zu -> %s.",
1117 first_column, logic_channels,
1118 inc->column_formats);
/* Remaining case: a single column was requested without a channel count. */
1120 sr_warn("Unknown or unsupported columns layout spec, assuming simple multi-column mode.");
1121 inc->column_formats = g_strdup("*l");
1128 * Check the channel list for consistency across file re-import. See
1129 * the VCD input module for more details and motivation.
/*
 * Move the current channel list of the device instance into the module
 * context (inc->prev_sr_channels), where check_header_in_reread() picks
 * it up during a later import. The device instance is left without a
 * channel list afterwards.
 */
1132 static void keep_header_for_reread(const struct sr_input *in)
1134 struct context *inc;
/* Release a channel list which an earlier call has kept. */
1137 g_slist_free_full(inc->prev_sr_channels, sr_channel_free_cb);
/* Hand the list over to the context, sdi no longer refers to it. */
1138 inc->prev_sr_channels = in->sdi->channels;
1139 in->sdi->channels = NULL;
/*
 * Compare the channel list which was just created for the device
 * instance against the list that keep_header_for_reread() has kept
 * from a previous import. A difference is reported via sr_err().
 * When the lists agree, the new list is released and the kept list
 * becomes the channel list of the device instance again.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * The caller tests the result with a logical negation, so presumably
 * a zero value signals a mismatch -- confirm.
 */
1142 static int check_header_in_reread(const struct sr_input *in)
1144 struct context *inc;
/* Without a kept list there is nothing to compare against. */
1151 if (!inc->prev_sr_channels)
1154 if (sr_channel_lists_differ(inc->prev_sr_channels, in->sdi->channels)) {
1155 sr_err("Channel list change not supported for file re-read.");
/* Lists agree: drop the new list, re-use the previously kept one. */
1158 g_slist_free_full(in->sdi->channels, sr_channel_free_cb);
1159 in->sdi->channels = inc->prev_sr_channels;
1160 inc->prev_sr_channels = NULL;
/*
 * Set of line break characters (CR and LF). Passed to g_strsplit_set()
 * in initial_parse() to split buffered text into lines.
 */
1165 static const char *delim_set = "\r\n";
/*
 * Determine which end-of-line convention the buffered text uses, by
 * searching the first buf->len bytes of buf->str for CR LF, then for
 * LF, then for CR.
 *
 * NOTE(review): the statements which pick the result are not visible
 * in this listing. Presumably the matching sequence is returned, and
 * NULL when none of them was found -- confirm.
 */
1167 static const char *get_line_termination(GString *buf)
/* CR LF comes first, the single character checks would match it, too. */
1172 if (g_strstr_len(buf->str, buf->len, "\r\n"))
1174 else if (memchr(buf->str, '\n', buf->len))
1176 else if (memchr(buf->str, '\r', buf->len))
/*
 * Inspect the initially received text: locate the first text line to
 * process (lines before the start line, blank lines, and comment-only
 * lines are logged as skipped), split that line into columns, create
 * the column processing details from the column format spec, check
 * the resulting channels against a previous import of the file, and
 * allocate the buffers for datafeed submission of logic and analog
 * sample data.
 *
 * in:  the input module instance.
 * buf: the text to inspect. initial_receive() passes a copy of the
 *      input buffer which ends before the last line termination.
 *
 * NOTE(review): short statements (error code assignments, jumps,
 * returns) are not visible in this listing. The comments below only
 * describe the visible statements.
 */
1182 static int initial_parse(const struct sr_input *in, GString *buf)
1184 struct context *inc;
1186 size_t line_number, line_idx;
1188 char **lines, *line, **columns;
1194 /* Search for the first line to process (header or data). */
/*
 * Split at the detected termination sequence when one is known. The
 * delim_set variant splits at any CR or LF character instead.
 */
1196 if (inc->termination)
1197 lines = g_strsplit(buf->str, inc->termination, 0);
1199 lines = g_strsplit_set(buf->str, delim_set, 0);
1200 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1202 if (inc->start_line > line_number) {
1203 sr_spew("Line %zu skipped (before start).", line_number);
1206 if (line[0] == '\0') {
1207 sr_spew("Blank line %zu skipped.", line_number);
/* Lines which are empty after comment removal do not count either. */
1210 strip_comment(line, inc->comment);
1211 if (line[0] == '\0') {
1212 sr_spew("Comment-only line %zu skipped.", line_number);
1216 /* Reached first proper line. */
1220 /* Not enough data for a proper line yet. */
1225 /* Get the number of columns in the line. */
1226 columns = split_line(line, inc);
1228 sr_err("Error while parsing line %zu.", line_number);
1232 num_columns = g_strv_length(columns);
1234 sr_err("Error while parsing line %zu.", line_number);
1238 sr_dbg("DIAG Got %zu columns in text line: %s.", num_columns, line);
1241 * Interpret the user provided column format specs. This might
1242 * involve inspection of the now received input text, to support
1243 * e.g. automatic detection of channel counts in the absence of
1244 * user provided specs. Optionally a header line is used to get
1247 * Check the then created channels for consistency across .reset
1248 * and .receive sequences (file re-load).
1250 ret = make_column_details_from_format(in, inc->column_formats, columns);
1252 sr_err("Cannot parse columns format using line %zu.", line_number);
1255 if (!check_header_in_reread(in)) {
1261 * Allocate buffer memory for datafeed submission of sample data.
1262 * Calculate the minimum buffer size to store the set of samples
1263 * of all channels (unit size). Determine a larger buffer size
1264 * for datafeed submission that is a multiple of the unit size.
1265 * Allocate the larger buffer, the "sample buffer" will point
1266 * to a location within that large buffer later.
1268 * TODO Move channel creation here, and just store required
1269 * parameters in the format parser above? Could simplify the
1270 * arrangement that logic and analog channels get created in
1271 * strict sequence in their respective groups.
/*
 * Logic data: one sample unit holds the bits of all logic channels,
 * rounded up to whole bytes. The buffer has room for CHUNK_SIZE units.
 */
1273 if (inc->logic_channels) {
1274 inc->sample_unit_size = (inc->logic_channels + 7) / 8;
1275 inc->datafeed_buf_size = CHUNK_SIZE;
1276 inc->datafeed_buf_size *= inc->sample_unit_size;
/*
 * NOTE(review): GLib documents that g_malloc() terminates the
 * application when the allocation fails, so the NULL check below is
 * presumably never true -- confirm.
 */
1277 inc->datafeed_buffer = g_malloc(inc->datafeed_buf_size);
1278 if (!inc->datafeed_buffer) {
1279 sr_err("Cannot allocate datafeed send buffer (logic).");
1280 ret = SR_ERR_MALLOC;
1283 inc->datafeed_buf_fill = 0;
/*
 * Analog data: analog_datafeed_buf_size is a count of samples per
 * channel, chosen such that the values of all analog channels together
 * occupy at most CHUNK_SIZE bytes.
 */
1286 if (inc->analog_channels) {
1287 size_t sample_size, sample_count;
1289 struct column_details *detail;
1292 sample_size = sizeof(inc->analog_datafeed_buffer[0]);
1293 inc->analog_datafeed_buf_size = CHUNK_SIZE;
1294 inc->analog_datafeed_buf_size /= sample_size;
1295 inc->analog_datafeed_buf_size /= inc->analog_channels;
1296 sample_count = inc->analog_channels * inc->analog_datafeed_buf_size;
1297 inc->analog_datafeed_buffer = g_malloc0(sample_count * sample_size);
1298 if (!inc->analog_datafeed_buffer) {
1299 sr_err("Cannot allocate datafeed send buffer (analog).");
1300 ret = SR_ERR_MALLOC;
1303 inc->analog_datafeed_buf_fill = 0;
/* One channel list and one digits count per analog channel. */
1304 inc->analog_datafeed_channels = g_malloc0(inc->analog_channels * sizeof(inc->analog_datafeed_channels[0]));
1305 inc->analog_datafeed_digits = g_malloc0(inc->analog_channels * sizeof(inc->analog_datafeed_digits[0]));
1306 digits_item = inc->analog_datafeed_digits;
1307 for (detail_idx = 0; detail_idx < inc->column_want_count; detail_idx++) {
1308 detail = &inc->column_details[detail_idx];
1309 if (!format_is_analog(detail->text_format))
/*
 * Store a single-element list with the channel of this column at the
 * channel offset of the column, and append the digits count of the
 * column to the digits array.
 */
1311 channel = g_slist_nth_data(in->sdi->channels, detail->channel_index);
1312 inc->analog_datafeed_channels[detail->channel_offset] = g_slist_append(NULL, channel);
1313 *digits_item++ = detail->analog_digits;
1319 g_strfreev(columns);
1326 * Gets called from initial_receive(), which runs until the end-of-line
1327 * encoding of the input stream could get determined. Assumes that this
1328 * routine receives enough buffered initial input data to either see the
1329 * BOM when there is one, or that no BOM will follow when a text line
1330 * termination sequence was seen. Silently drops the UTF-8 BOM sequence
1331 * from the input buffer if one was seen. Does not care to protect
1332 * against multiple execution or dropping the BOM multiple times --
1333 * there should be at most one in the input stream.
/* Remove a leading UTF-8 byte order mark from in->buf when present. */
1335 static void initial_bom_check(const struct sr_input *in)
1337 static const char *utf8_bom = "\xef\xbb\xbf";
/* Fewer bytes are buffered than the BOM is long. */
1339 if (in->buf->len < strlen(utf8_bom))
/* The leading bytes are something else than the BOM. */
1341 if (strncmp(in->buf->str, utf8_bom, strlen(utf8_bom)) != 0)
/* Drop the BOM bytes from the front of the input buffer. */
1343 g_string_erase(in->buf, 0, strlen(utf8_bom));
/*
 * Handle the initially received input text: drop a BOM, detect the
 * line termination sequence, copy the buffered text in front of the
 * last termination sequence into a temporary string, store the
 * termination sequence in the context, and have the temporary string
 * inspected by initial_parse().
 *
 * NOTE(review): the early exits for the "no full line yet" conditions
 * and the final return are not visible in this listing. receive()
 * compares the result against SR_ERR_NA and SR_OK.
 */
1346 static int initial_receive(const struct sr_input *in)
1348 struct context *inc;
1352 const char *termination;
1354 initial_bom_check(in);
1358 termination = get_line_termination(in->buf);
1360 /* Don't have a full line yet. */
/* Position of the last termination sequence in the buffered text. */
1363 p = g_strrstr_len(in->buf->str, in->buf->len, termination);
1365 /* Don't have a full line yet. */
/*
 * NOTE(review): p already points to the start of the termination
 * sequence. The additional "- 1" also drops the last character in
 * front of it, and results in -1 when the buffer starts with the
 * termination sequence. Looks like an off-by-one -- confirm.
 */
1367 len = p - in->buf->str - 1;
1368 new_buf = g_string_new_len(in->buf->str, len);
1369 g_string_append_c(new_buf, '\0');
/*
 * NOTE(review): receive() invokes this routine again as long as no
 * columns were seen. A termination string which an earlier invocation
 * has stored gets overwritten here without being freed. Looks like a
 * memory leak -- confirm.
 */
1371 inc->termination = g_strdup(termination);
/* Only parse when the buffer does not start with a NUL character. */
1373 if (in->buf->str[0] != '\0')
1374 ret = initial_parse(in, new_buf);
1378 g_string_free(new_buf, TRUE);
/*
 * Process the text which has accumulated in in->buf: split it into
 * lines, split the lines into columns, have the columns converted to
 * sample data by the parse routine of their text format, and queue
 * the samples for datafeed submission. The processed text is removed
 * from the input buffer afterwards.
 *
 * The datafeed header is sent upon the first invocation (tracked by
 * inc->started).
 *
 * in:     the input module instance.
 * is_eof: whether the end of the input was reached, see the comment
 *         in the function body on the handling of the last line.
 *
 * NOTE(review): short statements (conditions, continue, return) are
 * not visible in this listing. The comments below only describe the
 * visible statements.
 */
1383 static int process_buffer(struct sr_input *in, gboolean is_eof)
1385 struct context *inc;
1387 size_t line_idx, col_idx, col_nr;
1388 const struct column_details *details;
1389 col_parse_cb parse_func;
1391 char *p, **lines, *line, **columns, *column;
1394 if (!inc->started) {
1395 std_session_send_df_header(in->sdi);
1396 inc->started = TRUE;
1400 * Consider empty input non-fatal. Keep accumulating input until
1401 * at least one full text line has become available. Grab the
1402 * maximum amount of accumulated data that consists of full text
1403 * lines, and process what has been received so far, leaving not
1404 * yet complete lines for the next invocation.
1406 * Enforce that all previously buffered data gets processed in
1407 * the "EOF" condition. Do not insist in the presence of the
1408 * termination sequence for the last line (may often be missing
1409 * on Windows). A present termination sequence will just result
1410 * in the "execution of an empty line", and does not harm.
/*
 * p marks the end of the text which this invocation consumes: either
 * the end of the buffered data, or the position after the last
 * termination sequence. The final g_string_erase() call removes the
 * text in front of p.
 */
1415 p = in->buf->str + in->buf->len;
1417 p = g_strrstr_len(in->buf->str, in->buf->len, inc->termination);
1421 p += strlen(inc->termination);
/* Remove leading and trailing whitespace from the text, in place. */
1423 g_strstrip(in->buf->str);
1426 lines = g_strsplit(in->buf->str, inc->termination, 0);
1427 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1429 if (inc->line_number < inc->start_line) {
1430 sr_spew("Line %zu skipped (before start).", inc->line_number);
1433 if (line[0] == '\0') {
1434 sr_spew("Blank line %zu skipped.", inc->line_number);
1438 /* Remove trailing comment. */
1439 strip_comment(line, inc->comment);
1440 if (line[0] == '\0') {
1441 sr_spew("Comment-only line %zu skipped.", inc->line_number);
1445 /* Skip the header line, its content was used as the channel names. */
1446 if (inc->use_header && !inc->header_seen) {
1447 sr_spew("Header line %zu skipped.", inc->line_number);
1448 inc->header_seen = TRUE;
1452 /* Split the line into columns, check for minimum length. */
1453 columns = split_line(line, inc);
1455 sr_err("Error while parsing line %zu.", inc->line_number);
1459 num_columns = g_strv_length(columns);
1460 if (num_columns < inc->column_want_count) {
1461 sr_err("Insufficient column count %zu in line %zu.",
1462 num_columns, inc->line_number);
1463 g_strfreev(columns);
1468 /* Have the columns of the current text line processed. */
1469 clear_logic_samples(inc);
1470 clear_analog_samples(inc);
1471 for (col_idx = 0; col_idx < inc->column_want_count; col_idx++) {
1472 column = columns[col_idx];
/* Column details are looked up by column number, counting from 1. */
1473 col_nr = col_idx + 1;
1474 details = lookup_column_details(inc, col_nr);
/* Check for columns without details or without a text format. */
1475 if (!details || !details->text_format)
/* The text format selects the routine which converts the column. */
1477 parse_func = col_parse_funcs[details->text_format];
1480 ret = parse_func(column, inc, details);
1482 g_strfreev(columns);
1488 /* Send sample data to the session bus (buffered). */
/*
 * NOTE(review): the two return codes get summed up. The sum does not
 * identify which call failed, and presumably is only compared against
 * SR_OK -- confirm.
 */
1489 ret = queue_logic_samples(in);
1490 ret += queue_analog_samples(in);
1492 sr_err("Sending samples failed.");
1493 g_strfreev(columns);
1498 g_strfreev(columns);
/* Remove the processed text, keep what follows p for the next call. */
1501 g_string_erase(in->buf, 0, p - in->buf->str);
/*
 * Accept another chunk of input text. The chunk is appended to the
 * input buffer in->buf. As long as no columns have been seen
 * (inc->column_seen_count is zero), initial_receive() inspects the
 * buffered text, and in->sdi_ready gets set after its result was
 * checked. Otherwise the buffered text is passed to process_buffer()
 * with the EOF flag cleared.
 *
 * NOTE(review): the return statements of the individual branches are
 * not visible in this listing.
 */
1506 static int receive(struct sr_input *in, GString *buf)
1508 struct context *inc;
1511 g_string_append_len(in->buf, buf->str, buf->len);
1514 if (!inc->column_seen_count) {
1515 ret = initial_receive(in);
1516 if (ret == SR_ERR_NA)
1517 /* Not enough data yet. */
1519 else if (ret != SR_OK)
1522 /* sdi is ready, notify frontend. */
1523 in->sdi_ready = TRUE;
1527 ret = process_buffer(in, FALSE);
/*
 * Handle the end of the input stream: process the remaining buffered
 * text with the EOF flag set, flush the queued logic and analog sample
 * data, and send the datafeed end marker.
 *
 * NOTE(review): the conditions which guard these calls, and the error
 * checks between them, are not visible in this listing.
 */
1532 static int end(struct sr_input *in)
1534 struct context *inc;
1538 ret = process_buffer(in, TRUE);
/* The return codes of both flush calls get summed up. */
1544 ret = flush_logic_samples(in);
1545 ret += flush_analog_samples(in);
1551 std_session_send_df_end(in->sdi);
/*
 * Release resources of the input module instance. The channel list is
 * kept in the context for a later re-read of the file. The stored
 * line termination text and the logic and analog datafeed buffers are
 * freed, and their pointers are cleared.
 *
 * NOTE(review): initial_parse() also allocates
 * inc->analog_datafeed_channels and inc->analog_datafeed_digits. They
 * are not released here -- confirm whether they are released elsewhere
 * or leak when the file is read again.
 */
1556 static void cleanup(struct sr_input *in)
1558 struct context *inc;
1560 keep_header_for_reread(in);
1564 g_free(inc->termination);
1565 inc->termination = NULL;
1566 g_free(inc->datafeed_buffer);
1567 inc->datafeed_buffer = NULL;
1568 g_free(inc->analog_datafeed_buffer);
1569 inc->analog_datafeed_buffer = NULL;
/*
 * Prepare the input module instance for another import: clear the
 * "started" flag, so that process_buffer() sends the datafeed header
 * again, and discard all buffered input text. The context in in->priv
 * remains in use.
 */
1572 static int reset(struct sr_input *in)
1574 struct context *inc = in->priv;
1577 inc->started = FALSE;
1578 g_string_truncate(in->buf, 0);
/*
 * Table of the options which the CSV input module supports. Each entry
 * holds the option key, a short label, and a description text. The
 * default values (and the list of accepted values for single_format)
 * are filled in by get_options(). The OPT_MAX entry is all zero.
 *
 * NOTE(review): some of the short labels end in a period while others
 * do not, and the comment_leader label says "character" while its
 * description says "text" -- consider unifying the wording.
 */
1597 static struct sr_option options[] = {
1599 "column_formats", "Column format specs",
1600 "Specifies text columns data types: A comma separated list of [<cols>]<fmt>[<bits>] items, with - to ignore columns, x/o/b/l for logic data, a (and resolution) for analog data, t for timestamps.",
1603 [OPT_SINGLE_COL] = {
1604 "single_column", "Single column",
1605 "Enable single-column mode, exclusively use text from the specified column (number starting at 1). Obsoleted by 'column_formats'.",
1609 "first_column", "First column",
1610 "Number of the first column with logic data in simple multi-column mode (number starting at 1, default 1). Obsoleted by 'column_formats'.",
1614 "logic_channels", "Number of logic channels",
1615 "Logic channel count, required in simple single-column mode, defaults to \"all remaining columns\" in simple multi-column mode. Obsoleted by 'column_formats'.",
1619 "single_format", "Data format for simple single-column mode.",
1620 "The number format of single-column mode input data: bin, hex, oct. Obsoleted by 'column_formats'.",
1624 "start_line", "Start line",
1625 "The line number at which to start processing input text (default: 1).",
1629 "header", "Get channel names from first line.",
1630 "Use the first processed line's column captions (when available) as channel names. Off by default",
1634 "samplerate", "Samplerate (Hz)",
1635 "The input data's sample rate in Hz. No default value.",
1639 "column_separator", "Column separator",
1640 "The sequence which separates text columns. Non-empty text, comma by default.",
1644 "comment_leader", "Comment leader character",
1645 "The text which starts comments at the end of text lines, semicolon by default.",
1648 [OPT_MAX] = ALL_ZERO,
/*
 * Fill in the default values of the option table upon first use. The
 * first table entry lacking a default value is taken as the "not yet
 * initialized" condition. Every GVariant is created with
 * g_variant_ref_sink(), so the table holds a reference of its own.
 *
 * NOTE(review): the check uses index 0 while the assignments use the
 * OPT_* names. Presumably OPT_COL_FMTS equals 0 -- confirm.
 */
1651 static const struct sr_option *get_options(void)
1655 if (!options[0].def) {
1656 options[OPT_COL_FMTS].def = g_variant_ref_sink(g_variant_new_string(""));
1657 options[OPT_SINGLE_COL].def = g_variant_ref_sink(g_variant_new_uint32(0));
1658 options[OPT_FIRST_COL].def = g_variant_ref_sink(g_variant_new_uint32(1));
1659 options[OPT_NUM_LOGIC].def = g_variant_ref_sink(g_variant_new_uint32(0));
1660 options[OPT_FORMAT].def = g_variant_ref_sink(g_variant_new_string("bin"));
/* The list of values which the single_format option accepts. */
1662 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("bin")));
1663 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("hex")));
1664 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("oct")));
1665 options[OPT_FORMAT].values = l;
1666 options[OPT_START].def = g_variant_ref_sink(g_variant_new_uint32(1));
1667 options[OPT_HEADER].def = g_variant_ref_sink(g_variant_new_boolean(FALSE));
1668 options[OPT_RATE].def = g_variant_ref_sink(g_variant_new_uint64(0));
1669 options[OPT_DELIM].def = g_variant_ref_sink(g_variant_new_string(","));
1670 options[OPT_COMMENT].def = g_variant_ref_sink(g_variant_new_string(";"));
1676 SR_PRIV struct sr_input_module input_csv = {
1679 .desc = "Comma-separated values",
1680 .exts = (const char*[]){"csv", NULL},
1681 .options = get_options,