2 * This file is part of the libsigrok project.
4 * Copyright (C) 2013 Marc Schink <sigrok-dev@marcschink.de>
5 * Copyright (C) 2019 Gerhard Sittig <gerhard.sittig@gmx.net>
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
27 #include <libsigrok/libsigrok.h>
28 #include "libsigrok-internal.h"
29 #include "scpi.h" /* String un-quote for channel name from header line. */
31 #define LOG_PREFIX "input/csv"
33 #define CHUNK_SIZE (4 * 1024 * 1024)
36 * The CSV input module has the following options:
38 * column_formats: Specifies the data formats and channel counts for the
39 * input file's text columns. Accepts a comma separated list of tuples
40 * with: an optional column repeat count ('*' as a wildcard meaning
41 * "all remaining columns", only applicable to the last field), a format
42 * specifying character ('x' hexadecimal, 'o' octal, 'b' binary, 'l'
43 * single-bit logic), and an optional bit count (translating to: logic
44 * channels communicated in that column). The 'a' format marks analog
45 * data, an optionally following number is the digits count (resolution).
46 * This "column_formats" option is most versatile, other forms of
47 * specifying the column layout only exist for backwards compatibility.
49 * single_column: Specifies the column number which contains the logic data
50 * for single-column mode. All logic data is taken from several bits
51 * which all are kept within that one column. Only exists for backwards
52 * compatibility, see "column_formats" for more flexibility.
54 * first_column: Specifies the number of the first column with logic data
55 * in simple multi-column mode. Only exists for backwards compatibility,
56 * see "column_formats" for more flexibility.
58 * logic_channels: Specifies the number of logic channels. Is required in
59 * simple single-column mode. Is optional in simple multi-column mode
60 * (and defaults to all remaining columns). Only exists for backwards
61 * compatibility, see "column_formats" for more flexibility.
63 * single_format: Specifies the format of the input text in simple single-
64 * column mode. Available formats are: 'bin' (default), 'hex' and 'oct'.
65 * Simple multi-column mode always uses single-bit data per column.
66 * Only exists for backwards compatibility, see "column_formats" for
69 * start_line: Specifies at which line to start processing the input file.
70 * Allows to skip leading lines which neither are header nor data lines.
71 * By default all of the input file gets processed.
73 * header: Boolean option, controls whether the first processed line is used
74 * to determine channel names. Off by default. Generic channel names are
75 * used in the absence of header line content.
77 * samplerate: Specifies the samplerate of the input data. Defaults to 0.
78 * User specs take precedence over data which optionally gets derived
81 * column_separator: Specifies the sequence which separates the text file
82 * columns. Cannot be empty. Defaults to comma.
84 * comment_leader: Specifies the sequence which starts comments that run
85 * up to the end of the current text line. Can be empty to disable
86 * comment support. Defaults to semicolon.
88 * Typical examples of using these options:
89 * - ... -I csv:column_formats=*l ...
90 * All columns are single-bit logic data. Identical to the previous
91 * multi-column mode (the default when no options were given at all).
92 * - ... -I csv:column_formats=3-,*l ...
93 * Ignore the first three columns, get single-bit logic data from all
94 * remaining columns (multi-column mode with first-column above 1).
95 * - ... -I csv:column_formats=3-,4l,x8 ...
96 * Ignore the first three columns, get single-bit logic data from the
97 * next four columns, then eight-bit data in hex format from the next
98 * column. More columns may follow in the input text but won't get
99 * processed. (Mix of previous multi-column as well as single-column
101 * - ... -I csv:column_formats=4x8,b16,5l ...
102 * Get eight-bit data in hex format from the first four columns, then
103 * sixteen-bit data in binary format, then five times single-bit data.
104 * - ... -I csv:single_column=2:single_format=bin:logic_channels=8 ...
105 * Get eight logic bits in binary format from column 2. (Simple
106 * single-column mode, corresponds to the "-,b8" format.)
107 * - ... -I csv:first_column=6:logic_channels=4 ...
108 * Get four single-bit logic channels from columns 6 to 9 respectively.
109 * (Simple multi-column mode, corresponds to the "5-,4b" format.)
110 * - ... -I csv:start_line=20:header=yes:...
111 * Skip the first 19 text lines. Use line 20 to derive channel names.
112 * Data starts at line 21.
113 * - ... -I csv:column_formats=*a6 ...
114 * Each column contains an analog value with six significant digits
115 * after the decimal period.
121 * - Extend support for analog input data.
122 * - Determine why analog samples of 'double' data type get scrambled
123 * in sigrok-cli screen output. Is analog.encoding->unitsize not
124 * handled properly? A sigrok-cli or libsigrok (src/output) issue?
125 * - Reconsider the channel creation after format processing. Current
126 * logic may "bleed" channel names into the analog group when logic
127 * channels' columns follow analog columns (seen with "-,2a,x8").
128 * Trying to sort it out, a naive change used to map logic channels'
129 * data to incorrect bitmap positions. The whole channel numbering
130 * needs reconsideration. Probably it's easiest to first create _all_
131 * logic channels so that they have adjacent numbers starting at 0
132 * (addressing logic bits), then all analog channels (again adjacent)
133 * to simplify the calculation of their index in the sample set as
134 * well as their sdi channel index from the "analog column index".
135 * - Optionally get sample rate from timestamp column. Just best-effort
136 * approach, not necessarily reliable. Users can always specify rates.
137 * - Add a test suite for input modules in general, and CSV in specific?
138 * Becomes more important with the multitude of options and their
139 * interaction. Could cover edge cases (BOM presence, line termination
140 * absence, etc) and auto-stuff as well (channel names, channel counts,
/* Element type of this module's analog sample and datafeed buffers. */
144 typedef float csv_analog_t; /* 'double' currently is flawed. */
146 /* Single column formats. */
/*
 * Text format of an input column. The values also serve as the index
 * into the col_format_text[], col_format_char[] and col_parse_funcs[]
 * tables.
 */
147 enum single_col_format {
148 FORMAT_NONE, /* Ignore this column. */
149 FORMAT_BIN, /* Bin digits for a set of bits (or just one bit). */
150 FORMAT_HEX, /* Hex digits for a set of bits. */
151 FORMAT_OCT, /* Oct digits for a set of bits. */
152 FORMAT_ANALOG, /* Floating point number for an analog channel. */
/* Human readable format names, used in debug and error messages. */
155 static const char *col_format_text[] = {
156 [FORMAT_NONE] = "unknown",
157 [FORMAT_BIN] = "binary",
158 [FORMAT_HEX] = "hexadecimal",
159 [FORMAT_OCT] = "octal",
160 [FORMAT_ANALOG] = "analog",
/*
 * Format letters as used in the "column_formats" spec. init() looks up
 * the letter here when it builds a spec from the backwards compatible
 * single-column options.
 */
163 static const char col_format_char[] = {
168 [FORMAT_ANALOG] = 'a',
/*
 * Processing details for one input text column. Gets filled in by
 * make_column_details_from_format().
 */
171 struct column_details {
173 enum single_col_format text_format;
/* First logic bit (logic formats) or analog slot (analog format). */
174 size_t channel_offset;
/* Number of channels which get their data from this column. */
175 size_t channel_count;
/* Analog only: position in the sdi channel list at creation time. */
176 size_t channel_index;
183 /* Current selected samplerate. */
/* Set once the samplerate was sent, see flush_samplerate(). */
185 gboolean samplerate_sent;
187 /* Number of channels. */
188 size_t logic_channels;
189 size_t analog_channels;
191 /* Column delimiter (actually separator), comment leader, EOL sequence. */
196 /* Format specs for input columns, and processing state. */
197 size_t column_seen_count;
198 const char *column_formats;
199 size_t column_want_count;
200 struct column_details *column_details;
202 /* Line number to start processing. */
206 * Determines if the first line should be treated as header and used for
207 * channel names in multi column mode.
210 gboolean header_seen;
212 size_t sample_unit_size; /**< Byte count for a single sample. */
213 uint8_t *sample_buffer; /**< Buffer for a single sample. */
214 csv_analog_t *analog_sample_buffer; /**< Buffer for one set of analog values. */
216 uint8_t *datafeed_buffer; /**< Queue for datafeed submission. */
/* Size and fill level of the logic queue, both counted in bytes. */
217 size_t datafeed_buf_size;
218 size_t datafeed_buf_fill;
219 /* "Striped" layout, M samples for N channels each. */
220 csv_analog_t *analog_datafeed_buffer; /**< Queue for analog datafeed. */
/* Capacity and fill level of the analog queue, in samples per channel. */
221 size_t analog_datafeed_buf_size;
222 size_t analog_datafeed_buf_fill;
/* Per analog channel: a one-element channel list, and the digits count. */
223 GSList **analog_datafeed_channels;
224 int *analog_datafeed_digits;
226 /* Current line number. */
229 /* List of previously created sigrok channels. */
230 GSList *prev_sr_channels;
234 * Primitive operations to handle sample sets:
235 * - Keep a buffer for datafeed submission, capable of holding many
236 * samples (reduces call overhead, improves throughput).
237 * - Have a "current sample set" pointer reference one position in that
238 * large samples buffer.
239 * - Clear the current sample set before text line inspection, then set
240 * the bits which are found active in the current line of text input.
241 * Phrase the API such that call sites can be kept simple. Advance to
242 * the next sample set between lines, flush the larger buffer as needed
243 * (when it is full, or upon EOF).
/*
 * Send the samplerate to the session in an SR_DF_META packet. Only acts
 * when a non-zero samplerate is known and was not sent before, the
 * samplerate_sent flag prevents repeated submission.
 */
246 static int flush_samplerate(const struct sr_input *in)
249 struct sr_datafeed_packet packet;
250 struct sr_datafeed_meta meta;
251 struct sr_config *src;
254 if (inc->samplerate && !inc->samplerate_sent) {
255 packet.type = SR_DF_META;
256 packet.payload = &meta;
257 src = sr_config_new(SR_CONF_SAMPLERATE, g_variant_new_uint64(inc->samplerate));
/* The meta packet carries a list of config items, here with one entry. */
258 meta.config = g_slist_append(NULL, src);
259 sr_session_send(in->sdi, &packet);
260 g_slist_free(meta.config);
262 inc->samplerate_sent = TRUE;
/*
 * Start a new logic sample: Point sample_buffer to the current fill
 * position within the datafeed queue, and clear all bits of that sample.
 * Subsequent set_logic_level() calls then only need to set bits.
 */
268 static void clear_logic_samples(struct context *inc)
270 if (!inc->logic_channels)
272 inc->sample_buffer = &inc->datafeed_buffer[inc->datafeed_buf_fill];
273 memset(inc->sample_buffer, 0, inc->sample_unit_size);
/*
 * Set the bit of logic channel ch_idx in the current sample. Channel
 * indices beyond the number of logic channels are checked for first.
 * NOTE(review): presumably a zero 'on' value leaves the (previously
 * cleared) bit untouched -- confirm.
 */
276 static void set_logic_level(struct context *inc, size_t ch_idx, int on)
278 size_t byte_idx, bit_idx;
281 if (ch_idx >= inc->logic_channels)
/* Eight channels per byte, channel 0 is the LSB of the first byte. */
286 byte_idx = ch_idx / 8;
287 bit_idx = ch_idx % 8;
288 bit_mask = 1 << bit_idx;
289 inc->sample_buffer[byte_idx] |= bit_mask;
/*
 * Send all queued logic samples to the session in a single SR_DF_LOGIC
 * packet, then mark the queue as empty. Checks for an empty queue first.
 * The samplerate gets flushed before the sample data.
 */
292 static int flush_logic_samples(const struct sr_input *in)
295 struct sr_datafeed_packet packet;
296 struct sr_datafeed_logic logic;
300 if (!inc->datafeed_buf_fill)
303 rc = flush_samplerate(in);
307 memset(&packet, 0, sizeof(packet));
308 memset(&logic, 0, sizeof(logic));
309 packet.type = SR_DF_LOGIC;
310 packet.payload = &logic;
311 logic.unitsize = inc->sample_unit_size;
/* The fill level is a byte count, it advances by the unit size per sample. */
312 logic.length = inc->datafeed_buf_fill;
313 logic.data = inc->datafeed_buffer;
315 rc = sr_session_send(in->sdi, &packet);
319 inc->datafeed_buf_fill = 0;
/*
 * Commit the current logic sample to the datafeed queue. Flush the
 * queue when it became full.
 */
323 static int queue_logic_samples(const struct sr_input *in)
329 if (!inc->logic_channels)
332 inc->datafeed_buf_fill += inc->sample_unit_size;
/*
 * Testing for equality is sufficient: initial_parse() sizes the queue
 * as a multiple of the sample unit size.
 */
333 if (inc->datafeed_buf_fill == inc->datafeed_buf_size) {
334 rc = flush_logic_samples(in);
/* Forward declaration, clear_analog_samples() uses the routine. */
341 static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value);
/*
 * Start a new set of analog values: Point analog_sample_buffer to the
 * current fill position within the analog datafeed queue, and preset
 * all analog channels' values for that sample to zero.
 */
343 static void clear_analog_samples(struct context *inc)
347 if (!inc->analog_channels)
349 inc->analog_sample_buffer = &inc->analog_datafeed_buffer[inc->analog_datafeed_buf_fill];
350 for (idx = 0; idx < inc->analog_channels; idx++)
351 set_analog_value(inc, idx, 0.0);
/*
 * Store the value of analog channel ch_idx for the current sample. The
 * queue uses a "striped" layout where each channel owns a run of
 * analog_datafeed_buf_size values. analog_sample_buffer references the
 * current sample's position in the first run, so the multiplication
 * skips ahead to the same position in the given channel's run.
 */
354 static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value)
356 if (ch_idx >= inc->analog_channels)
360 inc->analog_sample_buffer[ch_idx * inc->analog_datafeed_buf_size] = value;
/*
 * Send all queued analog samples to the session, one SR_DF_ANALOG packet
 * per analog channel, then mark the queue as empty. Checks for an empty
 * queue first. The samplerate gets flushed before the sample data.
 */
363 static int flush_analog_samples(const struct sr_input *in)
366 struct sr_datafeed_packet packet;
367 struct sr_datafeed_analog analog;
368 struct sr_analog_encoding encoding;
369 struct sr_analog_meaning meaning;
370 struct sr_analog_spec spec;
371 csv_analog_t *samples;
377 if (!inc->analog_datafeed_buf_fill)
380 rc = flush_samplerate(in);
/* Start at the first channel's run of values in the striped queue. */
384 samples = inc->analog_datafeed_buffer;
385 for (ch_idx = 0; ch_idx < inc->analog_channels; ch_idx++) {
386 digits = inc->analog_datafeed_digits[ch_idx];
387 sr_analog_init(&analog, &encoding, &meaning, &spec, digits);
388 memset(&packet, 0, sizeof(packet));
389 packet.type = SR_DF_ANALOG;
390 packet.payload = &analog;
391 analog.num_samples = inc->analog_datafeed_buf_fill;
392 analog.data = samples;
/* Pre-built channel list which holds just this one channel. */
393 analog.meaning->channels = inc->analog_datafeed_channels[ch_idx];
394 analog.meaning->mq = 0;
395 analog.meaning->mqflags = 0;
396 analog.meaning->unit = 0;
/* Samples are sent in the host's native floating point representation. */
397 analog.encoding->unitsize = sizeof(samples[0]);
398 analog.encoding->is_signed = TRUE;
399 analog.encoding->is_float = TRUE;
400 #ifdef WORDS_BIGENDIAN
401 analog.encoding->is_bigendian = TRUE;
403 analog.encoding->is_bigendian = FALSE;
405 analog.encoding->digits = spec.spec_digits;
406 rc = sr_session_send(in->sdi, &packet);
/* Advance to the next channel's run of values. */
409 samples += inc->analog_datafeed_buf_size;
412 inc->analog_datafeed_buf_fill = 0;
/*
 * Commit the current set of analog values to the datafeed queue. Flush
 * the queue when it became full. The fill level counts samples here,
 * not bytes.
 */
416 static int queue_analog_samples(const struct sr_input *in)
422 if (!inc->analog_channels)
425 inc->analog_datafeed_buf_fill++;
426 if (inc->analog_datafeed_buf_fill == inc->analog_datafeed_buf_size) {
427 rc = flush_analog_samples(in);
434 /* Helpers for "column processing". */
/*
 * Split one field of the "column_formats" spec into its components: the
 * column repeat count, the format letter, and the trailing number (bit
 * count for logic formats, digits count for the analog format). The
 * components are passed back via the output parameters.
 */
436 static int split_column_format(const char *spec,
437 size_t *column_count, enum single_col_format *format, size_t *bit_count)
440 char *endp, format_char;
441 enum single_col_format format_code;
446 /* Get the (optional, decimal, default 1) column count. Accept '*'. */
449 /* Workaround, strtoul("*") won't always yield expected endp. */
451 endp = (char *)&spec[1];
453 count = strtoul(spec, &endp, 10);
460 *column_count = count;
463 /* Get the (mandatory, single letter) type spec (-/xob/l). */
464 format_char = *spec++;
465 switch (format_char) {
469 format_code = FORMAT_NONE;
472 format_code = FORMAT_HEX;
475 format_code = FORMAT_OCT;
479 format_code = FORMAT_BIN;
482 format_code = FORMAT_ANALOG;
484 default: /* includes NUL */
488 *format = format_code;
490 /* Get the (optional, decimal, default 1) bit count. */
492 count = strtoul(spec, &endp, 10);
/* Fallback value: three digits for analog data, one bit for logic data. */
496 count = (format_code == FORMAT_ANALOG) ? 3 : 1;
/* NOTE(review): presumably 'l' always means exactly one bit -- confirm. */
499 if (format_char == 'l')
505 /* Input spec must have been exhausted. */
/*
 * Translate the "column_formats" spec into per-column processing details
 * and create the sigrok channels. Runs two passes over the comma
 * separated format fields: The first pass checks the fields and counts
 * columns as well as logic and analog channels. The second pass fills in
 * inc->column_details[] and creates the channels. The column_texts
 * parameter holds the already split first text line, it provides the
 * number of columns seen in the input as well as optional channel names.
 */
512 static int make_column_details_from_format(const struct sr_input *in,
513 const char *column_format, char **column_texts)
516 char **formats, *format;
517 size_t format_count, column_count, logic_count, analog_count;
518 size_t auto_column_count;
519 size_t format_idx, c, b, column_idx, channel_idx, analog_idx;
520 enum single_col_format f;
521 struct column_details *detail;
522 GString *channel_name;
526 int channel_type, channel_sdi_nr;
530 inc->column_seen_count = g_strv_length(column_texts);
532 /* Split the input spec, count involved columns and bits. */
533 formats = g_strsplit(column_format, ",", 0);
535 sr_err("Cannot parse columns format %s (comma split).", column_format);
538 format_count = g_strv_length(formats);
540 sr_err("Cannot parse columns format %s (field count).", column_format);
544 column_count = logic_count = analog_count = 0;
545 auto_column_count = 0;
546 for (format_idx = 0; format_idx < format_count; format_idx++) {
547 format = formats[format_idx];
548 ret = split_column_format(format, &c, &f, &b);
549 sr_dbg("fmt %s -> %zu cols, %s fmt, %zu bits, rc %d", format, c, col_format_text[f], b, ret);
551 sr_err("Cannot parse columns format %s (field split, %s).", column_format, format);
556 /* User requested "auto-count", must be last format. */
557 if (formats[format_idx + 1]) {
558 sr_err("Auto column count must be last format field.");
/*
 * Auto count covers all columns which previous fields did not claim.
 * NOTE(review): Both operands are unsigned. The difference wraps
 * around when previous fields already claim more columns than the
 * input line has -- verify that this case gets rejected.
 */
562 auto_column_count = inc->column_seen_count - column_count;
563 c = auto_column_count;
566 if (f == FORMAT_ANALOG)
569 logic_count += c * b;
571 sr_dbg("Column format %s -> %zu columns, %zu logic, %zu analog channels.",
572 column_format, column_count, logic_count, analog_count);
574 /* Allocate and fill in "column processing" details. Create channels. */
575 inc->column_want_count = column_count;
576 if (inc->column_seen_count < inc->column_want_count) {
577 sr_err("Insufficient input text width for desired data amount, got %zu but want %zu columns.",
578 inc->column_seen_count, inc->column_want_count);
582 inc->column_details = g_malloc0_n(column_count, sizeof(inc->column_details[0]));
583 column_idx = channel_idx = analog_idx = 0;
584 channel_name = g_string_sized_new(64);
585 for (format_idx = 0; format_idx < format_count; format_idx++) {
586 /* Process a format field, which can span multiple columns. */
587 format = formats[format_idx];
/* The return code is ignored, the first pass parsed the same field. */
588 (void)split_column_format(format, &c, &f, &b);
590 c = auto_column_count;
592 /* Fill in a column's processing details. */
593 detail = &inc->column_details[column_idx++];
/* Column numbers are 1-based, the index was already incremented. */
594 detail->col_nr = column_idx;
595 detail->text_format = f;
596 if (detail->text_format == FORMAT_ANALOG) {
597 detail->channel_offset = analog_idx;
598 detail->channel_count = 1;
599 detail->analog_digits = b;
600 analog_idx += detail->channel_count;
601 } else if (detail->text_format) {
602 detail->channel_offset = channel_idx;
603 detail->channel_count = b;
604 channel_idx += detail->channel_count;
606 sr_dbg("detail -> col %zu, fmt %s, ch off/cnt %zu/%zu",
607 detail->col_nr, col_format_text[detail->text_format],
608 detail->channel_offset, detail->channel_count);
609 if (!detail->text_format)
612 * Pick most appropriate channel names. Optionally
613 * use text from a header line (when requested by the
614 * user). In the absence of header text, channels are
615 * assigned rather generic names.
617 * Manipulation of the column's caption (when a header
618 * line is seen) is acceptable, because this header
619 * line won't get processed another time.
621 column = column_texts[detail->col_nr - 1];
622 if (inc->use_header && column && *column)
623 caption = sr_scpi_unquote_string(column);
626 if (!caption || !*caption)
629 * TODO Need we first create _all_ logic channels,
630 * before creating analog channels?
632 for (create_idx = 0; create_idx < detail->channel_count; create_idx++) {
/*
 * Name selection: the plain caption for a single channel, the
 * caption plus bit index for multi-bit columns, the channel
 * number when there is no caption.
 */
633 if (caption && detail->channel_count == 1) {
634 g_string_assign(channel_name, caption);
635 } else if (caption) {
636 g_string_printf(channel_name, "%s[%zu]",
637 caption, create_idx);
639 g_string_printf(channel_name, "%zu",
640 detail->channel_offset + create_idx);
642 if (detail->text_format == FORMAT_ANALOG) {
/* Analog channels get numbers above all logic channels. */
643 channel_sdi_nr = logic_count + detail->channel_offset + create_idx;
644 channel_type = SR_CHANNEL_ANALOG;
/* List position of the channel which gets created below. */
645 detail->channel_index = g_slist_length(in->sdi->channels);
647 channel_sdi_nr = detail->channel_offset + create_idx;
648 channel_type = SR_CHANNEL_LOGIC;
650 sr_channel_new(in->sdi, channel_sdi_nr,
651 channel_type, TRUE, channel_name->str);
655 inc->logic_channels = channel_idx;
656 inc->analog_channels = analog_idx;
657 g_string_free(channel_name, TRUE);
/*
 * Get the processing details for column number nr. Column numbers are
 * 1-based. A missing context or details table, and numbers outside of
 * the 1..column_want_count range are checked for before the lookup.
 */
663 static const struct column_details *lookup_column_details(struct context *inc, size_t nr)
665 if (!inc || !inc->column_details)
667 if (!nr || nr > inc->column_want_count)
669 return &inc->column_details[nr - 1];
673 * Primitive operations for text input: Strip comments off text lines.
674 * Split text lines into columns. Process input text for individual
/* Search the text line for the first occurrence of the comment leader. */
678 static void strip_comment(char *buf, const GString *prefix)
685 if ((ptr = strstr(buf, prefix->str))) {
692 * @brief Splits a text line into a set of columns.
694 * @param[in] buf The input text line to split.
695 * @param[in] inc The input module's context.
697 * @returns An array of strings, representing the columns' text.
699 * This routine splits a text line on previously determined separators.
701 static char **split_line(char *buf, struct context *inc)
/* A max_tokens value of 0 does not limit the number of resulting columns. */
703 return g_strsplit(buf, inc->delimiter->str, 0);
707 * @brief Parse a multi-bit field into several logic channels.
709 * @param[in] column The input text, a run of bin/hex/oct digits.
710 * @param[in] inc The input module's context.
711 * @param[in] details The column processing details.
713 * @retval SR_OK Success.
714 * @retval SR_ERR Invalid input data (empty, or format error).
716 * This routine modifies the logic levels in the current sample set,
717 * based on the text input and a user provided format spec.
719 static int parse_logic(const char *column, struct context *inc,
720 const struct column_details *details)
722 size_t length, ch_rem, ch_idx, ch_inc;
726 const char *type_text;
730 * Prepare to read the digits from the text end towards the start.
731 * A digit corresponds to a variable number of channels (depending
732 * on the value's radix). Prepare the mapping of text digits to
733 * (a number of) logic channels.
735 length = strlen(column);
737 sr_err("Column %zu in line %zu is empty.", details->col_nr,
/* Start past the last digit, the rightmost digit holds the lowest bits. */
741 rdptr = &column[length];
742 ch_idx = details->channel_offset;
743 ch_rem = details->channel_count;
746 * Get another digit and derive up to four logic channels' state from
747 * it. Make sure to not process more bits than the column has channels
748 * associated with it.
750 while (rdptr > column && ch_rem) {
751 /* Check for valid digits according to the input radix. */
753 switch (details->text_format) {
/* Accepts the digits '0' and '1' only. */
755 valid = g_ascii_isxdigit(c) && c < '2';
/* Accepts the digits '0' to '7' only. */
759 valid = g_ascii_isxdigit(c) && c < '8';
/* Accepts any hex digit. */
763 valid = g_ascii_isxdigit(c);
771 type_text = col_format_text[details->text_format];
772 sr_err("Invalid text '%s' in %s type column %zu in line %zu.",
773 column, type_text, details->col_nr, inc->line_number);
776 /* Use the digit's bits for logic channels' data. */
777 bits = g_ascii_xdigit_value(c);
778 switch (details->text_format) {
/*
 * NOTE(review): presumably the formats' cases fall through from the
 * most to the least significant bit, so that each radix handles its
 * own number of bits per digit -- confirm.
 */
782 set_logic_level(inc, ch_idx + 3, bits & (1 << 3));
788 set_logic_level(inc, ch_idx + 2, bits & (1 << 2));
792 set_logic_level(inc, ch_idx + 1, bits & (1 << 1));
797 set_logic_level(inc, ch_idx + 0, bits & (1 << 0));
801 /* ShouldNotHappen(TM), but silences compiler warning. */
807 * TODO Determine whether the availability of extra input data
808 * for unhandled logic channels is worth warning here. In this
809 * implementation users are in control, and can have the more
810 * significant bits ignored (which can be considered a feature
811 * and not really a limitation).
818 * @brief Parse a floating point text into an analog value.
820 * @param[in] column The input text, a floating point number.
821 * @param[in] inc The input module's context.
822 * @param[in] details The column processing details.
824 * @retval SR_OK Success.
825 * @retval SR_ERR Invalid input data (empty, or format error).
827 * This routine modifies the analog values in the current sample set,
828 * based on the text input and a user provided format spec.
830 static int parse_analog(const char *column, struct context *inc,
831 const struct column_details *details)
834 double dvalue; float fvalue;
/* Checks that the column was declared as analog. */
838 if (details->text_format != FORMAT_ANALOG)
841 length = strlen(column);
843 sr_err("Column %zu in line %zu is empty.", details->col_nr,
/*
 * Pick the text conversion routine which matches the size of 'value'.
 * NOTE(review): presumably 'value' is of type csv_analog_t -- confirm.
 */
847 if (sizeof(value) == sizeof(double)) {
848 ret = sr_atod_ascii(column, &dvalue);
850 } else if (sizeof(value) == sizeof(float)) {
851 ret = sr_atof_ascii(column, &fvalue);
857 sr_err("Cannot parse analog text %s in column %zu in line %zu.",
858 column, details->col_nr, inc->line_number);
/* For analog columns the channel offset is the analog slot number. */
861 set_analog_value(inc, details->channel_offset, value);
867 * @brief Parse routine which ignores the input text.
869 * This routine exists to unify dispatch code paths, mapping input file
870 * columns' data types to their respective parse routines.
/* Shares the col_parse_cb signature, is the FORMAT_NONE table entry. */
872 static int parse_ignore(const char *column, struct context *inc,
873 const struct column_details *details)
/* Signature of the routines which process a single column's text. */
881 typedef int (*col_parse_cb)(const char *column, struct context *inc,
882 const struct column_details *details);
/*
 * Maps column formats to their parse routines. All of the logic formats
 * share one routine, which checks the column's format for the radix.
 */
884 static const col_parse_cb col_parse_funcs[] = {
885 [FORMAT_NONE] = parse_ignore,
886 [FORMAT_BIN] = parse_logic,
887 [FORMAT_OCT] = parse_logic,
888 [FORMAT_HEX] = parse_logic,
889 [FORMAT_ANALOG] = parse_analog,
/*
 * Initialize the input module: Allocate the device instance and the
 * module's context, get the user provided options, and determine the
 * "column_formats" spec. The spec is either taken from the user, or
 * gets derived from the backwards compatible single-column or
 * multi-column options.
 */
892 static int init(struct sr_input *in, GHashTable *options)
895 size_t single_column, first_column, logic_channels;
897 enum single_col_format format;
900 in->sdi = g_malloc0(sizeof(*in->sdi));
901 in->priv = inc = g_malloc0(sizeof(*inc));
903 single_column = g_variant_get_uint32(g_hash_table_lookup(options, "single_column"));
904 logic_channels = g_variant_get_uint32(g_hash_table_lookup(options, "logic_channels"));
905 inc->delimiter = g_string_new(g_variant_get_string(
906 g_hash_table_lookup(options, "column_separator"), NULL));
907 if (!inc->delimiter->len) {
908 sr_err("Column separator cannot be empty.");
911 s = g_variant_get_string(g_hash_table_lookup(options, "single_format"), NULL);
/* Only the first three characters are compared, case-insensitively. */
912 if (g_ascii_strncasecmp(s, "bin", 3) == 0) {
914 } else if (g_ascii_strncasecmp(s, "hex", 3) == 0) {
916 } else if (g_ascii_strncasecmp(s, "oct", 3) == 0) {
919 sr_err("Invalid single-column format: '%s'", s);
922 inc->comment = g_string_new(g_variant_get_string(
923 g_hash_table_lookup(options, "comment_leader"), NULL));
924 if (g_string_equal(inc->comment, inc->delimiter)) {
926 * Using the same sequence as comment leader and column
927 * separator won't work. The user probably specified ';'
928 * as the column separator but did not adjust the comment
929 * leader. Try DWIM, drop comment strippin support here.
931 sr_warn("Comment leader and column separator conflict, disabling comment support.");
932 g_string_truncate(inc->comment, 0);
934 inc->samplerate = g_variant_get_uint64(g_hash_table_lookup(options, "samplerate"));
935 first_column = g_variant_get_uint32(g_hash_table_lookup(options, "first_column"));
936 inc->use_header = g_variant_get_boolean(g_hash_table_lookup(options, "header"));
937 inc->start_line = g_variant_get_uint32(g_hash_table_lookup(options, "start_line"));
938 if (inc->start_line < 1) {
939 sr_err("Invalid start line %zu.", inc->start_line);
944 * Scan flexible, to get prefered format specs which describe
945 * the input file's data formats. As well as some simple specs
946 * for backwards compatibility and user convenience.
948 * This logic ends up with a copy of the format string, either
949 * user provided or internally derived. Actual creation of the
950 * column processing details gets deferred until the first line
951 * of input data was seen. To support automatic determination of
952 * e.g. channel counts from column counts.
954 s = g_variant_get_string(g_hash_table_lookup(options, "column_formats"), NULL);
956 inc->column_formats = g_strdup(s);
957 sr_dbg("User specified column_formats: %s.", s);
/* Simple single-column mode: skip leading columns, then one multi-bit column. */
958 } else if (single_column && logic_channels) {
959 format_char = col_format_char[format];
960 if (single_column == 1) {
961 inc->column_formats = g_strdup_printf("%c%zu",
962 format_char, logic_channels);
964 inc->column_formats = g_strdup_printf("%zu-,%c%zu",
966 format_char, logic_channels);
968 sr_dbg("Backwards compat single_column, col %zu, fmt %s, bits %zu -> %s.",
969 single_column, col_format_text[format], logic_channels,
970 inc->column_formats);
/* Simple multi-column mode: skip leading columns, then single-bit columns. */
971 } else if (!single_column) {
972 if (first_column > 1) {
973 inc->column_formats = g_strdup_printf("%zu-,%zul",
974 first_column - 1, logic_channels);
976 inc->column_formats = g_strdup_printf("%zul",
979 sr_dbg("Backwards compat multi-column, col %zu, chans %zu -> %s.",
980 first_column, logic_channels,
981 inc->column_formats);
983 sr_warn("Unknown or unsupported columns layout spec, assuming simple multi-column mode.");
984 inc->column_formats = g_strdup("*l");
991 * Check the channel list for consistency across file re-import. See
992 * the VCD input module for more details and motivation.
/*
 * Move the device instance's channel list to prev_sr_channels, after
 * releasing a previously kept list. The device instance has no channels
 * afterwards.
 */
995 static void keep_header_for_reread(const struct sr_input *in)
1000 g_slist_free_full(inc->prev_sr_channels, sr_channel_free_cb);
1001 inc->prev_sr_channels = in->sdi->channels;
1002 in->sdi->channels = NULL;
/*
 * Compare the newly created channel list against the list which was
 * kept from a previous read of the file. When the lists match, the new
 * list gets released and the previous list is put back in place.
 * NOTE(review): callers test the result for being non-zero, presumably
 * that means success -- confirm.
 */
1005 static int check_header_in_reread(const struct sr_input *in)
1007 struct context *inc;
/* Checks whether a list was kept from a previous read. */
1014 if (!inc->prev_sr_channels)
1017 if (sr_channel_lists_differ(inc->prev_sr_channels, in->sdi->channels)) {
1018 sr_err("Channel list change not supported for file re-read.");
1021 g_slist_free_full(in->sdi->channels, sr_channel_free_cb);
1022 in->sdi->channels = inc->prev_sr_channels;
1023 inc->prev_sr_channels = NULL;
/* Line delimiter characters, used when no termination sequence is known. */
1028 static const char *delim_set = "\r\n";
/*
 * Detect the line termination sequence from the buffer's content. The
 * CR LF pair is checked for before a lone LF and a lone CR, so that the
 * two-character sequence is not mistaken for one of its parts.
 */
1030 static const char *get_line_termination(GString *buf)
1035 if (g_strstr_len(buf->str, buf->len, "\r\n"))
1037 else if (memchr(buf->str, '\n', buf->len))
1039 else if (memchr(buf->str, '\r', buf->len))
/*
 * Inspect the first chunk of input text: Find the first line which is
 * neither skipped, blank, nor comment-only. Use that line to create the
 * column processing details and the channels. Then allocate the datafeed
 * queues for logic and analog sample data.
 */
1045 static int initial_parse(const struct sr_input *in, GString *buf)
1047 struct context *inc;
1049 size_t line_number, line_idx;
1051 char **lines, *line, **columns;
1057 /* Search for the first line to process (header or data). */
/* Prefer the known termination sequence, else split on any CR or LF. */
1059 if (inc->termination)
1060 lines = g_strsplit(buf->str, inc->termination, 0);
1062 lines = g_strsplit_set(buf->str, delim_set, 0);
1063 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1065 if (inc->start_line > line_number) {
1066 sr_spew("Line %zu skipped (before start).", line_number);
1069 if (line[0] == '\0') {
1070 sr_spew("Blank line %zu skipped.", line_number);
1073 strip_comment(line, inc->comment);
1074 if (line[0] == '\0') {
1075 sr_spew("Comment-only line %zu skipped.", line_number);
1079 /* Reached first proper line. */
1083 /* Not enough data for a proper line yet. */
1088 /* Get the number of columns in the line. */
1089 columns = split_line(line, inc);
1091 sr_err("Error while parsing line %zu.", line_number);
1095 num_columns = g_strv_length(columns);
1097 sr_err("Error while parsing line %zu.", line_number);
1101 sr_dbg("DIAG Got %zu columns in text line: %s.", num_columns, line);
1104 * Interpret the user provided column format specs. This might
1105 * involve inspection of the now received input text, to support
1106 * e.g. automatic detection of channel counts in the absence of
1107 * user provided specs. Optionally a header line is used to get
1110 * Check the then created channels for consistency across .reset
1111 * and .receive sequences (file re-load).
1113 ret = make_column_details_from_format(in, inc->column_formats, columns);
1115 sr_err("Cannot parse columns format using line %zu.", line_number);
1118 if (!check_header_in_reread(in)) {
1124 * Allocate buffer memory for datafeed submission of sample data.
1125 * Calculate the minimum buffer size to store the set of samples
1126 * of all channels (unit size). Determine a larger buffer size
1127 * for datafeed submission that is a multiple of the unit size.
1128 * Allocate the larger buffer, the "sample buffer" will point
1129 * to a location within that large buffer later.
1131 if (inc->logic_channels) {
/* One bit per logic channel, rounded up to whole bytes. */
1132 inc->sample_unit_size = (inc->logic_channels + 7) / 8;
1133 inc->datafeed_buf_size = CHUNK_SIZE;
1134 inc->datafeed_buf_size *= inc->sample_unit_size;
1135 inc->datafeed_buffer = g_malloc(inc->datafeed_buf_size);
/* NOTE(review): g_malloc() aborts on failure, this check looks unreachable. */
1136 if (!inc->datafeed_buffer) {
1137 sr_err("Cannot allocate datafeed send buffer (logic).");
1138 ret = SR_ERR_MALLOC;
1141 inc->datafeed_buf_fill = 0;
1144 if (inc->analog_channels) {
1145 size_t sample_size, sample_count;
1147 struct column_details *detail;
/*
 * Spread CHUNK_SIZE bytes across all analog channels, to get the
 * queue's capacity in samples per channel.
 */
1150 sample_size = sizeof(inc->analog_datafeed_buffer[0]);
1151 inc->analog_datafeed_buf_size = CHUNK_SIZE;
1152 inc->analog_datafeed_buf_size /= sample_size;
1153 inc->analog_datafeed_buf_size /= inc->analog_channels;
1154 sample_count = inc->analog_channels * inc->analog_datafeed_buf_size;
1155 inc->analog_datafeed_buffer = g_malloc0(sample_count * sample_size);
/* NOTE(review): g_malloc0() aborts on failure, this check looks unreachable. */
1156 if (!inc->analog_datafeed_buffer) {
1157 sr_err("Cannot allocate datafeed send buffer (analog).");
1158 ret = SR_ERR_MALLOC;
1161 inc->analog_datafeed_buf_fill = 0;
1162 inc->analog_datafeed_channels = g_malloc0(inc->analog_channels * sizeof(inc->analog_datafeed_channels[0]));
1163 inc->analog_datafeed_digits = g_malloc0(inc->analog_channels * sizeof(inc->analog_datafeed_digits[0]));
1164 digits_item = inc->analog_datafeed_digits;
/* Collect channel references and digits counts of all analog columns. */
1165 for (detail_idx = 0; detail_idx < inc->column_want_count; detail_idx++) {
1166 detail = &inc->column_details[detail_idx];
1167 if (detail->text_format != FORMAT_ANALOG)
1169 channel = g_slist_nth_data(in->sdi->channels, detail->channel_index);
/* One-element list per channel, as needed for analog packets. */
1170 inc->analog_datafeed_channels[detail->channel_offset] = g_slist_append(NULL, channel);
1171 *digits_item++ = detail->analog_digits;
1177 g_strfreev(columns);
/* Detect a UTF-8 byte order mark at the start of in->buf and remove it. */
1184 * Gets called from initial_receive(), which runs until the end-of-line
1185 * encoding of the input stream could get determined. Assumes that this
1186 * routine receives enough buffered initial input data to either see the
1187 * BOM when there is one, or that no BOM will follow when a text line
1188 * termination sequence was seen. Silently drops the UTF-8 BOM sequence
1189 * from the input buffer if one was seen. Does not care to protect
1190 * against multiple execution or dropping the BOM multiple times --
1191 * there should be at most one in the input stream.
1193 static void initial_bom_check(const struct sr_input *in)
/* The three bytes which make up the UTF-8 BOM sequence. */
1195 static const char *utf8_bom = "\xef\xbb\xbf";
/* Input which is shorter than the BOM cannot start with it. */
1197 if (in->buf->len < strlen(utf8_bom))
/* Compare the first bytes of the input buffer against the BOM. */
1199 if (strncmp(in->buf->str, utf8_bom, strlen(utf8_bom)) != 0)
/* Erase the BOM bytes from the front of the input buffer. */
1201 g_string_erase(in->buf, 0, strlen(utf8_bom));
/*
 * Handle the initial input data: Drop an optional BOM, determine the
 * line termination sequence, and pass a copy of the text received so
 * far to initial_parse(). A copy of the detected termination sequence
 * is kept in inc->termination for later use.
 */
1204 static int initial_receive(const struct sr_input *in)
1206 struct context *inc;
1210 const char *termination;
1212 initial_bom_check(in);
1216 termination = get_line_termination(in->buf);
1218 /* Don't have a full line yet. */
/* Locate the last occurrence of the termination sequence in the buffer. */
1221 p = g_strrstr_len(in->buf->str, in->buf->len, termination);
1223 /* Don't have a full line yet. */
/*
 * NOTE(review): This length excludes the last termination sequence
 * and, because of the trailing "- 1", also the one character in front
 * of it. Confirm that dropping this character is intended.
 */
1225 len = p - in->buf->str - 1;
/* Work on a temporary copy, the input buffer itself is left untouched. */
1226 new_buf = g_string_new_len(in->buf->str, len);
/* Append an explicit NUL byte to the copy. */
1227 g_string_append_c(new_buf, '\0');
/* Keep a private copy of the termination sequence in the context. */
1229 inc->termination = g_strdup(termination);
/* Only parse when the input buffer does not start with a NUL byte. */
1231 if (in->buf->str[0] != '\0')
1232 ret = initial_parse(in, new_buf);
/* Release the temporary copy, including its character data. */
1236 g_string_free(new_buf, TRUE);
/*
 * Process the text which has accumulated in in->buf: Split it into
 * lines, skip lines which carry no sample data (before the start line,
 * blank, comment-only, header), split the remaining lines into columns,
 * have each column parsed, and queue the resulting samples for datafeed
 * submission. Processed text gets removed from in->buf afterwards.
 *
 * The is_eof parameter is described by the comment inside this routine
 * (all previously buffered data gets processed in the EOF condition).
 */
1241 static int process_buffer(struct sr_input *in, gboolean is_eof)
1243 struct context *inc;
1245 size_t line_idx, col_idx, col_nr;
1246 const struct column_details *details;
1247 col_parse_cb parse_func;
1249 char *p, **lines, *line, **columns, *column;
/* Send the datafeed header once, inc->started tracks that this was done. */
1252 if (!inc->started) {
1253 std_session_send_df_header(in->sdi);
1254 inc->started = TRUE;
1258 * Consider empty input non-fatal. Keep accumulating input until
1259 * at least one full text line has become available. Grab the
1260 * maximum amount of accumulated data that consists of full text
1261 * lines, and process what has been received so far, leaving not
1262 * yet complete lines for the next invocation.
1264 * Enforce that all previously buffered data gets processed in
1265 * the "EOF" condition. Do not insist in the presence of the
1266 * termination sequence for the last line (may often be missing
1267 * on Windows). A present termination sequence will just result
1268 * in the "execution of an empty line", and does not harm.
/*
 * NOTE(review): The condition which selects between the following two
 * assignments to 'p' is not visible in this excerpt, presumably it is
 * the is_eof flag -- confirm. 'p' marks the end of the text which gets
 * processed in this invocation.
 */
1273 p = in->buf->str + in->buf->len;
1275 p = g_strrstr_len(in->buf->str, in->buf->len, inc->termination);
/* Move past the termination sequence, so that it gets erased below, too. */
1279 p += strlen(inc->termination);
/* Strip leading and trailing whitespace from the buffered text, in place. */
1281 g_strstrip(in->buf->str);
/* Iterate the NULL terminated vector of text lines. */
1284 lines = g_strsplit(in->buf->str, inc->termination, 0);
1285 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1287 if (inc->line_number < inc->start_line) {
1288 sr_spew("Line %zu skipped (before start).", inc->line_number);
1291 if (line[0] == '\0') {
1292 sr_spew("Blank line %zu skipped.", inc->line_number);
1296 /* Remove trailing comment. */
1297 strip_comment(line, inc->comment);
1298 if (line[0] == '\0') {
1299 sr_spew("Comment-only line %zu skipped.", inc->line_number);
1303 /* Skip the header line, its content was used as the channel names. */
1304 if (inc->use_header && !inc->header_seen) {
1305 sr_spew("Header line %zu skipped.", inc->line_number);
1306 inc->header_seen = TRUE;
1310 /* Split the line into columns, check for minimum length. */
1311 columns = split_line(line, inc);
1313 sr_err("Error while parsing line %zu.", inc->line_number);
1317 num_columns = g_strv_length(columns);
1318 if (num_columns < inc->column_want_count) {
1319 sr_err("Insufficient column count %zu in line %zu.",
1320 num_columns, inc->line_number);
1321 g_strfreev(columns);
1326 /* Have the columns of the current text line processed. */
1327 clear_logic_samples(inc);
1328 clear_analog_samples(inc);
1329 for (col_idx = 0; col_idx < inc->column_want_count; col_idx++) {
1330 column = columns[col_idx];
/* Column numbers start at 1, while the array index starts at 0. */
1331 col_nr = col_idx + 1;
1332 details = lookup_column_details(inc, col_nr);
/* Columns without details or without a text format are not parsed. */
1333 if (!details || !details->text_format)
/* Pick the parse routine for this column's text format from the table. */
1335 parse_func = col_parse_funcs[details->text_format];
1338 ret = parse_func(column, inc, details);
1340 g_strfreev(columns);
1346 /* Send sample data to the session bus (buffered). */
/* The return codes of both queue routines are summed up in 'ret'. */
1347 ret = queue_logic_samples(in);
1348 ret += queue_analog_samples(in);
1350 sr_err("Sending samples failed.");
1351 g_strfreev(columns);
1356 g_strfreev(columns);
/* Drop the text up to 'p' from the input buffer, keep what follows. */
1359 g_string_erase(in->buf, 0, p - in->buf->str);
/*
 * Accept another chunk of input data: Append it to in->buf. As long as
 * no columns were seen yet (inc->column_seen_count is zero), run
 * initial_receive() on the accumulated data. Then have the buffered
 * text processed by process_buffer() in non-EOF mode.
 */
1364 static int receive(struct sr_input *in, GString *buf)
1366 struct context *inc;
/* Accumulate the received data in the input buffer. */
1369 g_string_append_len(in->buf, buf->str, buf->len);
1372 if (!inc->column_seen_count) {
1373 ret = initial_receive(in);
1374 if (ret == SR_ERR_NA)
1375 /* Not enough data yet. */
1377 else if (ret != SR_OK)
1380 /* sdi is ready, notify frontend. */
1381 in->sdi_ready = TRUE;
/* Process complete lines only, more input data may follow. */
1385 ret = process_buffer(in, FALSE);
/*
 * Finish the input stream: Process all remaining buffered text in EOF
 * mode, flush the logic and analog sample buffers, and then call
 * std_session_send_df_end(). The checks between these steps are not
 * visible in this excerpt.
 */
1390 static int end(struct sr_input *in)
1392 struct context *inc;
/* TRUE: Also process a last line which lacks the termination sequence. */
1396 ret = process_buffer(in, TRUE);
/* The return codes of both flush routines are summed up in 'ret'. */
1402 ret = flush_logic_samples(in);
1403 ret += flush_analog_samples(in);
1409 std_session_send_df_end(in->sdi);
/*
 * Release resources of the input module's context: Call
 * keep_header_for_reread() first, then free the line termination text
 * and the logic and analog datafeed buffers. Pointers are set to NULL
 * after release.
 *
 * NOTE(review): The analog_datafeed_channels and analog_datafeed_digits
 * arrays get allocated elsewhere in this file, but are not released in
 * the lines visible here. Confirm that they get freed somewhere.
 */
1414 static void cleanup(struct sr_input *in)
1416 struct context *inc;
1418 keep_header_for_reread(in);
1422 g_free(inc->termination);
1423 inc->termination = NULL;
1424 g_free(inc->datafeed_buffer);
1425 inc->datafeed_buffer = NULL;
1426 g_free(inc->analog_datafeed_buffer);
1427 inc->analog_datafeed_buffer = NULL;
/*
 * Reset the input module: Clear the "started" flag and discard all
 * buffered input text. Other steps of this routine are not visible in
 * this excerpt.
 */
1430 static int reset(struct sr_input *in)
1432 struct context *inc = in->priv;
/* Marks the datafeed header as not yet sent, see process_buffer(). */
1435 inc->started = FALSE;
/* Discard the buffered input text. */
1436 g_string_truncate(in->buf, 0);
/*
 * Table of the input module's options, indexed by OPT_* identifiers.
 * Each visible entry provides an option id, a short name, and a longer
 * description. Default values (and the list of supported values for
 * the format option) are not part of this initializer, get_options()
 * fills them in at runtime.
 */
1455 static struct sr_option options[] = {
1457 "column_formats", "Column format specs",
1458 "Specifies text columns data types: A comma separated list of [<cols>]<fmt>[<bits>] items, with - to ignore columns, x/o/b/l for logic data, a (and resolution) for analog data.",
1461 [OPT_SINGLE_COL] = {
1462 "single_column", "Single column",
1463 "Enable single-column mode, exclusively use text from the specified column (number starting at 1). Obsoleted by 'column_formats'.",
1467 "first_column", "First column",
1468 "Number of the first column with logic data in simple multi-column mode (number starting at 1, default 1). Obsoleted by 'column_formats'.",
1472 "logic_channels", "Number of logic channels",
1473 "Logic channel count, required in simple single-column mode, defaults to \"all remaining columns\" in simple multi-column mode. Obsoleted by 'column_formats'.",
1477 "single_format", "Data format for simple single-column mode.",
1478 "The number format of single-column mode input data: bin, hex, oct. Obsoleted by 'column_formats'.",
1482 "start_line", "Start line",
1483 "The line number at which to start processing input text (default: 1).",
1487 "header", "Get channel names from first line.",
1488 "Use the first processed line's column captions (when available) as channel names. Off by default",
1492 "samplerate", "Samplerate (Hz)",
1493 "The input data's sample rate in Hz. No default value.",
1497 "column_separator", "Column separator",
1498 "The sequence which separates text columns. Non-empty text, comma by default.",
1502 "comment_leader", "Comment leader character",
1503 "The text which starts comments at the end of text lines, semicolon by default.",
/* Last entry of the table, presumably an all-zero end marker -- confirm. */
1506 [OPT_MAX] = ALL_ZERO,
/*
 * Provide the input module's options table. Default values get created
 * when the first table entry does not have a default yet, so that the
 * GVariant objects are only created once. Presumably returns the
 * options[] table (the return statement is not visible in this
 * excerpt).
 */
1509 static const struct sr_option *get_options(void)
/* A missing default in the first entry means: defaults not created yet. */
1513 if (!options[0].def) {
/* g_variant_ref_sink() turns the new variants' floating references into owned ones. */
1514 options[OPT_COL_FMTS].def = g_variant_ref_sink(g_variant_new_string(""));
1515 options[OPT_SINGLE_COL].def = g_variant_ref_sink(g_variant_new_uint32(0));
1516 options[OPT_FIRST_COL].def = g_variant_ref_sink(g_variant_new_uint32(1));
1517 options[OPT_NUM_LOGIC].def = g_variant_ref_sink(g_variant_new_uint32(0));
1518 options[OPT_FORMAT].def = g_variant_ref_sink(g_variant_new_string("bin"));
/* The list of supported values for the format option: bin, hex, oct. */
1520 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("bin")));
1521 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("hex")));
1522 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("oct")));
1523 options[OPT_FORMAT].values = l;
1524 options[OPT_START].def = g_variant_ref_sink(g_variant_new_uint32(1));
1525 options[OPT_HEADER].def = g_variant_ref_sink(g_variant_new_boolean(FALSE));
1526 options[OPT_RATE].def = g_variant_ref_sink(g_variant_new_uint64(0));
1527 options[OPT_DELIM].def = g_variant_ref_sink(g_variant_new_string(","));
1528 options[OPT_COMMENT].def = g_variant_ref_sink(g_variant_new_string(";"));
1534 SR_PRIV struct sr_input_module input_csv = {
1537 .desc = "Comma-separated values",
1538 .exts = (const char*[]){"csv", NULL},
1539 .options = get_options,