2 * This file is part of the libsigrok project.
4 * Copyright (C) 2013 Marc Schink <sigrok-dev@marcschink.de>
5 * Copyright (C) 2019 Gerhard Sittig <gerhard.sittig@gmx.net>
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
27 #include <libsigrok/libsigrok.h>
28 #include "libsigrok-internal.h"
29 #include "scpi.h" /* String un-quote for channel name from header line. */
/* Tag for this module's log output; presumably consumed by the sr_err()/sr_dbg() family of logging macros, which are defined outside this file. */
31 #define LOG_PREFIX "input/csv"
/*
 * Base capacity used when sizing the datafeed buffers: initial_parse()
 * multiplies it by the logic sample unit size for the logic buffer, and
 * divides it by the analog sample size and the analog channel count to
 * get the per-channel capacity of the analog buffer.
 */
33 #define CHUNK_SIZE (4 * 1024 * 1024)
36 * The CSV input module has the following options:
38 * column_formats: Specifies the data formats and channel counts for the
39 * input file's text columns. Accepts a comma separated list of tuples
40 * with: an optional column repeat count ('*' as a wildcard meaning
41 * "all remaining columns", only applicable to the last field), a format
42 * specifying character ('x' hexadecimal, 'o' octal, 'b' binary, 'l'
43 * single-bit logic), and an optional bit count (translating to: logic
44 * channels communicated in that column). This "column_formats" option
45 * is most versatile, other forms of specifying the column layout only
46 * exist for backwards compatibility.
48 * single_column: Specifies the column number which contains the logic data
49 * for single-column mode. All logic data is taken from several bits
50 * which all are kept within that one column. Only exists for backwards
51 * compatibility, see "column_formats" for more flexibility.
53 * first_column: Specifies the number of the first column with logic data
54 * in simple multi-column mode. Only exists for backwards compatibility,
55 * see "column_formats" for more flexibility.
57 * logic_channels: Specifies the number of logic channels. Is required in
58 * simple single-column mode. Is optional in simple multi-column mode
59 * (and defaults to all remaining columns). Only exists for backwards
60 * compatibility, see "column_formats" for more flexibility.
62 * single_format: Specifies the format of the input text in simple single-
63 * column mode. Available formats are: 'bin' (default), 'hex' and 'oct'.
64 * Simple multi-column mode always uses single-bit data per column.
65 * Only exists for backwards compatibility, see "column_formats" for
66 * more flexibility.
68 * start_line: Specifies at which line to start processing the input file.
69 * Allows to skip leading lines which neither are header nor data lines.
70 * By default all of the input file gets processed.
72 * header: Boolean option, controls whether the first processed line is used
73 * to determine channel names. Off by default. Generic channel names are
74 * used in the absence of header line content.
76 * samplerate: Specifies the samplerate of the input data. Defaults to 0.
77 * User specs take precedence over data which optionally gets derived
78 * from input data.
80 * column_separator: Specifies the sequence which separates the text file
81 * columns. Cannot be empty. Defaults to comma.
83 * comment_leader: Specifies the sequence which starts comments that run
84 * up to the end of the current text line. Can be empty to disable
85 * comment support. Defaults to semicolon.
87 * Typical examples of using these options:
88 * - ... -I csv:column_formats=*l ...
89 * All columns are single-bit logic data. Identical to the previous
90 * multi-column mode (the default when no options were given at all).
91 * - ... -I csv:column_formats=3-,*l ...
92 * Ignore the first three columns, get single-bit logic data from all
93 * remaining columns (multi-column mode with first-column above 1).
94 * - ... -I csv:column_formats=3-,4l,x8 ...
95 * Ignore the first three columns, get single-bit logic data from the
96 * next four columns, then eight-bit data in hex format from the next
97 * column. More columns may follow in the input text but won't get
98 * processed. (Mix of previous multi-column as well as single-column
99 * modes.)
100 * - ... -I csv:column_formats=4x8,b16,5l ...
101 * Get eight-bit data in hex format from the first four columns, then
102 * sixteen-bit data in binary format, then five times single-bit data.
103 * - ... -I csv:single_column=2:single_format=bin:logic_channels=8 ...
104 * Get eight logic bits in binary format from column 2. (Simple
105 * single-column mode, corresponds to the "-,b8" format.)
106 * - ... -I csv:first_column=6:logic_channels=4 ...
107 * Get four single-bit logic channels from columns 6 to 9 respectively.
108 * (Simple multi-column mode, corresponds to the "5-,4l" format.)
109 * - ... -I csv:start_line=20:header=yes:...
110 * Skip the first 19 text lines. Use line 20 to derive channel names.
111 * Data starts at line 21.
117 * - Extend support for analog input data.
118 * - Determine why analog samples of 'double' data type get scrambled
119 * in sigrok-cli screen output. Is analog.encoding->unitsize not
120 * handled properly? A sigrok-cli or libsigrok (src/output) issue?
121 * - Reconsider the channel creation after format processing. Current
122 * logic may "bleed" channel names into the analog group when logic
123 * channels' columns follow analog columns (seen with "-,2a,x8").
124 * Trying to sort it out, a naive change used to map logic channels'
125 * data to incorrect bitmap positions. The whole channel numbering
126 * needs reconsideration. Probably it's easiest to first create _all_
127 * logic channels so that they have adjacent numbers starting at 0
128 * (addressing logic bits), then all analog channels (again adjacent)
129 * to simplify the calculation of their index in the sample set as
130 * well as their sdi channel index from the "analog column index".
131 * - Optionally get sample rate from timestamp column. Just best-effort
132 * approach, not necessarily reliable. Users can always specify rates.
133 * - Add a test suite for input modules in general, and CSV in specific?
134 * Becomes more important with the multitude of options and their
135 * interaction. Could cover edge cases (BOM presence, line termination
136 * absence, etc) and auto-stuff as well (channel names, channel counts,
137 * samplerates, etc).
/*
 * Element type of the analog sample buffers. flush_analog_samples()
 * reports sizeof() of one element as the analog encoding's unitsize and
 * marks the encoding as signed floating point.
 */
140 typedef float csv_analog_t; /* 'double' currently is flawed. */
142 /* Single column formats. */
/*
 * The enumerators double as array indices for col_format_text[],
 * col_format_char[] and col_parse_funcs[] (designated initializers).
 * FORMAT_NONE is the first enumerator, so code elsewhere can test
 * "!text_format" to detect columns which shall be ignored.
 */
143 enum single_col_format {
144 FORMAT_NONE, /* Ignore this column. */
145 FORMAT_BIN, /* Bin digits for a set of bits (or just one bit). */
146 FORMAT_HEX, /* Hex digits for a set of bits. */
147 FORMAT_OCT, /* Oct digits for a set of bits. */
148 FORMAT_ANALOG, /* Floating point number for an analog channel. */
/*
 * Human readable name per column format, indexed by enum single_col_format.
 * Used in debug and error messages only.
 */
151 static const char *col_format_text[] = {
152 [FORMAT_NONE] = "unknown",
153 [FORMAT_BIN] = "binary",
154 [FORMAT_HEX] = "hexadecimal",
155 [FORMAT_OCT] = "octal",
156 [FORMAT_ANALOG] = "analog",
/*
 * Format specifier character per column format, indexed by enum
 * single_col_format. init() looks up the character for the legacy
 * "single_format" option when it synthesizes a "column_formats" string.
 * NOTE(review): only the analog entry is visible in this listing; the
 * other entries are presumably '-', 'b', 'x' and 'o' -- confirm.
 */
159 static const char col_format_char[] = {
164 [FORMAT_ANALOG] = 'a',
/*
 * Processing details for one input text column. Instances are created and
 * filled in by make_column_details_from_format(). Code elsewhere in this
 * file also accesses "col_nr" and "analog_digits" members, whose
 * declarations are not visible in this listing.
 */
167 struct column_details {
/* How the column's text gets interpreted (or FORMAT_NONE to ignore it). */
169 enum single_col_format text_format;
/*
 * Index of the column's first channel: a position among the logic bits
 * for logic columns, an analog channel index for analog columns.
 */
170 size_t channel_offset;
/* Number of channels fed from this column: the bit count for logic columns, 1 for analog columns. */
171 size_t channel_count;
/*
 * For analog columns: length of the sdi channel list at the time the
 * channel got created, later used as the list position to look up the
 * channel object.
 */
172 size_t channel_index;
/*
 * Members of the input module's private context. init() allocates the
 * context zero-filled and stores it in in->priv. (The opening line of the
 * struct declaration and several members are not visible in this listing.)
 */
179 /* Current selected samplerate. */
/* Set by flush_samplerate() once the rate was sent to the session. */
181 gboolean samplerate_sent;
183 /* Number of channels. */
184 size_t logic_channels;
185 size_t analog_channels;
187 /* Column delimiter (actually separator), comment leader, EOL sequence. */
192 /* Format specs for input columns, and processing state. */
/* Column count found in the text line which was used to set up the columns. */
193 size_t column_seen_count;
194 const char *column_formats;
/* Column count which the format spec covers; also the length of column_details[]. */
195 size_t column_want_count;
196 struct column_details *column_details;
198 /* Line number to start processing. */
202 * Determines if the first line should be treated as header and used for
203 * channel names in multi column mode.
206 gboolean header_seen;
208 size_t sample_unit_size; /**!< Byte count for a single sample. */
209 uint8_t *sample_buffer; /**!< Buffer for a single sample. */
210 csv_analog_t *analog_sample_buffer; /**!< Buffer for one set of analog values. */
212 uint8_t *datafeed_buffer; /**!< Queue for datafeed submission. */
/*
 * Capacity and fill level of datafeed_buffer, both counted in bytes. The
 * fill level advances by sample_unit_size for each queued sample.
 */
213 size_t datafeed_buf_size;
214 size_t datafeed_buf_fill;
215 /* "Striped" layout, M samples for N channels each. */
216 csv_analog_t *analog_datafeed_buffer; /**!< Queue for analog datafeed. */
/*
 * Capacity and fill level counted in samples per channel (not bytes). The
 * capacity also is the distance between two channels' stripes.
 */
217 size_t analog_datafeed_buf_size;
218 size_t analog_datafeed_buf_fill;
/*
 * Per analog channel: a single-element channel list and a digits count,
 * both handed to the datafeed by flush_analog_samples().
 */
219 GSList **analog_datafeed_channels;
220 int *analog_datafeed_digits;
222 /* Current line number. */
225 /* List of previously created sigrok channels. */
226 GSList *prev_sr_channels;
230 * Primitive operations to handle sample sets:
231 * - Keep a buffer for datafeed submission, capable of holding many
232 * samples (reduces call overhead, improves throughput).
233 * - Have a "current sample set" pointer reference one position in that
234 * large samples buffer.
235 * - Clear the current sample set before text line inspection, then set
236 * the bits which are found active in the current line of text input.
237 * Phrase the API such that call sites can be kept simple. Advance to
238 * the next sample set between lines, flush the larger buffer as needed
239 * (when it is full, or upon EOF).
/*
 * Send the samplerate to the session as an SR_DF_META packet, at most
 * once: nothing is sent when the rate is zero or when it was sent before.
 */
242 static int flush_samplerate(const struct sr_input *in)
245 struct sr_datafeed_packet packet;
246 struct sr_datafeed_meta meta;
247 struct sr_config *src;
250 if (inc->samplerate && !inc->samplerate_sent) {
251 packet.type = SR_DF_META;
252 packet.payload = &meta;
253 src = sr_config_new(SR_CONF_SAMPLERATE, g_variant_new_uint64(inc->samplerate));
254 meta.config = g_slist_append(NULL, src);
/* The send result is not inspected here. */
255 sr_session_send(in->sdi, &packet);
256 g_slist_free(meta.config);
/* Remember the submission, subsequent calls become no-ops. */
258 inc->samplerate_sent = TRUE;
/*
 * Start a new logic sample: make sample_buffer reference the current fill
 * position within the datafeed buffer, and zero one sample unit there.
 * Guarded by a check for the presence of logic channels.
 */
264 static void clear_logic_samples(struct context *inc)
266 if (!inc->logic_channels)
268 inc->sample_buffer = &inc->datafeed_buffer[inc->datafeed_buf_fill];
269 memset(inc->sample_buffer, 0, inc->sample_unit_size);
/*
 * Set the bit of logic channel ch_idx in the current sample. Channel N
 * lives in byte N / 8 at bit position N % 8. The visible code only ever
 * sets bits, which goes with clear_logic_samples() zeroing the sample
 * first. NOTE(review): how the "on" parameter is evaluated is not visible
 * in this listing; presumably the routine returns early for a false value.
 */
272 static void set_logic_level(struct context *inc, size_t ch_idx, int on)
274 size_t byte_idx, bit_idx;
/* Range check against the number of logic channels. */
277 if (ch_idx >= inc->logic_channels)
282 byte_idx = ch_idx / 8;
283 bit_idx = ch_idx % 8;
284 bit_mask = 1 << bit_idx;
285 inc->sample_buffer[byte_idx] |= bit_mask;
/*
 * Submit all queued logic samples to the session in one SR_DF_LOGIC
 * packet, then mark the queue as empty. The samplerate gets flushed first
 * so that the meta packet precedes the sample data.
 */
288 static int flush_logic_samples(const struct sr_input *in)
291 struct sr_datafeed_packet packet;
292 struct sr_datafeed_logic logic;
/* Check for an empty queue. */
296 if (!inc->datafeed_buf_fill)
299 rc = flush_samplerate(in);
303 memset(&packet, 0, sizeof(packet));
304 memset(&logic, 0, sizeof(logic));
305 packet.type = SR_DF_LOGIC;
306 packet.payload = &logic;
/* The length is a byte count, the fill level is kept in bytes. */
307 logic.unitsize = inc->sample_unit_size;
308 logic.length = inc->datafeed_buf_fill;
309 logic.data = inc->datafeed_buffer;
311 rc = sr_session_send(in->sdi, &packet);
315 inc->datafeed_buf_fill = 0;
/*
 * Commit the current logic sample to the queue by advancing the fill
 * level by one sample unit. Flushes the queue when it became full.
 */
319 static int queue_logic_samples(const struct sr_input *in)
325 if (!inc->logic_channels)
328 inc->datafeed_buf_fill += inc->sample_unit_size;
/*
 * Comparing for equality is sufficient: initial_parse() makes the buffer
 * size a whole multiple of the sample unit size.
 */
329 if (inc->datafeed_buf_fill == inc->datafeed_buf_size) {
330 rc = flush_logic_samples(in);
/* Forward declaration, clear_analog_samples() calls the routine before its definition. */
337 static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value);
/*
 * Start a new set of analog samples: make analog_sample_buffer reference
 * the current fill position within the analog datafeed buffer, and assign
 * the value 0.0 to every analog channel. Guarded by a check for the
 * presence of analog channels.
 */
339 static void clear_analog_samples(struct context *inc)
343 if (!inc->analog_channels)
345 inc->analog_sample_buffer = &inc->analog_datafeed_buffer[inc->analog_datafeed_buf_fill];
346 for (idx = 0; idx < inc->analog_channels; idx++)
347 set_analog_value(inc, idx, 0.0);
/*
 * Store the value of analog channel ch_idx for the current sample set.
 * Channels are range checked against the number of analog channels.
 */
350 static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value)
352 if (ch_idx >= inc->analog_channels)
/*
 * Consecutive channels are analog_datafeed_buf_size elements apart, and
 * analog_sample_buffer already references the current sample's position
 * within the first channel's run of samples.
 */
356 inc->analog_sample_buffer[ch_idx * inc->analog_datafeed_buf_size] = value;
/*
 * Submit all queued analog samples to the session, one SR_DF_ANALOG
 * packet per analog channel, then mark the queue as empty. The samplerate
 * gets flushed first so that the meta packet precedes the sample data.
 */
359 static int flush_analog_samples(const struct sr_input *in)
362 struct sr_datafeed_packet packet;
363 struct sr_datafeed_analog analog;
364 struct sr_analog_encoding encoding;
365 struct sr_analog_meaning meaning;
366 struct sr_analog_spec spec;
367 csv_analog_t *samples;
/* Check for an empty queue. */
373 if (!inc->analog_datafeed_buf_fill)
376 rc = flush_samplerate(in);
/* Start at the first channel's samples, advance per loop iteration. */
380 samples = inc->analog_datafeed_buffer;
381 for (ch_idx = 0; ch_idx < inc->analog_channels; ch_idx++) {
382 digits = inc->analog_datafeed_digits[ch_idx];
383 sr_analog_init(&analog, &encoding, &meaning, &spec, digits);
384 memset(&packet, 0, sizeof(packet));
385 packet.type = SR_DF_ANALOG;
386 packet.payload = &analog;
387 analog.num_samples = inc->analog_datafeed_buf_fill;
388 analog.data = samples;
/* The packet carries a single channel's data; quantity, flags and unit remain unspecified (0). */
389 analog.meaning->channels = inc->analog_datafeed_channels[ch_idx];
390 analog.meaning->mq = 0;
391 analog.meaning->mqflags = 0;
392 analog.meaning->unit = 0;
/* Samples are passed in the host's native floating point representation. */
393 analog.encoding->unitsize = sizeof(samples[0]);
394 analog.encoding->is_signed = TRUE;
395 analog.encoding->is_float = TRUE;
396 #ifdef WORDS_BIGENDIAN
397 analog.encoding->is_bigendian = TRUE;
399 analog.encoding->is_bigendian = FALSE;
401 analog.encoding->digits = spec.spec_digits;
402 rc = sr_session_send(in->sdi, &packet);
/* Skip to the next channel's run of samples. */
405 samples += inc->analog_datafeed_buf_size;
408 inc->analog_datafeed_buf_fill = 0;
/*
 * Commit the current set of analog samples to the queue by advancing the
 * fill level by one (the level counts samples per channel). Flushes the
 * queue when it became full.
 */
412 static int queue_analog_samples(const struct sr_input *in)
418 if (!inc->analog_channels)
421 inc->analog_datafeed_buf_fill++;
422 if (inc->analog_datafeed_buf_fill == inc->analog_datafeed_buf_size) {
423 rc = flush_analog_samples(in);
430 /* Helpers for "column processing". */
/*
 * Parse one field of the "column_formats" spec into its three parts: the
 * column (repeat) count, the format code, and the bit count. Results are
 * returned via the column_count, format and bit_count pointers.
 * NOTE(review): the error paths and the value which represents the '*'
 * wildcard are not visible in this listing.
 */
432 static int split_column_format(const char *spec,
433 size_t *column_count, enum single_col_format *format, size_t *bit_count)
436 char *endp, format_char;
437 enum single_col_format format_code;
442 /* Get the (optional, decimal, default 1) column count. Accept '*'. */
445 /* Workaround, strtoul("*") won't always yield expected endp. */
447 endp = (char *)&spec[1];
449 count = strtoul(spec, &endp, 10);
456 *column_count = count;
459 /* Get the (mandatory, single letter) type spec (-/xob/l). */
460 format_char = *spec++;
461 switch (format_char) {
465 format_code = FORMAT_NONE;
468 format_code = FORMAT_HEX;
471 format_code = FORMAT_OCT;
475 format_code = FORMAT_BIN;
478 format_code = FORMAT_ANALOG;
480 default: /* includes NUL */
484 *format = format_code;
486 /* Get the (optional, decimal, default 1) bit count. */
488 count = strtoul(spec, &endp, 10);
/*
 * Default count. For analog columns the number is not a bit count,
 * make_column_details_from_format() stores it as the column's
 * analog_digits value.
 */
492 count = (format_code == FORMAT_ANALOG) ? 3 : 1;
/* NOTE(review): the action taken for 'l' is not visible here; presumably it forces a count of 1 (single-bit logic). */
495 if (format_char == 'l')
501 /* Input spec must have been exhausted. */
/*
 * Translate a "column_formats" spec into the per-column processing
 * details, and create the sigrok channels.
 *
 * Works in two passes over the comma separated format fields: The first
 * pass validates the fields and counts columns as well as logic and
 * analog channels. The second pass fills in inc->column_details[] and
 * creates one sigrok channel per logic bit and per analog column.
 *
 * column_texts holds the columns of the first processed text line. It
 * provides the column count for the "all remaining columns" wildcard, and
 * the channel names when a header line is in use.
 */
508 static int make_column_details_from_format(const struct sr_input *in,
509 const char *column_format, char **column_texts)
512 char **formats, *format;
513 size_t format_count, column_count, logic_count, analog_count;
514 size_t auto_column_count;
515 size_t format_idx, c, b, column_idx, channel_idx, analog_idx;
516 enum single_col_format f;
517 struct column_details *detail;
518 GString *channel_name;
522 int channel_type, channel_sdi_nr;
526 inc->column_seen_count = g_strv_length(column_texts);
528 /* Split the input spec, count involved columns and bits. */
529 formats = g_strsplit(column_format, ",", 0);
531 sr_err("Cannot parse columns format %s (comma split).", column_format);
534 format_count = g_strv_length(formats);
536 sr_err("Cannot parse columns format %s (field count).", column_format);
540 column_count = logic_count = analog_count = 0;
541 auto_column_count = 0;
542 for (format_idx = 0; format_idx < format_count; format_idx++) {
543 format = formats[format_idx];
544 ret = split_column_format(format, &c, &f, &b);
545 sr_dbg("fmt %s -> %zu cols, %s fmt, %zu bits, rc %d", format, c, col_format_text[f], b, ret);
547 sr_err("Cannot parse columns format %s (field split, %s).", column_format, format);
552 /* User requested "auto-count", must be last format. */
/* The vector is NULL terminated, a non-NULL successor means more fields follow. */
553 if (formats[format_idx + 1]) {
554 sr_err("Auto column count must be last format field.");
/* The wildcard covers those seen columns which earlier fields did not consume. */
558 auto_column_count = inc->column_seen_count - column_count;
559 c = auto_column_count;
562 if (f == FORMAT_ANALOG)
565 logic_count += c * b;
567 sr_dbg("Column format %s -> %zu columns, %zu logic, %zu analog channels.",
568 column_format, column_count, logic_count, analog_count);
570 /* Allocate and fill in "column processing" details. Create channels. */
571 inc->column_want_count = column_count;
572 if (inc->column_seen_count < inc->column_want_count) {
573 sr_err("Insufficient input text width for desired data amount, got %zu but want %zu columns.",
574 inc->column_seen_count, inc->column_want_count);
578 inc->column_details = g_malloc0_n(column_count, sizeof(inc->column_details[0]));
579 column_idx = channel_idx = analog_idx = 0;
580 channel_name = g_string_sized_new(64);
581 for (format_idx = 0; format_idx < format_count; format_idx++) {
582 /* Process a format field, which can span multiple columns. */
583 format = formats[format_idx];
/* The return code is ignored here, the same call was made for this field in the first pass above. */
584 (void)split_column_format(format, &c, &f, &b);
586 c = auto_column_count;
588 /* Fill in a column's processing details. */
/* Due to the post-increment, col_nr is the 1-based column number. */
589 detail = &inc->column_details[column_idx++];
590 detail->col_nr = column_idx;
591 detail->text_format = f;
592 if (detail->text_format == FORMAT_ANALOG) {
593 detail->channel_offset = analog_idx;
594 detail->channel_count = 1;
595 detail->analog_digits = b;
596 analog_idx += detail->channel_count;
597 } else if (detail->text_format) {
598 detail->channel_offset = channel_idx;
599 detail->channel_count = b;
600 channel_idx += detail->channel_count;
602 sr_dbg("detail -> col %zu, fmt %s, ch off/cnt %zu/%zu",
603 detail->col_nr, col_format_text[detail->text_format],
604 detail->channel_offset, detail->channel_count);
/* Check for columns which shall be ignored (FORMAT_NONE). */
605 if (!detail->text_format)
608 * Create channels with appropriate names. Optionally
609 * use text from a header line (when requested by the
610 * user). In the absence of header text, channels are
611 * assigned rather generic names.
613 * Manipulation of the column's caption (when a header
614 * line is seen) is acceptable, because this header
615 * line won't get processed another time.
617 column = column_texts[detail->col_nr - 1];
618 if (inc->use_header && column && *column)
619 caption = sr_scpi_unquote_string(column);
622 if (!caption || !*caption)
625 * TODO Need we first create _all_ logic channels,
626 * before creating analog channels?
/*
 * Naming: the caption as is for single-channel columns, "caption[N]" for
 * multi-bit columns, the plain channel number in the absence of a caption.
 */
628 for (create_idx = 0; create_idx < detail->channel_count; create_idx++) {
629 if (caption && detail->channel_count == 1) {
630 g_string_assign(channel_name, caption);
631 } else if (caption) {
632 g_string_printf(channel_name, "%s[%zu]",
633 caption, create_idx);
635 g_string_printf(channel_name, "%zu",
636 detail->channel_offset + create_idx);
638 if (detail->text_format == FORMAT_ANALOG) {
/* Analog channel numbers follow after all logic channels' numbers. */
639 channel_sdi_nr = logic_count + detail->channel_offset + create_idx;
640 channel_type = SR_CHANNEL_ANALOG;
/* Keep the list position which the new channel will get, for later lookup in initial_parse(). */
641 detail->channel_index = g_slist_length(in->sdi->channels);
643 channel_sdi_nr = detail->channel_offset + create_idx;
644 channel_type = SR_CHANNEL_LOGIC;
646 sr_channel_new(in->sdi, channel_sdi_nr,
647 channel_type, TRUE, channel_name->str);
651 inc->logic_channels = channel_idx;
652 inc->analog_channels = analog_idx;
653 g_string_free(channel_name, TRUE);
/*
 * Get the processing details for the column with the 1-based number nr.
 * Checks for a missing context or details table, and for numbers outside
 * of 1..column_want_count. NOTE(review): the result for failed checks is
 * not visible in this listing, presumably NULL.
 */
659 static const struct column_details *lookup_column_details(struct context *inc, size_t nr)
661 if (!inc || !inc->column_details)
663 if (!nr || nr > inc->column_want_count)
665 return &inc->column_details[nr - 1];
669 * Primitive operations for text input: Strip comments off text lines.
670 * Split text lines into columns. Process input text for individual
671 * columns.
/*
 * Locate the first occurrence of the comment leader within the text
 * line. NOTE(review): what happens at the found position is not visible
 * in this listing; going by the routine's name and the non-const buffer
 * the text presumably gets terminated there -- confirm.
 */
674 static void strip_comment(char *buf, const GString *prefix)
681 if ((ptr = strstr(buf, prefix->str))) {
688 * @brief Splits a text line into a set of columns.
690 * @param[in] buf The input text line to split.
691 * @param[in] inc The input module's context.
693 * @returns An array of strings, representing the columns' text.
695 * This routine splits a text line on previously determined separators.
/*
 * Thin wrapper around g_strsplit() which uses the context's column
 * separator string, without a limit on the number of resulting columns.
 * initial_parse() releases the result by means of g_strfreev().
 */
697 static char **split_line(char *buf, struct context *inc)
699 return g_strsplit(buf, inc->delimiter->str, 0);
703 * @brief Parse a multi-bit field into several logic channels.
705 * @param[in] column The input text, a run of bin/hex/oct digits.
706 * @param[in] inc The input module's context.
707 * @param[in] details The column processing details.
709 * @retval SR_OK Success.
710 * @retval SR_ERR Invalid input data (empty, or format error).
712 * This routine modifies the logic levels in the current sample set,
713 * based on the text input and a user provided format spec.
/*
 * Shared parse routine for binary, octal and hexadecimal columns (see
 * col_parse_funcs[]). The read position starts past the text's last
 * character, and the loop runs while the position is above the start of
 * the text and channels remain; the channel index starts at the column's
 * channel_offset.
 */
715 static int parse_logic(const char *column, struct context *inc,
716 const struct column_details *details)
718 size_t length, ch_rem, ch_idx, ch_inc;
722 const char *type_text;
726 * Prepare to read the digits from the text end towards the start.
727 * A digit corresponds to a variable number of channels (depending
728 * on the value's radix). Prepare the mapping of text digits to
729 * (a number of) logic channels.
731 length = strlen(column);
733 sr_err("Column %zu in line %zu is empty.", details->col_nr,
737 rdptr = &column[length];
738 ch_idx = details->channel_offset;
739 ch_rem = details->channel_count;
742 * Get another digit and derive up to four logic channels' state from
743 * it. Make sure to not process more bits than the column has channels
744 * associated with it.
746 while (rdptr > column && ch_rem) {
747 /* Check for valid digits according to the input radix. */
/*
 * Each check combines the "is hex digit" test with an upper bound for the
 * character: below '2' only leaves '0' and '1', below '8' only leaves '0'
 * to '7' (letters compare above the decimal digits).
 */
749 switch (details->text_format) {
751 valid = g_ascii_isxdigit(c) && c < '2';
755 valid = g_ascii_isxdigit(c) && c < '8';
759 valid = g_ascii_isxdigit(c);
767 type_text = col_format_text[details->text_format];
768 sr_err("Invalid text '%s' in %s type column %zu in line %zu.",
769 column, type_text, details->col_nr, inc->line_number);
772 /* Use the digit's bits for logic channels' data. */
/*
 * Bit N of the digit's value controls channel ch_idx + N. Which of the
 * calls below execute for a given radix depends on case labels which are
 * not visible in this listing.
 */
773 bits = g_ascii_xdigit_value(c);
774 switch (details->text_format) {
778 set_logic_level(inc, ch_idx + 3, bits & (1 << 3));
784 set_logic_level(inc, ch_idx + 2, bits & (1 << 2));
788 set_logic_level(inc, ch_idx + 1, bits & (1 << 1));
793 set_logic_level(inc, ch_idx + 0, bits & (1 << 0));
797 /* ShouldNotHappen(TM), but silences compiler warning. */
803 * TODO Determine whether the availability of extra input data
804 * for unhandled logic channels is worth warning here. In this
805 * implementation users are in control, and can have the more
806 * significant bits ignored (which can be considered a feature
807 * and not really a limitation).
814 * @brief Parse a floating point text into an analog value.
816 * @param[in] column The input text, a floating point number.
817 * @param[in] inc The input module's context.
818 * @param[in] details The column processing details.
820 * @retval SR_OK Success.
821 * @retval SR_ERR Invalid input data (empty, or format error).
823 * This routine modifies the analog values in the current sample set,
824 * based on the text input and a user provided format spec.
/*
 * Parse routine for analog columns (see col_parse_funcs[]). Converts the
 * column's text to a number and stores it for the column's analog channel
 * (channel_offset) in the current sample set.
 */
826 static int parse_analog(const char *column, struct context *inc,
827 const struct column_details *details)
830 double dvalue; float fvalue;
/* Only columns of analog format are accepted. */
834 if (details->text_format != FORMAT_ANALOG)
837 length = strlen(column);
839 sr_err("Column %zu in line %zu is empty.", details->col_nr,
/*
 * Pick the conversion routine which matches the size of "value". Its
 * declaration is not visible in this listing, presumably it is of type
 * csv_analog_t as expected by set_analog_value().
 */
843 if (sizeof(value) == sizeof(double)) {
844 ret = sr_atod_ascii(column, &dvalue);
846 } else if (sizeof(value) == sizeof(float)) {
847 ret = sr_atof_ascii(column, &fvalue);
853 sr_err("Cannot parse analog text %s in column %zu in line %zu.",
854 column, details->col_nr, inc->line_number);
857 set_analog_value(inc, details->channel_offset, value);
863 * @brief Parse routine which ignores the input text.
865 * This routine exists to unify dispatch code paths, mapping input file
866 * columns' data types to their respective parse routines.
/*
 * Parse routine for FORMAT_NONE columns (see col_parse_funcs[]). Shares
 * the col_parse_cb signature with the other parse routines. The body is
 * not visible in this listing.
 */
868 static int parse_ignore(const char *column, struct context *inc,
869 const struct column_details *details)
/*
 * Common signature of the column parse routines parse_ignore(),
 * parse_logic() and parse_analog(): the column's text, the module
 * context, and the column's processing details.
 */
877 typedef int (*col_parse_cb)(const char *column, struct context *inc,
878 const struct column_details *details);
/*
 * Dispatch table which maps a column's text format (enum
 * single_col_format) to its parse routine. All three digit based formats
 * share parse_logic(), which inspects the format itself.
 */
880 static const col_parse_cb col_parse_funcs[] = {
881 [FORMAT_NONE] = parse_ignore,
882 [FORMAT_BIN] = parse_logic,
883 [FORMAT_OCT] = parse_logic,
884 [FORMAT_HEX] = parse_logic,
885 [FORMAT_ANALOG] = parse_analog,
/*
 * Set up the input module: allocate the device instance and the module
 * context (both zero-filled), evaluate the user's options, and determine
 * the "column_formats" string, either as given by the user or derived
 * from the legacy single-column/multi-column options.
 *
 * Every option is looked up in the hash table and passed to a
 * g_variant_get_*() call right away. NOTE(review): this assumes that the
 * caller provides a value for each of the option keys -- confirm.
 */
888 static int init(struct sr_input *in, GHashTable *options)
891 size_t single_column, first_column, logic_channels;
893 enum single_col_format format;
896 in->sdi = g_malloc0(sizeof(*in->sdi));
897 in->priv = inc = g_malloc0(sizeof(*inc));
899 single_column = g_variant_get_uint32(g_hash_table_lookup(options, "single_column"));
900 logic_channels = g_variant_get_uint32(g_hash_table_lookup(options, "logic_channels"));
901 inc->delimiter = g_string_new(g_variant_get_string(
902 g_hash_table_lookup(options, "column_separator"), NULL));
903 if (!inc->delimiter->len) {
904 sr_err("Column separator cannot be empty.");
907 s = g_variant_get_string(g_hash_table_lookup(options, "single_format"), NULL);
/*
 * Only the first three characters get compared, ignoring case. Longer
 * spellings which start with these letters are accepted as well.
 */
908 if (g_ascii_strncasecmp(s, "bin", 3) == 0) {
910 } else if (g_ascii_strncasecmp(s, "hex", 3) == 0) {
912 } else if (g_ascii_strncasecmp(s, "oct", 3) == 0) {
915 sr_err("Invalid single-column format: '%s'", s);
918 inc->comment = g_string_new(g_variant_get_string(
919 g_hash_table_lookup(options, "comment_leader"), NULL));
920 if (g_string_equal(inc->comment, inc->delimiter)) {
922 * Using the same sequence as comment leader and column
923 * separator won't work. The user probably specified ';'
924 * as the column separator but did not adjust the comment
925 * leader. Try DWIM, drop comment strippin support here.
927 sr_warn("Comment leader and column separator conflict, disabling comment support.");
928 g_string_truncate(inc->comment, 0);
930 inc->samplerate = g_variant_get_uint64(g_hash_table_lookup(options, "samplerate"));
931 first_column = g_variant_get_uint32(g_hash_table_lookup(options, "first_column"));
932 inc->use_header = g_variant_get_boolean(g_hash_table_lookup(options, "header"));
933 inc->start_line = g_variant_get_uint32(g_hash_table_lookup(options, "start_line"));
/* Line numbers are 1-based, reject 0. */
934 if (inc->start_line < 1) {
935 sr_err("Invalid start line %zu.", inc->start_line);
940 * Scan flexible, to get prefered format specs which describe
941 * the input file's data formats. As well as some simple specs
942 * for backwards compatibility and user convenience.
944 * This logic ends up with a copy of the format string, either
945 * user provided or internally derived. Actual creation of the
946 * column processing details gets deferred until the first line
947 * of input data was seen. To support automatic determination of
948 * e.g. channel counts from column counts.
950 s = g_variant_get_string(g_hash_table_lookup(options, "column_formats"), NULL);
/* First choice: the user's column_formats spec (the condition is not visible in this listing). */
952 inc->column_formats = g_strdup(s);
953 sr_dbg("User specified column_formats: %s.", s);
/* Legacy single-column mode, requires both a column number and a channel count. */
954 } else if (single_column && logic_channels) {
955 format_char = col_format_char[format];
956 if (single_column == 1) {
957 inc->column_formats = g_strdup_printf("%c%zu",
958 format_char, logic_channels);
960 inc->column_formats = g_strdup_printf("%zu-,%c%zu",
962 format_char, logic_channels);
964 sr_dbg("Backwards compat single_column, col %zu, fmt %s, bits %zu -> %s.",
965 single_column, col_format_text[format], logic_channels,
966 inc->column_formats);
/* Legacy multi-column mode: optionally skip leading columns, then single-bit logic columns. */
967 } else if (!single_column) {
968 if (first_column > 1) {
969 inc->column_formats = g_strdup_printf("%zu-,%zul",
970 first_column - 1, logic_channels);
972 inc->column_formats = g_strdup_printf("%zul",
975 sr_dbg("Backwards compat multi-column, col %zu, chans %zu -> %s.",
976 first_column, logic_channels,
977 inc->column_formats);
979 sr_warn("Unknown or unsupported columns layout spec, assuming simple multi-column mode.");
980 inc->column_formats = g_strdup("*l");
987 * Check the channel list for consistency across file re-import. See
988 * the VCD input module for more details and motivation.
/*
 * Move the device instance's channel list into the context's
 * prev_sr_channels member, after releasing a previously kept list
 * including its channels. Leaves the device instance without channels.
 */
991 static void keep_header_for_reread(const struct sr_input *in)
996 g_slist_free_full(inc->prev_sr_channels, sr_channel_free_cb);
997 inc->prev_sr_channels = in->sdi->channels;
998 in->sdi->channels = NULL;
/*
 * Compare the newly created channel list against the list which was kept
 * by keep_header_for_reread(). A difference is reported as an error. When
 * the lists match, the new list gets released and the previous list gets
 * reinstated. NOTE(review): the return values are not visible in this
 * listing; initial_parse() treats a false result as failure.
 */
1001 static int check_header_in_reread(const struct sr_input *in)
1003 struct context *inc;
/* Without a previously kept list there is nothing to compare against. */
1010 if (!inc->prev_sr_channels)
1013 if (sr_channel_lists_differ(inc->prev_sr_channels, in->sdi->channels)) {
1014 sr_err("Channel list change not supported for file re-read.");
/* Drop the new channels, continue to use the previously created ones. */
1017 g_slist_free_full(in->sdi->channels, sr_channel_free_cb);
1018 in->sdi->channels = inc->prev_sr_channels;
1019 inc->prev_sr_channels = NULL;
/*
 * Set of line termination characters. initial_parse() splits the input
 * at any of these characters when no termination sequence is known.
 */
1024 static const char *delim_set = "\r\n";
/*
 * Determine the line termination sequence by inspecting the buffered
 * input text. The two-character CR LF sequence is checked first, since
 * text with that sequence also matches the single-character checks which
 * follow. NOTE(review): the returned values are not visible in this
 * listing, presumably the matching sequence or NULL when none was found.
 */
1026 static const char *get_line_termination(GString *buf)
1031 if (g_strstr_len(buf->str, buf->len, "\r\n"))
1033 else if (memchr(buf->str, '\n', buf->len))
1035 else if (memchr(buf->str, '\r', buf->len))
/*
 * Inspect the first chunk of input text: locate the first line to
 * process (skipping lines before start_line, blank lines, and lines which
 * are empty after comment removal), split that line into columns, create
 * the column processing details and channels from the format spec, and
 * allocate the datafeed buffers for logic and analog samples.
 */
1041 static int initial_parse(const struct sr_input *in, GString *buf)
1043 struct context *inc;
1045 size_t line_number, line_idx;
1047 char **lines, *line, **columns;
1053 /* Search for the first line to process (header or data). */
/* Split at the known termination sequence, or at any CR/LF character. */
1055 if (inc->termination)
1056 lines = g_strsplit(buf->str, inc->termination, 0);
1058 lines = g_strsplit_set(buf->str, delim_set, 0);
1059 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1061 if (inc->start_line > line_number) {
1062 sr_spew("Line %zu skipped (before start).", line_number);
1065 if (line[0] == '\0') {
1066 sr_spew("Blank line %zu skipped.", line_number);
1069 strip_comment(line, inc->comment);
1070 if (line[0] == '\0') {
1071 sr_spew("Comment-only line %zu skipped.", line_number);
1075 /* Reached first proper line. */
1079 /* Not enough data for a proper line yet. */
1084 /* Get the number of columns in the line. */
1085 columns = split_line(line, inc);
1087 sr_err("Error while parsing line %zu.", line_number);
1091 num_columns = g_strv_length(columns);
1093 sr_err("Error while parsing line %zu.", line_number);
1097 sr_dbg("DIAG Got %zu columns in text line: %s.", num_columns, line);
1100 * Interpret the user provided column format specs. This might
1101 * involve inspection of the now received input text, to support
1102 * e.g. automatic detection of channel counts in the absence of
1103 * user provided specs. Optionally a header line is used to get
1106 * Check the then created channels for consistency across .reset
1107 * and .receive sequences (file re-load).
1109 ret = make_column_details_from_format(in, inc->column_formats, columns);
1111 sr_err("Cannot parse columns format using line %zu.", line_number);
/* The check's result is used as a boolean, false means failure. */
1114 if (!check_header_in_reread(in)) {
1120 * Allocate buffer memory for datafeed submission of sample data.
1121 * Calculate the minimum buffer size to store the set of samples
1122 * of all channels (unit size). Determine a larger buffer size
1123 * for datafeed submission that is a multiple of the unit size.
1124 * Allocate the larger buffer, the "sample buffer" will point
1125 * to a location within that large buffer later.
1127 if (inc->logic_channels) {
/* One bit per logic channel, rounded up to whole bytes. */
1128 inc->sample_unit_size = (inc->logic_channels + 7) / 8;
/* Room for CHUNK_SIZE samples, the size in bytes is a whole multiple of the unit size. */
1129 inc->datafeed_buf_size = CHUNK_SIZE;
1130 inc->datafeed_buf_size *= inc->sample_unit_size;
1131 inc->datafeed_buffer = g_malloc(inc->datafeed_buf_size);
/* NOTE(review): GLib documents g_malloc() to abort upon failed allocation, so this check presumably never triggers -- confirm. */
1132 if (!inc->datafeed_buffer) {
1133 sr_err("Cannot allocate datafeed send buffer (logic).");
1134 ret = SR_ERR_MALLOC;
1137 inc->datafeed_buf_fill = 0;
1140 if (inc->analog_channels) {
1141 size_t sample_size, sample_count;
1143 struct column_details *detail;
/*
 * The analog buffer size is a sample count per channel: CHUNK_SIZE bytes
 * get divided by the sample size and the number of analog channels. The
 * allocation then covers that many samples for each of the channels.
 */
1146 sample_size = sizeof(inc->analog_datafeed_buffer[0]);
1147 inc->analog_datafeed_buf_size = CHUNK_SIZE;
1148 inc->analog_datafeed_buf_size /= sample_size;
1149 inc->analog_datafeed_buf_size /= inc->analog_channels;
1150 sample_count = inc->analog_channels * inc->analog_datafeed_buf_size;
1151 inc->analog_datafeed_buffer = g_malloc0(sample_count * sample_size);
1152 if (!inc->analog_datafeed_buffer) {
1153 sr_err("Cannot allocate datafeed send buffer (analog).");
1154 ret = SR_ERR_MALLOC;
1157 inc->analog_datafeed_buf_fill = 0;
1158 inc->analog_datafeed_channels = g_malloc0(inc->analog_channels * sizeof(inc->analog_datafeed_channels[0]));
1159 inc->analog_datafeed_digits = g_malloc0(inc->analog_channels * sizeof(inc->analog_datafeed_digits[0]));
/*
 * Digits are stored in the order in which analog columns are found,
 * channel lists are stored by the columns' channel_offset. Both orders
 * agree, analog offsets are assigned in increasing column order.
 */
1160 digits_item = inc->analog_datafeed_digits;
1161 for (detail_idx = 0; detail_idx < inc->column_want_count; detail_idx++) {
1162 detail = &inc->column_details[detail_idx];
1163 if (detail->text_format != FORMAT_ANALOG)
1165 channel = g_slist_nth_data(in->sdi->channels, detail->channel_index);
1166 inc->analog_datafeed_channels[detail->channel_offset] = g_slist_append(NULL, channel);
1167 *digits_item++ = detail->analog_digits;
1173 g_strfreev(columns);
1180 * Gets called from initial_receive(), which runs until the end-of-line
1181 * encoding of the input stream could get determined. Assumes that this
1182 * routine receives enough buffered initial input data to either see the
1183 * BOM when there is one, or that no BOM will follow when a text line
1184 * termination sequence was seen. Silently drops the UTF-8 BOM sequence
1185 * from the input buffer if one was seen. Does not care to protect
1186 * against multiple execution or dropping the BOM multiple times --
1187 * there should be at most one in the input stream.
1189 static void initial_bom_check(const struct sr_input *in)
1191 static const char *utf8_bom = "\xef\xbb\xbf";
1193 if (in->buf->len < strlen(utf8_bom))
1195 if (strncmp(in->buf->str, utf8_bom, strlen(utf8_bom)) != 0)
1197 g_string_erase(in->buf, 0, strlen(utf8_bom));
1200 static int initial_receive(const struct sr_input *in)
1202 struct context *inc;
1206 const char *termination;
1208 initial_bom_check(in);
1212 termination = get_line_termination(in->buf);
1214 /* Don't have a full line yet. */
1217 p = g_strrstr_len(in->buf->str, in->buf->len, termination);
1219 /* Don't have a full line yet. */
1221 len = p - in->buf->str - 1;
1222 new_buf = g_string_new_len(in->buf->str, len);
1223 g_string_append_c(new_buf, '\0');
1225 inc->termination = g_strdup(termination);
1227 if (in->buf->str[0] != '\0')
1228 ret = initial_parse(in, new_buf);
1232 g_string_free(new_buf, TRUE);
1237 static int process_buffer(struct sr_input *in, gboolean is_eof)
1239 struct context *inc;
1241 size_t line_idx, col_idx, col_nr;
1242 const struct column_details *details;
1243 col_parse_cb parse_func;
1245 char *p, **lines, *line, **columns, *column;
1248 if (!inc->started) {
1249 std_session_send_df_header(in->sdi);
1250 inc->started = TRUE;
1254 * Consider empty input non-fatal. Keep accumulating input until
1255 * at least one full text line has become available. Grab the
1256 * maximum amount of accumulated data that consists of full text
1257 * lines, and process what has been received so far, leaving not
1258 * yet complete lines for the next invocation.
1260 * Enforce that all previously buffered data gets processed in
1261 * the "EOF" condition. Do not insist in the presence of the
1262 * termination sequence for the last line (may often be missing
1263 * on Windows). A present termination sequence will just result
1264 * in the "execution of an empty line", and does not harm.
1269 p = in->buf->str + in->buf->len;
1271 p = g_strrstr_len(in->buf->str, in->buf->len, inc->termination);
1275 p += strlen(inc->termination);
1277 g_strstrip(in->buf->str);
1280 lines = g_strsplit(in->buf->str, inc->termination, 0);
1281 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1283 if (inc->line_number < inc->start_line) {
1284 sr_spew("Line %zu skipped (before start).", inc->line_number);
1287 if (line[0] == '\0') {
1288 sr_spew("Blank line %zu skipped.", inc->line_number);
1292 /* Remove trailing comment. */
1293 strip_comment(line, inc->comment);
1294 if (line[0] == '\0') {
1295 sr_spew("Comment-only line %zu skipped.", inc->line_number);
1299 /* Skip the header line, its content was used as the channel names. */
1300 if (inc->use_header && !inc->header_seen) {
1301 sr_spew("Header line %zu skipped.", inc->line_number);
1302 inc->header_seen = TRUE;
1306 /* Split the line into columns, check for minimum length. */
1307 columns = split_line(line, inc);
1309 sr_err("Error while parsing line %zu.", inc->line_number);
1313 num_columns = g_strv_length(columns);
1314 if (num_columns < inc->column_want_count) {
1315 sr_err("Insufficient column count %zu in line %zu.",
1316 num_columns, inc->line_number);
1317 g_strfreev(columns);
1322 /* Have the columns of the current text line processed. */
1323 clear_logic_samples(inc);
1324 clear_analog_samples(inc);
1325 for (col_idx = 0; col_idx < inc->column_want_count; col_idx++) {
1326 column = columns[col_idx];
1327 col_nr = col_idx + 1;
1328 details = lookup_column_details(inc, col_nr);
1329 if (!details || !details->text_format)
1331 parse_func = col_parse_funcs[details->text_format];
1334 ret = parse_func(column, inc, details);
1336 g_strfreev(columns);
1342 /* Send sample data to the session bus (buffered). */
1343 ret = queue_logic_samples(in);
1344 ret += queue_analog_samples(in);
1346 sr_err("Sending samples failed.");
1347 g_strfreev(columns);
1352 g_strfreev(columns);
1355 g_string_erase(in->buf, 0, p - in->buf->str);
1360 static int receive(struct sr_input *in, GString *buf)
1362 struct context *inc;
1365 g_string_append_len(in->buf, buf->str, buf->len);
1368 if (!inc->column_seen_count) {
1369 ret = initial_receive(in);
1370 if (ret == SR_ERR_NA)
1371 /* Not enough data yet. */
1373 else if (ret != SR_OK)
1376 /* sdi is ready, notify frontend. */
1377 in->sdi_ready = TRUE;
1381 ret = process_buffer(in, FALSE);
1386 static int end(struct sr_input *in)
1388 struct context *inc;
1392 ret = process_buffer(in, TRUE);
1398 ret = flush_logic_samples(in);
1399 ret += flush_analog_samples(in);
1405 std_session_send_df_end(in->sdi);
1410 static void cleanup(struct sr_input *in)
1412 struct context *inc;
1414 keep_header_for_reread(in);
1418 g_free(inc->termination);
1419 inc->termination = NULL;
1420 g_free(inc->datafeed_buffer);
1421 inc->datafeed_buffer = NULL;
1422 g_free(inc->analog_datafeed_buffer);
1423 inc->analog_datafeed_buffer = NULL;
1426 static int reset(struct sr_input *in)
1428 struct context *inc = in->priv;
1431 inc->started = FALSE;
1432 g_string_truncate(in->buf, 0);
1451 static struct sr_option options[] = {
1453 "column_formats", "Column format specs",
1454 "Specifies text columns data types: comma separated list of [<cols>]<fmt>[<bits>], with -/x/o/b/l format specifiers.",
1457 [OPT_SINGLE_COL] = {
1458 "single_column", "Single column",
1459 "Enable single-column mode, exclusively use text from the specified column (number starting at 1).",
1463 "first_column", "First column",
1464 "Number of the first column with logic data in simple multi-column mode (number starting at 1, default 1).",
1468 "logic_channels", "Number of logic channels",
1469 "Logic channel count, required in simple single-column mode, defaults to \"all remaining columns\" in simple multi-column mode. Obsoleted by 'column_formats'.",
1473 "single_format", "Data format for simple single-column mode.",
1474 "The number format of single-column mode input data: bin, hex, oct.",
1478 "start_line", "Start line",
1479 "The line number at which to start processing input text (default: 1).",
1483 "header", "Get channel names from first line.",
1484 "Use the first processed line's column captions (when available) as channel names.",
1488 "samplerate", "Samplerate (Hz)",
1489 "The input data's sample rate in Hz.",
1493 "column_separator", "Column separator",
1494 "The sequence which separates text columns. Non-empty text, comma by default.",
1498 "comment_leader", "Comment leader character",
1499 "The text which starts comments at the end of text lines.",
1502 [OPT_MAX] = ALL_ZERO,
1505 static const struct sr_option *get_options(void)
1509 if (!options[0].def) {
1510 options[OPT_COL_FMTS].def = g_variant_ref_sink(g_variant_new_string(""));
1511 options[OPT_SINGLE_COL].def = g_variant_ref_sink(g_variant_new_uint32(0));
1512 options[OPT_FIRST_COL].def = g_variant_ref_sink(g_variant_new_uint32(1));
1513 options[OPT_NUM_LOGIC].def = g_variant_ref_sink(g_variant_new_uint32(0));
1514 options[OPT_FORMAT].def = g_variant_ref_sink(g_variant_new_string("bin"));
1516 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("bin")));
1517 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("hex")));
1518 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("oct")));
1519 options[OPT_FORMAT].values = l;
1520 options[OPT_START].def = g_variant_ref_sink(g_variant_new_uint32(1));
1521 options[OPT_HEADER].def = g_variant_ref_sink(g_variant_new_boolean(FALSE));
1522 options[OPT_RATE].def = g_variant_ref_sink(g_variant_new_uint64(0));
1523 options[OPT_DELIM].def = g_variant_ref_sink(g_variant_new_string(","));
1524 options[OPT_COMMENT].def = g_variant_ref_sink(g_variant_new_string(";"));
1530 SR_PRIV struct sr_input_module input_csv = {
1533 .desc = "Comma-separated values",
1534 .exts = (const char*[]){"csv", NULL},
1535 .options = get_options,