2 * This file is part of the libsigrok project.
4 * Copyright (C) 2013 Marc Schink <sigrok-dev@marcschink.de>
5 * Copyright (C) 2019 Gerhard Sittig <gerhard.sittig@gmx.net>
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
27 #include <libsigrok/libsigrok.h>
28 #include "libsigrok-internal.h"
29 #include "scpi.h" /* String un-quote for channel name from header line. */
31 #define LOG_PREFIX "input/csv"
33 #define CHUNK_SIZE (4 * 1024 * 1024)
36 * The CSV input module has the following options:
38 * column_formats: Specifies the data formats and channel counts for the
39 * input file's text columns. Accepts a comma separated list of tuples
40 * with: an optional column repeat count ('*' as a wildcard meaning
41 * "all remaining columns", only applicable to the last field), a format
42 * specifying character ('x' hexadecimal, 'o' octal, 'b' binary, 'l'
43 * single-bit logic), and an optional bit count (translating to: logic
44 * channels communicated in that column). The 'a' format marks analog
45 * data, an optionally following number is the digits count (resolution).
46 * This "column_formats" option is most versatile, other forms of
47 * specifying the column layout only exist for backwards compatibility,
48 * and are rather limited. They exclusively support logic input data in
49 * strictly adjacent columns, with further constraints on column layout
52 * single_column: Specifies the column number which contains the logic data
53 * for single-column mode. All logic data is taken from several bits
54 * which all are kept within that one column. Only exists for backwards
55 * compatibility, see "column_formats" for more flexibility.
57 * first_column: Specifies the number of the first column with logic data
58 * in simple multi-column mode. Only exists for backwards compatibility,
59 * see "column_formats" for more flexibility.
61 * logic_channels: Specifies the number of logic channels. Is required in
62 * simple single-column mode. Is optional in simple multi-column mode
63 * (and defaults to all remaining columns). Only exists for backwards
64 * compatibility, see "column_formats" for more flexibility.
66 * single_format: Specifies the format of the input text in simple single-
67 * column mode. Available formats are: 'bin' (default), 'hex' and 'oct'.
68 * Simple multi-column mode always uses single-bit data per column.
69 * Only exists for backwards compatibility, see "column_formats" for
72 * start_line: Specifies at which line to start processing the input file.
73 * Allows skipping leading lines which are neither header nor data lines.
74 * By default all of the input file gets processed.
76 * header: Boolean option, controls whether the first processed line is used
77 * to determine channel names. Off by default. Generic channel names are
78 * used in the absence of header line content.
80 * samplerate: Specifies the samplerate of the input data. Defaults to 0.
81 * User specs take precedence over data which optionally gets derived
84 * column_separator: Specifies the sequence which separates the text file
85 * columns. Cannot be empty. Defaults to comma.
87 * comment_leader: Specifies the sequence which starts comments that run
88 * up to the end of the current text line. Can be empty to disable
89 * comment support. Defaults to semicolon.
91 * Typical examples of using these options:
92 * - ... -I csv:column_formats=*l ...
93 * All columns are single-bit logic data. Identical to the previous
94 * multi-column mode (the default when no options were given at all).
95 * - ... -I csv:column_formats=3-,*l ...
96 * Ignore the first three columns, get single-bit logic data from all
97 * remaining columns (multi-column mode with first-column above 1).
98 * - ... -I csv:column_formats=3-,4l,x8 ...
99 * Ignore the first three columns, get single-bit logic data from the
100 * next four columns, then eight-bit data in hex format from the next
101 * column. More columns may follow in the input text but won't get
102 * processed. (Mix of previous multi-column as well as single-column
104 * - ... -I csv:column_formats=4x8,b16,5l ...
105 * Get eight-bit data in hex format from the first four columns, then
106 * sixteen-bit data in binary format, then five times single-bit data.
107 * - ... -I csv:single_column=2:single_format=bin:logic_channels=8 ...
108 * Get eight logic bits in binary format from column 2. (Simple
109 * single-column mode, corresponds to the "-,b8" format.)
110 * - ... -I csv:first_column=6:logic_channels=4 ...
111 * Get four single-bit logic channels from columns 6 to 9 respectively.
112 * (Simple multi-column mode, corresponds to the "5-,4b" format.)
113 * - ... -I csv:start_line=20:header=yes:...
114 * Skip the first 19 text lines. Use line 20 to derive channel names.
115 * Data starts at line 21.
116 * - ... -I csv:column_formats=*a6 ...
117 * Each column contains an analog value with six significant digits
118 * after the decimal period.
124 * - Extend support for analog input data.
125 * - Determine why analog samples of 'double' data type get scrambled
126 * in sigrok-cli screen output. Is analog.encoding->unitsize not
127 * handled properly? A sigrok-cli or libsigrok (src/output) issue?
128 * - Reconsider the channel creation after format processing. Current
129 * logic may "bleed" channel names into the analog group when logic
130 * channels' columns follow analog columns (seen with "-,2a,x8").
131 * Trying to sort it out, a naive change used to map logic channels'
132 * data to incorrect bitmap positions. The whole channel numbering
133 * needs reconsideration. Probably it's easiest to first create _all_
134 * logic channels so that they have adjacent numbers starting at 0
135 * (addressing logic bits), then all analog channels (again adjacent)
136 * to simplify the calculation of their index in the sample set as
137 * well as their sdi channel index from the "analog column index".
138 * - Optionally get sample rate from timestamp column. Just best-effort
139 * approach, not necessarily reliable. Users can always specify rates.
140 * - Add a test suite for input modules in general, and CSV in particular?
141 * Becomes more important with the multitude of options and their
142 * interaction. Could cover edge cases (BOM presence, line termination
143 * absence, etc) and auto-stuff as well (channel names, channel counts,
147 typedef float csv_analog_t; /* 'double' currently is flawed. */
149 /* Single column formats. */
150 enum single_col_format {
151 FORMAT_NONE, /* Ignore this column. */
152 FORMAT_BIN, /* Bin digits for a set of bits (or just one bit). */
153 FORMAT_HEX, /* Hex digits for a set of bits. */
154 FORMAT_OCT, /* Oct digits for a set of bits. */
155 FORMAT_ANALOG, /* Floating point number for an analog channel. */
158 static const char *col_format_text[] = {
159 [FORMAT_NONE] = "unknown",
160 [FORMAT_BIN] = "binary",
161 [FORMAT_HEX] = "hexadecimal",
162 [FORMAT_OCT] = "octal",
163 [FORMAT_ANALOG] = "analog",
166 static const char col_format_char[] = {
171 [FORMAT_ANALOG] = 'a',
174 static gboolean format_is_ignore(enum single_col_format fmt)
176 return fmt == FORMAT_NONE;
179 static gboolean format_is_logic(enum single_col_format fmt)
181 return fmt >= FORMAT_BIN && fmt <= FORMAT_OCT;
184 static gboolean format_is_analog(enum single_col_format fmt)
186 return fmt == FORMAT_ANALOG;
189 struct column_details {
191 enum single_col_format text_format;
192 size_t channel_offset;
193 size_t channel_count;
194 size_t channel_index;
201 /* Current selected samplerate. */
203 gboolean samplerate_sent;
205 /* Number of channels. */
206 size_t logic_channels;
207 size_t analog_channels;
209 /* Column delimiter (actually separator), comment leader, EOL sequence. */
214 /* Format specs for input columns, and processing state. */
215 size_t column_seen_count;
216 const char *column_formats;
217 size_t column_want_count;
218 struct column_details *column_details;
220 /* Line number to start processing. */
224 * Determines if the first line should be treated as header and used for
225 * channel names in multi column mode.
228 gboolean header_seen;
230 size_t sample_unit_size; /**!< Byte count for a single sample. */
231 uint8_t *sample_buffer; /**!< Buffer for a single sample. */
232 csv_analog_t *analog_sample_buffer; /**!< Buffer for one set of analog values. */
234 uint8_t *datafeed_buffer; /**!< Queue for datafeed submission. */
235 size_t datafeed_buf_size;
236 size_t datafeed_buf_fill;
237 /* "Striped" layout, M samples for N channels each. */
238 csv_analog_t *analog_datafeed_buffer; /**!< Queue for analog datafeed. */
239 size_t analog_datafeed_buf_size;
240 size_t analog_datafeed_buf_fill;
241 GSList **analog_datafeed_channels;
242 int *analog_datafeed_digits;
244 /* Current line number. */
247 /* List of previously created sigrok channels. */
248 GSList *prev_sr_channels;
252 * Primitive operations to handle sample sets:
253 * - Keep a buffer for datafeed submission, capable of holding many
254 * samples (reduces call overhead, improves throughput).
255 * - Have a "current sample set" pointer reference one position in that
256 * large samples buffer.
257 * - Clear the current sample set before text line inspection, then set
258 * the bits which are found active in the current line of text input.
259 * Phrase the API such that call sites can be kept simple. Advance to
260 * the next sample set between lines, flush the larger buffer as needed
261 * (when it is full, or upon EOF).
264 static int flush_samplerate(const struct sr_input *in)
267 struct sr_datafeed_packet packet;
268 struct sr_datafeed_meta meta;
269 struct sr_config *src;
272 if (inc->samplerate && !inc->samplerate_sent) {
273 packet.type = SR_DF_META;
274 packet.payload = &meta;
275 src = sr_config_new(SR_CONF_SAMPLERATE, g_variant_new_uint64(inc->samplerate));
276 meta.config = g_slist_append(NULL, src);
277 sr_session_send(in->sdi, &packet);
278 g_slist_free(meta.config);
280 inc->samplerate_sent = TRUE;
286 static void clear_logic_samples(struct context *inc)
288 if (!inc->logic_channels)
290 inc->sample_buffer = &inc->datafeed_buffer[inc->datafeed_buf_fill];
291 memset(inc->sample_buffer, 0, inc->sample_unit_size);
294 static void set_logic_level(struct context *inc, size_t ch_idx, int on)
296 size_t byte_idx, bit_idx;
299 if (ch_idx >= inc->logic_channels)
304 byte_idx = ch_idx / 8;
305 bit_idx = ch_idx % 8;
306 bit_mask = 1 << bit_idx;
307 inc->sample_buffer[byte_idx] |= bit_mask;
310 static int flush_logic_samples(const struct sr_input *in)
313 struct sr_datafeed_packet packet;
314 struct sr_datafeed_logic logic;
318 if (!inc->datafeed_buf_fill)
321 rc = flush_samplerate(in);
325 memset(&packet, 0, sizeof(packet));
326 memset(&logic, 0, sizeof(logic));
327 packet.type = SR_DF_LOGIC;
328 packet.payload = &logic;
329 logic.unitsize = inc->sample_unit_size;
330 logic.length = inc->datafeed_buf_fill;
331 logic.data = inc->datafeed_buffer;
333 rc = sr_session_send(in->sdi, &packet);
337 inc->datafeed_buf_fill = 0;
341 static int queue_logic_samples(const struct sr_input *in)
347 if (!inc->logic_channels)
350 inc->datafeed_buf_fill += inc->sample_unit_size;
351 if (inc->datafeed_buf_fill == inc->datafeed_buf_size) {
352 rc = flush_logic_samples(in);
359 static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value);
361 static void clear_analog_samples(struct context *inc)
365 if (!inc->analog_channels)
367 inc->analog_sample_buffer = &inc->analog_datafeed_buffer[inc->analog_datafeed_buf_fill];
368 for (idx = 0; idx < inc->analog_channels; idx++)
369 set_analog_value(inc, idx, 0.0);
372 static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value)
374 if (ch_idx >= inc->analog_channels)
378 inc->analog_sample_buffer[ch_idx * inc->analog_datafeed_buf_size] = value;
381 static int flush_analog_samples(const struct sr_input *in)
384 struct sr_datafeed_packet packet;
385 struct sr_datafeed_analog analog;
386 struct sr_analog_encoding encoding;
387 struct sr_analog_meaning meaning;
388 struct sr_analog_spec spec;
389 csv_analog_t *samples;
395 if (!inc->analog_datafeed_buf_fill)
398 rc = flush_samplerate(in);
402 samples = inc->analog_datafeed_buffer;
403 for (ch_idx = 0; ch_idx < inc->analog_channels; ch_idx++) {
404 digits = inc->analog_datafeed_digits[ch_idx];
405 sr_analog_init(&analog, &encoding, &meaning, &spec, digits);
406 memset(&packet, 0, sizeof(packet));
407 packet.type = SR_DF_ANALOG;
408 packet.payload = &analog;
409 analog.num_samples = inc->analog_datafeed_buf_fill;
410 analog.data = samples;
411 analog.meaning->channels = inc->analog_datafeed_channels[ch_idx];
412 analog.meaning->mq = 0;
413 analog.meaning->mqflags = 0;
414 analog.meaning->unit = 0;
415 analog.encoding->unitsize = sizeof(samples[0]);
416 analog.encoding->is_signed = TRUE;
417 analog.encoding->is_float = TRUE;
418 #ifdef WORDS_BIGENDIAN
419 analog.encoding->is_bigendian = TRUE;
421 analog.encoding->is_bigendian = FALSE;
423 analog.encoding->digits = spec.spec_digits;
424 rc = sr_session_send(in->sdi, &packet);
427 samples += inc->analog_datafeed_buf_size;
430 inc->analog_datafeed_buf_fill = 0;
434 static int queue_analog_samples(const struct sr_input *in)
440 if (!inc->analog_channels)
443 inc->analog_datafeed_buf_fill++;
444 if (inc->analog_datafeed_buf_fill == inc->analog_datafeed_buf_size) {
445 rc = flush_analog_samples(in);
452 /* Helpers for "column processing". */
454 static int split_column_format(const char *spec,
455 size_t *column_count, enum single_col_format *format, size_t *bit_count)
458 char *endp, format_char;
459 enum single_col_format format_code;
464 /* Get the (optional, decimal, default 1) column count. Accept '*'. */
467 /* Workaround, strtoul("*") won't always yield expected endp. */
469 endp = (char *)&spec[1];
471 count = strtoul(spec, &endp, 10);
478 *column_count = count;
481 /* Get the (mandatory, single letter) type spec (-/xob/l). */
482 format_char = *spec++;
483 switch (format_char) {
487 format_code = FORMAT_NONE;
490 format_code = FORMAT_HEX;
493 format_code = FORMAT_OCT;
497 format_code = FORMAT_BIN;
500 format_code = FORMAT_ANALOG;
502 default: /* includes NUL */
506 *format = format_code;
508 /* Get the (optional, decimal, default 1) bit count. */
510 count = strtoul(spec, &endp, 10);
514 count = format_is_analog(format_code) ? 3 : 1;
515 if (format_is_ignore(format_code))
517 if (format_char == 'l')
523 /* Input spec must have been exhausted. */
530 static int make_column_details_from_format(const struct sr_input *in,
531 const char *column_format, char **column_texts)
534 char **formats, *format;
535 size_t format_count, column_count, logic_count, analog_count;
536 size_t auto_column_count;
537 size_t format_idx, c, b, column_idx, channel_idx, analog_idx;
538 enum single_col_format f;
539 struct column_details *detail;
540 GString *channel_name;
544 int channel_type, channel_sdi_nr;
548 inc->column_seen_count = g_strv_length(column_texts);
550 /* Split the input spec, count involved columns and bits. */
551 formats = g_strsplit(column_format, ",", 0);
553 sr_err("Cannot parse columns format %s (comma split).", column_format);
556 format_count = g_strv_length(formats);
558 sr_err("Cannot parse columns format %s (field count).", column_format);
562 column_count = logic_count = analog_count = 0;
563 auto_column_count = 0;
564 for (format_idx = 0; format_idx < format_count; format_idx++) {
565 format = formats[format_idx];
566 ret = split_column_format(format, &c, &f, &b);
567 sr_dbg("fmt %s -> %zu cols, %s fmt, %zu bits, rc %d", format, c, col_format_text[f], b, ret);
569 sr_err("Cannot parse columns format %s (field split, %s).", column_format, format);
574 /* User requested "auto-count", must be last format. */
575 if (formats[format_idx + 1]) {
576 sr_err("Auto column count must be last format field.");
580 auto_column_count = inc->column_seen_count - column_count;
581 c = auto_column_count;
584 if (format_is_analog(f))
586 else if (format_is_logic(f))
587 logic_count += c * b;
589 sr_dbg("Column format %s -> %zu columns, %zu logic, %zu analog channels.",
590 column_format, column_count, logic_count, analog_count);
592 /* Allocate and fill in "column processing" details. Create channels. */
593 inc->column_want_count = column_count;
594 if (inc->column_seen_count < inc->column_want_count) {
595 sr_err("Insufficient input text width for desired data amount, got %zu but want %zu columns.",
596 inc->column_seen_count, inc->column_want_count);
600 inc->column_details = g_malloc0_n(column_count, sizeof(inc->column_details[0]));
601 column_idx = channel_idx = analog_idx = 0;
602 channel_name = g_string_sized_new(64);
603 for (format_idx = 0; format_idx < format_count; format_idx++) {
604 /* Process a format field, which can span multiple columns. */
605 format = formats[format_idx];
606 (void)split_column_format(format, &c, &f, &b);
608 c = auto_column_count;
610 /* Fill in a column's processing details. */
611 detail = &inc->column_details[column_idx++];
612 detail->col_nr = column_idx;
613 detail->text_format = f;
614 if (format_is_analog(detail->text_format)) {
615 detail->channel_offset = analog_idx;
616 detail->channel_count = 1;
617 detail->analog_digits = b;
618 analog_idx += detail->channel_count;
619 } else if (format_is_logic(detail->text_format)) {
620 detail->channel_offset = channel_idx;
621 detail->channel_count = b;
622 channel_idx += detail->channel_count;
623 } else if (format_is_ignore(detail->text_format)) {
628 * Neither logic nor analog data, nor ignore.
629 * Format was noted. No channel creation involved.
634 * Pick most appropriate channel names. Optionally
635 * use text from a header line (when requested by the
636 * user). In the absence of header text, channels are
637 * assigned rather generic names.
639 * Manipulation of the column's caption (when a header
640 * line is seen) is acceptable, because this header
641 * line won't get processed another time.
643 column = column_texts[detail->col_nr - 1];
644 if (inc->use_header && column && *column)
645 caption = sr_scpi_unquote_string(column);
648 if (!caption || !*caption)
651 * TODO Need we first create _all_ logic channels,
652 * before creating analog channels? Just store the
653 * parameters here (index, type, name) and have the
654 * creation sequence done outside of the format
657 for (create_idx = 0; create_idx < detail->channel_count; create_idx++) {
658 if (caption && detail->channel_count == 1) {
659 g_string_assign(channel_name, caption);
660 } else if (caption) {
661 g_string_printf(channel_name, "%s[%zu]",
662 caption, create_idx);
664 g_string_printf(channel_name, "%zu",
665 detail->channel_offset + create_idx);
667 if (format_is_analog(detail->text_format)) {
668 channel_sdi_nr = logic_count + detail->channel_offset + create_idx;
669 channel_type = SR_CHANNEL_ANALOG;
670 detail->channel_index = g_slist_length(in->sdi->channels);
671 } else if (format_is_logic(detail->text_format)) {
672 channel_sdi_nr = detail->channel_offset + create_idx;
673 channel_type = SR_CHANNEL_LOGIC;
677 sr_channel_new(in->sdi, channel_sdi_nr,
678 channel_type, TRUE, channel_name->str);
682 inc->logic_channels = channel_idx;
683 inc->analog_channels = analog_idx;
684 g_string_free(channel_name, TRUE);
690 static const struct column_details *lookup_column_details(struct context *inc, size_t nr)
692 if (!inc || !inc->column_details)
694 if (!nr || nr > inc->column_want_count)
696 return &inc->column_details[nr - 1];
700 * Primitive operations for text input: Strip comments off text lines.
701 * Split text lines into columns. Process input text for individual
705 static void strip_comment(char *buf, const GString *prefix)
712 if ((ptr = strstr(buf, prefix->str))) {
719 * @brief Splits a text line into a set of columns.
721 * @param[in] buf The input text line to split.
722 * @param[in] inc The input module's context.
724 * @returns An array of strings, representing the columns' text.
726 * This routine splits a text line on previously determined separators.
728 static char **split_line(char *buf, struct context *inc)
730 return g_strsplit(buf, inc->delimiter->str, 0);
734 * @brief Parse a multi-bit field into several logic channels.
736 * @param[in] column The input text, a run of bin/hex/oct digits.
737 * @param[in] inc The input module's context.
738 * @param[in] details The column processing details.
740 * @retval SR_OK Success.
741 * @retval SR_ERR Invalid input data (empty, or format error).
743 * This routine modifies the logic levels in the current sample set,
744 * based on the text input and a user provided format spec.
746 static int parse_logic(const char *column, struct context *inc,
747 const struct column_details *details)
749 size_t length, ch_rem, ch_idx, ch_inc;
753 const char *type_text;
757 * Prepare to read the digits from the text end towards the start.
758 * A digit corresponds to a variable number of channels (depending
759 * on the value's radix). Prepare the mapping of text digits to
760 * (a number of) logic channels.
762 length = strlen(column);
764 sr_err("Column %zu in line %zu is empty.", details->col_nr,
768 rdptr = &column[length];
769 ch_idx = details->channel_offset;
770 ch_rem = details->channel_count;
773 * Get another digit and derive up to four logic channels' state from
774 * it. Make sure to not process more bits than the column has channels
775 * associated with it.
777 while (rdptr > column && ch_rem) {
778 /* Check for valid digits according to the input radix. */
780 switch (details->text_format) {
782 valid = g_ascii_isxdigit(c) && c < '2';
786 valid = g_ascii_isxdigit(c) && c < '8';
790 valid = g_ascii_isxdigit(c);
798 type_text = col_format_text[details->text_format];
799 sr_err("Invalid text '%s' in %s type column %zu in line %zu.",
800 column, type_text, details->col_nr, inc->line_number);
803 /* Use the digit's bits for logic channels' data. */
804 bits = g_ascii_xdigit_value(c);
805 switch (details->text_format) {
809 set_logic_level(inc, ch_idx + 3, bits & (1 << 3));
815 set_logic_level(inc, ch_idx + 2, bits & (1 << 2));
819 set_logic_level(inc, ch_idx + 1, bits & (1 << 1));
824 set_logic_level(inc, ch_idx + 0, bits & (1 << 0));
827 /* ShouldNotHappen(TM), but silences compiler warning. */
833 * TODO Determine whether the availability of extra input data
834 * for unhandled logic channels is worth warning here. In this
835 * implementation users are in control, and can have the more
836 * significant bits ignored (which can be considered a feature
837 * and not really a limitation).
844 * @brief Parse a floating point text into an analog value.
846 * @param[in] column The input text, a floating point number.
847 * @param[in] inc The input module's context.
848 * @param[in] details The column processing details.
850 * @retval SR_OK Success.
851 * @retval SR_ERR Invalid input data (empty, or format error).
853 * This routine modifies the analog values in the current sample set,
854 * based on the text input and a user provided format spec.
856 static int parse_analog(const char *column, struct context *inc,
857 const struct column_details *details)
860 double dvalue; float fvalue;
864 if (!format_is_analog(details->text_format))
867 length = strlen(column);
869 sr_err("Column %zu in line %zu is empty.", details->col_nr,
873 if (sizeof(value) == sizeof(double)) {
874 ret = sr_atod_ascii(column, &dvalue);
876 } else if (sizeof(value) == sizeof(float)) {
877 ret = sr_atof_ascii(column, &fvalue);
883 sr_err("Cannot parse analog text %s in column %zu in line %zu.",
884 column, details->col_nr, inc->line_number);
887 set_analog_value(inc, details->channel_offset, value);
893 * @brief Parse routine which ignores the input text.
895 * This routine exists to unify dispatch code paths, mapping input file
896 * columns' data types to their respective parse routines.
898 static int parse_ignore(const char *column, struct context *inc,
899 const struct column_details *details)
907 typedef int (*col_parse_cb)(const char *column, struct context *inc,
908 const struct column_details *details);
910 static const col_parse_cb col_parse_funcs[] = {
911 [FORMAT_NONE] = parse_ignore,
912 [FORMAT_BIN] = parse_logic,
913 [FORMAT_OCT] = parse_logic,
914 [FORMAT_HEX] = parse_logic,
915 [FORMAT_ANALOG] = parse_analog,
918 static int init(struct sr_input *in, GHashTable *options)
921 size_t single_column, first_column, logic_channels;
923 enum single_col_format format;
926 in->sdi = g_malloc0(sizeof(*in->sdi));
927 in->priv = inc = g_malloc0(sizeof(*inc));
929 single_column = g_variant_get_uint32(g_hash_table_lookup(options, "single_column"));
930 logic_channels = g_variant_get_uint32(g_hash_table_lookup(options, "logic_channels"));
931 inc->delimiter = g_string_new(g_variant_get_string(
932 g_hash_table_lookup(options, "column_separator"), NULL));
933 if (!inc->delimiter->len) {
934 sr_err("Column separator cannot be empty.");
937 s = g_variant_get_string(g_hash_table_lookup(options, "single_format"), NULL);
938 if (g_ascii_strncasecmp(s, "bin", 3) == 0) {
940 } else if (g_ascii_strncasecmp(s, "hex", 3) == 0) {
942 } else if (g_ascii_strncasecmp(s, "oct", 3) == 0) {
945 sr_err("Invalid single-column format: '%s'", s);
948 inc->comment = g_string_new(g_variant_get_string(
949 g_hash_table_lookup(options, "comment_leader"), NULL));
950 if (g_string_equal(inc->comment, inc->delimiter)) {
952 * Using the same sequence as comment leader and column
953 * separator won't work. The user probably specified ';'
954 * as the column separator but did not adjust the comment
955 * leader. Try DWIM, drop comment stripping support here.
957 sr_warn("Comment leader and column separator conflict, disabling comment support.");
958 g_string_truncate(inc->comment, 0);
960 inc->samplerate = g_variant_get_uint64(g_hash_table_lookup(options, "samplerate"));
961 first_column = g_variant_get_uint32(g_hash_table_lookup(options, "first_column"));
962 inc->use_header = g_variant_get_boolean(g_hash_table_lookup(options, "header"));
963 inc->start_line = g_variant_get_uint32(g_hash_table_lookup(options, "start_line"));
964 if (inc->start_line < 1) {
965 sr_err("Invalid start line %zu.", inc->start_line);
970 * Scan flexibly, to get preferred format specs which describe
971 * the input file's data formats. As well as some simple specs
972 * for backwards compatibility and user convenience.
974 * This logic ends up with a copy of the format string, either
975 * user provided or internally derived. Actual creation of the
976 * column processing details gets deferred until the first line
977 * of input data was seen. To support automatic determination of
978 * e.g. channel counts from column counts.
980 s = g_variant_get_string(g_hash_table_lookup(options, "column_formats"), NULL);
982 inc->column_formats = g_strdup(s);
983 sr_dbg("User specified column_formats: %s.", s);
984 } else if (single_column && logic_channels) {
985 format_char = col_format_char[format];
986 if (single_column == 1) {
987 inc->column_formats = g_strdup_printf("%c%zu",
988 format_char, logic_channels);
990 inc->column_formats = g_strdup_printf("%zu-,%c%zu",
992 format_char, logic_channels);
994 sr_dbg("Backwards compat single_column, col %zu, fmt %s, bits %zu -> %s.",
995 single_column, col_format_text[format], logic_channels,
996 inc->column_formats);
997 } else if (!single_column) {
998 if (first_column > 1) {
999 inc->column_formats = g_strdup_printf("%zu-,%zul",
1000 first_column - 1, logic_channels);
1002 inc->column_formats = g_strdup_printf("%zul",
1005 sr_dbg("Backwards compat multi-column, col %zu, chans %zu -> %s.",
1006 first_column, logic_channels,
1007 inc->column_formats);
1009 sr_warn("Unknown or unsupported columns layout spec, assuming simple multi-column mode.");
1010 inc->column_formats = g_strdup("*l");
1017 * Check the channel list for consistency across file re-import. See
1018 * the VCD input module for more details and motivation.
1021 static void keep_header_for_reread(const struct sr_input *in)
1023 struct context *inc;
1026 g_slist_free_full(inc->prev_sr_channels, sr_channel_free_cb);
1027 inc->prev_sr_channels = in->sdi->channels;
1028 in->sdi->channels = NULL;
1031 static int check_header_in_reread(const struct sr_input *in)
1033 struct context *inc;
1040 if (!inc->prev_sr_channels)
1043 if (sr_channel_lists_differ(inc->prev_sr_channels, in->sdi->channels)) {
1044 sr_err("Channel list change not supported for file re-read.");
1047 g_slist_free_full(in->sdi->channels, sr_channel_free_cb);
1048 in->sdi->channels = inc->prev_sr_channels;
1049 inc->prev_sr_channels = NULL;
1054 static const char *delim_set = "\r\n";
1056 static const char *get_line_termination(GString *buf)
1061 if (g_strstr_len(buf->str, buf->len, "\r\n"))
1063 else if (memchr(buf->str, '\n', buf->len))
1065 else if (memchr(buf->str, '\r', buf->len))
1071 static int initial_parse(const struct sr_input *in, GString *buf)
1073 struct context *inc;
1075 size_t line_number, line_idx;
1077 char **lines, *line, **columns;
1083 /* Search for the first line to process (header or data). */
1085 if (inc->termination)
1086 lines = g_strsplit(buf->str, inc->termination, 0);
1088 lines = g_strsplit_set(buf->str, delim_set, 0);
1089 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1091 if (inc->start_line > line_number) {
1092 sr_spew("Line %zu skipped (before start).", line_number);
1095 if (line[0] == '\0') {
1096 sr_spew("Blank line %zu skipped.", line_number);
1099 strip_comment(line, inc->comment);
1100 if (line[0] == '\0') {
1101 sr_spew("Comment-only line %zu skipped.", line_number);
1105 /* Reached first proper line. */
1109 /* Not enough data for a proper line yet. */
1114 /* Get the number of columns in the line. */
1115 columns = split_line(line, inc);
1117 sr_err("Error while parsing line %zu.", line_number);
1121 num_columns = g_strv_length(columns);
1123 sr_err("Error while parsing line %zu.", line_number);
1127 sr_dbg("DIAG Got %zu columns in text line: %s.", num_columns, line);
1130 * Interpret the user provided column format specs. This might
1131 * involve inspection of the now received input text, to support
1132 * e.g. automatic detection of channel counts in the absence of
1133 * user provided specs. Optionally a header line is used to get
1136 * Check the then created channels for consistency across .reset
1137 * and .receive sequences (file re-load).
1139 ret = make_column_details_from_format(in, inc->column_formats, columns);
1141 sr_err("Cannot parse columns format using line %zu.", line_number);
1144 if (!check_header_in_reread(in)) {
1150 * Allocate buffer memory for datafeed submission of sample data.
1151 * Calculate the minimum buffer size to store the set of samples
1152 * of all channels (unit size). Determine a larger buffer size
1153 * for datafeed submission that is a multiple of the unit size.
1154 * Allocate the larger buffer, the "sample buffer" will point
1155 * to a location within that large buffer later.
1157 * TODO Move channel creation here, and just store required
1158 * parameters in the format parser above? Could simplify the
1159 * arrangement that logic and analog channels get created in
1160 * strict sequence in their respective groups.
1162 if (inc->logic_channels) {
1163 inc->sample_unit_size = (inc->logic_channels + 7) / 8;
1164 inc->datafeed_buf_size = CHUNK_SIZE;
1165 inc->datafeed_buf_size *= inc->sample_unit_size;
1166 inc->datafeed_buffer = g_malloc(inc->datafeed_buf_size);
1167 if (!inc->datafeed_buffer) {
1168 sr_err("Cannot allocate datafeed send buffer (logic).");
1169 ret = SR_ERR_MALLOC;
1172 inc->datafeed_buf_fill = 0;
1175 if (inc->analog_channels) {
1176 size_t sample_size, sample_count;
1178 struct column_details *detail;
1181 sample_size = sizeof(inc->analog_datafeed_buffer[0]);
1182 inc->analog_datafeed_buf_size = CHUNK_SIZE;
1183 inc->analog_datafeed_buf_size /= sample_size;
1184 inc->analog_datafeed_buf_size /= inc->analog_channels;
1185 sample_count = inc->analog_channels * inc->analog_datafeed_buf_size;
1186 inc->analog_datafeed_buffer = g_malloc0(sample_count * sample_size);
1187 if (!inc->analog_datafeed_buffer) {
1188 sr_err("Cannot allocate datafeed send buffer (analog).");
1189 ret = SR_ERR_MALLOC;
1192 inc->analog_datafeed_buf_fill = 0;
1193 inc->analog_datafeed_channels = g_malloc0(inc->analog_channels * sizeof(inc->analog_datafeed_channels[0]));
1194 inc->analog_datafeed_digits = g_malloc0(inc->analog_channels * sizeof(inc->analog_datafeed_digits[0]));
1195 digits_item = inc->analog_datafeed_digits;
1196 for (detail_idx = 0; detail_idx < inc->column_want_count; detail_idx++) {
1197 detail = &inc->column_details[detail_idx];
1198 if (!format_is_analog(detail->text_format))
1200 channel = g_slist_nth_data(in->sdi->channels, detail->channel_index);
1201 inc->analog_datafeed_channels[detail->channel_offset] = g_slist_append(NULL, channel);
1202 *digits_item++ = detail->analog_digits;
1208 g_strfreev(columns);
1215 * Gets called from initial_receive(), which runs until the end-of-line
1216 * encoding of the input stream could get determined. Assumes that this
1217 * routine receives enough buffered initial input data to either see the
1218 * BOM when there is one, or that no BOM will follow when a text line
1219 * termination sequence was seen. Silently drops the UTF-8 BOM sequence
1220 * from the input buffer if one was seen. Does not care to protect
1221 * against multiple execution or dropping the BOM multiple times --
1222 * there should be at most one in the input stream.
1224 static void initial_bom_check(const struct sr_input *in)
1226 static const char *utf8_bom = "\xef\xbb\xbf";
1228 if (in->buf->len < strlen(utf8_bom))
1230 if (strncmp(in->buf->str, utf8_bom, strlen(utf8_bom)) != 0)
1232 g_string_erase(in->buf, 0, strlen(utf8_bom));
1235 static int initial_receive(const struct sr_input *in)
1237 struct context *inc;
1241 const char *termination;
1243 initial_bom_check(in);
1247 termination = get_line_termination(in->buf);
1249 /* Don't have a full line yet. */
1252 p = g_strrstr_len(in->buf->str, in->buf->len, termination);
1254 /* Don't have a full line yet. */
1256 len = p - in->buf->str - 1;
1257 new_buf = g_string_new_len(in->buf->str, len);
1258 g_string_append_c(new_buf, '\0');
1260 inc->termination = g_strdup(termination);
1262 if (in->buf->str[0] != '\0')
1263 ret = initial_parse(in, new_buf);
1267 g_string_free(new_buf, TRUE);
1272 static int process_buffer(struct sr_input *in, gboolean is_eof)
1274 struct context *inc;
1276 size_t line_idx, col_idx, col_nr;
1277 const struct column_details *details;
1278 col_parse_cb parse_func;
1280 char *p, **lines, *line, **columns, *column;
1283 if (!inc->started) {
1284 std_session_send_df_header(in->sdi);
1285 inc->started = TRUE;
1289 * Consider empty input non-fatal. Keep accumulating input until
1290 * at least one full text line has become available. Grab the
1291 * maximum amount of accumulated data that consists of full text
1292 * lines, and process what has been received so far, leaving not
1293 * yet complete lines for the next invocation.
1295 * Enforce that all previously buffered data gets processed in
1296 * the "EOF" condition. Do not insist in the presence of the
1297 * termination sequence for the last line (may often be missing
1298 * on Windows). A present termination sequence will just result
1299 * in the "execution of an empty line", and does not harm.
1304 p = in->buf->str + in->buf->len;
1306 p = g_strrstr_len(in->buf->str, in->buf->len, inc->termination);
1310 p += strlen(inc->termination);
1312 g_strstrip(in->buf->str);
1315 lines = g_strsplit(in->buf->str, inc->termination, 0);
1316 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1318 if (inc->line_number < inc->start_line) {
1319 sr_spew("Line %zu skipped (before start).", inc->line_number);
1322 if (line[0] == '\0') {
1323 sr_spew("Blank line %zu skipped.", inc->line_number);
1327 /* Remove trailing comment. */
1328 strip_comment(line, inc->comment);
1329 if (line[0] == '\0') {
1330 sr_spew("Comment-only line %zu skipped.", inc->line_number);
1334 /* Skip the header line, its content was used as the channel names. */
1335 if (inc->use_header && !inc->header_seen) {
1336 sr_spew("Header line %zu skipped.", inc->line_number);
1337 inc->header_seen = TRUE;
1341 /* Split the line into columns, check for minimum length. */
1342 columns = split_line(line, inc);
1344 sr_err("Error while parsing line %zu.", inc->line_number);
1348 num_columns = g_strv_length(columns);
1349 if (num_columns < inc->column_want_count) {
1350 sr_err("Insufficient column count %zu in line %zu.",
1351 num_columns, inc->line_number);
1352 g_strfreev(columns);
1357 /* Have the columns of the current text line processed. */
1358 clear_logic_samples(inc);
1359 clear_analog_samples(inc);
1360 for (col_idx = 0; col_idx < inc->column_want_count; col_idx++) {
1361 column = columns[col_idx];
1362 col_nr = col_idx + 1;
1363 details = lookup_column_details(inc, col_nr);
1364 if (!details || !details->text_format)
1366 parse_func = col_parse_funcs[details->text_format];
1369 ret = parse_func(column, inc, details);
1371 g_strfreev(columns);
1377 /* Send sample data to the session bus (buffered). */
1378 ret = queue_logic_samples(in);
1379 ret += queue_analog_samples(in);
1381 sr_err("Sending samples failed.");
1382 g_strfreev(columns);
1387 g_strfreev(columns);
1390 g_string_erase(in->buf, 0, p - in->buf->str);
1395 static int receive(struct sr_input *in, GString *buf)
1397 struct context *inc;
1400 g_string_append_len(in->buf, buf->str, buf->len);
1403 if (!inc->column_seen_count) {
1404 ret = initial_receive(in);
1405 if (ret == SR_ERR_NA)
1406 /* Not enough data yet. */
1408 else if (ret != SR_OK)
1411 /* sdi is ready, notify frontend. */
1412 in->sdi_ready = TRUE;
1416 ret = process_buffer(in, FALSE);
1421 static int end(struct sr_input *in)
1423 struct context *inc;
1427 ret = process_buffer(in, TRUE);
1433 ret = flush_logic_samples(in);
1434 ret += flush_analog_samples(in);
1440 std_session_send_df_end(in->sdi);
1445 static void cleanup(struct sr_input *in)
1447 struct context *inc;
1449 keep_header_for_reread(in);
1453 g_free(inc->termination);
1454 inc->termination = NULL;
1455 g_free(inc->datafeed_buffer);
1456 inc->datafeed_buffer = NULL;
1457 g_free(inc->analog_datafeed_buffer);
1458 inc->analog_datafeed_buffer = NULL;
1461 static int reset(struct sr_input *in)
1463 struct context *inc = in->priv;
1466 inc->started = FALSE;
1467 g_string_truncate(in->buf, 0);
1486 static struct sr_option options[] = {
1488 "column_formats", "Column format specs",
1489 "Specifies text columns data types: A comma separated list of [<cols>]<fmt>[<bits>] items, with - to ignore columns, x/o/b/l for logic data, a (and resolution) for analog data.",
1492 [OPT_SINGLE_COL] = {
1493 "single_column", "Single column",
1494 "Enable single-column mode, exclusively use text from the specified column (number starting at 1). Obsoleted by 'column_formats'.",
1498 "first_column", "First column",
1499 "Number of the first column with logic data in simple multi-column mode (number starting at 1, default 1). Obsoleted by 'column_formats'.",
1503 "logic_channels", "Number of logic channels",
1504 "Logic channel count, required in simple single-column mode, defaults to \"all remaining columns\" in simple multi-column mode. Obsoleted by 'column_formats'.",
1508 "single_format", "Data format for simple single-column mode.",
1509 "The number format of single-column mode input data: bin, hex, oct. Obsoleted by 'column_formats'.",
1513 "start_line", "Start line",
1514 "The line number at which to start processing input text (default: 1).",
1518 "header", "Get channel names from first line.",
1519 "Use the first processed line's column captions (when available) as channel names. Off by default",
1523 "samplerate", "Samplerate (Hz)",
1524 "The input data's sample rate in Hz. No default value.",
1528 "column_separator", "Column separator",
1529 "The sequence which separates text columns. Non-empty text, comma by default.",
1533 "comment_leader", "Comment leader character",
1534 "The text which starts comments at the end of text lines, semicolon by default.",
1537 [OPT_MAX] = ALL_ZERO,
1540 static const struct sr_option *get_options(void)
1544 if (!options[0].def) {
1545 options[OPT_COL_FMTS].def = g_variant_ref_sink(g_variant_new_string(""));
1546 options[OPT_SINGLE_COL].def = g_variant_ref_sink(g_variant_new_uint32(0));
1547 options[OPT_FIRST_COL].def = g_variant_ref_sink(g_variant_new_uint32(1));
1548 options[OPT_NUM_LOGIC].def = g_variant_ref_sink(g_variant_new_uint32(0));
1549 options[OPT_FORMAT].def = g_variant_ref_sink(g_variant_new_string("bin"));
1551 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("bin")));
1552 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("hex")));
1553 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("oct")));
1554 options[OPT_FORMAT].values = l;
1555 options[OPT_START].def = g_variant_ref_sink(g_variant_new_uint32(1));
1556 options[OPT_HEADER].def = g_variant_ref_sink(g_variant_new_boolean(FALSE));
1557 options[OPT_RATE].def = g_variant_ref_sink(g_variant_new_uint64(0));
1558 options[OPT_DELIM].def = g_variant_ref_sink(g_variant_new_string(","));
1559 options[OPT_COMMENT].def = g_variant_ref_sink(g_variant_new_string(";"));
1565 SR_PRIV struct sr_input_module input_csv = {
1568 .desc = "Comma-separated values",
1569 .exts = (const char*[]){"csv", NULL},
1570 .options = get_options,