2 * This file is part of the libsigrok project.
4 * Copyright (C) 2013 Marc Schink <sigrok-dev@marcschink.de>
5 * Copyright (C) 2019 Gerhard Sittig <gerhard.sittig@gmx.net>
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
27 #include <libsigrok/libsigrok.h>
28 #include "libsigrok-internal.h"
29 #include "scpi.h" /* String un-quote for channel name from header line. */
31 #define LOG_PREFIX "input/csv"
33 #define CHUNK_SIZE (4 * 1024 * 1024)
36 * The CSV input module has the following options:
38 * column_formats: Specifies the data formats and channel counts for the
39 * input file's text columns. Accepts a comma separated list of tuples
40 * with: an optional column repeat count ('*' as a wildcard meaning
41 * "all remaining columns", only applicable to the last field), a format
42 * specifying character ('x' hexadecimal, 'o' octal, 'b' binary, 'l'
43 * single-bit logic), and an optional bit count (translating to: logic
44 * channels communicated in that column). This "column_formats" option
45 * is most versatile, other forms of specifying the column layout only
46 * exist for backwards compatibility.
48 * single_column: Specifies the column number which contains the logic data
49 * for single-column mode. All logic data is taken from several bits
50 * which all are kept within that one column. Only exists for backwards
51 * compatibility, see "column_formats" for more flexibility.
53 * first_column: Specifies the number of the first column with logic data
54 * in simple multi-column mode. Only exists for backwards compatibility,
55 * see "column_formats" for more flexibility.
57 * logic_channels: Specifies the number of logic channels. Is required in
58 * simple single-column mode. Is optional in simple multi-column mode
59 * (and defaults to all remaining columns). Only exists for backwards
60 * compatibility, see "column_formats" for more flexibility.
62 * single_format: Specifies the format of the input text in simple single-
63 * column mode. Available formats are: 'bin' (default), 'hex' and 'oct'.
64 * Simple multi-column mode always uses single-bit data per column.
65 * Only exists for backwards compatibility, see "column_formats" for
68 * start_line: Specifies at which line to start processing the input file.
69 * Allows skipping leading lines which are neither header nor data lines.
70 * By default all of the input file gets processed.
72 * header: Boolean option, controls whether the first processed line is used
73 * to determine channel names. Off by default. Generic channel names are
74 * used in the absence of header line content.
76 * samplerate: Specifies the samplerate of the input data. Defaults to 0.
77 * User specs take precedence over data which optionally gets derived
80 * column_separator: Specifies the sequence which separates the text file
81 * columns. Cannot be empty. Defaults to comma.
83 * comment_leader: Specifies the sequence which starts comments that run
84 * up to the end of the current text line. Can be empty to disable
85 * comment support. Defaults to semicolon.
87 * Typical examples of using these options:
88 * - ... -I csv:column_formats=*l ...
89 * All columns are single-bit logic data. Identical to the previous
90 * multi-column mode (the default when no options were given at all).
91 * - ... -I csv:column_formats=3-,*l ...
92 * Ignore the first three columns, get single-bit logic data from all
93 * remaining columns (multi-column mode with first-column above 1).
94 * - ... -I csv:column_formats=3-,4l,x8 ...
95 * Ignore the first three columns, get single-bit logic data from the
96 * next four columns, then eight-bit data in hex format from the next
97 * column. More columns may follow in the input text but won't get
98 * processed. (Mix of previous multi-column as well as single-column
100 * - ... -I csv:column_formats=4x8,b16,5l ...
101 * Get eight-bit data in hex format from the first four columns, then
102 * sixteen-bit data in binary format, then five times single-bit data.
103 * - ... -I csv:single_column=2:single_format=bin:logic_channels=8 ...
104 * Get eight logic bits in binary format from column 2. (Simple
105 * single-column mode, corresponds to the "-,b8" format.)
106 * - ... -I csv:first_column=6:logic_channels=4 ...
107 * Get four single-bit logic channels from columns 6 to 9 respectively.
108 * (Simple multi-column mode, corresponds to the "5-,4b" format.)
109 * - ... -I csv:start_line=20:header=yes:...
110 * Skip the first 19 text lines. Use line 20 to derive channel names.
111 * Data starts at line 21.
117 * - Determine how the text line handling can get improved, regarding
118 * all of robustness and flexibility and correctness.
119 * - The current implementation splits on "any run of CR and LF". Which
120 * translates to: Line numbers are wrong in the presence of empty
121 * lines in the input stream. See below for an (expensive) fix.
122 * - Dropping support for CR style end-of-line markers could improve
123 * the situation a lot. Code could search for and split on LF, and
124 * trim optional trailing CR. This would result in proper support
125 * for CRLF (Windows) as well as LF (Unix), and allow for correct
126 * line number counts.
127 * - When support for CR-only line termination cannot get dropped,
128 * then the current implementation is inappropriate. Currently the
129 * input stream is scanned for the first occurrence of either of the
130 * supported termination styles (which is good). For the remaining
131 * session a consistent encoding of the text lines is assumed (which
133 * - When line numbers need to be correct and reliable, _and_ the full
134 * set of previously supported line termination sequences are required,
135 * and potentially more are to get added for improved compatibility
136 * with more platforms or generators, then the current approach of
137 * splitting on runs of termination characters needs to get replaced,
138 * by the more expensive approach to scan for and count the initially
139 * determined termination sequence.
141 * - Add support for analog input data? (optional)
142 * - Needs a syntax first for user specs which channels (columns) are
143 * logic and which are analog. May need heuristics(?) to guess from
144 * input data in the absence of user provided specs.
/*
 * Text formats which a single input column can carry. The values are
 * used as indices into col_format_text[] and col_parse_funcs[].
 * FORMAT_NONE is the first enumerator and thus zero, which is why other
 * code can test "column carries data" with a plain truth check on the
 * format value.
 */
147 /* Single column formats. */
148 enum single_col_format {
149 FORMAT_NONE, /* Ignore this column. */
150 FORMAT_BIN, /* Bin digits for a set of bits (or just one bit). */
151 FORMAT_HEX, /* Hex digits for a set of bits. */
152 FORMAT_OCT, /* Oct digits for a set of bits. */
/*
 * Human readable names of the column formats, indexed by
 * enum single_col_format. Only used in debug and error messages.
 */
155 static const char *col_format_text[] = {
156 [FORMAT_NONE] = "unknown",
157 [FORMAT_BIN] = "binary",
158 [FORMAT_HEX] = "hexadecimal",
159 [FORMAT_OCT] = "octal",
/*
 * Per-format character table, indexed by enum single_col_format. init()
 * picks the character for the "single_format" option from here when it
 * composes a column_formats spec from the backwards compatible options.
 * NOTE(review): the table's initializers are not visible in this listing.
 */
162 static const char col_format_char[] = {
/*
 * Processing details for one input text column. The array of these is
 * filled in by make_column_details_from_format() and is accessed via
 * lookup_column_details().
 */
169 struct column_details {
171 enum single_col_format text_format; /* How to interpret the column's text. */
172 size_t channel_offset; /* Index of the first logic channel fed from this column. */
173 size_t channel_count; /* Number of logic channels (bits) taken from this column. */
/*
 * Members of the input module's private context, which the routines in
 * this file access as "inc".
 * NOTE(review): several member declarations are not visible in this
 * listing, only their leading comments are.
 */
179 /* Current selected samplerate. */
181 gboolean samplerate_sent; /* TRUE once the samplerate meta packet was sent. */
183 /* Number of logic channels. */
184 size_t logic_channels;
186 /* Column delimiter (actually separator), comment leader, EOL sequence. */
191 /* Format specs for input columns, and processing state. */
192 size_t column_seen_count; /* Column count observed in the first processed text line. */
193 const char *column_formats; /* Format spec text, user provided or derived in init(). */
194 size_t column_want_count; /* Number of columns which the format spec covers. */
195 struct column_details *column_details; /* Array with column_want_count entries. */
197 /* Line number to start processing. */
201 * Determines if the first line should be treated as header and used for
202 * channel names in multi column mode.
205 gboolean header_seen; /* TRUE after process_buffer() skipped the header line. */
207 size_t sample_unit_size; /**< Byte count for a single sample. */
208 uint8_t *sample_buffer; /**< Buffer for a single sample. */
210 uint8_t *datafeed_buffer; /**< Queue for datafeed submission. */
211 size_t datafeed_buf_size; /**< Capacity of the datafeed queue in bytes. */
212 size_t datafeed_buf_fill; /**< Number of bytes currently queued. */
214 /* Current line number. */
217 /* List of previously created sigrok channels. */
218 GSList *prev_sr_channels;
222 * Primitive operations to handle sample sets:
223 * - Keep a buffer for datafeed submission, capable of holding many
224 * samples (reduces call overhead, improves throughput).
225 * - Have a "current sample set" pointer reference one position in that
226 * large samples buffer.
227 * - Clear the current sample set before text line inspection, then set
228 * the bits which are found active in the current line of text input.
229 * Phrase the API such that call sites can be kept simple. Advance to
230 * the next sample set between lines, flush the larger buffer as needed
231 * (when it is full, or upon EOF).
/*
 * Start a new sample set: make the "current sample" pointer reference
 * the next free position in the datafeed buffer, and clear all bits of
 * that sample set.
 */
234 static void clear_logic_samples(struct context *inc)
236 inc->sample_buffer = &inc->datafeed_buffer[inc->datafeed_buf_fill];
237 memset(inc->sample_buffer, 0, inc->sample_unit_size);
/*
 * Set the bit of logic channel ch_idx in the current sample set.
 * Channels map to bits in ascending order, eight channels per byte,
 * channel 0 in the least significant bit of the first byte.
 * NOTE(review): the statements between the range check and the bit
 * position calculation (which presumably evaluate "on") are not visible
 * in this listing.
 */
240 static void set_logic_level(struct context *inc, size_t ch_idx, int on)
242 size_t byte_idx, bit_idx;
/* Guard against channel numbers beyond the configured channel count. */
245 if (ch_idx >= inc->logic_channels)
250 byte_idx = ch_idx / 8;
251 bit_idx = ch_idx % 8;
252 bit_mask = 1 << bit_idx;
/* Only ever sets bits; clear_logic_samples() provides the zero baseline. */
253 inc->sample_buffer[byte_idx] |= bit_mask;
/*
 * Submit all queued sample data to the session feed, and mark the queue
 * as empty afterwards. When a non-zero samplerate is known and has not
 * been announced yet, a meta packet with SR_CONF_SAMPLERATE is sent
 * before the logic data.
 * NOTE(review): return statements and error checks are not visible in
 * this listing.
 */
256 static int flush_logic_samples(const struct sr_input *in)
259 struct sr_datafeed_packet packet;
260 struct sr_datafeed_meta meta;
261 struct sr_config *src;
263 struct sr_datafeed_logic logic;
/* Check for an empty queue first. */
267 if (!inc->datafeed_buf_fill)
/* Announce the samplerate at most once per session. */
270 if (inc->samplerate && !inc->samplerate_sent) {
271 packet.type = SR_DF_META;
272 packet.payload = &meta;
273 samplerate = inc->samplerate;
274 src = sr_config_new(SR_CONF_SAMPLERATE, g_variant_new_uint64(samplerate));
275 meta.config = g_slist_append(NULL, src);
/* The result of sending the meta packet is not inspected here. */
276 sr_session_send(in->sdi, &packet);
277 g_slist_free(meta.config);
279 inc->samplerate_sent = TRUE;
/* Send the queue's content as one logic packet. Length is in bytes. */
282 memset(&packet, 0, sizeof(packet));
283 memset(&logic, 0, sizeof(logic));
284 packet.type = SR_DF_LOGIC;
285 packet.payload = &logic;
286 logic.unitsize = inc->sample_unit_size;
287 logic.length = inc->datafeed_buf_fill;
288 logic.data = inc->datafeed_buffer;
290 rc = sr_session_send(in->sdi, &packet);
/* The queue's content was handed over, start filling from the top again. */
294 inc->datafeed_buf_fill = 0;
/*
 * Account for the just completed sample set in the datafeed queue
 * (advance the fill level by one unit size). Flush the queue to the
 * session feed as soon as it has become full.
 * NOTE(review): return statements are not visible in this listing.
 */
298 static int queue_logic_samples(const struct sr_input *in)
/* Check for "no logic channels" before touching the queue. */
304 if (!inc->logic_channels)
307 inc->datafeed_buf_fill += inc->sample_unit_size;
/* The queue size is a multiple of the unit size, so equality detects "full". */
308 if (inc->datafeed_buf_fill == inc->datafeed_buf_size) {
309 rc = flush_logic_samples(in);
316 /* Helpers for "column processing". */
/*
 * Parse one field of the "column_formats" option, which is of the form
 * [<column count>]<format letter>[<bit count>]. Results are passed back
 * via the column_count, format, and bit_count pointer parameters. The
 * return value is a status code which callers check.
 * NOTE(review): most conditions, case labels, and return statements are
 * not visible in this listing.
 */
318 static int split_column_format(const char *spec,
319 size_t *column_count, enum single_col_format *format, size_t *bit_count)
322 char *endp, format_char;
323 enum single_col_format format_code;
328 /* Get the (optional, decimal, default 1) column count. Accept '*'. */
/* Wildcard case: just step over the one character. */
332 endp = (char *)&spec[1];
/* Numeric case: endp receives the position after the digits. */
334 count = strtoul(spec, &endp, 10);
341 *column_count = count;
344 /* Get the (mandatory, single letter) type spec (-/xob/l). */
345 format_char = *spec++;
346 switch (format_char) {
347 case '-': /* Might conflict with number-parsing. */
350 format_code = FORMAT_NONE;
353 format_code = FORMAT_HEX;
356 format_code = FORMAT_OCT;
360 format_code = FORMAT_BIN;
362 default: /* includes NUL */
366 *format = format_code;
368 /* Get the (optional, decimal, default 1) bit count. */
370 count = strtoul(spec, &endp, 10);
/* The '-' and 'l' formats get special treatment regarding the bit count. */
375 if (format_char == '-')
377 if (format_char == 'l')
383 /* Input spec must have been exhausted. */
/*
 * Translate the textual "column_formats" spec into the array of column
 * processing details in the context. Works in two passes over the comma
 * separated fields: The first pass validates the fields and counts the
 * involved columns and bits, the second pass fills in the details.
 * Updates inc->column_want_count, inc->column_details, and
 * inc->logic_channels. Expects inc->column_seen_count to be set already,
 * because the "all remaining columns" wildcard is resolved against it.
 * NOTE(review): conditions and return statements of the error paths are
 * not visible in this listing.
 */
390 static int make_column_details_from_format(struct context *inc,
391 const char *column_format)
393 char **formats, *format;
394 size_t format_count, column_count, bit_count;
395 size_t auto_column_count;
396 size_t format_idx, c, b, column_idx, channel_idx;
397 enum single_col_format f;
398 struct column_details *detail;
401 /* Split the input spec, count involved columns and bits. */
402 formats = g_strsplit(column_format, ",", 0);
404 sr_err("Cannot parse columns format %s (comma split).", column_format);
407 format_count = g_strv_length(formats);
409 sr_err("Cannot parse columns format %s (field count).", column_format);
413 column_count = bit_count = 0;
414 auto_column_count = 0;
415 for (format_idx = 0; format_idx < format_count; format_idx++) {
416 format = formats[format_idx];
417 ret = split_column_format(format, &c, &f, &b);
/*
 * NOTE(review): this debug print reads c, f, and b right after the
 * call, the error message for a failed call only follows below. If
 * split_column_format() can fail without writing its outputs, then
 * col_format_text[f] gets indexed with an indeterminate value --
 * confirm, and consider printing after the status check.
 */
418 sr_dbg("fmt %s -> %zu cols, %s fmt, %zu bits, rc %d", format, c, col_format_text[f], b, ret);
420 sr_err("Cannot parse columns format %s (field split, %s).", column_format, format);
425 /* User requested "auto-count", must be last format. */
/* The vector is NULL terminated, looking at the next slot is fine. */
426 if (formats[format_idx + 1]) {
427 sr_err("Auto column count must be last format field.");
/*
 * NOTE(review): unsigned subtraction. Wraps around when the fields
 * so far already cover more columns than were seen in the input --
 * confirm that this cannot happen or gets caught.
 */
431 auto_column_count = inc->column_seen_count - column_count;
432 c = auto_column_count;
437 sr_dbg("Column format %s -> %zu columns, %zu logic channels.",
438 column_format, column_count, bit_count);
440 /* Allocate and fill in "column processing" details. */
441 inc->column_want_count = column_count;
442 inc->column_details = g_malloc0_n(column_count, sizeof(inc->column_details[0]));
443 column_idx = channel_idx = 0;
444 for (format_idx = 0; format_idx < format_count; format_idx++) {
445 format = formats[format_idx];
/* Fields were validated in the first pass, the status is ignored here. */
446 (void)split_column_format(format, &c, &f, &b);
448 c = auto_column_count;
450 detail = &inc->column_details[column_idx++];
/* Column numbers are 1-based, the index was incremented already. */
451 detail->col_nr = column_idx;
452 detail->text_format = f;
/* Ignored columns (FORMAT_NONE is zero) don't get channels assigned. */
453 if (detail->text_format) {
454 detail->channel_offset = channel_idx;
455 detail->channel_count = b;
458 sr_dbg("detail -> col %zu, fmt %s, ch off/cnt %zu/%zu",
459 detail->col_nr, col_format_text[detail->text_format],
460 detail->channel_offset, detail->channel_count);
463 inc->logic_channels = channel_idx;
/*
 * Get the processing details for the column with number nr. Column
 * numbers are 1-based (see the "nr - 1" array index). The guards reject
 * a missing context or details table, as well as column numbers outside
 * of the 1..column_want_count range.
 */
469 static const struct column_details *lookup_column_details(struct context *inc, size_t nr)
471 if (!inc || !inc->column_details)
473 if (!nr || nr > inc->column_want_count)
475 return &inc->column_details[nr - 1];
479 * Primitive operations for text input: Strip comments off text lines.
480 * Split text lines into columns. Process input text for individual
/*
 * Strip a comment off a text line. "buf" is the text line and is not
 * const, "prefix" is the comment leader sequence.
 * NOTE(review): only the search for the comment leader is visible in
 * this listing, the handling of an empty leader and the action which is
 * taken on a match are not.
 */
484 static void strip_comment(char *buf, const GString *prefix)
/* Find the first occurrence of the comment leader in the line. */
491 if ((ptr = strstr(buf, prefix->str))) {
498 * @brief Splits a text line into a set of columns.
500 * @param[in] buf The input text line to split.
501 * @param[in] inc The input module's context.
503 * @returns An array of strings, representing the columns' text.
505 * This routine splits a text line on previously determined separators.
507 static char **split_line(char *buf, struct context *inc)
/*
 * A max token count of 0 splits at every separator occurrence. Callers
 * release the resulting vector with g_strfreev().
 */
509 return g_strsplit(buf, inc->delimiter->str, 0);
513 * @brief Parse a multi-bit field into several logic channels.
515 * @param[in] column The input text, a run of bin/hex/oct digits.
516 * @param[in] inc The input module's context.
517 * @param[in] details The column processing details.
519 * @retval SR_OK Success.
520 * @retval SR_ERR Invalid input data (empty, or format error).
522 * This routine modifies the logic levels in the current sample set,
523 * based on the text input and a user provided format spec.
/*
 * NOTE(review): the case labels of both switch statements as well as
 * the conditions around the set_logic_level() calls are not visible in
 * this listing. Comments below only state what the visible lines show.
 */
525 static int parse_logic(const char *column, struct context *inc,
526 const struct column_details *details)
528 size_t length, ch_rem, ch_idx, ch_inc;
532 const char *type_text;
536 * Prepare to read the digits from the text end towards the start.
537 * A digit corresponds to a variable number of channels (depending
538 * on the value's radix). Prepare the mapping of text digits to
539 * (a number of) logic channels.
541 length = strlen(column);
543 sr_err("Column %zu in line %zu is empty.", details->col_nr,
/* The read pointer starts behind the last digit (at the terminator). */
547 rdptr = &column[length];
/* The rightmost digit feeds the column's first channel(s). */
548 ch_idx = details->channel_offset;
549 ch_rem = details->channel_count;
552 * Get another digit and derive up to four logic channels' state from
553 * it. Make sure to not process more bits than the column has channels
554 * associated with it.
/* Stops when the text is exhausted or all of the column's channels got data. */
556 while (rdptr > column && ch_rem) {
557 /* Check for valid digits according to the input radix. */
559 switch (details->text_format) {
/* Accepts '0' and '1' only. */
561 valid = g_ascii_isxdigit(c) && c < '2';
/* Accepts '0' to '7' only. */
565 valid = g_ascii_isxdigit(c) && c < '8';
/* Accepts decimal digits and the letters a-f in either case. */
569 valid = g_ascii_isxdigit(c);
577 type_text = col_format_text[details->text_format];
578 sr_err("Invalid text '%s' in %s type column %zu in line %zu.",
579 column, type_text, details->col_nr, inc->line_number);
582 /* Use the digit's bits for logic channels' data. */
583 bits = g_ascii_xdigit_value(c);
584 switch (details->text_format) {
/* Bit N of the digit's value goes to channel ch_idx + N. */
588 set_logic_level(inc, ch_idx + 3, bits & (1 << 3));
594 set_logic_level(inc, ch_idx + 2, bits & (1 << 2));
598 set_logic_level(inc, ch_idx + 1, bits & (1 << 1));
603 set_logic_level(inc, ch_idx + 0, bits & (1 << 0));
606 /* ShouldNotHappen(TM), but silences compiler warning. */
612 * TODO Determine whether the availability of extra input data
613 * for unhandled logic channels is worth warning here. In this
614 * implementation users are in control, and can have the more
615 * significant bits ignored (which can be considered a feature
616 * and not really a limitation).
623 * @brief Parse routine which ignores the input text.
625 * This routine exists to unify dispatch code paths, mapping input file
626 * columns' data types to their respective parse routines.
/*
 * Matches the col_parse_cb signature, and is registered for FORMAT_NONE
 * columns in col_parse_funcs[].
 * NOTE(review): the routine's body is not visible in this listing.
 */
628 static int parse_ignore(const char *column, struct context *inc,
629 const struct column_details *details)
/*
 * Signature of the routines which process the text of one input column.
 * They receive the column's text, the module context, and the column's
 * processing details, and return a status code.
 */
637 typedef int (*col_parse_cb)(const char *column, struct context *inc,
638 const struct column_details *details);
/*
 * Dispatch table which maps a column's text format to its parse routine.
 * process_buffer() indexes it with the text_format of the column details.
 * All formats which carry digits share the parse_logic() routine.
 */
640 static const col_parse_cb col_parse_funcs[] = {
641 [FORMAT_NONE] = parse_ignore,
642 [FORMAT_BIN] = parse_logic,
643 [FORMAT_OCT] = parse_logic,
644 [FORMAT_HEX] = parse_logic,
/*
 * Set up the input module instance. Allocates the device instance and
 * the private context, reads all user options from the hash table, and
 * settles on a "column_formats" spec text, which is either user provided
 * or derived from the backwards compatible options. The spec only gets
 * stored here, it is evaluated later in initial_parse().
 * NOTE(review): assignments of the "format" variable, the conditions'
 * else branches, and the return statements are not visible in this
 * listing.
 */
647 static int init(struct sr_input *in, GHashTable *options)
650 size_t single_column, first_column, logic_channels;
652 enum single_col_format format;
655 in->sdi = g_malloc0(sizeof(*in->sdi));
656 in->priv = inc = g_malloc0(sizeof(*inc));
658 single_column = g_variant_get_uint32(g_hash_table_lookup(options, "single_column"));
660 logic_channels = g_variant_get_uint32(g_hash_table_lookup(options, "logic_channels"));
662 inc->delimiter = g_string_new(g_variant_get_string(
663 g_hash_table_lookup(options, "column_separator"), NULL));
664 if (!inc->delimiter->len) {
665 sr_err("Column separator cannot be empty.");
/* Only the first three characters are compared, and case is ignored. */
669 s = g_variant_get_string(g_hash_table_lookup(options, "single_format"), NULL);
670 if (g_ascii_strncasecmp(s, "bin", 3) == 0) {
672 } else if (g_ascii_strncasecmp(s, "hex", 3) == 0) {
674 } else if (g_ascii_strncasecmp(s, "oct", 3) == 0) {
677 sr_err("Invalid single-column format: '%s'", s);
681 inc->comment = g_string_new(g_variant_get_string(
682 g_hash_table_lookup(options, "comment_leader"), NULL));
683 if (g_string_equal(inc->comment, inc->delimiter)) {
685 * Using the same sequence as comment leader and column
686 * separator won't work. The user probably specified ';'
687 * as the column separator but did not adjust the comment
688 * leader. Try DWIM, drop comment strippin support here.
690 sr_warn("Comment leader and column separator conflict, disabling comment support.");
691 g_string_truncate(inc->comment, 0);
694 inc->samplerate = g_variant_get_uint64(g_hash_table_lookup(options, "samplerate"));
696 first_column = g_variant_get_uint32(g_hash_table_lookup(options, "first_column"));
698 inc->use_header = g_variant_get_boolean(g_hash_table_lookup(options, "header"));
/* Line numbers are 1-based, zero is rejected. */
700 inc->start_line = g_variant_get_uint32(g_hash_table_lookup(options, "start_line"));
701 if (inc->start_line < 1) {
702 sr_err("Invalid start line %zu.", inc->start_line);
707 * Scan flexible, to get prefered format specs which describe
708 * the input file's data formats. As well as some simple specs
709 * for backwards compatibility and user convenience.
711 * This logic ends up with a copy of the format string, either
712 * user provided or internally derived. Actual creation of the
713 * column processing details gets deferred until the first line
714 * of input data was seen. To support automatic determination of
715 * e.g. channel counts from column counts.
717 s = g_variant_get_string(g_hash_table_lookup(options, "column_formats"), NULL);
/* First choice: the user provided spec, taken verbatim. */
719 inc->column_formats = g_strdup(s);
720 sr_dbg("User specified column_formats: %s.", s);
/* Simple single-column mode: optionally skip columns, then one multi-bit column. */
721 } else if (single_column && logic_channels) {
722 format_char = col_format_char[format];
723 if (single_column == 1) {
724 inc->column_formats = g_strdup_printf("%c%zu",
725 format_char, logic_channels);
727 inc->column_formats = g_strdup_printf("%zu-,%c%zu",
729 format_char, logic_channels);
731 sr_dbg("Backwards compat single_column, col %zu, fmt %s, bits %zu -> %s.",
732 single_column, col_format_text[format], logic_channels,
733 inc->column_formats);
/* Simple multi-column mode: optionally skip columns, then single-bit columns. */
734 } else if (!single_column) {
735 if (first_column > 1) {
736 inc->column_formats = g_strdup_printf("%zu-,%zul",
737 first_column - 1, logic_channels);
739 inc->column_formats = g_strdup_printf("%zul",
742 sr_dbg("Backwards compat multi-column, col %zu, chans %zu -> %s.",
743 first_column, logic_channels,
744 inc->column_formats);
/* Fallback: all columns carry single-bit logic data. */
746 sr_warn("Unknown or unsupported columns layout spec, assuming simple multi-column mode.");
747 inc->column_formats = g_strdup("*l");
754 * Check the channel list for consistency across file re-import. See
755 * the VCD input module for more details and motivation.
/*
 * Move the current channel list from the device instance into the
 * context, so that a later re-read of the file can compare against it.
 * A list which was kept before is released first. The device instance
 * ends up without channels.
 */
758 static void keep_header_for_reread(const struct sr_input *in)
763 g_slist_free_full(inc->prev_sr_channels, sr_channel_free_cb);
764 inc->prev_sr_channels = in->sdi->channels;
765 in->sdi->channels = NULL;
/*
 * Compare the newly created channel list against the list which was
 * kept by keep_header_for_reread(). When the lists agree, the new
 * channels are released and the previous list is put back in place, so
 * that the device instance keeps using the very same channel objects.
 * The caller treats a zero return value as failure.
 * NOTE(review): return statements are not visible in this listing.
 */
768 static int check_header_in_reread(const struct sr_input *in)
/* Without a kept list this is not a re-read. */
777 if (!inc->prev_sr_channels)
780 if (sr_channel_lists_differ(inc->prev_sr_channels, in->sdi->channels)) {
781 sr_err("Channel list change not supported for file re-read.");
784 g_slist_free_full(in->sdi->channels, sr_channel_free_cb);
785 in->sdi->channels = inc->prev_sr_channels;
786 inc->prev_sr_channels = NULL;
/*
 * Set of end-of-line characters. initial_parse() splits on any of these
 * when no termination sequence was determined.
 */
791 static const char *delim_set = "\r\n";
/*
 * Determine which end-of-line sequence the buffered text uses. Checks
 * for the CR LF pair first, then for a lone LF, then for a lone CR.
 * initial_receive() treats a NULL-like "nothing found" result as "no
 * full line yet" (see its comment).
 * NOTE(review): the value which each branch selects, and the return
 * statement, are not visible in this listing.
 */
793 static const char *get_line_termination(GString *buf)
798 if (g_strstr_len(buf->str, buf->len, "\r\n"))
800 else if (memchr(buf->str, '\n', buf->len))
802 else if (memchr(buf->str, '\r', buf->len))
/*
 * Inspect the first proper text line of the input (skipping lines before
 * the start line, blank lines, and comment-only lines). Use its column
 * count to evaluate the column formats spec, create the sigrok channels
 * (optionally named after header line captions), and allocate the
 * datafeed buffer.
 * NOTE(review): line counting, loop exits, cleanup, and return
 * statements are not visible in this listing.
 */
808 static int initial_parse(const struct sr_input *in, GString *buf)
811 GString *channel_name;
812 size_t num_columns, ch_idx, ch_name_idx, col_idx, col_nr;
813 size_t line_number, line_idx;
815 char **lines, *line, **columns, *column;
816 const char *col_caption;
817 gboolean got_caption;
818 const struct column_details *detail;
/* Split on the known termination sequence, or on any EOL character. */
825 if (inc->termination)
826 lines = g_strsplit(buf->str, inc->termination, 0);
828 lines = g_strsplit_set(buf->str, delim_set, 0);
829 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
831 if (inc->start_line > line_number) {
832 sr_spew("Line %zu skipped (before start).", line_number);
835 if (line[0] == '\0') {
836 sr_spew("Blank line %zu skipped.", line_number);
839 strip_comment(line, inc->comment);
840 if (line[0] == '\0') {
841 sr_spew("Comment-only line %zu skipped.", line_number);
845 /* Reached first proper line. */
849 /* Not enough data for a proper line yet. */
854 /* See how many columns the current line has. */
855 columns = split_line(line, inc);
857 sr_err("Error while parsing line %zu.", line_number);
861 num_columns = g_strv_length(columns);
863 sr_err("Error while parsing line %zu.", line_number);
867 sr_dbg("DIAG Got %zu columns in text line: %s.", num_columns, line);
870 * Track the observed number of columns in the input file. Do
871 * process the previously gathered columns format spec now that
872 * automatic channel count can be dealt with.
874 inc->column_seen_count = num_columns;
875 ret = make_column_details_from_format(inc, inc->column_formats);
877 sr_err("Cannot parse columns format using line %zu.", line_number);
882 * Assume all lines have equal length (column count). Bail out
883 * early on suspicious or insufficient input data (check input
884 * which became available here against previous user specs or
885 * auto-determined properties, regardless of layout variant).
887 if (num_columns < inc->column_want_count) {
888 sr_err("Insufficient input text width for desired data amount, got %zu but want %zu columns.",
889 num_columns, inc->column_want_count);
895 * Determine channel names. Optionally use text from a header
896 * line (when requested by the user, and only works in multi
897 * column mode). In the absence of header text, or in single
898 * column mode, channels are assigned rather generic names.
900 * Manipulation of the column's caption is acceptable here, the
901 * header line will never get processed another time.
903 channel_name = g_string_sized_new(64);
904 for (col_idx = 0; col_idx < inc->column_want_count; col_idx++) {
906 col_nr = col_idx + 1;
/*
 * NOTE(review): the lookup result is used without a NULL check here,
 * while process_buffer() does check it. The column number is within
 * the 1..column_want_count range by construction of the loop.
 */
907 detail = lookup_column_details(inc, col_nr);
908 if (detail->text_format == FORMAT_NONE)
910 column = columns[col_idx];
911 col_caption = sr_scpi_unquote_string(column);
/* Captions only apply when requested by the user and not empty. */
912 got_caption = inc->use_header && *col_caption;
913 sr_dbg("DIAG col %zu, ch count %zu, text %s.",
914 col_nr, detail->channel_count, col_caption);
/*
 * Naming scheme: The caption itself for single-bit columns, the
 * caption with a bit index suffix for multi-bit columns, the
 * channel's number in the absence of a caption.
 */
915 for (ch_idx = 0; ch_idx < detail->channel_count; ch_idx++) {
916 ch_name_idx = detail->channel_offset + ch_idx;
917 if (got_caption && detail->channel_count == 1)
918 g_string_assign(channel_name, col_caption);
919 else if (got_caption)
920 g_string_printf(channel_name, "%s[%zu]",
921 col_caption, ch_idx);
923 g_string_printf(channel_name, "%zu", ch_name_idx);
924 sr_dbg("DIAG ch idx %zu, name %s.", ch_name_idx, channel_name->str);
925 sr_channel_new(in->sdi, ch_name_idx, SR_CHANNEL_LOGIC, TRUE,
929 g_string_free(channel_name, TRUE);
930 if (!check_header_in_reread(in)) {
936 * Calculate the minimum buffer size to store the set of samples
937 * of all channels (unit size). Determine a larger buffer size
938 * for datafeed submission that is a multiple of the unit size.
939 * Allocate the larger buffer, the "sample buffer" will point
940 * to a location within that large buffer later.
/* Eight channels per byte, rounded up. */
942 inc->sample_unit_size = (inc->logic_channels + 7) / 8;
/* Room for CHUNK_SIZE sample sets. */
943 inc->datafeed_buf_size = CHUNK_SIZE;
944 inc->datafeed_buf_size *= inc->sample_unit_size;
945 inc->datafeed_buffer = g_malloc(inc->datafeed_buf_size);
946 inc->datafeed_buf_fill = 0;
957 * Gets called from initial_receive(), which runs until the end-of-line
958 * encoding of the input stream could get determined. Assumes that this
959 * routine receives enough buffered initial input data to either see the
960 * BOM when there is one, or that no BOM will follow when a text line
961 * termination sequence was seen. Silently drops the UTF-8 BOM sequence
962 * from the input buffer if one was seen. Does not care to protect
963 * against multiple execution or dropping the BOM multiple times --
964 * there should be at most one in the input stream.
966 static void initial_bom_check(const struct sr_input *in)
968 static const char *utf8_bom = "\xef\xbb\xbf";
/* Check whether the buffer holds enough data for a BOM at all. */
970 if (in->buf->len < strlen(utf8_bom))
/* Check whether the buffer starts with the BOM. */
972 if (strncmp(in->buf->str, utf8_bom, strlen(utf8_bom)) != 0)
/* Remove the BOM bytes from the start of the input buffer. */
974 g_string_erase(in->buf, 0, strlen(utf8_bom));
/*
 * Handle initial input data: Drop an optional BOM, determine the line
 * termination sequence, and pass a copy of the buffered text up to the
 * last seen termination sequence to initial_parse(). The original input
 * buffer is left in place for later regular processing.
 * NOTE(review): conditions and return statements are not visible in
 * this listing.
 */
977 static int initial_receive(const struct sr_input *in)
983 const char *termination;
985 initial_bom_check(in);
989 termination = get_line_termination(in->buf);
991 /* Don't have a full line yet. */
/* Find the last occurrence of the termination sequence. */
994 p = g_strrstr_len(in->buf->str, in->buf->len, termination);
996 /* Don't have a full line yet. */
/*
 * NOTE(review): the "- 1" ends the copy one character before the
 * termination sequence, which drops the last character of the last
 * complete line. The expression evaluates to -1 when the buffer
 * starts with the only termination sequence. Confirm this is intended.
 */
998 len = p - in->buf->str - 1;
999 new_buf = g_string_new_len(in->buf->str, len);
1000 g_string_append_c(new_buf, '\0');
/* Remember the termination sequence for the remainder of the session. */
1002 inc->termination = g_strdup(termination);
1004 if (in->buf->str[0] != '\0')
1005 ret = initial_parse(in, new_buf);
1009 g_string_free(new_buf, TRUE);
/*
 * Process the text lines which have accumulated in the input buffer.
 * Sends the datafeed header upon first invocation. Splits the buffered
 * text into lines, skips lines which don't carry data, converts the
 * columns of data lines to one sample set per line, and queues the
 * sample sets for submission. Processed text is removed from the input
 * buffer at the end.
 * NOTE(review): the use of is_eof, the line counting, "continue" and
 * return statements, and several conditions are not visible in this
 * listing.
 */
1014 static int process_buffer(struct sr_input *in, gboolean is_eof)
1016 struct context *inc;
1018 size_t line_idx, col_idx, col_nr;
1019 const struct column_details *details;
1020 col_parse_cb parse_func;
1022 char *p, **lines, *line, **columns, *column;
1025 if (!inc->started) {
1026 std_session_send_df_header(in->sdi);
1027 inc->started = TRUE;
1031 * Consider empty input non-fatal. Keep accumulating input until
1032 * at least one full text line has become available. Grab the
1033 * maximum amount of accumulated data that consists of full text
1034 * lines, and process what has been received so far, leaving not
1035 * yet complete lines for the next invocation.
1037 * Enforce that all previously buffered data gets processed in
1038 * the "EOF" condition. Do not insist in the presence of the
1039 * termination sequence for the last line (may often be missing
1040 * on Windows). A present termination sequence will just result
1041 * in the "execution of an empty line", and does not harm.
/* "p" marks the end of the text which gets processed in this call. */
1046 p = in->buf->str + in->buf->len;
1048 p = g_strrstr_len(in->buf->str, in->buf->len, inc->termination);
1052 p += strlen(inc->termination);
/*
 * NOTE(review): g_strstrip() modifies the character array in place
 * and does not know about the GString around it, so in->buf->len is
 * not adjusted. Leading whitespace would shift the text relative to
 * "p". Confirm that the erase at the end remains consistent.
 */
1054 g_strstrip(in->buf->str);
1057 lines = g_strsplit(in->buf->str, inc->termination, 0);
1058 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1060 if (inc->line_number < inc->start_line) {
1061 sr_spew("Line %zu skipped (before start).", inc->line_number);
1064 if (line[0] == '\0') {
1065 sr_spew("Blank line %zu skipped.", inc->line_number);
1069 /* Remove trailing comment. */
1070 strip_comment(line, inc->comment);
1071 if (line[0] == '\0') {
1072 sr_spew("Comment-only line %zu skipped.", inc->line_number);
1076 /* Skip the header line, its content was used as the channel names. */
1077 if (inc->use_header && !inc->header_seen) {
1078 sr_spew("Header line %zu skipped.", inc->line_number);
1079 inc->header_seen = TRUE;
1083 /* Split the line into columns, check for minimum length. */
1084 columns = split_line(line, inc);
1086 sr_err("Error while parsing line %zu.", inc->line_number);
1090 num_columns = g_strv_length(columns);
1091 if (num_columns < inc->column_want_count) {
1092 sr_err("Insufficient column count %zu in line %zu.",
1093 num_columns, inc->line_number);
1094 g_strfreev(columns);
1099 /* Have the columns of the current text line processed. */
1100 clear_logic_samples(inc);
/* Extra columns beyond the wanted count are not looked at. */
1101 for (col_idx = 0; col_idx < inc->column_want_count; col_idx++) {
1102 column = columns[col_idx];
1103 col_nr = col_idx + 1;
1104 details = lookup_column_details(inc, col_nr);
/* Check for missing details and for columns which are to be ignored. */
1105 if (!details || !details->text_format)
1107 parse_func = col_parse_funcs[details->text_format];
1110 ret = parse_func(column, inc, details);
1112 g_strfreev(columns);
1118 /* Send sample data to the session bus (buffered). */
1119 ret = queue_logic_samples(in);
1121 sr_err("Sending samples failed.");
1122 g_strfreev(columns);
1127 g_strfreev(columns);
/* Drop the processed text, keep an incomplete last line for the next call. */
1130 g_string_erase(in->buf, 0, p - in->buf->str);
/*
 * Accept another chunk of input data. The chunk is appended to the
 * module's input buffer. As long as no column count is known (no first
 * text line was seen yet), initial_receive() runs to determine the file
 * layout and to prepare the device instance. Regular processing of the
 * buffered text is done by process_buffer() in non-EOF mode.
 * NOTE(review): return statements are not visible in this listing.
 */
1135 static int receive(struct sr_input *in, GString *buf)
1137 struct context *inc;
1140 g_string_append_len(in->buf, buf->str, buf->len);
1143 if (!inc->column_seen_count) {
1144 ret = initial_receive(in);
1145 if (ret == SR_ERR_NA)
1146 /* Not enough data yet. */
1148 else if (ret != SR_OK)
1151 /* sdi is ready, notify frontend. */
1152 in->sdi_ready = TRUE;
1156 ret = process_buffer(in, FALSE);
/*
 * End of input data. Steps which are visible here: Process the remaining
 * buffered text in EOF mode, flush queued sample data to the session
 * feed, and send the end-of-feed packet.
 * NOTE(review): the conditions around these steps and the return
 * statements are not visible in this listing.
 */
1161 static int end(struct sr_input *in)
1163 struct context *inc;
1167 ret = process_buffer(in, TRUE);
1173 ret = flush_logic_samples(in);
1179 std_session_send_df_end(in->sdi);
/*
 * Release resources of the input module instance. The channel list is
 * kept for comparison in a potential re-read. The termination sequence
 * and the datafeed buffer are freed, and their pointers are reset to
 * NULL so that repeated cleanup is harmless.
 */
1184 static void cleanup(struct sr_input *in)
1186 struct context *inc;
1188 keep_header_for_reread(in);
1192 g_free(inc->termination);
1193 inc->termination = NULL;
1194 g_free(inc->datafeed_buffer);
1195 inc->datafeed_buffer = NULL;
/*
 * Prepare the input module instance for another read of the input.
 * Marks the datafeed as not started (the header gets sent again), and
 * discards buffered input text.
 * NOTE(review): other statements of this routine and its return value
 * are not visible in this listing.
 */
1198 static int reset(struct sr_input *in)
1200 struct context *inc = in->priv;
1203 inc->started = FALSE;
1204 g_string_truncate(in->buf, 0);
/*
 * The input module's options. Each entry provides the option's key as
 * looked up in init(), a short caption, and a description text. Entries
 * are indexed by OPT_* values. Default values are not part of the static
 * initializer, get_options() fills them in at runtime.
 * NOTE(review): most designators of the entries are not visible in this
 * listing.
 */
1223 static struct sr_option options[] = {
1225 "column_formats", "Column format specs",
1226 "Specifies text columns data types: comma separated list of [<cols>]<fmt>[<bits>], with -/x/o/b/l format specifiers.",
1229 [OPT_SINGLE_COL] = {
1230 "single_column", "Single column",
1231 "Enable single-column mode, exclusively use text from the specified column (number starting at 1).",
1235 "first_column", "First column",
1236 "Number of the first column with logic data in simple multi-column mode (number starting at 1, default 1).",
1240 "logic_channels", "Number of logic channels",
1241 "Logic channel count, required in simple single-column mode, defaults to \"all remaining columns\" in simple multi-column mode. Obsoleted by 'column_formats'.",
1245 "single_format", "Data format for simple single-column mode.",
1246 "The number format of single-column mode input data: bin, hex, oct.",
1250 "start_line", "Start line",
1251 "The line number at which to start processing input text (default: 1).",
1255 "header", "Get channel names from first line.",
1256 "Use the first processed line's column captions (when available) as channel names.",
1260 "samplerate", "Samplerate (Hz)",
1261 "The input data's sample rate in Hz.",
1265 "column_separator", "Column separator",
1266 "The sequence which separates text columns. Non-empty text, comma by default.",
1270 "comment_leader", "Comment leader character",
1271 "The text which starts comments at the end of text lines.",
/* Terminates the list of options. */
1274 [OPT_MAX] = ALL_ZERO,
/*
 * Provide the list of options of the input module. Default values get
 * created upon the first call, which is detected by the first entry not
 * having a default value yet. The "single_format" option additionally
 * gets the list of its supported values.
 * NOTE(review): the declaration and initialization of the list variable
 * "l" and the return statement are not visible in this listing.
 */
1277 static const struct sr_option *get_options(void)
1281 if (!options[0].def) {
1282 options[OPT_COL_FMTS].def = g_variant_ref_sink(g_variant_new_string(""));
1283 options[OPT_SINGLE_COL].def = g_variant_ref_sink(g_variant_new_uint32(0));
1284 options[OPT_FIRST_COL].def = g_variant_ref_sink(g_variant_new_uint32(1));
1285 options[OPT_NUM_LOGIC].def = g_variant_ref_sink(g_variant_new_uint32(0));
1286 options[OPT_FORMAT].def = g_variant_ref_sink(g_variant_new_string("bin"));
/* The set of values which init() accepts for "single_format". */
1288 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("bin")));
1289 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("hex")));
1290 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("oct")));
1291 options[OPT_FORMAT].values = l;
1292 options[OPT_START].def = g_variant_ref_sink(g_variant_new_uint32(1));
1293 options[OPT_HEADER].def = g_variant_ref_sink(g_variant_new_boolean(FALSE));
1294 options[OPT_RATE].def = g_variant_ref_sink(g_variant_new_uint64(0));
1295 options[OPT_DELIM].def = g_variant_ref_sink(g_variant_new_string(","));
1296 options[OPT_COMMENT].def = g_variant_ref_sink(g_variant_new_string(";"));
1302 SR_PRIV struct sr_input_module input_csv = {
1305 .desc = "Comma-separated values",
1306 .exts = (const char*[]){"csv", NULL},
1307 .options = get_options,