/*
 * This file is part of the libsigrok project.
 *
 * Copyright (C) 2013 Marc Schink <sigrok-dev@marcschink.de>
 * Copyright (C) 2019 Gerhard Sittig <gerhard.sittig@gmx.net>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
27 #include <libsigrok/libsigrok.h>
28 #include "libsigrok-internal.h"
29 #include "scpi.h" /* String un-quote for channel name from header line. */
31 #define LOG_PREFIX "input/csv"
33 #define CHUNK_SIZE (4 * 1024 * 1024)
36 * The CSV input module has the following options:
38 * column_formats: Specifies the data formats and channel counts for the
39 * input file's text columns. Accepts a comma separated list of tuples
40 * with: an optional column repeat count ('*' as a wildcard meaning
41 * "all remaining columns", only applicable to the last field), a format
42 * specifying character ('x' hexadecimal, 'o' octal, 'b' binary, 'l'
43 * single-bit logic), and an optional bit count (translating to: logic
44 * channels communicated in that column). This "column_formats" option
45 * is most versatile, other forms of specifying the column layout only
46 * exist for backwards compatibility.
48 * single_column: Specifies the column number which contains the logic data
49 * for single-column mode. All logic data is taken from several bits
50 * which all are kept within that one column. Only exists for backwards
51 * compatibility, see "column_formats" for more flexibility.
53 * first_column: Specifies the number of the first column with logic data
54 * in simple multi-column mode. Only exists for backwards compatibility,
55 * see "column_formats" for more flexibility.
57 * logic_channels: Specifies the number of logic channels. Is required in
58 * simple single-column mode. Is optional in simple multi-column mode
59 * (and defaults to all remaining columns). Only exists for backwards
60 * compatibility, see "column_formats" for more flexibility.
 * single_format: Specifies the format of the input text in simple single-
 *   column mode. Available formats are: 'bin' (default), 'hex' and 'oct'.
 *   Simple multi-column mode always uses single-bit data per column.
 *   Only exists for backwards compatibility, see "column_formats" for
 *   more flexibility.
68 * start_line: Specifies at which line to start processing the input file.
69 * Allows to skip leading lines which neither are header nor data lines.
70 * By default all of the input file gets processed.
72 * header: Boolean option, controls whether the first processed line is used
73 * to determine channel names. Off by default. Generic channel names are
74 * used in the absence of header line content.
 * samplerate: Specifies the samplerate of the input data. Defaults to 0.
 *   User specs take precedence over data which optionally gets derived
 *   from the input data.
80 * column_separator: Specifies the sequence which separates the text file
81 * columns. Cannot be empty. Defaults to comma.
83 * comment_leader: Specifies the sequence which starts comments that run
84 * up to the end of the current text line. Can be empty to disable
85 * comment support. Defaults to semicolon.
87 * Typical examples of using these options:
88 * - ... -I csv:column_formats=*l ...
89 * All columns are single-bit logic data. Identical to the previous
90 * multi-column mode (the default when no options were given at all).
 * - ... -I csv:column_formats=3-,*l ...
 *   Ignore the first three columns, get single-bit logic data from all
 *   remaining columns (multi-column mode with first-column above 1).
 * - ... -I csv:column_formats=3-,4l,x8 ...
 *   Ignore the first three columns, get single-bit logic data from the
 *   next four columns, then eight-bit data in hex format from the next
 *   column. More columns may follow in the input text but won't get
 *   processed. (Mix of previous multi-column as well as single-column
 *   modes.)
100 * - ... -I csv:column_formats=4x8,b16,5l ...
101 * Get eight-bit data in hex format from the first four columns, then
102 * sixteen-bit data in binary format, then five times single-bit data.
103 * - ... -I csv:single_column=2:single_format=bin:logic_channels=8 ...
104 * Get eight logic bits in binary format from column 2. (Simple
105 * single-column mode, corresponds to the "-,b8" format.)
 * - ... -I csv:first_column=6:logic_channels=4 ...
 *   Get four single-bit logic channels from columns 6 to 9 respectively.
 *   (Simple multi-column mode, corresponds to the "5-,4l" format.)
109 * - ... -I csv:start_line=20:header=yes:...
110 * Skip the first 19 text lines. Use line 20 to derive channel names.
111 * Data starts at line 21.
117 * - Add support for analog input data? (optional)
118 * - Extend the set of supported column types. Just grab a double
119 * value from floating point format input text.
120 * - Optionally get precision ('digits') from the column's format spec?
121 * From the position which is "bit count" for logic channels?
122 * - Optionally get sample rate from timestamp column. Just best-effort
123 * approach, not necessarily reliable. Users can always specify rates.
 * - Add a test suite for input modules in general, and CSV in specific?
 *   Becomes more important with the multitude of options and their
 *   interaction. Could cover edge cases (BOM presence, line termination
 *   absence, etc) and auto-stuff as well (channel names, channel counts,
 *   etc).
/*
 * Single column formats.
 *
 * FORMAT_NONE must remain the first (zero) enumerator: column details
 * get tested with "if (detail->text_format)" to tell ignored columns
 * from data columns, and the enumerators index the lookup tables below.
 */
enum single_col_format {
	FORMAT_NONE,	/* Ignore this column. */
	FORMAT_BIN,	/* Bin digits for a set of bits (or just one bit). */
	FORMAT_HEX,	/* Hex digits for a set of bits. */
	FORMAT_OCT,	/* Oct digits for a set of bits. */
};
139 static const char *col_format_text[] = {
140 [FORMAT_NONE] = "unknown",
141 [FORMAT_BIN] = "binary",
142 [FORMAT_HEX] = "hexadecimal",
143 [FORMAT_OCT] = "octal",
/*
 * Format spec characters, indexed by enum single_col_format. init()
 * looks up col_format_char[format] when it derives a column format
 * spec from the backwards compatible single-column options.
 * NOTE(review): the initializer entries are not visible in this view.
 */
146 static const char col_format_char[] = {
153 struct column_details {
155 enum single_col_format text_format;
156 size_t channel_offset;
157 size_t channel_count;
163 /* Current selected samplerate. */
165 gboolean samplerate_sent;
167 /* Number of logic channels. */
168 size_t logic_channels;
170 /* Column delimiter (actually separator), comment leader, EOL sequence. */
175 /* Format specs for input columns, and processing state. */
176 size_t column_seen_count;
177 const char *column_formats;
178 size_t column_want_count;
179 struct column_details *column_details;
181 /* Line number to start processing. */
185 * Determines if the first line should be treated as header and used for
186 * channel names in multi column mode.
189 gboolean header_seen;
191 size_t sample_unit_size; /**!< Byte count for a single sample. */
192 uint8_t *sample_buffer; /**!< Buffer for a single sample. */
194 uint8_t *datafeed_buffer; /**!< Queue for datafeed submission. */
195 size_t datafeed_buf_size;
196 size_t datafeed_buf_fill;
198 /* Current line number. */
201 /* List of previously created sigrok channels. */
202 GSList *prev_sr_channels;
206 * Primitive operations to handle sample sets:
207 * - Keep a buffer for datafeed submission, capable of holding many
208 * samples (reduces call overhead, improves throughput).
209 * - Have a "current sample set" pointer reference one position in that
210 * large samples buffer.
211 * - Clear the current sample set before text line inspection, then set
212 * the bits which are found active in the current line of text input.
213 * Phrase the API such that call sites can be kept simple. Advance to
214 * the next sample set between lines, flush the larger buffer as needed
215 * (when it is full, or upon EOF).
218 static void clear_logic_samples(struct context *inc)
220 inc->sample_buffer = &inc->datafeed_buffer[inc->datafeed_buf_fill];
221 memset(inc->sample_buffer, 0, inc->sample_unit_size);
224 static void set_logic_level(struct context *inc, size_t ch_idx, int on)
226 size_t byte_idx, bit_idx;
229 if (ch_idx >= inc->logic_channels)
234 byte_idx = ch_idx / 8;
235 bit_idx = ch_idx % 8;
236 bit_mask = 1 << bit_idx;
237 inc->sample_buffer[byte_idx] |= bit_mask;
240 static int flush_logic_samples(const struct sr_input *in)
243 struct sr_datafeed_packet packet;
244 struct sr_datafeed_meta meta;
245 struct sr_config *src;
246 struct sr_datafeed_logic logic;
250 if (!inc->datafeed_buf_fill)
253 if (inc->samplerate && !inc->samplerate_sent) {
254 packet.type = SR_DF_META;
255 packet.payload = &meta;
256 src = sr_config_new(SR_CONF_SAMPLERATE, g_variant_new_uint64(inc->samplerate));
257 meta.config = g_slist_append(NULL, src);
258 sr_session_send(in->sdi, &packet);
259 g_slist_free(meta.config);
261 inc->samplerate_sent = TRUE;
264 memset(&packet, 0, sizeof(packet));
265 memset(&logic, 0, sizeof(logic));
266 packet.type = SR_DF_LOGIC;
267 packet.payload = &logic;
268 logic.unitsize = inc->sample_unit_size;
269 logic.length = inc->datafeed_buf_fill;
270 logic.data = inc->datafeed_buffer;
272 rc = sr_session_send(in->sdi, &packet);
276 inc->datafeed_buf_fill = 0;
280 static int queue_logic_samples(const struct sr_input *in)
286 if (!inc->logic_channels)
289 inc->datafeed_buf_fill += inc->sample_unit_size;
290 if (inc->datafeed_buf_fill == inc->datafeed_buf_size) {
291 rc = flush_logic_samples(in);
298 /* Helpers for "column processing". */
300 static int split_column_format(const char *spec,
301 size_t *column_count, enum single_col_format *format, size_t *bit_count)
304 char *endp, format_char;
305 enum single_col_format format_code;
310 /* Get the (optional, decimal, default 1) column count. Accept '*'. */
313 /* Workaround, strtoul("*") won't always yield expected endp. */
315 endp = (char *)&spec[1];
317 count = strtoul(spec, &endp, 10);
324 *column_count = count;
327 /* Get the (mandatory, single letter) type spec (-/xob/l). */
328 format_char = *spec++;
329 switch (format_char) {
333 format_code = FORMAT_NONE;
336 format_code = FORMAT_HEX;
339 format_code = FORMAT_OCT;
343 format_code = FORMAT_BIN;
345 default: /* includes NUL */
349 *format = format_code;
351 /* Get the (optional, decimal, default 1) bit count. */
353 count = strtoul(spec, &endp, 10);
358 if (format_char == '-')
360 if (format_char == 'l')
366 /* Input spec must have been exhausted. */
373 static int make_column_details_from_format(const struct sr_input *in,
374 const char *column_format, char **column_texts)
377 char **formats, *format;
378 size_t format_count, column_count, bit_count;
379 size_t auto_column_count;
380 size_t format_idx, c, b, column_idx, channel_idx;
381 enum single_col_format f;
382 struct column_details *detail;
383 GString *channel_name;
390 inc->column_seen_count = g_strv_length(column_texts);
392 /* Split the input spec, count involved columns and bits. */
393 formats = g_strsplit(column_format, ",", 0);
395 sr_err("Cannot parse columns format %s (comma split).", column_format);
398 format_count = g_strv_length(formats);
400 sr_err("Cannot parse columns format %s (field count).", column_format);
404 column_count = bit_count = 0;
405 auto_column_count = 0;
406 for (format_idx = 0; format_idx < format_count; format_idx++) {
407 format = formats[format_idx];
408 ret = split_column_format(format, &c, &f, &b);
409 sr_dbg("fmt %s -> %zu cols, %s fmt, %zu bits, rc %d", format, c, col_format_text[f], b, ret);
411 sr_err("Cannot parse columns format %s (field split, %s).", column_format, format);
416 /* User requested "auto-count", must be last format. */
417 if (formats[format_idx + 1]) {
418 sr_err("Auto column count must be last format field.");
422 auto_column_count = inc->column_seen_count - column_count;
423 c = auto_column_count;
428 sr_dbg("Column format %s -> %zu columns, %zu logic channels.",
429 column_format, column_count, bit_count);
431 /* Allocate and fill in "column processing" details. Create channels. */
432 inc->column_want_count = column_count;
433 if (inc->column_seen_count < inc->column_want_count) {
434 sr_err("Insufficient input text width for desired data amount, got %zu but want %zu columns.",
435 inc->column_seen_count, inc->column_want_count);
439 inc->column_details = g_malloc0_n(column_count, sizeof(inc->column_details[0]));
440 column_idx = channel_idx = 0;
441 channel_name = g_string_sized_new(64);
442 for (format_idx = 0; format_idx < format_count; format_idx++) {
443 /* Process a format field, which can span multiple columns. */
444 format = formats[format_idx];
445 (void)split_column_format(format, &c, &f, &b);
447 c = auto_column_count;
449 /* Fill in a column's processing details. */
450 detail = &inc->column_details[column_idx++];
451 detail->col_nr = column_idx;
452 detail->text_format = f;
453 if (detail->text_format) {
454 detail->channel_offset = channel_idx;
455 detail->channel_count = b;
458 sr_dbg("detail -> col %zu, fmt %s, ch off/cnt %zu/%zu",
459 detail->col_nr, col_format_text[detail->text_format],
460 detail->channel_offset, detail->channel_count);
461 if (!detail->text_format)
464 * Create channels with appropriate names. Optionally
465 * use text from a header line (when requested by the
466 * user). In the absence of header text, channels are
467 * assigned rather generic names.
469 * Manipulation of the column's caption (when a header
470 * line is seen) is acceptable, because this header
471 * line won't get processed another time.
473 column = column_texts[detail->col_nr - 1];
474 if (inc->use_header && column && *column)
475 caption = sr_scpi_unquote_string(column);
478 if (!caption || !*caption)
480 for (create_idx = 0; create_idx < detail->channel_count; create_idx++) {
481 if (caption && detail->channel_count == 1) {
482 g_string_assign(channel_name, caption);
483 } else if (caption) {
484 g_string_printf(channel_name, "%s[%zu]",
485 caption, create_idx);
487 g_string_printf(channel_name, "%zu",
488 detail->channel_offset + create_idx);
490 sr_channel_new(in->sdi, detail->channel_offset + create_idx,
491 SR_CHANNEL_LOGIC, TRUE, channel_name->str);
495 inc->logic_channels = channel_idx;
496 g_string_free(channel_name, TRUE);
502 static const struct column_details *lookup_column_details(struct context *inc, size_t nr)
504 if (!inc || !inc->column_details)
506 if (!nr || nr > inc->column_want_count)
508 return &inc->column_details[nr - 1];
/*
 * Primitive operations for text input: Strip comments off text lines.
 * Split text lines into columns. Process input text for individual
 * columns.
 */
517 static void strip_comment(char *buf, const GString *prefix)
524 if ((ptr = strstr(buf, prefix->str))) {
531 * @brief Splits a text line into a set of columns.
533 * @param[in] buf The input text line to split.
534 * @param[in] inc The input module's context.
536 * @returns An array of strings, representing the columns' text.
538 * This routine splits a text line on previously determined separators.
540 static char **split_line(char *buf, struct context *inc)
542 return g_strsplit(buf, inc->delimiter->str, 0);
546 * @brief Parse a multi-bit field into several logic channels.
548 * @param[in] column The input text, a run of bin/hex/oct digits.
549 * @param[in] inc The input module's context.
550 * @param[in] details The column processing details.
552 * @retval SR_OK Success.
553 * @retval SR_ERR Invalid input data (empty, or format error).
555 * This routine modifies the logic levels in the current sample set,
556 * based on the text input and a user provided format spec.
558 static int parse_logic(const char *column, struct context *inc,
559 const struct column_details *details)
561 size_t length, ch_rem, ch_idx, ch_inc;
565 const char *type_text;
569 * Prepare to read the digits from the text end towards the start.
570 * A digit corresponds to a variable number of channels (depending
571 * on the value's radix). Prepare the mapping of text digits to
572 * (a number of) logic channels.
574 length = strlen(column);
576 sr_err("Column %zu in line %zu is empty.", details->col_nr,
580 rdptr = &column[length];
581 ch_idx = details->channel_offset;
582 ch_rem = details->channel_count;
585 * Get another digit and derive up to four logic channels' state from
586 * it. Make sure to not process more bits than the column has channels
587 * associated with it.
589 while (rdptr > column && ch_rem) {
590 /* Check for valid digits according to the input radix. */
592 switch (details->text_format) {
594 valid = g_ascii_isxdigit(c) && c < '2';
598 valid = g_ascii_isxdigit(c) && c < '8';
602 valid = g_ascii_isxdigit(c);
610 type_text = col_format_text[details->text_format];
611 sr_err("Invalid text '%s' in %s type column %zu in line %zu.",
612 column, type_text, details->col_nr, inc->line_number);
615 /* Use the digit's bits for logic channels' data. */
616 bits = g_ascii_xdigit_value(c);
617 switch (details->text_format) {
621 set_logic_level(inc, ch_idx + 3, bits & (1 << 3));
627 set_logic_level(inc, ch_idx + 2, bits & (1 << 2));
631 set_logic_level(inc, ch_idx + 1, bits & (1 << 1));
636 set_logic_level(inc, ch_idx + 0, bits & (1 << 0));
639 /* ShouldNotHappen(TM), but silences compiler warning. */
645 * TODO Determine whether the availability of extra input data
646 * for unhandled logic channels is worth warning here. In this
647 * implementation users are in control, and can have the more
648 * significant bits ignored (which can be considered a feature
649 * and not really a limitation).
656 * @brief Parse routine which ignores the input text.
658 * This routine exists to unify dispatch code paths, mapping input file
659 * columns' data types to their respective parse routines.
661 static int parse_ignore(const char *column, struct context *inc,
662 const struct column_details *details)
670 typedef int (*col_parse_cb)(const char *column, struct context *inc,
671 const struct column_details *details);
673 static const col_parse_cb col_parse_funcs[] = {
674 [FORMAT_NONE] = parse_ignore,
675 [FORMAT_BIN] = parse_logic,
676 [FORMAT_OCT] = parse_logic,
677 [FORMAT_HEX] = parse_logic,
680 static int init(struct sr_input *in, GHashTable *options)
683 size_t single_column, first_column, logic_channels;
685 enum single_col_format format;
688 in->sdi = g_malloc0(sizeof(*in->sdi));
689 in->priv = inc = g_malloc0(sizeof(*inc));
691 single_column = g_variant_get_uint32(g_hash_table_lookup(options, "single_column"));
692 logic_channels = g_variant_get_uint32(g_hash_table_lookup(options, "logic_channels"));
693 inc->delimiter = g_string_new(g_variant_get_string(
694 g_hash_table_lookup(options, "column_separator"), NULL));
695 if (!inc->delimiter->len) {
696 sr_err("Column separator cannot be empty.");
699 s = g_variant_get_string(g_hash_table_lookup(options, "single_format"), NULL);
700 if (g_ascii_strncasecmp(s, "bin", 3) == 0) {
702 } else if (g_ascii_strncasecmp(s, "hex", 3) == 0) {
704 } else if (g_ascii_strncasecmp(s, "oct", 3) == 0) {
707 sr_err("Invalid single-column format: '%s'", s);
710 inc->comment = g_string_new(g_variant_get_string(
711 g_hash_table_lookup(options, "comment_leader"), NULL));
712 if (g_string_equal(inc->comment, inc->delimiter)) {
714 * Using the same sequence as comment leader and column
715 * separator won't work. The user probably specified ';'
716 * as the column separator but did not adjust the comment
717 * leader. Try DWIM, drop comment strippin support here.
719 sr_warn("Comment leader and column separator conflict, disabling comment support.");
720 g_string_truncate(inc->comment, 0);
722 inc->samplerate = g_variant_get_uint64(g_hash_table_lookup(options, "samplerate"));
723 first_column = g_variant_get_uint32(g_hash_table_lookup(options, "first_column"));
724 inc->use_header = g_variant_get_boolean(g_hash_table_lookup(options, "header"));
725 inc->start_line = g_variant_get_uint32(g_hash_table_lookup(options, "start_line"));
726 if (inc->start_line < 1) {
727 sr_err("Invalid start line %zu.", inc->start_line);
732 * Scan flexible, to get prefered format specs which describe
733 * the input file's data formats. As well as some simple specs
734 * for backwards compatibility and user convenience.
736 * This logic ends up with a copy of the format string, either
737 * user provided or internally derived. Actual creation of the
738 * column processing details gets deferred until the first line
739 * of input data was seen. To support automatic determination of
740 * e.g. channel counts from column counts.
742 s = g_variant_get_string(g_hash_table_lookup(options, "column_formats"), NULL);
744 inc->column_formats = g_strdup(s);
745 sr_dbg("User specified column_formats: %s.", s);
746 } else if (single_column && logic_channels) {
747 format_char = col_format_char[format];
748 if (single_column == 1) {
749 inc->column_formats = g_strdup_printf("%c%zu",
750 format_char, logic_channels);
752 inc->column_formats = g_strdup_printf("%zu-,%c%zu",
754 format_char, logic_channels);
756 sr_dbg("Backwards compat single_column, col %zu, fmt %s, bits %zu -> %s.",
757 single_column, col_format_text[format], logic_channels,
758 inc->column_formats);
759 } else if (!single_column) {
760 if (first_column > 1) {
761 inc->column_formats = g_strdup_printf("%zu-,%zul",
762 first_column - 1, logic_channels);
764 inc->column_formats = g_strdup_printf("%zul",
767 sr_dbg("Backwards compat multi-column, col %zu, chans %zu -> %s.",
768 first_column, logic_channels,
769 inc->column_formats);
771 sr_warn("Unknown or unsupported columns layout spec, assuming simple multi-column mode.");
772 inc->column_formats = g_strdup("*l");
779 * Check the channel list for consistency across file re-import. See
780 * the VCD input module for more details and motivation.
783 static void keep_header_for_reread(const struct sr_input *in)
788 g_slist_free_full(inc->prev_sr_channels, sr_channel_free_cb);
789 inc->prev_sr_channels = in->sdi->channels;
790 in->sdi->channels = NULL;
793 static int check_header_in_reread(const struct sr_input *in)
802 if (!inc->prev_sr_channels)
805 if (sr_channel_lists_differ(inc->prev_sr_channels, in->sdi->channels)) {
806 sr_err("Channel list change not supported for file re-read.");
809 g_slist_free_full(in->sdi->channels, sr_channel_free_cb);
810 in->sdi->channels = inc->prev_sr_channels;
811 inc->prev_sr_channels = NULL;
/* Line break characters, used while no termination sequence is known. */
static const char *delim_set = "\r\n";
818 static const char *get_line_termination(GString *buf)
823 if (g_strstr_len(buf->str, buf->len, "\r\n"))
825 else if (memchr(buf->str, '\n', buf->len))
827 else if (memchr(buf->str, '\r', buf->len))
833 static int initial_parse(const struct sr_input *in, GString *buf)
837 size_t line_number, line_idx;
839 char **lines, *line, **columns;
845 /* Search for the first line to process (header or data). */
847 if (inc->termination)
848 lines = g_strsplit(buf->str, inc->termination, 0);
850 lines = g_strsplit_set(buf->str, delim_set, 0);
851 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
853 if (inc->start_line > line_number) {
854 sr_spew("Line %zu skipped (before start).", line_number);
857 if (line[0] == '\0') {
858 sr_spew("Blank line %zu skipped.", line_number);
861 strip_comment(line, inc->comment);
862 if (line[0] == '\0') {
863 sr_spew("Comment-only line %zu skipped.", line_number);
867 /* Reached first proper line. */
871 /* Not enough data for a proper line yet. */
876 /* Get the number of columns in the line. */
877 columns = split_line(line, inc);
879 sr_err("Error while parsing line %zu.", line_number);
883 num_columns = g_strv_length(columns);
885 sr_err("Error while parsing line %zu.", line_number);
889 sr_dbg("DIAG Got %zu columns in text line: %s.", num_columns, line);
892 * Interpret the user provided column format specs. This might
893 * involve inspection of the now received input text, to support
894 * e.g. automatic detection of channel counts in the absence of
895 * user provided specs. Optionally a header line is used to get
898 * Check the then created channels for consistency across .reset
899 * and .receive sequences (file re-load).
901 ret = make_column_details_from_format(in, inc->column_formats, columns);
903 sr_err("Cannot parse columns format using line %zu.", line_number);
906 if (!check_header_in_reread(in)) {
912 * Allocate buffer memory for datafeed submission of sample data.
913 * Calculate the minimum buffer size to store the set of samples
914 * of all channels (unit size). Determine a larger buffer size
915 * for datafeed submission that is a multiple of the unit size.
916 * Allocate the larger buffer, the "sample buffer" will point
917 * to a location within that large buffer later.
919 inc->sample_unit_size = (inc->logic_channels + 7) / 8;
920 inc->datafeed_buf_size = CHUNK_SIZE;
921 inc->datafeed_buf_size *= inc->sample_unit_size;
922 inc->datafeed_buffer = g_malloc(inc->datafeed_buf_size);
923 inc->datafeed_buf_fill = 0;
934 * Gets called from initial_receive(), which runs until the end-of-line
935 * encoding of the input stream could get determined. Assumes that this
936 * routine receives enough buffered initial input data to either see the
937 * BOM when there is one, or that no BOM will follow when a text line
938 * termination sequence was seen. Silently drops the UTF-8 BOM sequence
939 * from the input buffer if one was seen. Does not care to protect
940 * against multiple execution or dropping the BOM multiple times --
941 * there should be at most one in the input stream.
943 static void initial_bom_check(const struct sr_input *in)
945 static const char *utf8_bom = "\xef\xbb\xbf";
947 if (in->buf->len < strlen(utf8_bom))
949 if (strncmp(in->buf->str, utf8_bom, strlen(utf8_bom)) != 0)
951 g_string_erase(in->buf, 0, strlen(utf8_bom));
954 static int initial_receive(const struct sr_input *in)
960 const char *termination;
962 initial_bom_check(in);
966 termination = get_line_termination(in->buf);
968 /* Don't have a full line yet. */
971 p = g_strrstr_len(in->buf->str, in->buf->len, termination);
973 /* Don't have a full line yet. */
975 len = p - in->buf->str - 1;
976 new_buf = g_string_new_len(in->buf->str, len);
977 g_string_append_c(new_buf, '\0');
979 inc->termination = g_strdup(termination);
981 if (in->buf->str[0] != '\0')
982 ret = initial_parse(in, new_buf);
986 g_string_free(new_buf, TRUE);
/*
 * Process the text which has accumulated in in->buf: send the datafeed
 * header upon first use, split the buffered text on the termination
 * sequence, and convert every data line into one sample set which gets
 * queued for datafeed submission. Processed text is removed from the
 * input buffer afterwards.
 *
 * NOTE(review): this view lacks several short source lines (braces,
 * return statements, local declarations). The control flow between
 * the visible statements needs to be confirmed against the complete
 * file before this routine gets modified.
 */
991 static int process_buffer(struct sr_input *in, gboolean is_eof)
995 size_t line_idx, col_idx, col_nr;
996 const struct column_details *details;
997 col_parse_cb parse_func;
999 char *p, **lines, *line, **columns, *column;
/* The datafeed header is sent once, before any sample data. */
1002 if (!inc->started) {
1003 std_session_send_df_header(in->sdi);
1004 inc->started = TRUE;
1008 * Consider empty input non-fatal. Keep accumulating input until
1009 * at least one full text line has become available. Grab the
1010 * maximum amount of accumulated data that consists of full text
1011 * lines, and process what has been received so far, leaving not
1012 * yet complete lines for the next invocation.
1014 * Enforce that all previously buffered data gets processed in
1015 * the "EOF" condition. Do not insist in the presence of the
1016 * termination sequence for the last line (may often be missing
1017 * on Windows). A present termination sequence will just result
1018 * in the "execution of an empty line", and does not harm.
/* 'p' ends up referencing the end of the text which gets consumed. */
1023 p = in->buf->str + in->buf->len;
1025 p = g_strrstr_len(in->buf->str, in->buf->len, inc->termination);
1029 p += strlen(inc->termination);
/* NOTE(review): strips the buffer's text in place, GString length is not adjusted here. */
1031 g_strstrip(in->buf->str);
1034 lines = g_strsplit(in->buf->str, inc->termination, 0);
1035 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1037 if (inc->line_number < inc->start_line) {
1038 sr_spew("Line %zu skipped (before start).", inc->line_number);
1041 if (line[0] == '\0') {
1042 sr_spew("Blank line %zu skipped.", inc->line_number);
1046 /* Remove trailing comment. */
1047 strip_comment(line, inc->comment);
1048 if (line[0] == '\0') {
1049 sr_spew("Comment-only line %zu skipped.", inc->line_number);
1053 /* Skip the header line, its content was used as the channel names. */
1054 if (inc->use_header && !inc->header_seen) {
1055 sr_spew("Header line %zu skipped.", inc->line_number);
1056 inc->header_seen = TRUE;
1060 /* Split the line into columns, check for minimum length. */
1061 columns = split_line(line, inc);
1063 sr_err("Error while parsing line %zu.", inc->line_number);
1067 num_columns = g_strv_length(columns);
1068 if (num_columns < inc->column_want_count) {
1069 sr_err("Insufficient column count %zu in line %zu.",
1070 num_columns, inc->line_number);
1071 g_strfreev(columns);
1076 /* Have the columns of the current text line processed. */
1077 clear_logic_samples(inc);
1078 for (col_idx = 0; col_idx < inc->column_want_count; col_idx++) {
1079 column = columns[col_idx];
/* Column details are looked up by column number, which counts from 1. */
1080 col_nr = col_idx + 1;
1081 details = lookup_column_details(inc, col_nr);
1082 if (!details || !details->text_format)
/* Dispatch to the parse routine for the column's text format. */
1084 parse_func = col_parse_funcs[details->text_format];
1087 ret = parse_func(column, inc, details);
1089 g_strfreev(columns);
1095 /* Send sample data to the session bus (buffered). */
1096 ret = queue_logic_samples(in);
1098 sr_err("Sending samples failed.");
1099 g_strfreev(columns);
1104 g_strfreev(columns);
/* Drop the consumed text, keep a not yet complete line for the next call. */
1107 g_string_erase(in->buf, 0, p - in->buf->str);
/*
 * Accept another chunk of input data. The chunk gets appended to the
 * module's input buffer. As long as no columns were seen yet, the
 * initial_receive() routine inspects the buffered text; upon its
 * success the device instance is flagged as ready. Otherwise the
 * buffered text is processed by process_buffer() (not at EOF).
 *
 * NOTE(review): the return statements of this routine are not visible
 * in this view, confirm them against the complete file.
 */
1112 static int receive(struct sr_input *in, GString *buf)
1114 struct context *inc;
1117 g_string_append_len(in->buf, buf->str, buf->len);
/* A zero seen-columns count means that the initial parse has not completed yet. */
1120 if (!inc->column_seen_count) {
1121 ret = initial_receive(in);
1122 if (ret == SR_ERR_NA)
1123 /* Not enough data yet. */
1125 else if (ret != SR_OK)
1128 /* sdi is ready, notify frontend. */
1129 in->sdi_ready = TRUE;
1133 ret = process_buffer(in, FALSE);
/*
 * End of input: process the remaining buffered text in EOF mode, flush
 * the queued samples, and send the end-of-datafeed marker.
 *
 * NOTE(review): the conditions which guard these calls, and the return
 * statements, are not visible in this view.
 */
1138 static int end(struct sr_input *in)
1140 struct context *inc;
1144 ret = process_buffer(in, TRUE);
1150 ret = flush_logic_samples(in);
1156 std_session_send_df_end(in->sdi);
1161 static void cleanup(struct sr_input *in)
1163 struct context *inc;
1165 keep_header_for_reread(in);
1169 g_free(inc->termination);
1170 inc->termination = NULL;
1171 g_free(inc->datafeed_buffer);
1172 inc->datafeed_buffer = NULL;
/*
 * Prepare for another read of the input: mark the datafeed as not
 * started, and discard buffered input text.
 *
 * NOTE(review): this view lacks some short lines of this routine (at
 * least the return statement), confirm against the complete file.
 */
1175 static int reset(struct sr_input *in)
1177 struct context *inc = in->priv;
1180 inc->started = FALSE;
1181 g_string_truncate(in->buf, 0);
1200 static struct sr_option options[] = {
1202 "column_formats", "Column format specs",
1203 "Specifies text columns data types: comma separated list of [<cols>]<fmt>[<bits>], with -/x/o/b/l format specifiers.",
1206 [OPT_SINGLE_COL] = {
1207 "single_column", "Single column",
1208 "Enable single-column mode, exclusively use text from the specified column (number starting at 1).",
1212 "first_column", "First column",
1213 "Number of the first column with logic data in simple multi-column mode (number starting at 1, default 1).",
1217 "logic_channels", "Number of logic channels",
1218 "Logic channel count, required in simple single-column mode, defaults to \"all remaining columns\" in simple multi-column mode. Obsoleted by 'column_formats'.",
1222 "single_format", "Data format for simple single-column mode.",
1223 "The number format of single-column mode input data: bin, hex, oct.",
1227 "start_line", "Start line",
1228 "The line number at which to start processing input text (default: 1).",
1232 "header", "Get channel names from first line.",
1233 "Use the first processed line's column captions (when available) as channel names.",
1237 "samplerate", "Samplerate (Hz)",
1238 "The input data's sample rate in Hz.",
1242 "column_separator", "Column separator",
1243 "The sequence which separates text columns. Non-empty text, comma by default.",
1247 "comment_leader", "Comment leader character",
1248 "The text which starts comments at the end of text lines.",
1251 [OPT_MAX] = ALL_ZERO,
1254 static const struct sr_option *get_options(void)
1258 if (!options[0].def) {
1259 options[OPT_COL_FMTS].def = g_variant_ref_sink(g_variant_new_string(""));
1260 options[OPT_SINGLE_COL].def = g_variant_ref_sink(g_variant_new_uint32(0));
1261 options[OPT_FIRST_COL].def = g_variant_ref_sink(g_variant_new_uint32(1));
1262 options[OPT_NUM_LOGIC].def = g_variant_ref_sink(g_variant_new_uint32(0));
1263 options[OPT_FORMAT].def = g_variant_ref_sink(g_variant_new_string("bin"));
1265 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("bin")));
1266 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("hex")));
1267 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("oct")));
1268 options[OPT_FORMAT].values = l;
1269 options[OPT_START].def = g_variant_ref_sink(g_variant_new_uint32(1));
1270 options[OPT_HEADER].def = g_variant_ref_sink(g_variant_new_boolean(FALSE));
1271 options[OPT_RATE].def = g_variant_ref_sink(g_variant_new_uint64(0));
1272 options[OPT_DELIM].def = g_variant_ref_sink(g_variant_new_string(","));
1273 options[OPT_COMMENT].def = g_variant_ref_sink(g_variant_new_string(";"));
1279 SR_PRIV struct sr_input_module input_csv = {
1282 .desc = "Comma-separated values",
1283 .exts = (const char*[]){"csv", NULL},
1284 .options = get_options,