2 * This file is part of the libsigrok project.
4 * Copyright (C) 2013 Marc Schink <sigrok-dev@marcschink.de>
5 * Copyright (C) 2019 Gerhard Sittig <gerhard.sittig@gmx.net>
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
27 #include <libsigrok/libsigrok.h>
28 #include "libsigrok-internal.h"
29 #include "scpi.h" /* String un-quote for channel name from header line. */
31 #define LOG_PREFIX "input/csv"
33 #define CHUNK_SIZE (4 * 1024 * 1024)
36 * The CSV input module has the following options:
38 * column_formats: Specifies the data formats and channel counts for the
39 * input file's text columns. Accepts a comma separated list of tuples
40 * with: an optional column repeat count ('*' as a wildcard meaning
41 * "all remaining columns", only applicable to the last field), a format
42 * specifying character ('x' hexadecimal, 'o' octal, 'b' binary, 'l'
43 * single-bit logic), and an optional bit count (translating to: logic
44 * channels communicated in that column). This "column_formats" option
45 * is most versatile, other forms of specifying the column layout only
46 * exist for backwards compatibility.
48 * single_column: Specifies the column number which contains the logic data
49 * for single-column mode. All logic data is taken from several bits
50 * which all are kept within that one column. Only exists for backwards
51 * compatibility, see "column_formats" for more flexibility.
53 * first_column: Specifies the number of the first column with logic data
54 * in simple multi-column mode. Only exists for backwards compatibility,
55 * see "column_formats" for more flexibility.
57 * logic_channels: Specifies the number of logic channels. Is required in
58 * simple single-column mode. Is optional in simple multi-column mode
59 * (and defaults to all remaining columns). Only exists for backwards
60 * compatibility, see "column_formats" for more flexibility.
62 * single_format: Specifies the format of the input text in simple single-
63 * column mode. Available formats are: 'bin' (default), 'hex' and 'oct'.
64 * Simple multi-column mode always uses single-bit data per column.
65 * Only exists for backwards compatibility, see "column_formats" for more flexibility.
68 * start_line: Specifies at which line to start processing the input file.
69 * Allows to skip leading lines which neither are header nor data lines.
70 * By default all of the input file gets processed.
72 * header: Boolean option, controls whether the first processed line is used
73 * to determine channel names. Off by default. Generic channel names are
74 * used in the absence of header line content.
76 * samplerate: Specifies the samplerate of the input data. Defaults to 0.
77 * User specs take precedence over data which optionally gets derived from the input data.
80 * column_separator: Specifies the sequence which separates the text file
81 * columns. Cannot be empty. Defaults to comma.
83 * comment_leader: Specifies the sequence which starts comments that run
84 * up to the end of the current text line. Can be empty to disable
85 * comment support. Defaults to semicolon.
87 * Typical examples of using these options:
88 * - ... -I csv:column_formats=*l ...
89 * All columns are single-bit logic data. Identical to the previous
90 * multi-column mode (the default when no options were given at all).
91 * - ... -I csv:column_formats=3-,*l ...
92 * Ignore the first three columns, get single-bit logic data from all
93 * remaining columns (multi-column mode with first-column above 1).
94 * - ... -I csv:column_formats=3-,4l,x8 ...
95 * Ignore the first three columns, get single-bit logic data from the
96 * next four columns, then eight-bit data in hex format from the next
97 * column. More columns may follow in the input text but won't get
98 * processed. (Mix of previous multi-column as well as single-column modes.)
100 * - ... -I csv:column_formats=4x8,b16,5l ...
101 * Get eight-bit data in hex format from the first four columns, then
102 * sixteen-bit data in binary format, then five times single-bit data.
103 * - ... -I csv:single_column=2:single_format=bin:logic_channels=8 ...
104 * Get eight logic bits in binary format from column 2. (Simple
105 * single-column mode, corresponds to the "-,b8" format.)
106 * - ... -I csv:first_column=6:logic_channels=4 ...
107 * Get four single-bit logic channels from columns 6 to 9 respectively.
108 * (Simple multi-column mode, corresponds to the "5-,4b" format.)
109 * - ... -I csv:start_line=20:header=yes:...
110 * Skip the first 19 text lines. Use line 20 to derive channel names.
111 * Data starts at line 21.
117 * - Add support for analog input data? (optional)
118 * - Extend the set of supported column types. Just grab a double
119 * value from floating point format input text.
120 * - Optionally get precision ('digits') from the column's format spec?
121 * From the position which is "bit count" for logic channels?
122 * - Optionally get sample rate from timestamp column. Just best-effort
123 * approach, not necessarily reliable. Users can always specify rates.
124 * - Add a test suite for input modules in general, and CSV in specific?
125 * Becomes more important with the multitude of options and their
126 * interaction. Could cover edge cases (BOM presence, line termination
127 * absence, etc) and auto-stuff as well (channel names, channel counts, etc).
131 /* Single column formats. */
/*
 * Text format of a single input column. The enumerators double as array
 * indices into col_format_text[], col_format_char[] and col_parse_funcs[].
 * FORMAT_NONE is the first enumerator (value 0), which is what lets call
 * sites test "!text_format" to detect a column that is to be skipped.
 */
132 enum single_col_format {
133 FORMAT_NONE, /* Ignore this column. */
134 FORMAT_BIN, /* Bin digits for a set of bits (or just one bit). */
135 FORMAT_HEX, /* Hex digits for a set of bits. */
136 FORMAT_OCT, /* Oct digits for a set of bits. */
/*
 * Human readable names of the column formats, indexed by
 * enum single_col_format. Referenced from debug and error messages.
 */
139 static const char *col_format_text[] = {
140 [FORMAT_NONE] = "unknown",
141 [FORMAT_BIN] = "binary",
142 [FORMAT_HEX] = "hexadecimal",
143 [FORMAT_OCT] = "octal",
146 static const char col_format_char[] = {
/* Processing details for one column of the input text. */
153 struct column_details {
155 enum single_col_format text_format; /* How to interpret the column's text, FORMAT_NONE to skip it. */
156 size_t channel_offset; /* Index of the first logic channel fed from this column. */
157 size_t channel_count; /* Number of logic channels (bits) taken from this column. */
163 /* Current selected samplerate. */
/* Set once the samplerate meta packet went out, so it is sent only once. */
165 gboolean samplerate_sent;
167 /* Number of logic channels. */
168 size_t logic_channels;
170 /* Column delimiter (actually separator), comment leader, EOL sequence. */
175 /* Format specs for input columns, and processing state. */
/* Column count of the first processed line; non-zero also marks "initial parse done". */
176 size_t column_seen_count;
/* Format spec text, either user provided or derived from the legacy options. */
177 const char *column_formats;
/* Number of leading columns covered by the format spec; lines must have at least that many. */
178 size_t column_want_count;
/* Array of column_want_count entries, entry [n - 1] describes column number n. */
179 struct column_details *column_details;
181 /* Line number to start processing. */
185 * Determines if the first line should be treated as header and used for
186 * channel names in multi column mode.
/* Set when the header line was skipped during data processing. */
189 gboolean header_seen;
191 size_t sample_unit_size; /**< Byte count for a single sample. */
192 uint8_t *sample_buffer; /**< Buffer for a single sample. */
194 uint8_t *datafeed_buffer; /**< Queue for datafeed submission. */
195 size_t datafeed_buf_size; /**< Capacity of the datafeed buffer, in bytes. */
196 size_t datafeed_buf_fill; /**< Bytes currently queued in the datafeed buffer. */
198 /* Current line number. */
201 /* List of previously created sigrok channels. */
202 GSList *prev_sr_channels;
206 * Primitive operations to handle sample sets:
207 * - Keep a buffer for datafeed submission, capable of holding many
208 * samples (reduces call overhead, improves throughput).
209 * - Have a "current sample set" pointer reference one position in that
210 * large samples buffer.
211 * - Clear the current sample set before text line inspection, then set
212 * the bits which are found active in the current line of text input.
213 * Phrase the API such that call sites can be kept simple. Advance to
214 * the next sample set between lines, flush the larger buffer as needed
215 * (when it is full, or upon EOF).
/*
 * Start a new sample: point the "current sample" at the next free slot
 * of the datafeed buffer (at the current fill level) and zero one
 * sample's worth of bytes there.
 */
218 static void clear_logic_samples(struct context *inc)
220 inc->sample_buffer = &inc->datafeed_buffer[inc->datafeed_buf_fill];
221 memset(inc->sample_buffer, 0, inc->sample_unit_size);
/*
 * Mark logic channel ch_idx as active in the current sample. Channels
 * are packed eight per byte: channel 0 is bit 0 of byte 0, channel 8 is
 * bit 0 of byte 1, and so on. Bits are only ever OR-ed in, so the sample
 * must have been zeroed by clear_logic_samples() beforehand.
 * NOTE(review): presumably a no-op when 'on' is zero -- confirm.
 */
224 static void set_logic_level(struct context *inc, size_t ch_idx, int on)
226 size_t byte_idx, bit_idx;
/* Guard against channel numbers beyond the configured channel count. */
229 if (ch_idx >= inc->logic_channels)
234 byte_idx = ch_idx / 8;
235 bit_idx = ch_idx % 8;
236 bit_mask = 1 << bit_idx;
237 inc->sample_buffer[byte_idx] |= bit_mask;
/*
 * Submit all queued samples to the session as one SR_DF_LOGIC packet and
 * mark the datafeed buffer empty again. Ahead of the first logic packet
 * a samplerate meta packet is sent, once, and only for a non-zero rate.
 */
240 static int flush_logic_samples(const struct sr_input *in)
243 struct sr_datafeed_packet packet;
244 struct sr_datafeed_meta meta;
245 struct sr_config *src;
247 struct sr_datafeed_logic logic;
/* Nothing queued, nothing to send. */
251 if (!inc->datafeed_buf_fill)
/* One-time announcement of the samplerate. */
254 if (inc->samplerate && !inc->samplerate_sent) {
255 packet.type = SR_DF_META;
256 packet.payload = &meta;
257 samplerate = inc->samplerate;
258 src = sr_config_new(SR_CONF_SAMPLERATE, g_variant_new_uint64(samplerate));
259 meta.config = g_slist_append(NULL, src);
/* The result of this send is not inspected. */
260 sr_session_send(in->sdi, &packet);
261 g_slist_free(meta.config);
263 inc->samplerate_sent = TRUE;
/* The packet struct was used for the meta packet above, start over. */
266 memset(&packet, 0, sizeof(packet));
267 memset(&logic, 0, sizeof(logic));
268 packet.type = SR_DF_LOGIC;
269 packet.payload = &logic;
/* Length is a byte count, the fill level already is kept in bytes. */
270 logic.unitsize = inc->sample_unit_size;
271 logic.length = inc->datafeed_buf_fill;
272 logic.data = inc->datafeed_buffer;
274 rc = sr_session_send(in->sdi, &packet);
/* Buffer content was handed over, start filling from the top again. */
278 inc->datafeed_buf_fill = 0;
/*
 * Commit the current sample to the datafeed buffer by advancing the fill
 * level by one sample, and flush the buffer once it is full.
 */
282 static int queue_logic_samples(const struct sr_input *in)
/* Without logic channels there is no sample data to queue. */
288 if (!inc->logic_channels)
291 inc->datafeed_buf_fill += inc->sample_unit_size;
/*
 * Testing for equality suffices: the buffer size is set up as a multiple
 * of the sample unit size, and the fill level grows in unit size steps.
 */
292 if (inc->datafeed_buf_fill == inc->datafeed_buf_size) {
293 rc = flush_logic_samples(in);
300 /* Helpers for "column processing". */
/*
 * Parse one comma separated field of the "column_formats" option, which
 * is laid out as [<count>]<format>[<bits>]. The column count is stored
 * to *column_count and the format code to *format.
 * NOTE(review): presumably *bit_count receives the bit count, and the
 * result is an SR_* status code (the caller logs it as "rc") -- confirm.
 */
302 static int split_column_format(const char *spec,
303 size_t *column_count, enum single_col_format *format, size_t *bit_count)
306 char *endp, format_char;
307 enum single_col_format format_code;
312 /* Get the (optional, decimal, default 1) column count. Accept '*'. */
/* For the '*' wildcard, parsing continues right after that character. */
316 endp = (char *)&spec[1];
318 count = strtoul(spec, &endp, 10);
325 *column_count = count;
328 /* Get the (mandatory, single letter) type spec (-/xob/l). */
329 format_char = *spec++;
330 switch (format_char) {
331 case '-': /* Might conflict with number-parsing. */
334 format_code = FORMAT_NONE;
337 format_code = FORMAT_HEX;
340 format_code = FORMAT_OCT;
344 format_code = FORMAT_BIN;
346 default: /* includes NUL */
350 *format = format_code;
352 /* Get the (optional, decimal, default 1) bit count. */
354 count = strtoul(spec, &endp, 10);
/*
 * Ignored ('-') and single-bit logic ('l') columns get their bit count
 * special-cased.
 * NOTE(review): presumably forced to 0 and 1 bits respectively -- confirm.
 */
359 if (format_char == '-')
361 if (format_char == 'l')
367 /* Input spec must have been exhausted. */
/*
 * Turn the "column_formats" spec into per-column processing details and
 * create the logic channels. Works in two passes over the spec's fields:
 * the first pass validates the fields and counts columns and bits, the
 * second pass fills in inc->column_details[] and creates the channels.
 *
 * column_texts holds the columns of the first processed input line. Its
 * length resolves the "all remaining columns" wildcard, and its text
 * provides channel names when header use was requested.
 */
374 static int make_column_details_from_format(const struct sr_input *in,
375 const char *column_format, char **column_texts)
378 char **formats, *format;
379 size_t format_count, column_count, bit_count;
380 size_t auto_column_count;
381 size_t format_idx, c, b, column_idx, channel_idx;
382 enum single_col_format f;
383 struct column_details *detail;
384 GString *channel_name;
/* Remember the width of the line which the layout is derived from. */
391 inc->column_seen_count = g_strv_length(column_texts);
393 /* Split the input spec, count involved columns and bits. */
394 formats = g_strsplit(column_format, ",", 0);
396 sr_err("Cannot parse columns format %s (comma split).", column_format);
399 format_count = g_strv_length(formats);
401 sr_err("Cannot parse columns format %s (field count).", column_format);
405 column_count = bit_count = 0;
406 auto_column_count = 0;
407 for (format_idx = 0; format_idx < format_count; format_idx++) {
408 format = formats[format_idx];
409 ret = split_column_format(format, &c, &f, &b);
410 sr_dbg("fmt %s -> %zu cols, %s fmt, %zu bits, rc %d", format, c, col_format_text[f], b, ret);
412 sr_err("Cannot parse columns format %s (field split, %s).", column_format, format);
417 /* User requested "auto-count", must be last format. */
/* The vector from g_strsplit() is NULL terminated, [idx + 1] is safe to read. */
418 if (formats[format_idx + 1]) {
419 sr_err("Auto column count must be last format field.");
/*
 * NOTE(review): unsigned subtraction. This wraps around when the
 * preceding fields already claim more columns than the input line has.
 * Confirm that this case is caught before the count is used.
 */
423 auto_column_count = inc->column_seen_count - column_count;
424 c = auto_column_count;
429 sr_dbg("Column format %s -> %zu columns, %zu logic channels.",
430 column_format, column_count, bit_count);
432 /* Allocate and fill in "column processing" details. Create channels. */
433 inc->column_want_count = column_count;
434 if (inc->column_seen_count < inc->column_want_count) {
435 sr_err("Insufficient input text width for desired data amount, got %zu but want %zu columns.",
436 inc->column_seen_count, inc->column_want_count);
440 inc->column_details = g_malloc0_n(column_count, sizeof(inc->column_details[0]));
441 column_idx = channel_idx = 0;
442 channel_name = g_string_sized_new(64);
443 for (format_idx = 0; format_idx < format_count; format_idx++) {
444 /* Process a format field, which can span multiple columns. */
445 format = formats[format_idx];
/* Result ignored, the very same field parsed fine in the first pass. */
446 (void)split_column_format(format, &c, &f, &b);
448 c = auto_column_count;
450 /* Fill in a column's processing details. */
/* The index is advanced before col_nr is assigned, so col_nr is 1-based. */
451 detail = &inc->column_details[column_idx++];
452 detail->col_nr = column_idx;
453 detail->text_format = f;
/* Only columns that carry data get channels assigned. */
454 if (detail->text_format) {
455 detail->channel_offset = channel_idx;
456 detail->channel_count = b;
459 sr_dbg("detail -> col %zu, fmt %s, ch off/cnt %zu/%zu",
460 detail->col_nr, col_format_text[detail->text_format],
461 detail->channel_offset, detail->channel_count);
462 if (!detail->text_format)
465 * Create channels with appropriate names. Optionally
466 * use text from a header line (when requested by the
467 * user). In the absence of header text, channels are
468 * assigned rather generic names.
470 * Manipulation of the column's caption (when a header
471 * line is seen) is acceptable, because this header
472 * line won't get processed another time.
474 column = column_texts[detail->col_nr - 1];
475 if (inc->use_header && column && *column)
476 caption = sr_scpi_unquote_string(column);
479 if (!caption || !*caption)
/*
 * Naming scheme: the bare caption for a one-channel column, the
 * caption plus "[bit index]" for a multi-channel column, and the plain
 * channel number when there is no caption.
 */
481 for (create_idx = 0; create_idx < detail->channel_count; create_idx++) {
482 if (caption && detail->channel_count == 1) {
483 g_string_assign(channel_name, caption);
484 } else if (caption) {
485 g_string_printf(channel_name, "%s[%zu]",
486 caption, create_idx);
488 g_string_printf(channel_name, "%zu",
489 detail->channel_offset + create_idx);
491 sr_channel_new(in->sdi, detail->channel_offset + create_idx,
492 SR_CHANNEL_LOGIC, TRUE, channel_name->str);
/* The running channel index ends up as the total channel count. */
496 inc->logic_channels = channel_idx;
497 g_string_free(channel_name, TRUE);
/*
 * Look up the processing details for a column, given its 1-based number.
 * The guards reject a missing context or details table, as well as
 * numbers outside of 1..column_want_count.
 * NOTE(review): presumably yields NULL in those cases, the caller in
 * process_buffer() checks for "!details" -- confirm.
 */
503 static const struct column_details *lookup_column_details(struct context *inc, size_t nr)
505 if (!inc || !inc->column_details)
507 if (!nr || nr > inc->column_want_count)
/* Column numbers start at 1, array indices at 0. */
509 return &inc->column_details[nr - 1];
513 * Primitive operations for text input: Strip comments off text lines.
514 * Split text lines into columns. Process input text for individual columns.
/*
 * Strip a trailing comment off a text line. Locates the first occurrence
 * of the comment leader sequence within buf.
 * NOTE(review): presumably buf gets truncated at that position (callers
 * test line[0] afterwards to detect comment-only lines), and an empty
 * comment leader is presumably handled before the search -- confirm.
 */
518 static void strip_comment(char *buf, const GString *prefix)
525 if ((ptr = strstr(buf, prefix->str))) {
532 * @brief Splits a text line into a set of columns.
534 * @param[in] buf The input text line to split.
535 * @param[in] inc The input module's context.
537 * @returns An array of strings, representing the columns' text.
539 * This routine splits a text line on previously determined separators.
541 static char **split_line(char *buf, struct context *inc)
/*
 * A max token count of 0 means "no limit". The vector comes from
 * g_strsplit(), callers release it by means of g_strfreev().
 */
543 return g_strsplit(buf, inc->delimiter->str, 0);
547 * @brief Parse a multi-bit field into several logic channels.
549 * @param[in] column The input text, a run of bin/hex/oct digits.
550 * @param[in] inc The input module's context.
551 * @param[in] details The column processing details.
553 * @retval SR_OK Success.
554 * @retval SR_ERR Invalid input data (empty, or format error).
556 * This routine modifies the logic levels in the current sample set,
557 * based on the text input and a user provided format spec.
559 static int parse_logic(const char *column, struct context *inc,
560 const struct column_details *details)
562 size_t length, ch_rem, ch_idx, ch_inc;
566 const char *type_text;
570 * Prepare to read the digits from the text end towards the start.
571 * A digit corresponds to a variable number of channels (depending
572 * on the value's radix). Prepare the mapping of text digits to
573 * (a number of) logic channels.
575 length = strlen(column);
577 sr_err("Column %zu in line %zu is empty.", details->col_nr,
/*
 * Start one past the last character. The rightmost digit feeds the
 * column's lowest channel numbers.
 */
581 rdptr = &column[length];
582 ch_idx = details->channel_offset;
583 ch_rem = details->channel_count;
586 * Get another digit and derive up to four logic channels' state from
587 * it. Make sure to not process more bits than the column has channels
588 * associated with it.
590 while (rdptr > column && ch_rem) {
591 /* Check for valid digits according to the input radix. */
593 switch (details->text_format) {
/* Hex digit class, narrowed down to '0' and '1'. */
595 valid = g_ascii_isxdigit(c) && c < '2';
/* Hex digit class, narrowed down to '0' .. '7'. */
599 valid = g_ascii_isxdigit(c) && c < '8';
603 valid = g_ascii_isxdigit(c);
611 type_text = col_format_text[details->text_format];
612 sr_err("Invalid text '%s' in %s type column %zu in line %zu.",
613 column, type_text, details->col_nr, inc->line_number);
616 /* Use the digit's bits for logic channels' data. */
617 bits = g_ascii_xdigit_value(c);
618 switch (details->text_format) {
/*
 * Bit n of the digit's value drives channel ch_idx + n.
 * set_logic_level() guards against channel numbers beyond the
 * configured channel count.
 */
622 set_logic_level(inc, ch_idx + 3, bits & (1 << 3));
628 set_logic_level(inc, ch_idx + 2, bits & (1 << 2));
632 set_logic_level(inc, ch_idx + 1, bits & (1 << 1));
637 set_logic_level(inc, ch_idx + 0, bits & (1 << 0));
640 /* ShouldNotHappen(TM), but silences compiler warning. */
646 * TODO Determine whether the availability of extra input data
647 * for unhandled logic channels is worth warning here. In this
648 * implementation users are in control, and can have the more
649 * significant bits ignored (which can be considered a feature
650 * and not really a limitation).
657 * @brief Parse routine which ignores the input text.
659 * This routine exists to unify dispatch code paths, mapping input file
660 * columns' data types to their respective parse routines.
662 static int parse_ignore(const char *column, struct context *inc,
663 const struct column_details *details)
/* Signature shared by all routines which process one column's text. */
671 typedef int (*col_parse_cb)(const char *column, struct context *inc,
672 const struct column_details *details);
/*
 * Parse routine per column format, indexed by enum single_col_format.
 * All digit based formats share parse_logic(), which tells them apart
 * by inspecting details->text_format.
 */
674 static const col_parse_cb col_parse_funcs[] = {
675 [FORMAT_NONE] = parse_ignore,
676 [FORMAT_BIN] = parse_logic,
677 [FORMAT_OCT] = parse_logic,
678 [FORMAT_HEX] = parse_logic,
681 static int init(struct sr_input *in, GHashTable *options)
684 size_t single_column, first_column, logic_channels;
686 enum single_col_format format;
689 in->sdi = g_malloc0(sizeof(*in->sdi));
690 in->priv = inc = g_malloc0(sizeof(*inc));
692 single_column = g_variant_get_uint32(g_hash_table_lookup(options, "single_column"));
694 logic_channels = g_variant_get_uint32(g_hash_table_lookup(options, "logic_channels"));
696 inc->delimiter = g_string_new(g_variant_get_string(
697 g_hash_table_lookup(options, "column_separator"), NULL));
698 if (!inc->delimiter->len) {
699 sr_err("Column separator cannot be empty.");
703 s = g_variant_get_string(g_hash_table_lookup(options, "single_format"), NULL);
704 if (g_ascii_strncasecmp(s, "bin", 3) == 0) {
706 } else if (g_ascii_strncasecmp(s, "hex", 3) == 0) {
708 } else if (g_ascii_strncasecmp(s, "oct", 3) == 0) {
711 sr_err("Invalid single-column format: '%s'", s);
715 inc->comment = g_string_new(g_variant_get_string(
716 g_hash_table_lookup(options, "comment_leader"), NULL));
717 if (g_string_equal(inc->comment, inc->delimiter)) {
719 * Using the same sequence as comment leader and column
720 * separator won't work. The user probably specified ';'
721 * as the column separator but did not adjust the comment
722 * leader. Try DWIM, drop comment stripping support here.
724 sr_warn("Comment leader and column separator conflict, disabling comment support.");
725 g_string_truncate(inc->comment, 0);
728 inc->samplerate = g_variant_get_uint64(g_hash_table_lookup(options, "samplerate"));
730 first_column = g_variant_get_uint32(g_hash_table_lookup(options, "first_column"));
732 inc->use_header = g_variant_get_boolean(g_hash_table_lookup(options, "header"));
734 inc->start_line = g_variant_get_uint32(g_hash_table_lookup(options, "start_line"));
735 if (inc->start_line < 1) {
736 sr_err("Invalid start line %zu.", inc->start_line);
741 * Scan flexible, to get preferred format specs which describe
742 * the input file's data formats. As well as some simple specs
743 * for backwards compatibility and user convenience.
745 * This logic ends up with a copy of the format string, either
746 * user provided or internally derived. Actual creation of the
747 * column processing details gets deferred until the first line
748 * of input data was seen. To support automatic determination of
749 * e.g. channel counts from column counts.
751 s = g_variant_get_string(g_hash_table_lookup(options, "column_formats"), NULL);
753 inc->column_formats = g_strdup(s);
754 sr_dbg("User specified column_formats: %s.", s);
755 } else if (single_column && logic_channels) {
756 format_char = col_format_char[format];
757 if (single_column == 1) {
758 inc->column_formats = g_strdup_printf("%c%zu",
759 format_char, logic_channels);
761 inc->column_formats = g_strdup_printf("%zu-,%c%zu",
763 format_char, logic_channels);
765 sr_dbg("Backwards compat single_column, col %zu, fmt %s, bits %zu -> %s.",
766 single_column, col_format_text[format], logic_channels,
767 inc->column_formats);
768 } else if (!single_column) {
769 if (first_column > 1) {
770 inc->column_formats = g_strdup_printf("%zu-,%zul",
771 first_column - 1, logic_channels);
773 inc->column_formats = g_strdup_printf("%zul",
776 sr_dbg("Backwards compat multi-column, col %zu, chans %zu -> %s.",
777 first_column, logic_channels,
778 inc->column_formats);
780 sr_warn("Unknown or unsupported columns layout spec, assuming simple multi-column mode.");
781 inc->column_formats = g_strdup("*l");
788 * Check the channel list for consistency across file re-import. See
789 * the VCD input module for more details and motivation.
/*
 * Move the device instance's channel list into the module context, so
 * that a later re-read of the file can compare against it. A previously
 * kept list gets released first. The device instance is left without
 * channels afterwards. Invoked from cleanup().
 */
792 static void keep_header_for_reread(const struct sr_input *in)
797 g_slist_free_full(inc->prev_sr_channels, sr_channel_free_cb);
798 inc->prev_sr_channels = in->sdi->channels;
799 in->sdi->channels = NULL;
/*
 * Compare the channels created in this pass against the list kept from
 * a previous pass over the file. When the lists agree, the new list is
 * released and the previous list is put back into the device instance,
 * so channel objects stay the same across re-reads.
 * NOTE(review): presumably returns non-zero for "acceptable" (including
 * the case of no previous list) and zero when the lists differ; the
 * caller treats a zero result as failure -- confirm.
 */
802 static int check_header_in_reread(const struct sr_input *in)
/* First read of the file, nothing to compare against. */
811 if (!inc->prev_sr_channels)
814 if (sr_channel_lists_differ(inc->prev_sr_channels, in->sdi->channels)) {
815 sr_err("Channel list change not supported for file re-read.");
818 g_slist_free_full(in->sdi->channels, sr_channel_free_cb);
819 in->sdi->channels = inc->prev_sr_channels;
820 inc->prev_sr_channels = NULL;
/*
 * Set of line end characters, for use with g_strsplit_set() as long as
 * no specific termination sequence is known.
 */
825 static const char *delim_set = "\r\n";
/*
 * Detect the line termination used in the buffered text. CR LF is
 * checked before the lone LF and the lone CR, since text with CR LF
 * contains the single characters as well.
 * NOTE(review): presumably returns the detected sequence, or NULL when
 * the buffer holds no line end yet -- confirm.
 */
827 static const char *get_line_termination(GString *buf)
832 if (g_strstr_len(buf->str, buf->len, "\r\n"))
834 else if (memchr(buf->str, '\n', buf->len))
836 else if (memchr(buf->str, '\r', buf->len))
/*
 * Inspect the first processable text line (header or data) of the
 * initially received input. Skips lines before the start line, blank
 * lines, and comment-only lines. The columns of the line found are used
 * to set up the column processing details and the channels, then the
 * buffer for datafeed submission gets allocated.
 */
842 static int initial_parse(const struct sr_input *in, GString *buf)
846 size_t line_number, line_idx;
848 char **lines, *line, **columns;
854 /* Search for the first line to process (header or data). */
/* Split on the known termination sequence, else on any CR or LF character. */
856 if (inc->termination)
857 lines = g_strsplit(buf->str, inc->termination, 0);
859 lines = g_strsplit_set(buf->str, delim_set, 0);
860 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
862 if (inc->start_line > line_number) {
863 sr_spew("Line %zu skipped (before start).", line_number);
866 if (line[0] == '\0') {
867 sr_spew("Blank line %zu skipped.", line_number);
870 strip_comment(line, inc->comment);
871 if (line[0] == '\0') {
872 sr_spew("Comment-only line %zu skipped.", line_number);
876 /* Reached first proper line. */
880 /* Not enough data for a proper line yet. */
885 /* Get the number of columns in the line. */
886 columns = split_line(line, inc);
888 sr_err("Error while parsing line %zu.", line_number);
892 num_columns = g_strv_length(columns);
894 sr_err("Error while parsing line %zu.", line_number);
898 sr_dbg("DIAG Got %zu columns in text line: %s.", num_columns, line);
901 * Interpret the user provided column format specs. This might
902 * involve inspection of the now received input text, to support
903 * e.g. automatic detection of channel counts in the absence of
904 * user provided specs. Optionally a header line is used to get
907 * Check the then created channels for consistency across .reset
908 * and .receive sequences (file re-load).
910 ret = make_column_details_from_format(in, inc->column_formats, columns);
912 sr_err("Cannot parse columns format using line %zu.", line_number);
/* A zero result means that the channel list changed across re-reads. */
915 if (!check_header_in_reread(in)) {
921 * Allocate buffer memory for datafeed submission of sample data.
922 * Calculate the minimum buffer size to store the set of samples
923 * of all channels (unit size). Determine a larger buffer size
924 * for datafeed submission that is a multiple of the unit size.
925 * Allocate the larger buffer, the "sample buffer" will point
926 * to a location within that large buffer later.
/* Round the channel count up to whole bytes. */
928 inc->sample_unit_size = (inc->logic_channels + 7) / 8;
/*
 * CHUNK_SIZE serves as a sample count here. The byte size is CHUNK_SIZE
 * times the unit size, thus always holds a whole number of samples.
 */
929 inc->datafeed_buf_size = CHUNK_SIZE;
930 inc->datafeed_buf_size *= inc->sample_unit_size;
931 inc->datafeed_buffer = g_malloc(inc->datafeed_buf_size);
932 inc->datafeed_buf_fill = 0;
943 * Gets called from initial_receive(), which runs until the end-of-line
944 * encoding of the input stream could get determined. Assumes that this
945 * routine receives enough buffered initial input data to either see the
946 * BOM when there is one, or that no BOM will follow when a text line
947 * termination sequence was seen. Silently drops the UTF-8 BOM sequence
948 * from the input buffer if one was seen. Does not care to protect
949 * against multiple execution or dropping the BOM multiple times --
950 * there should be at most one in the input stream.
952 static void initial_bom_check(const struct sr_input *in)
954 static const char *utf8_bom = "\xef\xbb\xbf";
/* Less input than the length of a BOM, cannot tell yet. */
956 if (in->buf->len < strlen(utf8_bom))
/* Input does not start with the BOM, leave the buffer alone. */
958 if (strncmp(in->buf->str, utf8_bom, strlen(utf8_bom)) != 0)
/* Remove the BOM's bytes from the start of the input buffer. */
960 g_string_erase(in->buf, 0, strlen(utf8_bom));
/*
 * Handle input data as long as the file's layout has not been determined
 * yet. Drops a leading BOM, detects the line termination sequence,
 * and passes a copy of the buffered text up to the last line end to
 * initial_parse(). The detected termination sequence is kept in the
 * context. The input buffer itself is not consumed here.
 */
963 static int initial_receive(const struct sr_input *in)
969 const char *termination;
971 initial_bom_check(in);
975 termination = get_line_termination(in->buf);
977 /* Don't have a full line yet. */
/* Locate the last line end, text after it is an incomplete line. */
980 p = g_strrstr_len(in->buf->str, in->buf->len, termination);
982 /* Don't have a full line yet. */
/*
 * NOTE(review): p points at the first byte of the last termination
 * sequence, so "p - str" already is the length of the text before it.
 * The additional "- 1" leaves out the last character of the last
 * complete line, and yields -1 when the termination is found at offset 0.
 * Confirm whether this is intended.
 */
984 len = p - in->buf->str - 1;
985 new_buf = g_string_new_len(in->buf->str, len);
986 g_string_append_c(new_buf, '\0');
/* The context keeps its own copy of the termination sequence. */
988 inc->termination = g_strdup(termination);
990 if (in->buf->str[0] != '\0')
991 ret = initial_parse(in, new_buf);
995 g_string_free(new_buf, TRUE);
/*
 * Process the accumulated input text line by line. Each data line yields
 * one sample: the line is split into columns, each column's text is
 * handed to the parse routine for its format, and the resulting sample
 * is queued for datafeed submission. Text which was processed gets
 * removed from the input buffer.
 */
1000 static int process_buffer(struct sr_input *in, gboolean is_eof)
1002 struct context *inc;
1004 size_t line_idx, col_idx, col_nr;
1005 const struct column_details *details;
1006 col_parse_cb parse_func;
1008 char *p, **lines, *line, **columns, *column;
/* Send the datafeed header once, before any sample data. */
1011 if (!inc->started) {
1012 std_session_send_df_header(in->sdi);
1013 inc->started = TRUE;
1017 * Consider empty input non-fatal. Keep accumulating input until
1018 * at least one full text line has become available. Grab the
1019 * maximum amount of accumulated data that consists of full text
1020 * lines, and process what has been received so far, leaving not
1021 * yet complete lines for the next invocation.
1023 * Enforce that all previously buffered data gets processed in
1024 * the "EOF" condition. Do not insist in the presence of the
1025 * termination sequence for the last line (may often be missing
1026 * on Windows). A present termination sequence will just result
1027 * in the "execution of an empty line", and does not harm.
/*
 * 'p' marks the end of the text to process in this call: either the end
 * of the buffer, or the position right after the last termination
 * sequence.
 */
1032 p = in->buf->str + in->buf->len;
1034 p = g_strrstr_len(in->buf->str, in->buf->len, inc->termination);
1038 p += strlen(inc->termination);
1040 g_strstrip(in->buf->str);
1043 lines = g_strsplit(in->buf->str, inc->termination, 0);
1044 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1046 if (inc->line_number < inc->start_line) {
1047 sr_spew("Line %zu skipped (before start).", inc->line_number);
1050 if (line[0] == '\0') {
1051 sr_spew("Blank line %zu skipped.", inc->line_number);
1055 /* Remove trailing comment. */
1056 strip_comment(line, inc->comment);
1057 if (line[0] == '\0') {
1058 sr_spew("Comment-only line %zu skipped.", inc->line_number);
1062 /* Skip the header line, its content was used as the channel names. */
1063 if (inc->use_header && !inc->header_seen) {
1064 sr_spew("Header line %zu skipped.", inc->line_number);
1065 inc->header_seen = TRUE;
1069 /* Split the line into columns, check for minimum length. */
1070 columns = split_line(line, inc);
1072 sr_err("Error while parsing line %zu.", inc->line_number);
/* Lines may be wider than the format spec, but not narrower. */
1076 num_columns = g_strv_length(columns);
1077 if (num_columns < inc->column_want_count) {
1078 sr_err("Insufficient column count %zu in line %zu.",
1079 num_columns, inc->line_number);
1080 g_strfreev(columns);
1085 /* Have the columns of the current text line processed. */
1086 clear_logic_samples(inc);
1087 for (col_idx = 0; col_idx < inc->column_want_count; col_idx++) {
1088 column = columns[col_idx];
/* Details are looked up by 1-based column number. */
1089 col_nr = col_idx + 1;
1090 details = lookup_column_details(inc, col_nr);
/* Columns without details or without a data format carry no samples. */
1091 if (!details || !details->text_format)
1093 parse_func = col_parse_funcs[details->text_format];
1096 ret = parse_func(column, inc, details);
1098 g_strfreev(columns);
1104 /* Send sample data to the session bus (buffered). */
1105 ret = queue_logic_samples(in);
1107 sr_err("Sending samples failed.");
1108 g_strfreev(columns);
1113 g_strfreev(columns);
/* Drop the processed text, keep what follows position 'p'. */
1116 g_string_erase(in->buf, 0, p - in->buf->str);
/*
 * Accept another chunk of input data. The data is appended to the
 * module's input buffer. As long as no column count has been recorded,
 * the input goes through initial_receive() to determine the file's
 * layout; afterwards the buffer is processed as regular sample data.
 */
1121 static int receive(struct sr_input *in, GString *buf)
1123 struct context *inc;
1126 g_string_append_len(in->buf, buf->str, buf->len);
/* A zero column count means that the initial parse is still pending. */
1129 if (!inc->column_seen_count) {
1130 ret = initial_receive(in);
1131 if (ret == SR_ERR_NA)
1132 /* Not enough data yet. */
1134 else if (ret != SR_OK)
1137 /* sdi is ready, notify frontend. */
1138 in->sdi_ready = TRUE;
/* More input may follow, so this is not the EOF condition. */
1142 ret = process_buffer(in, FALSE);
/*
 * End of input. Process all text that still is buffered (EOF condition,
 * a missing termination of the last line is acceptable), flush samples
 * which still are queued, and send the end-of-datafeed marker.
 */
1147 static int end(struct sr_input *in)
1149 struct context *inc;
1153 ret = process_buffer(in, TRUE);
1159 ret = flush_logic_samples(in);
1165 std_session_send_df_end(in->sdi);
/*
 * Release resources of the module context. The channel list is not
 * released but kept for comparison in a later re-read of the file.
 * Pointers are reset to NULL after releasing what they referenced.
 */
1170 static void cleanup(struct sr_input *in)
1172 struct context *inc;
1174 keep_header_for_reread(in);
1178 g_free(inc->termination);
1179 inc->termination = NULL;
1180 g_free(inc->datafeed_buffer);
1181 inc->datafeed_buffer = NULL;
/*
 * Prepare for another pass over the input: forget that the datafeed
 * header was sent already, and discard input text which still is buffered.
 */
1184 static int reset(struct sr_input *in)
1186 struct context *inc = in->priv;
1189 inc->started = FALSE;
1190 g_string_truncate(in->buf, 0);
/*
 * The module's options. Each entry starts with the key under which
 * init() looks up the option's value, followed by a short label and a
 * longer description text. Default values are not part of this table,
 * they get filled in by get_options().
 */
1209 static struct sr_option options[] = {
1211 "column_formats", "Column format specs",
1212 "Specifies text columns data types: comma separated list of [<cols>]<fmt>[<bits>], with -/x/o/b/l format specifiers.",
1215 [OPT_SINGLE_COL] = {
1216 "single_column", "Single column",
1217 "Enable single-column mode, exclusively use text from the specified column (number starting at 1).",
1221 "first_column", "First column",
1222 "Number of the first column with logic data in simple multi-column mode (number starting at 1, default 1).",
1226 "logic_channels", "Number of logic channels",
1227 "Logic channel count, required in simple single-column mode, defaults to \"all remaining columns\" in simple multi-column mode. Obsoleted by 'column_formats'.",
1231 "single_format", "Data format for simple single-column mode.",
1232 "The number format of single-column mode input data: bin, hex, oct.",
1236 "start_line", "Start line",
1237 "The line number at which to start processing input text (default: 1).",
1241 "header", "Get channel names from first line.",
1242 "Use the first processed line's column captions (when available) as channel names.",
1246 "samplerate", "Samplerate (Hz)",
1247 "The input data's sample rate in Hz.",
1251 "column_separator", "Column separator",
1252 "The sequence which separates text columns. Non-empty text, comma by default.",
1256 "comment_leader", "Comment leader character",
1257 "The text which starts comments at the end of text lines.",
1260 [OPT_MAX] = ALL_ZERO,
/*
 * Provide the module's options. Default values (and the list of accepted
 * values for the single-column format) are created upon the first call
 * only: a default which is present in the first entry marks the table
 * as filled in already.
 */
1263 static const struct sr_option *get_options(void)
1267 if (!options[0].def) {
1268 options[OPT_COL_FMTS].def = g_variant_ref_sink(g_variant_new_string(""));
1269 options[OPT_SINGLE_COL].def = g_variant_ref_sink(g_variant_new_uint32(0));
1270 options[OPT_FIRST_COL].def = g_variant_ref_sink(g_variant_new_uint32(1));
1271 options[OPT_NUM_LOGIC].def = g_variant_ref_sink(g_variant_new_uint32(0));
1272 options[OPT_FORMAT].def = g_variant_ref_sink(g_variant_new_string("bin"));
/* The set of values which the single-column format option accepts. */
1274 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("bin")));
1275 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("hex")));
1276 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("oct")));
1277 options[OPT_FORMAT].values = l;
1278 options[OPT_START].def = g_variant_ref_sink(g_variant_new_uint32(1));
1279 options[OPT_HEADER].def = g_variant_ref_sink(g_variant_new_boolean(FALSE));
1280 options[OPT_RATE].def = g_variant_ref_sink(g_variant_new_uint64(0));
1281 options[OPT_DELIM].def = g_variant_ref_sink(g_variant_new_string(","));
1282 options[OPT_COMMENT].def = g_variant_ref_sink(g_variant_new_string(";"));
/*
 * Descriptor of the CSV input module: description text, the file name
 * extensions associated with the format, and the options provider.
 */
1288 SR_PRIV struct sr_input_module input_csv = {
1291 .desc = "Comma-separated values",
1292 .exts = (const char*[]){"csv", NULL},
1293 .options = get_options,