2 * This file is part of the libsigrok project.
4 * Copyright (C) 2013 Marc Schink <sigrok-dev@marcschink.de>
5 * Copyright (C) 2019 Gerhard Sittig <gerhard.sittig@gmx.net>
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
27 #include <libsigrok/libsigrok.h>
28 #include "libsigrok-internal.h"
29 #include "scpi.h" /* String un-quote for channel name from header line. */
31 #define LOG_PREFIX "input/csv"
33 #define CHUNK_SIZE (4 * 1024 * 1024)
36 * The CSV input module has the following options:
38 * column_formats: Specifies the data formats and channel counts for the
39 * input file's text columns. Accepts a comma separated list of tuples
40 * with: an optional column repeat count ('*' as a wildcard meaning
41 * "all remaining columns", only applicable to the last field), a format
42 * specifying character ('x' hexadecimal, 'o' octal, 'b' binary, 'l'
43 * single-bit logic), and an optional bit count (translating to: logic
44 * channels communicated in that column). This "column_formats" option
45 * is most versatile, other forms of specifying the column layout only
46 * exist for backwards compatibility.
48 * single_column: Specifies the column number which contains the logic data
49 * for single-column mode. All logic data is taken from several bits
50 * which all are kept within that one column. Only exists for backwards
51 * compatibility, see "column_formats" for more flexibility.
53 * first_column: Specifies the number of the first column with logic data
54 * in simple multi-column mode. Only exists for backwards compatibility,
55 * see "column_formats" for more flexibility.
57 * logic_channels: Specifies the number of logic channels. Is required in
58 * simple single-column mode. Is optional in simple multi-column mode
59 * (and defaults to all remaining columns). Only exists for backwards
60 * compatibility, see "column_formats" for more flexibility.
62 * single_format: Specifies the format of the input text in simple single-
63 * column mode. Available formats are: 'bin' (default), 'hex' and 'oct'.
64 * Simple multi-column mode always uses single-bit data per column.
65 * Only exists for backwards compatibility, see "column_formats" for
68 * start_line: Specifies at which line to start processing the input file.
69 * Allows skipping leading lines which are neither header nor data lines.
70 * By default all of the input file gets processed.
72 * header: Boolean option, controls whether the first processed line is used
73 * to determine channel names. Off by default. Generic channel names are
74 * used in the absence of header line content.
76 * samplerate: Specifies the samplerate of the input data. Defaults to 0.
77 * User specs take precedence over data which optionally gets derived
80 * column_separator: Specifies the sequence which separates the text file
81 * columns. Cannot be empty. Defaults to comma.
83 * comment_leader: Specifies the sequence which starts comments that run
84 * up to the end of the current text line. Can be empty to disable
85 * comment support. Defaults to semicolon.
87 * Typical examples of using these options:
88 * - ... -I csv:column_formats=*l ...
89 * All columns are single-bit logic data. Identical to the previous
90 * multi-column mode (the default when no options were given at all).
91 * - ... -I csv:column_formats=3-,*l ...
92 * Ignore the first three columns, get single-bit logic data from all
93 * remaining columns (multi-column mode with first-column above 1).
94 * - ... -I csv:column_formats=3-,4l,x8 ...
95 * Ignore the first three columns, get single-bit logic data from the
96 * next four columns, then eight-bit data in hex format from the next
97 * column. More columns may follow in the input text but won't get
98 * processed. (Mix of previous multi-column as well as single-column
100 * - ... -I csv:column_formats=4x8,b16,5l ...
101 * Get eight-bit data in hex format from the first four columns, then
102 * sixteen-bit data in binary format, then five times single-bit data.
103 * - ... -I csv:single_column=2:single_format=bin:logic_channels=8 ...
104 * Get eight logic bits in binary format from column 2. (Simple
105 * single-column mode, corresponds to the "-,b8" format.)
106 * - ... -I csv:first_column=6:logic_channels=4 ...
107 * Get four single-bit logic channels from columns 6 to 9 respectively.
108 * (Simple multi-column mode, corresponds to the "5-,4l" format.)
109 * - ... -I csv:start_line=20:header=yes:...
110 * Skip the first 19 text lines. Use line 20 to derive channel names.
111 * Data starts at line 21.
117 * - Add support for analog input data? (optional)
118 * - Extend the set of supported column types. Just grab a double
119 * value from floating point format input text.
120 * - Optionally get precision ('digits') from the column's format spec?
121 * From the position which is "bit count" for logic channels?
122 * - Optionally get sample rate from timestamp column. Just best-effort
123 * approach, not necessarily reliable. Users can always specify rates.
124 * - Add a test suite for input modules in general, and CSV in particular?
125 * Becomes more important with the multitude of options and their
126 * interaction. Could cover edge cases (BOM presence, line termination
127 * absence, etc) and auto-stuff as well (channel names, channel counts,
131 /* Single column formats. */
132 enum single_col_format {
133 FORMAT_NONE, /* Ignore this column. */
134 FORMAT_BIN, /* Bin digits for a set of bits (or just one bit). */
135 FORMAT_HEX, /* Hex digits for a set of bits. */
136 FORMAT_OCT, /* Oct digits for a set of bits. */
139 static const char *col_format_text[] = {
140 [FORMAT_NONE] = "unknown",
141 [FORMAT_BIN] = "binary",
142 [FORMAT_HEX] = "hexadecimal",
143 [FORMAT_OCT] = "octal",
146 static const char col_format_char[] = {
153 struct column_details {
155 enum single_col_format text_format;
156 size_t channel_offset;
157 size_t channel_count;
163 /* Current selected samplerate. */
165 gboolean samplerate_sent;
167 /* Number of logic channels. */
168 size_t logic_channels;
170 /* Column delimiter (actually separator), comment leader, EOL sequence. */
175 /* Format specs for input columns, and processing state. */
176 size_t column_seen_count;
177 const char *column_formats;
178 size_t column_want_count;
179 struct column_details *column_details;
181 /* Line number to start processing. */
185 * Determines if the first line should be treated as header and used for
186 * channel names in multi column mode.
189 gboolean header_seen;
191 size_t sample_unit_size; /**!< Byte count for a single sample. */
192 uint8_t *sample_buffer; /**!< Buffer for a single sample. */
194 uint8_t *datafeed_buffer; /**!< Queue for datafeed submission. */
195 size_t datafeed_buf_size;
196 size_t datafeed_buf_fill;
198 /* Current line number. */
201 /* List of previously created sigrok channels. */
202 GSList *prev_sr_channels;
206 * Primitive operations to handle sample sets:
207 * - Keep a buffer for datafeed submission, capable of holding many
208 * samples (reduces call overhead, improves throughput).
209 * - Have a "current sample set" pointer reference one position in that
210 * large samples buffer.
211 * - Clear the current sample set before text line inspection, then set
212 * the bits which are found active in the current line of text input.
213 * Phrase the API such that call sites can be kept simple. Advance to
214 * the next sample set between lines, flush the larger buffer as needed
215 * (when it is full, or upon EOF).
218 static void clear_logic_samples(struct context *inc)
220 inc->sample_buffer = &inc->datafeed_buffer[inc->datafeed_buf_fill];
221 memset(inc->sample_buffer, 0, inc->sample_unit_size);
224 static void set_logic_level(struct context *inc, size_t ch_idx, int on)
226 size_t byte_idx, bit_idx;
229 if (ch_idx >= inc->logic_channels)
234 byte_idx = ch_idx / 8;
235 bit_idx = ch_idx % 8;
236 bit_mask = 1 << bit_idx;
237 inc->sample_buffer[byte_idx] |= bit_mask;
240 static int flush_logic_samples(const struct sr_input *in)
243 struct sr_datafeed_packet packet;
244 struct sr_datafeed_meta meta;
245 struct sr_config *src;
247 struct sr_datafeed_logic logic;
251 if (!inc->datafeed_buf_fill)
254 if (inc->samplerate && !inc->samplerate_sent) {
255 packet.type = SR_DF_META;
256 packet.payload = &meta;
257 samplerate = inc->samplerate;
258 src = sr_config_new(SR_CONF_SAMPLERATE, g_variant_new_uint64(samplerate));
259 meta.config = g_slist_append(NULL, src);
260 sr_session_send(in->sdi, &packet);
261 g_slist_free(meta.config);
263 inc->samplerate_sent = TRUE;
266 memset(&packet, 0, sizeof(packet));
267 memset(&logic, 0, sizeof(logic));
268 packet.type = SR_DF_LOGIC;
269 packet.payload = &logic;
270 logic.unitsize = inc->sample_unit_size;
271 logic.length = inc->datafeed_buf_fill;
272 logic.data = inc->datafeed_buffer;
274 rc = sr_session_send(in->sdi, &packet);
278 inc->datafeed_buf_fill = 0;
282 static int queue_logic_samples(const struct sr_input *in)
288 if (!inc->logic_channels)
291 inc->datafeed_buf_fill += inc->sample_unit_size;
292 if (inc->datafeed_buf_fill == inc->datafeed_buf_size) {
293 rc = flush_logic_samples(in);
300 /* Helpers for "column processing". */
302 static int split_column_format(const char *spec,
303 size_t *column_count, enum single_col_format *format, size_t *bit_count)
306 char *endp, format_char;
307 enum single_col_format format_code;
312 /* Get the (optional, decimal, default 1) column count. Accept '*'. */
316 endp = (char *)&spec[1];
318 count = strtoul(spec, &endp, 10);
325 *column_count = count;
328 /* Get the (mandatory, single letter) type spec (-/xob/l). */
329 format_char = *spec++;
330 switch (format_char) {
331 case '-': /* Might conflict with number-parsing. */
334 format_code = FORMAT_NONE;
337 format_code = FORMAT_HEX;
340 format_code = FORMAT_OCT;
344 format_code = FORMAT_BIN;
346 default: /* includes NUL */
350 *format = format_code;
352 /* Get the (optional, decimal, default 1) bit count. */
354 count = strtoul(spec, &endp, 10);
359 if (format_char == '-')
361 if (format_char == 'l')
367 /* Input spec must have been exhausted. */
374 static int make_column_details_from_format(struct context *inc,
375 const char *column_format)
377 char **formats, *format;
378 size_t format_count, column_count, bit_count;
379 size_t auto_column_count;
380 size_t format_idx, c, b, column_idx, channel_idx;
381 enum single_col_format f;
382 struct column_details *detail;
385 /* Split the input spec, count involved columns and bits. */
386 formats = g_strsplit(column_format, ",", 0);
388 sr_err("Cannot parse columns format %s (comma split).", column_format);
391 format_count = g_strv_length(formats);
393 sr_err("Cannot parse columns format %s (field count).", column_format);
397 column_count = bit_count = 0;
398 auto_column_count = 0;
399 for (format_idx = 0; format_idx < format_count; format_idx++) {
400 format = formats[format_idx];
401 ret = split_column_format(format, &c, &f, &b);
402 sr_dbg("fmt %s -> %zu cols, %s fmt, %zu bits, rc %d", format, c, col_format_text[f], b, ret);
404 sr_err("Cannot parse columns format %s (field split, %s).", column_format, format);
409 /* User requested "auto-count", must be last format. */
410 if (formats[format_idx + 1]) {
411 sr_err("Auto column count must be last format field.");
415 auto_column_count = inc->column_seen_count - column_count;
416 c = auto_column_count;
421 sr_dbg("Column format %s -> %zu columns, %zu logic channels.",
422 column_format, column_count, bit_count);
424 /* Allocate and fill in "column processing" details. */
425 inc->column_want_count = column_count;
426 inc->column_details = g_malloc0_n(column_count, sizeof(inc->column_details[0]));
427 column_idx = channel_idx = 0;
428 for (format_idx = 0; format_idx < format_count; format_idx++) {
429 format = formats[format_idx];
430 (void)split_column_format(format, &c, &f, &b);
432 c = auto_column_count;
434 detail = &inc->column_details[column_idx++];
435 detail->col_nr = column_idx;
436 detail->text_format = f;
437 if (detail->text_format) {
438 detail->channel_offset = channel_idx;
439 detail->channel_count = b;
442 sr_dbg("detail -> col %zu, fmt %s, ch off/cnt %zu/%zu",
443 detail->col_nr, col_format_text[detail->text_format],
444 detail->channel_offset, detail->channel_count);
447 inc->logic_channels = channel_idx;
453 static const struct column_details *lookup_column_details(struct context *inc, size_t nr)
455 if (!inc || !inc->column_details)
457 if (!nr || nr > inc->column_want_count)
459 return &inc->column_details[nr - 1];
463 * Primitive operations for text input: Strip comments off text lines.
464 * Split text lines into columns. Process input text for individual
468 static void strip_comment(char *buf, const GString *prefix)
475 if ((ptr = strstr(buf, prefix->str))) {
482 * @brief Splits a text line into a set of columns.
484 * @param[in] buf The input text line to split.
485 * @param[in] inc The input module's context.
487 * @returns An array of strings, representing the columns' text.
489 * This routine splits a text line on previously determined separators.
491 static char **split_line(char *buf, struct context *inc)
493 return g_strsplit(buf, inc->delimiter->str, 0);
497 * @brief Parse a multi-bit field into several logic channels.
499 * @param[in] column The input text, a run of bin/hex/oct digits.
500 * @param[in] inc The input module's context.
501 * @param[in] details The column processing details.
503 * @retval SR_OK Success.
504 * @retval SR_ERR Invalid input data (empty, or format error).
506 * This routine modifies the logic levels in the current sample set,
507 * based on the text input and a user provided format spec.
509 static int parse_logic(const char *column, struct context *inc,
510 const struct column_details *details)
512 size_t length, ch_rem, ch_idx, ch_inc;
516 const char *type_text;
520 * Prepare to read the digits from the text end towards the start.
521 * A digit corresponds to a variable number of channels (depending
522 * on the value's radix). Prepare the mapping of text digits to
523 * (a number of) logic channels.
525 length = strlen(column);
527 sr_err("Column %zu in line %zu is empty.", details->col_nr,
531 rdptr = &column[length];
532 ch_idx = details->channel_offset;
533 ch_rem = details->channel_count;
536 * Get another digit and derive up to four logic channels' state from
537 * it. Make sure to not process more bits than the column has channels
538 * associated with it.
540 while (rdptr > column && ch_rem) {
541 /* Check for valid digits according to the input radix. */
543 switch (details->text_format) {
545 valid = g_ascii_isxdigit(c) && c < '2';
549 valid = g_ascii_isxdigit(c) && c < '8';
553 valid = g_ascii_isxdigit(c);
561 type_text = col_format_text[details->text_format];
562 sr_err("Invalid text '%s' in %s type column %zu in line %zu.",
563 column, type_text, details->col_nr, inc->line_number);
566 /* Use the digit's bits for logic channels' data. */
567 bits = g_ascii_xdigit_value(c);
568 switch (details->text_format) {
572 set_logic_level(inc, ch_idx + 3, bits & (1 << 3));
578 set_logic_level(inc, ch_idx + 2, bits & (1 << 2));
582 set_logic_level(inc, ch_idx + 1, bits & (1 << 1));
587 set_logic_level(inc, ch_idx + 0, bits & (1 << 0));
590 /* ShouldNotHappen(TM), but silences compiler warning. */
596 * TODO Determine whether the availability of extra input data
597 * for unhandled logic channels is worth warning here. In this
598 * implementation users are in control, and can have the more
599 * significant bits ignored (which can be considered a feature
600 * and not really a limitation).
607 * @brief Parse routine which ignores the input text.
609 * This routine exists to unify dispatch code paths, mapping input file
610 * columns' data types to their respective parse routines.
612 static int parse_ignore(const char *column, struct context *inc,
613 const struct column_details *details)
621 typedef int (*col_parse_cb)(const char *column, struct context *inc,
622 const struct column_details *details);
624 static const col_parse_cb col_parse_funcs[] = {
625 [FORMAT_NONE] = parse_ignore,
626 [FORMAT_BIN] = parse_logic,
627 [FORMAT_OCT] = parse_logic,
628 [FORMAT_HEX] = parse_logic,
631 static int init(struct sr_input *in, GHashTable *options)
634 size_t single_column, first_column, logic_channels;
636 enum single_col_format format;
639 in->sdi = g_malloc0(sizeof(*in->sdi));
640 in->priv = inc = g_malloc0(sizeof(*inc));
642 single_column = g_variant_get_uint32(g_hash_table_lookup(options, "single_column"));
644 logic_channels = g_variant_get_uint32(g_hash_table_lookup(options, "logic_channels"));
646 inc->delimiter = g_string_new(g_variant_get_string(
647 g_hash_table_lookup(options, "column_separator"), NULL));
648 if (!inc->delimiter->len) {
649 sr_err("Column separator cannot be empty.");
653 s = g_variant_get_string(g_hash_table_lookup(options, "single_format"), NULL);
654 if (g_ascii_strncasecmp(s, "bin", 3) == 0) {
656 } else if (g_ascii_strncasecmp(s, "hex", 3) == 0) {
658 } else if (g_ascii_strncasecmp(s, "oct", 3) == 0) {
661 sr_err("Invalid single-column format: '%s'", s);
665 inc->comment = g_string_new(g_variant_get_string(
666 g_hash_table_lookup(options, "comment_leader"), NULL));
667 if (g_string_equal(inc->comment, inc->delimiter)) {
669 * Using the same sequence as comment leader and column
670 * separator won't work. The user probably specified ';'
671 * as the column separator but did not adjust the comment
672 * leader. Try DWIM, drop comment stripping support here.
674 sr_warn("Comment leader and column separator conflict, disabling comment support.");
675 g_string_truncate(inc->comment, 0);
678 inc->samplerate = g_variant_get_uint64(g_hash_table_lookup(options, "samplerate"));
680 first_column = g_variant_get_uint32(g_hash_table_lookup(options, "first_column"));
682 inc->use_header = g_variant_get_boolean(g_hash_table_lookup(options, "header"));
684 inc->start_line = g_variant_get_uint32(g_hash_table_lookup(options, "start_line"));
685 if (inc->start_line < 1) {
686 sr_err("Invalid start line %zu.", inc->start_line);
691 * Scan flexible, to get preferred format specs which describe
692 * the input file's data formats. As well as some simple specs
693 * for backwards compatibility and user convenience.
695 * This logic ends up with a copy of the format string, either
696 * user provided or internally derived. Actual creation of the
697 * column processing details gets deferred until the first line
698 * of input data was seen. To support automatic determination of
699 * e.g. channel counts from column counts.
701 s = g_variant_get_string(g_hash_table_lookup(options, "column_formats"), NULL);
703 inc->column_formats = g_strdup(s);
704 sr_dbg("User specified column_formats: %s.", s);
705 } else if (single_column && logic_channels) {
706 format_char = col_format_char[format];
707 if (single_column == 1) {
708 inc->column_formats = g_strdup_printf("%c%zu",
709 format_char, logic_channels);
711 inc->column_formats = g_strdup_printf("%zu-,%c%zu",
713 format_char, logic_channels);
715 sr_dbg("Backwards compat single_column, col %zu, fmt %s, bits %zu -> %s.",
716 single_column, col_format_text[format], logic_channels,
717 inc->column_formats);
718 } else if (!single_column) {
719 if (first_column > 1) {
720 inc->column_formats = g_strdup_printf("%zu-,%zul",
721 first_column - 1, logic_channels);
723 inc->column_formats = g_strdup_printf("%zul",
726 sr_dbg("Backwards compat multi-column, col %zu, chans %zu -> %s.",
727 first_column, logic_channels,
728 inc->column_formats);
730 sr_warn("Unknown or unsupported columns layout spec, assuming simple multi-column mode.");
731 inc->column_formats = g_strdup("*l");
738 * Check the channel list for consistency across file re-import. See
739 * the VCD input module for more details and motivation.
742 static void keep_header_for_reread(const struct sr_input *in)
747 g_slist_free_full(inc->prev_sr_channels, sr_channel_free_cb);
748 inc->prev_sr_channels = in->sdi->channels;
749 in->sdi->channels = NULL;
752 static int check_header_in_reread(const struct sr_input *in)
761 if (!inc->prev_sr_channels)
764 if (sr_channel_lists_differ(inc->prev_sr_channels, in->sdi->channels)) {
765 sr_err("Channel list change not supported for file re-read.");
768 g_slist_free_full(in->sdi->channels, sr_channel_free_cb);
769 in->sdi->channels = inc->prev_sr_channels;
770 inc->prev_sr_channels = NULL;
775 static const char *delim_set = "\r\n";
777 static const char *get_line_termination(GString *buf)
782 if (g_strstr_len(buf->str, buf->len, "\r\n"))
784 else if (memchr(buf->str, '\n', buf->len))
786 else if (memchr(buf->str, '\r', buf->len))
792 static int initial_parse(const struct sr_input *in, GString *buf)
795 GString *channel_name;
796 size_t num_columns, ch_idx, ch_name_idx, col_idx, col_nr;
797 size_t line_number, line_idx;
799 char **lines, *line, **columns, *column;
800 const char *col_caption;
801 gboolean got_caption;
802 const struct column_details *detail;
809 if (inc->termination)
810 lines = g_strsplit(buf->str, inc->termination, 0);
812 lines = g_strsplit_set(buf->str, delim_set, 0);
813 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
815 if (inc->start_line > line_number) {
816 sr_spew("Line %zu skipped (before start).", line_number);
819 if (line[0] == '\0') {
820 sr_spew("Blank line %zu skipped.", line_number);
823 strip_comment(line, inc->comment);
824 if (line[0] == '\0') {
825 sr_spew("Comment-only line %zu skipped.", line_number);
829 /* Reached first proper line. */
833 /* Not enough data for a proper line yet. */
838 /* See how many columns the current line has. */
839 columns = split_line(line, inc);
841 sr_err("Error while parsing line %zu.", line_number);
845 num_columns = g_strv_length(columns);
847 sr_err("Error while parsing line %zu.", line_number);
851 sr_dbg("DIAG Got %zu columns in text line: %s.", num_columns, line);
854 * Track the observed number of columns in the input file. Do
855 * process the previously gathered columns format spec now that
856 * automatic channel count can be dealt with.
858 inc->column_seen_count = num_columns;
859 ret = make_column_details_from_format(inc, inc->column_formats);
861 sr_err("Cannot parse columns format using line %zu.", line_number);
866 * Assume all lines have equal length (column count). Bail out
867 * early on suspicious or insufficient input data (check input
868 * which became available here against previous user specs or
869 * auto-determined properties, regardless of layout variant).
871 if (num_columns < inc->column_want_count) {
872 sr_err("Insufficient input text width for desired data amount, got %zu but want %zu columns.",
873 num_columns, inc->column_want_count);
879 * Determine channel names. Optionally use text from a header
880 * line (when requested by the user, and only works in multi
881 * column mode). In the absence of header text, or in single
882 * column mode, channels are assigned rather generic names.
884 * Manipulation of the column's caption is acceptable here, the
885 * header line will never get processed another time.
887 channel_name = g_string_sized_new(64);
888 for (col_idx = 0; col_idx < inc->column_want_count; col_idx++) {
890 col_nr = col_idx + 1;
891 detail = lookup_column_details(inc, col_nr);
892 if (detail->text_format == FORMAT_NONE)
894 column = columns[col_idx];
895 col_caption = sr_scpi_unquote_string(column);
896 got_caption = inc->use_header && *col_caption;
897 sr_dbg("DIAG col %zu, ch count %zu, text %s.",
898 col_nr, detail->channel_count, col_caption);
899 for (ch_idx = 0; ch_idx < detail->channel_count; ch_idx++) {
900 ch_name_idx = detail->channel_offset + ch_idx;
901 if (got_caption && detail->channel_count == 1)
902 g_string_assign(channel_name, col_caption);
903 else if (got_caption)
904 g_string_printf(channel_name, "%s[%zu]",
905 col_caption, ch_idx);
907 g_string_printf(channel_name, "%zu", ch_name_idx);
908 sr_dbg("DIAG ch idx %zu, name %s.", ch_name_idx, channel_name->str);
909 sr_channel_new(in->sdi, ch_name_idx, SR_CHANNEL_LOGIC, TRUE,
913 g_string_free(channel_name, TRUE);
914 if (!check_header_in_reread(in)) {
920 * Calculate the minimum buffer size to store the set of samples
921 * of all channels (unit size). Determine a larger buffer size
922 * for datafeed submission that is a multiple of the unit size.
923 * Allocate the larger buffer, the "sample buffer" will point
924 * to a location within that large buffer later.
926 inc->sample_unit_size = (inc->logic_channels + 7) / 8;
927 inc->datafeed_buf_size = CHUNK_SIZE;
928 inc->datafeed_buf_size *= inc->sample_unit_size;
929 inc->datafeed_buffer = g_malloc(inc->datafeed_buf_size);
930 inc->datafeed_buf_fill = 0;
941 * Gets called from initial_receive(), which runs until the end-of-line
942 * encoding of the input stream could get determined. Assumes that this
943 * routine receives enough buffered initial input data to either see the
944 * BOM when there is one, or that no BOM will follow when a text line
945 * termination sequence was seen. Silently drops the UTF-8 BOM sequence
946 * from the input buffer if one was seen. Does not care to protect
947 * against multiple execution or dropping the BOM multiple times --
948 * there should be at most one in the input stream.
950 static void initial_bom_check(const struct sr_input *in)
952 static const char *utf8_bom = "\xef\xbb\xbf";
954 if (in->buf->len < strlen(utf8_bom))
956 if (strncmp(in->buf->str, utf8_bom, strlen(utf8_bom)) != 0)
958 g_string_erase(in->buf, 0, strlen(utf8_bom));
961 static int initial_receive(const struct sr_input *in)
967 const char *termination;
969 initial_bom_check(in);
973 termination = get_line_termination(in->buf);
975 /* Don't have a full line yet. */
978 p = g_strrstr_len(in->buf->str, in->buf->len, termination);
980 /* Don't have a full line yet. */
982 len = p - in->buf->str - 1;
983 new_buf = g_string_new_len(in->buf->str, len);
984 g_string_append_c(new_buf, '\0');
986 inc->termination = g_strdup(termination);
988 if (in->buf->str[0] != '\0')
989 ret = initial_parse(in, new_buf);
993 g_string_free(new_buf, TRUE);
998 static int process_buffer(struct sr_input *in, gboolean is_eof)
1000 struct context *inc;
1002 size_t line_idx, col_idx, col_nr;
1003 const struct column_details *details;
1004 col_parse_cb parse_func;
1006 char *p, **lines, *line, **columns, *column;
1009 if (!inc->started) {
1010 std_session_send_df_header(in->sdi);
1011 inc->started = TRUE;
1015 * Consider empty input non-fatal. Keep accumulating input until
1016 * at least one full text line has become available. Grab the
1017 * maximum amount of accumulated data that consists of full text
1018 * lines, and process what has been received so far, leaving not
1019 * yet complete lines for the next invocation.
1021 * Enforce that all previously buffered data gets processed in
1022 * the "EOF" condition. Do not insist on the presence of the
1023 * termination sequence for the last line (may often be missing
1024 * on Windows). A present termination sequence will just result
1025 * in the "execution of an empty line", and does no harm.
1030 p = in->buf->str + in->buf->len;
1032 p = g_strrstr_len(in->buf->str, in->buf->len, inc->termination);
1036 p += strlen(inc->termination);
1038 g_strstrip(in->buf->str);
1041 lines = g_strsplit(in->buf->str, inc->termination, 0);
1042 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1044 if (inc->line_number < inc->start_line) {
1045 sr_spew("Line %zu skipped (before start).", inc->line_number);
1048 if (line[0] == '\0') {
1049 sr_spew("Blank line %zu skipped.", inc->line_number);
1053 /* Remove trailing comment. */
1054 strip_comment(line, inc->comment);
1055 if (line[0] == '\0') {
1056 sr_spew("Comment-only line %zu skipped.", inc->line_number);
1060 /* Skip the header line, its content was used as the channel names. */
1061 if (inc->use_header && !inc->header_seen) {
1062 sr_spew("Header line %zu skipped.", inc->line_number);
1063 inc->header_seen = TRUE;
1067 /* Split the line into columns, check for minimum length. */
1068 columns = split_line(line, inc);
1070 sr_err("Error while parsing line %zu.", inc->line_number);
1074 num_columns = g_strv_length(columns);
1075 if (num_columns < inc->column_want_count) {
1076 sr_err("Insufficient column count %zu in line %zu.",
1077 num_columns, inc->line_number);
1078 g_strfreev(columns);
1083 /* Have the columns of the current text line processed. */
1084 clear_logic_samples(inc);
1085 for (col_idx = 0; col_idx < inc->column_want_count; col_idx++) {
1086 column = columns[col_idx];
1087 col_nr = col_idx + 1;
1088 details = lookup_column_details(inc, col_nr);
1089 if (!details || !details->text_format)
1091 parse_func = col_parse_funcs[details->text_format];
1094 ret = parse_func(column, inc, details);
1096 g_strfreev(columns);
1102 /* Send sample data to the session bus (buffered). */
1103 ret = queue_logic_samples(in);
1105 sr_err("Sending samples failed.");
1106 g_strfreev(columns);
1111 g_strfreev(columns);
1114 g_string_erase(in->buf, 0, p - in->buf->str);
1119 static int receive(struct sr_input *in, GString *buf)
1121 struct context *inc;
1124 g_string_append_len(in->buf, buf->str, buf->len);
1127 if (!inc->column_seen_count) {
1128 ret = initial_receive(in);
1129 if (ret == SR_ERR_NA)
1130 /* Not enough data yet. */
1132 else if (ret != SR_OK)
1135 /* sdi is ready, notify frontend. */
1136 in->sdi_ready = TRUE;
1140 ret = process_buffer(in, FALSE);
1145 static int end(struct sr_input *in)
1147 struct context *inc;
1151 ret = process_buffer(in, TRUE);
1157 ret = flush_logic_samples(in);
1163 std_session_send_df_end(in->sdi);
1168 static void cleanup(struct sr_input *in)
1170 struct context *inc;
1172 keep_header_for_reread(in);
1176 g_free(inc->termination);
1177 inc->termination = NULL;
1178 g_free(inc->datafeed_buffer);
1179 inc->datafeed_buffer = NULL;
1182 static int reset(struct sr_input *in)
1184 struct context *inc = in->priv;
1187 inc->started = FALSE;
1188 g_string_truncate(in->buf, 0);
1207 static struct sr_option options[] = {
1209 "column_formats", "Column format specs",
1210 "Specifies text columns data types: comma separated list of [<cols>]<fmt>[<bits>], with -/x/o/b/l format specifiers.",
1213 [OPT_SINGLE_COL] = {
1214 "single_column", "Single column",
1215 "Enable single-column mode, exclusively use text from the specified column (number starting at 1).",
1219 "first_column", "First column",
1220 "Number of the first column with logic data in simple multi-column mode (number starting at 1, default 1).",
1224 "logic_channels", "Number of logic channels",
1225 "Logic channel count, required in simple single-column mode, defaults to \"all remaining columns\" in simple multi-column mode. Obsoleted by 'column_formats'.",
1229 "single_format", "Data format for simple single-column mode.",
1230 "The number format of single-column mode input data: bin, hex, oct.",
1234 "start_line", "Start line",
1235 "The line number at which to start processing input text (default: 1).",
1239 "header", "Get channel names from first line.",
1240 "Use the first processed line's column captions (when available) as channel names.",
1244 "samplerate", "Samplerate (Hz)",
1245 "The input data's sample rate in Hz.",
1249 "column_separator", "Column separator",
1250 "The sequence which separates text columns. Non-empty text, comma by default.",
1254 "comment_leader", "Comment leader character",
1255 "The text which starts comments at the end of text lines.",
1258 [OPT_MAX] = ALL_ZERO,
1261 static const struct sr_option *get_options(void)
1265 if (!options[0].def) {
1266 options[OPT_COL_FMTS].def = g_variant_ref_sink(g_variant_new_string(""));
1267 options[OPT_SINGLE_COL].def = g_variant_ref_sink(g_variant_new_uint32(0));
1268 options[OPT_FIRST_COL].def = g_variant_ref_sink(g_variant_new_uint32(1));
1269 options[OPT_NUM_LOGIC].def = g_variant_ref_sink(g_variant_new_uint32(0));
1270 options[OPT_FORMAT].def = g_variant_ref_sink(g_variant_new_string("bin"));
1272 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("bin")));
1273 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("hex")));
1274 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("oct")));
1275 options[OPT_FORMAT].values = l;
1276 options[OPT_START].def = g_variant_ref_sink(g_variant_new_uint32(1));
1277 options[OPT_HEADER].def = g_variant_ref_sink(g_variant_new_boolean(FALSE));
1278 options[OPT_RATE].def = g_variant_ref_sink(g_variant_new_uint64(0));
1279 options[OPT_DELIM].def = g_variant_ref_sink(g_variant_new_string(","));
1280 options[OPT_COMMENT].def = g_variant_ref_sink(g_variant_new_string(";"));
1286 SR_PRIV struct sr_input_module input_csv = {
1289 .desc = "Comma-separated values",
1290 .exts = (const char*[]){"csv", NULL},
1291 .options = get_options,