2 * This file is part of the libsigrok project.
4 * Copyright (C) 2013 Marc Schink <sigrok-dev@marcschink.de>
5 * Copyright (C) 2019 Gerhard Sittig <gerhard.sittig@gmx.net>
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
27 #include <libsigrok/libsigrok.h>
28 #include "libsigrok-internal.h"
29 #include "scpi.h" /* String un-quote for channel name from header line. */
31 #define LOG_PREFIX "input/csv"
33 #define CHUNK_SIZE (4 * 1024 * 1024)
36 * The CSV input module has the following options:
38 * column_formats: Specifies the data formats and channel counts for the
39 * input file's text columns. Accepts a comma separated list of tuples
40 * with: an optional column repeat count ('*' as a wildcard meaning
41 * "all remaining columns", only applicable to the last field), a format
42 * specifying character ('x' hexadecimal, 'o' octal, 'b' binary, 'l'
43 * single-bit logic), and an optional bit count (translating to: logic
44 * channels communicated in that column). This "column_formats" option
45 * is most versatile, other forms of specifying the column layout only
46 * exist for backwards compatibility.
48 * single_column: Specifies the column number which contains the logic data
49 * for single-column mode. All logic data is taken from several bits
50 * which all are kept within that one column. Only exists for backwards
51 * compatibility, see "column_formats" for more flexibility.
53 * first_column: Specifies the number of the first column with logic data
54 * in simple multi-column mode. Only exists for backwards compatibility,
55 * see "column_formats" for more flexibility.
57 * logic_channels: Specifies the number of logic channels. Is required in
58 * simple single-column mode. Is optional in simple multi-column mode
59 * (and defaults to all remaining columns). Only exists for backwards
60 * compatibility, see "column_formats" for more flexibility.
62 * single_format: Specifies the format of the input text in simple single-
63 * column mode. Available formats are: 'bin' (default), 'hex' and 'oct'.
64 * Simple multi-column mode always uses single-bit data per column.
65 * Only exists for backwards compatibility, see "column_formats" for
68 * start_line: Specifies at which line to start processing the input file.
69 * Allows skipping leading lines which are neither header nor data lines.
70 * By default all of the input file gets processed.
72 * header: Boolean option, controls whether the first processed line is used
73 * to determine channel names. Off by default. Generic channel names are
74 * used in the absence of header line content.
76 * samplerate: Specifies the samplerate of the input data. Defaults to 0.
77 * User specs take precedence over data which optionally gets derived
80 * column_separator: Specifies the sequence which separates the text file
81 * columns. Cannot be empty. Defaults to comma.
83 * comment_leader: Specifies the sequence which starts comments that run
84 * up to the end of the current text line. Can be empty to disable
85 * comment support. Defaults to semicolon.
87 * Typical examples of using these options:
88 * - ... -I csv:column_formats=*l ...
89 * All columns are single-bit logic data. Identical to the previous
90 * multi-column mode (the default when no options were given at all).
91 * - ... -I csv:column_formats=3-,*l ...
92 * Ignore the first three columns, get single-bit logic data from all
92 * remaining columns (multi-column mode with first-column above 1).
94 * - ... -I csv:column_formats=3-,4l,x8 ...
95 * Ignore the first three columns, get single-bit logic data from the
96 * next four columns, then eight-bit data in hex format from the next
97 * column. More columns may follow in the input text but won't get
98 * processed. (Mix of previous multi-column as well as single-column
100 * - ... -I csv:column_formats=4x8,b16,5l ...
101 * Get eight-bit data in hex format from the first four columns, then
102 * sixteen-bit data in binary format, then five times single-bit data.
103 * - ... -I csv:single_column=2:single_format=bin:logic_channels=8 ...
104 * Get eight logic bits in binary format from column 2. (Simple
105 * single-column mode, corresponds to the "-,b8" format.)
106 * - ... -I csv:first_column=6:logic_channels=4 ...
107 * Get four single-bit logic channels from columns 6 to 9 respectively.
108 * (Simple multi-column mode, corresponds to the "5-,4b" format.)
109 * - ... -I csv:start_line=20:header=yes:...
110 * Skip the first 19 text lines. Use line 20 to derive channel names.
111 * Data starts at line 21.
117 * - Extend support for analog input data? (optional)
118 * - Determine why analog samples of 'double' data type get scrambled
119 * in sigrok-cli screen output. Is analog.encoding->unitsize not
120 * handled properly? A sigrok-cli or libsigrok (src/output) issue?
121 * - Optionally get sample rate from timestamp column. Just best-effort
122 * approach, not necessarily reliable. Users can always specify rates.
123 * - Add a test suite for input modules in general, and CSV in specific?
124 * Becomes more important with the multitude of options and their
125 * interaction. Could cover edge cases (BOM presence, line termination
126 * absence, etc) and auto-stuff as well (channel names, channel counts,
/*
 * Storage type for analog sample values in this module (sample buffers,
 * set_analog_value(), datafeed submission). Kept at 'float' because
 * 'double' samples were seen to get scrambled in sigrok-cli screen
 * output, see the open items in the file header comment.
 */
130 typedef float csv_analog_t; /* 'double' currently is flawed. */
132 /* Single column formats. */
/*
 * The enumerators double as indices into the col_format_text[],
 * col_format_char[], and col_parse_funcs[] tables. FORMAT_NONE comes
 * first and thus is zero, which lets callers test a column's format
 * for "ignored" with a plain truth check.
 */
133 enum single_col_format {
134 FORMAT_NONE, /* Ignore this column. */
135 FORMAT_BIN, /* Bin digits for a set of bits (or just one bit). */
136 FORMAT_HEX, /* Hex digits for a set of bits. */
137 FORMAT_OCT, /* Oct digits for a set of bits. */
138 FORMAT_ANALOG, /* Floating point number for an analog channel. */
/*
 * Human readable names of the column formats, indexed by
 * enum single_col_format. Only used in diagnostics (debug and error
 * messages).
 */
141 static const char *col_format_text[] = {
142 [FORMAT_NONE] = "unknown",
143 [FORMAT_BIN] = "binary",
144 [FORMAT_HEX] = "hexadecimal",
145 [FORMAT_OCT] = "octal",
146 [FORMAT_ANALOG] = "analog",
/*
 * Format specifying characters as used in "column_formats" specs,
 * indexed by enum single_col_format. init() looks up the character
 * here when it composes a format spec from the backwards compatible
 * single-column options.
 */
149 static const char col_format_char[] = {
154 [FORMAT_ANALOG] = 'a',
/*
 * Processing details for one column of the input text: How to interpret
 * the column's text, and which channels receive the resulting data.
 */
157 struct column_details {
159 enum single_col_format text_format; /* How to parse the column's text. */
/*
 * Index of the column's first channel. Logic and analog channels are
 * counted separately, the offset is relative to the respective group.
 */
160 size_t channel_offset;
161 size_t channel_count; /* Number of channels fed from this column. */
168 /* Current selected samplerate. */
170 gboolean samplerate_sent; /* Set once flush_samplerate() has sent the rate. */
172 /* Number of channels. */
173 size_t logic_channels;
174 size_t analog_channels;
176 /* Column delimiter (actually separator), comment leader, EOL sequence. */
181 /* Format specs for input columns, and processing state. */
182 size_t column_seen_count; /* Columns found in the first processed text line. */
183 const char *column_formats; /* Format spec, user provided or derived in init(). */
184 size_t column_want_count; /* Columns covered by the format spec. */
185 struct column_details *column_details; /* One item per wanted column. */
187 /* Line number to start processing. */
191 * Determines if the first line should be treated as header and used for
192 * channel names in multi column mode.
195 gboolean header_seen;
197 size_t sample_unit_size; /**< Byte count for a single sample. */
198 uint8_t *sample_buffer; /**< Buffer for a single sample. */
199 csv_analog_t *analog_sample_buffer; /**< Buffer for one set of analog values. */
201 uint8_t *datafeed_buffer; /**< Queue for datafeed submission. */
202 size_t datafeed_buf_size; /* Capacity of the logic queue, in bytes. */
203 size_t datafeed_buf_fill; /* Used part of the logic queue, in bytes. */
204 /* "Striped" layout, M samples for N channels each. */
205 csv_analog_t *analog_datafeed_buffer; /**< Queue for analog datafeed. */
206 size_t analog_datafeed_buf_size; /* Capacity in samples, per channel. */
207 size_t analog_datafeed_buf_fill; /* Queued samples, per channel. */
208 GSList **analog_datafeed_channels; /* Per analog channel: list with that one channel. */
209 int *analog_datafeed_digits; /* Per analog channel: digits for sr_analog_init(). */
211 /* Current line number. */
214 /* List of previously created sigrok channels. */
215 GSList *prev_sr_channels;
219 * Primitive operations to handle sample sets:
220 * - Keep a buffer for datafeed submission, capable of holding many
221 * samples (reduces call overhead, improves throughput).
222 * - Have a "current sample set" pointer reference one position in that
223 * large samples buffer.
224 * - Clear the current sample set before text line inspection, then set
225 * the bits which are found active in the current line of text input.
226 * Phrase the API such that call sites can be kept simple. Advance to
227 * the next sample set between lines, flush the larger buffer as needed
228 * (when it is full, or upon EOF).
/*
 * Send the samplerate to the session feed as an SR_DF_META packet.
 * Only sends when a non-zero samplerate is known and it was not sent
 * before, and remembers the transmission in the context. Gets called
 * by the logic and analog flush routines before they submit samples.
 */
231 static int flush_samplerate(const struct sr_input *in)
234 struct sr_datafeed_packet packet;
235 struct sr_datafeed_meta meta;
236 struct sr_config *src;
239 if (inc->samplerate && !inc->samplerate_sent) {
240 packet.type = SR_DF_META;
241 packet.payload = &meta;
242 src = sr_config_new(SR_CONF_SAMPLERATE, g_variant_new_uint64(inc->samplerate));
243 meta.config = g_slist_append(NULL, src);
/* The send result is not inspected at this spot. */
244 sr_session_send(in->sdi, &packet);
/* Releases the list nodes only, not the config item they refer to. */
245 g_slist_free(meta.config);
247 inc->samplerate_sent = TRUE;
/*
 * Start a new logic sample set: Make the "current sample" pointer
 * reference the fill position of the datafeed queue, and zero that
 * sample's bytes. The logic_channels check covers input which has no
 * logic channels at all.
 */
253 static void clear_logic_samples(struct context *inc)
255 if (!inc->logic_channels)
257 inc->sample_buffer = &inc->datafeed_buffer[inc->datafeed_buf_fill];
258 memset(inc->sample_buffer, 0, inc->sample_unit_size);
/*
 * Raise the bit of one logic channel in the current sample set.
 *
 * inc:    The input module's context.
 * ch_idx: Zero based logic channel index, checked against the number
 *         of logic channels.
 * on:     The channel's level. Callers pass a masked bit value, any
 *         non-zero value means "high".
 *
 * Bits only ever get set here (bitwise OR), low levels result from
 * clear_logic_samples() which zeroes the sample set before a text
 * line gets inspected.
 */
261 static void set_logic_level(struct context *inc, size_t ch_idx, int on)
263 size_t byte_idx, bit_idx;
266 if (ch_idx >= inc->logic_channels)
/* Eight channels per byte, channel 0 in the LSB of the first byte. */
271 byte_idx = ch_idx / 8;
272 bit_idx = ch_idx % 8;
273 bit_mask = 1 << bit_idx;
274 inc->sample_buffer[byte_idx] |= bit_mask;
/*
 * Submit all queued logic samples to the session feed in a single
 * SR_DF_LOGIC packet, then mark the queue as empty. The samplerate
 * gets flushed first, see flush_samplerate(). The fill level check
 * covers the case of an empty queue.
 */
277 static int flush_logic_samples(const struct sr_input *in)
280 struct sr_datafeed_packet packet;
281 struct sr_datafeed_logic logic;
285 if (!inc->datafeed_buf_fill)
288 rc = flush_samplerate(in);
292 memset(&packet, 0, sizeof(packet));
293 memset(&logic, 0, sizeof(logic));
294 packet.type = SR_DF_LOGIC;
295 packet.payload = &logic;
296 logic.unitsize = inc->sample_unit_size;
/* The fill level is kept in bytes, see queue_logic_samples(). */
297 logic.length = inc->datafeed_buf_fill;
298 logic.data = inc->datafeed_buffer;
300 rc = sr_session_send(in->sdi, &packet);
304 inc->datafeed_buf_fill = 0;
/*
 * Commit the current logic sample set to the datafeed queue: Advance
 * the fill level by one sample, and flush the queue when it is full.
 */
308 static int queue_logic_samples(const struct sr_input *in)
314 if (!inc->logic_channels)
317 inc->datafeed_buf_fill += inc->sample_unit_size;
/*
 * initial_parse() sizes the queue as a multiple of the unit size,
 * so the fill level hits the capacity exactly.
 */
318 if (inc->datafeed_buf_fill == inc->datafeed_buf_size) {
319 rc = flush_logic_samples(in);
/* Forward declaration, clear_analog_samples() calls this before its definition. */
326 static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value);
/*
 * Start a new set of analog samples: Make the "current sample" pointer
 * reference the fill position of the analog queue, and preset every
 * analog channel's value to 0.0.
 */
328 static void clear_analog_samples(struct context *inc)
332 if (!inc->analog_channels)
334 inc->analog_sample_buffer = &inc->analog_datafeed_buffer[inc->analog_datafeed_buf_fill];
335 for (idx = 0; idx < inc->analog_channels; idx++)
336 set_analog_value(inc, idx, 0.0);
/*
 * Store the value of one analog channel in the current sample set.
 * The channel index is checked against the number of analog channels.
 */
339 static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value)
341 if (ch_idx >= inc->analog_channels)
/*
 * Striped layout: Each channel owns a run of analog_datafeed_buf_size
 * samples. The current sample pointer references channel 0's slot,
 * other channels' slots are whole runs apart.
 */
345 inc->analog_sample_buffer[ch_idx * inc->analog_datafeed_buf_size] = value;
/*
 * Submit all queued analog samples to the session feed, then mark the
 * queue as empty. Sends one SR_DF_ANALOG packet per analog channel,
 * each of which carries that channel's run of queued samples. The
 * samplerate gets flushed first, see flush_samplerate().
 */
348 static int flush_analog_samples(const struct sr_input *in)
351 struct sr_datafeed_packet packet;
352 struct sr_datafeed_analog analog;
353 struct sr_analog_encoding encoding;
354 struct sr_analog_meaning meaning;
355 struct sr_analog_spec spec;
356 csv_analog_t *samples;
362 if (!inc->analog_datafeed_buf_fill)
365 rc = flush_samplerate(in);
/* Start at channel 0's run, advance by one run per channel below. */
369 samples = inc->analog_datafeed_buffer;
370 for (ch_idx = 0; ch_idx < inc->analog_channels; ch_idx++) {
371 digits = inc->analog_datafeed_digits[ch_idx];
372 sr_analog_init(&analog, &encoding, &meaning, &spec, digits);
373 memset(&packet, 0, sizeof(packet));
374 packet.type = SR_DF_ANALOG;
375 packet.payload = &analog;
376 analog.num_samples = inc->analog_datafeed_buf_fill;
377 analog.data = samples;
/* Single item list which was prepared in initial_parse(). */
378 analog.meaning->channels = inc->analog_datafeed_channels[ch_idx];
379 analog.meaning->mq = 0;
380 analog.meaning->mqflags = 0;
381 analog.meaning->unit = 0;
382 analog.encoding->unitsize = sizeof(samples[0]);
383 analog.encoding->is_signed = TRUE;
384 analog.encoding->is_float = TRUE;
/* Samples are kept in the host's native byte order. */
385 #ifdef WORDS_BIGENDIAN
386 analog.encoding->is_bigendian = TRUE;
388 analog.encoding->is_bigendian = FALSE;
390 analog.encoding->digits = spec.spec_digits;
391 rc = sr_session_send(in->sdi, &packet);
394 samples += inc->analog_datafeed_buf_size;
397 inc->analog_datafeed_buf_fill = 0;
/*
 * Commit the current set of analog samples to the datafeed queue:
 * Advance the fill level by one sample (per channel), and flush the
 * queue when a channel's run is full.
 */
401 static int queue_analog_samples(const struct sr_input *in)
407 if (!inc->analog_channels)
/* Fill level and capacity count samples here, not bytes. */
410 inc->analog_datafeed_buf_fill++;
411 if (inc->analog_datafeed_buf_fill == inc->analog_datafeed_buf_size) {
412 rc = flush_analog_samples(in);
419 /* Helpers for "column processing". */
/*
 * Parse a single field of the "column_formats" spec, which reads
 * "[<count>]<format char>[<number>]".
 *
 * spec:         The text of the field.
 * column_count: Receives the number of columns the field applies to.
 * format:       Receives the format code for the field's character.
 * bit_count:    Receives the trailing number. Callers use it as the
 *               number of logic channels per column, or as the digits
 *               count for analog columns.
 */
421 static int split_column_format(const char *spec,
422 size_t *column_count, enum single_col_format *format, size_t *bit_count)
425 char *endp, format_char;
426 enum single_col_format format_code;
431 /* Get the (optional, decimal, default 1) column count. Accept '*'. */
434 /* Workaround, strtoul("*") won't always yield expected endp. */
436 endp = (char *)&spec[1];
438 count = strtoul(spec, &endp, 10);
445 *column_count = count;
448 /* Get the (mandatory, single letter) type spec (-/xob/l). */
/* The switch also yields FORMAT_ANALOG for analog columns. */
449 format_char = *spec++;
450 switch (format_char) {
454 format_code = FORMAT_NONE;
457 format_code = FORMAT_HEX;
460 format_code = FORMAT_OCT;
464 format_code = FORMAT_BIN;
467 format_code = FORMAT_ANALOG;
469 default: /* includes NUL */
473 *format = format_code;
475 /* Get the (optional, decimal, default 1) bit count. */
477 count = strtoul(spec, &endp, 10);
/* Analog columns use a different default (3) than logic columns (1). */
481 count = (format_code == FORMAT_ANALOG) ? 3 : 1;
484 if (format_char == 'l')
490 /* Input spec must have been exhausted. */
/*
 * Turn a "column_formats" spec into per-column processing details, and
 * create the session's channels.
 *
 * in:            The input module instance (context, device instance).
 * column_format: The comma separated format spec.
 * column_texts:  The columns of the first processed text line. Used to
 *                determine the number of available columns, and for
 *                channel names when a header line is in use.
 *
 * Works in two passes over the format fields. The first pass counts
 * columns and channels (and resolves the "all remaining columns"
 * wildcard), the second pass fills in the details and creates channels.
 * Stores the resulting logic and analog channel counts in the context.
 */
497 static int make_column_details_from_format(const struct sr_input *in,
498 const char *column_format, char **column_texts)
501 char **formats, *format;
502 size_t format_count, column_count, logic_count, analog_count;
503 size_t auto_column_count;
504 size_t format_idx, c, b, column_idx, channel_idx, analog_idx;
505 enum single_col_format f;
506 struct column_details *detail;
507 GString *channel_name;
511 int channel_type, channel_sdi_nr;
515 inc->column_seen_count = g_strv_length(column_texts);
517 /* Split the input spec, count involved columns and bits. */
518 formats = g_strsplit(column_format, ",", 0);
520 sr_err("Cannot parse columns format %s (comma split).", column_format);
523 format_count = g_strv_length(formats);
525 sr_err("Cannot parse columns format %s (field count).", column_format);
529 column_count = logic_count = analog_count = 0;
530 auto_column_count = 0;
531 for (format_idx = 0; format_idx < format_count; format_idx++) {
532 format = formats[format_idx];
533 ret = split_column_format(format, &c, &f, &b);
534 sr_dbg("fmt %s -> %zu cols, %s fmt, %zu bits, rc %d", format, c, col_format_text[f], b, ret);
536 sr_err("Cannot parse columns format %s (field split, %s).", column_format, format);
541 /* User requested "auto-count", must be last format. */
542 if (formats[format_idx + 1]) {
543 sr_err("Auto column count must be last format field.");
/*
 * NOTE(review): Unsigned subtraction. Wraps around when the text line
 * has fewer columns than the preceding fields consumed -- confirm
 * that this case gets caught.
 */
547 auto_column_count = inc->column_seen_count - column_count;
548 c = auto_column_count;
551 if (f == FORMAT_ANALOG)
554 logic_count += c * b;
556 sr_dbg("Column format %s -> %zu columns, %zu logic, %zu analog channels.",
557 column_format, column_count, logic_count, analog_count);
559 /* Allocate and fill in "column processing" details. Create channels. */
560 inc->column_want_count = column_count;
561 if (inc->column_seen_count < inc->column_want_count) {
562 sr_err("Insufficient input text width for desired data amount, got %zu but want %zu columns.",
563 inc->column_seen_count, inc->column_want_count);
567 inc->column_details = g_malloc0_n(column_count, sizeof(inc->column_details[0]));
568 column_idx = channel_idx = analog_idx = 0;
569 channel_name = g_string_sized_new(64);
570 for (format_idx = 0; format_idx < format_count; format_idx++) {
571 /* Process a format field, which can span multiple columns. */
572 format = formats[format_idx];
/* Result ignored, the very same field parsed fine in the first pass. */
573 (void)split_column_format(format, &c, &f, &b);
575 c = auto_column_count;
577 /* Fill in a column's processing details. */
/* The post-increment makes col_nr a one based column number. */
578 detail = &inc->column_details[column_idx++];
579 detail->col_nr = column_idx;
580 detail->text_format = f;
581 if (detail->text_format == FORMAT_ANALOG) {
582 detail->channel_offset = analog_idx;
583 detail->channel_count = 1;
584 detail->analog_digits = b;
585 analog_idx += detail->channel_count;
586 } else if (detail->text_format) {
587 detail->channel_offset = channel_idx;
588 detail->channel_count = b;
589 channel_idx += detail->channel_count;
591 sr_dbg("detail -> col %zu, fmt %s, ch off/cnt %zu/%zu",
592 detail->col_nr, col_format_text[detail->text_format],
593 detail->channel_offset, detail->channel_count);
594 if (!detail->text_format)
597 * Create channels with appropriate names. Optionally
598 * use text from a header line (when requested by the
599 * user). In the absence of header text, channels are
600 * assigned rather generic names.
602 * Manipulation of the column's caption (when a header
603 * line is seen) is acceptable, because this header
604 * line won't get processed another time.
606 column = column_texts[detail->col_nr - 1];
607 if (inc->use_header && column && *column)
608 caption = sr_scpi_unquote_string(column);
611 if (!caption || !*caption)
/*
 * Naming scheme: The caption alone for a single channel column,
 * "caption[index]" for multi-bit columns, the plain channel number
 * when no caption is available.
 */
613 for (create_idx = 0; create_idx < detail->channel_count; create_idx++) {
614 if (caption && detail->channel_count == 1) {
615 g_string_assign(channel_name, caption);
616 } else if (caption) {
617 g_string_printf(channel_name, "%s[%zu]",
618 caption, create_idx);
620 g_string_printf(channel_name, "%zu",
621 detail->channel_offset + create_idx);
623 if (detail->text_format == FORMAT_ANALOG) {
/* Analog channels' index numbers follow those of all logic channels. */
624 channel_sdi_nr = logic_count + detail->channel_offset + create_idx;
625 channel_type = SR_CHANNEL_ANALOG;
627 channel_sdi_nr = detail->channel_offset + create_idx;
628 channel_type = SR_CHANNEL_LOGIC;
630 sr_channel_new(in->sdi, channel_sdi_nr,
631 channel_type, TRUE, channel_name->str);
635 inc->logic_channels = channel_idx;
636 inc->analog_channels = analog_idx;
637 g_string_free(channel_name, TRUE);
/*
 * Get the processing details for a column of the input text.
 *
 * inc: The input module's context.
 * nr:  The column number, counting from 1.
 *
 * Checks for a missing context or details table, and for column
 * numbers outside of the range 1 to column_want_count, before the
 * table gets accessed.
 */
643 static const struct column_details *lookup_column_details(struct context *inc, size_t nr)
645 if (!inc || !inc->column_details)
647 if (!nr || nr > inc->column_want_count)
/* Translate the one based column number to the zero based table index. */
649 return &inc->column_details[nr - 1];
653 * Primitive operations for text input: Strip comments off text lines.
654 * Split text lines into columns. Process input text for individual
/*
 * Strip a comment off a text line.
 *
 * buf:    The text line. Gets modified in place, callers check for an
 *         empty line after the call.
 * prefix: The comment leader sequence.
 */
658 static void strip_comment(char *buf, const GString *prefix)
/* Locate the first occurrence of the comment leader in the line. */
665 if ((ptr = strstr(buf, prefix->str))) {
672 * @brief Splits a text line into a set of columns.
674 * @param[in] buf The input text line to split.
675 * @param[in] inc The input module's context.
677 * @returns An array of strings, representing the columns' text.
679 * This routine splits a text line on previously determined separators.
681 static char **split_line(char *buf, struct context *inc)
/*
 * A max_tokens value of 0 splits the line completely. The result is
 * a newly allocated string vector, initial_parse() releases it by
 * means of g_strfreev().
 */
683 return g_strsplit(buf, inc->delimiter->str, 0);
687 * @brief Parse a multi-bit field into several logic channels.
689 * @param[in] column The input text, a run of bin/hex/oct digits.
690 * @param[in] inc The input module's context.
691 * @param[in] details The column processing details.
693 * @retval SR_OK Success.
694 * @retval SR_ERR Invalid input data (empty, or format error).
696 * This routine modifies the logic levels in the current sample set,
697 * based on the text input and a user provided format spec.
699 static int parse_logic(const char *column, struct context *inc,
700 const struct column_details *details)
702 size_t length, ch_rem, ch_idx, ch_inc;
706 const char *type_text;
710 * Prepare to read the digits from the text end towards the start.
711 * A digit corresponds to a variable number of channels (depending
712 * on the value's radix). Prepare the mapping of text digits to
713 * (a number of) logic channels.
715 length = strlen(column);
717 sr_err("Column %zu in line %zu is empty.", details->col_nr,
/* Start behind the last character, text gets consumed back to front. */
721 rdptr = &column[length];
722 ch_idx = details->channel_offset;
723 ch_rem = details->channel_count;
726 * Get another digit and derive up to four logic channels' state from
727 * it. Make sure to not process more bits than the column has channels
728 * associated with it.
/* Stops when the text is exhausted, or all of the column's channels are served. */
730 while (rdptr > column && ch_rem) {
731 /* Check for valid digits according to the input radix. */
733 switch (details->text_format) {
/* Hex digit test plus upper bound, only '0' and '1' pass. */
735 valid = g_ascii_isxdigit(c) && c < '2';
/* Hex digit test plus upper bound, only '0' to '7' pass. */
739 valid = g_ascii_isxdigit(c) && c < '8';
743 valid = g_ascii_isxdigit(c);
751 type_text = col_format_text[details->text_format];
752 sr_err("Invalid text '%s' in %s type column %zu in line %zu.",
753 column, type_text, details->col_nr, inc->line_number);
756 /* Use the digit's bits for logic channels' data. */
757 bits = g_ascii_xdigit_value(c);
758 switch (details->text_format) {
/* Bit N of the digit's value maps to the column's channel ch_idx + N. */
762 set_logic_level(inc, ch_idx + 3, bits & (1 << 3));
768 set_logic_level(inc, ch_idx + 2, bits & (1 << 2));
772 set_logic_level(inc, ch_idx + 1, bits & (1 << 1));
777 set_logic_level(inc, ch_idx + 0, bits & (1 << 0));
781 /* ShouldNotHappen(TM), but silences compiler warning. */
787 * TODO Determine whether the availability of extra input data
788 * for unhandled logic channels is worth warning here. In this
789 * implementation users are in control, and can have the more
790 * significant bits ignored (which can be considered a feature
791 * and not really a limitation).
798 * @brief Parse a floating point text into an analog value.
800 * @param[in] column The input text, a floating point number.
801 * @param[in] inc The input module's context.
802 * @param[in] details The column processing details.
804 * @retval SR_OK Success.
805 * @retval SR_ERR Invalid input data (empty, or format error).
807 * This routine modifies the analog values in the current sample set,
808 * based on the text input and a user provided format spec.
810 static int parse_analog(const char *column, struct context *inc,
811 const struct column_details *details)
814 double dvalue; float fvalue;
/* Only columns of analog format are handled here. */
818 if (details->text_format != FORMAT_ANALOG)
821 length = strlen(column);
823 sr_err("Column %zu in line %zu is empty.", details->col_nr,
/*
 * Pick the text conversion routine which matches the size of the
 * sample value's data type.
 */
827 if (sizeof(value) == sizeof(double)) {
828 ret = sr_atod_ascii(column, &dvalue);
830 } else if (sizeof(value) == sizeof(float)) {
831 ret = sr_atof_ascii(column, &fvalue);
837 sr_err("Cannot parse analog text %s in column %zu in line %zu.",
838 column, details->col_nr, inc->line_number);
/* An analog column feeds exactly one channel, found at its offset. */
841 set_analog_value(inc, details->channel_offset, value);
847 * @brief Parse routine which ignores the input text.
849 * This routine exists to unify dispatch code paths, mapping input file
850 * columns' data types to their respective parse routines.
/* Matches the col_parse_cb signature, registered for FORMAT_NONE columns. */
852 static int parse_ignore(const char *column, struct context *inc,
853 const struct column_details *details)
/*
 * Signature of the per-column parse routines: The column's text, the
 * input module's context, and the column's processing details.
 */
861 typedef int (*col_parse_cb)(const char *column, struct context *inc,
862 const struct column_details *details);
/*
 * Parse routines, indexed by enum single_col_format. All of the digits
 * based formats share parse_logic(), which tells them apart by means
 * of the column details' text format.
 */
864 static const col_parse_cb col_parse_funcs[] = {
865 [FORMAT_NONE] = parse_ignore,
866 [FORMAT_BIN] = parse_logic,
867 [FORMAT_OCT] = parse_logic,
868 [FORMAT_HEX] = parse_logic,
869 [FORMAT_ANALOG] = parse_analog,
/*
 * Set up the input module's context from the user provided options.
 *
 * in:      The input module instance, receives a fresh device instance
 *          and context.
 * options: Hash table of option values, looked up by option name.
 *
 * Checks the options for consistency (non-empty column separator,
 * known single-column format, start line of at least 1), and derives
 * the "column_formats" spec which describes the columns' layout.
 */
872 static int init(struct sr_input *in, GHashTable *options)
875 size_t single_column, first_column, logic_channels;
877 enum single_col_format format;
880 in->sdi = g_malloc0(sizeof(*in->sdi));
881 in->priv = inc = g_malloc0(sizeof(*inc));
883 single_column = g_variant_get_uint32(g_hash_table_lookup(options, "single_column"));
884 logic_channels = g_variant_get_uint32(g_hash_table_lookup(options, "logic_channels"));
885 inc->delimiter = g_string_new(g_variant_get_string(
886 g_hash_table_lookup(options, "column_separator"), NULL));
887 if (!inc->delimiter->len) {
888 sr_err("Column separator cannot be empty.");
891 s = g_variant_get_string(g_hash_table_lookup(options, "single_format"), NULL);
/*
 * Compares the first three characters only and ignores their case,
 * longer spellings like "binary" are accepted as well.
 */
892 if (g_ascii_strncasecmp(s, "bin", 3) == 0) {
894 } else if (g_ascii_strncasecmp(s, "hex", 3) == 0) {
896 } else if (g_ascii_strncasecmp(s, "oct", 3) == 0) {
899 sr_err("Invalid single-column format: '%s'", s);
902 inc->comment = g_string_new(g_variant_get_string(
903 g_hash_table_lookup(options, "comment_leader"), NULL));
904 if (g_string_equal(inc->comment, inc->delimiter)) {
906 * Using the same sequence as comment leader and column
907 * separator won't work. The user probably specified ';'
908 * as the column separator but did not adjust the comment
909 * leader. Try DWIM, drop comment strippin support here.
911 sr_warn("Comment leader and column separator conflict, disabling comment support.");
912 g_string_truncate(inc->comment, 0);
914 inc->samplerate = g_variant_get_uint64(g_hash_table_lookup(options, "samplerate"));
915 first_column = g_variant_get_uint32(g_hash_table_lookup(options, "first_column"));
916 inc->use_header = g_variant_get_boolean(g_hash_table_lookup(options, "header"));
917 inc->start_line = g_variant_get_uint32(g_hash_table_lookup(options, "start_line"));
918 if (inc->start_line < 1) {
919 sr_err("Invalid start line %zu.", inc->start_line);
924 * Scan flexible, to get prefered format specs which describe
925 * the input file's data formats. As well as some simple specs
926 * for backwards compatibility and user convenience.
928 * This logic ends up with a copy of the format string, either
929 * user provided or internally derived. Actual creation of the
930 * column processing details gets deferred until the first line
931 * of input data was seen. To support automatic determination of
932 * e.g. channel counts from column counts.
/*
 * Order of precedence: An explicit "column_formats" spec, then
 * simple single-column mode (which needs both the column number and
 * the channel count), then simple multi-column mode, then all
 * columns as single-bit logic as the last resort.
 */
934 s = g_variant_get_string(g_hash_table_lookup(options, "column_formats"), NULL);
936 inc->column_formats = g_strdup(s);
937 sr_dbg("User specified column_formats: %s.", s);
938 } else if (single_column && logic_channels) {
939 format_char = col_format_char[format];
/* The data column either comes first, or leading columns get ignored. */
940 if (single_column == 1) {
941 inc->column_formats = g_strdup_printf("%c%zu",
942 format_char, logic_channels);
944 inc->column_formats = g_strdup_printf("%zu-,%c%zu",
946 format_char, logic_channels);
948 sr_dbg("Backwards compat single_column, col %zu, fmt %s, bits %zu -> %s.",
949 single_column, col_format_text[format], logic_channels,
950 inc->column_formats);
951 } else if (!single_column) {
/* Ignore the columns which precede the first data column, if any. */
952 if (first_column > 1) {
953 inc->column_formats = g_strdup_printf("%zu-,%zul",
954 first_column - 1, logic_channels);
956 inc->column_formats = g_strdup_printf("%zul",
959 sr_dbg("Backwards compat multi-column, col %zu, chans %zu -> %s.",
960 first_column, logic_channels,
961 inc->column_formats);
963 sr_warn("Unknown or unsupported columns layout spec, assuming simple multi-column mode.");
964 inc->column_formats = g_strdup("*l");
971 * Check the channel list for consistency across file re-import. See
972 * the VCD input module for more details and motivation.
/*
 * Move the device instance's current channel list to the context's
 * "previous channels" slot, after releasing a list which may have been
 * kept there before. Leaves the device instance without channels.
 */
975 static void keep_header_for_reread(const struct sr_input *in)
980 g_slist_free_full(inc->prev_sr_channels, sr_channel_free_cb);
981 inc->prev_sr_channels = in->sdi->channels;
982 in->sdi->channels = NULL;
/*
 * Compare the freshly created channel list against a list which was
 * kept from a previous read of the input, see keep_header_for_reread().
 * Differing lists are reported as an error. initial_parse() treats a
 * zero return value as failure.
 */
985 static int check_header_in_reread(const struct sr_input *in)
/* Covers the first read, when no previous list was kept. */
994 if (!inc->prev_sr_channels)
997 if (sr_channel_lists_differ(inc->prev_sr_channels, in->sdi->channels)) {
998 sr_err("Channel list change not supported for file re-read.");
/*
 * Drop the new list and reinstate the previous one, such that the
 * device instance holds the channel objects of the earlier read.
 */
1001 g_slist_free_full(in->sdi->channels, sr_channel_free_cb);
1002 in->sdi->channels = inc->prev_sr_channels;
1003 inc->prev_sr_channels = NULL;
/*
 * Set of line delimiter characters, used by initial_parse() to split
 * text into lines when no termination sequence was stored in the
 * context.
 */
1008 static const char *delim_set = "\r\n";
/*
 * Determine the line termination sequence of the text in a buffer.
 * initial_receive() uses the result as a search pattern, and stores a
 * copy in the context.
 *
 * The order of checks matters: CR LF is tested before the individual
 * LF and CR characters, which both are part of that sequence.
 */
1010 static const char *get_line_termination(GString *buf)
1015 if (g_strstr_len(buf->str, buf->len, "\r\n"))
1017 else if (memchr(buf->str, '\n', buf->len))
1019 else if (memchr(buf->str, '\r', buf->len))
/*
 * Inspect the first chunk of input text to prepare for data processing.
 *
 * in:  The input module instance.
 * buf: Initial input text, gets split into lines here.
 *
 * Finds the first line to process (skips lines before the start line,
 * blank lines, and comment-only lines), splits it into columns, creates
 * column processing details and channels from the format spec, checks
 * the channels for consistency with a previous read, and allocates the
 * datafeed buffers for logic and analog samples.
 */
1025 static int initial_parse(const struct sr_input *in, GString *buf)
1027 struct context *inc;
1029 size_t line_number, line_idx, ch_idx;
1031 char **lines, *line, **columns;
1037 /* Search for the first line to process (header or data). */
/* Prefer the known termination sequence, fall back to CR and LF characters. */
1039 if (inc->termination)
1040 lines = g_strsplit(buf->str, inc->termination, 0);
1042 lines = g_strsplit_set(buf->str, delim_set, 0);
1043 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1045 if (inc->start_line > line_number) {
1046 sr_spew("Line %zu skipped (before start).", line_number);
1049 if (line[0] == '\0') {
1050 sr_spew("Blank line %zu skipped.", line_number);
1053 strip_comment(line, inc->comment);
1054 if (line[0] == '\0') {
1055 sr_spew("Comment-only line %zu skipped.", line_number);
1059 /* Reached first proper line. */
1063 /* Not enough data for a proper line yet. */
1068 /* Get the number of columns in the line. */
1069 columns = split_line(line, inc);
1071 sr_err("Error while parsing line %zu.", line_number);
1075 num_columns = g_strv_length(columns);
1077 sr_err("Error while parsing line %zu.", line_number);
1081 sr_dbg("DIAG Got %zu columns in text line: %s.", num_columns, line);
1084 * Interpret the user provided column format specs. This might
1085 * involve inspection of the now received input text, to support
1086 * e.g. automatic detection of channel counts in the absence of
1087 * user provided specs. Optionally a header line is used to get
1090 * Check the then created channels for consistency across .reset
1091 * and .receive sequences (file re-load).
1093 ret = make_column_details_from_format(in, inc->column_formats, columns);
1095 sr_err("Cannot parse columns format using line %zu.", line_number);
1098 if (!check_header_in_reread(in)) {
1104 * Allocate buffer memory for datafeed submission of sample data.
1105 * Calculate the minimum buffer size to store the set of samples
1106 * of all channels (unit size). Determine a larger buffer size
1107 * for datafeed submission that is a multiple of the unit size.
1108 * Allocate the larger buffer, the "sample buffer" will point
1109 * to a location within that large buffer later.
1111 if (inc->logic_channels) {
/* One bit per logic channel, rounded up to whole bytes. */
1112 inc->sample_unit_size = (inc->logic_channels + 7) / 8;
/* Room for CHUNK_SIZE samples, the size is kept in bytes. */
1113 inc->datafeed_buf_size = CHUNK_SIZE;
1114 inc->datafeed_buf_size *= inc->sample_unit_size;
1115 inc->datafeed_buffer = g_malloc(inc->datafeed_buf_size);
/*
 * NOTE(review): g_malloc() terminates the program when the
 * allocation fails, this check is not expected to ever trigger.
 */
1116 if (!inc->datafeed_buffer) {
1117 sr_err("Cannot allocate datafeed send buffer (logic).");
1118 ret = SR_ERR_MALLOC;
1121 inc->datafeed_buf_fill = 0;
1124 if (inc->analog_channels) {
1125 size_t sample_size, sample_count;
/*
 * Spread CHUNK_SIZE bytes across all analog channels. The resulting
 * buffer size is the number of samples per channel (not bytes).
 */
1128 sample_size = sizeof(inc->analog_datafeed_buffer[0]);
1129 inc->analog_datafeed_buf_size = CHUNK_SIZE;
1130 inc->analog_datafeed_buf_size /= sample_size;
1131 inc->analog_datafeed_buf_size /= inc->analog_channels;
1132 sample_count = inc->analog_channels * inc->analog_datafeed_buf_size;
1133 inc->analog_datafeed_buffer = g_malloc0(sample_count * sample_size);
/* NOTE(review): Same as above, g_malloc0() does not return on failure. */
1134 if (!inc->analog_datafeed_buffer) {
1135 sr_err("Cannot allocate datafeed send buffer (analog).");
1136 ret = SR_ERR_MALLOC;
1139 inc->analog_datafeed_buf_fill = 0;
/* Prepare a single item channel list per analog channel, for the analog packets. */
1140 inc->analog_datafeed_channels = g_malloc0_n(inc->analog_channels, sizeof(inc->analog_datafeed_channels[0]));
1141 for (ch_idx = 0; ch_idx < inc->analog_channels; ch_idx++) {
/*
 * NOTE(review): Lookup by list position. Assumes that all logic
 * channels precede the analog channels in the device's channel
 * list -- verify for specs which list analog columns before
 * logic columns.
 */
1143 channel = g_slist_nth_data(in->sdi->channels, inc->logic_channels + ch_idx);
1144 inc->analog_datafeed_channels[ch_idx] = g_slist_append(NULL, channel);
/* Collect the analog columns' digits in column order, one item per analog channel. */
1146 inc->analog_datafeed_digits = g_malloc0(inc->analog_channels * sizeof(inc->analog_datafeed_digits[0]));
1147 digits_item = inc->analog_datafeed_digits;
1148 for (detail_idx = 0; detail_idx < inc->column_want_count; detail_idx++) {
1149 if (inc->column_details[detail_idx].text_format != FORMAT_ANALOG)
1151 *digits_item++ = inc->column_details[detail_idx].analog_digits;
1157 g_strfreev(columns);
1164 * Gets called from initial_receive(), which runs until the end-of-line
1165 * encoding of the input stream could get determined. Assumes that this
1166 * routine receives enough buffered initial input data to either see the
1167 * BOM when there is one, or that no BOM will follow when a text line
1168 * termination sequence was seen. Silently drops the UTF-8 BOM sequence
1169 * from the input buffer if one was seen. Does not care to protect
1170 * against multiple execution or dropping the BOM multiple times --
1171 * there should be at most one in the input stream.
1173 static void initial_bom_check(const struct sr_input *in)
1175 static const char *utf8_bom = "\xef\xbb\xbf";
1177 if (in->buf->len < strlen(utf8_bom))
1179 if (strncmp(in->buf->str, utf8_bom, strlen(utf8_bom)) != 0)
1181 g_string_erase(in->buf, 0, strlen(utf8_bom));
1184 static int initial_receive(const struct sr_input *in)
1186 struct context *inc;
1190 const char *termination;
1192 initial_bom_check(in);
1196 termination = get_line_termination(in->buf);
1198 /* Don't have a full line yet. */
1201 p = g_strrstr_len(in->buf->str, in->buf->len, termination);
1203 /* Don't have a full line yet. */
1205 len = p - in->buf->str - 1;
1206 new_buf = g_string_new_len(in->buf->str, len);
1207 g_string_append_c(new_buf, '\0');
1209 inc->termination = g_strdup(termination);
1211 if (in->buf->str[0] != '\0')
1212 ret = initial_parse(in, new_buf);
1216 g_string_free(new_buf, TRUE);
1221 static int process_buffer(struct sr_input *in, gboolean is_eof)
1223 struct context *inc;
1225 size_t line_idx, col_idx, col_nr;
1226 const struct column_details *details;
1227 col_parse_cb parse_func;
1229 char *p, **lines, *line, **columns, *column;
1232 if (!inc->started) {
1233 std_session_send_df_header(in->sdi);
1234 inc->started = TRUE;
1238 * Consider empty input non-fatal. Keep accumulating input until
1239 * at least one full text line has become available. Grab the
1240 * maximum amount of accumulated data that consists of full text
1241 * lines, and process what has been received so far, leaving not
1242 * yet complete lines for the next invocation.
1244 * Enforce that all previously buffered data gets processed in
1245 * the "EOF" condition. Do not insist in the presence of the
1246 * termination sequence for the last line (may often be missing
1247 * on Windows). A present termination sequence will just result
1248 * in the "execution of an empty line", and does not harm.
1253 p = in->buf->str + in->buf->len;
1255 p = g_strrstr_len(in->buf->str, in->buf->len, inc->termination);
1259 p += strlen(inc->termination);
1261 g_strstrip(in->buf->str);
1264 lines = g_strsplit(in->buf->str, inc->termination, 0);
1265 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
1267 if (inc->line_number < inc->start_line) {
1268 sr_spew("Line %zu skipped (before start).", inc->line_number);
1271 if (line[0] == '\0') {
1272 sr_spew("Blank line %zu skipped.", inc->line_number);
1276 /* Remove trailing comment. */
1277 strip_comment(line, inc->comment);
1278 if (line[0] == '\0') {
1279 sr_spew("Comment-only line %zu skipped.", inc->line_number);
1283 /* Skip the header line, its content was used as the channel names. */
1284 if (inc->use_header && !inc->header_seen) {
1285 sr_spew("Header line %zu skipped.", inc->line_number);
1286 inc->header_seen = TRUE;
1290 /* Split the line into columns, check for minimum length. */
1291 columns = split_line(line, inc);
1293 sr_err("Error while parsing line %zu.", inc->line_number);
1297 num_columns = g_strv_length(columns);
1298 if (num_columns < inc->column_want_count) {
1299 sr_err("Insufficient column count %zu in line %zu.",
1300 num_columns, inc->line_number);
1301 g_strfreev(columns);
1306 /* Have the columns of the current text line processed. */
1307 clear_logic_samples(inc);
1308 clear_analog_samples(inc);
1309 for (col_idx = 0; col_idx < inc->column_want_count; col_idx++) {
1310 column = columns[col_idx];
1311 col_nr = col_idx + 1;
1312 details = lookup_column_details(inc, col_nr);
1313 if (!details || !details->text_format)
1315 parse_func = col_parse_funcs[details->text_format];
1318 ret = parse_func(column, inc, details);
1320 g_strfreev(columns);
1326 /* Send sample data to the session bus (buffered). */
1327 ret = queue_logic_samples(in);
1328 ret += queue_analog_samples(in);
1330 sr_err("Sending samples failed.");
1331 g_strfreev(columns);
1336 g_strfreev(columns);
1339 g_string_erase(in->buf, 0, p - in->buf->str);
1344 static int receive(struct sr_input *in, GString *buf)
1346 struct context *inc;
1349 g_string_append_len(in->buf, buf->str, buf->len);
1352 if (!inc->column_seen_count) {
1353 ret = initial_receive(in);
1354 if (ret == SR_ERR_NA)
1355 /* Not enough data yet. */
1357 else if (ret != SR_OK)
1360 /* sdi is ready, notify frontend. */
1361 in->sdi_ready = TRUE;
1365 ret = process_buffer(in, FALSE);
1370 static int end(struct sr_input *in)
1372 struct context *inc;
1376 ret = process_buffer(in, TRUE);
1382 ret = flush_logic_samples(in);
1383 ret += flush_analog_samples(in);
1389 std_session_send_df_end(in->sdi);
1394 static void cleanup(struct sr_input *in)
1396 struct context *inc;
1398 keep_header_for_reread(in);
1402 g_free(inc->termination);
1403 inc->termination = NULL;
1404 g_free(inc->datafeed_buffer);
1405 inc->datafeed_buffer = NULL;
1406 g_free(inc->analog_datafeed_buffer);
1407 inc->analog_datafeed_buffer = NULL;
1410 static int reset(struct sr_input *in)
1412 struct context *inc = in->priv;
1415 inc->started = FALSE;
1416 g_string_truncate(in->buf, 0);
1435 static struct sr_option options[] = {
1437 "column_formats", "Column format specs",
1438 "Specifies text columns data types: comma separated list of [<cols>]<fmt>[<bits>], with -/x/o/b/l format specifiers.",
1441 [OPT_SINGLE_COL] = {
1442 "single_column", "Single column",
1443 "Enable single-column mode, exclusively use text from the specified column (number starting at 1).",
1447 "first_column", "First column",
1448 "Number of the first column with logic data in simple multi-column mode (number starting at 1, default 1).",
1452 "logic_channels", "Number of logic channels",
1453 "Logic channel count, required in simple single-column mode, defaults to \"all remaining columns\" in simple multi-column mode. Obsoleted by 'column_formats'.",
1457 "single_format", "Data format for simple single-column mode.",
1458 "The number format of single-column mode input data: bin, hex, oct.",
1462 "start_line", "Start line",
1463 "The line number at which to start processing input text (default: 1).",
1467 "header", "Get channel names from first line.",
1468 "Use the first processed line's column captions (when available) as channel names.",
1472 "samplerate", "Samplerate (Hz)",
1473 "The input data's sample rate in Hz.",
1477 "column_separator", "Column separator",
1478 "The sequence which separates text columns. Non-empty text, comma by default.",
1482 "comment_leader", "Comment leader character",
1483 "The text which starts comments at the end of text lines.",
1486 [OPT_MAX] = ALL_ZERO,
1489 static const struct sr_option *get_options(void)
1493 if (!options[0].def) {
1494 options[OPT_COL_FMTS].def = g_variant_ref_sink(g_variant_new_string(""));
1495 options[OPT_SINGLE_COL].def = g_variant_ref_sink(g_variant_new_uint32(0));
1496 options[OPT_FIRST_COL].def = g_variant_ref_sink(g_variant_new_uint32(1));
1497 options[OPT_NUM_LOGIC].def = g_variant_ref_sink(g_variant_new_uint32(0));
1498 options[OPT_FORMAT].def = g_variant_ref_sink(g_variant_new_string("bin"));
1500 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("bin")));
1501 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("hex")));
1502 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("oct")));
1503 options[OPT_FORMAT].values = l;
1504 options[OPT_START].def = g_variant_ref_sink(g_variant_new_uint32(1));
1505 options[OPT_HEADER].def = g_variant_ref_sink(g_variant_new_boolean(FALSE));
1506 options[OPT_RATE].def = g_variant_ref_sink(g_variant_new_uint64(0));
1507 options[OPT_DELIM].def = g_variant_ref_sink(g_variant_new_string(","));
1508 options[OPT_COMMENT].def = g_variant_ref_sink(g_variant_new_string(";"));
1514 SR_PRIV struct sr_input_module input_csv = {
1517 .desc = "Comma-separated values",
1518 .exts = (const char*[]){"csv", NULL},
1519 .options = get_options,