]> sigrok.org Git - libsigrok.git/blame - src/input/csv.c
input/csv: another stab at option help texts
[libsigrok.git] / src / input / csv.c
CommitLineData
4a35548b
MS
1/*
2 * This file is part of the libsigrok project.
3 *
4 * Copyright (C) 2013 Marc Schink <sigrok-dev@marcschink.de>
e53f32d2 5 * Copyright (C) 2019 Gerhard Sittig <gerhard.sittig@gmx.net>
4a35548b
MS
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
e05f1827
GS
21#include "config.h"
22
23#include <glib.h>
4a35548b
MS
24#include <stdlib.h>
25#include <string.h>
e05f1827 26
c1aae900 27#include <libsigrok/libsigrok.h>
4a35548b 28#include "libsigrok-internal.h"
f6dcb320 29#include "scpi.h" /* String un-quote for channel name from header line. */
4a35548b 30
3544f848 31#define LOG_PREFIX "input/csv"
4a35548b 32
9a4fd01a 33#define CHUNK_SIZE (4 * 1024 * 1024)
cd59e6ec 34
4a35548b
MS
35/*
36 * The CSV input module has the following options:
37 *
72903e9d
GS
38 * column_formats: Specifies the data formats and channel counts for the
39 * input file's text columns. Accepts a comma separated list of tuples
40 * with: an optional column repeat count ('*' as a wildcard meaning
41 * "all remaining columns", only applicable to the last field), a format
42 * specifying character ('x' hexadecimal, 'o' octal, 'b' binary, 'l'
43 * single-bit logic), and an optional bit count (translating to: logic
08eb955a
GS
44 * channels communicated in that column). The 'a' format marks analog
45 * data, an optionally following number is the digits count (resolution).
7e4e65bf
GS
46 * The 't' format marks timestamp values, which could help in automatic
47 * determination of the input stream's samplerate. This "column_formats"
48 * option is most versatile, other forms of specifying the column layout
49 * only exist for backwards compatibility, and are rather limited. They
50 * exclusively support logic input data in strictly adjacent columns,
51 * with further constraints on column layout for multi-bit data.
4a35548b 52 *
72903e9d
GS
53 * single_column: Specifies the column number which contains the logic data
54 * for single-column mode. All logic data is taken from several bits
55 * which all are kept within that one column. Only exists for backwards
56 * compatibility, see "column_formats" for more flexibility.
4a35548b 57 *
72903e9d
GS
58 * first_column: Specifies the number of the first column with logic data
59 * in simple multi-column mode. Only exists for backwards compatibility,
60 * see "column_formats" for more flexibility.
4a35548b 61 *
72903e9d
GS
62 * logic_channels: Specifies the number of logic channels. Is required in
63 * simple single-column mode. Is optional in simple multi-column mode
64 * (and defaults to all remaining columns). Only exists for backwards
65 * compatibility, see "column_formats" for more flexibility.
4a35548b 66 *
72903e9d
GS
67 * single_format: Specifies the format of the input text in simple single-
68 * column mode. Available formats are: 'bin' (default), 'hex' and 'oct'.
69 * Simple multi-column mode always uses single-bit data per column.
70 * Only exists for backwards compatibility, see "column_formats" for
71 * more flexibility.
4a35548b 72 *
72903e9d
GS
73 * start_line: Specifies at which line to start processing the input file.
74 * Allows to skip leading lines which neither are header nor data lines.
75 * By default all of the input file gets processed.
4a35548b 76 *
72903e9d
GS
77 * header: Boolean option, controls whether the first processed line is used
78 * to determine channel names. Off by default. Generic channel names are
79 * used in the absence of header line content.
4a35548b 80 *
72903e9d
GS
81 * samplerate: Specifies the samplerate of the input data. Defaults to 0.
82 * User specs take precedence over data which optionally gets derived
83 * from input data.
4a35548b 84 *
72903e9d
GS
85 * column_separator: Specifies the sequence which separates the text file
86 * columns. Cannot be empty. Defaults to comma.
87 *
88 * comment_leader: Specifies the sequence which starts comments that run
89 * up to the end of the current text line. Can be empty to disable
90 * comment support. Defaults to semicolon.
91 *
92 * Typical examples of using these options:
93 * - ... -I csv:column_formats=*l ...
94 * All columns are single-bit logic data. Identical to the previous
95 * multi-column mode (the default when no options were given at all).
96 * - ... -I csv:column_formats=3-,*l ...
97 * Ignore the first three columns, get single-bit logic data from all
98 * remaining columns (multi-column mode with first-column above 1).
99 * - ... -I csv:column_formats=3-,4l,x8 ...
100 * Ignore the first three columns, get single-bit logic data from the
101 * next four columns, then eight-bit data in hex format from the next
102 * column. More columns may follow in the input text but won't get
103 * processed. (Mix of previous multi-column as well as single-column
104 * modes.)
105 * - ... -I csv:column_formats=4x8,b16,5l ...
106 * Get eight-bit data in hex format from the first four columns, then
107 * sixteen-bit data in binary format, then five times single-bit data.
108 * - ... -I csv:single_column=2:single_format=bin:logic_channels=8 ...
109 * Get eight logic bits in binary format from column 2. (Simple
110 * single-column mode, corresponds to the "-,b8" format.)
111 * - ... -I csv:first_column=6:logic_channels=4 ...
112 * Get four single-bit logic channels from columns 6 to 9 respectively.
113 * (Simple multi-column mode, corresponds to the "5-,4b" format.)
114 * - ... -I csv:start_line=20:header=yes:...
115 * Skip the first 19 text lines. Use line 20 to derive channel names.
116 * Data starts at line 21.
08eb955a
GS
117 * - ... -I csv:column_formats=*a6 ...
118 * Each column contains an analog value with six significant digits
119 * after the decimal period.
7e4e65bf
GS
120 * - ... -I csv:column_formats=t,2a ...
121 * The first column contains timestamps, the next two columns contain
122 * analog values. The capture's samplerate could get determined from
123 * the timestamp values if not provided by the user by means of the
124 * 'samplerate' option. This assumes a mere number in units of seconds,
125 * and equidistant rows. There is no fancy support for textual unit
126 * suffixes nor gaps in the stream of samples nor other non-linearity
127 * (just '-' ignore the column if the format is not supported).
4a35548b
MS
128 */
129
ccff468b
GS
130/*
131 * TODO
132 *
3f1f63f0 133 * - Extend support for analog input data.
43bdef26
GS
134 * - Determine why analog samples of 'double' data type get scrambled
135 * in sigrok-cli screen output. Is analog.encoding->unitsize not
136 * handled properly? A sigrok-cli or libsigrok (src/output) issue?
3f1f63f0
GS
137 * - Reconsider the channel creation after format processing. Current
138 * logic may "bleed" channel names into the analog group when logic
139 * channels' columns follow analog columns (seen with "-,2a,x8").
140 * Trying to sort it out, a naive change used to map logic channels'
141 * data to incorrect bitmap positions. The whole channel numbering
142 * needs reconsideration. Probably it's easiest to first create _all_
143 * logic channels so that they have adjacent numbers starting at 0
144 * (addressing logic bits), then all analog channels (again adjacent)
145 * to simplify the calculation of their index in the sample set as
146 * well as their sdi channel index from the "analog column index".
5a971176
GS
147 * - Optionally get sample rate from timestamp column. Just best-effort
148 * approach, not necessarily reliable. Users can always specify rates.
149 * - Add a test suite for input modules in general, and CSV in specific?
150 * Becomes more important with the multitude of options and their
151 * interaction. Could cover edge cases (BOM presence, line termination
152 * absence, etc) and auto-stuff as well (channel names, channel counts,
153 * samplerates, etc).
ccff468b
GS
154 */
155
43bdef26
GS
/*
 * Storage type for analog sample values throughout this input module.
 * Remains at single precision, 'double' currently is flawed.
 */
typedef float csv_analog_t;
157
/*
 * Text formats which an individual input column can use. The order of
 * the logic formats matters, format_is_logic() checks for the range
 * from FORMAT_BIN to FORMAT_OCT.
 */
enum single_col_format {
	FORMAT_NONE,	/* Column gets ignored. */
	FORMAT_BIN,	/* Binary digits, carries one or several bits. */
	FORMAT_HEX,	/* Hexadecimal digits, carries several bits. */
	FORMAT_OCT,	/* Octal digits, carries several bits. */
	FORMAT_ANALOG,	/* Floating point text for one analog channel. */
	FORMAT_TIME,	/* Timestamp values. */
};
167
168static const char *col_format_text[] = {
169 [FORMAT_NONE] = "unknown",
170 [FORMAT_BIN] = "binary",
171 [FORMAT_HEX] = "hexadecimal",
172 [FORMAT_OCT] = "octal",
43bdef26 173 [FORMAT_ANALOG] = "analog",
7e4e65bf 174 [FORMAT_TIME] = "timestamp",
e53f32d2
GS
175};
176
1a920e33
GS
177static const char col_format_char[] = {
178 [FORMAT_NONE] = '?',
179 [FORMAT_BIN] = 'b',
180 [FORMAT_HEX] = 'x',
181 [FORMAT_OCT] = 'o',
43bdef26 182 [FORMAT_ANALOG] = 'a',
7e4e65bf 183 [FORMAT_TIME] = 't',
1a920e33
GS
184};
185
fc3b42e9
GS
186static gboolean format_is_ignore(enum single_col_format fmt)
187{
188 return fmt == FORMAT_NONE;
189}
190
191static gboolean format_is_logic(enum single_col_format fmt)
192{
193 return fmt >= FORMAT_BIN && fmt <= FORMAT_OCT;
194}
195
196static gboolean format_is_analog(enum single_col_format fmt)
197{
198 return fmt == FORMAT_ANALOG;
199}
200
7e4e65bf
GS
201static gboolean format_is_timestamp(enum single_col_format fmt)
202{
203 return fmt == FORMAT_TIME;
204}
205
e53f32d2
GS
206struct column_details {
207 size_t col_nr;
208 enum single_col_format text_format;
209 size_t channel_offset;
210 size_t channel_count;
3f1f63f0 211 size_t channel_index;
a267bf45 212 int analog_digits;
4a35548b
MS
213};
214
215struct context {
41d214f6
BV
216 gboolean started;
217
7e4e65bf 218 /* Current samplerate, optionally determined from input data. */
4a35548b 219 uint64_t samplerate;
7e4e65bf 220 double prev_timestamp;
246aca5f 221 gboolean samplerate_sent;
4a35548b 222
a267bf45 223 /* Number of channels. */
836fac9c 224 size_t logic_channels;
43bdef26 225 size_t analog_channels;
4a35548b 226
836fac9c 227 /* Column delimiter (actually separator), comment leader, EOL sequence. */
4a35548b 228 GString *delimiter;
4a35548b 229 GString *comment;
41d214f6
BV
230 char *termination;
231
1a920e33
GS
232 /* Format specs for input columns, and processing state. */
233 size_t column_seen_count;
234 const char *column_formats;
e53f32d2
GS
235 size_t column_want_count;
236 struct column_details *column_details;
237
4a35548b 238 /* Line number to start processing. */
6433156c 239 size_t start_line;
4a35548b
MS
240
241 /*
242 * Determines if the first line should be treated as header and used for
ba7dd8bb 243 * channel names in multi column mode.
4a35548b 244 */
de8fe3b5
GS
245 gboolean use_header;
246 gboolean header_seen;
4a35548b 247
cd59e6ec
GS
248 size_t sample_unit_size; /**!< Byte count for a single sample. */
249 uint8_t *sample_buffer; /**!< Buffer for a single sample. */
43bdef26 250 csv_analog_t *analog_sample_buffer; /**!< Buffer for one set of analog values. */
4a35548b 251
cd59e6ec
GS
252 uint8_t *datafeed_buffer; /**!< Queue for datafeed submission. */
253 size_t datafeed_buf_size;
254 size_t datafeed_buf_fill;
43bdef26
GS
255 /* "Striped" layout, M samples for N channels each. */
256 csv_analog_t *analog_datafeed_buffer; /**!< Queue for analog datafeed. */
257 size_t analog_datafeed_buf_size;
258 size_t analog_datafeed_buf_fill;
a267bf45
GS
259 GSList **analog_datafeed_channels;
260 int *analog_datafeed_digits;
4a35548b 261
4a35548b 262 /* Current line number. */
6433156c 263 size_t line_number;
affaf540
GS
264
265 /* List of previously created sigrok channels. */
266 GSList *prev_sr_channels;
4a35548b
MS
267};
268
626c388a
GS
269/*
270 * Primitive operations to handle sample sets:
271 * - Keep a buffer for datafeed submission, capable of holding many
272 * samples (reduces call overhead, improves throughput).
273 * - Have a "current sample set" pointer reference one position in that
274 * large samples buffer.
275 * - Clear the current sample set before text line inspection, then set
276 * the bits which are found active in the current line of text input.
277 * Phrase the API such that call sites can be kept simple. Advance to
278 * the next sample set between lines, flush the larger buffer as needed
279 * (when it is full, or upon EOF).
280 */
281
43bdef26
GS
282static int flush_samplerate(const struct sr_input *in)
283{
284 struct context *inc;
285 struct sr_datafeed_packet packet;
286 struct sr_datafeed_meta meta;
287 struct sr_config *src;
288
289 inc = in->priv;
290 if (inc->samplerate && !inc->samplerate_sent) {
291 packet.type = SR_DF_META;
292 packet.payload = &meta;
293 src = sr_config_new(SR_CONF_SAMPLERATE, g_variant_new_uint64(inc->samplerate));
294 meta.config = g_slist_append(NULL, src);
295 sr_session_send(in->sdi, &packet);
296 g_slist_free(meta.config);
297 sr_config_free(src);
298 inc->samplerate_sent = TRUE;
299 }
300
301 return SR_OK;
302}
303
626c388a
GS
304static void clear_logic_samples(struct context *inc)
305{
43bdef26
GS
306 if (!inc->logic_channels)
307 return;
626c388a
GS
308 inc->sample_buffer = &inc->datafeed_buffer[inc->datafeed_buf_fill];
309 memset(inc->sample_buffer, 0, inc->sample_unit_size);
310}
311
312static void set_logic_level(struct context *inc, size_t ch_idx, int on)
313{
314 size_t byte_idx, bit_idx;
315 uint8_t bit_mask;
316
836fac9c 317 if (ch_idx >= inc->logic_channels)
626c388a
GS
318 return;
319 if (!on)
320 return;
321
322 byte_idx = ch_idx / 8;
323 bit_idx = ch_idx % 8;
324 bit_mask = 1 << bit_idx;
325 inc->sample_buffer[byte_idx] |= bit_mask;
326}
327
328static int flush_logic_samples(const struct sr_input *in)
329{
330 struct context *inc;
331 struct sr_datafeed_packet packet;
332 struct sr_datafeed_logic logic;
333 int rc;
334
335 inc = in->priv;
336 if (!inc->datafeed_buf_fill)
337 return SR_OK;
338
43bdef26
GS
339 rc = flush_samplerate(in);
340 if (rc != SR_OK)
341 return rc;
246aca5f 342
626c388a
GS
343 memset(&packet, 0, sizeof(packet));
344 memset(&logic, 0, sizeof(logic));
345 packet.type = SR_DF_LOGIC;
346 packet.payload = &logic;
347 logic.unitsize = inc->sample_unit_size;
348 logic.length = inc->datafeed_buf_fill;
349 logic.data = inc->datafeed_buffer;
350
351 rc = sr_session_send(in->sdi, &packet);
352 if (rc != SR_OK)
353 return rc;
354
355 inc->datafeed_buf_fill = 0;
356 return SR_OK;
357}
358
359static int queue_logic_samples(const struct sr_input *in)
360{
361 struct context *inc;
362 int rc;
363
364 inc = in->priv;
836fac9c
GS
365 if (!inc->logic_channels)
366 return SR_OK;
626c388a
GS
367
368 inc->datafeed_buf_fill += inc->sample_unit_size;
369 if (inc->datafeed_buf_fill == inc->datafeed_buf_size) {
370 rc = flush_logic_samples(in);
371 if (rc != SR_OK)
372 return rc;
373 }
374 return SR_OK;
375}
376
43bdef26
GS
377static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value);
378
379static void clear_analog_samples(struct context *inc)
380{
381 size_t idx;
382
383 if (!inc->analog_channels)
384 return;
385 inc->analog_sample_buffer = &inc->analog_datafeed_buffer[inc->analog_datafeed_buf_fill];
386 for (idx = 0; idx < inc->analog_channels; idx++)
387 set_analog_value(inc, idx, 0.0);
388}
389
390static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value)
391{
392 if (ch_idx >= inc->analog_channels)
393 return;
394 if (!value)
395 return;
396 inc->analog_sample_buffer[ch_idx * inc->analog_datafeed_buf_size] = value;
397}
398
399static int flush_analog_samples(const struct sr_input *in)
400{
43bdef26
GS
401 struct context *inc;
402 struct sr_datafeed_packet packet;
403 struct sr_datafeed_analog analog;
404 struct sr_analog_encoding encoding;
405 struct sr_analog_meaning meaning;
406 struct sr_analog_spec spec;
407 csv_analog_t *samples;
408 size_t ch_idx;
a267bf45 409 int digits;
43bdef26
GS
410 int rc;
411
412 inc = in->priv;
413 if (!inc->analog_datafeed_buf_fill)
414 return SR_OK;
415
416 rc = flush_samplerate(in);
417 if (rc != SR_OK)
418 return rc;
419
420 samples = inc->analog_datafeed_buffer;
421 for (ch_idx = 0; ch_idx < inc->analog_channels; ch_idx++) {
a267bf45 422 digits = inc->analog_datafeed_digits[ch_idx];
43bdef26
GS
423 sr_analog_init(&analog, &encoding, &meaning, &spec, digits);
424 memset(&packet, 0, sizeof(packet));
425 packet.type = SR_DF_ANALOG;
426 packet.payload = &analog;
427 analog.num_samples = inc->analog_datafeed_buf_fill;
428 analog.data = samples;
429 analog.meaning->channels = inc->analog_datafeed_channels[ch_idx];
430 analog.meaning->mq = 0;
431 analog.meaning->mqflags = 0;
432 analog.meaning->unit = 0;
433 analog.encoding->unitsize = sizeof(samples[0]);
434 analog.encoding->is_signed = TRUE;
435 analog.encoding->is_float = TRUE;
436#ifdef WORDS_BIGENDIAN
437 analog.encoding->is_bigendian = TRUE;
438#else
439 analog.encoding->is_bigendian = FALSE;
440#endif
441 analog.encoding->digits = spec.spec_digits;
442 rc = sr_session_send(in->sdi, &packet);
443 if (rc != SR_OK)
444 return rc;
445 samples += inc->analog_datafeed_buf_size;
446 }
447
448 inc->analog_datafeed_buf_fill = 0;
449 return SR_OK;
450}
451
452static int queue_analog_samples(const struct sr_input *in)
453{
454 struct context *inc;
455 int rc;
456
457 inc = in->priv;
458 if (!inc->analog_channels)
459 return SR_OK;
460
461 inc->analog_datafeed_buf_fill++;
462 if (inc->analog_datafeed_buf_fill == inc->analog_datafeed_buf_size) {
463 rc = flush_analog_samples(in);
464 if (rc != SR_OK)
465 return rc;
466 }
467 return SR_OK;
468}
469
2142a79b
GS
470/* Helpers for "column processing". */
471
472static int split_column_format(const char *spec,
473 size_t *column_count, enum single_col_format *format, size_t *bit_count)
474{
475 size_t count;
476 char *endp, format_char;
477 enum single_col_format format_code;
478
479 if (!spec || !*spec)
480 return SR_ERR_ARG;
481
1a920e33 482 /* Get the (optional, decimal, default 1) column count. Accept '*'. */
2142a79b 483 endp = NULL;
1a920e33 484 if (*spec == '*') {
5ada72fc 485 /* Workaround, strtoul("*") won't always yield expected endp. */
1a920e33
GS
486 count = 0;
487 endp = (char *)&spec[1];
488 } else {
489 count = strtoul(spec, &endp, 10);
490 }
2142a79b
GS
491 if (!endp)
492 return SR_ERR_ARG;
493 if (endp == spec)
494 count = 1;
495 if (column_count)
496 *column_count = count;
497 spec = endp;
498
499 /* Get the (mandatory, single letter) type spec (-/xob/l). */
500 format_char = *spec++;
501 switch (format_char) {
5ada72fc 502 case '-':
2142a79b
GS
503 case '/':
504 format_char = '-';
505 format_code = FORMAT_NONE;
506 break;
507 case 'x':
508 format_code = FORMAT_HEX;
509 break;
510 case 'o':
511 format_code = FORMAT_OCT;
512 break;
513 case 'b':
514 case 'l':
515 format_code = FORMAT_BIN;
516 break;
43bdef26
GS
517 case 'a':
518 format_code = FORMAT_ANALOG;
519 break;
7e4e65bf
GS
520 case 't':
521 format_code = FORMAT_TIME;
522 break;
2142a79b
GS
523 default: /* includes NUL */
524 return SR_ERR_ARG;
525 }
526 if (format)
527 *format = format_code;
528
529 /* Get the (optional, decimal, default 1) bit count. */
530 endp = NULL;
531 count = strtoul(spec, &endp, 10);
532 if (!endp)
533 return SR_ERR_ARG;
534 if (endp == spec)
fc3b42e9
GS
535 count = format_is_analog(format_code) ? 3 : 1;
536 if (format_is_ignore(format_code))
2142a79b
GS
537 count = 0;
538 if (format_char == 'l')
539 count = 1;
540 if (bit_count)
541 *bit_count = count;
542 spec = endp;
543
544 /* Input spec must have been exhausted. */
545 if (*spec)
546 return SR_ERR_ARG;
547
548 return SR_OK;
549}
550
9e7af34e
GS
551static int make_column_details_from_format(const struct sr_input *in,
552 const char *column_format, char **column_texts)
2142a79b 553{
9e7af34e 554 struct context *inc;
2142a79b 555 char **formats, *format;
43bdef26 556 size_t format_count, column_count, logic_count, analog_count;
1a920e33 557 size_t auto_column_count;
43bdef26 558 size_t format_idx, c, b, column_idx, channel_idx, analog_idx;
2142a79b
GS
559 enum single_col_format f;
560 struct column_details *detail;
9e7af34e
GS
561 GString *channel_name;
562 size_t create_idx;
563 char *column;
564 const char *caption;
43bdef26 565 int channel_type, channel_sdi_nr;
2142a79b
GS
566 int ret;
567
9e7af34e
GS
568 inc = in->priv;
569 inc->column_seen_count = g_strv_length(column_texts);
570
2142a79b
GS
571 /* Split the input spec, count involved columns and bits. */
572 formats = g_strsplit(column_format, ",", 0);
573 if (!formats) {
574 sr_err("Cannot parse columns format %s (comma split).", column_format);
575 return SR_ERR_ARG;
576 }
577 format_count = g_strv_length(formats);
578 if (!format_count) {
579 sr_err("Cannot parse columns format %s (field count).", column_format);
580 g_strfreev(formats);
581 return SR_ERR_ARG;
582 }
43bdef26 583 column_count = logic_count = analog_count = 0;
1a920e33 584 auto_column_count = 0;
2142a79b
GS
585 for (format_idx = 0; format_idx < format_count; format_idx++) {
586 format = formats[format_idx];
587 ret = split_column_format(format, &c, &f, &b);
588 sr_dbg("fmt %s -> %zu cols, %s fmt, %zu bits, rc %d", format, c, col_format_text[f], b, ret);
589 if (ret != SR_OK) {
590 sr_err("Cannot parse columns format %s (field split, %s).", column_format, format);
591 g_strfreev(formats);
592 return SR_ERR_ARG;
593 }
1a920e33
GS
594 if (f && !c) {
595 /* User requested "auto-count", must be last format. */
596 if (formats[format_idx + 1]) {
597 sr_err("Auto column count must be last format field.");
598 g_strfreev(formats);
599 return SR_ERR_ARG;
600 }
601 auto_column_count = inc->column_seen_count - column_count;
602 c = auto_column_count;
603 }
2142a79b 604 column_count += c;
fc3b42e9 605 if (format_is_analog(f))
43bdef26 606 analog_count += c;
fc3b42e9 607 else if (format_is_logic(f))
43bdef26 608 logic_count += c * b;
2142a79b 609 }
43bdef26
GS
610 sr_dbg("Column format %s -> %zu columns, %zu logic, %zu analog channels.",
611 column_format, column_count, logic_count, analog_count);
2142a79b 612
9e7af34e 613 /* Allocate and fill in "column processing" details. Create channels. */
2142a79b 614 inc->column_want_count = column_count;
9e7af34e
GS
615 if (inc->column_seen_count < inc->column_want_count) {
616 sr_err("Insufficient input text width for desired data amount, got %zu but want %zu columns.",
617 inc->column_seen_count, inc->column_want_count);
618 g_strfreev(formats);
619 return SR_ERR_ARG;
620 }
2142a79b 621 inc->column_details = g_malloc0_n(column_count, sizeof(inc->column_details[0]));
43bdef26 622 column_idx = channel_idx = analog_idx = 0;
9e7af34e 623 channel_name = g_string_sized_new(64);
2142a79b 624 for (format_idx = 0; format_idx < format_count; format_idx++) {
9e7af34e 625 /* Process a format field, which can span multiple columns. */
2142a79b
GS
626 format = formats[format_idx];
627 (void)split_column_format(format, &c, &f, &b);
1a920e33
GS
628 if (f && !c)
629 c = auto_column_count;
2142a79b 630 while (c-- > 0) {
9e7af34e 631 /* Fill in a column's processing details. */
2142a79b
GS
632 detail = &inc->column_details[column_idx++];
633 detail->col_nr = column_idx;
634 detail->text_format = f;
fc3b42e9 635 if (format_is_analog(detail->text_format)) {
43bdef26
GS
636 detail->channel_offset = analog_idx;
637 detail->channel_count = 1;
a267bf45 638 detail->analog_digits = b;
43bdef26 639 analog_idx += detail->channel_count;
fc3b42e9 640 } else if (format_is_logic(detail->text_format)) {
2142a79b
GS
641 detail->channel_offset = channel_idx;
642 detail->channel_count = b;
43bdef26 643 channel_idx += detail->channel_count;
fc3b42e9
GS
644 } else if (format_is_ignore(detail->text_format)) {
645 /* EMPTY */
9e7af34e 646 continue;
fc3b42e9
GS
647 } else {
648 /*
649 * Neither logic nor analog data, nor ignore.
650 * Format was noted. No channel creation involved.
651 */
652 continue;
653 }
9e7af34e 654 /*
08eb955a 655 * Pick most appropriate channel names. Optionally
9e7af34e
GS
656 * use text from a header line (when requested by the
657 * user). In the absence of header text, channels are
658 * assigned rather generic names.
659 *
660 * Manipulation of the column's caption (when a header
661 * line is seen) is acceptable, because this header
662 * line won't get processed another time.
663 */
664 column = column_texts[detail->col_nr - 1];
665 if (inc->use_header && column && *column)
666 caption = sr_scpi_unquote_string(column);
667 else
668 caption = NULL;
669 if (!caption || !*caption)
670 caption = NULL;
3f1f63f0
GS
671 /*
672 * TODO Need we first create _all_ logic channels,
fc3b42e9
GS
673 * before creating analog channels? Just store the
674 * parameters here (index, type, name) and have the
675 * creation sequence done outside of the format
676 * spec parse loop.
3f1f63f0 677 */
9e7af34e
GS
678 for (create_idx = 0; create_idx < detail->channel_count; create_idx++) {
679 if (caption && detail->channel_count == 1) {
680 g_string_assign(channel_name, caption);
681 } else if (caption) {
682 g_string_printf(channel_name, "%s[%zu]",
683 caption, create_idx);
684 } else {
685 g_string_printf(channel_name, "%zu",
686 detail->channel_offset + create_idx);
687 }
fc3b42e9 688 if (format_is_analog(detail->text_format)) {
43bdef26
GS
689 channel_sdi_nr = logic_count + detail->channel_offset + create_idx;
690 channel_type = SR_CHANNEL_ANALOG;
3f1f63f0 691 detail->channel_index = g_slist_length(in->sdi->channels);
fc3b42e9 692 } else if (format_is_logic(detail->text_format)) {
43bdef26
GS
693 channel_sdi_nr = detail->channel_offset + create_idx;
694 channel_type = SR_CHANNEL_LOGIC;
fc3b42e9
GS
695 } else {
696 continue;
43bdef26
GS
697 }
698 sr_channel_new(in->sdi, channel_sdi_nr,
699 channel_type, TRUE, channel_name->str);
9e7af34e 700 }
2142a79b
GS
701 }
702 }
703 inc->logic_channels = channel_idx;
43bdef26 704 inc->analog_channels = analog_idx;
9e7af34e 705 g_string_free(channel_name, TRUE);
2142a79b
GS
706 g_strfreev(formats);
707
708 return SR_OK;
709}
710
e53f32d2
GS
711static const struct column_details *lookup_column_details(struct context *inc, size_t nr)
712{
713 if (!inc || !inc->column_details)
714 return NULL;
715 if (!nr || nr > inc->column_want_count)
716 return NULL;
717 return &inc->column_details[nr - 1];
718}
719
19267272
GS
720/*
721 * Primitive operations for text input: Strip comments off text lines.
722 * Split text lines into columns. Process input text for individual
723 * columns.
724 */
725
41d214f6 726static void strip_comment(char *buf, const GString *prefix)
4a35548b
MS
727{
728 char *ptr;
729
730 if (!prefix->len)
731 return;
732
b2c4dde2 733 if ((ptr = strstr(buf, prefix->str))) {
41d214f6 734 *ptr = '\0';
b2c4dde2
GS
735 g_strstrip(buf);
736 }
4a35548b
MS
737}
738
19267272 739/**
e53f32d2 740 * @brief Splits a text line into a set of columns.
19267272 741 *
e53f32d2 742 * @param[in] buf The input text line to split.
19267272
GS
743 * @param[in] inc The input module's context.
744 *
e53f32d2 745 * @returns An array of strings, representing the columns' text.
19267272 746 *
e53f32d2 747 * This routine splits a text line on previously determined separators.
19267272 748 */
e53f32d2 749static char **split_line(char *buf, struct context *inc)
4a35548b 750{
e53f32d2 751 return g_strsplit(buf, inc->delimiter->str, 0);
4a35548b
MS
752}
753
19267272 754/**
e53f32d2 755 * @brief Parse a multi-bit field into several logic channels.
19267272 756 *
e53f32d2 757 * @param[in] column The input text, a run of bin/hex/oct digits.
19267272 758 * @param[in] inc The input module's context.
836fac9c 759 * @param[in] details The column processing details.
19267272
GS
760 *
761 * @retval SR_OK Success.
762 * @retval SR_ERR Invalid input data (empty, or format error).
763 *
764 * This routine modifies the logic levels in the current sample set,
e53f32d2 765 * based on the text input and a user provided format spec.
19267272 766 */
836fac9c
GS
767static int parse_logic(const char *column, struct context *inc,
768 const struct column_details *details)
4a35548b 769{
e53f32d2
GS
770 size_t length, ch_rem, ch_idx, ch_inc;
771 const char *rdptr;
4a35548b 772 char c;
e53f32d2
GS
773 gboolean valid;
774 const char *type_text;
775 uint8_t bits;
776
e53f32d2
GS
777 /*
778 * Prepare to read the digits from the text end towards the start.
779 * A digit corresponds to a variable number of channels (depending
780 * on the value's radix). Prepare the mapping of text digits to
781 * (a number of) logic channels.
782 */
783 length = strlen(column);
4a35548b 784 if (!length) {
836fac9c 785 sr_err("Column %zu in line %zu is empty.", details->col_nr,
41d214f6 786 inc->line_number);
4a35548b
MS
787 return SR_ERR;
788 }
e53f32d2 789 rdptr = &column[length];
836fac9c
GS
790 ch_idx = details->channel_offset;
791 ch_rem = details->channel_count;
4a35548b 792
e53f32d2
GS
793 /*
794 * Get another digit and derive up to four logic channels' state from
795 * it. Make sure to not process more bits than the column has channels
796 * associated with it.
797 */
798 while (rdptr > column && ch_rem) {
799 /* Check for valid digits according to the input radix. */
800 c = *(--rdptr);
836fac9c 801 switch (details->text_format) {
e53f32d2
GS
802 case FORMAT_BIN:
803 valid = g_ascii_isxdigit(c) && c < '2';
804 ch_inc = 1;
805 break;
806 case FORMAT_OCT:
807 valid = g_ascii_isxdigit(c) && c < '8';
808 ch_inc = 3;
809 break;
810 case FORMAT_HEX:
811 valid = g_ascii_isxdigit(c);
812 ch_inc = 4;
813 break;
814 default:
815 valid = FALSE;
816 break;
4a35548b 817 }
e53f32d2 818 if (!valid) {
836fac9c 819 type_text = col_format_text[details->text_format];
e53f32d2 820 sr_err("Invalid text '%s' in %s type column %zu in line %zu.",
836fac9c 821 column, type_text, details->col_nr, inc->line_number);
4a35548b 822 return SR_ERR;
e53f32d2
GS
823 }
824 /* Use the digit's bits for logic channels' data. */
825 bits = g_ascii_xdigit_value(c);
836fac9c 826 switch (details->text_format) {
e53f32d2
GS
827 case FORMAT_HEX:
828 if (ch_rem >= 4) {
829 ch_rem--;
830 set_logic_level(inc, ch_idx + 3, bits & (1 << 3));
831 }
832 /* FALLTHROUGH */
833 case FORMAT_OCT:
834 if (ch_rem >= 3) {
835 ch_rem--;
836 set_logic_level(inc, ch_idx + 2, bits & (1 << 2));
837 }
838 if (ch_rem >= 2) {
839 ch_rem--;
840 set_logic_level(inc, ch_idx + 1, bits & (1 << 1));
841 }
842 /* FALLTHROUGH */
843 case FORMAT_BIN:
844 ch_rem--;
845 set_logic_level(inc, ch_idx + 0, bits & (1 << 0));
846 break;
fc3b42e9 847 default:
836fac9c
GS
848 /* ShouldNotHappen(TM), but silences compiler warning. */
849 return SR_ERR;
4a35548b 850 }
e53f32d2 851 ch_idx += ch_inc;
4a35548b 852 }
e53f32d2
GS
853 /*
854 * TODO Determine whether the availability of extra input data
855 * for unhandled logic channels is worth warning here. In this
856 * implementation users are in control, and can have the more
857 * significant bits ignored (which can be considered a feature
858 * and not really a limitation).
859 */
4a35548b
MS
860
861 return SR_OK;
862}
863
43bdef26
GS
864/**
865 * @brief Parse a floating point text into an analog value.
866 *
867 * @param[in] column The input text, a floating point number.
868 * @param[in] inc The input module's context.
869 * @param[in] details The column processing details.
870 *
871 * @retval SR_OK Success.
872 * @retval SR_ERR Invalid input data (empty, or format error).
873 *
874 * This routine modifies the analog values in the current sample set,
875 * based on the text input and a user provided format spec.
876 */
877static int parse_analog(const char *column, struct context *inc,
878 const struct column_details *details)
879{
880 size_t length;
881 double dvalue; float fvalue;
882 csv_analog_t value;
883 int ret;
884
fc3b42e9 885 if (!format_is_analog(details->text_format))
43bdef26
GS
886 return SR_ERR_BUG;
887
888 length = strlen(column);
889 if (!length) {
890 sr_err("Column %zu in line %zu is empty.", details->col_nr,
891 inc->line_number);
892 return SR_ERR;
893 }
894 if (sizeof(value) == sizeof(double)) {
895 ret = sr_atod_ascii(column, &dvalue);
896 value = dvalue;
897 } else if (sizeof(value) == sizeof(float)) {
898 ret = sr_atof_ascii(column, &fvalue);
899 value = fvalue;
900 } else {
901 ret = SR_ERR_BUG;
902 }
903 if (ret != SR_OK) {
904 sr_err("Cannot parse analog text %s in column %zu in line %zu.",
905 column, details->col_nr, inc->line_number);
906 return SR_ERR_DATA;
907 }
908 set_analog_value(inc, details->channel_offset, value);
909
910 return SR_OK;
911}
912
7e4e65bf
GS
913/**
914 * @brief Parse a timestamp text, auto-determine samplerate.
915 *
916 * @param[in] column The input text, a floating point number.
917 * @param[in] inc The input module's context.
918 * @param[in] details The column processing details.
919 *
920 * @retval SR_OK Success.
921 * @retval SR_ERR Invalid input data (empty, or format error).
922 *
923 * This routine attempts to automatically determine the input data's
924 * samplerate from text rows' timestamp values. Only simple formats are
925 * supported, user provided values always take precedence.
926 */
927static int parse_timestamp(const char *column, struct context *inc,
928 const struct column_details *details)
929{
930 double ts, rate;
931 int ret;
932
933 if (!format_is_timestamp(details->text_format))
934 return SR_ERR_BUG;
935
936 /*
937 * Implementor's notes on timestamp interpretation. Use a simple
938 * approach for improved maintainability which covers most cases
939 * of input data. There is not much gain in adding complexity,
940 * users can easily provide the rate when auto-detection fails.
941 * - Bail out if samplerate is known already.
942 * - Try to interpret the timestamp (simple float conversion).
943 * If conversion fails then clear all previous knowledge and
944 * bail out (non-fatal, perhaps warn). Silently ignore values
945 * of zero since those could be silent fails -- assume that
946 * genuine data contains at least two adjacent rows with useful
947 * timestamps for the feature to work reliably. Annoying users
948 * with "failed to detect" messages is acceptable here, since
949 * users expecting the feature to work should provide useful
950 * data, and there are easy ways to disable the detection or
951 * ignore the column.
952 * - If there is no previous timestamp, keep the current value
953 * for later reference and bail out.
954 * - If a previous timestamp was seen, determine the difference
955 * between them, and derive the samplerate. Update internal
956 * state (the value automatically gets sent to the datafeed),
957 * and clear previous knowledge. Subsequent calls will ignore
958 * following input data (see above, rate is known).
959 *
960 * TODO Potential future improvements:
961 * - Prefer rationals over floats for improved precision and
962 * reduced rounding errors which result in odd rates.
963 * - Support other formats ("2 ms" or similar)?
964 */
965 if (inc->samplerate)
966 return SR_OK;
967 ret = sr_atod_ascii(column, &ts);
968 if (ret != SR_OK)
969 ts = 0.0;
970 if (!ts) {
971 sr_warn("Cannot convert timestamp text %s in line %zu (or zero value).",
972 column, inc->line_number);
973 inc->prev_timestamp = 0.0;
974 return SR_OK;
975 }
976 if (!inc->prev_timestamp) {
977 sr_dbg("First timestamp value %g in line %zu.",
978 ts, inc->line_number);
979 inc->prev_timestamp = ts;
980 return SR_OK;
981 }
982 sr_dbg("Second timestamp value %g in line %zu.", ts, inc->line_number);
983 ts -= inc->prev_timestamp;
984 sr_dbg("Timestamp difference %g in line %zu.",
985 ts, inc->line_number);
986 if (!ts) {
987 sr_warn("Zero timestamp difference in line %zu.",
988 inc->line_number);
989 inc->prev_timestamp = ts;
990 return SR_OK;
991 }
992 rate = 1.0 / ts;
993 rate += 0.5;
994 rate = (uint64_t)rate;
995 sr_dbg("Rate from timestamp %g in line %zu.", rate, inc->line_number);
996 inc->samplerate = rate;
997 inc->prev_timestamp = 0.0;
998
999 return SR_OK;
1000}
1001
836fac9c
GS
1002/**
1003 * @brief Parse routine which ignores the input text.
1004 *
1005 * This routine exists to unify dispatch code paths, mapping input file
1006 * columns' data types to their respective parse routines.
1007 */
1008static int parse_ignore(const char *column, struct context *inc,
1009 const struct column_details *details)
1010{
1011 (void)column;
1012 (void)inc;
1013 (void)details;
1014 return SR_OK;
1015}
1016
/*
 * Signature shared by all column parse routines: the column's text,
 * the input module's context, and the column's processing details.
 */
typedef int (*col_parse_cb)(const char *column, struct context *inc,
	const struct column_details *details);

/*
 * Dispatch table, indexed by the column's text format. The octal, hex,
 * and binary formats all share the logic parser. Formats without an
 * entry end up with a NULL callback.
 */
static const col_parse_cb col_parse_funcs[] = {
	[FORMAT_NONE] = parse_ignore,
	[FORMAT_BIN] = parse_logic,
	[FORMAT_OCT] = parse_logic,
	[FORMAT_HEX] = parse_logic,
	[FORMAT_ANALOG] = parse_analog,
	[FORMAT_TIME] = parse_timestamp,
};
1028
41d214f6 1029static int init(struct sr_input *in, GHashTable *options)
4a35548b 1030{
41d214f6 1031 struct context *inc;
1a920e33 1032 size_t single_column, first_column, logic_channels;
41d214f6 1033 const char *s;
836fac9c 1034 enum single_col_format format;
1a920e33 1035 char format_char;
4a35548b 1036
836fac9c
GS
1037 in->sdi = g_malloc0(sizeof(*in->sdi));
1038 in->priv = inc = g_malloc0(sizeof(*inc));
4a35548b 1039
72903e9d 1040 single_column = g_variant_get_uint32(g_hash_table_lookup(options, "single_column"));
72903e9d 1041 logic_channels = g_variant_get_uint32(g_hash_table_lookup(options, "logic_channels"));
41d214f6 1042 inc->delimiter = g_string_new(g_variant_get_string(
72903e9d 1043 g_hash_table_lookup(options, "column_separator"), NULL));
836fac9c 1044 if (!inc->delimiter->len) {
72903e9d 1045 sr_err("Column separator cannot be empty.");
41d214f6 1046 return SR_ERR_ARG;
4a35548b 1047 }
72903e9d 1048 s = g_variant_get_string(g_hash_table_lookup(options, "single_format"), NULL);
836fac9c
GS
1049 if (g_ascii_strncasecmp(s, "bin", 3) == 0) {
1050 format = FORMAT_BIN;
1051 } else if (g_ascii_strncasecmp(s, "hex", 3) == 0) {
1052 format = FORMAT_HEX;
1053 } else if (g_ascii_strncasecmp(s, "oct", 3) == 0) {
1054 format = FORMAT_OCT;
41d214f6 1055 } else {
72903e9d 1056 sr_err("Invalid single-column format: '%s'", s);
41d214f6 1057 return SR_ERR_ARG;
4a35548b 1058 }
41d214f6 1059 inc->comment = g_string_new(g_variant_get_string(
72903e9d 1060 g_hash_table_lookup(options, "comment_leader"), NULL));
41d214f6 1061 if (g_string_equal(inc->comment, inc->delimiter)) {
e53f32d2
GS
1062 /*
1063 * Using the same sequence as comment leader and column
72903e9d
GS
1064 * separator won't work. The user probably specified ';'
1065 * as the column separator but did not adjust the comment
e53f32d2
GS
1066 * leader. Try DWIM, drop comment strippin support here.
1067 */
72903e9d 1068 sr_warn("Comment leader and column separator conflict, disabling comment support.");
41d214f6 1069 g_string_truncate(inc->comment, 0);
4a35548b 1070 }
6e8d95a5 1071 inc->samplerate = g_variant_get_uint64(g_hash_table_lookup(options, "samplerate"));
72903e9d 1072 first_column = g_variant_get_uint32(g_hash_table_lookup(options, "first_column"));
de8fe3b5 1073 inc->use_header = g_variant_get_boolean(g_hash_table_lookup(options, "header"));
72903e9d 1074 inc->start_line = g_variant_get_uint32(g_hash_table_lookup(options, "start_line"));
41d214f6 1075 if (inc->start_line < 1) {
6433156c 1076 sr_err("Invalid start line %zu.", inc->start_line);
41d214f6 1077 return SR_ERR_ARG;
4a35548b
MS
1078 }
1079
e53f32d2 1080 /*
1a920e33
GS
1081 * Scan flexible, to get prefered format specs which describe
1082 * the input file's data formats. As well as some simple specs
1083 * for backwards compatibility and user convenience.
1084 *
1085 * This logic ends up with a copy of the format string, either
1086 * user provided or internally derived. Actual creation of the
1087 * column processing details gets deferred until the first line
1088 * of input data was seen. To support automatic determination of
1089 * e.g. channel counts from column counts.
e53f32d2 1090 */
72903e9d 1091 s = g_variant_get_string(g_hash_table_lookup(options, "column_formats"), NULL);
2142a79b 1092 if (s && *s) {
1a920e33 1093 inc->column_formats = g_strdup(s);
72903e9d 1094 sr_dbg("User specified column_formats: %s.", s);
1a920e33
GS
1095 } else if (single_column && logic_channels) {
1096 format_char = col_format_char[format];
1097 if (single_column == 1) {
1098 inc->column_formats = g_strdup_printf("%c%zu",
1099 format_char, logic_channels);
e53f32d2 1100 } else {
1a920e33
GS
1101 inc->column_formats = g_strdup_printf("%zu-,%c%zu",
1102 single_column - 1,
1103 format_char, logic_channels);
e53f32d2 1104 }
72903e9d 1105 sr_dbg("Backwards compat single_column, col %zu, fmt %s, bits %zu -> %s.",
1a920e33
GS
1106 single_column, col_format_text[format], logic_channels,
1107 inc->column_formats);
1108 } else if (!single_column) {
1109 if (first_column > 1) {
1110 inc->column_formats = g_strdup_printf("%zu-,%zul",
1111 first_column - 1, logic_channels);
1112 } else {
1113 inc->column_formats = g_strdup_printf("%zul",
1114 logic_channels);
1115 }
1116 sr_dbg("Backwards compat multi-column, col %zu, chans %zu -> %s.",
1117 first_column, logic_channels,
1118 inc->column_formats);
e53f32d2 1119 } else {
72903e9d 1120 sr_warn("Unknown or unsupported columns layout spec, assuming simple multi-column mode.");
1a920e33 1121 inc->column_formats = g_strdup("*l");
4a35548b
MS
1122 }
1123
41d214f6
BV
1124 return SR_OK;
1125}
4a35548b 1126
affaf540
GS
1127/*
1128 * Check the channel list for consistency across file re-import. See
1129 * the VCD input module for more details and motivation.
1130 */
1131
1132static void keep_header_for_reread(const struct sr_input *in)
1133{
1134 struct context *inc;
1135
1136 inc = in->priv;
1137 g_slist_free_full(inc->prev_sr_channels, sr_channel_free_cb);
1138 inc->prev_sr_channels = in->sdi->channels;
1139 in->sdi->channels = NULL;
1140}
1141
1142static int check_header_in_reread(const struct sr_input *in)
1143{
1144 struct context *inc;
1145
1146 if (!in)
1147 return FALSE;
1148 inc = in->priv;
1149 if (!inc)
1150 return FALSE;
1151 if (!inc->prev_sr_channels)
1152 return TRUE;
1153
1154 if (sr_channel_lists_differ(inc->prev_sr_channels, in->sdi->channels)) {
1155 sr_err("Channel list change not supported for file re-read.");
1156 return FALSE;
1157 }
1158 g_slist_free_full(in->sdi->channels, sr_channel_free_cb);
1159 in->sdi->channels = inc->prev_sr_channels;
1160 inc->prev_sr_channels = NULL;
1161
1162 return TRUE;
1163}
1164
492dfa90
GS
1165static const char *delim_set = "\r\n";
1166
329733d9 1167static const char *get_line_termination(GString *buf)
41d214f6 1168{
329733d9 1169 const char *term;
4a35548b 1170
41d214f6
BV
1171 term = NULL;
1172 if (g_strstr_len(buf->str, buf->len, "\r\n"))
1173 term = "\r\n";
1174 else if (memchr(buf->str, '\n', buf->len))
1175 term = "\n";
1176 else if (memchr(buf->str, '\r', buf->len))
1177 term = "\r";
4a35548b 1178
41d214f6
BV
1179 return term;
1180}
4a35548b 1181
41d214f6
BV
/*
 * Inspect the first usable text line of the initially received input.
 * Derives the column processing details (and thus the channels) from
 * that line and the column formats spec, then allocates the buffers
 * for datafeed submission of logic and analog samples.
 *
 * Returns SR_OK upon success, SR_ERR_NA when no usable text line is
 * available yet, other error codes for fatal conditions. The split
 * lines and columns get released on all paths.
 */
static int initial_parse(const struct sr_input *in, GString *buf)
{
	struct context *inc;
	size_t num_columns;
	size_t line_number, line_idx;
	int ret;
	char **lines, *line, **columns;

	ret = SR_OK;
	inc = in->priv;
	columns = NULL;

	/* Search for the first line to process (header or data). */
	line_number = 0;
	if (inc->termination)
		lines = g_strsplit(buf->str, inc->termination, 0);
	else
		lines = g_strsplit_set(buf->str, delim_set, 0);
	for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
		line_number++;
		if (inc->start_line > line_number) {
			sr_spew("Line %zu skipped (before start).", line_number);
			continue;
		}
		if (line[0] == '\0') {
			sr_spew("Blank line %zu skipped.", line_number);
			continue;
		}
		strip_comment(line, inc->comment);
		if (line[0] == '\0') {
			sr_spew("Comment-only line %zu skipped.", line_number);
			continue;
		}

		/* Reached first proper line. */
		break;
	}
	/* The loop leaves 'line' at NULL when all lines got skipped. */
	if (!line) {
		/* Not enough data for a proper line yet. */
		ret = SR_ERR_NA;
		goto out;
	}

	/* Get the number of columns in the line. */
	columns = split_line(line, inc);
	if (!columns) {
		sr_err("Error while parsing line %zu.", line_number);
		ret = SR_ERR;
		goto out;
	}
	num_columns = g_strv_length(columns);
	if (!num_columns) {
		sr_err("Error while parsing line %zu.", line_number);
		ret = SR_ERR;
		goto out;
	}
	sr_dbg("DIAG Got %zu columns in text line: %s.", num_columns, line);

	/*
	 * Interpret the user provided column format specs. This might
	 * involve inspection of the now received input text, to support
	 * e.g. automatic detection of channel counts in the absence of
	 * user provided specs. Optionally a header line is used to get
	 * channels' names.
	 *
	 * Check the then created channels for consistency across .reset
	 * and .receive sequences (file re-load).
	 */
	ret = make_column_details_from_format(in, inc->column_formats, columns);
	if (ret != SR_OK) {
		sr_err("Cannot parse columns format using line %zu.", line_number);
		goto out;
	}
	if (!check_header_in_reread(in)) {
		ret = SR_ERR_DATA;
		goto out;
	}

	/*
	 * Allocate buffer memory for datafeed submission of sample data.
	 * Calculate the minimum buffer size to store the set of samples
	 * of all channels (unit size). Determine a larger buffer size
	 * for datafeed submission that is a multiple of the unit size.
	 * Allocate the larger buffer, the "sample buffer" will point
	 * to a location within that large buffer later.
	 *
	 * TODO Move channel creation here, and just store required
	 * parameters in the format parser above? Could simplify the
	 * arrangement that logic and analog channels get created in
	 * strict sequence in their respective groups.
	 */
	if (inc->logic_channels) {
		/* One bit per logic channel, rounded up to full bytes. */
		inc->sample_unit_size = (inc->logic_channels + 7) / 8;
		inc->datafeed_buf_size = CHUNK_SIZE;
		inc->datafeed_buf_size *= inc->sample_unit_size;
		inc->datafeed_buffer = g_malloc(inc->datafeed_buf_size);
		if (!inc->datafeed_buffer) {
			sr_err("Cannot allocate datafeed send buffer (logic).");
			ret = SR_ERR_MALLOC;
			goto out;
		}
		inc->datafeed_buf_fill = 0;
	}

	if (inc->analog_channels) {
		size_t sample_size, sample_count;
		size_t detail_idx;
		struct column_details *detail;
		int *digits_item;
		void *channel;
		/*
		 * The analog buffer size is a number of samples per
		 * channel, such that the samples of all channels fit
		 * into roughly CHUNK_SIZE bytes.
		 */
		sample_size = sizeof(inc->analog_datafeed_buffer[0]);
		inc->analog_datafeed_buf_size = CHUNK_SIZE;
		inc->analog_datafeed_buf_size /= sample_size;
		inc->analog_datafeed_buf_size /= inc->analog_channels;
		sample_count = inc->analog_channels * inc->analog_datafeed_buf_size;
		inc->analog_datafeed_buffer = g_malloc0(sample_count * sample_size);
		if (!inc->analog_datafeed_buffer) {
			sr_err("Cannot allocate datafeed send buffer (analog).");
			ret = SR_ERR_MALLOC;
			goto out;
		}
		inc->analog_datafeed_buf_fill = 0;
		inc->analog_datafeed_channels = g_malloc0(inc->analog_channels * sizeof(inc->analog_datafeed_channels[0]));
		inc->analog_datafeed_digits = g_malloc0(inc->analog_channels * sizeof(inc->analog_datafeed_digits[0]));
		digits_item = inc->analog_datafeed_digits;
		/*
		 * Per analog column: Keep a single-item list with the
		 * column's channel (stored at the column's channel
		 * offset), and the column's digits count (stored in the
		 * order of analog columns).
		 */
		for (detail_idx = 0; detail_idx < inc->column_want_count; detail_idx++) {
			detail = &inc->column_details[detail_idx];
			if (!format_is_analog(detail->text_format))
				continue;
			channel = g_slist_nth_data(in->sdi->channels, detail->channel_index);
			inc->analog_datafeed_channels[detail->channel_offset] = g_slist_append(NULL, channel);
			*digits_item++ = detail->analog_digits;
		}
	}

out:
	if (columns)
		g_strfreev(columns);
	g_strfreev(lines);

	return ret;
}
1324
4439363a
GS
1325/*
1326 * Gets called from initial_receive(), which runs until the end-of-line
1327 * encoding of the input stream could get determined. Assumes that this
1328 * routine receives enough buffered initial input data to either see the
1329 * BOM when there is one, or that no BOM will follow when a text line
1330 * termination sequence was seen. Silently drops the UTF-8 BOM sequence
1331 * from the input buffer if one was seen. Does not care to protect
1332 * against multiple execution or dropping the BOM multiple times --
1333 * there should be at most one in the input stream.
1334 */
1335static void initial_bom_check(const struct sr_input *in)
1336{
1337 static const char *utf8_bom = "\xef\xbb\xbf";
1338
1339 if (in->buf->len < strlen(utf8_bom))
1340 return;
1341 if (strncmp(in->buf->str, utf8_bom, strlen(utf8_bom)) != 0)
1342 return;
1343 g_string_erase(in->buf, 0, strlen(utf8_bom));
1344}
1345
41d214f6 1346static int initial_receive(const struct sr_input *in)
4a35548b 1347{
41d214f6
BV
1348 struct context *inc;
1349 GString *new_buf;
1350 int len, ret;
329733d9
UH
1351 char *p;
1352 const char *termination;
4a35548b 1353
4439363a
GS
1354 initial_bom_check(in);
1355
41d214f6 1356 inc = in->priv;
4a35548b 1357
df0db9fd
GS
1358 termination = get_line_termination(in->buf);
1359 if (!termination)
41d214f6 1360 /* Don't have a full line yet. */
d0181813 1361 return SR_ERR_NA;
4a35548b 1362
df0db9fd
GS
1363 p = g_strrstr_len(in->buf->str, in->buf->len, termination);
1364 if (!p)
41d214f6 1365 /* Don't have a full line yet. */
d0181813 1366 return SR_ERR_NA;
41d214f6
BV
1367 len = p - in->buf->str - 1;
1368 new_buf = g_string_new_len(in->buf->str, len);
1369 g_string_append_c(new_buf, '\0');
4a35548b 1370
41d214f6
BV
1371 inc->termination = g_strdup(termination);
1372
1373 if (in->buf->str[0] != '\0')
1374 ret = initial_parse(in, new_buf);
1375 else
1376 ret = SR_OK;
1377
1378 g_string_free(new_buf, TRUE);
1379
1380 return ret;
1381}
1382
7f4c3a62 1383static int process_buffer(struct sr_input *in, gboolean is_eof)
41d214f6 1384{
41d214f6
BV
1385 struct context *inc;
1386 gsize num_columns;
e53f32d2 1387 size_t line_idx, col_idx, col_nr;
836fac9c
GS
1388 const struct column_details *details;
1389 col_parse_cb parse_func;
ad6a2bee 1390 int ret;
e53f32d2 1391 char *p, **lines, *line, **columns, *column;
41d214f6 1392
41d214f6 1393 inc = in->priv;
d0181813 1394 if (!inc->started) {
bee2b016 1395 std_session_send_df_header(in->sdi);
d0181813 1396 inc->started = TRUE;
4a35548b
MS
1397 }
1398
4555d3bd
GS
1399 /*
1400 * Consider empty input non-fatal. Keep accumulating input until
1401 * at least one full text line has become available. Grab the
1402 * maximum amount of accumulated data that consists of full text
1403 * lines, and process what has been received so far, leaving not
1404 * yet complete lines for the next invocation.
7f4c3a62
GS
1405 *
1406 * Enforce that all previously buffered data gets processed in
1407 * the "EOF" condition. Do not insist in the presence of the
1408 * termination sequence for the last line (may often be missing
1409 * on Windows). A present termination sequence will just result
1410 * in the "execution of an empty line", and does not harm.
4555d3bd
GS
1411 */
1412 if (!in->buf->len)
1413 return SR_OK;
7f4c3a62
GS
1414 if (is_eof) {
1415 p = in->buf->str + in->buf->len;
1416 } else {
1417 p = g_strrstr_len(in->buf->str, in->buf->len, inc->termination);
1418 if (!p)
1419 return SR_ERR;
1420 *p = '\0';
1421 p += strlen(inc->termination);
1422 }
41d214f6 1423 g_strstrip(in->buf->str);
4a35548b 1424
18078d05 1425 ret = SR_OK;
ef0b9935 1426 lines = g_strsplit(in->buf->str, inc->termination, 0);
e53f32d2 1427 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
41d214f6 1428 inc->line_number++;
ef0b9935
GS
1429 if (inc->line_number < inc->start_line) {
1430 sr_spew("Line %zu skipped (before start).", inc->line_number);
1431 continue;
1432 }
df0db9fd 1433 if (line[0] == '\0') {
41d214f6 1434 sr_spew("Blank line %zu skipped.", inc->line_number);
4a35548b
MS
1435 continue;
1436 }
1437
1438 /* Remove trailing comment. */
df0db9fd
GS
1439 strip_comment(line, inc->comment);
1440 if (line[0] == '\0') {
41d214f6 1441 sr_spew("Comment-only line %zu skipped.", inc->line_number);
4a35548b
MS
1442 continue;
1443 }
1444
160691b9 1445 /* Skip the header line, its content was used as the channel names. */
de8fe3b5 1446 if (inc->use_header && !inc->header_seen) {
160691b9 1447 sr_spew("Header line %zu skipped.", inc->line_number);
de8fe3b5 1448 inc->header_seen = TRUE;
160691b9
JS
1449 continue;
1450 }
1451
e53f32d2
GS
1452 /* Split the line into columns, check for minimum length. */
1453 columns = split_line(line, inc);
df0db9fd 1454 if (!columns) {
41d214f6 1455 sr_err("Error while parsing line %zu.", inc->line_number);
2355d229 1456 g_strfreev(lines);
4a35548b
MS
1457 return SR_ERR;
1458 }
4a35548b 1459 num_columns = g_strv_length(columns);
e53f32d2
GS
1460 if (num_columns < inc->column_want_count) {
1461 sr_err("Insufficient column count %zu in line %zu.",
1462 num_columns, inc->line_number);
4a35548b 1463 g_strfreev(columns);
2355d229 1464 g_strfreev(lines);
4a35548b
MS
1465 return SR_ERR;
1466 }
1467
836fac9c 1468 /* Have the columns of the current text line processed. */
626c388a 1469 clear_logic_samples(inc);
43bdef26 1470 clear_analog_samples(inc);
e53f32d2
GS
1471 for (col_idx = 0; col_idx < inc->column_want_count; col_idx++) {
1472 column = columns[col_idx];
1473 col_nr = col_idx + 1;
836fac9c
GS
1474 details = lookup_column_details(inc, col_nr);
1475 if (!details || !details->text_format)
1476 continue;
1477 parse_func = col_parse_funcs[details->text_format];
1478 if (!parse_func)
1479 continue;
1480 ret = parse_func(column, inc, details);
e53f32d2
GS
1481 if (ret != SR_OK) {
1482 g_strfreev(columns);
1483 g_strfreev(lines);
1484 return SR_ERR;
1485 }
4a35548b
MS
1486 }
1487
626c388a
GS
1488 /* Send sample data to the session bus (buffered). */
1489 ret = queue_logic_samples(in);
43bdef26 1490 ret += queue_analog_samples(in);
41d214f6 1491 if (ret != SR_OK) {
4a35548b 1492 sr_err("Sending samples failed.");
cd59e6ec 1493 g_strfreev(columns);
2355d229 1494 g_strfreev(lines);
4a35548b
MS
1495 return SR_ERR;
1496 }
cd59e6ec 1497
41d214f6
BV
1498 g_strfreev(columns);
1499 }
1500 g_strfreev(lines);
241c386a 1501 g_string_erase(in->buf, 0, p - in->buf->str);
41d214f6 1502
7066fd46 1503 return ret;
41d214f6
BV
1504}
1505
7066fd46 1506static int receive(struct sr_input *in, GString *buf)
41d214f6
BV
1507{
1508 struct context *inc;
7066fd46
BV
1509 int ret;
1510
1511 g_string_append_len(in->buf, buf->str, buf->len);
41d214f6
BV
1512
1513 inc = in->priv;
1a920e33 1514 if (!inc->column_seen_count) {
df0db9fd
GS
1515 ret = initial_receive(in);
1516 if (ret == SR_ERR_NA)
7066fd46
BV
1517 /* Not enough data yet. */
1518 return SR_OK;
1519 else if (ret != SR_OK)
1520 return SR_ERR;
1521
1522 /* sdi is ready, notify frontend. */
1523 in->sdi_ready = TRUE;
41d214f6 1524 return SR_OK;
7066fd46
BV
1525 }
1526
7f4c3a62 1527 ret = process_buffer(in, FALSE);
7066fd46
BV
1528
1529 return ret;
1530}
1531
1532static int end(struct sr_input *in)
1533{
1534 struct context *inc;
7066fd46 1535 int ret;
41d214f6 1536
7066fd46 1537 if (in->sdi_ready)
7f4c3a62 1538 ret = process_buffer(in, TRUE);
7066fd46
BV
1539 else
1540 ret = SR_OK;
cd59e6ec
GS
1541 if (ret != SR_OK)
1542 return ret;
1543
626c388a 1544 ret = flush_logic_samples(in);
43bdef26 1545 ret += flush_analog_samples(in);
cd59e6ec
GS
1546 if (ret != SR_OK)
1547 return ret;
7066fd46
BV
1548
1549 inc = in->priv;
3be42bc2 1550 if (inc->started)
bee2b016 1551 std_session_send_df_end(in->sdi);
4a35548b 1552
7066fd46
BV
1553 return ret;
1554}
1555
d5cc282f 1556static void cleanup(struct sr_input *in)
7066fd46
BV
1557{
1558 struct context *inc;
1559
affaf540
GS
1560 keep_header_for_reread(in);
1561
7066fd46
BV
1562 inc = in->priv;
1563
b1f83103 1564 g_free(inc->termination);
539188e5 1565 inc->termination = NULL;
cd59e6ec 1566 g_free(inc->datafeed_buffer);
539188e5 1567 inc->datafeed_buffer = NULL;
43bdef26
GS
1568 g_free(inc->analog_datafeed_buffer);
1569 inc->analog_datafeed_buffer = NULL;
4a35548b
MS
1570}
1571
ad93bfb0
SA
1572static int reset(struct sr_input *in)
1573{
1574 struct context *inc = in->priv;
1575
1576 cleanup(in);
1577 inc->started = FALSE;
1578 g_string_truncate(in->buf, 0);
1579
1580 return SR_OK;
1581}
1582
/*
 * Indices into the options[] array. OPT_MAX is not an option, it is
 * the index of the array's terminating all-zero item.
 */
enum option_index {
	OPT_COL_FMTS,
	OPT_SINGLE_COL,
	OPT_FIRST_COL,
	OPT_NUM_LOGIC,
	OPT_SINGLE_FMT,
	OPT_START_LINE,
	OPT_HEADER,
	OPT_SAMPLERATE,
	OPT_COL_SEP,
	OPT_COMMENT,
	OPT_MAX,
};
1596
/*
 * The input module's options. Each item provides the option's key,
 * a short name, and a description. The two trailing NULL members
 * (default value, list of values) get filled in by get_options()
 * upon its first invocation.
 */
static struct sr_option options[] = {
	[OPT_COL_FMTS] = {
		"column_formats", "Column format specs",
		"Text columns data types. A comma separated list of [<cols>]<fmt>[<bits>] items. * for all remaining columns. - ignores columns, x/o/b/l logic data, a (and digits) analog data, t timestamps.",
		NULL, NULL,
	},
	[OPT_SINGLE_COL] = {
		"single_column", "Single column",
		"Simple single-column mode, exclusively use text from the specified column (number starting at 1). Obsoleted by 'column_formats=4-,x16'.",
		NULL, NULL,
	},
	[OPT_FIRST_COL] = {
		"first_column", "First column",
		"First column with logic data in simple multi-column mode (number starting at 1, default 1). Obsoleted by 'column_formats=4-,*l'.",
		NULL, NULL,
	},
	[OPT_NUM_LOGIC] = {
		"logic_channels", "Number of logic channels",
		"Logic channel count, required in simple single-column mode, defaults to \"all remaining columns\" in simple multi-column mode. Obsoleted by 'column_formats=8l'.",
		NULL, NULL,
	},
	[OPT_SINGLE_FMT] = {
		"single_format", "Data format for simple single-column mode.",
		"The input text number format of simple single-column mode: bin, hex, oct. Obsoleted by 'column_formats=x8'.",
		NULL, NULL,
	},
	[OPT_START_LINE] = {
		"start_line", "Start line",
		"The line number at which to start processing input text (default: 1).",
		NULL, NULL,
	},
	[OPT_HEADER] = {
		"header", "Get channel names from first line.",
		"Use the first processed line's column captions (when available) as channel names. Off by default",
		NULL, NULL,
	},
	[OPT_SAMPLERATE] = {
		"samplerate", "Samplerate (Hz)",
		"The input data's sample rate in Hz. No default value.",
		NULL, NULL,
	},
	[OPT_COL_SEP] = {
		"column_separator", "Column separator",
		"The sequence which separates text columns. Non-empty text, comma by default.",
		NULL, NULL,
	},
	[OPT_COMMENT] = {
		"comment_leader", "Comment leader character",
		"The text which starts comments at the end of text lines, semicolon by default.",
		NULL, NULL,
	},
	/* Terminating item of the options list. */
	[OPT_MAX] = ALL_ZERO,
};
1650
2c240774 1651static const struct sr_option *get_options(void)
41d214f6 1652{
31c41782
UH
1653 GSList *l;
1654
41d214f6 1655 if (!options[0].def) {
1a920e33 1656 options[OPT_COL_FMTS].def = g_variant_ref_sink(g_variant_new_string(""));
e53f32d2 1657 options[OPT_SINGLE_COL].def = g_variant_ref_sink(g_variant_new_uint32(0));
72903e9d 1658 options[OPT_FIRST_COL].def = g_variant_ref_sink(g_variant_new_uint32(1));
e53f32d2 1659 options[OPT_NUM_LOGIC].def = g_variant_ref_sink(g_variant_new_uint32(0));
43e1e23a 1660 options[OPT_SINGLE_FMT].def = g_variant_ref_sink(g_variant_new_string("bin"));
31c41782
UH
1661 l = NULL;
1662 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("bin")));
1663 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("hex")));
1664 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("oct")));
43e1e23a
GS
1665 options[OPT_SINGLE_FMT].values = l;
1666 options[OPT_START_LINE].def = g_variant_ref_sink(g_variant_new_uint32(1));
72903e9d 1667 options[OPT_HEADER].def = g_variant_ref_sink(g_variant_new_boolean(FALSE));
43e1e23a
GS
1668 options[OPT_SAMPLERATE].def = g_variant_ref_sink(g_variant_new_uint64(0));
1669 options[OPT_COL_SEP].def = g_variant_ref_sink(g_variant_new_string(","));
72903e9d 1670 options[OPT_COMMENT].def = g_variant_ref_sink(g_variant_new_string(";"));
41d214f6
BV
1671 }
1672
1673 return options;
1674}
1675
/*
 * The CSV input module's description: identification, supported file
 * extensions, and the routines which implement the module.
 */
SR_PRIV struct sr_input_module input_csv = {
	.id = "csv",
	.name = "CSV",
	.desc = "Comma-separated values",
	.exts = (const char*[]){"csv", NULL},
	.options = get_options,
	.init = init,
	.receive = receive,
	.end = end,
	.cleanup = cleanup,
	.reset = reset,
};