]> sigrok.org Git - libsigrok.git/blame - src/input/csv.c
input/csv: update comments/helptext for analog input data
[libsigrok.git] / src / input / csv.c
CommitLineData
4a35548b
MS
1/*
2 * This file is part of the libsigrok project.
3 *
4 * Copyright (C) 2013 Marc Schink <sigrok-dev@marcschink.de>
e53f32d2 5 * Copyright (C) 2019 Gerhard Sittig <gerhard.sittig@gmx.net>
4a35548b
MS
6 *
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20
e05f1827
GS
21#include "config.h"
22
23#include <glib.h>
4a35548b
MS
24#include <stdlib.h>
25#include <string.h>
e05f1827 26
c1aae900 27#include <libsigrok/libsigrok.h>
4a35548b 28#include "libsigrok-internal.h"
f6dcb320 29#include "scpi.h" /* String un-quote for channel name from header line. */
4a35548b 30
/* Prefix used by this module's log messages. */
#define LOG_PREFIX "input/csv"

/* Chunk size in bytes (4 MiB). */
#define CHUNK_SIZE (4 * 1024 * 1024)
cd59e6ec 34
4a35548b
MS
35/*
36 * The CSV input module has the following options:
37 *
72903e9d
GS
38 * column_formats: Specifies the data formats and channel counts for the
39 * input file's text columns. Accepts a comma separated list of tuples
40 * with: an optional column repeat count ('*' as a wildcard meaning
41 * "all remaining columns", only applicable to the last field), a format
42 * specifying character ('x' hexadecimal, 'o' octal, 'b' binary, 'l'
43 * single-bit logic), and an optional bit count (translating to: logic
08eb955a
GS
44 * channels communicated in that column). The 'a' format marks analog
45 * data, an optionally following number is the digits count (resolution).
46 * This "column_formats" option is most versatile, other forms of
47 * specifying the column layout only exist for backwards compatibility.
4a35548b 48 *
72903e9d
GS
49 * single_column: Specifies the column number which contains the logic data
50 * for single-column mode. All logic data is taken from several bits
51 * which all are kept within that one column. Only exists for backwards
52 * compatibility, see "column_formats" for more flexibility.
4a35548b 53 *
72903e9d
GS
54 * first_column: Specifies the number of the first column with logic data
55 * in simple multi-column mode. Only exists for backwards compatibility,
56 * see "column_formats" for more flexibility.
4a35548b 57 *
72903e9d
GS
58 * logic_channels: Specifies the number of logic channels. Is required in
59 * simple single-column mode. Is optional in simple multi-column mode
60 * (and defaults to all remaining columns). Only exists for backwards
61 * compatibility, see "column_formats" for more flexibility.
4a35548b 62 *
72903e9d
GS
63 * single_format: Specifies the format of the input text in simple single-
64 * column mode. Available formats are: 'bin' (default), 'hex' and 'oct'.
65 * Simple multi-column mode always uses single-bit data per column.
66 * Only exists for backwards compatibility, see "column_formats" for
67 * more flexibility.
4a35548b 68 *
72903e9d
GS
69 * start_line: Specifies at which line to start processing the input file.
70 * Allows to skip leading lines which neither are header nor data lines.
71 * By default all of the input file gets processed.
4a35548b 72 *
72903e9d
GS
73 * header: Boolean option, controls whether the first processed line is used
74 * to determine channel names. Off by default. Generic channel names are
75 * used in the absence of header line content.
4a35548b 76 *
72903e9d
GS
77 * samplerate: Specifies the samplerate of the input data. Defaults to 0.
78 * User specs take precedence over data which optionally gets derived
79 * from input data.
4a35548b 80 *
72903e9d
GS
81 * column_separator: Specifies the sequence which separates the text file
82 * columns. Cannot be empty. Defaults to comma.
83 *
84 * comment_leader: Specifies the sequence which starts comments that run
85 * up to the end of the current text line. Can be empty to disable
86 * comment support. Defaults to semicolon.
87 *
88 * Typical examples of using these options:
89 * - ... -I csv:column_formats=*l ...
90 * All columns are single-bit logic data. Identical to the previous
91 * multi-column mode (the default when no options were given at all).
92 * - ... -I csv:column_formats=3-,*l ...
93 * Ignore the first three columns, get single-bit logic data from all
94 * remaining columns (multi-column mode with first-column above 1).
95 * - ... -I csv:column_formats=3-,4l,x8 ...
96 * Ignore the first three columns, get single-bit logic data from the
97 * next four columns, then eight-bit data in hex format from the next
98 * column. More columns may follow in the input text but won't get
99 * processed. (Mix of previous multi-column as well as single-column
100 * modes.)
101 * - ... -I csv:column_formats=4x8,b16,5l ...
102 * Get eight-bit data in hex format from the first four columns, then
103 * sixteen-bit data in binary format, then five times single-bit data.
104 * - ... -I csv:single_column=2:single_format=bin:logic_channels=8 ...
105 * Get eight logic bits in binary format from column 2. (Simple
106 * single-column mode, corresponds to the "-,b8" format.)
107 * - ... -I csv:first_column=6:logic_channels=4 ...
108 * Get four single-bit logic channels from columns 6 to 9 respectively.
109 * (Simple multi-column mode, corresponds to the "5-,4b" format.)
110 * - ... -I csv:start_line=20:header=yes:...
111 * Skip the first 19 text lines. Use line 20 to derive channel names.
112 * Data starts at line 21.
08eb955a
GS
113 * - ... -I csv:column_formats=*a6 ...
114 * Each column contains an analog value with six significant digits
115 * after the decimal period.
4a35548b
MS
116 */
117
ccff468b
GS
118/*
119 * TODO
120 *
3f1f63f0 121 * - Extend support for analog input data.
43bdef26
GS
122 * - Determine why analog samples of 'double' data type get scrambled
123 * in sigrok-cli screen output. Is analog.encoding->unitsize not
124 * handled properly? A sigrok-cli or libsigrok (src/output) issue?
3f1f63f0
GS
125 * - Reconsider the channel creation after format processing. Current
126 * logic may "bleed" channel names into the analog group when logic
127 * channels' columns follow analog columns (seen with "-,2a,x8").
128 * Trying to sort it out, a naive change used to map logic channels'
129 * data to incorrect bitmap positions. The whole channel numbering
130 * needs reconsideration. Probably it's easiest to first create _all_
131 * logic channels so that they have adjacent numbers starting at 0
132 * (addressing logic bits), then all analog channels (again adjacent)
133 * to simplify the calculation of their index in the sample set as
134 * well as their sdi channel index from the "analog column index".
5a971176
GS
135 * - Optionally get sample rate from timestamp column. Just best-effort
136 * approach, not necessarily reliable. Users can always specify rates.
137 * - Add a test suite for input modules in general, and CSV in specific?
138 * Becomes more important with the multitude of options and their
139 * interaction. Could cover edge cases (BOM presence, line termination
140 * absence, etc) and auto-stuff as well (channel names, channel counts,
141 * samplerates, etc).
ccff468b
GS
142 */
143
43bdef26
GS
/*
 * Data type of analog sample values within this module. Kept at 'float'
 * because 'double' currently is flawed (see the TODO list above).
 */
typedef float csv_analog_t;
145
/*
 * Text formats of a single input column. FORMAT_NONE must remain the
 * first (zero) item, callers test format codes for truthiness to tell
 * "ignored" columns from columns which carry data.
 */
enum single_col_format {
	/* Ignore this column. */
	FORMAT_NONE,
	/* Bin digits for a set of bits (or just one bit). */
	FORMAT_BIN,
	/* Hex digits for a set of bits. */
	FORMAT_HEX,
	/* Oct digits for a set of bits. */
	FORMAT_OCT,
	/* Floating point number for an analog channel. */
	FORMAT_ANALOG,
};
154
155static const char *col_format_text[] = {
156 [FORMAT_NONE] = "unknown",
157 [FORMAT_BIN] = "binary",
158 [FORMAT_HEX] = "hexadecimal",
159 [FORMAT_OCT] = "octal",
43bdef26 160 [FORMAT_ANALOG] = "analog",
e53f32d2
GS
161};
162
1a920e33
GS
163static const char col_format_char[] = {
164 [FORMAT_NONE] = '?',
165 [FORMAT_BIN] = 'b',
166 [FORMAT_HEX] = 'x',
167 [FORMAT_OCT] = 'o',
43bdef26 168 [FORMAT_ANALOG] = 'a',
1a920e33
GS
169};
170
e53f32d2
GS
171struct column_details {
172 size_t col_nr;
173 enum single_col_format text_format;
174 size_t channel_offset;
175 size_t channel_count;
3f1f63f0 176 size_t channel_index;
a267bf45 177 int analog_digits;
4a35548b
MS
178};
179
180struct context {
41d214f6
BV
181 gboolean started;
182
4a35548b
MS
183 /* Current selected samplerate. */
184 uint64_t samplerate;
246aca5f 185 gboolean samplerate_sent;
4a35548b 186
a267bf45 187 /* Number of channels. */
836fac9c 188 size_t logic_channels;
43bdef26 189 size_t analog_channels;
4a35548b 190
836fac9c 191 /* Column delimiter (actually separator), comment leader, EOL sequence. */
4a35548b 192 GString *delimiter;
4a35548b 193 GString *comment;
41d214f6
BV
194 char *termination;
195
1a920e33
GS
196 /* Format specs for input columns, and processing state. */
197 size_t column_seen_count;
198 const char *column_formats;
e53f32d2
GS
199 size_t column_want_count;
200 struct column_details *column_details;
201
4a35548b 202 /* Line number to start processing. */
6433156c 203 size_t start_line;
4a35548b
MS
204
205 /*
206 * Determines if the first line should be treated as header and used for
ba7dd8bb 207 * channel names in multi column mode.
4a35548b 208 */
de8fe3b5
GS
209 gboolean use_header;
210 gboolean header_seen;
4a35548b 211
cd59e6ec
GS
212 size_t sample_unit_size; /**!< Byte count for a single sample. */
213 uint8_t *sample_buffer; /**!< Buffer for a single sample. */
43bdef26 214 csv_analog_t *analog_sample_buffer; /**!< Buffer for one set of analog values. */
4a35548b 215
cd59e6ec
GS
216 uint8_t *datafeed_buffer; /**!< Queue for datafeed submission. */
217 size_t datafeed_buf_size;
218 size_t datafeed_buf_fill;
43bdef26
GS
219 /* "Striped" layout, M samples for N channels each. */
220 csv_analog_t *analog_datafeed_buffer; /**!< Queue for analog datafeed. */
221 size_t analog_datafeed_buf_size;
222 size_t analog_datafeed_buf_fill;
a267bf45
GS
223 GSList **analog_datafeed_channels;
224 int *analog_datafeed_digits;
4a35548b 225
4a35548b 226 /* Current line number. */
6433156c 227 size_t line_number;
affaf540
GS
228
229 /* List of previously created sigrok channels. */
230 GSList *prev_sr_channels;
4a35548b
MS
231};
232
626c388a
GS
233/*
234 * Primitive operations to handle sample sets:
235 * - Keep a buffer for datafeed submission, capable of holding many
236 * samples (reduces call overhead, improves throughput).
237 * - Have a "current sample set" pointer reference one position in that
238 * large samples buffer.
239 * - Clear the current sample set before text line inspection, then set
240 * the bits which are found active in the current line of text input.
241 * Phrase the API such that call sites can be kept simple. Advance to
242 * the next sample set between lines, flush the larger buffer as needed
243 * (when it is full, or upon EOF).
244 */
245
43bdef26
GS
246static int flush_samplerate(const struct sr_input *in)
247{
248 struct context *inc;
249 struct sr_datafeed_packet packet;
250 struct sr_datafeed_meta meta;
251 struct sr_config *src;
252
253 inc = in->priv;
254 if (inc->samplerate && !inc->samplerate_sent) {
255 packet.type = SR_DF_META;
256 packet.payload = &meta;
257 src = sr_config_new(SR_CONF_SAMPLERATE, g_variant_new_uint64(inc->samplerate));
258 meta.config = g_slist_append(NULL, src);
259 sr_session_send(in->sdi, &packet);
260 g_slist_free(meta.config);
261 sr_config_free(src);
262 inc->samplerate_sent = TRUE;
263 }
264
265 return SR_OK;
266}
267
626c388a
GS
268static void clear_logic_samples(struct context *inc)
269{
43bdef26
GS
270 if (!inc->logic_channels)
271 return;
626c388a
GS
272 inc->sample_buffer = &inc->datafeed_buffer[inc->datafeed_buf_fill];
273 memset(inc->sample_buffer, 0, inc->sample_unit_size);
274}
275
276static void set_logic_level(struct context *inc, size_t ch_idx, int on)
277{
278 size_t byte_idx, bit_idx;
279 uint8_t bit_mask;
280
836fac9c 281 if (ch_idx >= inc->logic_channels)
626c388a
GS
282 return;
283 if (!on)
284 return;
285
286 byte_idx = ch_idx / 8;
287 bit_idx = ch_idx % 8;
288 bit_mask = 1 << bit_idx;
289 inc->sample_buffer[byte_idx] |= bit_mask;
290}
291
292static int flush_logic_samples(const struct sr_input *in)
293{
294 struct context *inc;
295 struct sr_datafeed_packet packet;
296 struct sr_datafeed_logic logic;
297 int rc;
298
299 inc = in->priv;
300 if (!inc->datafeed_buf_fill)
301 return SR_OK;
302
43bdef26
GS
303 rc = flush_samplerate(in);
304 if (rc != SR_OK)
305 return rc;
246aca5f 306
626c388a
GS
307 memset(&packet, 0, sizeof(packet));
308 memset(&logic, 0, sizeof(logic));
309 packet.type = SR_DF_LOGIC;
310 packet.payload = &logic;
311 logic.unitsize = inc->sample_unit_size;
312 logic.length = inc->datafeed_buf_fill;
313 logic.data = inc->datafeed_buffer;
314
315 rc = sr_session_send(in->sdi, &packet);
316 if (rc != SR_OK)
317 return rc;
318
319 inc->datafeed_buf_fill = 0;
320 return SR_OK;
321}
322
323static int queue_logic_samples(const struct sr_input *in)
324{
325 struct context *inc;
326 int rc;
327
328 inc = in->priv;
836fac9c
GS
329 if (!inc->logic_channels)
330 return SR_OK;
626c388a
GS
331
332 inc->datafeed_buf_fill += inc->sample_unit_size;
333 if (inc->datafeed_buf_fill == inc->datafeed_buf_size) {
334 rc = flush_logic_samples(in);
335 if (rc != SR_OK)
336 return rc;
337 }
338 return SR_OK;
339}
340
43bdef26
GS
341static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value);
342
343static void clear_analog_samples(struct context *inc)
344{
345 size_t idx;
346
347 if (!inc->analog_channels)
348 return;
349 inc->analog_sample_buffer = &inc->analog_datafeed_buffer[inc->analog_datafeed_buf_fill];
350 for (idx = 0; idx < inc->analog_channels; idx++)
351 set_analog_value(inc, idx, 0.0);
352}
353
354static void set_analog_value(struct context *inc, size_t ch_idx, csv_analog_t value)
355{
356 if (ch_idx >= inc->analog_channels)
357 return;
358 if (!value)
359 return;
360 inc->analog_sample_buffer[ch_idx * inc->analog_datafeed_buf_size] = value;
361}
362
363static int flush_analog_samples(const struct sr_input *in)
364{
43bdef26
GS
365 struct context *inc;
366 struct sr_datafeed_packet packet;
367 struct sr_datafeed_analog analog;
368 struct sr_analog_encoding encoding;
369 struct sr_analog_meaning meaning;
370 struct sr_analog_spec spec;
371 csv_analog_t *samples;
372 size_t ch_idx;
a267bf45 373 int digits;
43bdef26
GS
374 int rc;
375
376 inc = in->priv;
377 if (!inc->analog_datafeed_buf_fill)
378 return SR_OK;
379
380 rc = flush_samplerate(in);
381 if (rc != SR_OK)
382 return rc;
383
384 samples = inc->analog_datafeed_buffer;
385 for (ch_idx = 0; ch_idx < inc->analog_channels; ch_idx++) {
a267bf45 386 digits = inc->analog_datafeed_digits[ch_idx];
43bdef26
GS
387 sr_analog_init(&analog, &encoding, &meaning, &spec, digits);
388 memset(&packet, 0, sizeof(packet));
389 packet.type = SR_DF_ANALOG;
390 packet.payload = &analog;
391 analog.num_samples = inc->analog_datafeed_buf_fill;
392 analog.data = samples;
393 analog.meaning->channels = inc->analog_datafeed_channels[ch_idx];
394 analog.meaning->mq = 0;
395 analog.meaning->mqflags = 0;
396 analog.meaning->unit = 0;
397 analog.encoding->unitsize = sizeof(samples[0]);
398 analog.encoding->is_signed = TRUE;
399 analog.encoding->is_float = TRUE;
400#ifdef WORDS_BIGENDIAN
401 analog.encoding->is_bigendian = TRUE;
402#else
403 analog.encoding->is_bigendian = FALSE;
404#endif
405 analog.encoding->digits = spec.spec_digits;
406 rc = sr_session_send(in->sdi, &packet);
407 if (rc != SR_OK)
408 return rc;
409 samples += inc->analog_datafeed_buf_size;
410 }
411
412 inc->analog_datafeed_buf_fill = 0;
413 return SR_OK;
414}
415
416static int queue_analog_samples(const struct sr_input *in)
417{
418 struct context *inc;
419 int rc;
420
421 inc = in->priv;
422 if (!inc->analog_channels)
423 return SR_OK;
424
425 inc->analog_datafeed_buf_fill++;
426 if (inc->analog_datafeed_buf_fill == inc->analog_datafeed_buf_size) {
427 rc = flush_analog_samples(in);
428 if (rc != SR_OK)
429 return rc;
430 }
431 return SR_OK;
432}
433
2142a79b
GS
434/* Helpers for "column processing". */
435
436static int split_column_format(const char *spec,
437 size_t *column_count, enum single_col_format *format, size_t *bit_count)
438{
439 size_t count;
440 char *endp, format_char;
441 enum single_col_format format_code;
442
443 if (!spec || !*spec)
444 return SR_ERR_ARG;
445
1a920e33 446 /* Get the (optional, decimal, default 1) column count. Accept '*'. */
2142a79b 447 endp = NULL;
1a920e33 448 if (*spec == '*') {
5ada72fc 449 /* Workaround, strtoul("*") won't always yield expected endp. */
1a920e33
GS
450 count = 0;
451 endp = (char *)&spec[1];
452 } else {
453 count = strtoul(spec, &endp, 10);
454 }
2142a79b
GS
455 if (!endp)
456 return SR_ERR_ARG;
457 if (endp == spec)
458 count = 1;
459 if (column_count)
460 *column_count = count;
461 spec = endp;
462
463 /* Get the (mandatory, single letter) type spec (-/xob/l). */
464 format_char = *spec++;
465 switch (format_char) {
5ada72fc 466 case '-':
2142a79b
GS
467 case '/':
468 format_char = '-';
469 format_code = FORMAT_NONE;
470 break;
471 case 'x':
472 format_code = FORMAT_HEX;
473 break;
474 case 'o':
475 format_code = FORMAT_OCT;
476 break;
477 case 'b':
478 case 'l':
479 format_code = FORMAT_BIN;
480 break;
43bdef26
GS
481 case 'a':
482 format_code = FORMAT_ANALOG;
483 break;
2142a79b
GS
484 default: /* includes NUL */
485 return SR_ERR_ARG;
486 }
487 if (format)
488 *format = format_code;
489
490 /* Get the (optional, decimal, default 1) bit count. */
491 endp = NULL;
492 count = strtoul(spec, &endp, 10);
493 if (!endp)
494 return SR_ERR_ARG;
495 if (endp == spec)
a267bf45 496 count = (format_code == FORMAT_ANALOG) ? 3 : 1;
43bdef26 497 if (!format_code)
2142a79b
GS
498 count = 0;
499 if (format_char == 'l')
500 count = 1;
501 if (bit_count)
502 *bit_count = count;
503 spec = endp;
504
505 /* Input spec must have been exhausted. */
506 if (*spec)
507 return SR_ERR_ARG;
508
509 return SR_OK;
510}
511
9e7af34e
GS
512static int make_column_details_from_format(const struct sr_input *in,
513 const char *column_format, char **column_texts)
2142a79b 514{
9e7af34e 515 struct context *inc;
2142a79b 516 char **formats, *format;
43bdef26 517 size_t format_count, column_count, logic_count, analog_count;
1a920e33 518 size_t auto_column_count;
43bdef26 519 size_t format_idx, c, b, column_idx, channel_idx, analog_idx;
2142a79b
GS
520 enum single_col_format f;
521 struct column_details *detail;
9e7af34e
GS
522 GString *channel_name;
523 size_t create_idx;
524 char *column;
525 const char *caption;
43bdef26 526 int channel_type, channel_sdi_nr;
2142a79b
GS
527 int ret;
528
9e7af34e
GS
529 inc = in->priv;
530 inc->column_seen_count = g_strv_length(column_texts);
531
2142a79b
GS
532 /* Split the input spec, count involved columns and bits. */
533 formats = g_strsplit(column_format, ",", 0);
534 if (!formats) {
535 sr_err("Cannot parse columns format %s (comma split).", column_format);
536 return SR_ERR_ARG;
537 }
538 format_count = g_strv_length(formats);
539 if (!format_count) {
540 sr_err("Cannot parse columns format %s (field count).", column_format);
541 g_strfreev(formats);
542 return SR_ERR_ARG;
543 }
43bdef26 544 column_count = logic_count = analog_count = 0;
1a920e33 545 auto_column_count = 0;
2142a79b
GS
546 for (format_idx = 0; format_idx < format_count; format_idx++) {
547 format = formats[format_idx];
548 ret = split_column_format(format, &c, &f, &b);
549 sr_dbg("fmt %s -> %zu cols, %s fmt, %zu bits, rc %d", format, c, col_format_text[f], b, ret);
550 if (ret != SR_OK) {
551 sr_err("Cannot parse columns format %s (field split, %s).", column_format, format);
552 g_strfreev(formats);
553 return SR_ERR_ARG;
554 }
1a920e33
GS
555 if (f && !c) {
556 /* User requested "auto-count", must be last format. */
557 if (formats[format_idx + 1]) {
558 sr_err("Auto column count must be last format field.");
559 g_strfreev(formats);
560 return SR_ERR_ARG;
561 }
562 auto_column_count = inc->column_seen_count - column_count;
563 c = auto_column_count;
564 }
2142a79b 565 column_count += c;
43bdef26
GS
566 if (f == FORMAT_ANALOG)
567 analog_count += c;
568 else if (f)
569 logic_count += c * b;
2142a79b 570 }
43bdef26
GS
571 sr_dbg("Column format %s -> %zu columns, %zu logic, %zu analog channels.",
572 column_format, column_count, logic_count, analog_count);
2142a79b 573
9e7af34e 574 /* Allocate and fill in "column processing" details. Create channels. */
2142a79b 575 inc->column_want_count = column_count;
9e7af34e
GS
576 if (inc->column_seen_count < inc->column_want_count) {
577 sr_err("Insufficient input text width for desired data amount, got %zu but want %zu columns.",
578 inc->column_seen_count, inc->column_want_count);
579 g_strfreev(formats);
580 return SR_ERR_ARG;
581 }
2142a79b 582 inc->column_details = g_malloc0_n(column_count, sizeof(inc->column_details[0]));
43bdef26 583 column_idx = channel_idx = analog_idx = 0;
9e7af34e 584 channel_name = g_string_sized_new(64);
2142a79b 585 for (format_idx = 0; format_idx < format_count; format_idx++) {
9e7af34e 586 /* Process a format field, which can span multiple columns. */
2142a79b
GS
587 format = formats[format_idx];
588 (void)split_column_format(format, &c, &f, &b);
1a920e33
GS
589 if (f && !c)
590 c = auto_column_count;
2142a79b 591 while (c-- > 0) {
9e7af34e 592 /* Fill in a column's processing details. */
2142a79b
GS
593 detail = &inc->column_details[column_idx++];
594 detail->col_nr = column_idx;
595 detail->text_format = f;
43bdef26
GS
596 if (detail->text_format == FORMAT_ANALOG) {
597 detail->channel_offset = analog_idx;
598 detail->channel_count = 1;
a267bf45 599 detail->analog_digits = b;
43bdef26
GS
600 analog_idx += detail->channel_count;
601 } else if (detail->text_format) {
2142a79b
GS
602 detail->channel_offset = channel_idx;
603 detail->channel_count = b;
43bdef26 604 channel_idx += detail->channel_count;
2142a79b
GS
605 }
606 sr_dbg("detail -> col %zu, fmt %s, ch off/cnt %zu/%zu",
607 detail->col_nr, col_format_text[detail->text_format],
608 detail->channel_offset, detail->channel_count);
9e7af34e
GS
609 if (!detail->text_format)
610 continue;
611 /*
08eb955a 612 * Pick most appropriate channel names. Optionally
9e7af34e
GS
613 * use text from a header line (when requested by the
614 * user). In the absence of header text, channels are
615 * assigned rather generic names.
616 *
617 * Manipulation of the column's caption (when a header
618 * line is seen) is acceptable, because this header
619 * line won't get processed another time.
620 */
621 column = column_texts[detail->col_nr - 1];
622 if (inc->use_header && column && *column)
623 caption = sr_scpi_unquote_string(column);
624 else
625 caption = NULL;
626 if (!caption || !*caption)
627 caption = NULL;
3f1f63f0
GS
628 /*
629 * TODO Need we first create _all_ logic channels,
630 * before creating analog channels?
631 */
9e7af34e
GS
632 for (create_idx = 0; create_idx < detail->channel_count; create_idx++) {
633 if (caption && detail->channel_count == 1) {
634 g_string_assign(channel_name, caption);
635 } else if (caption) {
636 g_string_printf(channel_name, "%s[%zu]",
637 caption, create_idx);
638 } else {
639 g_string_printf(channel_name, "%zu",
640 detail->channel_offset + create_idx);
641 }
43bdef26
GS
642 if (detail->text_format == FORMAT_ANALOG) {
643 channel_sdi_nr = logic_count + detail->channel_offset + create_idx;
644 channel_type = SR_CHANNEL_ANALOG;
3f1f63f0 645 detail->channel_index = g_slist_length(in->sdi->channels);
43bdef26
GS
646 } else {
647 channel_sdi_nr = detail->channel_offset + create_idx;
648 channel_type = SR_CHANNEL_LOGIC;
649 }
650 sr_channel_new(in->sdi, channel_sdi_nr,
651 channel_type, TRUE, channel_name->str);
9e7af34e 652 }
2142a79b
GS
653 }
654 }
655 inc->logic_channels = channel_idx;
43bdef26 656 inc->analog_channels = analog_idx;
9e7af34e 657 g_string_free(channel_name, TRUE);
2142a79b
GS
658 g_strfreev(formats);
659
660 return SR_OK;
661}
662
e53f32d2
GS
663static const struct column_details *lookup_column_details(struct context *inc, size_t nr)
664{
665 if (!inc || !inc->column_details)
666 return NULL;
667 if (!nr || nr > inc->column_want_count)
668 return NULL;
669 return &inc->column_details[nr - 1];
670}
671
19267272
GS
672/*
673 * Primitive operations for text input: Strip comments off text lines.
674 * Split text lines into columns. Process input text for individual
675 * columns.
676 */
677
41d214f6 678static void strip_comment(char *buf, const GString *prefix)
4a35548b
MS
679{
680 char *ptr;
681
682 if (!prefix->len)
683 return;
684
b2c4dde2 685 if ((ptr = strstr(buf, prefix->str))) {
41d214f6 686 *ptr = '\0';
b2c4dde2
GS
687 g_strstrip(buf);
688 }
4a35548b
MS
689}
690
19267272 691/**
e53f32d2 692 * @brief Splits a text line into a set of columns.
19267272 693 *
e53f32d2 694 * @param[in] buf The input text line to split.
19267272
GS
695 * @param[in] inc The input module's context.
696 *
e53f32d2 697 * @returns An array of strings, representing the columns' text.
19267272 698 *
e53f32d2 699 * This routine splits a text line on previously determined separators.
19267272 700 */
e53f32d2 701static char **split_line(char *buf, struct context *inc)
4a35548b 702{
e53f32d2 703 return g_strsplit(buf, inc->delimiter->str, 0);
4a35548b
MS
704}
705
19267272 706/**
e53f32d2 707 * @brief Parse a multi-bit field into several logic channels.
19267272 708 *
e53f32d2 709 * @param[in] column The input text, a run of bin/hex/oct digits.
19267272 710 * @param[in] inc The input module's context.
836fac9c 711 * @param[in] details The column processing details.
19267272
GS
712 *
713 * @retval SR_OK Success.
714 * @retval SR_ERR Invalid input data (empty, or format error).
715 *
716 * This routine modifies the logic levels in the current sample set,
e53f32d2 717 * based on the text input and a user provided format spec.
19267272 718 */
836fac9c
GS
719static int parse_logic(const char *column, struct context *inc,
720 const struct column_details *details)
4a35548b 721{
e53f32d2
GS
722 size_t length, ch_rem, ch_idx, ch_inc;
723 const char *rdptr;
4a35548b 724 char c;
e53f32d2
GS
725 gboolean valid;
726 const char *type_text;
727 uint8_t bits;
728
e53f32d2
GS
729 /*
730 * Prepare to read the digits from the text end towards the start.
731 * A digit corresponds to a variable number of channels (depending
732 * on the value's radix). Prepare the mapping of text digits to
733 * (a number of) logic channels.
734 */
735 length = strlen(column);
4a35548b 736 if (!length) {
836fac9c 737 sr_err("Column %zu in line %zu is empty.", details->col_nr,
41d214f6 738 inc->line_number);
4a35548b
MS
739 return SR_ERR;
740 }
e53f32d2 741 rdptr = &column[length];
836fac9c
GS
742 ch_idx = details->channel_offset;
743 ch_rem = details->channel_count;
4a35548b 744
e53f32d2
GS
745 /*
746 * Get another digit and derive up to four logic channels' state from
747 * it. Make sure to not process more bits than the column has channels
748 * associated with it.
749 */
750 while (rdptr > column && ch_rem) {
751 /* Check for valid digits according to the input radix. */
752 c = *(--rdptr);
836fac9c 753 switch (details->text_format) {
e53f32d2
GS
754 case FORMAT_BIN:
755 valid = g_ascii_isxdigit(c) && c < '2';
756 ch_inc = 1;
757 break;
758 case FORMAT_OCT:
759 valid = g_ascii_isxdigit(c) && c < '8';
760 ch_inc = 3;
761 break;
762 case FORMAT_HEX:
763 valid = g_ascii_isxdigit(c);
764 ch_inc = 4;
765 break;
766 default:
767 valid = FALSE;
768 break;
4a35548b 769 }
e53f32d2 770 if (!valid) {
836fac9c 771 type_text = col_format_text[details->text_format];
e53f32d2 772 sr_err("Invalid text '%s' in %s type column %zu in line %zu.",
836fac9c 773 column, type_text, details->col_nr, inc->line_number);
4a35548b 774 return SR_ERR;
e53f32d2
GS
775 }
776 /* Use the digit's bits for logic channels' data. */
777 bits = g_ascii_xdigit_value(c);
836fac9c 778 switch (details->text_format) {
e53f32d2
GS
779 case FORMAT_HEX:
780 if (ch_rem >= 4) {
781 ch_rem--;
782 set_logic_level(inc, ch_idx + 3, bits & (1 << 3));
783 }
784 /* FALLTHROUGH */
785 case FORMAT_OCT:
786 if (ch_rem >= 3) {
787 ch_rem--;
788 set_logic_level(inc, ch_idx + 2, bits & (1 << 2));
789 }
790 if (ch_rem >= 2) {
791 ch_rem--;
792 set_logic_level(inc, ch_idx + 1, bits & (1 << 1));
793 }
794 /* FALLTHROUGH */
795 case FORMAT_BIN:
796 ch_rem--;
797 set_logic_level(inc, ch_idx + 0, bits & (1 << 0));
798 break;
43bdef26 799 case FORMAT_ANALOG:
836fac9c
GS
800 case FORMAT_NONE:
801 /* ShouldNotHappen(TM), but silences compiler warning. */
802 return SR_ERR;
4a35548b 803 }
e53f32d2 804 ch_idx += ch_inc;
4a35548b 805 }
e53f32d2
GS
806 /*
807 * TODO Determine whether the availability of extra input data
808 * for unhandled logic channels is worth warning here. In this
809 * implementation users are in control, and can have the more
810 * significant bits ignored (which can be considered a feature
811 * and not really a limitation).
812 */
4a35548b
MS
813
814 return SR_OK;
815}
816
43bdef26
GS
817/**
818 * @brief Parse a floating point text into an analog value.
819 *
820 * @param[in] column The input text, a floating point number.
821 * @param[in] inc The input module's context.
822 * @param[in] details The column processing details.
823 *
824 * @retval SR_OK Success.
825 * @retval SR_ERR Invalid input data (empty, or format error).
826 *
827 * This routine modifies the analog values in the current sample set,
828 * based on the text input and a user provided format spec.
829 */
830static int parse_analog(const char *column, struct context *inc,
831 const struct column_details *details)
832{
833 size_t length;
834 double dvalue; float fvalue;
835 csv_analog_t value;
836 int ret;
837
838 if (details->text_format != FORMAT_ANALOG)
839 return SR_ERR_BUG;
840
841 length = strlen(column);
842 if (!length) {
843 sr_err("Column %zu in line %zu is empty.", details->col_nr,
844 inc->line_number);
845 return SR_ERR;
846 }
847 if (sizeof(value) == sizeof(double)) {
848 ret = sr_atod_ascii(column, &dvalue);
849 value = dvalue;
850 } else if (sizeof(value) == sizeof(float)) {
851 ret = sr_atof_ascii(column, &fvalue);
852 value = fvalue;
853 } else {
854 ret = SR_ERR_BUG;
855 }
856 if (ret != SR_OK) {
857 sr_err("Cannot parse analog text %s in column %zu in line %zu.",
858 column, details->col_nr, inc->line_number);
859 return SR_ERR_DATA;
860 }
861 set_analog_value(inc, details->channel_offset, value);
862
863 return SR_OK;
864}
865
836fac9c
GS
866/**
867 * @brief Parse routine which ignores the input text.
868 *
869 * This routine exists to unify dispatch code paths, mapping input file
870 * columns' data types to their respective parse routines.
871 */
872static int parse_ignore(const char *column, struct context *inc,
873 const struct column_details *details)
874{
875 (void)column;
876 (void)inc;
877 (void)details;
878 return SR_OK;
879}
880
881typedef int (*col_parse_cb)(const char *column, struct context *inc,
882 const struct column_details *details);
883
884static const col_parse_cb col_parse_funcs[] = {
885 [FORMAT_NONE] = parse_ignore,
886 [FORMAT_BIN] = parse_logic,
887 [FORMAT_OCT] = parse_logic,
888 [FORMAT_HEX] = parse_logic,
43bdef26 889 [FORMAT_ANALOG] = parse_analog,
836fac9c
GS
890};
891
41d214f6 892static int init(struct sr_input *in, GHashTable *options)
4a35548b 893{
41d214f6 894 struct context *inc;
1a920e33 895 size_t single_column, first_column, logic_channels;
41d214f6 896 const char *s;
836fac9c 897 enum single_col_format format;
1a920e33 898 char format_char;
4a35548b 899
836fac9c
GS
900 in->sdi = g_malloc0(sizeof(*in->sdi));
901 in->priv = inc = g_malloc0(sizeof(*inc));
4a35548b 902
72903e9d 903 single_column = g_variant_get_uint32(g_hash_table_lookup(options, "single_column"));
72903e9d 904 logic_channels = g_variant_get_uint32(g_hash_table_lookup(options, "logic_channels"));
41d214f6 905 inc->delimiter = g_string_new(g_variant_get_string(
72903e9d 906 g_hash_table_lookup(options, "column_separator"), NULL));
836fac9c 907 if (!inc->delimiter->len) {
72903e9d 908 sr_err("Column separator cannot be empty.");
41d214f6 909 return SR_ERR_ARG;
4a35548b 910 }
72903e9d 911 s = g_variant_get_string(g_hash_table_lookup(options, "single_format"), NULL);
836fac9c
GS
912 if (g_ascii_strncasecmp(s, "bin", 3) == 0) {
913 format = FORMAT_BIN;
914 } else if (g_ascii_strncasecmp(s, "hex", 3) == 0) {
915 format = FORMAT_HEX;
916 } else if (g_ascii_strncasecmp(s, "oct", 3) == 0) {
917 format = FORMAT_OCT;
41d214f6 918 } else {
72903e9d 919 sr_err("Invalid single-column format: '%s'", s);
41d214f6 920 return SR_ERR_ARG;
4a35548b 921 }
41d214f6 922 inc->comment = g_string_new(g_variant_get_string(
72903e9d 923 g_hash_table_lookup(options, "comment_leader"), NULL));
41d214f6 924 if (g_string_equal(inc->comment, inc->delimiter)) {
e53f32d2
GS
925 /*
926 * Using the same sequence as comment leader and column
72903e9d
GS
927 * separator won't work. The user probably specified ';'
928 * as the column separator but did not adjust the comment
e53f32d2
GS
929 * leader. Try DWIM, drop comment strippin support here.
930 */
72903e9d 931 sr_warn("Comment leader and column separator conflict, disabling comment support.");
41d214f6 932 g_string_truncate(inc->comment, 0);
4a35548b 933 }
6e8d95a5 934 inc->samplerate = g_variant_get_uint64(g_hash_table_lookup(options, "samplerate"));
72903e9d 935 first_column = g_variant_get_uint32(g_hash_table_lookup(options, "first_column"));
de8fe3b5 936 inc->use_header = g_variant_get_boolean(g_hash_table_lookup(options, "header"));
72903e9d 937 inc->start_line = g_variant_get_uint32(g_hash_table_lookup(options, "start_line"));
41d214f6 938 if (inc->start_line < 1) {
6433156c 939 sr_err("Invalid start line %zu.", inc->start_line);
41d214f6 940 return SR_ERR_ARG;
4a35548b
MS
941 }
942
e53f32d2 943 /*
1a920e33
GS
944 * Scan flexible, to get prefered format specs which describe
945 * the input file's data formats. As well as some simple specs
946 * for backwards compatibility and user convenience.
947 *
948 * This logic ends up with a copy of the format string, either
949 * user provided or internally derived. Actual creation of the
950 * column processing details gets deferred until the first line
951 * of input data was seen. To support automatic determination of
952 * e.g. channel counts from column counts.
e53f32d2 953 */
72903e9d 954 s = g_variant_get_string(g_hash_table_lookup(options, "column_formats"), NULL);
2142a79b 955 if (s && *s) {
1a920e33 956 inc->column_formats = g_strdup(s);
72903e9d 957 sr_dbg("User specified column_formats: %s.", s);
1a920e33
GS
958 } else if (single_column && logic_channels) {
959 format_char = col_format_char[format];
960 if (single_column == 1) {
961 inc->column_formats = g_strdup_printf("%c%zu",
962 format_char, logic_channels);
e53f32d2 963 } else {
1a920e33
GS
964 inc->column_formats = g_strdup_printf("%zu-,%c%zu",
965 single_column - 1,
966 format_char, logic_channels);
e53f32d2 967 }
72903e9d 968 sr_dbg("Backwards compat single_column, col %zu, fmt %s, bits %zu -> %s.",
1a920e33
GS
969 single_column, col_format_text[format], logic_channels,
970 inc->column_formats);
971 } else if (!single_column) {
972 if (first_column > 1) {
973 inc->column_formats = g_strdup_printf("%zu-,%zul",
974 first_column - 1, logic_channels);
975 } else {
976 inc->column_formats = g_strdup_printf("%zul",
977 logic_channels);
978 }
979 sr_dbg("Backwards compat multi-column, col %zu, chans %zu -> %s.",
980 first_column, logic_channels,
981 inc->column_formats);
e53f32d2 982 } else {
72903e9d 983 sr_warn("Unknown or unsupported columns layout spec, assuming simple multi-column mode.");
1a920e33 984 inc->column_formats = g_strdup("*l");
4a35548b
MS
985 }
986
41d214f6
BV
987 return SR_OK;
988}
4a35548b 989
affaf540
GS
990/*
991 * Check the channel list for consistency across file re-import. See
992 * the VCD input module for more details and motivation.
993 */
994
995static void keep_header_for_reread(const struct sr_input *in)
996{
997 struct context *inc;
998
999 inc = in->priv;
1000 g_slist_free_full(inc->prev_sr_channels, sr_channel_free_cb);
1001 inc->prev_sr_channels = in->sdi->channels;
1002 in->sdi->channels = NULL;
1003}
1004
1005static int check_header_in_reread(const struct sr_input *in)
1006{
1007 struct context *inc;
1008
1009 if (!in)
1010 return FALSE;
1011 inc = in->priv;
1012 if (!inc)
1013 return FALSE;
1014 if (!inc->prev_sr_channels)
1015 return TRUE;
1016
1017 if (sr_channel_lists_differ(inc->prev_sr_channels, in->sdi->channels)) {
1018 sr_err("Channel list change not supported for file re-read.");
1019 return FALSE;
1020 }
1021 g_slist_free_full(in->sdi->channels, sr_channel_free_cb);
1022 in->sdi->channels = inc->prev_sr_channels;
1023 inc->prev_sr_channels = NULL;
1024
1025 return TRUE;
1026}
1027
492dfa90
GS
1028static const char *delim_set = "\r\n";
1029
329733d9 1030static const char *get_line_termination(GString *buf)
41d214f6 1031{
329733d9 1032 const char *term;
4a35548b 1033
41d214f6
BV
1034 term = NULL;
1035 if (g_strstr_len(buf->str, buf->len, "\r\n"))
1036 term = "\r\n";
1037 else if (memchr(buf->str, '\n', buf->len))
1038 term = "\n";
1039 else if (memchr(buf->str, '\r', buf->len))
1040 term = "\r";
4a35548b 1041
41d214f6
BV
1042 return term;
1043}
4a35548b 1044
41d214f6
BV
1045static int initial_parse(const struct sr_input *in, GString *buf)
1046{
1047 struct context *inc;
9e7af34e 1048 size_t num_columns;
3f1f63f0 1049 size_t line_number, line_idx;
41d214f6 1050 int ret;
9e7af34e 1051 char **lines, *line, **columns;
41d214f6
BV
1052
1053 ret = SR_OK;
1054 inc = in->priv;
1055 columns = NULL;
1056
9e7af34e 1057 /* Search for the first line to process (header or data). */
41d214f6 1058 line_number = 0;
ef0b9935
GS
1059 if (inc->termination)
1060 lines = g_strsplit(buf->str, inc->termination, 0);
1061 else
1062 lines = g_strsplit_set(buf->str, delim_set, 0);
e53f32d2 1063 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
41d214f6
BV
1064 line_number++;
1065 if (inc->start_line > line_number) {
e53f32d2 1066 sr_spew("Line %zu skipped (before start).", line_number);
4a35548b
MS
1067 continue;
1068 }
df0db9fd 1069 if (line[0] == '\0') {
41d214f6
BV
1070 sr_spew("Blank line %zu skipped.", line_number);
1071 continue;
1072 }
df0db9fd
GS
1073 strip_comment(line, inc->comment);
1074 if (line[0] == '\0') {
41d214f6 1075 sr_spew("Comment-only line %zu skipped.", line_number);
4a35548b
MS
1076 continue;
1077 }
1078
41d214f6
BV
1079 /* Reached first proper line. */
1080 break;
1081 }
e53f32d2 1082 if (!line) {
41d214f6 1083 /* Not enough data for a proper line yet. */
60107497 1084 ret = SR_ERR_NA;
41d214f6 1085 goto out;
4a35548b
MS
1086 }
1087
9e7af34e 1088 /* Get the number of columns in the line. */
e53f32d2 1089 columns = split_line(line, inc);
df0db9fd 1090 if (!columns) {
41d214f6
BV
1091 sr_err("Error while parsing line %zu.", line_number);
1092 ret = SR_ERR;
1093 goto out;
4a35548b 1094 }
4a35548b 1095 num_columns = g_strv_length(columns);
4a35548b 1096 if (!num_columns) {
e53f32d2 1097 sr_err("Error while parsing line %zu.", line_number);
41d214f6
BV
1098 ret = SR_ERR;
1099 goto out;
4a35548b 1100 }
e53f32d2
GS
1101 sr_dbg("DIAG Got %zu columns in text line: %s.", num_columns, line);
1102
1a920e33 1103 /*
9e7af34e
GS
1104 * Interpret the user provided column format specs. This might
1105 * involve inspection of the now received input text, to support
1106 * e.g. automatic detection of channel counts in the absence of
1107 * user provided specs. Optionally a header line is used to get
1108 * channels' names.
1109 *
1110 * Check the then created channels for consistency across .reset
1111 * and .receive sequences (file re-load).
1a920e33 1112 */
9e7af34e 1113 ret = make_column_details_from_format(in, inc->column_formats, columns);
1a920e33
GS
1114 if (ret != SR_OK) {
1115 sr_err("Cannot parse columns format using line %zu.", line_number);
1116 goto out;
4a35548b 1117 }
affaf540
GS
1118 if (!check_header_in_reread(in)) {
1119 ret = SR_ERR_DATA;
1120 goto out;
1121 }
4a35548b
MS
1122
1123 /*
9e7af34e 1124 * Allocate buffer memory for datafeed submission of sample data.
cd59e6ec
GS
1125 * Calculate the minimum buffer size to store the set of samples
1126 * of all channels (unit size). Determine a larger buffer size
1127 * for datafeed submission that is a multiple of the unit size.
626c388a
GS
1128 * Allocate the larger buffer, the "sample buffer" will point
1129 * to a location within that large buffer later.
4a35548b 1130 */
43bdef26
GS
1131 if (inc->logic_channels) {
1132 inc->sample_unit_size = (inc->logic_channels + 7) / 8;
1133 inc->datafeed_buf_size = CHUNK_SIZE;
1134 inc->datafeed_buf_size *= inc->sample_unit_size;
1135 inc->datafeed_buffer = g_malloc(inc->datafeed_buf_size);
1136 if (!inc->datafeed_buffer) {
1137 sr_err("Cannot allocate datafeed send buffer (logic).");
1138 ret = SR_ERR_MALLOC;
1139 goto out;
1140 }
1141 inc->datafeed_buf_fill = 0;
1142 }
1143
1144 if (inc->analog_channels) {
1145 size_t sample_size, sample_count;
a267bf45 1146 size_t detail_idx;
3f1f63f0 1147 struct column_details *detail;
a267bf45 1148 int *digits_item;
3f1f63f0 1149 void *channel;
43bdef26
GS
1150 sample_size = sizeof(inc->analog_datafeed_buffer[0]);
1151 inc->analog_datafeed_buf_size = CHUNK_SIZE;
1152 inc->analog_datafeed_buf_size /= sample_size;
1153 inc->analog_datafeed_buf_size /= inc->analog_channels;
1154 sample_count = inc->analog_channels * inc->analog_datafeed_buf_size;
1155 inc->analog_datafeed_buffer = g_malloc0(sample_count * sample_size);
1156 if (!inc->analog_datafeed_buffer) {
1157 sr_err("Cannot allocate datafeed send buffer (analog).");
1158 ret = SR_ERR_MALLOC;
1159 goto out;
1160 }
1161 inc->analog_datafeed_buf_fill = 0;
3f1f63f0 1162 inc->analog_datafeed_channels = g_malloc0(inc->analog_channels * sizeof(inc->analog_datafeed_channels[0]));
a267bf45
GS
1163 inc->analog_datafeed_digits = g_malloc0(inc->analog_channels * sizeof(inc->analog_datafeed_digits[0]));
1164 digits_item = inc->analog_datafeed_digits;
1165 for (detail_idx = 0; detail_idx < inc->column_want_count; detail_idx++) {
3f1f63f0
GS
1166 detail = &inc->column_details[detail_idx];
1167 if (detail->text_format != FORMAT_ANALOG)
a267bf45 1168 continue;
3f1f63f0
GS
1169 channel = g_slist_nth_data(in->sdi->channels, detail->channel_index);
1170 inc->analog_datafeed_channels[detail->channel_offset] = g_slist_append(NULL, channel);
1171 *digits_item++ = detail->analog_digits;
a267bf45 1172 }
43bdef26 1173 }
4a35548b 1174
41d214f6
BV
1175out:
1176 if (columns)
1177 g_strfreev(columns);
1178 g_strfreev(lines);
4a35548b 1179
41d214f6 1180 return ret;
4a35548b
MS
1181}
1182
4439363a
GS
1183/*
1184 * Gets called from initial_receive(), which runs until the end-of-line
1185 * encoding of the input stream could get determined. Assumes that this
1186 * routine receives enough buffered initial input data to either see the
1187 * BOM when there is one, or that no BOM will follow when a text line
1188 * termination sequence was seen. Silently drops the UTF-8 BOM sequence
1189 * from the input buffer if one was seen. Does not care to protect
1190 * against multiple execution or dropping the BOM multiple times --
1191 * there should be at most one in the input stream.
1192 */
1193static void initial_bom_check(const struct sr_input *in)
1194{
1195 static const char *utf8_bom = "\xef\xbb\xbf";
1196
1197 if (in->buf->len < strlen(utf8_bom))
1198 return;
1199 if (strncmp(in->buf->str, utf8_bom, strlen(utf8_bom)) != 0)
1200 return;
1201 g_string_erase(in->buf, 0, strlen(utf8_bom));
1202}
1203
41d214f6 1204static int initial_receive(const struct sr_input *in)
4a35548b 1205{
41d214f6
BV
1206 struct context *inc;
1207 GString *new_buf;
1208 int len, ret;
329733d9
UH
1209 char *p;
1210 const char *termination;
4a35548b 1211
4439363a
GS
1212 initial_bom_check(in);
1213
41d214f6 1214 inc = in->priv;
4a35548b 1215
df0db9fd
GS
1216 termination = get_line_termination(in->buf);
1217 if (!termination)
41d214f6 1218 /* Don't have a full line yet. */
d0181813 1219 return SR_ERR_NA;
4a35548b 1220
df0db9fd
GS
1221 p = g_strrstr_len(in->buf->str, in->buf->len, termination);
1222 if (!p)
41d214f6 1223 /* Don't have a full line yet. */
d0181813 1224 return SR_ERR_NA;
41d214f6
BV
1225 len = p - in->buf->str - 1;
1226 new_buf = g_string_new_len(in->buf->str, len);
1227 g_string_append_c(new_buf, '\0');
4a35548b 1228
41d214f6
BV
1229 inc->termination = g_strdup(termination);
1230
1231 if (in->buf->str[0] != '\0')
1232 ret = initial_parse(in, new_buf);
1233 else
1234 ret = SR_OK;
1235
1236 g_string_free(new_buf, TRUE);
1237
1238 return ret;
1239}
1240
7f4c3a62 1241static int process_buffer(struct sr_input *in, gboolean is_eof)
41d214f6 1242{
41d214f6
BV
1243 struct context *inc;
1244 gsize num_columns;
e53f32d2 1245 size_t line_idx, col_idx, col_nr;
836fac9c
GS
1246 const struct column_details *details;
1247 col_parse_cb parse_func;
ad6a2bee 1248 int ret;
e53f32d2 1249 char *p, **lines, *line, **columns, *column;
41d214f6 1250
41d214f6 1251 inc = in->priv;
d0181813 1252 if (!inc->started) {
bee2b016 1253 std_session_send_df_header(in->sdi);
d0181813 1254 inc->started = TRUE;
4a35548b
MS
1255 }
1256
4555d3bd
GS
1257 /*
1258 * Consider empty input non-fatal. Keep accumulating input until
1259 * at least one full text line has become available. Grab the
1260 * maximum amount of accumulated data that consists of full text
1261 * lines, and process what has been received so far, leaving not
1262 * yet complete lines for the next invocation.
7f4c3a62
GS
1263 *
1264 * Enforce that all previously buffered data gets processed in
1265 * the "EOF" condition. Do not insist in the presence of the
1266 * termination sequence for the last line (may often be missing
1267 * on Windows). A present termination sequence will just result
1268 * in the "execution of an empty line", and does not harm.
4555d3bd
GS
1269 */
1270 if (!in->buf->len)
1271 return SR_OK;
7f4c3a62
GS
1272 if (is_eof) {
1273 p = in->buf->str + in->buf->len;
1274 } else {
1275 p = g_strrstr_len(in->buf->str, in->buf->len, inc->termination);
1276 if (!p)
1277 return SR_ERR;
1278 *p = '\0';
1279 p += strlen(inc->termination);
1280 }
41d214f6 1281 g_strstrip(in->buf->str);
4a35548b 1282
18078d05 1283 ret = SR_OK;
ef0b9935 1284 lines = g_strsplit(in->buf->str, inc->termination, 0);
e53f32d2 1285 for (line_idx = 0; (line = lines[line_idx]); line_idx++) {
41d214f6 1286 inc->line_number++;
ef0b9935
GS
1287 if (inc->line_number < inc->start_line) {
1288 sr_spew("Line %zu skipped (before start).", inc->line_number);
1289 continue;
1290 }
df0db9fd 1291 if (line[0] == '\0') {
41d214f6 1292 sr_spew("Blank line %zu skipped.", inc->line_number);
4a35548b
MS
1293 continue;
1294 }
1295
1296 /* Remove trailing comment. */
df0db9fd
GS
1297 strip_comment(line, inc->comment);
1298 if (line[0] == '\0') {
41d214f6 1299 sr_spew("Comment-only line %zu skipped.", inc->line_number);
4a35548b
MS
1300 continue;
1301 }
1302
160691b9 1303 /* Skip the header line, its content was used as the channel names. */
de8fe3b5 1304 if (inc->use_header && !inc->header_seen) {
160691b9 1305 sr_spew("Header line %zu skipped.", inc->line_number);
de8fe3b5 1306 inc->header_seen = TRUE;
160691b9
JS
1307 continue;
1308 }
1309
e53f32d2
GS
1310 /* Split the line into columns, check for minimum length. */
1311 columns = split_line(line, inc);
df0db9fd 1312 if (!columns) {
41d214f6 1313 sr_err("Error while parsing line %zu.", inc->line_number);
2355d229 1314 g_strfreev(lines);
4a35548b
MS
1315 return SR_ERR;
1316 }
4a35548b 1317 num_columns = g_strv_length(columns);
e53f32d2
GS
1318 if (num_columns < inc->column_want_count) {
1319 sr_err("Insufficient column count %zu in line %zu.",
1320 num_columns, inc->line_number);
4a35548b 1321 g_strfreev(columns);
2355d229 1322 g_strfreev(lines);
4a35548b
MS
1323 return SR_ERR;
1324 }
1325
836fac9c 1326 /* Have the columns of the current text line processed. */
626c388a 1327 clear_logic_samples(inc);
43bdef26 1328 clear_analog_samples(inc);
e53f32d2
GS
1329 for (col_idx = 0; col_idx < inc->column_want_count; col_idx++) {
1330 column = columns[col_idx];
1331 col_nr = col_idx + 1;
836fac9c
GS
1332 details = lookup_column_details(inc, col_nr);
1333 if (!details || !details->text_format)
1334 continue;
1335 parse_func = col_parse_funcs[details->text_format];
1336 if (!parse_func)
1337 continue;
1338 ret = parse_func(column, inc, details);
e53f32d2
GS
1339 if (ret != SR_OK) {
1340 g_strfreev(columns);
1341 g_strfreev(lines);
1342 return SR_ERR;
1343 }
4a35548b
MS
1344 }
1345
626c388a
GS
1346 /* Send sample data to the session bus (buffered). */
1347 ret = queue_logic_samples(in);
43bdef26 1348 ret += queue_analog_samples(in);
41d214f6 1349 if (ret != SR_OK) {
4a35548b 1350 sr_err("Sending samples failed.");
cd59e6ec 1351 g_strfreev(columns);
2355d229 1352 g_strfreev(lines);
4a35548b
MS
1353 return SR_ERR;
1354 }
cd59e6ec 1355
41d214f6
BV
1356 g_strfreev(columns);
1357 }
1358 g_strfreev(lines);
241c386a 1359 g_string_erase(in->buf, 0, p - in->buf->str);
41d214f6 1360
7066fd46 1361 return ret;
41d214f6
BV
1362}
1363
7066fd46 1364static int receive(struct sr_input *in, GString *buf)
41d214f6
BV
1365{
1366 struct context *inc;
7066fd46
BV
1367 int ret;
1368
1369 g_string_append_len(in->buf, buf->str, buf->len);
41d214f6
BV
1370
1371 inc = in->priv;
1a920e33 1372 if (!inc->column_seen_count) {
df0db9fd
GS
1373 ret = initial_receive(in);
1374 if (ret == SR_ERR_NA)
7066fd46
BV
1375 /* Not enough data yet. */
1376 return SR_OK;
1377 else if (ret != SR_OK)
1378 return SR_ERR;
1379
1380 /* sdi is ready, notify frontend. */
1381 in->sdi_ready = TRUE;
41d214f6 1382 return SR_OK;
7066fd46
BV
1383 }
1384
7f4c3a62 1385 ret = process_buffer(in, FALSE);
7066fd46
BV
1386
1387 return ret;
1388}
1389
1390static int end(struct sr_input *in)
1391{
1392 struct context *inc;
7066fd46 1393 int ret;
41d214f6 1394
7066fd46 1395 if (in->sdi_ready)
7f4c3a62 1396 ret = process_buffer(in, TRUE);
7066fd46
BV
1397 else
1398 ret = SR_OK;
cd59e6ec
GS
1399 if (ret != SR_OK)
1400 return ret;
1401
626c388a 1402 ret = flush_logic_samples(in);
43bdef26 1403 ret += flush_analog_samples(in);
cd59e6ec
GS
1404 if (ret != SR_OK)
1405 return ret;
7066fd46
BV
1406
1407 inc = in->priv;
3be42bc2 1408 if (inc->started)
bee2b016 1409 std_session_send_df_end(in->sdi);
4a35548b 1410
7066fd46
BV
1411 return ret;
1412}
1413
d5cc282f 1414static void cleanup(struct sr_input *in)
7066fd46
BV
1415{
1416 struct context *inc;
1417
affaf540
GS
1418 keep_header_for_reread(in);
1419
7066fd46
BV
1420 inc = in->priv;
1421
b1f83103 1422 g_free(inc->termination);
539188e5 1423 inc->termination = NULL;
cd59e6ec 1424 g_free(inc->datafeed_buffer);
539188e5 1425 inc->datafeed_buffer = NULL;
43bdef26
GS
1426 g_free(inc->analog_datafeed_buffer);
1427 inc->analog_datafeed_buffer = NULL;
4a35548b
MS
1428}
1429
ad93bfb0
SA
1430static int reset(struct sr_input *in)
1431{
1432 struct context *inc = in->priv;
1433
1434 cleanup(in);
1435 inc->started = FALSE;
1436 g_string_truncate(in->buf, 0);
1437
1438 return SR_OK;
1439}
1440
/* Positions of the module's options within the options table. */
enum option_index {
	OPT_COL_FMTS,	/* "column_formats" */
	OPT_SINGLE_COL,	/* "single_column" */
	OPT_FIRST_COL,	/* "first_column" */
	OPT_NUM_LOGIC,	/* "logic_channels" */
	OPT_FORMAT,	/* "single_format" */
	OPT_START,	/* "start_line" */
	OPT_HEADER,	/* "header" */
	OPT_RATE,	/* "samplerate" */
	OPT_DELIM,	/* "column_separator" */
	OPT_COMMENT,	/* "comment_leader" */
	OPT_MAX,	/* Number of options, position of the sentinel. */
};
1454
41d214f6 1455static struct sr_option options[] = {
72903e9d
GS
1456 [OPT_COL_FMTS] = {
1457 "column_formats", "Column format specs",
08eb955a 1458 "Specifies text columns data types: A comma separated list of [<cols>]<fmt>[<bits>] items, with - to ignore columns, x/o/b/l for logic data, a (and resolution) for analog data.",
72903e9d
GS
1459 NULL, NULL,
1460 },
1461 [OPT_SINGLE_COL] = {
1462 "single_column", "Single column",
08eb955a 1463 "Enable single-column mode, exclusively use text from the specified column (number starting at 1). Obsoleted by 'column_formats'.",
72903e9d
GS
1464 NULL, NULL,
1465 },
1466 [OPT_FIRST_COL] = {
1467 "first_column", "First column",
08eb955a 1468 "Number of the first column with logic data in simple multi-column mode (number starting at 1, default 1). Obsoleted by 'column_formats'.",
72903e9d
GS
1469 NULL, NULL,
1470 },
1471 [OPT_NUM_LOGIC] = {
1472 "logic_channels", "Number of logic channels",
1473 "Logic channel count, required in simple single-column mode, defaults to \"all remaining columns\" in simple multi-column mode. Obsoleted by 'column_formats'.",
1474 NULL, NULL,
1475 },
1476 [OPT_FORMAT] = {
1477 "single_format", "Data format for simple single-column mode.",
08eb955a 1478 "The number format of single-column mode input data: bin, hex, oct. Obsoleted by 'column_formats'.",
72903e9d
GS
1479 NULL, NULL,
1480 },
1481 [OPT_START] = {
1482 "start_line", "Start line",
1483 "The line number at which to start processing input text (default: 1).",
1484 NULL, NULL,
1485 },
1486 [OPT_HEADER] = {
1487 "header", "Get channel names from first line.",
08eb955a 1488 "Use the first processed line's column captions (when available) as channel names. Off by default",
72903e9d
GS
1489 NULL, NULL,
1490 },
1491 [OPT_RATE] = {
1492 "samplerate", "Samplerate (Hz)",
08eb955a 1493 "The input data's sample rate in Hz. No default value.",
72903e9d
GS
1494 NULL, NULL,
1495 },
1496 [OPT_DELIM] = {
1497 "column_separator", "Column separator",
1498 "The sequence which separates text columns. Non-empty text, comma by default.",
1499 NULL, NULL,
1500 },
1501 [OPT_COMMENT] = {
1502 "comment_leader", "Comment leader character",
08eb955a 1503 "The text which starts comments at the end of text lines, semicolon by default.",
72903e9d
GS
1504 NULL, NULL,
1505 },
c6aa9870 1506 [OPT_MAX] = ALL_ZERO,
41d214f6
BV
1507};
1508
2c240774 1509static const struct sr_option *get_options(void)
41d214f6 1510{
31c41782
UH
1511 GSList *l;
1512
41d214f6 1513 if (!options[0].def) {
1a920e33 1514 options[OPT_COL_FMTS].def = g_variant_ref_sink(g_variant_new_string(""));
e53f32d2 1515 options[OPT_SINGLE_COL].def = g_variant_ref_sink(g_variant_new_uint32(0));
72903e9d 1516 options[OPT_FIRST_COL].def = g_variant_ref_sink(g_variant_new_uint32(1));
e53f32d2 1517 options[OPT_NUM_LOGIC].def = g_variant_ref_sink(g_variant_new_uint32(0));
c6aa9870 1518 options[OPT_FORMAT].def = g_variant_ref_sink(g_variant_new_string("bin"));
31c41782
UH
1519 l = NULL;
1520 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("bin")));
1521 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("hex")));
1522 l = g_slist_append(l, g_variant_ref_sink(g_variant_new_string("oct")));
c6aa9870 1523 options[OPT_FORMAT].values = l;
e53f32d2 1524 options[OPT_START].def = g_variant_ref_sink(g_variant_new_uint32(1));
72903e9d
GS
1525 options[OPT_HEADER].def = g_variant_ref_sink(g_variant_new_boolean(FALSE));
1526 options[OPT_RATE].def = g_variant_ref_sink(g_variant_new_uint64(0));
1527 options[OPT_DELIM].def = g_variant_ref_sink(g_variant_new_string(","));
1528 options[OPT_COMMENT].def = g_variant_ref_sink(g_variant_new_string(";"));
41d214f6
BV
1529 }
1530
1531 return options;
1532}
1533
d4c93774 1534SR_PRIV struct sr_input_module input_csv = {
4a35548b 1535 .id = "csv",
41d214f6
BV
1536 .name = "CSV",
1537 .desc = "Comma-separated values",
c7bc82ff 1538 .exts = (const char*[]){"csv", NULL},
41d214f6 1539 .options = get_options,
4a35548b 1540 .init = init,
41d214f6 1541 .receive = receive,
7066fd46 1542 .end = end,
41d214f6 1543 .cleanup = cleanup,
ad93bfb0 1544 .reset = reset,
4a35548b 1545};