From: Gerhard Sittig Date: Sat, 19 Oct 2019 09:47:45 +0000 (+0200) Subject: input/csv: improve reliability of text line isolation X-Git-Url: https://sigrok.org/gitweb/?p=libsigrok.git;a=commitdiff_plain;h=fbefa03f58e9fa6be58024e4df913602a4eb8ab5 input/csv: improve reliability of text line isolation Slightly unobfuscate the "end of current input chunk" marker in the data processing loop. Make the variable's identifier reflect that it's not a temporary, but instead something worth keeping around until needed again. Unbreak the calculation of line numbers in those situations where input chunks (including previously accumulated unprocessed data) happen to start with a line termination. This covers input files which start with empty lines, as well as environments with multi-byte line termination sequences (CR/LF) and arbitrary distribution of bytes across chunks. This fixes bug #968. Accept when there is no line termination in the current input chunk. We cannot assume that calling applications always provide file content in large enough chunks to span complete lines. And any arbitrary chunk size which applications happen to use can get exceeded by input files (e.g. for generated files with wide data or long comments). 
--- diff --git a/src/input/csv.c b/src/input/csv.c index d2d86a32..4efcaed0 100644 --- a/src/input/csv.c +++ b/src/input/csv.c @@ -1502,7 +1502,8 @@ static int process_buffer(struct sr_input *in, gboolean is_eof) const struct column_details *details; col_parse_cb parse_func; int ret; - char *p, **lines, *line, **columns, *column; + char *processed_up_to; + char **lines, *line, **columns, *column; inc = in->priv; if (!inc->started) { @@ -1526,16 +1527,17 @@ static int process_buffer(struct sr_input *in, gboolean is_eof) if (!in->buf->len) return SR_OK; if (is_eof) { - p = in->buf->str + in->buf->len; + processed_up_to = in->buf->str + in->buf->len; } else { - p = g_strrstr_len(in->buf->str, in->buf->len, inc->termination); - if (!p) - return SR_ERR; - *p = '\0'; - p += strlen(inc->termination); + processed_up_to = g_strrstr_len(in->buf->str, in->buf->len, + inc->termination); + if (!processed_up_to) + return SR_OK; + *processed_up_to = '\0'; + processed_up_to += strlen(inc->termination); } - g_strstrip(in->buf->str); + /* Split input text lines and process their columns. */ ret = SR_OK; lines = g_strsplit(in->buf->str, inc->termination, 0); for (line_idx = 0; (line = lines[line_idx]); line_idx++) { @@ -1612,7 +1614,7 @@ static int process_buffer(struct sr_input *in, gboolean is_eof) g_strfreev(columns); } g_strfreev(lines); - g_string_erase(in->buf, 0, p - in->buf->str); + g_string_erase(in->buf, 0, processed_up_to - in->buf->str); return ret; }