Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parsing error #21

Open
hanguanmiao opened this issue Jun 7, 2021 · 1 comment
Open

Parsing error #21

hanguanmiao opened this issue Jun 7, 2021 · 1 comment

Comments

@hanguanmiao
Copy link

problem1

int,string
1,

One column too few is parsed: the trailing empty column after the final comma should be returned as null.

problem2

int,string,int
1,"",123

There are 3 columns in total, but only 2 are parsed, because '",123' is treated as a single column.

@ljluestc
Copy link

ljluestc commented Feb 9, 2025


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Maximum length of a CSV line
#define MAX_LINE_LENGTH 1024
// Capacity of the columns array that test_csv_parser passes to parse_csv_line.
#define MAX_COLUMNS 100  // Adjust based on expected column count

/**
 * Trim leading and trailing whitespace from a string, in place.
 *
 * Leading spaces and tabs are skipped; trailing spaces, tabs, carriage
 * returns and newlines are cut off by writing a terminating NUL into the
 * caller's buffer.
 *
 * @param str  Writable NUL-terminated string, or NULL.
 * @return     Pointer to the first non-blank character inside `str` (not
 *             necessarily `str` itself, so do not pass it to free()), or
 *             NULL when `str` is NULL.
 */
char *trim_whitespace(char *str) {
    if (str == NULL) {
        return NULL;
    }

    while (*str == ' ' || *str == '\t') {
        str++;
    }

    // `end` is one past the last kept character. Working with a
    // one-past-the-end pointer avoids forming `str - 1` on an empty string
    // and lets a string made of a single trailing-whitespace character
    // (e.g. "\n") collapse to "".
    char *end = str + strlen(str);
    while (end > str &&
           (end[-1] == ' ' || end[-1] == '\t' || end[-1] == '\n' || end[-1] == '\r')) {
        end--;
    }
    *end = '\0';

    return str;
}

/*
 * Return a heap-allocated copy of text[0..len) with leading spaces/tabs and
 * trailing spaces/tabs/CR/LF removed, or NULL if allocation fails.
 */
static char *csv_dup_trimmed(const char *text, size_t len) {
    size_t start = 0;
    while (start < len && (text[start] == ' ' || text[start] == '\t')) {
        start++;
    }

    size_t end = len;
    while (end > start &&
           (text[end - 1] == ' ' || text[end - 1] == '\t' ||
            text[end - 1] == '\n' || text[end - 1] == '\r')) {
        end--;
    }

    size_t kept = end - start;
    char *copy = malloc(kept + 1);
    if (copy == NULL) {
        return NULL;
    }
    memcpy(copy, text + start, kept);
    copy[kept] = '\0';
    return copy;
}

/**
 * Split one CSV line into fields.
 *
 * Commas separate fields unless they appear inside double quotes. Quote
 * characters themselves are dropped, and a doubled quote ("") inside a quoted
 * section yields one literal quote. Every field is stored with surrounding
 * whitespace removed (leading spaces/tabs; trailing spaces/tabs/CR/LF).
 * A line always yields at least one field, and a trailing comma yields a
 * final empty field, so "1," produces {"1", ""}.
 *
 * @param line         NUL-terminated input line (not modified).
 * @param columns      Output array; each stored entry is heap-allocated and
 *                     must be released by the caller with free().
 * @param max_columns  Capacity of `columns`. At most this many fields are
 *                     stored; any input after the last stored field is
 *                     ignored.
 * @return Number of fields stored; 0 when `line` or `columns` is NULL or
 *         `max_columns` is not positive; -1 on allocation failure (in which
 *         case nothing is left allocated in `columns`).
 */
int parse_csv_line(const char *line, char *columns[], int max_columns) {
    if (line == NULL || columns == NULL || max_columns <= 0) {
        return 0;
    }

    // A field can never be longer than the line itself, so a scratch buffer
    // sized from the input cannot overflow regardless of line length.
    char *field = malloc(strlen(line) + 1);
    if (field == NULL) {
        return -1;
    }

    int count = 0;
    size_t field_len = 0;
    int in_quotes = 0;

    for (const char *ptr = line;; ptr++) {
        const char ch = *ptr;
        const int at_end = (ch == '\0');

        if (at_end || (ch == ',' && !in_quotes)) {
            // End of a field. The end of the line closes the current field
            // exactly once, so a trailing comma naturally produces a single
            // empty last field and an unterminated quote is still flushed.
            char *copy = csv_dup_trimmed(field, field_len);
            if (copy == NULL) {
                while (count > 0) {
                    free(columns[--count]);
                }
                free(field);
                return -1;
            }
            columns[count++] = copy;
            field_len = 0;

            if (at_end || count >= max_columns) {
                break;
            }
        } else if (ch == '"') {
            if (in_quotes && ptr[1] == '"') {
                field[field_len++] = '"';  // Escaped quote inside quotes
                ptr++;                     // Skip the second quote
            } else {
                in_quotes = !in_quotes;
            }
        } else {
            field[field_len++] = ch;
        }
    }

    free(field);
    return count;
}

// Run parse_csv_line over the two inputs from the issue report and print
// each parsed field, freeing every field after it has been printed.
void test_csv_parser() {
    const char *test_cases[] = {
        "1,",          // Problem 1: trailing comma, last column should be empty
        "1,\"\",123",  // Problem 2: quoted empty string in the middle
    };
    const int case_count = (int)(sizeof test_cases / sizeof test_cases[0]);
    char *columns[MAX_COLUMNS];

    int case_idx = 0;
    while (case_idx < case_count) {
        const char *input = test_cases[case_idx];
        printf("Parsing: %s\n", input);

        int col_count = parse_csv_line(input, columns, MAX_COLUMNS);
        int col = 0;
        while (col < col_count) {
            printf("Column %d: '%s'\n", col + 1, columns[col]);
            free(columns[col]);  // Each field is owned by the caller
            col++;
        }

        printf("\n");
        case_idx++;
    }
}

// Program entry point: run the CSV parser demonstration and report success.
int main() {
    test_csv_parser();

    return EXIT_SUCCESS;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants