开发者

Segmentation fault when returning a struct

开发者 https://www.devze.com 2023-01-09 22:16 出处:网络
I am trying to do a pretty simple thing - it is reading a file and then turning it into a char** splitting it into lines. However when I return a struct containing the char** and size i get Segmentati

I am trying to do a pretty simple thing - it is reading a file and then turning it into a char** splitting it into lines. However when I return a struct containing the char** and size i get Segmentation fault. I read here: C segmentation fault before/during return statement that it's probably "mangled stack". I still however don't know what I did to mangle it. This is my code:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "comp_words.h"
#define BLOCK 4096

struct sized_str {
    char* str;
    long size;
};

struct sized_arr {
    char** content;
    int size;
};

struct sized_str readfile(char* name) {
    FILE *f;
    long filesize;
    char *buf;
    struct sized_str res;
    int r, p = 0;

    f = fopen(name, "r");
    fseek(f, 0, SEEK_END);
    filesize = ftell(f);
    rewind(f);
    buf = calloc(filesize + 1, sizeof(char));
    while ((r = fread(buf + p, sizeof(char), BLOCK, f))) {
        p += r;
    }
    res.str = buf;
    res.size = filesize + 1;

    return res;
}

struct sized_arr read_dict() {
    struct sized_str file_content;
    struct sized_arr result;
    char *buf, *buf_cpy, *buf_cpy_point, *line, **res;
    int i = 0, j, line_count = 0;

    file_content = readfile("/var/tmp/twl06.txt");
    buf = file_content.str;
    buf_cpy = (char*)malloc(file_content.size * sizeof(char));
    strcpy(buf_cpy, buf);
    buf_cpy_point = buf_cpy;

    while (strtok(buf_cpy_point, "\n\r")) {
        line_count++;
        buf_cpy_point = NULL;
    }

    res = (char**)malloc(sizeof(char*) * line_count);

    while ((line = strtok(buf, "\n\r"))) {
        res[i开发者_如何学C] = (char*)malloc(sizeof(char) * strlen(line));

        j = 0;
        while ((res[i][j] = tolower(line[j]))) {
            j++;
        }
        buf = NULL;
    }
    free(buf_cpy);
    result.size = line_count;
    result.content = res;

    return result;
}

// ...

int main (int argc, char** argv) {
    struct sized_str input;
    struct sized_arr dict;

    dict = read_dict();

    // ...
    return 0;

The code segfaults while returning from read_dict function.


At least at first glance, this seems to have a couple of problems. First:

while ((line = strtok(buf, "\n\r"))) {

To use strtok you normally pass the buffer on the first all, then make subsequent calls passing "NULL" for the first parameter until strtok returns a NULL (indicating that it's reached the end of the buffer). [Edit: upon further examination, it's apparent this isn't really a bug -- as pointed out by @Casablanca, he sets buf to NULL in the loop so the second and subsequent iterations actually do pass NULL for the first parameter -- so the current code is a bit hard to understand and (at least arguably) somewhat fragile, but not actually wrong.]

Second, when you allocate your space, it looks like you're not allocating space for the terminating NUL:

res[i] = (char*)malloc(sizeof(char) * strlen(line));

At least at first glance, it looks like this should be:

res[i] = malloc(strlen(line)+1);

[As an aside, sizeof(char)==1 and casting the return from malloc can mask the bug of failing to #include <stdlib.h> to get a proper prototype in scope.]

Some of your other code isn't exactly wrong, but strikes me as less readable than ideal. For example:

j = 0;
while ((res[i][j] = tolower(line[j]))) {
    j++;
}

This appears to be a rather obfuscated way of writing:

for (j=0; line[j] != '\0'; j++)
    res[i][j] = tolower((unsigned char)line[j]);

Also note that when you call tolower, you generally need/want to cast the parameter to unsigned char (passing a negative value gives undefined behavior, and quite a few characters with accents, umlauts, etc., will normally show up as negative in the typical case that char is signed).

You also seem to have a memory leak -- read_dict calls readfile, which allocates a buffer (with calloc -- why not malloc?) and returns a pointer to that memory in a structure. read_dict receives the structure, but unless I've missed something, the struct goes out of scope without your ever freeing the memory it pointed to.

Rather than try to find and fix the problem you've seen, my immediate reaction would be to start over. It seems to me that you've made the problem considerably more complex than it really is. If I were doing it, I'd probably start with a function to allocate space and read a line into the space, something on this order:

// Warning: Untested code.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char *readline(FILE *file) { 
    char *buffer = NULL;
    size_t current_size = 1;
    char *temp;
    const int block_size = 256;

    do { 
        if (NULL == (temp = realloc(buffer, current_size+block_size)))
            break;
        buffer = temp;
        buffer[current_size-1] = '\0';
        if (fgets(buffer+current_size-1, block_size, file)==NULL) 
            return strlen(buffer) > 0 ? buffer : NULL;      
        current_size += block_size-1;
    } while (strchr(buffer, '\n') == NULL);

    strtok(buffer, "\n");
    if (NULL != (temp = realloc(buffer, strlen(buffer)+1)))
        buffer =temp;
    return buffer;
}

Once that's working, reading all the lines in the file and converting them to upper-case comes out something like:

// Warning: more untested code.
while (res[i] = readline(file)) {
    size_t j;
    for (j=0; res[i][j]; j++)
        res[i][j] = toupper((unsigned char)res[i][j]);
    ++i;
}


It looks like you forgot to increment i after storing each line into the result array, so you end up storing all lines into res[0]. But you still set result.size = line_count at the end, so all array elements beyond the first are undefined. An i++ at the end of this loop: while ((line = strtok(buf, "\n\r"))) should fix it.

0

精彩评论

暂无评论...
验证码 换一张
取 消

关注公众号