Mergesort - Segmentation fault

2019-07-31 03:48发布

问题:

Code directory structure,

./Computing$ ls -LR
.:
list file.txt  mergeSort.c  program.exe type.h

./list:
arrayImpl.c  linkedListImpl.c  list.h

Compilation procedure:

$./Computing
gcc -Wall -Werror -DARRAY -I. mergeSort.c ./list/*.c -o program

Here is the complete code having files, mergeSort.c, list/*, type.h

With the given representation of List,

typedef struct List{
  void **array;

  /* Housekeeping - Array enhancement/shrink */
  int lastItemPosition;
  int size;
}List;

mergesort is performed below on list->array, where aux maintains the shallow copy of array

void mergeSort(List *list, size_t listSize, isLess less){

  if(list != NULL){

    void **aux = malloc(list->size * sizeof(void*)); //Auxillary shadow copy
    if(aux != NULL){
      printf("Size of list: %d\n", listSize);
      mSort(list->array, aux, 0, listSize-1, less);
    }else{

      fprintf(stderr, "mergeSort() - Malloc failure");
      exit(EXIT_FAILURE);
    }
  }else{

    fprintf(stderr, "mergeSort() - List is NULL");
  }
}

static void mSort(void **array, void **aux, int low, int high, isLess less){

  if(high <= low) return;
  int mid = (low + high)/2;

  mSort(array, aux, low, mid, less);
  mSort(array, aux, mid+1, high, less);
  merge(array, aux, low, mid, high, less);
}

static void merge(void **array, void **aux, int low, int mid, int high, isLess less){

  for(int index = 0; index <= high; index++){
    aux[index] = array[index]; //Shallow copy
  }
  printf("Low-%d, Mid-%d, High-%d\n", low, mid, high);
  int leftIndex = low; int rightIndex = mid+1;
  printf("leftIndex-%d, rightIndex-%d\n", leftIndex, rightIndex);

  for(int index = 0; index <= high; index++){

    if(leftIndex > mid)    /* right array exhausted */  array[index] = aux[rightIndex++];
    else if(rightIndex > high) /*left array exhausted*/ array[index] = aux[leftIndex++];
    else if( less(aux[rightIndex], aux[leftIndex]) )    array[index] = aux[rightIndex++];
    else                                                array[index] = aux[leftIndex++];
  }

}

where the use code is,


bool less(const void *key, const void *item){
  printf("\nIn less function\n");
  printf("left-%d, Right-%d\n\n", ((Record *)key)->age, ((Record *)item)->age);
  if( ((Record *)key)->age < ((Record *)item)->age ){
    printf("Return true\n");
    return true;
  }else{
    printf("Return false\n");
    return false;
  }
}

int main(){
  FILE *pFile = fopen("file.txt", "r");
  checkHandleFailure(pFile, FILE_HANDLE_FAILURE);
  char buf[MAX_RECORD_SIZE];
  DBCache *cache = initCache(pFile);


  readHeader(pFile, cache, buf);

  readData(pFile, cache, buf);

  printRecords(cache);

  printf("Before calling mergesort() \n");
  mergeSort(cache->records, listGetSize(cache->records), less);
}


Actual output is:

$ ./program.exe
Age,LastName,FirstName
------------------------------
50,B,A

30,A,B

20,X,D

10,F,A

90,V,E

60,N,M

Records#: 6
Before calling mergesort()
Size of list: 6
Low-0, Mid-0, High-1
leftIndex-0, rightIndex-1

In less function
left-30, Right-50

Return true
Low-0, Mid-1, High-2
leftIndex-0, rightIndex-2

In less function
left-20, Right-30

Return true
Low-3, Mid-3, High-4
leftIndex-3, rightIndex-4

In less function
left-90, Right-10

Return false
Low-3, Mid-4, High-5
leftIndex-3, rightIndex-5
Segmentation fault (core dumped)
In less function


How to resolve this problem?

Cygwin is not supporting coredump trace format using gdb, trace is provided(above)

回答1:

The two for loops in merge should start at low, not at zero. The second for loop starting at 0 is probably causing the segmentation fault. The first for loop starting at 0 shouldn't cause a segmentation fault, but it consumes extra time.

static void merge(....)

    /* ... */

    for(int index = low; index <= high; index++){   // low not 0
        aux[index] = array[index];
    }

    /* ... */

    for(int index = low; index <= high; index++){   // low not 0
        if(leftIndex > mid) /* ... */
        /* ... */


回答2:

To make a generic merge sort you must not use void **, because you will force the user to use a nested pointer too. So your implementation is not generic here.

I propose you an implementation that use pointer arithmetic, it's not easy to understand. But I think it's better than use int. I change the function less in a compare function because it's more idiomatic in C.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

typedef int msort_cmp(void const *a, void const *b);

static void msort_merge(char *begin, char const *end, char *copy_begin,
                        char *copy_mid, char const *copy_end, size_t size,
                        msort_cmp *cmp) {
  memcpy(copy_begin, begin, (size_t)(end - begin));

  for (char *right = copy_mid; begin < end; begin += size) {
    if (copy_begin >= copy_mid) {
      memcpy(begin, right, size);
      right += size;
    } else if (right >= copy_end) {
      memcpy(begin, copy_begin, size);
      copy_begin += size;
    } else if (cmp(right, copy_begin) > 0) {
      memcpy(begin, right, size);
      right += size;
    } else {
      memcpy(begin, copy_begin, size);
      copy_begin += size;
    }
  }
}

static void msort_partition(char *begin, char const *end, char *copy,
                            size_t size, msort_cmp *cmp) {
  size_t bytes = (size_t)(end - begin);
  if (bytes > size) {
    size_t tmp = bytes / 2;
    size_t offset = tmp - tmp % size;

    char *mid = begin + offset;
    msort_partition(begin, mid, copy, size, cmp);
    msort_partition(mid, end, copy, size, cmp);

    msort_merge(begin, end, copy, copy + offset, copy + bytes, size, cmp);
  }
}

static int msort_aux(char *begin, char const *end, size_t size,
                     msort_cmp *cmp) {
  char *copy = malloc((size_t)(end - begin));
  if (copy == NULL) {
    return 1;
  }

  msort_partition(begin, end, copy, size, cmp);

  return 0;
}

static int msort(void *begin, void const *end, size_t size, msort_cmp *cmp) {
  return msort_aux(begin, end, size, cmp);
}

static int cmp_int_aux(int const *a, int const *b) {
  if (*a < *b) {
    return 1;
  } else if (*a > *b) {
    return -1;
  } else {
    return 0;
  }
}

static int cmp_int(void const *a, void const *b) { return cmp_int_aux(a, b); }

static void print_int(int const *array, size_t size) {
  printf("array: ");
  for (size_t i = 0; i < size; i++) {
    printf(" %d", array[i]);
  }
  printf("\n");
}

static int cmp_pint(void const *a, void const *b) {
  return cmp_int(*(int *const *)a, *(int *const *)b);
}

static void print_pint(int *const *parray, size_t size) {
  printf("parray: ");
  for (size_t i = 0; i < size; i++) {
    printf(" %d", *parray[i]);
  }
  printf("\n");
}

#define SIZE 42

int main(void) {
  int array[SIZE];

  srand((unsigned int)time(NULL));
  for (size_t i = 0; i < SIZE; i++) {
    array[i] = rand() % SIZE - SIZE / 2;
  }

  print_int(array, SIZE);
  msort(array, array + SIZE, sizeof *array, &cmp_int);
  print_int(array, SIZE);

  int *parray[SIZE];
  for (size_t i = 0; i < SIZE; i++) {
    array[i] = rand() % SIZE - SIZE / 2;
    parray[i] = array + i;
  }

  print_pint(parray, SIZE);
  msort(parray, parray + SIZE, sizeof *parray, &cmp_pint);
  print_pint(parray, SIZE);
}