median of median implementation

2019-04-02 17:22发布

问题:

Here is pseudo code for implementation of median by dividing array into 5 groups

select(int A[],int first, int last, int i) {
    n = last - first + 1; /* n is the number elements to select from */
    if (i > n) {return ERROR;} /* there is no ith smallest element */
    if( n < = 100 ) {
        /********************* For Small n *********************/
        Run selection on A[first..last] taking at most n(n-1)/2 < 50n comparisons;
        swap A[first+i-1] with A[first] /* put ith smallest in A[first] */
    }
    else /* n > 100 */ {
        /********** main recursion *************************/
        numGroups = n / 5; /* integer division, round down */
        for group = 0 to numGroups-1 do {
            shift = group*5;
            /* A[first+shift] is the start of the group, A[first+shift+4] is end of group */
            find median of A[first+shift .. first+shift+4] and swap it into A[first + group];
        } /* for group */;
        lastMedian = first+numGroups-1;
        /* now the medians of the numGroups groups are all A[first .. lastMedian] */
        /****** the first recursive call to find median of medians ******/
        select(A, first, lastMedian, numGroups/2);
        /* now median of medians is in slot A[first] */
        /*********** partition array *********************/
        k = partition(A,first, last); /* See partition on page 146 of text */
        /* now k is the index where the median of median winds up, the smaller elements */
        /* will be in A[first..k-1] and larger elements will be in A[k+1..last] */
        /************ where is the ith smallest element? ********/
        if (k == first + i -1) {
            /* ith smallest is the median of medians in A[k] */
            swap A[k] and A[first] and return
        } else if (k > = first + i -1) {
            /* second recursion to find ith smallest among the "small" keys in A[first..k-1] */
            select(A, first, k-1, i);
        } else /* k < first + i -1 */ {
            /* second recursion to find the proper element among the "large" keys */
            numSmaller = k-first+1; /* the number of "smaller" keys not recursed on */
            newi = i - numSmaller;
            /* the ith smallest of A[first..last] is the newi smallest of A[k+1..last] */
            select(A, k+1, last, newi);
            /* ith smallest now in A[k+1], put it in A[first] */
            swap A[k+1] and A[first];
        } /* if k - second else */
    } /* if n - else part */
} /*select */

I have two questions:

  1. first one is related to partition code, here we are given only array and its bounds, no pivot element is indicated, so how this partition code should look? We should choose pivot index and pivot element as:

    int pivotindex=(end-begin)/2
    int pivot values=a[pivotindex];
    

    or it should be random choice?

  2. how to output selected median?

Generally language does not matter, but it would be great if example would be shown in C++.

回答1:

Explanation of the median - of - medians algorithm to find the k-th largest integer out of n can be found here: http://cs.indstate.edu/~spitla/presentation.pdf

Implementation in c++ is below:

#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;

int findMedian(vector<int> vec){
//    Find median of a vector
    int median;
    size_t size = vec.size();
    median = vec[(size/2)];
    return median;
}

int findMedianOfMedians(vector<vector<int> > values){
    vector<int> medians;

    for (int i = 0; i < values.size(); i++) {
        int m = findMedian(values[i]);
        medians.push_back(m);
    }

    return findMedian(medians);
}

void selectionByMedianOfMedians(const vector<int> values, int k){
//    Divide the list into n/5 lists of 5 elements each
    vector<vector<int> > vec2D;

    int count = 0;
    while (count != values.size()) {
        int countRow = 0;
        vector<int> row;

        while ((countRow < 5) && (count < values.size())) {
            row.push_back(values[count]);
            count++;
            countRow++;
        }
        vec2D.push_back(row);
    }

    cout<<endl<<endl<<"Printing 2D vector : "<<endl;
    for (int i = 0; i < vec2D.size(); i++) {
        for (int j = 0; j < vec2D[i].size(); j++) {
            cout<<vec2D[i][j]<<" ";
        }
        cout<<endl;
    }
    cout<<endl;

//    Calculating a new pivot for making splits
    int m = findMedianOfMedians(vec2D);
    cout<<"Median of medians is : "<<m<<endl;

//    Partition the list into unique elements larger than 'm' (call this sublist L1) and
//    those smaller them 'm' (call this sublist L2)
    vector<int> L1, L2;

    for (int i = 0; i < vec2D.size(); i++) {
        for (int j = 0; j < vec2D[i].size(); j++) {
            if (vec2D[i][j] > m) {
                L1.push_back(vec2D[i][j]);
            }else if (vec2D[i][j] < m){
                L2.push_back(vec2D[i][j]);
            }
        }
    }

//    Checking the splits as per the new pivot 'm'
    cout<<endl<<"Printing L1 : "<<endl;
    for (int i = 0; i < L1.size(); i++) {
        cout<<L1[i]<<" ";
    }

    cout<<endl<<endl<<"Printing L2 : "<<endl;
    for (int i = 0; i < L2.size(); i++) {
        cout<<L2[i]<<" ";
    }

//    Recursive calls
    if ((k - 1) == L1.size()) {
        cout<<endl<<endl<<"Answer :"<<m;
    }else if (k <= L1.size()) {
        return selectionByMedianOfMedians(L1, k);
    }else if (k > (L1.size() + 1)){
        return selectionByMedianOfMedians(L2, k-((int)L1.size())-1);
    }

}

int main()
{
    int values[] = {2, 3, 5, 4, 1, 12, 11, 13, 16, 7, 8, 6, 10, 9, 17, 15, 19, 20, 18, 23, 21, 22, 25, 24, 14};

    vector<int> vec(values, values + 25);

    cout<<"The given array is : "<<endl;
    for (int i = 0; i < vec.size(); i++) {
        cout<<vec[i]<<" ";
    }

    selectionByMedianOfMedians(vec, 8);

    return 0;
}


回答2:

The select code puts the median, resp. later the desired i-th smallest element into the first slot of the array, so the pivot for partitioning, the median of medians, is in A[first] (at the very end, that'll be A[0]). So to output, read this location.

The translation of the pseudocode to compilable code is straightforward, since the pseudocode is quite detailed. The only non-obvious parts are the code for the n <= 100 branch, the partitioning, and the finding of the median of 5. For the n <= 100 branch, the simplest would be a quicksort using the same partition function as select. To find the median of five elements, a simple sorting algorithm would be appropriate, bubble sort, insertion sort, champagne sort for example.

Try the translation yourself, if you have specific difficulties with it, we'll be happy to help with those.