Updated with my own version of the program :
I'm trying to do an iterative dynamic programming to generate n choose k
combination.
Say I've 4 vectors of values
v1 : 1 1 1
v2 : 2 2 2
v3 : 3 3 3
v4 : 4 4 4
Now I use addition as my aggregate function and I want to generate 4 choose 2
combinations of vectors as follows :
v1v2 : 3 3 3
v1v3 : 4 4 4
v1v4 : 5 5 5
v2v3 : 5 5 5
v2v4 : 6 6 6
v3v4 : 7 7 7
A naive way to do this would be to go through every pair and find the result. if the N
and k
are very large this is very very inefficient. So an alternative way to this would be recursive/iterative dynamic programming. Recursion for very large N and k would take up a lot of memory so the ideal way to this would be iterative dynamic programming which can be done as follows :
Consider the following table row header is N
and the column header is k
and our objective is to find N choose k
:
We can find N choose k
combination using dynamic program by following way :
The approach is as follows :
- Block[0,1] and Block[0,2] always returns [ ]. {[ ] denotes an empty value as there are no values there}.
- Block[1,1] receives [ ], computes {v1} + [ ] (which is v1 itself) , saves it to Block [1,1].
- Block[1,2] receives [ ], does {v1} + [ ] & {v2} + [ ], saves it to Block [1,2].
- Block[1,3] receives [ ], does {v1} + [ ], {v2} + [ ] + {v3} U [ ], Block [1,3].
- Block[2,4] receives :
- [{v1}] from [1,1] and computes {v1} + {v2},
- [{v1}{v2}] from [1,2] and computes {v1} + {v3} and {v2} + {v3}
- [{v1}, {v2}, {v3}] from [1,3] and computes {v4} + {v1}, {v4} + {v2} and {v4} + {v3}, saves it to Block [2,4].
Now we have all the values we need in Block [2,4]. how can i code this concept efficiently in C++ ?
Any help is greatly appreciated. Thanks.
Here is my idea :
I don't know if this is right. Sorry
=========================================================
//Block [0...k][0...n];
//Block[i][j] contains i-set groups (for eg :if i = 2 it will have v1v2, v1v3, etc..)
//initially, Block[0][i] = [ ] for 0 <= i <= n and all other Block [i][j] = [ $ ]
// "$" is just a symbol to indicate that no computation is done on that block
algorithm(int k, int n) //k-point skyline groups from n number of points.
{
if( Block[k][n] != [ $ ] ) return memory[k][n];
Group = [ ]; //G indicate a collection of vectors
for( int i = k; i <= n; i++ )
{
Group` = algorithm(k-1, i-1);
for( each v` in Group` )
{
Group = Group + (group` + v_i);
}
}
memory[k][n] = Group;
return Group;
}
=========================================================
Here is my program for the above mentioned algorithm :
#include <iostream>
#include <iterator>
#include <set>
#include <vector>
#include <map>
#define DIMENSIONS 5 // No. of elements in the vector. eg. v1: 1 1 1 1 1
using namespace std;
typedef std::set < int > my_set; // To hold vector id's
typedef std::vector < int > my_vector; // To hold values of the vector's
typedef std::vector < std::pair < my_set, my_vector > > my_vector_pair;
typedef std::map < my_set, my_vector > my_map;
typedef std::vector < vector < std::pair < int,my_map > > > my_pair;
typedef my_map::iterator m_it;
my_vector_pair bases; // To hold all the initial <id,vector_values> pair
my_map data, G;
my_pair memory;
void print(my_map& data)
{
for( m_it it(data.begin()) ; it!=data.end(); ++it)
{
cout << "Id : ";
copy(it->first.begin(), it->first.end(), ostream_iterator<int>(cout, " "));
cout << " => value : ";
copy (it->second.begin(),it->second.end(),ostream_iterator<int>(cout," "));
cout << endl;
}
cout << "---------------------------------------------------------------\n";
}
my_map union_(my_map& G, int p)
{
static my_map result;
my_set id;
my_vector scores;
result.clear();
for (m_it it(G.begin()); it != G.end(); ++it)
{
id = it->first;
scores = it->second;
id.insert( bases.at(p-1).first.begin(),bases.at(p-1).first.end() );
for (int j = 0; j < DIMENSIONS; j++)
{
scores.at(j) += bases.at(p - 1).second.at(j);
}
result.insert(make_pair(id, scores));
}
return result;
}
my_map algorithm_(int k, int n) {
unsigned long size = memory.at(n).size();
for (unsigned long i = 0; i < size; i++) {
if (memory.at(n).at(i).first == k) {
return memory.at(n).at(i).second; //if exists in hash table then no need to calculate again
}
}
my_map G_k_1;
if (k != n)
{
G_k_1 = algorithm_(k, n - 1);
if(G_k_1.size() == 0)
{
return G_k_1;
}
}
G_k_1 = algorithm_(k - 1, n - 1);
if(G_k_1.size() == 0)
{
return G_k_1;
}
G_k_1 = union_(G_k_1, n);
if (k != n) {
for (unsigned long i = 0; i < memory.at(n - 1).size(); i++) {
if (memory.at(n - 1).at(i).first == k) {
G_k_1.insert(memory.at(n - 1).at(i).second.begin(), memory.at(n - 1).at(i).second.end());
memory.at(n - 1).at(i).second.clear();
break;
}
}
}
std::pair<int,my_map> temp;
temp.first = k ;
temp.second = G_k_1;
memory.at(n).push_back( temp ); //storing in hash table for further use
return memory.at(n).back().second;
}
int main()
{
my_vector v1,v2,v3,v4,v5;
my_set s1,s2,s3,s4,s5;
for(int i = 1; i<=5; ++i)
{
v1.push_back(1);
v2.push_back(2);
v3.push_back(3);
v4.push_back(4);
v5.push_back(5);
}
s1.insert(1);
s2.insert(2);
s3.insert(3);
s4.insert(4);
s5.insert(5);
bases.insert(bases.end(),make_pair(s1,v1));
bases.insert(bases.end(),make_pair(s2,v2));
bases.insert(bases.end(),make_pair(s3,v3));
bases.insert(bases.end(),make_pair(s4,v4));
bases.insert(bases.end(),make_pair(s5,v5));
my_set empty_set;
my_vector empty_group(DIMENSIONS);
G.insert(make_pair(empty_set,empty_group));
vector<std::pair<int,my_map> > empty_element;
empty_element.push_back(make_pair(0,G));
for (int i = 0; i <= 5; i++) { // 5 is the total number od vectors : v1,v2,v3,v4,v5
memory.push_back(empty_element);
}
data.insert(bases.begin(),bases.end());
cout << endl << "The intial set of vectors are : " << endl;
print ( data );
int k;
cout << "N = 5 " << endl << "Enter the value of k : ";
cin >> k;
cout << "The values for N choose k are : " << endl;
data = algorithm_(k,5);
print ( data );
}
If you run the program you know what I wanted to achieve and in what way. This ALGORITHM (not program) might not be efficient for a smaller number of vectors but it will be when N > 50k and k ~ 10. I'm aware the implementation of the algorithm (my program) is very inefficient. Is there any way to improve it ? I think the same algorithm can be implemented in a much more elegant fashion. Any help is much appreciated. Thanks.