Here's my go.
#include <random>
#include <vector>
#include <cstdio>
/*
Draw a enadom number form the hypergeomtric distribution (sampling
from a bi-valued population without replacement)
rndengine - a random number generator engine
Ngood - number of positives in the population
Nbad - number of negatives in the population
Nsample - number of samples to take (less than or equal to Ngood + Nbad)
Returns: the number of "good" elements drawn, randomly.
*/
template<class rndengine>
int rand_hypergeometric(int Ngood, int Nbad, int Nsample, rndengine &eng)
{
int answer = 0;
std::uniform_real_distribution<double> distr(0, 1);
for (int i =0; i < Nsample; i++)
{
double p = distr(eng);
if (p < ((double)Ngood)/(Ngood + Nbad))
{
answer++;
Ngood--;
}
else
{
Nbad--;
}
}
return answer;
}
/*
Recursive randunique function
out - vector to collect results
Nsamples - number of samples to take
Npopulation - size of population
indexbase - fiddle to have populations not starting from 0, pass 0 at top level.
eng - the random number engine
*/
template<class rndengine>
void randunique_r(std::vector<int> &out, int Nsamples, int Npopulation,
int indexbase, rndengine &eng)
{
if (Nsamples == 0)
return;
std::uniform_int_distribution<int> distr(0, Npopulation -1);
int i = distr(eng);
int Nsampleslow = rand_hypergeometric(i, Npopulation - i -1, Nsamples-1, eng);
randunique_r(out, Nsampleslow, i, indexbase, eng);
out.push_back(i + indexbase);
randunique_r(out, Nsamples - Nsampleslow -1, Npopulation - i -1, indexbase + i, eng);
}
/*
Draw a set of unique random samples from a population
Nsamples - the number of elemnts to choose.
Npopulation - the population size
eng - the random number engine
Returns: vector with indices of chosen members, sorted ascending.
*/
template<class rndengine>
std::vector<int> randunique(int Nsamples, int Npopulation, rndengine &eng)
{
std::vector<int> answer;
randunique_r(answer, Nsamples, Npopulation, 0, eng);
return answer;
}
int main(void)
{
std::mt19937 engine(123);
std::vector<int> result = randunique(10, 20, engine);
for (int i =0; i <result.size(); i++)
printf("%d, ", result[i]);
printf("\n");
return 0;
}
Not very efficient. But the rand_hypergeometric functon is written naively.
It can be rewritten to run a lot faster, though this isn't easy.