Full disclosure -- I have also posted this implementation of a solution to the maximum gap problem to sci.math (no answer received there so far).
Although my algorithm appears to work, I'm concerned that my use of data structures is poor. Any ideas for improvement? Or does anyone think it's fine?
Thank you,
Paul.
/* Given an unsorted array, find the maximum difference between the successive elements in its sorted form.
Try to solve it in linear time/space. Return 0 if the array contains less than 2 elements.
You may assume all elements in the array are non-negative integers and fit in the 32-bit signed integer range.*/
// Algorithm implemented is:
http://cgm.cs.mcgill.ca/~godfried/teaching/dm-reading-assignments/Maximum-Gap-Problem.pdf
#include <utility>
#include <vector>
#include <stdexcept>
#include <unordered_map>
#include <algorithm>
#include <cstdlib>
#include <iostream>
// Step 1. Use a pair to find the min and the max.
std::pair<int, int> findExtrema(const std::vector<int>& vec)
{
if(vec.empty())
throw std::runtime_error("Empty vector");
int min = vec[0];
int max = vec[0];
for(unsigned i = 1; i < vec.size(); ++i)
{
if(vec[i] < min)
min = vec[i];
else if (vec[i] > max)
max = vec[i];
}
return {min, max};
}
// Step 2.
// Divide the min - max interval into n - 1 buckets of equal size.
// The end-points of each bucket are generated and returned in order.
/* std::vector<int> buckets(const std::vector<int>& vec, const std::pair<int, int>& extrema)
{
if(vec.size() < 2)
throw std::runtime_error("Vector with less than two elements.");
int min = extrema.first;
int max = extrema.second;
std::vector<int> result;
for(int i = 0; i < vec.size() - 1; ++i)
result.push_back (min + i * (max - min)/(vec.size() - 1));
}*/
// Step 3. Use an unordered_map (hash table) to obtain the correct bucket index (as in step 2).
// for each element of the vector.
std::unordered_map<int, unsigned> index(const std::vector<int>& vec)
{
if(vec.size() < 2)
throw std::runtime_error("vector has less than two elements");
const auto minMax = findExtrema(vec);
const int min = minMax.first;
const int max = minMax.second;
const double intervalWidth = max - min;
const double bucketSize = intervalWidth/(vec.size()-1);
std::unordered_map<int, unsigned> result;
for(unsigned i = 0; i < vec.size(); ++i)
result[vec[i]] = std::floor((vec[i] - min)/bucketSize);
return result;
}
// Step 4. For each bucket, define a pair of integers.
// One integer corresponds to the max within the bucket. The other corresponds
// to the min within the bucket.
// However some buckets are empty.
std::vector<std::pair<int, int> > extremaInBuckets(const std::vector<int>& vec)
{
std::vector<std::pair<int, int> > result;
auto hash = index(vec);
std::vector<std::vector<int>> vectorOfBuckets(hash.size());
for(unsigned i = 0; i < vec.size(); ++i)
vectorOfBuckets[hash[vec[i]]].push_back(vec[i]);
for(unsigned i = 0; i < hash.size(); ++i)
if(!vectorOfBuckets[i].empty())
result.push_back(findExtrema(vectorOfBuckets[i]));
return result;
}
// Step 5. Use the above function to obtain the max gap
int maxGap(const std::vector<int>& vec)
{
if(vec.size() < 2)
return 0;
auto gaps = extremaInBuckets(vec);
if(gaps.size() < 2)
throw std::runtime_error("Algorithm not performing as expected.");
int max = 0;
for(unsigned i = 0; i < gaps.size() - 1; ++i)
max = std::max(gaps[i+1].first - gaps[i].second, max);
return max;
}
int main()
{
std::vector<int>testVec;
for(int i = 0; i < 20; ++i)
testVec.push_back(rand());
std::cout << "Original vector is\n";
for(int i = 0; i < testVec.size(); ++ i)
std::cout << testVec[i] << "\n";
std::cout << "\n Sorted vector is \n";
std::vector<int> copyVec = testVec;
std::sort(copyVec.begin(), copyVec.end());
for(int i = 0; i < copyVec.size(); ++ i)
std::cout << copyVec[i] << "\n";
std::cout << "\n Max gap is " << maxGap(testVec);
}