I have a set of data which is split into two arrays (let's call them data
and keys
). That is, for any given item with an index i
, I can access the data for that item with data[i]
and the key for that item with keys[i]
. I cannot change this structure (eg, to interleave keys and data into a s开发者_如何学运维ingle array), because I need to pass the data
array to a library function which expects a certain data layout.
How can I sort both arrays (preferably using standard library functions) according to the content of the keys
array?
Create a vector of objects that contain indices to the two arrays. Define operator<
for that object to do the comparison based on keys[index]
. Sort that vector. When you're done, walk through that vector and put your original objects into the order defined by those proxy objects:
// warning: untested code.
struct sort_proxy {
size_t i;
bool operator<(sort_proxy const &other) const {
return keys[i] < keys[other.i];
}
};
// ... key_size = number of keys/data items.
std::vector<sort_proxy> proxies(key_size);
for (i=0; i<keys_size; i++)
proxies[i].i = i;
std::sort(proxies.begin(), proxies.end());
// in-place reordering left as an exercise for the reader. :-)
for (int i=0; i<proxies.size(); i++) {
result_keys[i] = keys[proxies[i].i];
result_data[i] = data[proxies[i].i];
}
Here is a sample implementation which defines a new iterator type to provide a paired view over two sequences. I have tried to make it standards compliant and correct, but since the C++ standard is hideously complex in its details, I am almost certain that I have failed.
I will say that this code appears to work when built with clang++
or g++
.
This code is not recommended for general use, since it is longer and less understandable than the other answers, and possibly invokes the dreaded 'undefined behaviour'.
However, it does have the advantage of constant time and space overhead since it provides a view on existing data rather than actually building a temporary alternate representation or permutation vector. The most obvious (to me) performance problem with this code is that individual elements of the two containers will have to be copied during the swap operation. Despite several attempts, I have not found a way to successfully specialise std::swap
such that std::sort
or std::random_shuffle
will avoid using the default temporary-copy based swap implementation. It is possible that use of the C++0x rvalue reference system (see std::move
and Jon Purdy's answer) could solve this.
#ifndef HDR_PAIRED_ITERATOR
#define HDR_PAIRED_ITERATOR
#include <iterator>
/// pair_view mostly looks like a std::pair,
/// and can decay to a std::pair, but is really a pair of references
template <typename ItA, typename ItB>
struct pair_view {
typedef typename ItA::value_type first_type;
typedef typename ItB::value_type second_type;
typedef std::pair<first_type, second_type> pair_type;
pair_view() {}
pair_view(const ItA &a, const ItB &b):
first(*a), second(*b) {}
pair_view &operator=(const pair_view &x)
{ first = x.first; second = x.second; return *this; }
pair_view &operator=(const pair_type &x)
{ first = x.first; second = x.second; return *this; }
typename ItA::reference first;
typename ItB::reference second;
operator pair_type() const
{ return std::make_pair(first, second); }
friend bool operator==(const pair_view &a, const pair_view &b)
{ return (a.first == b.first) && (a.second == b.second); }
friend bool operator<(const pair_view &a, const pair_view &b)
{ return (a.first < b.first) || ((a.first == b.first) && (a.second < b.second)); }
friend bool operator!=(const pair_view &a, const pair_view &b)
{ return !(a == b); }
friend bool operator>(const pair_view &a, const pair_view &b)
{ return (b < a); }
friend bool operator<=(const pair_view &a, const pair_view &b)
{ return !(b < a); }
friend bool operator>=(const pair_view &a, const pair_view &b)
{ return !(a < b); }
friend bool operator==(const pair_view &a, const pair_type &b)
{ return (a.first == b.first) && (a.second == b.second); }
friend bool operator<(const pair_view &a, const pair_type &b)
{ return (a.first < b.first) || ((a.first == b.first) && (a.second < b.second)); }
friend bool operator!=(const pair_view &a, const pair_type &b)
{ return !(a == b); }
friend bool operator>(const pair_view &a, const pair_type &b)
{ return (b < a); }
friend bool operator<=(const pair_view &a, const pair_type &b)
{ return !(b < a); }
friend bool operator>=(const pair_view &a, const pair_type &b)
{ return !(a < b); }
friend bool operator==(const pair_type &a, const pair_type &b)
{ return (a.first == b.first) && (a.second == b.second); }
friend bool operator<(const pair_type &a, const pair_type &b)
{ return (a.first < b.first) || ((a.first == b.first) && (a.second < b.second)); }
friend bool operator!=(const pair_type &a, const pair_type &b)
{ return !(a == b); }
friend bool operator>(const pair_type &a, const pair_type &b)
{ return (b < a); }
friend bool operator<=(const pair_type &a, const pair_type &b)
{ return !(b < a); }
friend bool operator>=(const pair_type &a, const pair_type &b)
{ return !(a < b); }
};
template <typename ItA, typename ItB>
struct paired_iterator {
// --- standard iterator traits
typedef typename pair_view<ItA, ItB>::pair_type value_type;
typedef pair_view<ItA, ItB> reference;
typedef paired_iterator<ItA,ItB> pointer;
typedef typename std::iterator_traits<ItA>::difference_type difference_type;
typedef std::random_access_iterator_tag iterator_category;
// --- methods not required by the Random Access Iterator concept
paired_iterator(const ItA &a, const ItB &b):
a(a), b(b) {}
// --- iterator requirements
// default construction
paired_iterator() {}
// copy construction and assignment
paired_iterator(const paired_iterator &x):
a(x.a), b(x.b) {}
paired_iterator &operator=(const paired_iterator &x)
{ a = x.a; b = x.b; return *this; }
// pre- and post-increment
paired_iterator &operator++()
{ ++a; ++b; return *this; }
paired_iterator operator++(int)
{ paired_iterator tmp(*this); ++(*this); return tmp; }
// pre- and post-decrement
paired_iterator &operator--()
{ --a; --b; return *this; }
paired_iterator operator--(int)
{ paired_iterator tmp(*this); --(*this); return tmp; }
// arithmetic
paired_iterator &operator+=(const difference_type &n)
{ a += n; b += n; return *this; }
friend paired_iterator operator+(const paired_iterator &x, const difference_type &n)
{ return paired_iterator(x.a+n, x.b+n); }
friend paired_iterator operator+(const difference_type &n, const paired_iterator &x)
{ return paired_iterator(x.a+n, x.b+n); }
paired_iterator &operator-=(const difference_type &n)
{ a -= n; b -= n; return *this; }
friend paired_iterator operator-(const paired_iterator &x, const difference_type &n)
{ return paired_iterator(x.a-n, x.b-n); }
friend difference_type operator-(const paired_iterator &x, const paired_iterator &y)
{ return (x.a - y.a); }
// (in-)equality and ordering
friend bool operator==(const paired_iterator &x, const paired_iterator &y)
{ return (x.a == y.a) && (x.b == y.b); }
friend bool operator<(const paired_iterator &x, const paired_iterator &y)
{ return (x.a < y.a); }
// derived (in-)equality and ordering operators
friend bool operator!=(const paired_iterator &x, const paired_iterator &y)
{ return !(x == y); }
friend bool operator>(const paired_iterator &x, const paired_iterator &y)
{ return (y < x); }
friend bool operator<=(const paired_iterator &x, const paired_iterator &y)
{ return !(y < x); }
friend bool operator>=(const paired_iterator &x, const paired_iterator &y)
{ return !(x < y); }
// dereferencing and random access
reference operator*() const
{ return reference(a,b); }
reference operator[](const difference_type &n) const
{ return reference(a+n, b+n); }
private:
ItA a;
ItB b;
};
template <typename ItA, typename ItB>
paired_iterator<ItA, ItB> make_paired_iterator(const ItA &a, const ItB &b)
{ return paired_iterator<ItA, ItB>(a, b); }
#endif
#include <vector>
#include <algorithm>
#include <iostream>
template <typename ItA, typename ItB>
void print_kvs(const ItA &k0, const ItB &v0, const ItA &kn, const ItB &vn) {
ItA k(k0);
ItB v(v0);
while (k != kn || v != vn) {
if (k != kn && v != vn)
std::cout << "[" << *k << "] = " << *v << "\n";
else if (k != kn)
std::cout << "[" << *k << "]\n";
else if (v != vn)
std::cout << "[?] = " << *v << "\n";
if (k != kn) ++k;
if (v != vn) ++v;
}
std::cout << std::endl;
}
int main() {
std::vector<int> keys;
std::vector<std::string> data;
keys.push_back(0); data.push_back("zero");
keys.push_back(1); data.push_back("one");
keys.push_back(2); data.push_back("two");
keys.push_back(3); data.push_back("three");
keys.push_back(4); data.push_back("four");
keys.push_back(5); data.push_back("five");
keys.push_back(6); data.push_back("six");
keys.push_back(7); data.push_back("seven");
keys.push_back(8); data.push_back("eight");
keys.push_back(9); data.push_back("nine");
print_kvs(keys.begin(), data.begin(), keys.end(), data.end());
std::cout << "Shuffling\n";
std::random_shuffle(
make_paired_iterator(keys.begin(), data.begin()),
make_paired_iterator(keys.end(), data.end())
);
print_kvs(keys.begin(), data.begin(), keys.end(), data.end());
std::cout << "Sorting\n";
std::sort(
make_paired_iterator(keys.begin(), data.begin()),
make_paired_iterator(keys.end(), data.end())
);
print_kvs(keys.begin(), data.begin(), keys.end(), data.end());
std::cout << "Sort descending\n";
std::sort(
make_paired_iterator(keys.begin(), data.begin()),
make_paired_iterator(keys.end(), data.end()),
std::greater< std::pair<int,std::string> >()
);
print_kvs(keys.begin(), data.begin(), keys.end(), data.end());
return 0;
}
You could use a map:
int main() {
vector<int> keys;
vector<string> data;
keys.push_back(5); data.push_back("joe");
keys.push_back(2); data.push_back("yaochun");
keys.push_back(1); data.push_back("holio");
// load the keys and data to the map (they will automatically be inserted in sorted order by key)
map<int, string> sortedVals;
for(int i = 0; i < (int)keys.size(); ++i) {
sortedVals[keys[i]] = data[i];
}
// copy the map values back to vectors
int ndx=0;
for(map<int, string>::iterator it = sortedVals.begin(); it != sortedVals.end(); ++it) {
keys[ndx] = it->first;
data[ndx] = it->second;
++ndx;
}
// verify
for(int i = 0; i < (int)keys.size(); ++i) {
cout<<keys[i]<<" "<<data[i]<<endl;
}
return 0;
}
Here's the output:
---------- Capture Output ----------
> "c:\windows\system32\cmd.exe" /c c:\temp\temp.exe
1 holio
2 yaochun
5 joe
> Terminated with exit code 0.
You can use functors to do the sorting, for example:
template <class T>
struct IndexFunctor {
IndexFunctor(const std::vector<T>& v_) : v(v_) {}
bool operator ()(int a, int b) const {
return v[a] < v[b];
}
const std::vector<T>& v;
};
template <class K, class D>
void SortByKeys(std::vector<K>& keys, std::vector<D>& data) {
// Calculate the valid order inside a permutation array p.
const int n = static_cast<int>(keys.size());
std::vector<int> p(n);
for (int i = 0; i < n; ++i) p[i] = i;
std::sort(p.begin(), p.end(), IndexFunctor(keys));
// Reorder the keys and data in temporary arrays, it cannot be done in place.
std::vector<K> aux_keys(n);
std::vector<D> aux_data(n);
for (int i = 0; i < n; ++i) {
aux_keys[i] = keys[p[i]];
aux_data[i] = data[p[i]];
}
// Assign the ordered vectors by swapping, it should be faster.
keys.swap(aux_keys);
data.swap(aux_data);
}
This problem really got me thinking. I came up with a solution that makes use of some C++0x features to get a very STL-like parallel_sort
algorithm. In order to perform the sort "in-place", I had to write a back_remove_iterator
as the counterpart of back_insert_iterator
to allow the algorithm to read from and write to the same container. You can skip over those parts and go straight to the interesting stuff.
I haven't put it through any hardcore testing, but it seems reasonably efficient in both time and space, principally due to the use of std::move()
to prevent unnecessary copying.
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
//
// An input iterator that removes elements from the back of a container.
// Provided only because the standard library neglects one.
//
template<class Container>
class back_remove_iterator :
public std::iterator<std::input_iterator_tag, void, void, void, void> {
public:
back_remove_iterator() : container(0) {}
explicit back_remove_iterator(Container& c) : container(&c) {}
back_remove_iterator& operator=
(typename Container::const_reference value) { return *this; }
typename Container::value_type operator*() {
typename Container::value_type value(container->back());
container->pop_back();
return value;
} // operator*()
back_remove_iterator& operator++() { return *this; }
back_remove_iterator operator++(int) { return *this; }
Container* container;
}; // class back_remove_iterator
//
// Equivalence operator for back_remove_iterator. An iterator compares equal
// to the end iterator either if it is default-constructed or if its
// container is empty.
//
template<class Container>
bool operator==(const back_remove_iterator<Container>& a,
const back_remove_iterator<Container>& b) {
return !a.container ? !b.container || b.container->empty() :
!b.container ? !a.container || a.container->empty() :
a.container == b.container;
} // operator==()
//
// Inequivalence operator for back_remove_iterator.
//
template<class Container>
bool operator!=(const back_remove_iterator<Container>& a,
const back_remove_iterator<Container>& b) {
return !(a == b);
} // operator!=()
//
// A handy way to default-construct a back_remove_iterator.
//
template<class Container>
back_remove_iterator<Container> back_remover() {
return back_remove_iterator<Container>();
} // back_remover()
//
// A handy way to construct a back_remove_iterator.
//
template<class Container>
back_remove_iterator<Container> back_remover(Container& c) {
return back_remove_iterator<Container>(c);
} // back_remover()
//
// A comparison functor that sorts std::pairs by their first element.
//
template<class A, class B>
struct sort_pair_by_first {
bool operator()(const std::pair<A, B>& a, const std::pair<A, B>& b) {
return a.first < b.first;
} // operator()()
}; // struct sort_pair_by_first
//
// Performs a parallel sort of the ranges [keys_first, keys_last) and
// [values_first, values_last), preserving the ordering relation between
// values and keys. Sends key and value output to keys_out and values_out.
//
// This works by building a vector of std::pairs, sorting them by the key
// element, then returning the sorted pairs as two separate sequences. Note
// the use of std::move() for a vast performance improvement.
//
template<class A, class B, class I, class J, class K, class L>
void parallel_sort(I keys_first, I keys_last, J values_first, J values_last,
K keys_out, L values_out) {
typedef std::vector< std::pair<A, B> > Pairs;
Pairs sorted;
while (keys_first != keys_last)
sorted.push_back({std::move(*keys_first++), std::move(*values_first++)});
std::sort(sorted.begin(), sorted.end(), sort_pair_by_first<A, B>());
for (auto i = sorted.begin(); i != sorted.end(); ++i)
*keys_out++ = std::move(i->first),
*values_out++ = std::move(i->second);
} // parallel_sort()
int main(int argc, char** argv) {
//
// There is an ordering relation between keys and values,
// but the sets still need to be sorted. Sounds like a job for...
//
std::vector<int> keys{0, 3, 1, 2};
std::vector<std::string> values{"zero", "three", "one", "two"};
//
// parallel_sort! Unfortunately, the key and value types do need to
// be specified explicitly. This could be helped with a utility
// function that accepts back_remove_iterators.
//
parallel_sort<int, std::string>
(back_remover(keys), back_remover<std::vector<int>>(),
back_remover(values), back_remover<std::vector<std::string>>(),
std::back_inserter(keys), std::back_inserter(values));
//
// Just to prove that the mapping is preserved.
//
for (unsigned int i = 0; i < keys.size(); ++i)
std::cout << keys[i] << ": " << values[i] << '\n';
return 0;
} // main()
I hope this proves useful, or at least entertaining.
It turns out that Boost contains an iterator which does pretty much what the paired_iterator
from my other answer does:
Boost.Iterator Zip Iterator
This seems like the best option.
I don't know whether following exploitation of knowing of std::swap
implementation details is UB or not. I think "no".
#include <iostream>
#include <iomanip>
#include <type_traits>
#include <utility>
#include <iterator>
#include <algorithm>
#include <numeric>
#include <deque>
#include <forward_list>
#include <vector>
#include <cstdlib>
#include <cassert>
template< typename pattern_iterator, typename target_iterator >
void
pattern_sort(pattern_iterator pbeg, pattern_iterator pend, target_iterator tbeg, target_iterator tend)
{
//assert(std::distance(pbeg, pend) == std::distance(tbeg, tend));
using pattern_traits = std::iterator_traits< pattern_iterator >;
using target_traits = std::iterator_traits< target_iterator >;
static_assert(std::is_base_of< std::forward_iterator_tag, typename pattern_traits::iterator_category >{});
static_assert(std::is_base_of< std::forward_iterator_tag, typename target_traits::iterator_category >{});
struct iterator_adaptor
{
iterator_adaptor(typename pattern_traits::reference pattern,
typename target_traits::reference target)
: p(&pattern)
, t(&target)
{ ; }
iterator_adaptor(iterator_adaptor &&)
: p(nullptr)
, t(nullptr)
{ ; }
void
operator = (iterator_adaptor && rhs) &
{
if (!!rhs.p) {
assert(!!rhs.t);
std::swap(p, rhs.p);
std::iter_swap(t, rhs.t);
}
}
bool
operator < (iterator_adaptor const & rhs) const
{
return (*p < *rhs.p);
}
private :
typename pattern_traits::pointer p;
typename target_traits::pointer t;
};
std::deque< iterator_adaptor > proxy_; // std::vector can be used instead
//proxy_.reserve(static_cast< std::size_t >(std::distance(pbeg, pend))); // it's (maybe) worth it if proxy_ is std::vector and if walking through whole [tbeg, tend) range is not too expensive operation (in case if target_iterator is worse then RandomAccessIterator)
auto t = tbeg;
auto p = pbeg;
while (p != pend) {
assert(t != tend);
proxy_.emplace_back(*p, *t);
++p;
++t;
}
std::sort(std::begin(proxy_), std::end(proxy_));
}
int
main()
{
std::forward_list< int > keys{5, 4, 3, 2, 1};
std::vector< std::size_t > indices(static_cast< std::size_t >(std::distance(std::cbegin(keys), std::cend(keys))));
std::iota(std::begin(indices), std::end(indices), std::size_t{0}); // indices now: 0, 1, 2, 3, 4
std::copy(std::cbegin(keys), std::cend(keys), std::ostream_iterator< int >(std::cout, " ")); std::cout << std::endl;
std::copy(std::cbegin(indices), std::cend(indices), std::ostream_iterator< std::size_t >(std::cout, " ")); std::cout << std::endl;
pattern_sort(std::cbegin(keys), std::cend(keys), std::begin(indices), std::end(indices)); std::cout << std::endl;
std::copy(std::cbegin(keys), std::cend(keys), std::ostream_iterator< int >(std::cout, " ")); std::cout << std::endl;
std::copy(std::cbegin(indices), std::cend(indices), std::ostream_iterator< std::size_t >(std::cout, " ")); std::cout << std::endl;
// now one can use indices to access keys and data to as ordered containers
return EXIT_SUCCESS;
}
精彩评论