Implementation of Auction Algorithm via Boost Graph Library C++ - c++

As I cited in previous question:
Is it possible to generate multiple custom vertices using the Bundle Properties from Boost Graph Library?
Boost Maximum Weighted Matching in undirected bipartite random graphs hangs in an infinite loop
I'm working on a benchmark application that compares the performance of Boost's maximum weighted matching with that of the auction algorithm for the transportation problem when solving the assignment problem on bipartite graphs.
Currently I've implemented a version of the auction algorithm using the bundled properties of the Boost Graph Library; this implementation is inspired by a vector-based version from GitHub. I did this to put both algorithms on the same level and make the benchmark fair. Here it is:
#include "../include/Auction.h"
#include "../include/BipartiteGraph.h"
/**
 * Runs the auction algorithm on a graph whose vertices hold either a Bidder
 * or an Item, recording the matching in the graph bundle
 * (`bidder2item` / `item2bidder`).
 *
 * Each pass of the outer loop has three phases:
 *  1. Bid: every unassigned bidder finds its best and second-best item by
 *     (edge weight - item cost).
 *  2. Compete: the bidder offers (best - second best + eps) for its best item;
 *     the item remembers the highest offer.
 *  3. Assign: every item holding a bid raises its cost by that bid and is given
 *     to its highest bidder, evicting the previous owner if there was one.
 *
 * @param graph   graph to solve; vertex state and the graph bundle are modified.
 * @param n       number of bidders; item `i` is looked up at vertex `i + n`.
 * @param elapsed receives `now() - t_start` for the bidding loop.
 */
void auction_algorithm(Graph& graph, const int& n, duration& elapsed) {
    const Weight eps = 1;
    int unassigned_bidders = n;
    GraphProp& gp = graph[boost::graph_bundle];

    EdgeFilter any_interconnect = boost::keep_all{};
    // Capture the graph by reference: capturing it by value copied the entire
    // graph into each predicate. The predicates never outlive this function.
    VertexFilter bidders = [&graph](V v) -> bool { return boost::get<Bidder>(&graph[v]) != nullptr; };
    VertexFilter items = [&graph](V v) -> bool { return boost::get<Item>(&graph[v]) != nullptr; };
    FMap map_bidders = FMap(graph, any_interconnect, bidders);
    FMap map_items = FMap(graph, any_interconnect, items);
    auto iterator_bidder = boost::make_iterator_range(boost::vertices(map_bidders));
    auto iterator_item = boost::make_iterator_range(boost::vertices(map_items));

    auto t_start = now();
    while (unassigned_bidders > 0) {
        for (auto uncasted_bidder : iterator_bidder) {
            // Bidders that already own an item do not bid this round.
            if (gp.bidder2item[static_cast<int>(uncasted_bidder)] != -1) continue;
            Bidder* bidder = boost::get<Bidder>(&graph[uncasted_bidder]);

            // 1 Bid
            // NOTE(review): -1 is used as the "nothing seen yet" value, so an item
            // is only selected when its net value exceeds -1 — confirm that all
            // net values are expected to stay above -1.
            int id_item1 = -1;
            Weight val_item1 = -1;
            Weight val_item2 = -1;
            for (auto uncasted_item : iterator_item) {
                Item* item = boost::get<Item>(&graph[static_cast<int>(uncasted_item)]);
                Weight val = boost::get(boost::edge_weight_t(), graph, (boost::edge(uncasted_bidder, uncasted_item, graph)).first) - item->cost;
                if (val > val_item1) {
                    val_item2 = val_item1;
                    val_item1 = val;
                    id_item1 = item->id;
                }
                else if (val > val_item2) {
                    val_item2 = val;
                }
            }
            bidder->best_item = id_item1 + n;
            bidder->val_first_best_item = val_item1;
            bidder->val_second_best_item = val_item2;

            // 2 Compete
            Weight bid = bidder->val_first_best_item - bidder->val_second_best_item + eps;
            auto best_item = boost::get<Item>(&graph[bidder->best_item]);
            if (bid > best_item->high_bid) {
                best_item->high_bid = bid;
                best_item->high_bidder = bidder->id;
            }
        }

        // 3 Assign
        // NOTE(review): high_bid is not reset anywhere in this function, so an item
        // that received a bid once is processed again on every later pass —
        // confirm whether a reset is expected here.
        for (auto uncasted_item : iterator_item) {
            Item* item = boost::get<Item>(&graph[uncasted_item]);
            if (item->high_bid == -1) continue;
            item->cost += item->high_bid;
            if (gp.item2bidder[item->id] != -1) {
                // Evict the previous owner; it becomes unassigned again.
                gp.bidder2item[gp.item2bidder[item->id]] = -1;
                unassigned_bidders++;
            }
            gp.item2bidder[item->id] = item->high_bidder;
            gp.bidder2item[gp.item2bidder[item->id]] = item->id;
            unassigned_bidders--;
        }
    }
    elapsed = now() - t_start;
}
/// Runs the auction on `graph` (half of its vertices are taken as bidders),
/// prints the (bidder,item) pairs and returns the summed weight of the matched edges.
Weight perform_au(Graph& graph, duration& elapsed) {
    const int n = static_cast<int>(boost::num_vertices(graph) / 2);
    auction_algorithm(graph, n, elapsed);

    // The matching is stored in the graph bundle by auction_algorithm.
    auto& bidder2item = graph[boost::graph_bundle].bidder2item;

    Weight total_cost_auction = 0;
    std::cout << "\nThe matching is: ";
    for (int bidder = 0; bidder < n; ++bidder) {
        std::cout << "(" << bidder << "," << bidder2item[bidder] << ")";
        const int item = bidder2item[bidder];
        // Item `item` lives at vertex `item + n`.
        const auto matched_edge = boost::edge(bidder, item + n, graph).first;
        total_cost_auction += boost::get(boost::edge_weight_t(), graph, matched_edge);
    }
    std::cout << "\n";
    return total_cost_auction;
}
I have compared this to the vector implementation and noticed that the latter is much faster than mine (however, they return the same total cost). Is it due to the complexity of boost::get? If so, why is it so heavy?
I'm using the g++ compiler on a Ubuntu machine and to compile the application I run the following line in my console:
g++ -std=c++2a -o ../bin/app BipartiteGraph.cpp MaximumWeightedMatching.cpp Auction.cpp AuctionArray.cpp Main.cpp
I share the link of my github repository so you can have a look at the whole project.
PS: If you have any suggestions for speeding up the algorithm, that would be great!
UPDATE: 09/08/2022
Requirement: Make the auction algorithm generic like the style of the Boost Graph Library. This is the last implementation that I've made.
UPDATE: 10/08/2022
I've made a class that maintains all the state that was previously kept in the bundled properties:
UPDATE: 14/08/2022
Actual version
/// Solves the assignment problem on `graph` with the Auction class.
///
/// @param graph          graph to solve; half of its vertices are taken as bidders.
/// @param elapsed        receives the time spent inside auction_algorithm().
/// @param n_iteration_au receives the number of auction rounds performed.
/// @param verbose        when true, the bidder/item state is printed as well.
/// @return the total cost reported by Auction::getTotalCost().
Weight perform_au(const Graph& graph, Duration& elapsed, int& n_iteration_au, bool verbose)
{
    const int n = static_cast<int>(boost::num_vertices(graph) / 2);
    std::vector<int> assignments(n);
    Auction<Graph, Weight> auction_problem(n);

    // Only the solver call itself is timed.
    const auto t_start = now();
    auction_problem.auction_algorithm(graph, assignments);
    elapsed = now() - t_start;

    std::cout << " Finished \nThe matching is: ";
    int bidder = 0;
    for (const int item : assignments)
    {
        std::cout << "(" << bidder << "," << item << ")";
        ++bidder;
    }
    std::cout << "\n";

    if (verbose)
    {
        auction_problem.printProprieties();
    }
    n_iteration_au = auction_problem.getNIterationAu();
    return auction_problem.getTotalCost(graph);
}
#ifndef _AA_H
#define _AA_H
#include <vector>
#include <unordered_map>
#include <boost/graph/adjacency_list.hpp>
template<typename T>
using AdjacencyIterator = boost::graph_traits<T>::adjacency_iterator;
/// Auction-algorithm solver for a graph of type `Graph`; `Type` is the type
/// used for edge weights and for the returned total cost.
template<typename Graph, typename Type>
class Auction
{
private:
    /// Per-bidder state: the chosen item and the two best net values seen.
    struct Bidder {
        int best_item = -1;
        double val_first_best_item = -1;
        double val_second_best_item = -1;
    };
    /// Per-item state: current cost plus the highest bid and who placed it.
    struct Item {
        double cost = 0;
        int high_bidder = -1;
        double high_bid = -1;
    };

    int n_iteration_au = 0;  // rounds executed so far
    int vertices = 0;        // number of bidders (and of items)
    std::unordered_map<int, Bidder> unassigned_bidder;
    std::unordered_map<int, Bidder> assigned_bidder;
    std::unordered_map<int, Item> item_map;

    bool is_assignment_problem(const Graph& graph);
    void auctionRound(const Graph& graph, const double& eps, const auto& vertex_idMap);

public:
    void auction_algorithm(const Graph& graph, std::vector<int>& ass);
    int getNIterationAu();
    Type getTotalCost(const Graph& graph);
    void printProprieties();
    Type getMaximumEdge(const Graph& graph);
    void reset();

    /// Creates `vertices` unassigned bidders and `vertices` items, keyed 0..vertices-1.
    Auction(int vertices) : vertices(vertices)
    {
        for (int i = 0; i < vertices; ++i)
        {
            unassigned_bidder.emplace(i, Bidder{});
            item_map.emplace(i, Item{});
        }
    }
};
/// Returns the number of auction rounds executed so far.
template<typename Graph, typename Type>
inline int Auction<Graph, Type>::getNIterationAu()
{
    return n_iteration_au;
}
/// Returns the largest edge weight in `graph`, or 0 when no edge weighs more than 0.
///
/// Iterates the edge range directly, so no block-scope typedef of a dependent
/// type (which needs `typename`) is required, and each edge's weight is read once.
template<typename Graph, typename Type>
Type Auction<Graph, Type>::getMaximumEdge(const Graph& graph)
{
    Type max = 0;
    for (const auto& e : boost::make_iterator_range(boost::edges(graph)))
    {
        const auto weight = boost::get(boost::edge_weight_t(), graph, e);
        if (weight > max)
            max = weight;
    }
    return max;
}
/// Sums the weights of the edges (bidder, best_item + vertices) over all assigned bidders.
///
/// Bidders without an entry in `assigned_bidder`, and pairs for which the graph
/// has no edge, contribute nothing. Lookups use find() so that querying the
/// cost never inserts placeholder bidders into the map.
template<typename Graph, typename Type>
inline Type Auction<Graph, Type>::getTotalCost(const Graph& graph)
{
    Type total_cost_auction = 0;
    for (int bidder = 0; bidder < vertices; ++bidder)
    {
        const auto assigned = assigned_bidder.find(bidder);
        if (assigned == assigned_bidder.end())
            continue;

        // boost::edge reports whether the edge exists; only then is the descriptor usable.
        const auto lookup = boost::edge(bidder, assigned->second.best_item + vertices, graph);
        if (!lookup.second)
            continue;

        total_cost_auction += boost::get(boost::edge_weight_t(), graph, lookup.first);
    }
    return total_cost_auction;
}
/// Checks that `graph` has the shape this solver expects: every vertex has at
/// least one neighbour and every edge joins a bidder (index < vertices) to an
/// item (index >= vertices).
///
/// @return false as soon as an isolated vertex or a same-side edge is found.
template<typename Graph, typename Type>
bool Auction<Graph, Type>::is_assignment_problem(const Graph& graph)
{
    // Vertex `vertices` itself is the first item, so the item side is `>= vertices`.
    const auto is_bidder = [this](auto v) { return static_cast<int>(v) < vertices; };

    for (auto v1 : boost::make_iterator_range(boost::vertices(graph)))
    {
        AdjacencyIterator<Graph> ai, a_end;
        boost::tie(ai, a_end) = boost::adjacent_vertices(v1, graph);
        if (ai == a_end) return false;

        for (auto v2 : boost::make_iterator_range(ai, a_end))
            if (is_bidder(v1) == is_bidder(v2))
                return false;
    }
    return true;
}
/// Writes one line per assigned bidder and one line per item to std::cout.
template<typename Graph, typename Type>
inline void Auction<Graph, Type>::printProprieties()
{
    for (const auto& [id, bidder] : assigned_bidder)
    {
        std::cout << "|Bidder:" << id
                  << "|Best item:" << bidder.best_item
                  << "|Value first best item:" << bidder.val_first_best_item
                  << "|Value second best item:" << bidder.val_second_best_item
                  << "|\n";
    }
    for (const auto& [id, item] : item_map)
    {
        std::cout << "|Item:" << id
                  << "|Cost:" << item.cost
                  << "|Higher bidder:" << item.high_bidder
                  << "|Higher bid:" << item.high_bid
                  << "|\n";
    }
}
// Performs one auction round: every unassigned bidder bids on its best item,
// then every item holding a bid is assigned to its highest bidder.
//   graph        - graph whose edge weights are read
//   eps          - increment added to every bid
//   vertex_idMap - map used to turn a bidder id into the vertex passed to adjacent_vertices
template<typename Graph, typename Type>
void Auction<Graph, Type>::auctionRound(const Graph& graph, const double& eps, const auto& vertex_idMap)
{
// Bidding phase.
for (auto& bidder : unassigned_bidder)
{
// -1 serves as the "nothing found yet" value for both the item id and the two best values.
int id_item1 = -1;
double val_item1 = -1;
double val_item2 = -1;
AdjacencyIterator<Graph> ai, a_end;
boost::tie(ai, a_end) = boost::adjacent_vertices(vertex_idMap[bidder.first], graph);
for (auto item : boost::make_iterator_range(ai, a_end)) // iterate starting from those with fewer vertices?
{
// Net value of the item: edge weight minus the item's current cost.
// Item ids are the adjacent vertex index minus `vertices`.
double val = (boost::get(boost::edge_weight_t(), graph, (boost::edge(bidder.first, static_cast<int>(item), graph)).first)) // * (vertices))
- item_map[static_cast<int>(item) - vertices].cost;
if (val > val_item1)
{
val_item2 = val_item1;
val_item1 = val;
id_item1 = static_cast<int>(item) - vertices;
}
else if (val > val_item2) val_item2 = val;
}
bidder.second.best_item = id_item1;
bidder.second.val_second_best_item = val_item2;
bidder.second.val_first_best_item = val_item1;
// Bid = gap between the best and second-best value, plus eps.
double bid = bidder.second.val_first_best_item - bidder.second.val_second_best_item + eps;
// Skip the bid when no item was selected (best_item is not a key of item_map).
if (item_map.find(bidder.second.best_item) != item_map.end())
{
if (bid > item_map[bidder.second.best_item].high_bid)
{
item_map[bidder.second.best_item].high_bid = bid;
item_map[bidder.second.best_item].high_bidder = bidder.first;
}
}
}
// Assignment phase.
// NOTE(review): high_bid is not reset anywhere in this function, so an item that
// has received a bid is processed again in every later round — confirm intended.
for (auto& item : item_map)
{
if (item.second.high_bid == -1) continue;
item.second.cost += item.second.high_bid;
// Linear search for the bidder currently holding this item, if any.
int id_to_remove = -1;
for (auto& ass_bidr : assigned_bidder)
{
if (ass_bidr.second.best_item == item.first)
{
id_to_remove = ass_bidr.first;
break;
}
}
if (id_to_remove != -1)
{
// Move the previous holder back to the unassigned set.
unassigned_bidder.insert(std::make_pair(id_to_remove, assigned_bidder[id_to_remove]));
assigned_bidder.erase(id_to_remove);
}
// Move the highest bidder to the assigned set; operator[] creates a default
// Bidder if high_bidder is not currently in unassigned_bidder.
assigned_bidder.insert(std::make_pair(item.second.high_bidder, unassigned_bidder[item.second.high_bidder]));
unassigned_bidder.erase(item.second.high_bidder);
}
}
/// Runs auction rounds until no bidder is unassigned, then writes each
/// bidder's item id into `ass[bidder]`.
///
/// Throws the string literal "Not an assignment problem" when
/// is_assignment_problem() rejects the graph. `ass` is indexed by bidder id.
template<typename Graph, typename Type>
void Auction<Graph, Type>::auction_algorithm(const Graph& graph, std::vector<int>& ass)
{
    if (!is_assignment_problem(graph))
        throw("Not an assignment problem");

    const auto vertex_idMap = boost::get(boost::vertex_index, graph);
    const double eps = 1.0 / (vertices + 1);

    while (!unassigned_bidder.empty())
    {
        auctionRound(graph, eps, vertex_idMap);
        ++n_iteration_au;
    }

    for (const auto& [bidder_id, bidder] : assigned_bidder)
        ass[bidder_id] = bidder.best_item;
}
#endif

Why would it not be heavy.
Again,
FMap map_bidders = FMap(graph, any_interconnect, bidders);
FMap map_items = FMap(graph, any_interconnect, items);
Just "wishing" things to be a property map doesn't make them so.
Also, your filter predicates:
// Edge predicate that keeps every edge.
EdgeFilter any_interconnect = boost::keep_all{};
// Vertex predicates: true when the vertex holds a Bidder / an Item.
// The [graph] capture stores a copy of the graph inside each lambda.
VertexFilter bidders = [graph](V v) -> bool { return boost::get<Bidder>(&(graph)[v]); };
VertexFilter items = [graph](V v) -> bool { return boost::get<Item>(&(graph)[v]); };
// Views of the graph built from the predicates above.
FMap map_bidders = FMap(graph, any_interconnect, bidders);
FMap map_items = FMap(graph, any_interconnect, items);
They...
copy the entire graph(!), twice
uselessly get<> a variant element, just to discard it and return bool
Slightly better:
// Same predicates, capturing the graph by reference and testing the active
// alternative index instead of extracting the element.
// NOTE(review): assumes Bidder is alternative 0 and Item is alternative 1 — confirm against the vertex type.
VertexFilter bidders = [&graph](V v) -> bool {
return graph[v].which() == 0;
};
VertexFilter items = [&graph](V v) -> bool {
return graph[v].which() == 1;
};
// `{}` passes a default-constructed edge predicate.
FMap map_bidders = FMap(graph, {}, bidders);
FMap map_items = FMap(graph, {}, items);
But it's all kind of useless. I'm not surprised this stuff takes time, because you know your graph is structured as (N bidders)(N items), so
auto iterator_bidder = boost::make_iterator_range(vertices(map_bidders));
auto iterator_item = boost::make_iterator_range(vertices(map_items));
Should just be:
// Split the vertex range positionally: the first n vertices are the bidders, the rest the items.
auto [b,e] = vertices(graph);
auto iterator_bidder = boost::make_iterator_range(b, b + n);
auto iterator_item = boost::make_iterator_range(b + n, e);
And even those are overkill, since your vertex descriptor is integral anyways:
// Plain integer ranges: bidders are 0..n-1, items are n..2n-1.
auto const bidders = boost::irange(0, n);
auto const items = boost::irange(n, 2 * n);
I'll read some more later (family time first), because I'm already noticing more (e.g. why is listS used as the edge container selector?).
Will post here when done.

Related

How can I optimise edge creation and vertex deletion in an Adjacency set representation of a graph?

I have a graph represented with an Adjacency set similar to:
/// Graph vertex identified by the integer `x`.
struct Vertex {
    int x = 0;
    /// Two vertices compare equal when their `x` values match.
    /// Const-qualified so it can be used on const vertices.
    bool operator==(const Vertex& b) const {
        return x == b.x;
    }
};
/// Hash support for Vertex: a vertex hashes like its `x` value.
template<> struct std::hash<Vertex> {
    std::size_t operator()(Vertex const& v) const noexcept {
        const std::hash<int> int_hasher{};
        return int_hasher(v.x);
    }
};
/// Directed edge from `fr` to `to` carrying a weight.
struct Edge {
    std::shared_ptr<Vertex> fr;
    std::shared_ptr<Vertex> to;
    double weight = 0.0;  // the constructor does not set it, so give it a defined value
    /// Builds an edge between the two given vertices; `weight` starts at 0.
    Edge(std::shared_ptr<Vertex> fr_in, std::shared_ptr<Vertex> to_in) : fr(fr_in), to(to_in) {}
};
class Graph{
public:
std::shared_ptr<Vertex> addVertex() {
auto new_vertex = std::make_shared<Vertex>();
mAdjacencySet[new_vertex] = {};
return new_vertex;
}
std::shared_ptr<Edge> addEdge(std::shared_ptr<Vertex> fr, std::shared_ptr<Vertex> to) {
auto edge = std::make_shared<Edge>(fr, to);
mAdjacencySet[fr][to] = edge;
return edge;
}
void deleteVertex(std::shared_ptr<Vertex> v) {
mAdjacencySet.erase(v);
for (auto& [key, val] : mAdjacencySet) {
val.erase(v);
}
};
private:
std::unordered_map<
std::shared_ptr<Vertex>,
std::unordered_map<
std::shared_ptr<Vertex>,
std::shared_ptr<Edge>,
Deref::Hash,
Deref::Compare
>,
Deref::Hash,
Deref::Compare
> mAdjacencySet;
};
After I build my graph, I need to prune as many edges as possible because they are expensive to calculate.
One of the strategies to do so, is to delete any vertices with out degree of zero, EXCEPT for the destination vertex. This is very slow to do, relative to the rest of my program.
I wrote a script to time each part of the program:
// Benchmark driver: times vertex creation, edge creation between every ordered
// pair of distinct vertices, and vertex deletion, then prints each duration in milliseconds.
int main() {
Timer wholeProgram;
wholeProgram.start();
Graph g;
// Two vertices and one edge created outside the timed phases.
auto v1 = g.addVertex();
auto v2 = g.addVertex();
auto e = g.addEdge(v1, v2);
Timer makingVertices;
makingVertices.start();
size_t n = 1e3;
std::vector<std::shared_ptr<Vertex>> vertices(n);
for (size_t i=0; i<n; ++i) {
vertices[i] = g.addVertex();
// NOTE(review): x is assigned after addVertex() has already stored the vertex
// as a key in the graph's map — confirm that changing x after insertion is intended.
vertices[i]->x = i;
}
makingVertices.stop();
Timer makingEdges;
makingEdges.start();
// These loop variables shadow the v1/v2 declared above.
for (auto v1 : vertices) {
for (auto v2: vertices) {
if (v1!=v2) {
g.addEdge(v1, v2);
}
}
}
makingEdges.stop();
Timer deletingVertices;
deletingVertices.start();
for (auto vert : vertices) {
g.deleteVertex(vert);
}
deletingVertices.stop();
wholeProgram.stop();
std::cout << "Making Verts: " << makingVertices.elapsedMilliseconds() << std::endl;
std::cout << "Making edges: " << makingEdges.elapsedMilliseconds() << std::endl;
std::cout << "Deleting verts: " << deletingVertices.elapsedMilliseconds() << std::endl;
std::cout << "Whole program: " << wholeProgram.elapsedMilliseconds() << std::endl;
return 0;
}
And the timings (with '-O3') are:
Making Verts: 0
Making edges: 270
Deleting verts: 188
Whole program: 458
(In my actual code base, the deleting of the vertices is actually around 90% of the time to create the graph).
How can I optimize this code to reduce the time to delete vertices (And also I guess optimize the creation of edges, as this is also slow)?
The full code to run this example is:
#include <functional>
#include <memory>
#include <chrono>
#include <iostream>
/// Simple stopwatch.
///
/// Uses std::chrono::steady_clock, which is monotonic, so measured intervals
/// are not affected by adjustments to the system time.
class Timer
{
private:
    using Clock = std::chrono::steady_clock;

public:
    /// Records the start time and marks the timer as running.
    void start()
    {
        m_StartTime = Clock::now();
        m_bRunning = true;
    }
    /// Records the end time and marks the timer as stopped.
    void stop()
    {
        m_EndTime = Clock::now();
        m_bRunning = false;
    }
    /// Whole milliseconds from start() to stop(), or from start() to now while
    /// the timer is still running.
    double elapsedMilliseconds() const
    {
        const Clock::time_point endTime = m_bRunning ? Clock::now() : m_EndTime;
        return static_cast<double>(
            std::chrono::duration_cast<std::chrono::milliseconds>(endTime - m_StartTime).count());
    }
    /// Same interval as elapsedMilliseconds(), expressed in seconds.
    double elapsedSeconds() const
    {
        return elapsedMilliseconds() / 1000.0;
    }

private:
    Clock::time_point m_StartTime;
    Clock::time_point m_EndTime;
    bool m_bRunning = false;
};
/// Hash and equality functors that look through std::shared_ptr, so that
/// containers keyed by shared pointers behave as if keyed by the pointed-to values.
struct Deref {
    /**
     * @brief Function to dereference the pointer when hashing elements in a hashmap of shared pointers
     *
     * The pointer must not be null: it is dereferenced unconditionally.
     */
    struct Hash {
        template <typename T> std::size_t operator()(std::shared_ptr<T> const& p) const
        {
            return std::hash<T>()(*p);
        }
        /// Fallback for non-pointer keys: hash the value itself.
        template <typename T> std::size_t operator()(T const & p) const
        {
            // Construct the hasher, then call it (std::hash<T>(p) would try to
            // construct a hasher from p).
            return std::hash<T>()(p);
        }
    };
    /**
     * @brief Function to dereference the pointer when comparing elements in a hashmap of shared pointers
     *
     * Both pointers must be non-null: they are dereferenced unconditionally.
     */
    struct Compare {
        template <typename T> bool operator()(std::shared_ptr<T> const& a, std::shared_ptr<T> const& b) const
        {
            return *a == *b;
        }
        /// Fallback for non-pointer keys: compare the values directly.
        template <typename T> bool operator()(T const& a, T const& b) const
        {
            return a == b;
        }
    };
};
/// Graph vertex identified by the integer `x`.
struct Vertex {
    int x = 0;
    /// Two vertices compare equal when their `x` values match.
    /// Const-qualified so it can be used on const vertices.
    bool operator==(const Vertex& b) const {
        return x == b.x;
    }
};
/// Hash support for Vertex: a vertex hashes like its `x` value.
template<> struct std::hash<Vertex> {
    std::size_t operator()(Vertex const& v) const noexcept {
        const std::hash<int> int_hasher{};
        return int_hasher(v.x);
    }
};
/// Directed edge from `fr` to `to` carrying a weight.
struct Edge {
    std::shared_ptr<Vertex> fr;
    std::shared_ptr<Vertex> to;
    double weight = 0.0;  // the constructor does not set it, so give it a defined value
    /// Builds an edge between the two given vertices; `weight` starts at 0.
    Edge(std::shared_ptr<Vertex> fr_in, std::shared_ptr<Vertex> to_in) : fr(fr_in), to(to_in) {}
};
class Graph{
public:
std::shared_ptr<Vertex> addVertex() {
auto new_vertex = std::make_shared<Vertex>();
mAdjacencyList[new_vertex] = {};
return new_vertex;
}
std::shared_ptr<Edge> addEdge(std::shared_ptr<Vertex> fr, std::shared_ptr<Vertex> to) {
auto edge = std::make_shared<Edge>(fr, to);
mAdjacencyList[fr][to] = edge;
return edge;
}
void deleteVertex(std::shared_ptr<Vertex> v) {
mAdjacencyList.erase(v);
for (auto& [key, val] : mAdjacencyList) {
val.erase(v);
}
};
private:
std::unordered_map<
std::shared_ptr<Vertex>,
std::unordered_map<
std::shared_ptr<Vertex>,
std::shared_ptr<Edge>,
Deref::Hash,
Deref::Compare
>,
Deref::Hash,
Deref::Compare
> mAdjacencyList;
};
// Benchmark driver: times vertex creation, edge creation between every ordered
// pair of distinct vertices, and vertex deletion, then prints each duration in milliseconds.
int main() {
Timer wholeProgram;
wholeProgram.start();
Graph g;
// Two vertices and one edge created outside the timed phases.
auto v1 = g.addVertex();
auto v2 = g.addVertex();
auto e = g.addEdge(v1, v2);
Timer makingVertices;
makingVertices.start();
size_t n = 1e3;
std::vector<std::shared_ptr<Vertex>> vertices(n);
for (size_t i=0; i<n; ++i) {
vertices[i] = g.addVertex();
// NOTE(review): the graph's map hashes and compares keys through Vertex::x
// (Deref::Hash / Deref::Compare), and x is assigned only after addVertex()
// has inserted the vertex — confirm that changing x after insertion is intended.
vertices[i]->x = i;
}
makingVertices.stop();
Timer makingEdges;
makingEdges.start();
// These loop variables shadow the v1/v2 declared above.
for (auto v1 : vertices) {
for (auto v2: vertices) {
if (v1!=v2) {
g.addEdge(v1, v2);
}
}
}
makingEdges.stop();
Timer deletingVertices;
deletingVertices.start();
for (auto vert : vertices) {
g.deleteVertex(vert);
}
deletingVertices.stop();
wholeProgram.stop();
std::cout << "Making Verts: " << makingVertices.elapsedMilliseconds() << std::endl;
std::cout << "Making edges: " << makingEdges.elapsedMilliseconds() << std::endl;
std::cout << "Deleting verts: " << deletingVertices.elapsedMilliseconds() << std::endl;
std::cout << "Whole program: " << wholeProgram.elapsedMilliseconds() << std::endl;
return 0;
}
And to run it, you can view it online

How to iterate over boost graph and find neighbor's neighbor also?

The following figure shows bi-directional graph. I have represented following graph using boost-graph.
I have iterated from v1 --> v2 and v1 --> v3 but I am not able to visit from v3 --> v4. How to do that ?
Here is my code:
(vertex = V1) and
(graph = boost graph )
//Finding out edges of vertex
// Mark the target of every out-edge of `vertex` as not visible.
// (The tie() before the loop is repeated by the loop's own init-statement.)
boost::graph_traits<BGType>::out_edge_iterator ei, ei_end;
boost::tie(ei, ei_end) = out_edges( vertex, graph );
for( boost::tie(ei, ei_end) = out_edges(vertex, graph); ei != ei_end; ++ei)
{
auto target = boost::target ( *ei, graph );
graph[target]._isVisible = false;
}
// Mark the source of every in-edge of `vertex` as not visible.
// Only direct neighbours are touched; nothing here walks further than one edge.
boost::graph_traits<BGType>::in_edge_iterator ein, ein_end;
boost::tie(ein, ein_end) = in_edges( vertex, graph );
for( boost::tie(ein, ein_end) = in_edges(vertex, graph); ein != ein_end; ++ein)
{
auto source = boost::source ( *ein, graph );
graph[source]._isVisible = false;
}
It helps if you get your terminology straight. L1, L2, L3 are edges not vertices.
So, you want to hide all edges and additionally all vertices with degree 0.
You could instead give edges a visibility flag:
/// Bundled edge property: whether the edge is currently shown. Edges start visible.
struct EdgeProps {
    bool _isVisible{true};
};
// Graph type used throughout the answer.
using BGType = boost::adjacency_list< //
boost::vecS, // out-edge container selector
boost::vecS, // vertex container selector
boost::bidirectionalS, // directed graph that also provides in_edges()
VertexProps, // bundled vertex properties
EdgeProps>; // bundled edge properties
Now I'd make that function:
/// Flags every out-edge and in-edge of the vertex called `name` as not visible.
/// The vertex must exist (checked with assert).
void hide_connections(Name name, BGType& graph) {
    auto const found = graph.named_vertices.find(name);
    assert(found != graph.named_vertices.end());

    auto const v = *found;
    auto const hide_all = [&graph](auto edge_range) {
        for (auto e : boost::make_iterator_range(edge_range))
            graph[e]._isVisible = false;
    };
    hide_all(out_edges(v, graph));
    hide_all(in_edges(v, graph));
}
Now, the visibility of vertices is a resultant - a derived property. You could calculate it on the fly:
auto visible = [&graph](V v) {
for (auto e : make_iterator_range(out_edges(v, graph)))
if (graph[e]._isVisible)
return true;
for (auto e : make_iterator_range(in_edges(v, graph)))
if (graph[e]._isVisible)
return true;
return false;
};
Indeed, this satisfies your requirements: Live On Compiler Explorer
#include <boost/graph/adjacency_list.hpp>
#include <boost/property_map/function_property_map.hpp>
#include <iostream>
/// Vertex names are plain strings.
using Name = std::string;

/// Bundled vertex property holding the vertex name ("unnamed" by default).
/// The constructor is intentionally implicit so a Name converts to VertexProps.
struct VertexProps {
    Name name;
    VertexProps(Name n = "unnamed") : name{std::move(n)} {}
};

/// Bundled edge property: whether the edge is currently shown. Edges start visible.
struct EdgeProps {
    bool _isVisible{true};
};
// Named-graph support: build a VertexProps bundle from a name when a vertex is
// created by name.
template <> struct boost::graph::internal_vertex_constructor<VertexProps> {
using type = boost::graph::vertex_from_name<VertexProps>;
};
// Named-graph support: tell BGL how to extract the name from a VertexProps bundle.
template <> struct boost::graph::internal_vertex_name<VertexProps> {
struct type {
using result_type = Name;
// Const and mutable access to the name stored in the bundle.
Name const& operator()(VertexProps const& vp) const { return vp.name; }
Name& operator()(VertexProps& vp) const { return vp.name; }
};
};
// Graph type used by this listing.
using BGType = boost::adjacency_list< //
boost::vecS, // out-edge container selector
boost::vecS, // vertex container selector
boost::bidirectionalS, // directed graph that also provides in_edges()
VertexProps, // bundled vertex properties
EdgeProps>; // bundled edge properties
// Shorthands for the graph's vertex and edge descriptor types.
using V = BGType::vertex_descriptor;
using E = BGType::edge_descriptor;
/// Flags every out-edge and in-edge of the vertex called `name` as not visible.
/// The vertex must exist (checked with assert).
void hide(Name name, BGType& graph) {
    auto const found = graph.named_vertices.find(name);
    assert(found != graph.named_vertices.end());

    auto const v = *found;
    auto const hide_all = [&graph](auto edge_range) {
        for (auto e : make_iterator_range(edge_range))
            graph[e]._isVisible = false;
    };
    hide_all(out_edges(v, graph));
    hide_all(in_edges(v, graph));
}
int main() {
BGType graph;
for (auto [from, to] : {
std::pair{"V1", "V2"},
{"V1", "V3"},
{"V3", "V4"},
})
{
add_edge(from, to, graph);
add_edge(to, from, graph);
}
auto visible = [&graph](V v) { return 0 != degree(v, graph); };
auto names = get(&VertexProps::name, graph);
auto print = [&](std::ostream& os = std::cout) {
for (auto v : boost::make_iterator_range(vertices(graph))) {
if (!visible(v))
continue;
os << names[v] << " -->";
for (auto e : make_iterator_range(out_edges(v, graph))) {
if (graph[e]._isVisible)
os << " " << names[target(e, graph)];
}
os << "\n";
}
};
print();
hide("V1", graph);
print(std::cout << "\nConnections of V1 hidden:\n");
}
Prints
V2 --> V1
V1 --> V2 V3
V3 --> V1 V4
V4 --> V3
Connections of V1 hidden:
V3 --> V4
V4 --> V3
Outside The Box
This is clumsy and inefficient:
auto visible = [&graph](V v) {
for (auto e : make_iterator_range(out_edges(v, graph)))
if (graph[e]._isVisible)
return true;
for (auto e : make_iterator_range(in_edges(v, graph)))
if (graph[e]._isVisible)
return true;
return false;
};
What you really want to be able to say is:
auto visible = [&graph](V v) { return 0 != degree(v, graph); };
However, it won't work because you don't actually delete anything, so BGL will think the edges are still there.
You can fix the model by using a filtered_graph_adaptor where you store the filterables OUTSIDE the graph model.
Filtered Graph
So, this shifts the perspective back to your original: hiding vertices. Let's start out simple:
// Descriptors of the vertices and edges that are currently hidden; kept outside the graph.
std::set<V> vhidden;
std::set<E> ehidden;
These are the sets that will contain all hidden vertex and edge descriptors.
Now we can setup filter predicates and the adapted graph:
// Predicates keep an edge / vertex unless it is in the corresponding hidden set.
// They capture the sets by reference, so later insertions take effect immediately.
std::function epred = [&](E e) { return not ehidden.contains(e); };
std::function vpred = [&](V v) { return not vhidden.contains(v); };
// Filtered view of `graph` that applies both predicates.
boost::filtered_graph f(graph, epred, vpred);
Adding some helpers to hide edges/vertices.
// Hides edge e. Its target vertex is hidden too when the target already has
// degree 0 in the filtered view (checked before e itself is recorded as hidden).
auto ehide = [&](E e) {
    auto const u = target(e, graph);
    if (degree(u, f) == 0)
        vhidden.insert(u);
    ehidden.insert(e);
};
Notice that we're being lazy and using degree(v, f) on the filtered graph (!) so that we don't have to manually count the number of edges that were already filtered.
// Hides the vertex called `name` (which must exist) and then every edge
// leaving or entering it.
auto vhide = [&](Name const& name) {
    auto const found = graph.named_vertices.find(name);
    assert(found != graph.named_vertices.end());

    V const v = *found;
    vhidden.insert(v);

    auto const hide_all = [&](auto edge_range) {
        for (auto e : make_iterator_range(edge_range))
            ehide(e);
    };
    hide_all(out_edges(v, graph));
    hide_all(in_edges(v, graph));
};
Hiding a vertex traverses exactly one level to neighbours unconditionally. That's good enough as a stopping condition (in response to your comment) because the degree cannot change unless by hiding more edges to the same target node.
Using the filtered view:
// Report the hidden counts and print the filtered view before hiding anything.
std::cout << "Filtered edges(" << ehidden.size() << "), vertices(" << vhidden.size() << ")\n";
print_graph(f, names, std::cout << "Filtered view: \n");
// Hide V1, then report and print again through the same filtered view.
vhide("V1");
std::cout << "Filtered edges(" << ehidden.size() << "), vertices(" << vhidden.size() << ")\n";
print_graph(f, names, std::cout << "Connections of V1 hidden:\n");
See it Live On Compiler Explorer
Filtered edges(0), vertices(0)
Filtered view:
V2 --> V1
V1 --> V2 V3
V3 --> V1 V4
V4 --> V3
Filtered edges(4), vertices(2)
Connections of V1 hidden:
V3 --> V4
V4 --> V3
Full Listing (Filtered Graph)
For posterity: Live On Compiler Explorer
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/filtered_graph.hpp>
#include <boost/graph/graph_utility.hpp>
#include <boost/property_map/function_property_map.hpp>
#include <iostream>
/// Vertex names are plain strings.
using Name = std::string;

/// Bundled vertex property holding the vertex name ("unnamed" by default).
/// The constructor is intentionally implicit so a Name converts to VertexProps.
struct VertexProps {
    Name name;
    VertexProps(Name n = "unnamed") : name{std::move(n)} {}
};

/// Bundled edge property: whether the edge is currently shown. Edges start visible.
struct EdgeProps {
    bool _isVisible{true};
};
// Named-graph support: build a VertexProps bundle from a name when a vertex is
// created by name.
template <> struct boost::graph::internal_vertex_constructor<VertexProps> {
using type = boost::graph::vertex_from_name<VertexProps>;
};
// Named-graph support: tell BGL how to extract the name from a VertexProps bundle.
template <> struct boost::graph::internal_vertex_name<VertexProps> {
struct type {
using result_type = Name;
// Const and mutable access to the name stored in the bundle.
Name const& operator()(VertexProps const& vp) const { return vp.name; }
Name& operator()(VertexProps& vp) const { return vp.name; }
};
};
// Graph type used by this listing.
using BGType = boost::adjacency_list< //
boost::vecS, // out-edge container selector
boost::vecS, // vertex container selector
boost::bidirectionalS, // directed graph that also provides in_edges()
VertexProps, // bundled vertex properties
EdgeProps>; // bundled edge properties
// Shorthands for the graph's vertex and edge descriptor types.
using V = BGType::vertex_descriptor;
using E = BGType::edge_descriptor;
int main() {
BGType graph;
for (auto [from, to] : {
std::pair{"V1", "V2"},
{"V1", "V3"},
{"V3", "V4"},
})
{
add_edge(from, to, graph);
add_edge(to, from, graph);
}
auto names = get(&VertexProps::name, graph);
print_graph(graph, names, std::cout << "Unfiltered graph:\n");
std::set<V> vhidden;
std::set<E> ehidden;
std::function epred = [&](E e) { return not ehidden.contains(e); };
std::function vpred = [&](V v) { return not vhidden.contains(v); };
boost::filtered_graph f(graph, epred, vpred);
auto ehide = [&](E e) {
if (auto u = target(e, graph); 0 == degree(u, f))
vhidden.insert(u);
ehidden.insert(e);
};
auto vhide = [&](Name const& name) {
auto it = graph.named_vertices.find(name);
assert(it != graph.named_vertices.end());
V v = *it;
vhidden.insert(v);
for (auto e: make_iterator_range(out_edges(v, graph)))
ehide(e);
for (auto e: make_iterator_range(in_edges(v, graph)))
ehide(e);
};
std::cout << "Filtered edges(" << ehidden.size() << "), vertices(" << vhidden.size() << ")\n";
print_graph(f, names, std::cout << "Filtered view: \n");
vhide("V1");
std::cout << "Filtered edges(" << ehidden.size() << "), vertices(" << vhidden.size() << ")\n";
print_graph(f, names, std::cout << "Connections of V1 hidden:\n");
}

How to find point of intersection in qcustomplot?

I have an app based on qt (qcustomplot) that prints two different graphs. They have one point of intersection. How to find x and y coordinates of this point?
This doesn't have much to do with plotting, since you'd be investigating the underlying data. Let's say that we can interpolate between data points using lines, and the data sets are single-valued (i.e. for any x or key coordinate, there's only one value).
Online demo of the code below
Let's sketch a solution. First, some preliminaries, and we detect whether QCustomPlot was included so that the code can be tested without it - the necessary classes are mocked:
#define _USE_MATH_DEFINES
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <optional>
#include <type_traits>
#include <vector>
//#include "qcustomplot.h"
constexpr bool debugOutput = false;
#ifndef QCP_PLOTTABLE_GRAPH_H
/// Stand-in for QCustomPlot's QCPGraphData, used when qcustomplot.h is not included.
/// A default-constructed point has key and value 0 instead of indeterminate values.
struct QCPGraphData {
    double key = 0.0, value = 0.0;
    QCPGraphData() = default;
    QCPGraphData(double x, double y) : key(x), value(y) {}
};
#endif
/// Orders data points by key: true when `l.key` is less than `r.key`.
bool keyLess(const QCPGraphData &l, const QCPGraphData &r)
{
    return l.key < r.key;
}
#ifndef QCP_PLOTTABLE_GRAPH_H
// Stand-in for QCustomPlot's data container, used when qcustomplot.h is not
// included: a std::vector with a sort() that orders the points by key.
template <typename T> struct QCPDataContainer : public std::vector<T> {
using std::vector<T>::vector;
void sort() { std::sort(this->begin(), this->end(), keyLess); }
};
using QCPGraphDataContainer = QCPDataContainer<QCPGraphData>;
#endif
// Names used by the rest of the code, independent of whether the real or the mocked classes are in use.
using Point = QCPGraphData;
using Container = QCPGraphDataContainer;
// intersection() copies Points and returns them inside std::optional.
static_assert(std::is_copy_constructible_v<Point>, "Point must be copy-constructible");
Some helper functions:
/// Streams a point as "(key, value)".
std::ostream &operator<<(std::ostream &os, const Point &p) {
    os << "(" << p.key << ", " << p.value << ")";
    return os;
}
/// True when no two neighbouring points in `v` share the same key.
template <class T> bool has_unique_keys(const T &v) {
    const auto first = std::begin(v);
    const auto last = std::end(v);
    const auto duplicate = std::adjacent_find(
        first, last, [](const Point &l, const Point &r) { return l.key == r.key; });
    return duplicate == last;
}
template <class T> bool has_valid_points(const T& v) {
constexpr auto isValid = [](const Point &p) { return std::isfinite(p.key) && std::isfinite(p.value); };
return std::all_of(std::begin(v), std::end(v), isValid);
}
The line segment intersection finder:
// intersection of two line segments
std::optional<Point> intersection(const Point &a1, const Point &a2, const Point &b1, const Point &b2)
{
auto p1 = a1, p2 = a2, p3 = b1, p4 = b2;
assert(p1.key <= p2.key);
assert(p3.key <= p4.key);
if (debugOutput) std::cout << p1 << "-" << p2 << ", " << p3 << "-" << p4;
auto const denom = (p1.key - p2.key)*(p3.value - p4.value)
- (p1.value - p2.value)*(p3.key - p4.key);
if (fabs(denom) > 1e-6*(p2.key - p1.key)) {
// the lines are not parallel
auto const scale = 1.0/denom;
auto const q = p1.key*p2.value - p1.value*p2.key;
auto const r = p3.key*p4.value - p3.value*p4.key;
auto const x = (q*(p3.key-p4.key) - (p1.key-p2.key)*r) * scale;
if (debugOutput) std::cout << " x=" << x << "\n";
if (p1.key <= x && x <= p2.key && p3.key <= x && x <= p4.key) {
auto const y = (q*(p3.value-p4.value) - (p1.value-p2.value)*r) * scale;
return std::optional<Point>(std::in_place, x, y);
}
}
else if (debugOutput) std::cout << "\n";
return std::nullopt;
}
An algorithm that walks down two lists of points sorted in ascending key (x) order, and finds all intersections of line segments spanning consecutive point pairs from these two lists:
// Finds the intersections between two polylines, each given as points in
// ascending key order (asserted below, together with finite coordinates and
// unique keys). Returns an empty vector when either input has fewer than two
// points or when the key spans do not overlap.
std::vector<Point> findIntersections(const Container &a_, const Container &b_)
{
if (a_.size() < 2 || b_.size() < 2) return {};
// Input validation; active only when asserts are enabled.
static constexpr auto check = [](const auto &c){
assert(has_valid_points(c));
assert(std::is_sorted(c.begin(), c.end(), keyLess));
assert(has_unique_keys(c));
};
check(a_);
check(b_);
// Name the inputs so that `a` is the one whose first key is not greater than b's.
bool aFirst = a_.front().key <= b_.front().key;
const auto &a = aFirst ? a_ : b_, &b = aFirst ? b_ : a_;
assert(a.front().key <= b.front().key);
if (a.back().key < b.front().key) return {}; // the key spans don't overlap
std::vector<Point> intersections;
auto ia = a.begin(), ib = b.begin();
// a1/b1 hold the start of the current segment; ia/ib point at its end.
Point a1 = *ia++, b1 = *ib++;
while (ia->key < b1.key) a1=*ia++; // advance a until the key spans overlap
for (Point a2 = *ia, b2 = *ib;;) {
auto const ipt = intersection(a1, a2, b1, b2);
if (ipt)
intersections.push_back(*ipt);
// Advance whichever segment ends first; both advance when they end at the same key.
bool advanceA = a2.key <= b2.key, advanceB = b2.key <= a2.key;
if (advanceA) {
if (++ia == a.end()) break;
a1 = a2, a2 = *ia;
}
if (advanceB) {
if (++ib == b.end()) break;
b1 = b2, b2 = *ib;
}
}
return intersections;
}
And a more generic version that can also sort the points in ascending key order:
// Convenience overload: when `presorted` is false, both containers are first
// sorted in place via their sort() member; the two-argument overload then does
// the actual search.
auto findIntersections(Container &d1, Container &d2, bool presorted)
{
    if (presorted)
        return findIntersections(d1, d2);

    d1.sort();
    d2.sort();
    return findIntersections(d1, d2);
}
And now some simple demonstration:
// Samples `fun` at start, start+step, start+2*step, ... for as long as the
// abscissa does not exceed `end`, and returns the (x, fun(x)) pairs in that order.
//
// Each abscissa is computed as start + i*step rather than by repeated addition.
// The previous loop advanced with `x = ++i * step`, which dropped `start`
// and produced wrong sample positions for any non-zero start.
//
// A step that is not strictly positive (including NaN) can never reach `end`,
// so it yields an empty result instead of looping forever.
template <typename Fun>
Container makeGraph(double start, double step, double end, Fun &&fun) {
    Container result;
    if (!(step > 0))
        return result;
    for (int i = 0;; ++i) {
        const double x = start + i * step;
        if (!(x <= end))
            break;
        result.emplace_back(x, fun(x));
    }
    return result;
}
int main()
{
for (auto step2: {0.1, 0.1151484584}) {
auto sinPlot = makeGraph(-2*M_PI, 0.1, 3*M_PI, sin);
auto cosPlot = makeGraph(0., step2, 2*M_PI, cos);
auto intersections = findIntersections(sinPlot, cosPlot);
std::cout << "Intersections:\n";
for (auto &ip : intersections)
std::cout << " at " << ip << "\n";
}
}
Demo output:
Intersections:
at (0.785613, 0.706509)
at (3.92674, -0.706604)
Intersections:
at (0.785431, 0.706378)
at (3.92693, -0.706732)

C++ vector bounds checking

I have a 6D vector and I need to check the neighborhood of each element (2 elements in each direction). Of course, when I am on the boundary of the vector, the check leads to a segmentation fault. All I can think of is a switch with a ton of cases. Is there a better way to solve this? I also thought of something like try-catch.
Still too bulky but it works:
#include <iostream>
#include <array>
#include <vector>
// Nested-vector aliases: each VectorND is a std::vector of the next lower
// dimension, bottoming out at int.
using Vector1D = std::vector<int>;
using Vector2D = std::vector<Vector1D>;
using Vector3D = std::vector<Vector2D>;
using Vector4D = std::vector<Vector3D>;
using Vector5D = std::vector<Vector4D>;
using Vector6D = std::vector<Vector5D>;
// One index per dimension, outermost first.
using Path = std::array<size_t, 6>;

// Bounds-checked lookup of vector6D[path[0]][path[1]]...[path[5]].
// On success stores the element in `val` and returns true.
// On the first out-of-range index it prints a diagnostic naming that index and
// its position in the path, leaves `val` untouched and returns false.
bool GetVectorPathElement(Vector6D const &vector6D, Path const &path, int &val)
{
    size_t depth = 0;
    size_t index = path[depth];

    // Reports the coordinate currently being checked; always yields false.
    auto const reject = [&depth, &index]() -> bool {
        std::cout << "Invalid path " << index << " at index " << depth << std::endl;
        return false;
    };

    if (index >= vector6D.size()) return reject();
    Vector5D const &level5 = vector6D[index];

    index = path[++depth];
    if (index >= level5.size()) return reject();
    Vector4D const &level4 = level5[index];

    index = path[++depth];
    if (index >= level4.size()) return reject();
    Vector3D const &level3 = level4[index];

    index = path[++depth];
    if (index >= level3.size()) return reject();
    Vector2D const &level2 = level3[index];

    index = path[++depth];
    if (index >= level2.size()) return reject();
    Vector1D const &level1 = level2[index];

    index = path[++depth];
    if (index >= level1.size()) return reject();

    val = level1[index];
    return true;
}
// Demo: builds a 1x2x3x4x5x6 nested vector whose innermost rows are 1..6 and
// looks up one element through a bounds-checked path.
int main()
{
    Vector1D const row = { 1,2,3,4,5,6 };
    Vector2D const level2(5, row);
    Vector3D const level3(4, level2);
    Vector4D const level4(3, level3);
    Vector5D const level5(2, level4);
    Vector6D const vector6D(1, level5);

    Path const path = { 0,0,2,1,4,5 };
    int element;
    if (!GetVectorPathElement(vector6D, path, element))
        return 0;

    std::cout << "Path: ";
    for (size_t pos = 0; pos != path.size(); ++pos)
        std::cout << path[pos] << " ";
    std::cout << "\nElement value at destination: " << element << std::endl;
    return 0;
}
https://ideone.com/nL1zo2

How to optimize an indirect radix sort? (a.k.a. how to optimize unpredictable memory access patterns)

I've written an indirect radix sort algorithm in C++ (by indirect, I mean it returns the indices of the items):
#include <algorithm>
#include <iterator>
#include <vector>
// One stable distribution pass of the indirect radix sort.
// [begin, end) holds indices into `a`; the pass reorders them by the key digit
// a[index][i], keeping the relative order of equal digits.
// `buckets` is caller-owned scratch space reused between passes; it is grown
// on demand, and every bucket touched here is empty again on return.
template<class It1, class It2>
void radix_ipass(
    It1 begin, It1 const end,
    It2 const a, size_t const i,
    std::vector<std::vector<size_t> > &buckets)
{
    typedef std::vector<size_t> Bucket;

    // Buckets [0, ready) have been cleared or freshly created during this pass.
    size_t ready = 0;

    // Scatter: move each index into the bucket of its digit.
    for (It1 cur = begin; cur != end; ++cur)
    {
        size_t const digit = a[*cur][i];

        // Lazily clear buckets up to and including `digit`.
        for (; ready <= digit && ready < buckets.size(); ++ready)
        {
            buckets[ready].clear();
        }
        if (buckets.size() <= digit)
        {
            buckets.resize(digit + 1);
            ready = buckets.size();
        }

        Bucket &slot = buckets[digit];
        slot.push_back(size_t());
        using std::swap;
        swap(slot.back(), *cur);
    }

    // Gather: swap the buckets back into the range in digit order, emptying
    // each bucket as it is consumed.
    for (size_t b = 0; b != ready; ++b)
    {
        Bucket &slot = buckets[b];
        begin = std::swap_ranges(slot.begin(), slot.end(), begin);
        slot.clear();
    }
}
// Indirect LSD radix sort.
// [begin, end) is a range of keys, each a sequence of digits exposing size()
// and operator[]. `items` must have room for (end - begin) entries; on return
// it holds the indices 0..n-1 arranged so that the keys they refer to are in
// ascending lexicographic order. The keys themselves are not moved.
template<class It, class It2>
void radix_isort(It const begin, It const end, It2 const items)
{
    typename std::iterator_traits<It>::difference_type const count = end - begin;

    // Start from the identity permutation.
    for (ptrdiff_t pos = 0; pos != count; ++pos) { items[pos] = pos; }

    // The number of passes is the longest key length.
    size_t smax = 0;
    for (It key = begin; key != end; ++key)
    {
        size_t const len = key->size();
        if (len > smax) { smax = len; }
    }

    // Least significant digit first, i.e. the last position of each key.
    std::vector<std::vector<size_t> > buckets;
    for (size_t digit = smax; digit != 0; )
    {
        --digit;
        radix_ipass(items, items + count, begin, digit, buckets);
    }
}
It seems to perform around 40% faster than std::sort when I test it with the following code (3920 ms compared to 6530 ms):
#include <functional>
// Comparator adaptor: orders two values by the result of applying the key
// function `Key` to each of them, i.e. a < b iff Key(a) < Key(b).
// It derives from Key so that a stateless key function adds no storage.
template<class Key>
struct key_comp : public Key
{
    explicit key_comp(Key const &key = Key()) : Key(key) { }

    template<class T>
    bool operator()(T const &a, T const &b) const
    {
        Key const &project = *this;
        return project(a) < project(b);
    }
};

// Deduces Key so callers need not spell out the comparator type.
template<class Key>
key_comp<Key> make_key_comp(Key const &key)
{
    return key_comp<Key>(key);
}
// Binary functor returning a + b, computed as `a += b` on a by-value copy of a.
//
// The adaptable-functor typedefs are declared here directly instead of being
// inherited from std::binary_function, which was deprecated in C++11 and
// removed in C++17. Adaptors that read these typedefs keep working unchanged.
template<class T1, class T2>
struct add
{
    typedef T1 first_argument_type;
    typedef T2 second_argument_type;
    typedef T1 result_type;

    T1 operator()(T1 a, T2 const &b) const { return a += b; }
};
// Functor adaptor that calls F and dereferences what F returns.
// F must expose `argument_type` and a `result_type` usable with
// std::iterator_traits (an iterator or pointer); the adaptor returns a const
// reference to the element that result designates.
template<class F>
struct deref : public F
{
    deref(F const &f) : F(f) { }

    typename std::iterator_traits<
        typename F::result_type
    >::value_type const &
    operator()(typename F::argument_type const &a) const
    {
        F const &inner = *this;
        return *inner(a);
    }
};

// Deduces T so callers need not spell out the adaptor type.
template<class T>
deref<T> make_deref(T const &t)
{
    return deref<T>(t);
}
// Small xorshift-style pseudo-random number generator.
// The three state words live in function-local statics, so the sequence is
// the same on every program run and the function keeps state between calls.
size_t xorshf96(void)
{
    static size_t s0 = 123456789, s1 = 362436069, s2 = 521288629;

    // Scramble the oldest word.
    size_t mixed = s0;
    mixed ^= mixed << 16;
    mixed ^= mixed >> 5;
    mixed ^= mixed << 1;

    // Rotate the state and fold the scrambled word into the newest slot.
    s0 = s1;
    s1 = s2;
    s2 = mixed ^ s0 ^ s1;
    return s2;
}
#include <stdio.h>
#include <time.h>
#include <array>
int main(void)
{
typedef std::vector<std::array<size_t, 3> > Items;
Items items(1 << 24);
std::vector<size_t> ranks(items.size() * 2);
for (size_t i = 0; i != items.size(); i++)
{
ranks[i] = i;
for (size_t j = 0; j != items[i].size(); j++)
{ items[i][j] = xorshf96() & 0xFFF; }
}
clock_t const start = clock();
if (1) { radix_isort(items.begin(), items.end(), ranks.begin()); }
else // STL sorting
{
std::sort(
ranks.begin(),
ranks.begin() + items.size(),
make_key_comp(make_deref(std::bind1st(
add<Items::const_iterator, ptrdiff_t>(),
items.begin()))));
}
printf("%u ms\n",
(unsigned)((clock() - start) * 1000 / CLOCKS_PER_SEC),
std::min(ranks.begin(), ranks.end()));
return 0;
}
Hmm, I guess that's the best I can do, I thought.
But after lots of banging my head against the wall, I realized that prefetching in the beginning of radix_ipass can help cut down the result to 1440 ms (!):
#include <xmmintrin.h>
...
for (It1 j = begin; j != end; ++j)
{
#if defined(_MM_TRANSPOSE4_PS) // should be defined if xmmintrin.h is included
enum { N = 8 };
if (end - j > N)
{ _mm_prefetch((char const *)(&a[j[N]][i]), _MM_HINT_T0); }
#endif
...
}
Clearly, the bottleneck is the memory bandwidth---the access pattern is unpredictable.
So now my question is: what else can I do to make it even faster on similar amounts of data?
Or is there not much room left for improvement?
(I'm hoping to avoid compromising the readability of the code if possible, so if the readability is harmed, the improvement should be significant.)
Using a more compact data structure that combines ranks and values can boost the performance of std::sort by a factor of 2-3. Essentially, the sort now runs on a vector<pair<Value,Rank>>. For this, the Value data type std::array<integer_type, 3> has been replaced by a more compact pair<uint32_t, uint8_t> data structure. Only half a byte of it is unused, and the < comparison can be done in two steps: first a presumably efficient comparison of the uint32_t parts, then (only if those are equal) a comparison of the uint8_t parts. It's not clear whether the loop used by std::array<..>::operator< can be optimized to similarly fast code, but replacing std::array<integer_type,3> with this data structure yielded another performance boost.
Still, it doesn't get as efficient as the radix sort. (Maybe you could tweak a custom QuickSort with prefetches?)
Besides that additional sorting method, I've replaced the xorshf96 by a mt19937, because I know how to provide a seed for the latter ;)
The seed and the number of values can be changed via two command-line arguments: first the seed, then the count.
Compiled with g++ 4.9.0 20131022, using -std=c++11 -march=native -O3, for a 64-bit linux
Sample runs; important note running on a Core2Duo processor U9400 (old & slow!)
item count: 16000000
using std::sort
duration: 12260 ms
result sorted: true
seed: 5648
item count: 16000000
using std::sort
duration: 12230 ms
result sorted: true
seed: 5648
item count: 16000000
using std::sort
duration: 12230 ms
result sorted: true
seed: 5648
item count: 16000000
using std::sort with a packed data structure
duration: 4290 ms
result sorted: true
seed: 5648
item count: 16000000
using std::sort with a packed data structure
duration: 4270 ms
result sorted: true
seed: 5648
item count: 16000000
using std::sort with a packed data structure
duration: 4280 ms
result sorted: true
item count: 16000000
using radix sort
duration: 3790 ms
result sorted: true
seed: 5648
item count: 16000000
using radix sort
duration: 3820 ms
result sorted: true
seed: 5648
item count: 16000000
using radix sort
duration: 3780 ms
result sorted: true
New or changed code:
// Index comparator: orders two indices by the elements they designate in the
// random-access sequence starting at `beg`, i.e. lhs < rhs iff beg[lhs] < beg[rhs].
//
// operator() is const because it never modifies the comparator; the previous
// non-const version could not be called through a const comparator object.
template<class It>
struct fun_obj
{
    It beg;

    bool operator()(ptrdiff_t lhs, ptrdiff_t rhs) const
    {
        return beg[lhs] < beg[rhs];
    }
};

// Deduces It so callers need not spell out the comparator type.
template<class It>
fun_obj<It> make_fun_obj(It beg)
{
    return fun_obj<It>{beg};
}
// Three 12-bit digits packed into 40 bits so that comparing (m32, m8)
// lexicographically equals comparing the digit triples lexicographically.
//
// Layout:
//   m32 bits 31..20 : digit 0
//   m32 bits 19..8  : digit 1
//   m32 bits  7..4  : unused (always zero)
//   m32 bits  3..0  : high nibble of digit 2
//   m8              : low byte of digit 2
struct uint32p8_t
{
    uint32_t m32;
    uint8_t m8;

    // Packs a triple. Only the low 12 bits of each digit are kept, so an
    // out-of-range digit cannot spill into its neighbour's bits. The shifts
    // are done in uint32_t to avoid shifting a promoted signed int into its
    // sign bit.
    uint32p8_t(std::array<uint16_t, 3> const& a)
        : m32(   ((static_cast<uint32_t>(a[0]) & 0xFFFu) << 20)
               | ((static_cast<uint32_t>(a[1]) & 0xFFFu) << 8)
               | ((static_cast<uint32_t>(a[2]) & 0xF00u) >> 8) )
        , m8( static_cast<uint8_t>(a[2] & 0xFF) )
    {
    }

    // Unpacks back into the three digits.
    // Each mask is parenthesised before shifting: `>>` binds tighter than `&`,
    // so the previous `m32&MASK >> n` masked with the shifted constant and
    // returned wrong digits. The second mask also had a digit missing.
    operator std::array<size_t, 3>() const
    {
        return {{ (m32 & 0xFFF00000u) >> 20,
                  (m32 & 0x000FFF00u) >> 8,
                  ((m32 & 0xFu) << 8) | m8 }};
    }

    // Lexicographic order on (m32, m8).
    friend bool operator<(uint32p8_t const& lhs, uint32p8_t const& rhs)
    {
        if(lhs.m32 < rhs.m32) return true;
        if(lhs.m32 > rhs.m32) return false;
        return lhs.m8 < rhs.m8;
    }
};
#include <stdio.h>
#include <time.h>
#include <array>
#include <iostream>
#include <iomanip>
#include <utility>
#include <algorithm>
#include <cstdlib>
#include <iomanip>
#include <random>
// Benchmark driver. Fills `items` with triples of random 12-bit digits, sorts
// the index vector `ranks` by those triples using the method selected at
// compile time (exactly one of RADIX_SORT, STDSORT or STDSORT_PACKED must be
// defined), then prints the elapsed CPU time and whether `ranks` is sorted.
// Command line: argv[1] = PRNG seed (default 42), argv[2] = item count.
int main(int argc, char* argv[])
{
std::cout.sync_with_stdio(false);
// 2<<22 == 8388608 items by default.
constexpr auto items_count_default = 2<<22;
constexpr auto seed_default = 42;
uint32_t const seed = argc > 1 ? std::atoll(argv[1]) : seed_default;
std::cout << "seed: " << seed << "\n";
size_t const items_count = argc > 2 ? std::atoll(argv[2])
: items_count_default;
std::cout << "item count: " << items_count << "\n";
// Digit type per method. If none of the three macros is defined, this alias
// has no type and the program does not compile.
using Items_array_value_t =
#ifdef RADIX_SORT
size_t
#elif defined(STDSORT)
uint16_t
#elif defined(STDSORT_PACKED)
uint16_t
#endif
;
typedef std::vector<std::array<Items_array_value_t, 3> > Items;
Items items(items_count);
// NOTE(review): the radix variant allocates twice as many ranks as items, but
// only the first items.size() entries are written and checked in this
// function; the reason for the extra space is not visible here.
auto const ranks_count =
#ifdef RADIX_SORT
items.size() * 2
#elif defined(STDSORT)
items.size()
#elif defined(STDSORT_PACKED)
items.size()
#endif
;
//auto prng = xorshf96;
std::mt19937 gen(seed);
std::uniform_int_distribution<> dist;
auto prng = [&dist, &gen]{return dist(gen);};
std::vector<size_t> ranks(ranks_count);
// Identity permutation in ranks; every digit is the low 12 bits of a random draw.
for (size_t i = 0; i != items.size(); i++)
{
ranks[i] = i;
for (size_t j = 0; j != items[i].size(); j++)
{ items[i][j] = prng() & 0xFFF; }
}
std::cout << "using ";
// Timed section starts here; it includes printing the method name.
clock_t const start = clock();
#ifdef RADIX_SORT
std::cout << "radix sort\n";
radix_isort(items.begin(), items.end(), ranks.begin());
#elif defined(STDSORT)
std::cout << "std::sort\n";
// Sort the indices directly, comparing the items they refer to.
std::sort(ranks.begin(), ranks.begin() + items.size(),
make_fun_obj(items.cbegin())
//make_key_comp(make_deref(std::bind1st(
// add<Items::const_iterator, ptrdiff_t>(),
// items.begin())))
);
#elif defined(STDSORT_PACKED)
std::cout << "std::sort with a packed data structure\n";
// Build (packed key, index) pairs, sort them by key only, then copy the
// indices back into ranks. The packing is part of the timed section.
using Items_ranks = std::vector< std::pair<uint32p8_t,
decltype(ranks)::value_type> >;
Items_ranks items_ranks;
size_t i = 0;
for(auto iI = items.cbegin(); iI != items.cend(); ++iI, ++i)
{
items_ranks.emplace_back(*iI, i);
}
std::sort(begin(items_ranks), end(items_ranks),
[](Items_ranks::value_type const& lhs,
Items_ranks::value_type const& rhs)
{ return lhs.first < rhs.first; }
);
std::transform(items_ranks.cbegin(), items_ranks.cend(), begin(ranks),
[](Items_ranks::value_type const& e) { return e.second; }
);
#endif
// clock() ticks converted to milliseconds.
auto const duration = (clock() - start) / (CLOCKS_PER_SEC / 1000);
// Verify the result with the same index-by-item ordering for every method.
bool const sorted = std::is_sorted(ranks.begin(), ranks.begin() + items.size(),
make_fun_obj(items.cbegin()));
std::cout << "duration: " << duration << " ms\n"
<< "result sorted: " << std::boolalpha << sorted << "\n";
return 0;
}
Full code:
#include <algorithm>
#include <iterator>
#include <vector>
#include <cstddef>
using std::size_t;
using std::ptrdiff_t;
#include <xmmintrin.h>
// One distribution pass of the indirect radix sort, with software prefetching.
// [begin, end) holds indices into `a`; the pass reorders them by the key digit
// a[index][i]. Indices are appended to their bucket in input order and the
// buckets are read back in ascending digit order.
// `buckets` is caller-owned scratch space reused between passes; every bucket
// touched here is empty again on return.
template<class It1, class It2>
void radix_ipass(
It1 begin, It1 const end,
It2 const a, size_t const i,
std::vector<std::vector<size_t> > &buckets)
{
// Buckets [0, ncleared) have been cleared or freshly created during this pass.
size_t ncleared = 0;
for (It1 j = begin; j != end; ++j)
{
#if defined(_MM_TRANSPOSE4_PS)
// Prefetch the digit of the element N positions ahead, as long as more than
// N elements remain, so that its load overlaps with the work on this one.
constexpr auto N = 8;
if(end - j > N)
{ _mm_prefetch((char const *)(&a[j[N]][i]), _MM_HINT_T0); }
#else
// Compilation is refused when the macro above is not defined.
#error SS intrinsic not found
#endif
size_t const k = a[*j][i];
// Lazily clear buckets up to and including k.
while (k >= ncleared && ncleared < buckets.size())
{ buckets[ncleared++].clear(); }
// Grow the bucket table when this digit has not been seen before.
if (k >= buckets.size())
{
buckets.resize(k + 1);
ncleared = buckets.size();
}
// Append a placeholder and swap the index into it.
buckets[k].push_back(size_t());
using std::swap; swap(buckets[k].back(), *j);
}
// Gather: swap each bucket back into the range in digit order; the loop
// increment clears the bucket once it has been consumed.
for (std::vector<std::vector<size_t> >::iterator
j = buckets.begin(); j != buckets.begin() + ncleared; j->clear(), ++j)
{
begin = std::swap_ranges(j->begin(), j->end(), begin);
}
}
// Indirect LSD radix sort.
// [begin, end) is a range of keys, each a sequence of digits exposing size()
// and operator[]. `items` must have room for (end - begin) entries; on return
// it holds the indices 0..n-1 arranged so that the keys they refer to are in
// ascending lexicographic order. The keys themselves are not moved.
template<class It, class It2>
void radix_isort(It const begin, It const end, It2 const items)
{
    typename std::iterator_traits<It>::difference_type const count = end - begin;

    // Start from the identity permutation.
    for (ptrdiff_t pos = 0; pos != count; ++pos) { items[pos] = pos; }

    // The number of passes is the longest key length.
    size_t smax = 0;
    for (It key = begin; key != end; ++key)
    {
        size_t const len = key->size();
        if (len > smax) { smax = len; }
    }

    // Least significant digit first, i.e. the last position of each key.
    std::vector<std::vector<size_t> > buckets;
    for (size_t digit = smax; digit != 0; )
    {
        --digit;
        radix_ipass(items, items + count, begin, digit, buckets);
    }
}
#include <functional>
// Comparator adaptor: orders two values by the result of applying the key
// function `Key` to each of them, i.e. a < b iff Key(a) < Key(b).
// It derives from Key so that a stateless key function adds no storage.
template<class Key>
struct key_comp : public Key
{
    explicit key_comp(Key const &key = Key()) : Key(key) { }

    template<class T>
    bool operator()(T const &a, T const &b) const
    {
        Key const &project = *this;
        return project(a) < project(b);
    }
};

// Deduces Key so callers need not spell out the comparator type.
template<class Key>
key_comp<Key> make_key_comp(Key const &key)
{
    return key_comp<Key>(key);
}
// Binary functor returning a + b, computed as `a += b` on a by-value copy of a.
//
// The adaptable-functor typedefs are declared here directly instead of being
// inherited from std::binary_function, which was deprecated in C++11 and
// removed in C++17. Adaptors that read these typedefs keep working unchanged.
template<class T1, class T2>
struct add
{
    typedef T1 first_argument_type;
    typedef T2 second_argument_type;
    typedef T1 result_type;

    T1 operator()(T1 a, T2 const &b) const { return a += b; }
};
// Functor adaptor that calls F and dereferences what F returns.
// F must expose `argument_type` and a `result_type` usable with
// std::iterator_traits (an iterator or pointer); the adaptor returns a const
// reference to the element that result designates.
template<class F>
struct deref : public F
{
    deref(F const &f) : F(f) { }

    typename std::iterator_traits<
        typename F::result_type
    >::value_type const &
    operator()(typename F::argument_type const &a) const
    {
        F const &inner = *this;
        return *inner(a);
    }
};

// Deduces T so callers need not spell out the adaptor type.
template<class T>
deref<T> make_deref(T const &t)
{
    return deref<T>(t);
}
// Small xorshift-style pseudo-random number generator.
// The three state words live in function-local statics, so the sequence is
// the same on every program run and the function keeps state between calls.
size_t xorshf96(void)
{
    static size_t s0 = 123456789, s1 = 362436069, s2 = 521288629;

    // Scramble the oldest word.
    size_t mixed = s0;
    mixed ^= mixed << 16;
    mixed ^= mixed >> 5;
    mixed ^= mixed << 1;

    // Rotate the state and fold the scrambled word into the newest slot.
    s0 = s1;
    s1 = s2;
    s2 = mixed ^ s0 ^ s1;
    return s2;
}
// Index comparator: orders two indices by the elements they designate in the
// random-access sequence starting at `beg`, i.e. lhs < rhs iff beg[lhs] < beg[rhs].
//
// operator() is const because it never modifies the comparator; the previous
// non-const version could not be called through a const comparator object.
template<class It>
struct fun_obj
{
    It beg;

    bool operator()(ptrdiff_t lhs, ptrdiff_t rhs) const
    {
        return beg[lhs] < beg[rhs];
    }
};

// Deduces It so callers need not spell out the comparator type.
template<class It>
fun_obj<It> make_fun_obj(It beg)
{
    return fun_obj<It>{beg};
}
// Three 12-bit digits packed into 40 bits so that comparing (m32, m8)
// lexicographically equals comparing the digit triples lexicographically.
//
// Layout:
//   m32 bits 31..20 : digit 0
//   m32 bits 19..8  : digit 1
//   m32 bits  7..4  : unused (always zero)
//   m32 bits  3..0  : high nibble of digit 2
//   m8              : low byte of digit 2
struct uint32p8_t
{
    uint32_t m32;
    uint8_t m8;

    // Packs a triple. Only the low 12 bits of each digit are kept, so an
    // out-of-range digit cannot spill into its neighbour's bits. The shifts
    // are done in uint32_t to avoid shifting a promoted signed int into its
    // sign bit.
    uint32p8_t(std::array<uint16_t, 3> const& a)
        : m32(   ((static_cast<uint32_t>(a[0]) & 0xFFFu) << 20)
               | ((static_cast<uint32_t>(a[1]) & 0xFFFu) << 8)
               | ((static_cast<uint32_t>(a[2]) & 0xF00u) >> 8) )
        , m8( static_cast<uint8_t>(a[2] & 0xFF) )
    {
    }

    // Unpacks back into the three digits.
    // Each mask is parenthesised before shifting: `>>` binds tighter than `&`,
    // so the previous `m32&MASK >> n` masked with the shifted constant and
    // returned wrong digits. The second mask also had a digit missing.
    operator std::array<size_t, 3>() const
    {
        return {{ (m32 & 0xFFF00000u) >> 20,
                  (m32 & 0x000FFF00u) >> 8,
                  ((m32 & 0xFu) << 8) | m8 }};
    }

    // Lexicographic order on (m32, m8).
    friend bool operator<(uint32p8_t const& lhs, uint32p8_t const& rhs)
    {
        if(lhs.m32 < rhs.m32) return true;
        if(lhs.m32 > rhs.m32) return false;
        return lhs.m8 < rhs.m8;
    }
};
#include <stdio.h>
#include <time.h>
#include <array>
#include <iostream>
#include <iomanip>
#include <utility>
#include <algorithm>
#include <cstdlib>
#include <iomanip>
#include <random>
// Benchmark driver. Fills `items` with triples of random 12-bit digits, sorts
// the index vector `ranks` by those triples using the method selected at
// compile time (exactly one of RADIX_SORT, STDSORT or STDSORT_PACKED must be
// defined), then prints the elapsed CPU time and whether `ranks` is sorted.
// Command line: argv[1] = PRNG seed (default 42), argv[2] = item count.
int main(int argc, char* argv[])
{
std::cout.sync_with_stdio(false);
// 2<<22 == 8388608 items by default.
constexpr auto items_count_default = 2<<22;
constexpr auto seed_default = 42;
uint32_t const seed = argc > 1 ? std::atoll(argv[1]) : seed_default;
std::cout << "seed: " << seed << "\n";
size_t const items_count = argc > 2 ? std::atoll(argv[2]) : items_count_default;
std::cout << "item count: " << items_count << "\n";
// Digit type per method. If none of the three macros is defined, this alias
// has no type and the program does not compile.
using Items_array_value_t =
#ifdef RADIX_SORT
size_t
#elif defined(STDSORT)
uint16_t
#elif defined(STDSORT_PACKED)
uint16_t
#endif
;
typedef std::vector<std::array<Items_array_value_t, 3> > Items;
Items items(items_count);
// NOTE(review): the radix variant allocates twice as many ranks as items, but
// only the first items.size() entries are written and checked in this
// function; the reason for the extra space is not visible here.
auto const ranks_count =
#ifdef RADIX_SORT
items.size() * 2
#elif defined(STDSORT)
items.size()
#elif defined(STDSORT_PACKED)
items.size()
#endif
;
//auto prng = xorshf96;
std::mt19937 gen(seed);
std::uniform_int_distribution<> dist;
auto prng = [&dist, &gen]{return dist(gen);};
std::vector<size_t> ranks(ranks_count);
// Identity permutation in ranks; every digit is the low 12 bits of a random draw.
for (size_t i = 0; i != items.size(); i++)
{
ranks[i] = i;
for (size_t j = 0; j != items[i].size(); j++)
{ items[i][j] = prng() & 0xFFF; }
}
std::cout << "using ";
// Timed section starts here; it includes printing the method name.
clock_t const start = clock();
#ifdef RADIX_SORT
std::cout << "radix sort\n";
radix_isort(items.begin(), items.end(), ranks.begin());
#elif defined(STDSORT)
std::cout << "std::sort\n";
// Sort the indices directly, comparing the items they refer to.
std::sort(ranks.begin(), ranks.begin() + items.size(),
make_fun_obj(items.cbegin())
//make_key_comp(make_deref(std::bind1st(
// add<Items::const_iterator, ptrdiff_t>(),
// items.begin())))
);
#elif defined(STDSORT_PACKED)
std::cout << "std::sort with a packed data structure\n";
// Build (packed key, index) pairs, sort them by key only, then copy the
// indices back into ranks. The packing is part of the timed section.
using Items_ranks = std::vector< std::pair<uint32p8_t,
decltype(ranks)::value_type> >;
Items_ranks items_ranks;
size_t i = 0;
for(auto iI = items.cbegin(); iI != items.cend(); ++iI, ++i)
{
items_ranks.emplace_back(*iI, i);
}
std::sort(begin(items_ranks), end(items_ranks),
[](Items_ranks::value_type const& lhs,
Items_ranks::value_type const& rhs)
{ return lhs.first < rhs.first; }
);
std::transform(items_ranks.cbegin(), items_ranks.cend(), begin(ranks),
[](Items_ranks::value_type const& e) { return e.second; }
);
#endif
// clock() ticks converted to milliseconds.
auto const duration = (clock() - start) / (CLOCKS_PER_SEC / 1000);
// Verify the result with the same index-by-item ordering for every method.
bool const sorted = std::is_sorted(ranks.begin(), ranks.begin() + items.size(),
make_fun_obj(items.cbegin()));
std::cout << "duration: " << duration << " ms\n"
<< "result sorted: " << std::boolalpha << sorted << "\n";
return 0;
}