I have used std::vector for making my algorithm. I would like to replace the vectors by linked lists.
In order to do so, I was thinking of using the std::list, but I have no idea how to do this, for example I have tried following example for finding a value within a vector/list:
void find_values_in_vector(const std::vector<int>& input_vector, int value, int &rv1, int &rv2)
{
if (input_vector[0] >= value) { // too small
rv1 = 0; rv2 = 0; return;
}
int index = (int)input_vector.size() - 1;
if (input_vector[index] <= value) { // too big
rv1 = index; rv2 = index; return;
}
// somewhere inside
index = 0;
while (input_vector[index] <= value) {
index++;
}
rv1 = index - 1; rv2 = index; return;
}
void find_values_in_list(const std::list<int>& input_list, int value, int &rv1, int &rv2)
{
if (*input_list.begin() >= value) { // too small
rv1 = 0; rv2 = 0; return;
}
if (*input_list.end() <= value) { // too big
rv1 = (int)input_list.size() - 1; rv2 = (int)input_list.size() - 1; return;
}
// somewhere inside
int index = 0; int temp = *input_list.begin();
while (temp <= value) {
temp = *input_list.next(); index++;
}
rv1 = index - 1; rv2 = index; return;
}
This seems not to work, as the member function next() is not existing. However I remember that browsing through a linked list is done by going to the beginning, and moving further to the next element until the a certain point is reached. I have seen that there is a way to get this done by using an interator in a for-loop, but I wonder what's wrong with my approach? I was under the impression that a std::list was a standard implementation of a double-directional linked list, or am I wrong and in that case, what std class is the implementation of a linked list (it does not need to be a double-directional linked list)?
The standard way to iterate through containers is like this:
for(std::list<int>::iterator it = input_list.begin();
it != input_list.end();
it++)
{
....
}
This also works for vectors,maps,deque,etc. The Iterator concept is consistently implemented throughout the STL so it's best to get used to this concepts.
There are also iterator operations like std::distance and std::advance etc. for the different types of iterators (I suggest you read up on them and their advantages/limitations)
If you have C++ 11 available you can also use this syntax (may not be useful for your problem though.)
for(const auto& value : input_list)
{
...
}
This also works throughout the STL container.
This should work for vector, list, deque, and set (assuming the contents are sorted).
template <class T>
void find_values_in_container(const T& container, int value, int &rv1, int &rv2)
{
rv1 = rv2 = 0; // Initialize
if (container.empty() || container.front() >= value)
{
return;
}
for (const auto& v : container)
{
rv2++;
if (v > value)
{
break;
}
rv1++;
}
return;
}
Related
This is a2.hpp, and is the program that can be edited, as far as I know the code is correct, just too slow. I am honestly lost here, I know my for loops are probably whats slowing me down so much, maybe use an iterator?
// <algorithm>, <list>, <vector>
// YOU CAN CHANGE/EDIT ANY CODE IN THIS FILE AS LONG AS SEMANTICS IS UNCHANGED
#include <algorithm>
#include <list>
#include <vector>
class key_value_sequences {
private:
std::list<std::vector<int>> seq;
std::vector<std::vector<int>> keyref;
public:
// YOU SHOULD USE C++ CONTAINERS TO AVOID RAW POINTERS
// IF YOU DECIDE TO USE POINTERS, MAKE SURE THAT YOU MANAGE MEMORY PROPERLY
// IMPLEMENT ME: SHOULD RETURN SIZE OF A SEQUENCE FOR GIVEN KEY
// IF NO SEQUENCE EXISTS FOR A GIVEN KEY RETURN 0
int size(int key) const;
// IMPLEMENT ME: SHOULD RETURN POINTER TO A SEQUENCE FOR GIVEN KEY
// IF NO SEQUENCE EXISTS FOR A GIVEN KEY RETURN nullptr
const int* data(int key) const;
// IMPLEMENT ME: INSERT VALUE INTO A SEQUENCE IDENTIFIED BY GIVEN KEY
void insert(int key, int value);
}; // class key_value_sequences
int key_value_sequences::size(int key) const {
//checks if the key is invalid or the count vector is empty.
if(key<0 || keyref[key].empty()) return 0;
// sub tract 1 because the first element is the key to access the count
return keyref[key].size() -1;
}
const int* key_value_sequences::data(int key) const {
//checks if key index or ref vector is invalid
if(key<0 || keyref.size() < static_cast<unsigned int>(key+1)) {
return nullptr;
}
// ->at(1) accesses the count (skipping the key) with a pointer
return &keyref[key].at(1);
}
void key_value_sequences::insert(int key, int value) {
//checks if key is valid and if the count vector needs to be resized
if(key>=0 && keyref.size() < static_cast<unsigned int>(key+1)) {
keyref.resize(key+1);
std::vector<int> val;
seq.push_back(val);
seq.back().push_back(key);
seq.back().push_back(value);
keyref[key] = seq.back();
}
//the index is already valid
else if(key >=0) keyref[key].push_back(value);
}
#endif // A2_HPP
This is a2.cpp, this just tests the functionality of a2.hpp, this code cannot be changed
// DO NOT EDIT THIS FILE !!!
// YOUR CODE MUST BE CONTAINED IN a2.hpp ONLY
#include <iostream>
#include "a2.hpp"
int main(int argc, char* argv[]) {
key_value_sequences A;
{
key_value_sequences T;
// k will be our key
for (int k = 0; k < 10; ++k) { //the actual tests will have way more than 10 sequences.
// v is our value
// here we are creating 10 sequences:
// key = 0, sequence = (0)
// key = 1, sequence = (0 1)
// key = 2, sequence = (0 1 2)
// ...
// key = 9, sequence = (0 1 2 3 4 5 6 7 8 9)
for (int v = 0; v < k + 1; ++v) T.insert(k, v);
}
T = T;
key_value_sequences V = T;
A = V;
}
std::vector<int> ref;
if (A.size(-1) != 0) {
std::cout << "fail" << std::endl;
return -1;
}
for (int k = 0; k < 10; ++k) {
if (A.size(k) != k + 1) {
std::cout << "fail";
return -1;
} else {
ref.clear();
for (int v = 0; v < k + 1; ++v) ref.push_back(v);
if (!std::equal(ref.begin(), ref.end(), A.data(k))) {
std::cout << "fail 3 " << A.data(k) << " " << ref[k];
return -1;
}
}
}
std::cout << "pass" << std::endl;
return 0;
} // main
If anyone could help me improve my codes efficiency I would really appreciate it, thanks.
First, I'm not convinced your code is correct. In insert, if they key is valid you create a new vector and insert it into sequence. Sounds wrong, as that should only happen if you have a new key, but if your tests pass it might be fine.
Performance wise:
Avoid std::list. Linked lists have terrible performance on today's hardware because they break pipelineing, caching and pre-fetching. Always use std::vector instead. If the payload is really big and you are worried about copies use std::vector<std::unique_ptr<T>>
Try to avoid copying vectors. In your code you have keyref[key] = seq.back() which copies the vector, but should be fine since it's only one element.
Otherwise there's no obvious performance problems. Try to benchmark and profile your program and see where the slow parts are. Usually there's one or two places that you need to optimize and get great performance. If it's still too slow, ask another question where you post your results so that we can better understand the problem.
I will join Sorin in saying don't use std::list if avoidable.
So you use key as direct index, where does it say it is none-negative? where does it say its less than 100000000?
void key_value_sequences::insert(int key, int value) {
//checks if key is valid and if the count vector needs to be resized
if(key>=0 && keyref.size() < static_cast<unsigned int>(key+1)) {
keyref.resize(key+1); // could be large
std::vector<int> val; // don't need this temporary.
seq.push_back(val); // seq is useless?
seq.back().push_back(key);
seq.back().push_back(value);
keyref[key] = seq.back(); // we now have 100000000-1 empty indexes
}
//the index is already valid
else if(key >=0) keyref[key].push_back(value);
}
Can it be done faster? depending on your key range yes it can. You will need to implement a flat_map or hash_map.
C++11 concept code for a flat_map version.
// effectively a binary search
auto key_value_sequences::find_it(int key) { // type should be iterator
return std::lower_bound(keyref.begin(), keyref.end(), [key](const auto& check){
return check[0] < key; // key is 0-element
});
}
void key_value_sequences::insert(int key, int value) {
auto found = find_it(key);
// at the end or not found
if (found == keyref.end() || found->front() != key) {
found = keyref.emplace(found, key); // add entry
}
found->emplace_back(value); // update entry, whether new or old.
}
const int* key_value_sequences::data(int key) const {
//checks if key index or ref vector is invalid
auto found = find_it(key);
if (found == keyref.end())
return nullptr;
// ->at(1) accesses the count (skipping the key) with a pointer
return found->at(1);
}
(hope I got that right ...)
I need to loop through a vector of linked list and check if a node with the string(s) exists. Obviously this only checks the beginning to see if it exists. I want to know how to go through the entire linked list.
vector <Dlist *> table; //Dlist is the linked list
int DL_Hash::Present(string &s) {
int index = djb_hash(s) % table.size();
for(int i = 0; i < table[index]->Size(); i++) {
if(table[index]->Begin()->s == s) {
return 1;
}
}
return 0;
}
You are not using variable i at all, and you indeed cant do something like table[index][i], as you cant index linked list. You only can get first element in list, and immediate next.
First you get pointer to the beginning (iterator would be more fitting). Then check every item for your condition, until you are at the end of list.
int DL_Hash::Present(string &s) {
int index = djb_hash(s) % table.size();
auto tmp = table[index]->Begin();
while (tmp != nullptr) { // check if you are at end of linked list
if (tmp->s == s)
return 1;
tmp = tmp.Next(); // asuming you have function to get next element in linked list.
}
return 0;
}
If Dlist would have iterators:
int DL_Hash::Present(string &s) {
int index = djb_hash(s) % table.size();
auto tmp = table[index]->begin();
while (tmp != table[index].end()) { // check if you are at end of linked list
if (tmp->s == s)
return 1;
tmp++;
}
return 0;
}
Or even better, using range for
int DL_Hash::Present(string &s) {
int index = djb_hash(s) % table.size();
for (auto &tmp : table[index]) {
if (tmp->s == s)
return 1;
}
return 0;
}
I have a vector of pointers, pointing to approx 10MB of packets. In that, from first 2MB, I wanna delete all those that matches my predicate. The problem here is remove_if iterates through the whole vector, even though its not required in my use case. Is there any other efficient way?
fn_del_first_2MB
{
uint32 deletedSoFar = 0;
uint32 deleteLimit = 2000000;
auto it = std::remove_if (cache_vector.begin(), cache_vector.end(),[deleteLimit,&deletedSoFar](const rc_vector& item){
if(item.ptr_rc->ref_count <= 0) {
if (deletedSoFar < deleteLimit) {
deletedSoFar += item.ptr_rc->u16packet_size;
delete(item.ptr_rc->packet);
delete(item.ptr_rc);
return true;
}
else
return false;
}
else
return false;
});
cache_vector.erase(it, cache_vector.end());
}
In the above code, once the deletedSoFar is greater than deleteLimit, any iteration more than that is unwanted.
Instead of cache_vector.end() put your own iterator marker myIter. With the remove_if option you should follow the erase-remove idiom. Here is an example that affects only the first 4 elements:
#include <iostream>
#include <vector>
#include <algorithm>
int main()
{
std::vector<int> vec = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
size_t index = 4; // index is something you need to calculate
auto myIter = vec.begin() + index; // Your iterator instead of vec.end()
vec.erase(std::remove_if(vec.begin(), myIter, [](int x){return x < 3; }), myIter);
// modified vector:
for (const auto& a : vec)
{
std::cout << a << std::endl;
}
return 0;
}
You may use your own loop:
void fn_del_first_2MB()
{
const uint32 deleteLimit = 2000000;
uint32 deletedSoFar = 0;
auto dest = cache_vector.begin();
auto it = dest
for (; it != cache_vector.end(); ++it) {
const auto& item = *it;
if (item.ptr_rc->ref_count <= 0) {
deletedSoFar += item.ptr_rc->u16packet_size;
delete(item.ptr_rc->packet);
delete(item.ptr_rc);
if (deletedSoFar >= deleteLimit) {
++it;
break;
}
} else if (dest != it) {
*dest = std::move(*it);
++dest;
}
}
cache_vector.erase(dest, it);
}
There is no need for std::remove_if() to pass the .end() iterator as the second argument: as long as the first argument can reach the second argument by incrementing, any iterators can be passed.
There is somewhat of a complication as your condition depends on the accumulated size of the elements encountered so far. As it turns out, it looks as if std::remove_if() won't be used. Something like this should work (although I'm not sure if this use of std::find_if() is actually legal as it keeps changing the predicate):
std::size_t accumulated_size(0u);
auto send(std::find_if(cache_vector.begin(), cache_vector.end(),
[&](rc_vector const& item) {
bool rc(accumulated_size < delete_limit);
accumulated_size += item.ptr_rc->u16packet_size;
return rc;
});
std::for_each(cache_vector.begin(), send, [](rc_vector& item) {
delete(item.ptr_rc->packet);
delete(item.ptr_rc);
});
cache_vector.erase(cache_vector.begin(), send);
The std::for_each() could be folded into the use of std::find_if() as well but I prefer to keep things logically separate. For a sufficiently large sequence there could be a performance difference when the memory needs to be transferred to the cache twice. For the tiny numbers quoted I doubt that the difference can be measured.
I need to implement the following datastructure for my project. I have a relation of
const MyClass*
to
uint64_t
For every pointer I want to save a counter connected to it, which can be changed over time (in fact only incremented). This would be no problem, I could simply store it in a std::map. The problem is that I need fast access to the pointers which have the highest values.
That is why I came to the conclusion to use a boost::bimap. It is defined is follows for my project:
typedef boost::bimaps::bimap<
boost::bimaps::unordered_set_of< const MyClass* >,
boost::bimaps::multiset_of< uint64_t, std::greater<uint64_t> >
> MyBimap;
MyBimap bimap;
This would work fine, but am I right that I can not modify the uint64_t on pair which were inserted once? The documentation says that multiset_of is constant and therefore I cannot change a value of pair in the bimap.
What can I do? What would be the correct way to change the value of one key in this bimap? Or is there a simpler data structure possible for this problem?
Here's a simple hand-made solution.
Internally it keeps a map to store the counts indexed by object pointer, and a further multi-set of iterators, ordered by descending count of their pointees.
Whenever you modify a count, you must re-index. I have done this piecemeal, but you could do it as a batch update, depending on requirements.
Note that in c++17 there is a proposed splice operation for sets and maps, which would make the re-indexing extremely fast.
#include <map>
#include <set>
#include <vector>
struct MyClass { };
struct store
{
std::uint64_t add_value(MyClass* p, std::uint64_t count = 0)
{
add_index(_map.emplace(p, count).first);
return count;
}
std::uint64_t increment(MyClass* p)
{
auto it = _map.find(p);
if (it == std::end(_map)) {
// in this case, we'll create one - we could throw instead
return add_value(p, 1);
}
else {
remove_index(it);
++it->second;
add_index(it);
return it->second;
}
}
std::uint64_t query(MyClass* p) const {
auto it = _map.find(p);
if (it == std::end(_map)) {
// in this case, we'll create one - we could throw instead
return 0;
}
else {
return it->second;
}
}
std::vector<std::pair<MyClass*, std::uint64_t>> top_n(std::size_t n)
{
std::vector<std::pair<MyClass*, std::uint64_t>> result;
result.reserve(n);
for (auto idx = _value_index.begin(), idx_end = _value_index.end() ;
n && idx != idx_end ;
++idx, --n) {
result.emplace_back((*idx)->first, (*idx)->second);
}
return result;
}
private:
using map_type = std::map<MyClass*, std::uint64_t>;
struct by_count
{
bool operator()(map_type::const_iterator l, map_type::const_iterator r) const {
// note: greater than orders by descending count
return l->second > r->second;
}
};
using value_index_type = std::multiset<map_type::iterator, by_count>;
void add_index(map_type::iterator iter)
{
_value_index.emplace(iter->second, iter);
}
void remove_index(map_type::iterator iter)
{
for(auto range = _value_index.equal_range(iter);
range.first != range.second;
++range.first)
{
if (*range.first == iter) {
_value_index.erase(range.first);
return;
}
}
}
map_type _map;
value_index_type _value_index;
};
When I am doing practice on leetcode, I met a problem like this:
I used a stl::list container as cache for LRU algorithm. But the sequence of erasing an item and inserting an item made the result different.
I know that it is actually a double list as stl::list. And the sequence of inserting and erasing should not matter when I use iterator.
The code is here
class LRUCache{
public:
map<int, list<pair<int,int>>::iterator> mKey;
list<pair<int,int>> lCache;
int cap;
LRUCache(int capacity) {
cap = capacity;
}
int get(int key) {
auto iter = mKey.find(key);
if(iter != mKey.end()) {
int value = (iter->second)->second;
//**the sequence of next two lines can not be changed!***
lCache.erase(iter->second);
mKey[key] = lCache.insert(lCache.begin(), make_pair(key,value));
return value;
}
return -1;
}
void set(int key, int value) {
auto iter = mKey.find(key);
if(iter == mKey.end()) {
if(lCache.size() < cap) {
mKey[key] = lCache.insert(lCache.begin(), make_pair(key,value));
}
else{
mKey[key] = lCache.insert(lCache.begin(), make_pair(key,value));
mKey.erase(lCache.back().first);
lCache.pop_back();
}
}
else {
lCache.erase(iter->second);
mKey[key] = lCache.insert(lCache.begin(), make_pair(key,value));
}
}
};
It's not quite clear what you are asking. If your question is why these two lines can't be reordered:
//**the sequence of next two lines can not be changed!***
lCache.erase(iter->second);
mKey[key] = lCache.insert(lCache.begin(), make_pair(key,value));
then that's simple. iter points to the same node as mKey[key], so the assignment actually changes the value of iter->second. If the assignment would happen first, then iter->second would point to the freshly inserted list node, not the previously existing one.