Search a sequence in a string. DNA - c++

I need to write a program that takes every substring of a given string, for each length from 3 up to the size of the string, and compares it with the other substrings of the same length in that string. Let me explain.
User introduce this DNA string = "ACTGCGACGGTACGCTTCGACGTAG" For example.
We start with n = 3; that is, we take the first three characters of the DNA for comparison.
The first characters are "ACT", and we need to compare them with the other sequences of three characters, i.e. [CTG, TGC, GCG, ...] up to the end.
If we find another sequence equal to "ACT", we save its position.
Here is another example:
DNA: "ACTGCGACGGTACGCTTCGACGTAG" and we find this sequences in his positions:
ACG: 7 - 12 - 20
CGA: 5 - 18
GAC: 6 - 19
GTA: 10 - 22
CGAC: 5 - 18
GACG: 6 - 19
CGACG: 5 - 18
The number is the position of the start of the sequence:
ACTGCGACGGTACGCTTCGACGTAG
As you can see, n starts at 3 and is incremented by 1 once the search for that length is finished (n = 4, and so on), until n = DNA.size().
My problem is that I have one function that splits the DNA string into small sequences, which I push_back() into a vector so that I can then see whether a sequence is repeated, but I don't know how to get its position.
I am allowed to use the <algorithm> library, and it surely has a function that does this, but I don't know that library very well.
Here is my code:
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
using namespace std;
// Built-in sample sequence that main() scans.
const string DNA = "ACTGCGACGGTACGCTTCGACGTAG";
// Length of the sample sequence ("taille" is French for "size").
const size_t taille = DNA.size();
// Length of the sub-sequences that main() extracts.
size_t m = 3;
vector<string> v;
/*
struct DNA{
const string dna; // string entered by the user
size_t taille; // size of the string
string chaine; // string to search for
};
*/
// what kind of structs can i create? for me it's stupid to make any struct in this program.
// Forward declarations of the helpers defined after main().
bool checkDNA(string &s);
string takeStrings(const string &s,size_t i, size_t m);
void FindSequenceDNA(vector<string>&s,string sq);
size_t incrementValue(size_t &m);
int main(){
string DNAuser;
cout << "Introduce the DNA: ";
cin >> DNAuser;
bool request;
cout << boolalpha;
request = DNAuser.find_first_not_of("AGCT");
cout << request << endl;
vector<string> vectorSq;
size_t auxiliar = 0;
string r;
size_t ocurrencies = DNA.size()-2;
cout << "DNA: " << DNA << endl;
while(auxiliar<ocurrencies){ // This gonna be works with the ocurriences, from 1 to end.
r = takeStrings(DNA,auxiliar,auxiliar+m);
auxiliar++;
if(r.size()==m){
vectorSq.push_back(r);
}
}
// string res = takeStrings(DNA,0,3);
// cout << "res: " << res << endl;
// cout << "Printing vector: " << endl;
// I just need to find the other, the practice is almost done.
for(size_t i = 0; i< vectorSq.size(); i++){
cout << vectorSq[i] << endl;
}
return 0;
}
// Returns the characters of `s` in the half-open index range [i, m).
//   s : source string; an empty string prints "String is empty." and yields "".
//   i : index of the first character to copy.
//   m : index one past the last character to copy (clamped to the end of s).
string takeStrings(const string &s,size_t i, size_t m){
    if(s.empty()){
        cout << "String is empty." << endl;
        return string();
    }
    // An empty or inverted range yields nothing. The previous `i != m` loop
    // condition kept copying to the end of the string whenever i was past m.
    if(i >= m || i >= s.size()){
        return string();
    }
    // substr() itself clamps the length when m lies beyond the end of s.
    return s.substr(i, m - i);
}
// Prints every element of `s` that equals `sq`, each one preceded by a
// "function: " line. An empty vector prints "DNA invalid." instead.
// Only the matching text is printed; the element's index is not reported.
void FindSequenceDNA(vector<string>&s,string sq){
    if(s.empty()){
        cout << "DNA invalid." << endl;
        return;
    }
    for(vector<string>::const_iterator it = s.begin(); it != s.end(); ++it){
        if(*it != sq){
            continue;
        }
        cout << "function: " << endl;
        cout << *it << endl;
    }
}
// Returns true when `s` has at least 3 characters and consists only of the
// letters A, C, G and T. Strings shorter than 3 print "DNA invalid" and
// return false.
bool checkDNA(string &s){
    if(s.size()<3){
        cout << "DNA invalid" << endl;
        // Previously the result was left uninitialised on this path.
        return false;
    }
    // Reject on ANY foreign character; the old loop overwrote its result on
    // every iteration, so only the last character decided the answer.
    return s.find_first_not_of("ACGT") == string::npos;
}
// Advances `m` by one while it is still below the length of the global DNA
// string, and returns the (possibly unchanged) value.
size_t incrementValue(size_t &m){
    const bool belowLength = m < DNA.size();
    if(!belowLength){
        return m;
    }
    ++m;
    return m;
}

Based on Mohit's answer, but re-uses pointers into a single buffer to possibly get better performance (vs. string.substr).
#include <iostream>
#include <cstring>
#include <vector>
#include <string>
using namespace std;
// Sequence whose sub-sequences are indexed by main().
static const char* DNAdata = "ACTGCGACGGTACGCTTCGACGTAG";
static const size_t len = strlen(DNAdata);
// uniqueKeys[n-1] holds the distinct sub-sequences of length n recorded so far.
vector< vector< string > > uniqueKeys(len);
// locations[n-1][i] holds the start offsets recorded for uniqueKeys[n-1][i].
vector< vector< vector<size_t> > > locations(len);
// Records that the NUL-terminated sequence `str`, of length `n`, starts at
// offset `loc`. A sequence seen for the first time gets a new key and a new
// location list; otherwise `loc` is appended to the existing list.
void saveInfo(const char* str, size_t n, size_t loc) {
    vector<string>& keys = uniqueKeys[n-1];
    vector<vector<size_t> >& locs = locations[n-1];

    // Linear search for an already-known key.
    size_t slot = 0;
    while (slot < keys.size() && !(keys[slot] == str)) {
        ++slot;
    }

    if (slot < keys.size()) {
        locs[slot].push_back(loc);
        return;
    }
    keys.push_back(str);
    locs.push_back(vector<size_t>(1, loc));
}
// Prints `str` followed by every start offset recorded for that exact
// sequence, one per line. Only the sequence itself is printed when it was
// never recorded or its length is outside the indexed range.
void printInfo(const char* str) {
    cout << str << endl;
    const size_t n = strlen(str);
    // The tables are indexed by length - 1: an empty string would wrap the
    // index around and an over-long one would index past the end.
    if (n == 0 || n > uniqueKeys.size()) {
        return;
    }
    const vector<string>& keys = uniqueKeys[n-1];
    const vector<vector<size_t> >& locs = locations[n-1];
    for (size_t i=0; i<keys.size(); ++i) {
        if (keys[i] == str) {
            const vector<size_t>& l = locs[i];
            for (vector<size_t>::const_iterator iter = l.begin(); iter != l.end(); ++iter) {
                cout << *iter << endl;
            }
            break;
        }
    }
}
int main() {
    // Writable, NUL-terminated copy of the sequence. The vector owns the
    // buffer, so it is released even if saveInfo() throws (the previous
    // new[]/delete[] pair leaked in that case).
    vector<char> buffer(DNAdata, DNAdata + len + 1);
    char* const start = &buffer[0];
    char* const end = start + len;
    for (size_t n = 3; n <= len; ++n) {
        size_t loc = 0;
        char* p = start;
        char* e = p + n;
        while (e <= end) {
            // Temporarily terminate the window [p, e) in place so that it can
            // be passed as a C string without copying it.
            const char save = *e;
            *e = 0;
            saveInfo(p, n, loc);
            *e = save;
            ++p;
            ++loc;
            ++e;
        }
    }
    printInfo("GTA");
    printInfo("ACTGCGACGGTACGCTTCGACGTA");
    return 0;
}
To print all:
// Dumps the whole index: for each length from 3 to len a "--> n <--" header,
// then every recorded sequence of that length followed by its start offsets.
void printAll() {
    size_t n = 3;
    while (n <= len) {
        cout << "--> " << n << " <--" << endl;
        vector<string>& keys = uniqueKeys[n-1];
        vector<vector<size_t> >& locs = locations[n-1];
        for (size_t k = 0; k < keys.size(); ++k) {
            cout << keys[k] << endl;
            vector<size_t>& offsets = locs[k];
            for (size_t j = 0; j < offsets.size(); ++j) {
                cout << offsets[j] << endl;
            }
        }
        ++n;
    }
}

How about:
// Maps every substring of dna of length >= 3 to the offsets where it starts.
std::map< std::string, std::vector<std::size_t> > msvi;
const std::size_t len = dna.size();
for(std::size_t from = 0; from < len; ++from) {
    // Only lengths that fit entirely inside dna: substr() silently truncates,
    // which would otherwise record the same tail several times.
    for(std::size_t sz = 3; from + sz <= len; ++sz) {
        msvi[ dna.substr(from, sz) ].push_back(from);
    }
}
This creates every substring of length 3 or more and saves its starting positions in a map.
Live demo link
Print only the items with 2 or more instances
As you don't want to use std::map, you can construct a trie as shown on this page written in C. Change your tree node to:
// Trie node: `starts` collects start positions, `children` has one slot per
// letter A to Z.
struct tree_node {
    vector<int> starts;
    tree_node *children[26]; /* A to Z */
};

Related

Why is it that my code is only showing the last element in the array even though It should be showing the element with the most amount of characters

#include <iostream>
#include <vector>
#include <ctime>
using namespace std;
// Prompts on standard output, then reads five whitespace-separated words from
// standard input and returns them in input order.
vector<string> createvector() {
    cout << "Please enter 5 different words: " << endl;
    vector<string> collected;
    string token;
    int remaining = 5;
    while (remaining-- > 0) {
        cin >> token;
        collected.push_back(token);
    }
    return collected;
}
// Prints "The largest word is: " followed by the last of words[1..4] that
// compares greater than words[0].
// NOTE: the comparison is lexicographic and always against words[0]; this is
// the behaviour the question is about and it is kept as is.
void mostchar(vector<string> words) {
    const string &first = words[0];
    string largestword;
    int i = 1;
    while (i < 5) {
        const string &candidate = words[i];
        if (first < candidate) {
            largestword = candidate;
        }
        ++i;
    }
    cout << "The largest word is: " << largestword;
}
int main()
{
    // Read the words, then report the "largest" one. The unused local
    // `names` has been dropped.
    const vector<string> words = createvector();
    mostchar(words);
}
I do not understand why it picks the last or the second-to-last element every time. I've tried changing for(int i = 1; i < 5; i++), but it makes no difference.
For starters you are comparing strings in the lexicographical order.
if (words[i] > w1) {
Secondly you always comparing with the word in the first element of the array
if (words[i] > w1) {
and the variable w1 is not being changed within the loop. So any last element in the vector that is greater than w1 will be assigned to the variable largestword.
Using the for loop the function can look the following way
// Prints the longest word of `words` (the first one on ties) as
// "The largest word is: <word>" plus a newline. Prints nothing for an empty
// vector.
void mostchar( const std::vector<std::string> &words )
{
    if ( words.empty() )
    {
        return;
    }
    std::vector<std::string>::const_iterator longest = words.begin();
    for ( std::vector<std::string>::const_iterator it = words.begin() + 1; it != words.end(); ++it )
    {
        if ( longest->size() < it->size() )
        {
            longest = it;
        }
    }
    std::cout << "The largest word is: " << *longest << '\n';
}
Note that, in general, the caller may pass an empty vector to the function, so you must handle that possibility inside the function.
Bear in mind that there is standard algorithm std::max_element that can be used instead of manually written for loop.
For example
#include <iostream>
#include <vector>
#include <iterator>
#include <algorithm>
// Same contract as the loop version: prints the longest word (first one on
// ties) followed by a newline, and nothing at all for an empty vector.
void mostchar( const std::vector<std::string> &words )
{
    const auto shorterThan = []( const std::string &lhs, const std::string &rhs )
    {
        return lhs.size() < rhs.size();
    };
    const auto longest = std::max_element( words.begin(), words.end(), shorterThan );
    if ( longest == words.end() )
    {
        return;
    }
    std::cout << "The largest word is: " << *longest << '\n';
}
There are a couple issues here:
1: You should use something like .length() to compare "length"
2: You are comparing the next word in the array to words[0] every time.
EDIT: To further explain this, there is an assignment of string w1 = words[0];. w1 is then used in the if in the for loop here:
// w1 is fixed to the first word and never updated inside the loop.
string w1 = words[0];
string largestword;
for (int i = 1; i < 5; i++) {
// operator> on strings compares lexicographically, not by length.
if (words[i] > w1) {
largestword = words[i];
}
}
resulting in the value of words[0] being the value repeatedly compared in the loop.
Adjust the comparison line to if (words[i].length() > largestword.length()) and that solves both problems. You can elminate w1 entirely this way as well.
#include <iostream>
#include <vector>
#include <ctime>
using namespace std;
// Prints the prompt, reads five words from standard input and returns them in
// the order they were typed.
vector<string> createvector() {
    vector<string> result;
    cout << "Please enter 5 different words: " << endl;
    while (result.size() != 5) {
        string word;
        cin >> word;
        result.push_back(word);
    }
    return result;
}
// Prints "The largest word is: " followed by the longest word in `words`
// (the first one on ties). An empty vector prints the label only.
void mostchar(const vector<string> &words) {
    string largestword;
    // Walk the actual size of the vector; the previous hard-coded bound of 5
    // read past the end whenever fewer than five words were supplied.
    for (vector<string>::size_type i = 0; i < words.size(); i++) {
        if (words[i].length() > largestword.length()) {
            largestword = words[i];
        }
    }
    cout << "The largest word is: " << largestword;
}
int main()
{
    // Read the words and print the longest one. The unused local `names`
    // has been dropped.
    const vector<string> words = createvector();
    mostchar(words);
}

Parallel vectors in C++

I need some help with the use of parallel vectors. What I want to do is have 2 vectors, 1 containing the alphabet, and the other containing the alphabet the other way around. When someone types in a word, it prints out the word using the inverted alphabet.
This is what I've done up until now and I'm not too sure if I'm on the right track or not:
#include <iostream>
#include <ctype.h>
using namespace std;
// Prints a prompt and reads an integer into `code`, then counts i from 0 up to
// `code`. The print inside the loop is guarded by `code == 0`, which cannot
// hold while `i < code` is true, so only the prompt is ever written.
// `alfab` is not used.
void search(char alfab[], char cripto[], int code){
    cout << "Introduce your message: " << endl;
    cin >> code;
    int i = 0;
    while(i < code){
        if(code == 0){
            cout << "Your code is:" << cripto[i] << endl;
        }
        ++i;
    }
}
int main(){
// Plain alphabet, a to z.
char alfab[26] = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'};
// Same letters in reverse order: cripto[k] is the substitute for alfab[k].
char cripto[26] = {'z','y','x','w','v','u','t','s','r','q','p','o','n','m','l','k','j','i','h','g','f','e','d','c','b','a'};
// Declared but never used; search() is not called from here.
char code;
}
Think about how you would do this by hand. Then try to translate those steps to code.
Get user input
for each letter:
decide which letter of your reversed alphabet it is
write that new letter down in the same position as the original
output new string
Try something more like this instead:
#include <iostream>
#include <string>
// Parallel tables: cripto[k] is the replacement for alfab[k].
static const char alfab[26] = {'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'};
static const char cripto[26] = {'z','y','x','w','v','u','t','s','r','q','p','o','n','m','l','k','j','i','h','g','f','e','d','c','b','a'};

// Returns a copy of `word` in which every character found in alfab is replaced
// by the entry at the same index of cripto. Other characters are unchanged.
std::string invert(const std::string &word){
    std::string inverted(word);
    for(char &slot : inverted)
    {
        int j = 0;
        while(j < 26 && alfab[j] != slot)
        {
            ++j;
        }
        if(j < 26)
        {
            slot = cripto[j];
        }
    }
    return inverted;
}
// Reads one word from standard input and prints its inverted form.
int main(){
    std::cout << "Enter a word: " << std::endl;
    std::string input;
    std::cin >> input;
    const std::string coded = invert(input);
    std::cout << "Your code is: " << coded << std::endl;
}
You could try using one array:
// Returns `original` with every lowercase letter a..z replaced by its mirror
// in the alphabet (a<->z, b<->y, ...). Other characters are copied unchanged.
std::string invert(const std::string& original)
{
    static const char cripto[26] =
    {
        'z','y','x','w',
        'v','u','t','s','r',
        'q','p','o','n','m',
        'l','k','j','i','h',
        'g','f','e','d','c',
        'b','a'
    };
    const std::string::size_type length = original.length();
    std::string inverted_text;
    inverted_text.reserve(length);
    // The loop header previously had no increment clause and did not compile.
    for (std::string::size_type i = 0; i < length; ++i)
    {
        const char c = original[i];
        // Only index the table with characters it actually covers.
        if (c >= 'a' && c <= 'z')
        {
            inverted_text += cripto[c - 'a'];
        }
        else
        {
            inverted_text += c;
        }
    }
    return inverted_text;
}
Edit 1: Using some math
You could simplify the encryption (inversion) by using some math.
// Returns `original` with every lowercase letter a..z mirrored within the
// alphabet, computed arithmetically. Other characters are copied unchanged.
std::string invert(const std::string& original)
{
    const std::string::size_type length = original.length();
    std::string inverted_text;
    inverted_text.reserve(length);
    // The loop header previously had no increment clause and did not compile.
    for (std::string::size_type i = 0; i < length; ++i)
    {
        const char c = original[i];
        if (c >= 'a' && c <= 'z')
        {
            // 25 - (c - 'a') is the mirrored index; add 'a' to get the letter.
            inverted_text += static_cast<char>((25 - (c - 'a')) + 'a');
        }
        else
        {
            inverted_text += c;
        }
    }
    return inverted_text;
}
Using transform
You could use std::transform:
// Mirrors a lowercase letter within the alphabet (a<->z, b<->y, ...).
// Any other character is returned unchanged.
char invert_char(char c)
{
    if (c < 'a' || c > 'z')
    {
        return c;
    }
    // The statement previously ended in ':' instead of ';'.
    return static_cast<char>((25 - (c - 'a')) + 'a');
}
//...
// Source and destination are the same range, so original_word is rewritten
// in place, one character at a time, through invert_char.
std::transform(original_word.begin(), original_word.end(),
original_word.begin(), invert_char);

which method is more suitable for huffman encoding i want to read chars with their frequency

two loops reading chars from string
// For every position i of `str`, counts how many characters of `str` equal
// str[i] and passes (str[i], count) to enqueue(). A character that occurs
// k times is therefore passed to enqueue() k times.
void ReadCharWithFreq(string str){
    const int n = str.size();
    for(int i = 0; i < n; ++i){
        // Full scan of the string for each position.
        int occurrences = 0;
        for(int x = 0; x < n; ++x){
            occurrences += (str[x] == str[i]) ? 1 : 0;
        }
        //enqueue char with frequency
        enqueue(str[i], occurrences);
    }
} //end of function
same function with different method
using a heap array freq[] and memset
and I don't understand what memset(array, int, int) does
// Counts the characters of `str` in the external table `freq`, indexed by
// (character - 'a'), then prints and enqueues each distinct character once,
// in order of first appearance.
// NOTE(review): `freq` is declared outside this block; the index is not range
// checked, so this presumably expects lowercase letters only - confirm that
// freq is large enough for the inputs used.
void ReadCharWithFreq(string str){
    const int n = str.size();
    // Zero every counter before counting (unused locals SIZE and spf removed).
    memset(freq, 0, sizeof(freq));
    for (int i = 0; i < n; i++){
        freq[str[i] - 'a']++;
    }
    for (int i = 0; i < n; i++) {
        if (freq[str[i] - 'a'] != 0) {
            cout << str[i] <<" "<< freq[str[i] - 'a'] << " - >";
            enqueue(str[i], freq[str[i] - 'a']);
            // Clear the counter so later occurrences are skipped.
            freq[str[i] - 'a'] = 0;
        }
    }
} //end of function
which one of the above algorithms is more accurate and efficient
i want to read all chars from a string and count their occurrence/frequency
I would use a std::array with space enough to hold the count of all the characters you may encounter:
#include <array>
#include <limits>
// One counter per possible unsigned char value.
constexpr size_t ArrSize = std::numeric_limits<unsigned char>::max()+1;

// Returns a table in which entry [c] is the number of times character c occurs
// in `str`.
// NOTE: the counters are unsigned char, so a count wraps around once it
// exceeds the largest unsigned char value.
std::array<unsigned char, ArrSize> ReadCharWithFreq(const std::string& str){
    std::array<unsigned char, ArrSize> freq{};
    for(std::string::size_type pos = 0; pos < str.size(); ++pos) {
        const unsigned char ch = str[pos];
        ++freq[ch];
    }
    return freq;
}
Example usage:
#include <iostream>
#include <iomanip>
#include <vector>
// For every command-line argument, prints one line per character that occurs
// in it: the character code (width 3), the character, and its count.
int main(int argc, char* argv[]) {
    const std::vector<std::string> args(argv+1, argv+argc);
    for(std::vector<std::string>::const_iterator arg = args.begin(); arg != args.end(); ++arg) {
        const auto result = ReadCharWithFreq(*arg);
        for(size_t code=0; code<ArrSize; ++code) {
            if(!result[code]) {
                continue;
            }
            std::cout << std::setw(3) << code << " " << static_cast<char>(code) << " " << static_cast<int>(result[code]) << "\n";
            // enqueue here?
        }
    }
}

how to use char ** properly?

I have two examples, which of these two is better and why? In both cases, I got the same result. I have chosen container simply to hold strings.
Example 1:
// NOTE(review): the empty initializer gives this array no elements, while
// fill_array() stores four pointers through it - confirm the intended size.
char *c_ptr[] = {};
int num;
if (fill_array(c_ptr, &num) != 0) {
cout << "Error" << endl;
}
// Print each string that fill_array() stored.
for (int i = 0; i < num; i++) {
cout << "Str[" << i << "] = " << c_ptr[i] << endl;
}
// free pointer..
// Function implementation
// Copies four fixed strings into freshly malloc'ed buffers and stores the
// pointers in c_ptr[0..3].
//   c_ptr : caller-provided array with room for at least four pointers.
//   count : receives the number of strings stored (0 on failure).
// Returns 0 on success, -1 if an allocation fails (nothing stays allocated).
// The caller releases each c_ptr[i] with free().
int fill_array(char *c_ptr[], int *count) {
    const vector<string> v = {"haha", "hehe", "omg", "happy, learning!"};
    *count = 0;
    for (vector<string>::size_type i = 0; i < v.size(); ++i) {
        const size_t bytes = v[i].size() + 1;  // include the terminating NUL
        c_ptr[i] = static_cast<char*>(malloc(bytes));
        if (c_ptr[i] == NULL) {
            // Undo the allocations made so far instead of leaking them.
            while (i > 0) {
                --i;
                free(c_ptr[i]);
                c_ptr[i] = NULL;
            }
            return -1;
        }
        memcpy(c_ptr[i], v[i].c_str(), bytes);
    }
    *count = static_cast<int>(v.size());
    return 0;
}
Example 2:
// Starts out null; fill_array() receives its address so that it can store the
// table it allocates here.
char **c_ptr = NULL;
int num;
if (fill_array(&c_ptr, &num) != 0) {
cout << "Error" << endl;
}
// Print each string that fill_array() stored.
for (int i = 0; i < num; i++) {
cout << "Str[" << i << "] = " << c_ptr[i] << endl;
}
// free double pointer..
// Function implementation
// Allocates a table of four char* plus one buffer per fixed string, and hands
// the table back through *c_ptr.
//   c_ptr : address of the caller's char** ; set to NULL on failure.
//   num   : receives the number of strings stored (0 on failure).
// Returns 0 on success, -1 if an allocation fails (nothing stays allocated).
// The caller releases every (*c_ptr)[i] and then *c_ptr itself with free().
int fill_array(char ***c_ptr, int *num) {
    const vector<string> v = {"haha", "hehe", "omg", "happy, learning!"};
    *num = 0;
    *c_ptr = NULL;
    char **table = static_cast<char**>(malloc(v.size() * sizeof(char *)));
    if (table == NULL) {
        return -1;
    }
    for (vector<string>::size_type i = 0; i < v.size(); ++i) {
        const size_t bytes = v[i].size() + 1;  // include the terminating NUL
        // The entries live in the table, i.e. (*c_ptr)[i]; indexing c_ptr
        // itself (c_ptr[i] / *c_ptr[i]) addressed the wrong level.
        table[i] = static_cast<char*>(malloc(bytes));
        if (table[i] == NULL) {
            while (i > 0) {
                --i;
                free(table[i]);
            }
            free(table);
            return -1;
        }
        memcpy(table[i], v[i].c_str(), bytes);
    }
    *c_ptr = table;
    *num = static_cast<int>(v.size());
    return 0;
}
Result:
Str[0] = haha
Str[1] = hehe
Str[2] = omg
Str[3] = happy, learning!
Also What is the use of empty bracket in array? Is it good programming habit vs dynamic allocation?
** is pointer to a pointer or simply we can say it a double pointer.
Double pointers are better to use when we are passing a pointer variable from main() or simply a function to another function.
By looking at your code, I would like to suggest you one thing and that is, avoid using global variables.

C++ heap sort of vector<string,int>

I can not figure out where I'm having my problem with my heap sort.
The program takes a filename from the command line and imports the words into a vector; that vector is then turned into a vector of pairs, vector< pair<string,int> >, where the string is the word and the int is the count of how many instances of that word are in the file.
The vector<PAIR> is then sorted by either the string (value or v) or by int (key or k). My sorting by Key works fine however sort by value is off. I suspect I'm missing an if statement in max_heapify when sorting by value. Here's my code:
main.cpp
#include <fstream>
#include <iostream>
#include <stdlib.h>
#include <vector>
#include <string>
#include <string.h>
#include <stdio.h>
#include <map>
#include <time.h>
#include "readwords.h"
using namespace std;
// Word reader/sorter used by main().
readwords wordsinfile;
// Every word read from the input file.
vector<string> allwords;
// Enables the timing output in main(); nothing visible here assigns it.
bool times;
// Path taken from argv[1].
char *filename;
timespec timestart,timeend;
// (word, count) pairs built from allwords.
vector< pair<string,int> > allwords_vp;
// Returns end - start; defined after main().
timespec diffclock(timespec start, timespec end);
// Runs one heap sort pass selected by `how` ("v" or "k") under the heading
// "HeapSort by <label>". When the global `times` flag is set, the CPU time of
// the pass is reported as well.
static void run_heapsort(const char *label, const char *how) {
    cout << "HeapSort by " << label << endl;
    if (!times) {
        // heapsort() takes a non-const char[]; it is only read via strcmp.
        wordsinfile.heapsort(const_cast<char *>(how));
        return;
    }
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &timestart);
    wordsinfile.heapsort(const_cast<char *>(how));
    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &timeend);
    // Fold the seconds in as well; tv_nsec alone drops whole seconds.
    const timespec elapsed = diffclock(timestart, timeend);
    const long long ns =
        static_cast<long long>(elapsed.tv_sec) * 1000000000LL + elapsed.tv_nsec;
    // ns / 1000 is microseconds (it used to be labelled "millisecond").
    cout << "HeapSort by " << label << " ran in "
         << ns << " nanosecond or "
         << ns / 1000 << " microsecond"
         << endl;
}

// Reads the words of the file named by argv[1], counts them, and prints them
// heap-sorted with mode "v" and then with mode "k".
// Returns 1 when no file name is given or the file cannot be opened.
int main ( int argc, char *argv[] ) {
    if (argc < 2) {
        cerr << "usage: " << (argc > 0 ? argv[0] : "heapsort") << " <filename>" << endl;
        return 1;
    }
    filename = argv[1];
    //Lets open the file
    ifstream ourfile2(filename);
    if (!ourfile2) {
        cerr << "cannot open " << filename << endl;
        return 1;
    }
    //Lets get all the words using our requirements
    allwords = wordsinfile.getwords(ourfile2);
    //Convert all the words from file and count how many times they
    //appear. They are stored as (word, count) pairs.
    allwords_vp = wordsinfile.count_vector(allwords);
    run_heapsort("Values", "v");
    run_heapsort("Keys", "k");
    return 0;
}
// Returns end - start as a timespec, borrowing one second when the nanosecond
// difference would otherwise be negative.
timespec diffclock(timespec start, timespec end) {
    timespec delta;
    delta.tv_sec = end.tv_sec - start.tv_sec;
    delta.tv_nsec = end.tv_nsec - start.tv_nsec;
    if (delta.tv_nsec < 0) {
        delta.tv_sec -= 1;
        delta.tv_nsec += 1000000000;
    }
    return delta;
}
readwords.h
#ifndef READWORDS_H
#define READWORDS_H
#include <istream>
#include <map>
#include <string>
#include <time.h>
#include <utility>
#include <vector>

// A word together with an integer count.
typedef std::pair<std::string, int> PAIR;

// Inspects the next characters of `file`; see readwords.cpp for the meaning of
// the result.
bool isasciifile(std::istream& file);

// Reads words from a stream, counts them and heap-sorts the counts.
class readwords {
private:
    // (word, count) pairs filled by count_vector() and reordered by heapsort().
    std::vector<PAIR> vp;
public:
    // Splits the stream into lowercase alphanumeric words.
    std::vector<std::string> getwords(std::istream& file);
    // Adds the words of `sv` to the internal pairs and returns a copy of them.
    std::vector<PAIR> count_vector(std::vector<std::string> sv);
    // Prints "<word> <count>" per line.
    void print_vectorpair(std::vector<PAIR> vp);
    // Prints one string per line.
    void print_vector(std::vector<std::string> sv);
    // Sorts the internal pairs and prints them; `how` is "v" or "k".
    void heapsort(char how[]);
    // `heapsize` is the last valid index (inclusive) of the heap.
    void buildmaxheap(std::vector<PAIR> &vp, int heapsize, char how[]);
    void max_heapify(std::vector<PAIR> &vp, int i, int heapsize, char how[]);
    // Declared here; no definition is visible in this excerpt.
    void swap_pair(PAIR &p1, PAIR &p2);
};
#endif // READWORDS_H
readwords.cpp
#include <fstream>
#include <iostream>
#include <map>
#include "readwords.h"
#include <vector>
#include <string>
#include <utility>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
//using std::vector;
using namespace std;
typedef pair<string, int> PAIR;
// Consumes the next 10 characters of `file` and returns true when at least one
// of them is not printable, which includes running out of input early.
// Note the polarity: true means a non-printable character WAS found.
bool isasciifile(std::istream& file) {
    bool foundbin = false;
    int remaining = 10;
    while (remaining-- > 0) {
        // Always read, even after a hit, so exactly 10 reads are attempted.
        const int ch = file.get();
        if (!isprint(ch)) {
            // Looks like we found a non ASCII file, or its empty.
            foundbin = true;
        }
    }
    return foundbin;
}
// Splits `file` into words and returns them in reading order.
//  - letters and digits are appended to the current word, lowercased;
//  - whitespace ends the current word;
//  - every other character is dropped without ending the word
//    (so "don't" is read as "dont").
vector<string> readwords::getwords(std::istream& file) {
    vector<string> sv;
    string aword;
    char raw;
    // get(char&) fails cleanly at end of input, so no EOF marker is ever
    // treated as a character.
    while (file.get(raw)) {
        // The <cctype> functions need a value in unsigned char range.
        const unsigned char c = static_cast<unsigned char>(raw);
        if (isalnum(c)) {
            aword += static_cast<char>(tolower(c));
        } else if (isspace(c) && !aword.empty()) {
            sv.push_back(aword);
            aword.clear();
        }
    }
    // Flush the last word: it used to be lost when the input did not end in
    // whitespace.
    if (!aword.empty()) {
        sv.push_back(aword);
    }
    return sv;
}
vector<PAIR> readwords::count_vector(vector<string> sv) {
unsigned int i = 0;
int j = 0;
int match = 0;
// cout << "Working with these string: " << endl;
// print_vector(sv);
for (i=0; i < sv.size(); i++) {
// cout << "count of i: " << i << " word is: " << sv.at(i) << endl;
match = 0;
if(readwords::vp.size() == 0) {
readwords::vp.push_back(make_pair(sv.at(i),1)); continue;
}
for (j=readwords::vp.size() - 1; j >= 0; --j) {
if (sv.at(i) == readwords::vp.at(j).first) {
// cout << "Match found with: " << sv.at(i) << endl;;
readwords::vp.at(j).second = readwords::vp.at(j).second + 1;
match = 1;
}
// cout << "Value of j and match: " << j << match << endl;
if ( j == 0 && match == 0) {
// cout << "Match found at end with: " << sv.at(i) << endl;;
readwords::vp.push_back(make_pair(sv.at(i),1));
}
}
}
//Prob need to sort by first data type then second here, prior to sort functions.
//Might not be the best place as the sort functions would alter it, if not here
//then each sort requires to do secondary search
return readwords::vp;
}
void readwords::print_vectorpair(vector<PAIR> vp) {
unsigned int i = 0;
for (i=0; i < vp.size(); ++i) {
cout << vp.at(i).first << " " << vp.at(i).second << endl;
}
}
// Prints every string of `sv` on its own line.
void readwords::print_vector(vector<string> sv) {
    for (vector<string>::const_iterator it = sv.begin(); it != sv.end(); ++it) {
        cout << *it << endl;
    }
}
// Heap-sorts the member pairs in place using the ordering selected by `how`,
// then prints them. Heap sizes are passed as the last valid index (inclusive).
void readwords::heapsort(char how[]) {
    const int last = static_cast<int>(readwords::vp.size()) - 1;
    buildmaxheap(readwords::vp, last, how);
    for (int i = last; i >= 0; --i) {
        // Move the current maximum behind the heap, then restore the heap
        // property on the remaining prefix [0, i-1].
        swap(readwords::vp[0], readwords::vp[i]);
        max_heapify(readwords::vp, 0, i - 1, how);
    }
    print_vectorpair(readwords::vp);
}
// Calls max_heapify on every index from heapsize/2 down to 0.
void readwords::buildmaxheap(vector<PAIR> &vp, int heapsize, char how[]) {
    int i = heapsize / 2;
    while (i >= 0) {
        max_heapify(vp, i, heapsize, how);
        --i;
    }
}
// Sifts the element at index i down within vp[0..heapsize] (heapsize is the
// last valid index, inclusive) until neither child is selected as larger.
// how == "v": a child is selected only when BOTH its count (second) and its
//             word (first) are >= those of the current candidate.
// how == "k": a child is selected when its word (first) is greater.
void readwords::max_heapify(vector<PAIR> &vp, int i, int heapsize, char how[]) {
// Children of node i in the array layout.
int left = ( 2 * i ) + 1;
int right = left + 1;
// Only assigned inside the two branches below; it stays unset when how is
// neither "v" nor "k".
int largest;
if(!strcmp(how,"v")) {
if(left <= heapsize && vp.at(left).second >= vp.at(i).second ) {
if( vp.at(left).first >= vp.at(i).first ) {
largest = left;
} else {
largest = i;
}
} else {
largest = i;
}
if(right <= heapsize && vp.at(right).second >= vp.at(largest).second) {
if( vp.at(right).first >= vp.at(largest).first) {
largest = right;
}
}
}
if(!strcmp(how,"k")) {
if(left <= heapsize && vp.at(left).first > vp.at(i).first) {
largest = left;
} else {
largest = i;
}
if(right <= heapsize && vp.at(right).first > vp.at(largest).first) {
largest = right;
}
}
// A child was selected: swap it up and continue in that subtree.
if(largest != i) {
swap(vp[i], vp[largest]);
max_heapify(vp, largest, heapsize, how);
}
}
The vector is then sorted by either the string (value or v) or by int (key or k).
That description doesn't match the code, sorting with a how parameter of "k" sorts by the first component only, which is the string, and sorting with "v" as how parameter takes both components into account.
I think it's a rather bad idea to pass a char[] to determine the sorting criterion, it should be a comparator function, so you need only one implementation in max_heapify.
My sorting by Key works fine however sort by value is off. I suspect I'm missing an if statement in max_heapify when sorting by value.
The problem is that a heap sort needs a total ordering or it won't sort properly.
Your conditions
// left is chosen only when both its count and its word are >= those of i.
if(left <= heapsize && vp.at(left).second >= vp.at(i).second ) {
if( vp.at(left).first >= vp.at(i).first ) {
largest = left;
} else {
largest = i;
}
} else {
largest = i;
}
check whether both components of vp.at(left) (resp. vp.at(right)) are at least as large as the corresponding components of vp.at(i). That is the product partial ordering: two general pairs need not be comparable, and when they are not, your max_heapify doesn't do anything.
Example, for <"a",3>, <"b",2> and <"c",1> in the positions i, left, right, in whichever order, your max_heapify sets largest to i.
If your sorting by "v" is meant to sort based on the int component first, and in case of a tie, take the string component into account, you'd need to distinguish the cases vp.at(left).second > vp.at(i).second and equality (for right too, of course). For example
// A strictly larger count wins outright; the word only decides equal counts.
if(left <= heapsize && vp.at(left).second >= vp.at(i).second ) {
if(vp.at(left).second > vp.at(i).second || vp.at(left).first >= vp.at(i).first ) {
largest = left;
} else {
largest = i;
}
} else {
largest = i;
}
To sort a vector<pair<string, int> > by values, consider adding vector<pair<int, string> >
vector<pair<int, string> > v(orignal.size());
for (int i = 0; i < v.size(); ++i) v[i] = make_pair(original[i].second, original[i].first);
sort(v.begin(), v.end());