Returning data from if statement - c++

I'm handling data in an if statement, and would like to keep that data as an not-locale variable, so I can call on it outside of the statement. Here's the actual code. Beware; it's pretty much just the outcast of an idea, but I'll welcome any and all ideas or better solutions (provided the suggestions aren't too advanced for my current level)...
The code creates and multidimensional char array (2 dimensions), and fills it. When trying to call on it afterwards, the programs crashes as a result of reaching a unassigned array value.
do
{
destRow = ( rand() % ROWS ) + 1; // assigning random placement for destroyer ship
destCol = ( rand() % COLLUMS ) + 1;
destDir = ( rand() % 2 ) + 1;
if ( destDir == 1 ) { //Destroyer: 1 = right
if ( destCol + 1 > COLLUMS ) {
continue;
} else {
char enemyDestroyer[DESTROYER_SIZE][2] = { { board[destCol][0] , board[0][destRow] }, { board[destCol + 1][0] , board[0][destRow] } };
}
} else if ( destDir == 2 ) { //Destroyer: 2 = down
if ( destRow + 1 > ROWS ) {
continue;
} else {
char enemyDestroyer[DESTROYER_SIZE][2] = { { board[destCol][0] , board[0][destRow] }, { board[destCol][0] , board[0][destRow + 1] } };
}
}
destSucces = true;
} while (!destSucces);
Here it checks the player input (gathered at some other point) with the data, set above, which hasn't actually been set to anything because the assignment happend in an if-statement
if ( ( input[0] == enemyDestroyer[0][0] && input[1] == enemyDestroyer[0][1] ) || (input[0] == enemyDestroyer[1][0] && input[1] == enemyDestroyer[1][1] ) ) {
cout << "Hit!" << endl;
board[posY][posX] = '!';
getchar();
//setting display damage
if ( input[1] == enemyDestroyer[0][1] ) {
enmDesDisp[0] = '!';
} else if ( input[1] == enemyDestroyer[1][1] ) {
enmDesDisp[1] = '!';
}
} else {
cout << endl << "Splash!" << endl;
}

It's not clear which array you are refering to. The only array you're interacting with in the code is enemyDestroyer, which is immediately released from memory after the assignment.
Perhaps you could expand you example so we can get a better understanding of what it is you're trying to do?
Update
To prevent enemyDestroyer from being instantly released youd have to move it to the appropriate scope. That would mean declaring it in the same scope it's going to be used.
I suggest reading up on scopes and perhaps switching to an object oriented design.

Related

bad_alloc when attempting to print string that was assigned to member of $$ struct

During our compiler's intermediate code generation phase, and more specifically while testing the arithmetic expressions and assignment rules, I noticed that although the respective quads are constructed successfully, when printing them out sometimes we'll get a bad_alloc exception. After tracing it, it looks like it's cause by the printQuads() method and specifically the following string access of key:
if(q.result != nullptr && q.result->sym != nullptr) {
cout << "quad " << opcodeStrings[q.op] << " inside if key check for" << opcodeStrings[q.op] << endl;
resultKey = q.result->sym->key;
}
I'll try to include the code that's relevant instead of dumping 500 lines of code here.
So, below you can see our assignmentexpr and basic arithmetic expression rules and actions:
expr: assignexpr
| expr PLUS expr
{
bool isExpr1Arithm = check_arith($1);
bool isExpr2Arithm = check_arith($3);
if(!isExpr1Arithm || !isExpr2Arithm)
{
//string msg = !isExpr1Arithm ? "First operand isn\'t a number in addition!" : "Second operand isn\'t a number in addition!";
yyerror(token_node, "Both addition operands must be numbers!");
} else
{
double result = $1->numConst + $3->numConst;
$$ = newexpr(arithmetic_e);
$$->sym = newtemp(scope);
$$->numConst = result;
emit(add, $1, $3, $$, nextquadlabel(), yylineno);
}
}
| expr MIN expr
{
bool isExpr1Arithm = check_arith($1);
bool isExpr2Arithm = check_arith($3);
if(!isExpr1Arithm || !isExpr2Arithm)
{
//string msg = !isExpr1Arithm ? "First operand isn\'t a number in subtraction!" : "Second operand isn\'t a number in subtracion!";
yyerror(token_node, "Both suctraction operands must be numbers!");
} else
{
double result = $1->numConst - $3->numConst;
$$ = newexpr(arithmetic_e);
$$->sym = newtemp(scope);
$$->numConst = result;
emit(sub, $1, $3, $$, nextquadlabel(), yylineno);
}
}
| expr MUL expr
{
bool isExpr1Arithm = check_arith($1);
bool isExpr2Arithm = check_arith($3);
if(!isExpr1Arithm || !isExpr2Arithm)
{
//string msg = !isExpr1Arithm ? "First operand isn\'t a number in subtraction!" : "Second operand isn\'t a number in subtracion!";
yyerror(token_node, "Both multiplication operands must be numbers!");
} else
{
double result = $1->numConst * $3->numConst;
$$ = newexpr(arithmetic_e);
$$->sym = newtemp(scope);
$$->numConst = result;
emit(mul, $1, $3, $$, nextquadlabel(), yylineno);
}
}
| expr DIV expr
{
bool isExpr1Arithm = check_arith($1);
bool isExpr2Arithm = check_arith($3);
if(!isExpr1Arithm || !isExpr2Arithm)
{
//string msg = !isExpr1Arithm ? "First operand isn\'t a number in subtraction!" : "Second operand isn\'t a number in subtracion!";
yyerror(token_node, "Both division operands must be numbers!");
} else
{
if($3->numConst == 0) {
yyerror(token_node, "division by 0!");
} else {
double result = $1->numConst / $3->numConst;
$$ = newexpr(arithmetic_e);
$$->sym = newtemp(scope);
$$->numConst = result;
emit(div_op, $1, $3, $$, nextquadlabel(), yylineno);
}
}
}
| expr MOD expr
{
bool isExpr1Arithm = check_arith($1);
bool isExpr2Arithm = check_arith($3);
if(!isExpr1Arithm || !isExpr2Arithm)
{
//string msg = !isExpr1Arithm ? "First operand isn\'t a number in subtraction!" : "Second operand isn\'t a number in subtracion!";
yyerror(token_node, "Both modulus operands must be numbers!");
} else
{
if($3->numConst == 0) {
yyerror(token_node, "division by 0!");
} else {
double result = fmod($1->numConst,$3->numConst);
$$ = newexpr(arithmetic_e);
$$->sym = newtemp(scope);
$$->numConst = result;
emit(mod_op, $1, $3, $$, nextquadlabel(), yylineno);
}
}
}
...
assignexpr: lvalue ASSIGN expr { if ( isMemberOfFunc )
{
isMemberOfFunc=false;
}
else{ if ( islocalid==true ){
islocalid = false;
}else{
if ( isLibFunc($1->sym->key) ) yyerror(token_node,"Library function \"" + $1->sym->key + "\" is not lvalue!");
if (SymTable_lookup(symtab,$1->sym->key,scope,false) && isFunc($1->sym->key,scope)) yyerror(token_node,"User function \"" + $1->sym->key + "\" is not lvalue!");
}
}
if($1->type == tableitem_e)
{
// lvalue[index] = expr
emit(tablesetelem,$1->index,$3,$1,nextquadlabel(),yylineno);
$$ = emit_iftableitem($1,nextquadlabel(),yylineno, scope);
$$->type = assignment;
} else
{
emit(assign,$3,NULL,$1,nextquadlabel(),yylineno); //lval = expr;
$$ = newexpr(assignment);
$$->sym = newtemp(scope);
emit(assign, $1,NULL,$$,nextquadlabel(),yylineno);
}
}
;
The printQuads method is the following:
void printQuads() {
unsigned int index = 1;
cout << "quad#\t\topcode\t\tresult\t\targ1\t\targ2\t\tlabel" <<endl;
cout << "-------------------------------------------------------------------------------------------------" << endl;
for(quad q : quads) {
string arg1_type = "";
string arg2_type = "";
cout << "quad before arg1 type check" << endl;
if(q.arg1 != nullptr) {
switch (q.arg1->type) {
case const_bool:
arg1_type = "\'" + BoolToString(q.arg1->boolConst) + "\'";
break;
case const_string:
arg1_type = "\"" + q.arg1->strConst + "\"";
break;
case const_num:
arg1_type = to_string(q.arg1->numConst);
break;
case var:
arg1_type = q.arg1->sym->key;
break;
case nil_e:
arg1_type = "nil";
break;
default:
arg1_type = q.arg1->sym->key;
break;
}
}
cout << "quad before arg2 type check" << endl;
if(q.arg2 != nullptr) {
switch (q.arg2->type) {
case const_bool:
arg2_type = "\'" + BoolToString(q.arg2->boolConst) + "\'";
break;
case const_string:
arg2_type = "\"" + q.arg2->strConst + "\"";
break;
case const_num:
arg2_type = to_string(q.arg2->numConst);
break;
case nil_e:
arg2_type = "nil";
break;
default:
arg2_type = q.arg2->sym->key;
break;
}
}
string label = "";
if(q.op == if_eq || q.op == if_noteq || q.op == if_lesseq || q.op == if_greatereq
|| q.op == if_less || q.op == if_greater || q.op == jump) label = q.label;
string resultKey = "";
cout << "quad before key check" << endl;
if(q.result != nullptr && q.result->sym != nullptr) {
cout << "quad " << opcodeStrings[q.op] << " inside if key check for" << opcodeStrings[q.op] << endl;
resultKey = q.result->sym->key;
}
cout << "quad after key check" << endl;
cout << index << ":\t\t" << opcodeStrings[q.op] << "\t\t" << resultKey << "\t\t" << arg1_type << "\t\t" << arg2_type << "\t\t" << label << "\t\t" << endl;
index++;
}
}
The quads variable is just a vector of quads. Here is the quad struct:
enum expr_t {
var,
tableitem_e,
user_func,
lib_func,
arithmetic_e,
assignment,
newtable_e,
const_num,
const_bool,
const_string,
nil_e,
bool_e
};
struct expr {
expr_t type;
binding* sym;
expr* index;
double numConst;
string strConst;
bool boolConst;
expr* next;
};
struct quad {
iopcode op;
expr* result;
expr* arg1;
expr* arg2;
unsigned int label;
unsigned int line;
};
The binding* is defined as follows and is a symbol table binding:
enum SymbolType{GLOBAL_, LOCAL_, FORMAL_, USERFUNC_, LIBFUNC_, TEMP};
struct binding{
std::string key;
bool isactive = true;
SymbolType sym;
//vector<binding *> formals;
scope_space space;
unsigned int offset;
unsigned int scope;
int line;
};
Here are the emit(), newtemp & newexpr() methods:
void emit(
iopcode op,
expr* arg1,
expr* arg2,
expr* result,
unsigned int label,
unsigned int line
){
quad p;
p.op = op;
p.arg1 = arg1;
p.arg2 = arg2;
p.result = result;
p.label = label;
p.line = line;
currQuad++;
quads.push_back(p);
}
binding *newtemp(unsigned int scope){
string name = newTempName();
binding* sym = SymTable_get(symtab,name,scope);
if (sym== nullptr){
SymTable_put(symtab,name,scope,TEMP,-1);
binding* sym = SymTable_get(symtab,name,scope);
return sym;
}else return sym;
}
string newTempName(){
string temp = "_t" + to_string(countertemp) + " ";
countertemp++;
return temp;
}
expr* newexpr(expr_t exprt){
expr* current = new expr;
current->sym = NULL;
current->index = NULL;
current->numConst = 0;
current->strConst = "";
current->boolConst = false;
current->next = NULL;
current->type = exprt;
return current;
}
unsigned int countertemp = 0;
unsigned int currQuad = 0;
Symbol table cpp file:
#include <algorithm>
bool isHidingBindings = false;
/* Return a hash code for pcKey.*/
static unsigned int SymTable_hash(string pcKey){
size_t ui;
unsigned int uiHash = 0U;
for (ui = 0U; pcKey[ui] != '\0'; ui++)
uiHash = uiHash * HASH_MULTIPLIER + pcKey[ui];
return (uiHash % DEFAULT_SIZE);
}
/*If b contains a binding with key pcKey, returns 1.Otherwise 0.
It is a checked runtime error for oSymTable and pcKey to be NULL.*/
int Bucket_contains(scope_bucket b, string pcKey){
vector<binding> current = b.entries[SymTable_hash(pcKey)]; /*find the entry binding based on the argument pcKey*/
for (int i=0; i<current.size(); i++){
binding cur = current.at(i);
if (cur.key==pcKey) return 1;
}
return 0;
}
/*epistrefei to index gia to bucket pou antistixei sto scope 'scope'.Se periptwsh pou den uparxei
akoma bucket gia to en logw scope, ean to create einai true dhmiourgei to antistoixo bucket sto
oSymTable kai epistrefei to index tou.Diaforetika epistrefei thn timh -1.*/
int indexofscope(SymTable_T &oSymTable, unsigned int scope, bool create){
int index=-1;
for(int i=0; i<oSymTable.buckets.size(); i++) if (oSymTable.buckets[i].scope == scope) index=i;
if ( index==-1 && create ){
scope_bucket newbucket;
newbucket.scope = scope;
oSymTable.buckets.push_back(newbucket);
index = oSymTable.buckets.size()-1;
}
return index;
}
/*If there is no binding with key : pcKey in oSymTable, puts a new binding with
this key and value : pvvValue returning 1.Otherise, it just returns 0.
It is a checked runtime error for oSymTable and pcKey to be NULL.*/
int SymTable_put(SymTable_T &oSymTable, string pcKey,unsigned int scope, SymbolType st, unsigned int line){
int index = indexofscope(oSymTable,scope, true);
if(index==-1) cerr<<"ERROR"<<endl;
scope_bucket *current = &oSymTable.buckets.at(index);
if ( Bucket_contains(*current, pcKey) && st != FORMAL_ && st != LOCAL_) return 0; /*If the binding exists in oSymTable return 0.*/
binding newnode;
newnode.key = pcKey;
newnode.isactive = true;
newnode.line = line;
newnode.sym = st;
newnode.scope = scope;
current->entries[SymTable_hash(pcKey)].push_back(newnode);
return 1;
}
/*Pairnei ws orisma to oSymTable kai to scope pou theloume na apenergopoihsoume.
An to sugkekrimeno scope den uparxei sto oSymTable epistrefei -1.Diaforetika 0*/
void SymTable_hide(SymTable_T &oSymTable, unsigned int scope){
isHidingBindings = true;
for(int i=scope; i >= 0; i--) {
if(i == 0) return;
int index = indexofscope(oSymTable,i,false);
if(index == -1) continue;
scope_bucket *current = &oSymTable.buckets.at(index);
for (int i=0; i<DEFAULT_SIZE; i++) {
for (int j=0; j<current->entries[i].size(); j++) {
if(current->entries[i].at(j).sym == LOCAL_ || current->entries[i].at(j).sym == FORMAL_)
current->entries[i].at(j).isactive = false;
}
}
}
}
void SymTable_show(SymTable_T &oSymTable, unsigned int scope){
isHidingBindings = false;
for(int i=scope; i >= 0; i--) {
if(i == 0) return;
int index = indexofscope(oSymTable,i,false);
if(index == -1) continue;
scope_bucket *current = &oSymTable.buckets.at(index);
for (int i=0; i<DEFAULT_SIZE; i++) {
for (int j=0; j<current->entries[i].size(); j++) {
if(current->entries[i].at(j).sym == LOCAL_ || current->entries[i].at(j).sym == FORMAL_)
current->entries[i].at(j).isactive = true;
}
}
}
}
bool SymTable_lookup(SymTable_T oSymTable, string pcKey, unsigned int scope, bool searchInScopeOnly){
for(int i=scope; i >= 0; i--) {
if(searchInScopeOnly && i != scope) break;
int index = indexofscope(oSymTable,i,false);
if(index == -1) continue;
scope_bucket current = oSymTable.buckets[index];
for(vector<binding> entry : current.entries) {
for(binding b : entry) {
if(b.key == pcKey && b.isactive) return true;
else if(b.key == pcKey && !b.isactive) return false;
}
}
}
return false;
}
binding* SymTable_lookupAndGet(SymTable_T &oSymTable, string pcKey, unsigned int scope) noexcept{
for ( int i=scope; i >= 0; --i ){
int index = indexofscope(oSymTable,i,false );
if (index==-1) continue;
scope_bucket &current = oSymTable.buckets[index];
for (auto &entry : current.entries) {
for (auto &b : entry ){
if ( b.key == pcKey ) return &b;
}
}
}
return nullptr;
}
/*Lamvanei ws orisma to oSymTable, kleidh tou tou desmou pou psaxnoume kai to scope tou desmou.
H sunarthsh telika epistrefei to value tou tou desmou.Diaforetika epistrefei 0*/
binding* SymTable_get(SymTable_T &oSymTable, const string pcKey, unsigned int scope){
for ( int i=scope; i >= 0; --i )
{
const int index = indexofscope( oSymTable, i, false );
if ( index == -1 )
{
continue;
}
scope_bucket& current = oSymTable.buckets[index];
for ( auto& entry : current.entries)
{
for ( auto& b : entry )
{
if ( b.key == pcKey )
{
return &b;
}
}
}
}
return nullptr;
}
When run with the following test file, the issue occurs at the z5 = 4 / 2; expression's assign quad:
// simple arithmetic operations
z1 = 1 + 2;
z10 = 1 + 1;
z2 = 1 - 3;
z3 = 4 * 4;
z4 = 5 / 2;
What's confusing is that if I print out the sym->key after each emit() in the arithmetic-related actions, I can see the keys just fine. But once I try to access them inside the printQuads it will fail (for the div operation at least so far). This has me thinking that maybe we are shallow copying the binding* sym thus losing the key? But how come the rest of them are printed normally?
I'm thinking that the issue (which has occured again in the past at various stages) could be caused by us using a ton of copy-by-value instead of by-reference but I can't exactly confirm this because most of the time it works (I'm guessing that means that this is undefined behavior?).
I'm sure this is very difficult to help debug but maybe someone will eyeball something that I can't see after this many hours.
Debugging by eyeballing your code is probably a useful skill, but it's far from the most productive form of debugging. These days, it's much less necessary, since there are lots of good tools which you can use to detect problems. (Here, I do mean "you", specifically. I can't use any of those tools because I don't have your complete project in front of me. And nor do I particularly want it; this is not a request for you to paste hundreds of lines of code).
You're almost certainly right that your problem is related to some kind of undefined behaviour. If you're correct about the bad_alloc exception being thrown by what is effectively a copy of a std::string, then it's most likely the result of the thing being copied from not being a valid std::string. Perhaps it's an actual std::string object whose internal members have been corrupted; perhaps the pointer is not actually pointing to an active std::string (which I think is the real problem, see below). Or perhaps it's something else.
Either way, the error occurred long before the bug manifests itself, so you're only going to stumble upon where it happened by blind luck. On the other hand, there are a variety of memory error detection tools available which may be able to pinpoint the precise moment in which you violated the contract by reading or writing to memory which didn't belong to you. These include Valgrind and AddressSanitizer (also known as ASan); one or both of these is certainly available for the platform on which you are developing your project. (I say that confidently even without knowing what that platform is, but you'll have to do a little research to find the one which works best for your particular environment. Both of those names can be looked up on Wikipedia.) These tools are very easy to use, and extraordinarily useful; they can save you hours or days of debugging and a lot of frustration. As an extra added bonus, they can detect bugs you don't even know you have, saving you the embarrassment of shipping a program which will blow up in the hands of the customer or the person who is marking your assignment. So I strongly recommend learning how to use them.
I probably should leave it at that, because it's better motivation to learn to use the tools. Still, I can't resist making a guess about where the problem lies. But honestly, you will learn a lot more by ignoring what I'm about to say and trying to figure out the problem yourself.
Anyway, you don't include much in the way of information about your SymTable_T class, and the inconsistent naming convention makes me wonder if you even wrote its code; perhaps it was part of the skeleton code you were given for this assignment. From what I can see in SymTable_put and SymTable_get, the SymTable_T includes something like a hash table, but doesn't use the C++ standard library associative containers. (That's a mistake from the beginning, IMHO. This assignment is about learning how to generate code, not how to write a good hash table. The C++ standard library associative containers are certainly adequate for your purposes, whether or not they are the absolute ideal for your use case, and they have the enormous advantages of already being thoroughly documented and debugged.)
It's possible that SymTable_T was not originally written in C++ at all. The use of free-standing functions like SymTable_put and SymTable_get rather than class methods is difficult to explain unless the functions were originally written in C, which doesn't allow object methods. On the other hand, they appear to use C++ standard library collections, as evidenced by the call to push_back in SymTable_put:
current->entries[SymTable_hash(pcKey)].push_back(newnode);
That suggests that entries is a std::vector (although there are other possibilities), and if it is, it should raise a red flag when you combine it with this, from SymTable_get (whitespace-edited to save screen space here):
for ( auto& entry : current.entries) {
for ( auto& b : entry ) {
if ( b.key == pcKey )
return &b;
}
}
To be honest, I don't understand that double loop. To start with, you seem to be ignoring the fact that there is a hash table somewhere in that data structure, but beyond that, it seems to me that entry should be a binding (that's what SymTable_put pushes onto the entries container), and I don't see where a binding is an iterable object. Perhaps I'm not reading that correctly.)
Regardless, evidently SymTable_get is returning a reference to something which is stored in a container, probably a std::vector, and that container is modified from time to time by having new elements pushed onto it. And pushing a new element onto the end of a std::vector invalidates all existing references to every element of the vector. (See https://en.cppreference.com/w/cpp/container/vector/push_back)
Thus, newtemp, which returns a binding* acquired from SymTable_get, is returning a pointer which may be invalidated in the future by some call to SymTable_put (though not by every call to that function; only the ones where the stars unline unhappily). That pointer is then stored into a data object which will (much later) be given to printQuads, which will attempt to use the pointer to make a copy of a string which it will attempt to print. And, as I mentioned towards the beginning of this treatise, trying to use an object which is pointed to by a dangling pointer is Undefined Behaviour.
As a minor note, making a copy of a string in order to print it out is completely unnecessary. A reference would work just fine, and save a bunch of unnecessary memory allocations. But that won't fix the problem (if my guess turns out to be correct) because printing through a dangling pointer is just as Undefined Behaviour as making a copy through a dangling pointer, and will likely manifest in some other mysterious way.

Sort file names func [duplicate]

I'm sorting strings that are comprised of text and numbers.
I want the sort to sort the number parts as numbers, not alphanumeric.
For example I want: abc1def, ..., abc9def, abc10def
instead of: abc10def, abc1def, ..., abc9def
Does anyone know an algorithm for this (in particular in c++)
Thanks
I asked this exact question (although in Java) and got pointed to http://www.davekoelle.com/alphanum.html which has an algorithm and implementations of it in many languages.
Update 14 years later: Dave Koelle’s blog has gone off line and I can’t find his actual algorithm, but here’s an implementation.
https://github.com/cblanc/koelle-sort
Several natural sort implementations for C++ are available. A brief review:
natural_sort<> - based on Boost.Regex.
In my tests, it's roughly 20 times slower than other options.
Dirk Jagdmann's alnum.hpp, based on Dave Koelle's alphanum algorithm
Potential integer overlow issues for values over MAXINT
Martin Pool's natsort - written in C, but trivially usable from C++.
The only C/C++ implementation I've seen to offer a case insensitive version, which would seem to be a high priority for a "natural" sort.
Like the other implementations, it doesn't actually parse decimal points, but it does special case leading zeroes (anything with a leading 0 is assumed to be a fraction), which is a little weird but potentially useful.
PHP uses this algorithm.
This is known as natural sorting. There's an algorithm here that looks promising.
Be careful of problems with non-ASCII characters (see Jeff's blog entry on the subject).
Partially reposting my another answer:
bool compareNat(const std::string& a, const std::string& b){
if (a.empty())
return true;
if (b.empty())
return false;
if (std::isdigit(a[0]) && !std::isdigit(b[0]))
return true;
if (!std::isdigit(a[0]) && std::isdigit(b[0]))
return false;
if (!std::isdigit(a[0]) && !std::isdigit(b[0]))
{
if (a[0] == b[0])
return compareNat(a.substr(1), b.substr(1));
return (toUpper(a) < toUpper(b));
//toUpper() is a function to convert a std::string to uppercase.
}
// Both strings begin with digit --> parse both numbers
std::istringstream issa(a);
std::istringstream issb(b);
int ia, ib;
issa >> ia;
issb >> ib;
if (ia != ib)
return ia < ib;
// Numbers are the same --> remove numbers and recurse
std::string anew, bnew;
std::getline(issa, anew);
std::getline(issb, bnew);
return (compareNat(anew, bnew));
}
toUpper() function:
std::string toUpper(std::string s){
for(int i=0;i<(int)s.length();i++){s[i]=toupper(s[i]);}
return s;
}
Usage:
std::vector<std::string> str;
str.push_back("abc1def");
str.push_back("abc10def");
...
std::sort(str.begin(), str.end(), compareNat);
To solve what is essentially a parsing problem a state machine (aka finite state automaton) is the way to go. Dissatisfied with the above solutions i wrote a simple one-pass early bail-out algorithm that beats C/C++ variants suggested above in terms of performance, does not suffer from numerical datatype overflow errors, and is easy to modify to add case insensitivity if required.
sources can be found here
For those that arrive here and are already using Qt in their project, you can use the QCollator class. See this question for details.
Avalanchesort is a recursive variation of naturall sort, whiche merge runs, while exploring the stack of sorting-datas. The algorithim will sort stable, even if you add datas to your sorting-heap, while the algorithm is running/sorting.
The search-principle is simple. Only merge runs with the same rank.
After finding the first two naturell runs (rank 0), avalanchesort merge them to a run with rank 1. Then it call avalanchesort, to generate a second run with rank 1 and merge the two runs to a run with rank 2. Then it call the avalancheSort to generate a run with rank 2 on the unsorted datas....
My Implementation porthd/avalanchesort divide the sorting from the handling of the data using interface injection. You can use the algorithmn for datastructures like array, associative arrays or lists.
/**
* #param DataListAvalancheSortInterface $dataList
* #param DataRangeInterface $beginRange
* #param int $avalancheIndex
* #return bool
*/
public function startAvalancheSort(DataListAvalancheSortInterface $dataList)
{
$avalancheIndex = 0;
$rangeResult = $this->avalancheSort($dataList, $dataList->getFirstIdent(), $avalancheIndex);
if (!$dataList->isLastIdent($rangeResult->getStop())) {
do {
$avalancheIndex++;
$lastIdent = $rangeResult->getStop();
if ($dataList->isLastIdent($lastIdent)) {
$rangeResult = new $this->rangeClass();
$rangeResult->setStart($dataList->getFirstIdent());
$rangeResult->setStop($dataList->getLastIdent());
break;
}
$nextIdent = $dataList->getNextIdent($lastIdent);
$rangeFollow = $this->avalancheSort($dataList, $nextIdent, $avalancheIndex);
$rangeResult = $this->mergeAvalanche($dataList, $rangeResult, $rangeFollow);
} while (true);
}
return $rangeResult;
}
/**
* #param DataListAvalancheSortInterface $dataList
* #param DataRangeInterface $range
* #return DataRangeInterface
*/
protected function findRun(DataListAvalancheSortInterface $dataList,
$startIdent)
{
$result = new $this->rangeClass();
$result->setStart($startIdent);
$result->setStop($startIdent);
do {
if ($dataList->isLastIdent($result->getStop())) {
break;
}
$nextIdent = $dataList->getNextIdent($result->getStop());
if ($dataList->oddLowerEqualThanEven(
$dataList->getDataItem($result->getStop()),
$dataList->getDataItem($nextIdent)
)) {
$result->setStop($nextIdent);
} else {
break;
}
} while (true);
return $result;
}
/**
* #param DataListAvalancheSortInterface $dataList
* #param $beginIdent
* #param int $avalancheIndex
* #return DataRangeInterface|mixed
*/
protected function avalancheSort(DataListAvalancheSortInterface $dataList,
$beginIdent,
int $avalancheIndex = 0)
{
if ($avalancheIndex === 0) {
$rangeFirst = $this->findRun($dataList, $beginIdent);
if ($dataList->isLastIdent($rangeFirst->getStop())) {
// it is the last run
$rangeResult = $rangeFirst;
} else {
$nextIdent = $dataList->getNextIdent($rangeFirst->getStop());
$rangeSecond = $this->findRun($dataList, $nextIdent);
$rangeResult = $this->mergeAvalanche($dataList, $rangeFirst, $rangeSecond);
}
} else {
$rangeFirst = $this->avalancheSort($dataList,
$beginIdent,
($avalancheIndex - 1)
);
if ($dataList->isLastIdent($rangeFirst->getStop())) {
$rangeResult = $rangeFirst;
} else {
$nextIdent = $dataList->getNextIdent($rangeFirst->getStop());
$rangeSecond = $this->avalancheSort($dataList,
$nextIdent,
($avalancheIndex - 1)
);
$rangeResult = $this->mergeAvalanche($dataList, $rangeFirst, $rangeSecond);
}
}
return $rangeResult;
}
protected function mergeAvalanche(DataListAvalancheSortInterface $dataList, $oddListRange, $evenListRange)
{
$resultRange = new $this->rangeClass();
$oddNextIdent = $oddListRange->getStart();
$oddStopIdent = $oddListRange->getStop();
$evenNextIdent = $evenListRange->getStart();
$evenStopIdent = $evenListRange->getStop();
$dataList->initNewListPart($oddListRange, $evenListRange);
do {
if ($dataList->oddLowerEqualThanEven(
$dataList->getDataItem($oddNextIdent),
$dataList->getDataItem($evenNextIdent)
)) {
$dataList->addListPart($oddNextIdent);
if ($oddNextIdent === $oddStopIdent) {
$restTail = $evenNextIdent;
$stopTail = $evenStopIdent;
break;
}
$oddNextIdent = $dataList->getNextIdent($oddNextIdent);
} else {
$dataList->addListPart($evenNextIdent);
if ($evenNextIdent === $evenStopIdent) {
$restTail = $oddNextIdent;
$stopTail = $oddStopIdent;
break;
}
$evenNextIdent = $dataList->getNextIdent($evenNextIdent);
}
} while (true);
while ($stopTail !== $restTail) {
$dataList->addListPart($restTail);
$restTail = $dataList->getNextIdent($restTail);
}
$dataList->addListPart($restTail);
$dataList->cascadeDataListChange($resultRange);
return $resultRange;
}
}
My algorithm with test code of java version. If you want to use it in your project you can define a comparator yourself.
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.function.Consumer;
public class FileNameSortTest {
private static List<String> names = Arrays.asList(
"A__01__02",
"A__2__02",
"A__1__23",
"A__11__23",
"A__3++++",
"B__1__02",
"B__22_13",
"1_22_2222",
"12_222_222",
"2222222222",
"1.sadasdsadsa",
"11.asdasdasdasdasd",
"2.sadsadasdsad",
"22.sadasdasdsadsa",
"3.asdasdsadsadsa",
"adsadsadsasd1",
"adsadsadsasd10",
"adsadsadsasd3",
"adsadsadsasd02"
);
public static void main(String...args) {
List<File> files = new ArrayList<>();
names.forEach(s -> {
File f = new File(s);
try {
if (!f.exists()) {
f.createNewFile();
}
files.add(f);
} catch (IOException e) {
e.printStackTrace();
}
});
files.sort(Comparator.comparing(File::getName));
files.forEach(f -> System.out.print(f.getName() + " "));
System.out.println();
files.sort(new Comparator<File>() {
boolean caseSensitive = false;
int SPAN_OF_CASES = 'a' - 'A';
#Override
public int compare(File left, File right) {
char[] csLeft = left.getName().toCharArray(), csRight = right.getName().toCharArray();
boolean isNumberRegion = false;
int diff=0, i=0, j=0, lenLeft=csLeft.length, lenRight=csRight.length;
char cLeft = 0, cRight = 0;
for (; i<lenLeft && j<lenRight; i++, j++) {
cLeft = getCharByCaseSensitive(csLeft[i]);
cRight = getCharByCaseSensitive(csRight[j]);
boolean isNumericLeft = isNumeric(cLeft), isNumericRight = isNumeric(cRight);
if (isNumericLeft && isNumericRight) {
// Number start!
if (!isNumberRegion) {
isNumberRegion = true;
// Remove prefix '0'
while (i < lenLeft && cLeft == '0') i++;
while (j < lenRight && cRight == '0') j++;
if (i == lenLeft || j == lenRight) break;
}
// Diff start: calculate the diff value.
if (cLeft != cRight && diff == 0)
diff = cLeft - cRight;
} else {
if (isNumericLeft != isNumericRight) {
// One numeric and one char.
if (isNumberRegion)
return isNumericLeft ? 1 : -1;
return cLeft - cRight;
} else {
// Two chars: if (number) diff don't equal 0 return it.
if (diff != 0)
return diff;
// Calculate chars diff.
diff = cLeft - cRight;
if (diff != 0)
return diff;
// Reset!
isNumberRegion = false;
diff = 0;
}
}
}
// The longer one will be put backwards.
return (i == lenLeft && j == lenRight) ? cLeft - cRight : (i == lenLeft ? -1 : 1) ;
}
private boolean isNumeric(char c) {
return c >= '0' && c <= '9';
}
private char getCharByCaseSensitive(char c) {
return caseSensitive ? c : (c >= 'A' && c <= 'Z' ? (char) (c + SPAN_OF_CASES) : c);
}
});
files.forEach(f -> System.out.print(f.getName() + " "));
}
}
The output is,
1.sadasdsadsa 11.asdasdasdasdasd 12_222_222 1_22_2222 2.sadsadasdsad 22.sadasdasdsadsa 2222222222 3.asdasdsadsadsa A__01__02 A__11__23 A__1__23 A__2__02 A__3++++ B__1__02 B__22_13 adsadsadsasd02 adsadsadsasd1 adsadsadsasd10 adsadsadsasd3
1.sadasdsadsa 1_22_2222 2.sadsadasdsad 3.asdasdsadsadsa 11.asdasdasdasdasd 12_222_222 22.sadasdasdsadsa 2222222222 A__01__02 A__1__23 A__2__02 A__3++++ A__11__23 adsadsadsasd02 adsadsadsasd1 adsadsadsasd3 adsadsadsasd10 B__1__02 B__22_13
Process finished with exit code 0
// -1: s0 < s1; 0: s0 == s1; 1: s0 > s1
static int numericCompare(const string &s0, const string &s1) {
size_t i = 0, j = 0;
for (; i < s0.size() && j < s1.size();) {
string t0(1, s0[i++]);
while (i < s0.size() && !(isdigit(t0[0]) ^ isdigit(s0[i]))) {
t0.push_back(s0[i++]);
}
string t1(1, s1[j++]);
while (j < s1.size() && !(isdigit(t1[0]) ^ isdigit(s1[j]))) {
t1.push_back(s1[j++]);
}
if (isdigit(t0[0]) && isdigit(t1[0])) {
size_t p0 = t0.find_first_not_of('0');
size_t p1 = t1.find_first_not_of('0');
t0 = p0 == string::npos ? "" : t0.substr(p0);
t1 = p1 == string::npos ? "" : t1.substr(p1);
if (t0.size() != t1.size()) {
return t0.size() < t1.size() ? -1 : 1;
}
}
if (t0 != t1) {
return t0 < t1 ? -1 : 1;
}
}
return i == s0.size() && j == s1.size() ? 0 : i != s0.size() ? 1 : -1;
}
I am not very sure if it is you want, anyway, you can have a try:-)

c++ process does not stop

There is something wrong in the code but I do not understand why.
I think one of the reason could be the threads that I used in the code.
The code looks like this.
in main.cpp
vector<thread> t;
vector<future<myClass>> futures;
vector<myClass> chV;
for(int i = 0; i < NumberOfThreads; i++) // NumberOfThreads are 2 here
{
promise<myClass> promises;
futures.push_back(promises.get_future());
t.push_back(thread(MyFunction ,i, PointsNumberInThreads , pointList, std::move(promises)));
}
for_each(t.begin(), t.end(), std::mem_fn(&std::thread::join));
for(int i = 0; i < futures.size(); i++ )
{
// futures.at(i).get().fOut(i); // <-- if I comment out then it gives error. but why?
chV.push_back(futures.at(i).get());
}
myClass c1 = chV.at(0);
myClass c2 = chV.at(1);
cout << "merge start" << endl; // <-- it prints out
c1.Merge(c2);
cout << "merge end" << endl; // <-- does not print out this message. so I have to kill the process
the Merge function in the myClass.cpp I have these sentences at the bottom of the function.
int aSt,aMid,aEnd;
int bSt,bMid,bEnd;
aSt = 0, aEnd = upperV.size(), aMid = (aEnd + aSt)/2;
bSt = 0, bEnd = b.upperV.size(), bMid = (bEnd + bSt)/2;
Point aPoint, bPoint, aPrev, aNext, bPrev, bNext;
aPoint = upperV.at(aMid);
aPrev = upperV.at(aMid-1);
aNext = upperV.at(aMid+1);
bPoint = b.upperV.at(bMid);
bPrev = b.upperV.at(bMid-1);
bNext = b.upperV.at(bMid+1);
bool done = true;
while(done)
{
done = false;
if(orientation(aPoint,bPoint,bPrev) > 0)
{
bEnd = bMid;
bMid = (bEnd + bSt)/2;
bPoint = b.upperV.at(bMid);
done = true;
}
if(orientation(aPoint,bPoint,bNext) > 0)
{
bSt = bMid;
bMid = (bEnd + bSt)/2;
bPoint = b.upperV.at(bMid);
done = true;
}
if(orientation(bPoint,aPoint,aPrev) < 0)
{
aEnd = aMid;
aMid = (aEnd + aSt)/2;
aPoint = upperV.at(aMid);
done = true;
}
if(orientation(bPoint,aPoint,aNext) < 0)
{
aSt = aMid;
aMid = (aEnd + aSt)/2;
aPoint = upperV.at(aMid);
done = true;
}
}
cout << "aPoint = (" << aPoint.x << " , " << aPoint.y << ")" << endl;
cout << "bPoint = (" << bPoint.x << " , " << bPoint.y << ")" << endl;
and they are printed but as I mentioned the process seems to keep running in somewhere.
This is the section of code that I think you should be concerned with. This may
not be why your code is continuing execution as you have stated in your problem, but from the code that you did provide to us; I still think this is an issue that should be addressed.
bool done = true;
while(done)
{
done = false;
if(orientation(aPoint,bPoint,bPrev) > 0)
{
bEnd = bMid;
bMid = (bEnd + bSt)/2;
bPoint = b.upperV.at(bMid);
done = true;
}
if(orientation(aPoint,bPoint,bNext) > 0)
{
bSt = bMid;
bMid = (bEnd + bSt)/2;
bPoint = b.upperV.at(bMid);
done = true;
}
if(orientation(bPoint,aPoint,aPrev) < 0)
{
aEnd = aMid;
aMid = (aEnd + aSt)/2;
aPoint = upperV.at(aMid);
done = true;
}
if(orientation(bPoint,aPoint,aNext) < 0)
{
aSt = aMid;
aMid = (aEnd + aSt)/2;
aPoint = upperV.at(aMid);
done = true;
}
}
You are initially setting this to be true. You are then going into your while loop and are setting it to false. All of your if statements set it back to true. So if any of these conditions are met depending on what orientation() returns then you are not going to break out of this while loop. It may seem to be illogical at first, but I learned over time that a good way to do while loops is in this structure.
bool done = false;
while ( !done ) {
// terminating case - I usually almost always check for this first!
if ( done ) {
break;
}
// Do work here and one or more cases could trip done to be equal to true
// within a if... else if ... else statement or within a series of if satements
// or none of them can, but if they don't then it must be set at the end of the while loop.
// Any one of these or all of them could set done = true and if they don't
// then you should set done = true at the end of the while loop since
// if none of these conditions are met, it will execute the assignment
// before it begins to loop.
if ( some condidition ) {
// Do this
} else if ( some other condidition ) {
// Do this instead
} else {
// Then do this
}
// Or this: Any of these could trip done = true or none of them
// and once again if none of them do, it should be the last thing done
// before the ending brace to your while loop.
if ( this ) {
// Do Work;
}
if ( this ) {
// Do Work;
}
// etc.
// Set this last if none of the if statements trip done to be true;
// otherwise if one or more does trip done, then you can omit this line.
// Note: If none of the if statements trip this flag and you trip it here
// in this section, this loop will occur only one time.
done = true;
}

Delete duplicates from an array of objects

I have to delete the duplicates from an array of objects, but so far my algorithm gives me mistakes:
request for member 'author' in '((Album*)this)->Album::songs',
which is of pointer type 'Song*' (maybe you meant to use '->' ?)|
So this is the code:
void Album::deleteDuplicates()
{
Song* current , *end = songs + top - 1;
for ( current = songs + 1; songs < end; songs++, current = songs + 1 )
{
while ( current <= end )
{
if ( current->author == songs.author && current->title == *songs.title
&& current->year == *songs.year && current->length == *songs.length)
{
*current = *end--;
}
else
{
current++;
}
}
}
}
Any ideas how to fix it?
With the following code the compiler error should go away.
if ( current->author == songs->author && current->title == songs->title
^^^ ^^^
&& current->year == songs->year && current->length == songs->length)
^^^ ^^^

How to implement a natural sort algorithm in c++?

I'm sorting strings that are comprised of text and numbers.
I want the sort to sort the number parts as numbers, not alphanumeric.
For example I want: abc1def, ..., abc9def, abc10def
instead of: abc10def, abc1def, ..., abc9def
Does anyone know an algorithm for this (in particular in c++)
Thanks
I asked this exact question (although in Java) and got pointed to http://www.davekoelle.com/alphanum.html which has an algorithm and implementations of it in many languages.
Update 14 years later: Dave Koelle’s blog has gone off line and I can’t find his actual algorithm, but here’s an implementation.
https://github.com/cblanc/koelle-sort
Several natural sort implementations for C++ are available. A brief review:
natural_sort<> - based on Boost.Regex.
In my tests, it's roughly 20 times slower than other options.
Dirk Jagdmann's alnum.hpp, based on Dave Koelle's alphanum algorithm
Potential integer overlow issues for values over MAXINT
Martin Pool's natsort - written in C, but trivially usable from C++.
The only C/C++ implementation I've seen to offer a case insensitive version, which would seem to be a high priority for a "natural" sort.
Like the other implementations, it doesn't actually parse decimal points, but it does special case leading zeroes (anything with a leading 0 is assumed to be a fraction), which is a little weird but potentially useful.
PHP uses this algorithm.
This is known as natural sorting. There's an algorithm here that looks promising.
Be careful of problems with non-ASCII characters (see Jeff's blog entry on the subject).
Partially reposting my another answer:
bool compareNat(const std::string& a, const std::string& b){
if (a.empty())
return true;
if (b.empty())
return false;
if (std::isdigit(a[0]) && !std::isdigit(b[0]))
return true;
if (!std::isdigit(a[0]) && std::isdigit(b[0]))
return false;
if (!std::isdigit(a[0]) && !std::isdigit(b[0]))
{
if (a[0] == b[0])
return compareNat(a.substr(1), b.substr(1));
return (toUpper(a) < toUpper(b));
//toUpper() is a function to convert a std::string to uppercase.
}
// Both strings begin with digit --> parse both numbers
std::istringstream issa(a);
std::istringstream issb(b);
int ia, ib;
issa >> ia;
issb >> ib;
if (ia != ib)
return ia < ib;
// Numbers are the same --> remove numbers and recurse
std::string anew, bnew;
std::getline(issa, anew);
std::getline(issb, bnew);
return (compareNat(anew, bnew));
}
toUpper() function:
std::string toUpper(std::string s){
for(int i=0;i<(int)s.length();i++){s[i]=toupper(s[i]);}
return s;
}
Usage:
std::vector<std::string> str;
str.push_back("abc1def");
str.push_back("abc10def");
...
std::sort(str.begin(), str.end(), compareNat);
To solve what is essentially a parsing problem a state machine (aka finite state automaton) is the way to go. Dissatisfied with the above solutions i wrote a simple one-pass early bail-out algorithm that beats C/C++ variants suggested above in terms of performance, does not suffer from numerical datatype overflow errors, and is easy to modify to add case insensitivity if required.
sources can be found here
For those that arrive here and are already using Qt in their project, you can use the QCollator class. See this question for details.
Avalanchesort is a recursive variation of naturall sort, whiche merge runs, while exploring the stack of sorting-datas. The algorithim will sort stable, even if you add datas to your sorting-heap, while the algorithm is running/sorting.
The search-principle is simple. Only merge runs with the same rank.
After finding the first two naturell runs (rank 0), avalanchesort merge them to a run with rank 1. Then it call avalanchesort, to generate a second run with rank 1 and merge the two runs to a run with rank 2. Then it call the avalancheSort to generate a run with rank 2 on the unsorted datas....
My Implementation porthd/avalanchesort divide the sorting from the handling of the data using interface injection. You can use the algorithmn for datastructures like array, associative arrays or lists.
/**
* #param DataListAvalancheSortInterface $dataList
* #param DataRangeInterface $beginRange
* #param int $avalancheIndex
* #return bool
*/
public function startAvalancheSort(DataListAvalancheSortInterface $dataList)
{
$avalancheIndex = 0;
$rangeResult = $this->avalancheSort($dataList, $dataList->getFirstIdent(), $avalancheIndex);
if (!$dataList->isLastIdent($rangeResult->getStop())) {
do {
$avalancheIndex++;
$lastIdent = $rangeResult->getStop();
if ($dataList->isLastIdent($lastIdent)) {
$rangeResult = new $this->rangeClass();
$rangeResult->setStart($dataList->getFirstIdent());
$rangeResult->setStop($dataList->getLastIdent());
break;
}
$nextIdent = $dataList->getNextIdent($lastIdent);
$rangeFollow = $this->avalancheSort($dataList, $nextIdent, $avalancheIndex);
$rangeResult = $this->mergeAvalanche($dataList, $rangeResult, $rangeFollow);
} while (true);
}
return $rangeResult;
}
/**
* #param DataListAvalancheSortInterface $dataList
* #param DataRangeInterface $range
* #return DataRangeInterface
*/
protected function findRun(DataListAvalancheSortInterface $dataList,
$startIdent)
{
$result = new $this->rangeClass();
$result->setStart($startIdent);
$result->setStop($startIdent);
do {
if ($dataList->isLastIdent($result->getStop())) {
break;
}
$nextIdent = $dataList->getNextIdent($result->getStop());
if ($dataList->oddLowerEqualThanEven(
$dataList->getDataItem($result->getStop()),
$dataList->getDataItem($nextIdent)
)) {
$result->setStop($nextIdent);
} else {
break;
}
} while (true);
return $result;
}
/**
* #param DataListAvalancheSortInterface $dataList
* #param $beginIdent
* #param int $avalancheIndex
* #return DataRangeInterface|mixed
*/
protected function avalancheSort(DataListAvalancheSortInterface $dataList,
$beginIdent,
int $avalancheIndex = 0)
{
if ($avalancheIndex === 0) {
$rangeFirst = $this->findRun($dataList, $beginIdent);
if ($dataList->isLastIdent($rangeFirst->getStop())) {
// it is the last run
$rangeResult = $rangeFirst;
} else {
$nextIdent = $dataList->getNextIdent($rangeFirst->getStop());
$rangeSecond = $this->findRun($dataList, $nextIdent);
$rangeResult = $this->mergeAvalanche($dataList, $rangeFirst, $rangeSecond);
}
} else {
$rangeFirst = $this->avalancheSort($dataList,
$beginIdent,
($avalancheIndex - 1)
);
if ($dataList->isLastIdent($rangeFirst->getStop())) {
$rangeResult = $rangeFirst;
} else {
$nextIdent = $dataList->getNextIdent($rangeFirst->getStop());
$rangeSecond = $this->avalancheSort($dataList,
$nextIdent,
($avalancheIndex - 1)
);
$rangeResult = $this->mergeAvalanche($dataList, $rangeFirst, $rangeSecond);
}
}
return $rangeResult;
}
protected function mergeAvalanche(DataListAvalancheSortInterface $dataList, $oddListRange, $evenListRange)
{
$resultRange = new $this->rangeClass();
$oddNextIdent = $oddListRange->getStart();
$oddStopIdent = $oddListRange->getStop();
$evenNextIdent = $evenListRange->getStart();
$evenStopIdent = $evenListRange->getStop();
$dataList->initNewListPart($oddListRange, $evenListRange);
do {
if ($dataList->oddLowerEqualThanEven(
$dataList->getDataItem($oddNextIdent),
$dataList->getDataItem($evenNextIdent)
)) {
$dataList->addListPart($oddNextIdent);
if ($oddNextIdent === $oddStopIdent) {
$restTail = $evenNextIdent;
$stopTail = $evenStopIdent;
break;
}
$oddNextIdent = $dataList->getNextIdent($oddNextIdent);
} else {
$dataList->addListPart($evenNextIdent);
if ($evenNextIdent === $evenStopIdent) {
$restTail = $oddNextIdent;
$stopTail = $oddStopIdent;
break;
}
$evenNextIdent = $dataList->getNextIdent($evenNextIdent);
}
} while (true);
while ($stopTail !== $restTail) {
$dataList->addListPart($restTail);
$restTail = $dataList->getNextIdent($restTail);
}
$dataList->addListPart($restTail);
$dataList->cascadeDataListChange($resultRange);
return $resultRange;
}
}
My algorithm with test code of java version. If you want to use it in your project you can define a comparator yourself.
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.function.Consumer;
public class FileNameSortTest {
private static List<String> names = Arrays.asList(
"A__01__02",
"A__2__02",
"A__1__23",
"A__11__23",
"A__3++++",
"B__1__02",
"B__22_13",
"1_22_2222",
"12_222_222",
"2222222222",
"1.sadasdsadsa",
"11.asdasdasdasdasd",
"2.sadsadasdsad",
"22.sadasdasdsadsa",
"3.asdasdsadsadsa",
"adsadsadsasd1",
"adsadsadsasd10",
"adsadsadsasd3",
"adsadsadsasd02"
);
public static void main(String...args) {
List<File> files = new ArrayList<>();
names.forEach(s -> {
File f = new File(s);
try {
if (!f.exists()) {
f.createNewFile();
}
files.add(f);
} catch (IOException e) {
e.printStackTrace();
}
});
files.sort(Comparator.comparing(File::getName));
files.forEach(f -> System.out.print(f.getName() + " "));
System.out.println();
files.sort(new Comparator<File>() {
boolean caseSensitive = false;
int SPAN_OF_CASES = 'a' - 'A';
#Override
public int compare(File left, File right) {
char[] csLeft = left.getName().toCharArray(), csRight = right.getName().toCharArray();
boolean isNumberRegion = false;
int diff=0, i=0, j=0, lenLeft=csLeft.length, lenRight=csRight.length;
char cLeft = 0, cRight = 0;
for (; i<lenLeft && j<lenRight; i++, j++) {
cLeft = getCharByCaseSensitive(csLeft[i]);
cRight = getCharByCaseSensitive(csRight[j]);
boolean isNumericLeft = isNumeric(cLeft), isNumericRight = isNumeric(cRight);
if (isNumericLeft && isNumericRight) {
// Number start!
if (!isNumberRegion) {
isNumberRegion = true;
// Remove prefix '0'
while (i < lenLeft && cLeft == '0') i++;
while (j < lenRight && cRight == '0') j++;
if (i == lenLeft || j == lenRight) break;
}
// Diff start: calculate the diff value.
if (cLeft != cRight && diff == 0)
diff = cLeft - cRight;
} else {
if (isNumericLeft != isNumericRight) {
// One numeric and one char.
if (isNumberRegion)
return isNumericLeft ? 1 : -1;
return cLeft - cRight;
} else {
// Two chars: if (number) diff don't equal 0 return it.
if (diff != 0)
return diff;
// Calculate chars diff.
diff = cLeft - cRight;
if (diff != 0)
return diff;
// Reset!
isNumberRegion = false;
diff = 0;
}
}
}
// The longer one will be put backwards.
return (i == lenLeft && j == lenRight) ? cLeft - cRight : (i == lenLeft ? -1 : 1) ;
}
private boolean isNumeric(char c) {
return c >= '0' && c <= '9';
}
private char getCharByCaseSensitive(char c) {
return caseSensitive ? c : (c >= 'A' && c <= 'Z' ? (char) (c + SPAN_OF_CASES) : c);
}
});
files.forEach(f -> System.out.print(f.getName() + " "));
}
}
The output is,
1.sadasdsadsa 11.asdasdasdasdasd 12_222_222 1_22_2222 2.sadsadasdsad 22.sadasdasdsadsa 2222222222 3.asdasdsadsadsa A__01__02 A__11__23 A__1__23 A__2__02 A__3++++ B__1__02 B__22_13 adsadsadsasd02 adsadsadsasd1 adsadsadsasd10 adsadsadsasd3
1.sadasdsadsa 1_22_2222 2.sadsadasdsad 3.asdasdsadsadsa 11.asdasdasdasdasd 12_222_222 22.sadasdasdsadsa 2222222222 A__01__02 A__1__23 A__2__02 A__3++++ A__11__23 adsadsadsasd02 adsadsadsasd1 adsadsadsasd3 adsadsadsasd10 B__1__02 B__22_13
Process finished with exit code 0
// -1: s0 < s1; 0: s0 == s1; 1: s0 > s1
static int numericCompare(const string &s0, const string &s1) {
size_t i = 0, j = 0;
for (; i < s0.size() && j < s1.size();) {
string t0(1, s0[i++]);
while (i < s0.size() && !(isdigit(t0[0]) ^ isdigit(s0[i]))) {
t0.push_back(s0[i++]);
}
string t1(1, s1[j++]);
while (j < s1.size() && !(isdigit(t1[0]) ^ isdigit(s1[j]))) {
t1.push_back(s1[j++]);
}
if (isdigit(t0[0]) && isdigit(t1[0])) {
size_t p0 = t0.find_first_not_of('0');
size_t p1 = t1.find_first_not_of('0');
t0 = p0 == string::npos ? "" : t0.substr(p0);
t1 = p1 == string::npos ? "" : t1.substr(p1);
if (t0.size() != t1.size()) {
return t0.size() < t1.size() ? -1 : 1;
}
}
if (t0 != t1) {
return t0 < t1 ? -1 : 1;
}
}
return i == s0.size() && j == s1.size() ? 0 : i != s0.size() ? 1 : -1;
}
I am not very sure if it is you want, anyway, you can have a try:-)