I'm trying to convert some strings: I'd like to be able to remove diacritics from a string. (Example: éùèà would become euea.)
I have tried this:
// Attempt at stripping diacritics from `input`: normalize the string, copy
// every character whose Unicode category is not NonSpacingMark into a
// StringBuilder, then normalize the result again and return it.
// NOTE(review): this is the non-working version from the question. It uses
// C#-style member access ('.') on .NET types and unqualified type names; the
// working job elsewhere in this file uses '::' and fully qualified names.
static str AALRemoveDiacritics( System.String input )
{
int i;
// FormD is declared here but never assigned before it is passed to Normalize().
System.Text.NormalizationForm FormD;
str normalizedString = input.Normalize(FormD);
System.Text.StringBuilder stringBuilder = new System.Text.StringBuilder();
for (i = 0; i < strLen(normalizedString); i++)
{
// NOTE(review): indexes an X++ str with [] and declares a variable inside the
// loop body -- presumably neither is accepted by the X++ compiler; confirm.
System.Char c = normalizedString[i];
// Keep the character unless it is a non-spacing (combining) mark.
if (CharUnicodeInfo.GetUnicodeCategory(c) != UnicodeCategory.NonSpacingMark)
{
stringBuilder.Append(c);
}
}
// Re-normalize the filtered text (FormC requested) before returning it.
return stringBuilder.ToString().Normalize(NormalizationForm.FormC);
}
It looks like you tried making this post work in X++ and were very close.
Here's a working job I just wrote you can use:
// Job that demonstrates removing diacritics from a hard-coded sample string:
// the string is normalized with FormD, every character whose Unicode category
// is NonSpacingMark is dropped, the remainder is normalized again, and the
// text before and after is written to the infolog.
static void AlexRemoveDiacritics(Args _args)
{
str strInput = 'ÁÂÃÄÅÇÈÉàáâãäåèéêëìíîïòóôõ£ALEX';
// Assigning the X++ str to a System.String gives access to the .NET Normalize() method.
System.String input = strInput;
str retVal;
int i;
System.Char c;
// The enum value is fetched with '::' and stored in a variable that is then passed to Normalize().
System.Text.NormalizationForm FormD = System.Text.NormalizationForm::FormD;
str normalizedString = input.Normalize(FormD);
System.Text.StringBuilder stringBuilder = new System.Text.StringBuilder();
// NOTE(review): the loop runs i = 0 .. strLen inclusive while subStr() is
// presumably 1-based, so the i = 0 pass would hand an empty string to
// Char::Parse -- confirm how that case behaves.
for (i = 0; i <= strLen(normalizedString); i++)
{
// Take the single character at position i and convert it to a System.Char.
c = System.Char::Parse(subStr(normalizedString, i, 1));
// Keep the character unless it is a non-spacing (combining) mark.
if (System.Globalization.CharUnicodeInfo::GetUnicodeCategory(c) != System.Globalization.UnicodeCategory::NonSpacingMark)
{
stringBuilder.Append(c);
}
}
// Round-trip through System.String so Normalize() can be called with its default form.
input = stringBuilder.ToString();
input = input.Normalize();
retVal = input;
info(strFmt("Before: '%1'", strInput));
info(strFmt("After: '%1'", retVal));
}
Related
I have a str like this;
"B S <b.s#msoft.com>; J T <j.t#msoft.com>; A M <a.m#msoft.com>"
and i want to return to this format;
"b.s#msoft.com, j.t#msoft.com, a.m#msoft.com"
how i can do this?
// Splits a ';'-separated recipient list and writes one infolog line per entry:
// the text after '<' for entries that contain one, otherwise the trimmed entry.
// `a`, `mnew` are declared but not used in this snippet.
str mail, a, mnew;
int b, c;
List strlist = new List(Types::String);
ListIterator iterator;
mail = "B S <b.s#msoft.com>; J T <j.t#msoft.com>; A M <a.m#msoft.com>";
// The List created above is replaced by the result of strSplit().
strlist = strSplit(mail,';');
iterator = new ListIterator(strlist);
while (iterator.more())
{
if (strcontains(iterator.value(), "<"))
{
// b / c: positions of '<' and '>' found by searching the whole entry.
b = strFind(iterator.value(), "<", 1, strLen(iterator.value()));
c = strFind(iterator.value(), ">", 1, strLen(iterator.value()));
// '>' is removed first, then the text starting right after '<' is taken.
// NOTE(review): the length argument is c (a position), not c - b - 1;
// presumably subStr just stops at the end of the string -- confirm.
info(strFmt("%1",subStr(strRem(iterator.value(),'>'),b+1,c)));
}
else
{
// No angle brackets: print the entry with surrounding whitespace trimmed.
info(strFmt("%1",strLRTrim(iterator.value())));
}
iterator.next();
}
I did this with the string runtime functions. How can I do it with a regular expression?
Please check below one of the possible examples with regular expressions:
// Collects every "<...>" match from the text into a ", "-separated string,
// then strips the angle brackets and writes the result to the infolog.
TextBuffer textBuffer = new TextBuffer();
int pos, len;
str res;
;
textBuffer.setText("Brandon Smith <brandon.smith#msoft.com>; Jake Tyler <jake.tyler#msoft.com>; Amelia Miler <amelia.miler#msoft.com>");
// Switch find() to regular-expression matching.
textBuffer.regularExpressions(true);
// The pattern matches '<', one or more of a-z, 0-9, '.' or '#', then '>'.
// `pos` is passed as the search start and is never assigned before the first call.
while (textBuffer.find(#'\<[a-z0-9.#]+\>', pos))
{
pos = textBuffer.matchPos();
len = textBuffer.matchLen();
// First match starts the result; later matches are appended after ", ".
res = (res == '') ? textBuffer.subStr(pos, len) : res + ', ' + textBuffer.subStr(pos, len);
// Move past the start of this match so the next find() does not return it again.
pos++;
}
// Reuse the buffer to remove the '<' and '>' characters from the joined result.
textBuffer.setText(res);
textBuffer.removeChar('<>');
info(textBuffer.getText());
I am writing a program in which I need to compare two Chinese characters.
I use this function to do the comparison:
void fengshuitradition::comparerAuto()
{
Stockage obj_stockage;
Lunar lunar;
LunarObj* obj = lunar.solar2lunar(ui->SBSelection_4->value(), ui->SBSelection_3->value(), ui->SBSelection_2->value());
string day = obj->ganzhiDay;
for(int i = 0; i<64; i++)
{
string jourComparer = obj_stockage.appelStockage(i,1);
string jourComparer2 = obj_stockage.appelStockage(i,2);
if (day.compare(0,6,jourComparer,0,6) == 0 && day.compare(7,6,jourComparer2,0,6) == 0)
{
ui->label_0->setText(obj_stockage.appelStockage(i,0));
ui->label_1->setText(obj_stockage.appelStockage(i,1));
ui->label_2->setText(obj_stockage.appelStockage(i,2));
ui->label_3->setText(obj_stockage.appelStockage(i,3));
ui->label_4->setText(obj_stockage.appelStockage(i,4));
ui->label_5->setText(obj_stockage.appelStockage(i,5));
ui->label_6->setText(obj_stockage.appelStockage(i,6));
return;
}
}
}
My values are stored like this:
const char* stockage[64][7] = {
{"1",
"\u7532 B+",
u8"\u5b50 E+ hiver",
"24 F",
"8",
"癸E-",
". ."},
// ainsi de suite
And I use a library :
// Gan: ten single-character strings, U+7532 U+4E59 U+4E19 U+4E01 U+620A
// U+5DF1 U+5E9A U+8F9B U+58EC U+7678 (the ten heavenly stems, in order).
static std::string Gan[] = {"\u7532","\u4e59","\u4e19","\u4e01","\u620a","\u5df1","\u5e9a","\u8f9b","\u58ec","\u7678"};
// Zhi: twelve single-character strings, U+5B50 U+4E11 U+5BC5 U+536F U+8FB0
// U+5DF3 U+5348 U+672A U+7533 U+9149 U+620C U+4EA5 (the twelve earthly
// branches, in order).
// NOTE(review): the byte length of each entry depends on the compiler's
// execution character set (3 bytes each under UTF-8) -- confirm before
// comparing by byte offsets.
static std::string Zhi[] = {"\u5b50","\u4e11","\u5bc5","\u536f","\u8fb0","\u5df3","\u5348","\u672a","\u7533","\u9149","\u620c","\u4ea5"};
Thanks for your help.
I need to parse a file which contains the financial FIX protocol. A sample is below:
1128=99=24535=X49=CME75=2017040934=82452=2017040920070508394791460=201704092007050800000005799=10000000268=2279=0269=B48=900655=ESM783=23271=1473460731=100000005796=17263279=0269=C48=900655=ESM783=24271=2861528731=100000005796=1726310=219
My application will load many files each with many millions of rows of historical data so performance needs to be considered.
I have reviewed similar questions online around FIX parsing, as well as explored the QuickFix library (specifically using FIX::Message(string) to crack the message), but I aim to have a throughput better than what I was able to achieve using QuickFix.
I wrote up a mock for the most common of the message types (Market Data Incremental Refresh) to see the kind of speed I was achieving, and am most unimpressed with the result of ~60,000 messages / second, including the file parsing of a 3m-line file.
This is my first C++ application, so I'm expecting there to be many flaws in my approach, and any advice on how to improve its performance would be greatly appreciated.
Currently the flow is file->string->MDIncrementalRefresh. An MDIncrementalRefresh has two optional repeating groups, which I'm storing in vectors as they are of unknown size from message to message.
I'm guessing the fact that I'm reconstructing MDIncrementalRefresh upon every update is causing unnecessary overhead compared to re-using the object by updating the contents of the previous MDIncrementalRefresh?
Thanks in Advance
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
using namespace std;
/// Splits `s` at every occurrence of `delimiter`.
///
/// @param s          Text to split. Taken by const reference so the caller's
///                   string is not copied on every call.
/// @param delimiter  Single character that separates tokens.
/// @return The tokens in order. Empty tokens are preserved, so the result
///         always holds (number of delimiters + 1) elements; an empty input
///         yields one empty token.
std::vector<std::string> string_split(const std::string& s, const char delimiter)
{
    std::vector<std::string> output;
    std::size_t start = 0;
    for (;;)
    {
        const std::size_t end = s.find(delimiter, start);
        if (end == std::string::npos)
        {
            // No further delimiter: the rest of the string is the last token.
            output.push_back(s.substr(start));
            break;
        }
        output.push_back(s.substr(start, end - start));
        start = end + 1;
    }
    return output;
}
// Separator between "tag=value" fields in a message (byte 0x01).
const char FIX_FIELD_DELIMITER = '\x01';
// Separator between the tag and the value inside one field.
const char FIX_KEY_DELIMITER = '=';
// Index used to take the first character of a value string.
const int STR_TO_CHAR = 0;
// Positions of the tag and the value in a field split on FIX_KEY_DELIMITER.
const int KEY = 0;
const int VALUE = 1;
// Tag numbers, kept as strings so they can be compared directly with the
// tag text of a split field.
const string Field_TransactTime = "60";
const string Field_MatchEventIndicator = "5799";
const string Field_NoMDEntries = "268";
const string Field_MDUpdateAction = "279";
const string Field_MDEntryType = "269";
const string Field_SecurityID = "48";
const string Field_RptSeq = "83";
const string Field_MDEntryPx = "270";
const string Field_MDEntrySize = "271";
const string Field_NumberOfOrders = "346";
const string Field_MDPriceLevel = "1023";
const string Field_OpenCloseSettlFlag = "286";
const string Field_AggressorSide = "5797";
const string Field_TradingReferenceDate = "5796";
const string Field_HighLimitPrice = "1149";
const string Field_LowLimitPrice = "1148";
const string Field_MaxPriceVariation = "1143";
const string Field_ApplID = "1180";
const string Field_NoOrderIDEntries = "37705";
const string Field_OrderID = "37";
const string Field_LastQty = "32";
const string Field_SettlPriceType= "731";
/// One element of the order-id repeating group of a message.
class OrderIdEntry {
public:
    std::string OrderID;  ///< Value of tag 37.
    int LastQty = 0;      ///< Value of tag 32; 0 until the tag has been parsed.
};
/// One element of the market-data repeating group of a message.
///
/// Every member has a default so that an entry whose message omits a tag
/// never exposes an indeterminate value, however the entry is constructed.
struct MDEntry {
    char MDUpdateAction = '\0';
    char MDEntryType = '\0';
    int SecurityID = 0;
    int RptSeq = 0;
    double MDEntryPx = 0.0;
    int MDEntrySize = 0;
    int NumberOfOrders = 0;
    int MDPriceLevel = 0;
    int OpenCloseSettlFlag = 0;
    std::string SettlPriceType;
    int AggressorSide = 0;
    std::string TradingReferenceDate;
    double HighLimitPrice = 0.0;
    double LowLimitPrice = 0.0;
    double MaxPriceVariation = 0.0;
    int ApplID = 0;
};
class MDIncrementalRefresh {
public:
string TransactTime;
string MatchEventIndicator;
int NoMDEntries;
int NoOrderIDEntries = 0;
vector<MDEntry> MDEntries;
vector<OrderIdEntry> OrderIdEntries;
MDIncrementalRefresh(const string& message)
{
MDEntry* currentMDEntry = nullptr;
OrderIdEntry* currentOrderIDEntry = nullptr;
for (auto fields : string_split(message, FIX_FIELD_DELIMITER))
{
vector<string> kv = string_split(fields, FIX_KEY_DELIMITER);
// Header :: MDIncrementalRefresh
if (kv[KEY] == Field_TransactTime) this->TransactTime = kv[VALUE];
else if (kv[KEY] == Field_MatchEventIndicator) this->MatchEventIndicator = kv[VALUE];
else if (kv[KEY] == Field_NoMDEntries) this->NoMDEntries = stoi(kv[VALUE]);
else if (kv[KEY] == Field_NoOrderIDEntries) this->NoOrderIDEntries = stoi(kv[VALUE]);
// Repeating Group :: MDEntry
else if (kv[KEY] == Field_MDUpdateAction)
{
MDEntries.push_back(MDEntry());
currentMDEntry = &MDEntries.back(); // use pointer for fast lookup on subsequent repeating group fields
currentMDEntry->MDUpdateAction = kv[VALUE][STR_TO_CHAR];
}
else if (kv[KEY] == Field_MDEntryType) currentMDEntry->MDEntryType = kv[VALUE][STR_TO_CHAR];
else if (kv[KEY] == Field_SecurityID) currentMDEntry->SecurityID = stoi(kv[VALUE]);
else if (kv[KEY] == Field_RptSeq) currentMDEntry->RptSeq = stoi(kv[VALUE]);
else if (kv[KEY] == Field_MDEntryPx) currentMDEntry->MDEntryPx = stod(kv[VALUE]);
else if (kv[KEY] == Field_MDEntrySize) currentMDEntry->MDEntrySize = stoi(kv[VALUE]);
else if (kv[KEY] == Field_NumberOfOrders) currentMDEntry->NumberOfOrders = stoi(kv[VALUE]);
else if (kv[KEY] == Field_MDPriceLevel) currentMDEntry->MDPriceLevel = stoi(kv[VALUE]);
else if (kv[KEY] == Field_OpenCloseSettlFlag) currentMDEntry->OpenCloseSettlFlag = stoi(kv[VALUE]);
else if (kv[KEY] == Field_SettlPriceType) currentMDEntry->SettlPriceType= kv[VALUE];
else if (kv[KEY] == Field_AggressorSide) currentMDEntry->AggressorSide = stoi(kv[VALUE]);
else if (kv[KEY] == Field_TradingReferenceDate) currentMDEntry->TradingReferenceDate = kv[VALUE];
else if (kv[KEY] == Field_HighLimitPrice) currentMDEntry->HighLimitPrice = stod(kv[VALUE]);
else if (kv[KEY] == Field_LowLimitPrice) currentMDEntry->LowLimitPrice = stod(kv[VALUE]);
else if (kv[KEY] == Field_MaxPriceVariation) currentMDEntry->MaxPriceVariation = stod(kv[VALUE]);
else if (kv[KEY] == Field_ApplID) currentMDEntry->ApplID = stoi(kv[VALUE]);
// Repeating Group :: OrderIDEntry
else if (kv[KEY] == Field_OrderID) {
OrderIdEntries.push_back(OrderIdEntry());
currentOrderIDEntry = &OrderIdEntries.back();
currentOrderIDEntry->OrderID = kv[VALUE];
}
else if (kv[KEY] == Field_LastQty) currentOrderIDEntry->LastQty = stol(kv[VALUE]);
}
}
};
int main() {
//std::string filename = "test/sample";
std::string line;
std::ifstream file (filename);
int count = 0;
if (file.is_open())
{
while ( std::getline( file, line ) )
{
MDIncrementalRefresh md(line);
if (md.TransactTime != "") {
count++;
}
}
file.close();
}
cout << count << endl;
return 0;
}
For those who are interested, the majority of the time spent processing the code above was in the string_split function. The large number of calls to string_split resulted in many (expensive) allocations being done on the heap.
An alternative implementation, string_split_optim, re-uses a pre-allocated vector. This prevents unnecessary heap allocation/expansion upon every call. The sample below, running 1.5m iterations, suggests a 3.4x speed improvement. Because vector.clear() does not itself release the vector's allocated capacity back to the heap, subsequent calls to string_split_optim whose resulting vector size is <= the previous one cause no additional vector allocations.
#include <string>
#include <vector>
/// Splits `s` at every `delimiter` and stores the tokens in `output`.
///
/// `output` is cleared first, so a caller can pass the same vector on every
/// call. Empty tokens are kept; an input without any delimiter (including the
/// empty string) produces exactly one token.
void string_split_optim(std::vector<std::string>& output, const std::string &s, const char delimiter)
{
    output.clear();

    size_t tokenBegin = 0;
    for (size_t tokenEnd = s.find_first_of(delimiter);
         tokenEnd != std::string::npos;
         tokenEnd = s.find_first_of(delimiter, tokenBegin))
    {
        output.emplace_back(s.substr(tokenBegin, tokenEnd - tokenBegin));
        tokenBegin = tokenEnd + 1;
    }

    // Whatever follows the last delimiter (possibly nothing) is the final token.
    output.emplace_back(s.substr(tokenBegin));
}
// Micro-benchmark: runs string_split and string_split_optim the same number
// of times on one sample message and prints the CPU time each variant took.
int main()
{
    const int kIterations = 1500000;
    const std::string sample = "1128=9\u00019=174\u000135=X\u000149=CME\u000175=20170403\u000134=1061\u000152=20170402211926965794928\u000160=20170402211926965423233\u00015799=10000100\u0001268=1\u0001279=1\u0001269=1\u000148=9006\u000155=ESM7\u000183=118\u0001270=236025.0\u0001271=95\u0001346=6\u00011023=9\u000110=088\u0001";
    std::vector<std::string> tokens;

    // Prints the CPU time elapsed since `since`, in seconds.
    const auto reportElapsed = [](clock_t since) {
        printf("Time taken: %.2fs\n", (double) (clock() - since) / CLOCKS_PER_SEC);
    };

    // Variant 1: a fresh vector is returned and assigned on every call.
    clock_t started = clock();
    int run = 0;
    while (run < kIterations)
    {
        tokens = string_split(sample, '=');
        ++run;
    }
    reportElapsed(started);

    // Variant 2: the same vector is refilled in place on every call.
    started = clock();
    run = 0;
    while (run < kIterations)
    {
        string_split_optim(tokens, sample, '=');
        tokens.clear();
        ++run;
    }
    reportElapsed(started);
}
The result on my macbook was a 3.4x improvement.
Time taken: 6.60s
Time taken: 1.94s
Additionally, the MDIncrementalRefresh object was being repeatedly constructed (on the stack, but its vector members were also being expanded on the heap). In line with the above findings on string_split, I decided to re-use the temporary object and simply clear its previous state, resulting in another significant performance increase.
In my game I keep track of unlocked levels with a vector std::vector<bool> lvlUnlocked_;.
The simple function to save the progress is this:
// Writes the unlock state of every level to ./progress.txt, one
// "lvl<index>=<0|1>" line per entry of lvlUnlocked_. Does nothing when the
// file cannot be opened for writing.
void save() {
    std::ofstream ofile("./progress.txt");
    if (!ofile.good()) {
        return;
    }
    // Stream straight to the file; '\n' avoids a flush after every line.
    for (std::size_t i = 0; i < lvlUnlocked_.size(); ++i) {
        ofile << "lvl" << i << "=" << (lvlUnlocked_.at(i) ? "1" : "0") << '\n';
    }
    // The destructor flushes and closes the file.
}
This works and is nice since I can just use a loop to dump the info.
Now to the part where I am stuck, the lower part of my load function (see comment in code below):
// Reads ./progress.txt and restores lvlUnlocked_ from its "lvl<index>=<0|1>"
// lines. Works for any number of levels: the index is parsed from the key and
// checked against lvlUnlocked_.size().
//
// Lines are skipped (not treated as errors) when they have no '=', do not
// start with "lvl", carry a non-numeric index, or name a level that is out of
// range. A level is unlocked only when its value is exactly "1". Does nothing
// when the file cannot be opened.
void load() {
    std::ifstream ifile("./progress.txt");
    if (!ifile.good()) {
        return;
    }

    const std::string keyPrefix = "lvl";
    std::string line;
    while (std::getline(ifile, line)) {
        const std::size_t eq = line.find('=');
        if (eq == std::string::npos) {
            continue;
        }
        // A line that starts with "lvl" necessarily has eq >= keyPrefix.size().
        if (line.compare(0, keyPrefix.size(), keyPrefix) != 0) {
            continue;
        }

        const std::string digits = line.substr(keyPrefix.size(), eq - keyPrefix.size());
        if (digits.empty() || digits.find_first_not_of("0123456789") != std::string::npos) {
            continue;
        }

        // Accumulate the index by hand and stop as soon as it leaves the valid
        // range, so an absurdly long number can neither overflow nor throw.
        std::size_t index = 0;
        bool inRange = true;
        for (const char digit : digits) {
            index = index * 10 + static_cast<std::size_t>(digit - '0');
            if (index >= lvlUnlocked_.size()) {
                inRange = false;
                break;
            }
        }
        if (!inRange) {
            continue;
        }

        lvlUnlocked_[index] = (line.compare(eq + 1, std::string::npos, "1") == 0);
    }
}
This works fine, but...
the problem is that I have 100+ levels and I want it to be dynamic based on the size of my lvlUnlocked_ vector instead of having to type it all like in the code above.
Is there a way to somehow make use of a loop like in my save function to check all levels?
If you parse your key to extract a suitable integer value, you can just index into the bit-vector with that:
// For every "lvl<index>=<value>" line, set lvlUnlocked_[index] to whether the
// value is "1". Malformed lines are skipped; nothing in the loop can throw on
// bad input, because the index is validated as digits before it is converted.
while (std::getline(ifile, line)) {
    const std::size_t eq = line.find('=');
    if (eq == std::string::npos)
        // no equals sign
        continue;
    const std::string stringKey = line.substr(0, eq);
    const std::string stringValue = line.substr(eq + 1);
    if (stringKey.compare(0, 3, "lvl") != 0)
        // doesn't begin with lvl
        continue;
    // strip off "lvl"
    const std::string levelDigits = stringKey.substr(3);
    if (levelDigits.empty() || levelDigits.find_first_not_of("0123456789") != std::string::npos)
        // not a valid level number
        continue;
    // Convert by hand, bailing out once the value leaves the valid range, so
    // an over-long number cannot overflow.
    std::vector<bool>::size_type index = 0;
    bool inRange = true;
    for (const char digit : levelDigits) {
        index = index * 10 + static_cast<std::vector<bool>::size_type>(digit - '0');
        if (index >= lvlUnlocked_.size()) {
            inRange = false;
            break;
        }
    }
    if (!inRange)
        // out of range
        continue;
    // Set it :-)
    lvlUnlocked_[index] = stringValue == "1";
}
(I've also updated your parsing for "key=value" strings to more idiomatic C++.)
I'm interested in unescaping text; for example, &#92; maps to \ in C. Does anyone know of a good library?
As reference the Wikipedia List of XML and HTML Character Entity References.
For another open source reference in C to decoding these HTML entities you can check out the command line utility uni2ascii/ascii2uni. The relevant files are enttbl.{c,h} for entity lookup and putu8.c which down converts from UTF32 to UTF8.
uni2ascii
I wrote my own unescape code; very simplified, but does the job: pn_util.c
Function Description: Convert special HTML entities back to characters.
Need to do some modifications to fit your requirement.
/*
 * Convert the special HTML entities in a string back to characters.
 *
 * encodedHtmlSpecialEntities: NUL-terminated input text (may be NULL).
 *
 * Returns the input pointer itself when the input is NULL or contains no
 * '&'. Otherwise returns a pointer to an internal static buffer of
 * TITLE_SIZE bytes holding the decoded text; that buffer is overwritten by
 * the next call. Output that would not fit is truncated instead of
 * overflowing the buffer. An '&' that does not start a known entity is
 * copied through unchanged, and decoded text is never decoded a second time.
 */
char* HtmlSpecialChars_Decode(char* encodedHtmlSpecialEntities)
{
    static char decodedHtmlSpecialChars[TITLE_SIZE];

    /* This mapping table can be extended if necessary.
     * NOTE(review): the entity spellings were reconstructed from the decoded
     * characters; both common spellings of the apostrophe are listed. */
    static const struct {
        const char* encodedEntity;
        const char decodedChar;
    } entityToChars[] = {
        {"&lt;", '<'},
        {"&gt;", '>'},
        {"&amp;", '&'},
        {"&quot;", '"'},
        {"&#39;", '\''},
        {"&apos;", '\''},
    };
    const size_t escapeArrayLen = sizeof(entityToChars) / sizeof(entityToChars[0]);
    const char* in;
    size_t out = 0;

    if(encodedHtmlSpecialEntities == NULL || strchr(encodedHtmlSpecialEntities, '&') == NULL)
        return encodedHtmlSpecialEntities;

    /* Single pass: copy characters, replacing each recognised entity. One
     * byte is always kept free for the terminating NUL. */
    in = encodedHtmlSpecialEntities;
    while(*in != '\0' && out + 1 < (size_t)TITLE_SIZE)
    {
        char ch = *in;
        size_t consumed = 1;

        if(ch == '&')
        {
            /* Potential encoded char. */
            for(size_t j = 0; j < escapeArrayLen; j++)
            {
                const size_t entityLen = strlen(entityToChars[j].encodedEntity);
                if(strncmp(in, entityToChars[j].encodedEntity, entityLen) == 0)
                {
                    ch = entityToChars[j].decodedChar;
                    consumed = entityLen;
                    break;
                }
            }
        }

        decodedHtmlSpecialChars[out++] = ch;
        in += consumed;
    }
    decodedHtmlSpecialChars[out] = '\0';

    return decodedHtmlSpecialChars;
}
// Replaces the predefined XML/HTML entities in `txt` with the characters they
// stand for and returns the result.
//
// An '&' that does not start a known entity (including one with no ';' after
// it) is copied through unchanged, and the text after the last entity is
// kept. Decoded text is not scanned again, so "&amp;lt;" becomes "&lt;".
QString UNESC(const QString &txt) {
    static const QChar AMP = '&', SCL = ';';
    // NOTE(review): the entity spellings were reconstructed from the decoded
    // characters; both common spellings of the apostrophe are listed.
    static const QMap<QString, QString> dec = {
        {"&lt;", "<"}, {"&gt;", ">"}
        , {"&amp;", "&"}, {"&quot;", R"(")"}
        , {"&apos;", "'"}, {"&#39;", "'"} };
    if(!txt.contains(AMP)) { return txt; }

    QString out;
    out.reserve(txt.size());
    int copied = 0; // everything before this index is already in `out`
    int amp = 0;
    while((amp = txt.indexOf(AMP, copied)) != -1) {
        out += txt.mid(copied, amp - copied);
        const int semi = txt.indexOf(SCL, amp);
        // value() is a read-only lookup; unknown keys give an empty string
        // without inserting anything into the table.
        const QString val = (semi == -1) ? QString()
                                         : dec.value(txt.mid(amp, semi - amp + 1));
        if(val.isEmpty()) {
            // Not an entity: keep the '&' and carry on right after it.
            out += AMP;
            copied = amp + 1;
        } else {
            out += val;
            copied = semi + 1;
        }
    }
    out += txt.mid(copied); // tail after the last '&'
    return out;
}// UNESC