Why do I get a strcpy runtime error in my code? - c++

I've been trying to make my code work on Windows (moved from the Mac) and for some reason I get a runtime error related to my strcpy call.
Please help!!
Cust.h
/*
* Cust.h
* Project 3
*
* Created by Anthony Glyadchenko on 11/17/09.
* Copyright 2009 __MyCompanyName__. All rights reserved.
*
*/
// <cstdlib> and <cstring> supply atoi/atof and strcpy/strncpy/strtok, which
// Cust.cpp and main.cpp use; both translation units include this header.
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
using namespace std;
#ifndef CUST_H
#define CUST_H
// A bank customer record: account number, first/last name, balance and PIN.
// Text fields are stored in fixed 255-byte character arrays; the class
// declares no constructor, so every member is uninitialized until its
// setter has been called.
class Cust{
public:
// Returns a pointer to the internal account-number buffer (not a copy).
char * getAcctNum();
// Copies the C string `num` into the account-number buffer.
void setAcctNum(char num[]);
// Returns the current balance.
double getCurrBalance();
// Overwrites the current balance with `balance`.
void setCurrBalance(double balance);
// Adds `amount` to the current balance.
void addToCurrBalance(double amount);
// Subtracts `amount` from the current balance.
void subFromCurrBalance(double amount);
// Copies the C string `firstName` into the first-name buffer.
void setAcctFN(char firstName[]);
// Copies the C string `lastName` into the last-name buffer.
void setAcctLN(char lastName[]);
// Returns a pointer to the internal first-name buffer (not a copy).
char * getAcctFN();
// Returns a pointer to the internal last-name buffer (not a copy).
char * getAcctLN();
// Stores the customer's PIN.
void setPIN(int pin);
// Returns the stored PIN.
int getPIN();
private:
char acctNum[255];   // account number as a NUL-terminated string
char acctFN[255];    // first name
char acctLN[255];    // last name
double currBalance;  // current balance
int pin;             // PIN
char fileName[255];  // NOTE(review): not referenced by any member function shown here
};
#endif
Cust.cpp
/*
* Cust.cpp
* Project 3
*
* Created by Anthony Glyadchenko on 11/17/09.
* Copyright 2009 __MyCompanyName__. All rights reserved.
*
*/
#include <fstream>
#include <string>
#include <sstream>
#include "Cust.h"
using namespace std;
char * Cust::getAcctNum(){
return acctNum;
}
// Stores the account number.
//
// num: NUL-terminated string to copy. A null pointer clears the field.
//      Input longer than the buffer is truncated rather than written past
//      the end of acctNum.
void Cust::setAcctNum(char num[]){
    if (!num) {
        acctNum[0] = '\0';
        return;
    }
    std::strncpy(acctNum, num, sizeof(acctNum) - 1);
    // strncpy does not terminate the destination when the source fills it.
    acctNum[sizeof(acctNum) - 1] = '\0';
}
double Cust::getCurrBalance(){
return currBalance;
}
void Cust::setCurrBalance(double balance){
currBalance = balance;
}
// Increases the stored balance by `amount`.
void Cust::addToCurrBalance(double amount){
    currBalance = currBalance + amount;
}
// Decreases the stored balance by `amount`.
void Cust::subFromCurrBalance(double amount){
    currBalance = currBalance - amount;
}
// Stores the customer's first name.
//
// firstName: NUL-terminated string to copy. A null pointer clears the
//            field. Input longer than the buffer is truncated rather than
//            written past the end of acctFN.
void Cust::setAcctFN(char firstName[]){
    if (!firstName) {
        acctFN[0] = '\0';
        return;
    }
    std::strncpy(acctFN, firstName, sizeof(acctFN) - 1);
    // strncpy does not terminate the destination when the source fills it.
    acctFN[sizeof(acctFN) - 1] = '\0';
}
// Stores the customer's last name.
//
// lastName: NUL-terminated string to copy. A null pointer clears the
//           field. Input longer than the buffer is truncated rather than
//           written past the end of acctLN.
void Cust::setAcctLN(char lastName[]){
    if (!lastName) {
        acctLN[0] = '\0';
        return;
    }
    std::strncpy(acctLN, lastName, sizeof(acctLN) - 1);
    // strncpy does not terminate the destination when the source fills it.
    acctLN[sizeof(acctLN) - 1] = '\0';
}
char * Cust::getAcctFN(){
return acctFN;
}
char * Cust::getAcctLN(){
return acctLN;
}
void Cust::setPIN(int pin){
Cust::pin = pin;
}
int Cust::getPIN(){
return pin;
}
main.cpp
#include <iostream>
#include <string>
#include <fstream>
#include "Cust.h"
using namespace std;
/*
 * Counts the newline characters ('\n') in a file.
 *
 * file:    NUL-terminated path of the file to scan.
 * returns: the number of '\n' characters read; 0 when the file is empty
 *          or cannot be opened.
 */
int findNumLines(char file[]){
    std::ifstream tempInput(file);
    char ch;
    int lineCount = 0;
    // Loop on the read itself instead of eof(): eof() only becomes true
    // after a read has already failed, so testing it first re-examines the
    // previous character (double-counting a trailing '\n') and never
    // terminates when the stream failed to open.
    while (tempInput.get(ch)){
        if (ch == '\n') lineCount++;
    }
    // The stream is closed by its destructor.
    return lineCount;
}
int main (int argc, char * const argv[]) {
Cust customers[500];
char tmpString[70] = " ";
char pch[255];
string tmpAcctFN = " ";
string tmpAcctLN = " ";
ifstream input("P3_custData.txt");
for (int idx = 0; idx < 130; idx++){
input.getline(tmpString, 70, '\n');
strcpy(pch,strtok(tmpString," "));
customers[idx].setAcctNum(pch);
cout << pch << endl;
strcpy(pch, strtok(NULL," "));;
customers[idx].setAcctFN(pch);
cout << pch << endl;
strcpy(pch, strtok(NULL," "));;
customers[idx].setAcctLN(pch);
cout << pch << endl;
strcpy(pch, strtok(NULL," "));;
customers[idx].setCurrBalance(atol(pch));
cout << pch << endl;
strcpy(pch, strtok(NULL," "));;
customers[idx].setPIN(atoi(pch));
cout << pch << endl;
}
input.close();
return 0;
}
P3_custData.txt
10000 Alicia Jones 1005.00 1234
10010 Mary Gonzalez 2040.55 8472
10020 Bill Henry 5340.20 7840
10030 Alex Brown 10010.50 8202
10040 Becca Kingman 983.00 9201
10050 Oliver Stone 12001.74 2382
10060 Robert Reich 3010.30 8137
10070 Judith Johnson 540.98 8203
10080 Jeremy Brice 672.10 8472
10090 Andrew Aziz 4041.50 2456
10100 Alicia Jones 10010.00 8264
10110 Mary Gonzalez 2050.51 6252
10120 Bill Henry 5340.20 3658
10130 Audrey Samuels 536.78 7462
10140 Marion Sams 9788.19 3266
10150 Richard Rubens 3265.90 6237
10160 Russell Townsend 123.00 5324
10170 Carolyn Tanner 4210.60 3256
10180 Corey Brill 77.40 4356
10190 Randall North 44.50 6346
10200 James Jackson 10020.00 2457
10210 Martin Gallagher 2041.50 2345
10220 William Walker 7340.20 2345
10230 Ellen Jacobson 433.99 1234
10240 Angela Bryer 15010.10 4321
10250 Steven Bond 960.00 9876
10260 Sally Stevens 23.10 2834
10270 Alan Fuller 7858.00 7294
10280 Peter Prentice 697.00 7618
10290 Paula Smith 1020.00 7349
10300 Alice Johnson 10030.00 7364
10310 Gail Green 3040.55 6717
10320 Gene Harold 8340.20 5162
10330 Lois Lane 100.00 7234
10340 Debby Dewhurst 8765.34 1382
10350 Louise Talent 350.00 8193
10360 Louis Bragg 10091.22 6738
10370 Alexander Gibson 540.70 7392
10380 Gertrude Ring 9030.00 7390
10390 John Johnson 3299.99 6329
10400 Alice Johannsen 2009.80 8273
10410 Marty Gordon 2040.55 6712
10420 William Hurst 540.20 1273
10430 Barry True 278.50 3247
10440 Maxwell Smart 800.66 2119
10450 Owen Burton 5261.00 3749
10460 Diane Walters 6004.44 3794
10470 Georgina Trump 7083.00 9283
10480 Erica Applegate 12007.00 3649
10490 Walter Wonkers 15789.40 1639
10500 Alicia Rogers 1009.00 6392
10510 Emmanuel Evans 220.50 2803
10520 Robert Bachman 760.25 9999
10530 Richard Rogers 2345.10 8888
10540 Roberta Maxwell 6666.66 6238
10550 Gregory Ichan 521.30 1111
10560 Lars Jensen 497.80 7239
10570 Roberta Peters 20004.10 3333
10580 Ali Masterson 3980.00 8304
10590 Laurence Leonard 6732.12 3684
10600 Tracy Jones 500.00 6382
10610 Michael Gonzalez 2040.57 3649
10620 Alexander Henry 5368.10 7389
10630 Leo Palmer 21900.00 6283
10640 Esther Richman 300.00 3684
10650 Harold Pinter 6783.10 3648
10660 Eva Burton 5355.55 7639
10670 William Shakespeare 0.00 6384
10680 Russell Carlson 4455.77 1384
10690 Janice Klein 3965.15 2738
10700 Henry Adams 4050.00 2374
10710 George Gonzalez 2040.55 2739
10720 Jose Enrique 5340.20 2376
10730 Jane Eakins 657.90 8209
10740 Justin Prince 8000.12 7394
10750 Ed True 5978.00 4798
10760 Emily Prentice 34.00 8220
10770 Olivia Callahan 231.21 5374
10780 Peter Cabot 5478.20 8293
10790 Andrew Austin 1110.10 3792
10800 Oliver Owens 100.00 8201
10810 Monty Wood 200.55 3748
10820 Terrance Thomas 340.20 6239
10830 Barry Brown 105.00 6387
10840 Harrison Huston 299.78 6384
10850 Robin Young 8655.30 9734
10860 Ishmael Green 10101.10 9246
10870 Fiona Fein 257.20 2836
10880 Florence Gregson 5699.60 6374
10890 Wilma Flinstone 78.00 5478
10900 Nancy Drew 2001.00 2536
10910 Captain Kirk 2444.44 7364
10920 Allie McGraw 540.20 6483
10930 Frederick Campbell 1050.00 6492
10940 Paula Prescott 5134.44 7483
10950 Ursula Unger 789.00 6482
10960 Betty Banker 4500.34 3567
10970 Elizabeth Young 1022.00 6489
10980 Maria Manners 510.00 5463
10990 Tracy Austin 674.10 6834
11000 Alex Andrews 300.00 1245
11010 Mike Matire 4040.55 7234
11020 Oscar Grouch 5340.20 9326
11030 Jennifer Young 823.33 6593
11040 Walter True 444.00 3485
11050 Hudson Haliburton 953.10 8465
11060 Ursula Angel 321.00 6583
11070 Zackery Brown 7666.60 9123
11080 Carole King 10000.00 6382
11090 Tracy Burton 955.00 6654
11100 Arthur Jones 100.00 7893
11110 Andrew Jackson 4040.55 9173
11120 Samuel Barber 50.20 2874
11130 George Gregrory 643.00 7392
11140 Quentin Larson 21.00 9277
11150 Dorothy Pace 777.23 4270
11160 Frieda Flowers 9000.99 6483
11170 Howard Alexander 78.00 2743
11180 Henry Aldritch 55.00 2084
11190 Beatrice Snow 99.99 2987
11200 Kelly Klark 200.00 3874
11210 Mary Gonzalez 440.51 2480
11220 Elly Hand 555.20 2479
11230 Gregory George 431.44 4756
11240 Nancy Alexander 6220.90 9274
11250 Sargent Pepper 16870.50 7777
11260 Linda Gale 20000.70 2974
11270 Charles Reilly 544.45 5973
11280 Chuck Mangers 10.00 5555
11290 Wilson Beckett 6010.10 6666

It would help if you would post the error message, but since you're saying you're going from Mac to Windows, it would point to a line ending issue. Convert your custData.txt file to have Windows end-of-lines (CR + LF) and retry it that way.

It could be that your line
input.getline(tmpString, 70, '\n');
is only looking for \n as the delimiter, when it should be handling \r\n (or something similar), because the Windows line ending is different from the Mac line ending.

The most likely reason is that one of the five strtok() sequence calls is returning NULL because there aren't enough fields on the line.

I don't get a runtime error, but I get a compile time error (on Linux) because you didn't
#include <cstring>
#include <cstdlib>
in main.cpp
or
#include <cstring>
in Cust.cpp
After adding those it compiled and ran fine for me...
What precisely is the error you get?

You didn't specify in which line of the output file the crash happens. Could it be that the last line is missing the newline character (\n) at the end?

Related

C++ : Unable to print the text formatted as expected

I'm trying to print this LIST but it's driving me insane.
So this is the output that I wanted to get :
Veranstaltungen:
1: Freitag 08:00 - 09:30 SU Informatik III Kevin Kaufmann H3
2: Freitag 10:00 - 11:30 Ueb Informatik III Kevin Kaufmann D 114
3: Freitag 12:15 - 13:45 Ueb Informatik III Kevin Kaufmann D 114
4: Montag 10:00 - 11:30 SU Mathe III Nathan Neuling B 301
5: Mittwoch 10:00 - 11:30 Ueb Mathe III Nathan Neuling D 209
6: Donnerstag 16:00 - 17:30 Englisch Sabine Sauber D 419
7: Dienstag 16:00 - 17:30 SU Digitale Systeme Willi Witzig H5
8: Dienstag 17:45 - 19:15 Ueb Digitale Systeme Willi Witzig D 114
This is the print function in my code :
I tried first of all this simple alternative here:
// Prints the heading, then one numbered line per event: weekday, time
// block, event name, teacher name and room name, separated by single
// spaces (no column alignment).
void CEvents::print()
{
    cout << "Veranstaltungen:" << endl;
    int index = 0;
    while (index < counter)
    {
        CWeekday Day = Events[index]->WeekDay;
        cout << index + 1 << ": " << Events[index]->getDay(Day) << " ";
        Events[index]->Block->print();
        cout << " " << Events[index]->Name;
        cout << " " << Events[index]->Teacher->Name;
        cout << " " << Events[index]->Room->Name << endl;
        ++index;
    }
}
I got from this code this output :
Veranstaltungen:
1: Freitag 08:00 - 09:30 SU Informatik III Kevin Kaufmann H3
2: Freitag 10:00 - 11:30 Ueb Informatik III Kevin Kaufmann D 114
3: Freitag 12:15 - 13:45 Ueb Informatik III Kevin Kaufmann D 114
4: Montag 10:00 - 11:30 SU Mathe III Nathan Neuling B 301
5: Mittwoch 10:00 - 11:30 Ueb Mathe III Nathan Neuling D 209
6: Donnerstag 16:00 - 17:30 Englisch Sabine Sauber D 419
7: Dienstag 16:00 - 17:30 SU Digitale Systeme Willi Witzig H5
8: Dienstag 17:45 - 19:15 Ueb Digitale Systeme Willi Witzig D 114
Of course this one won't line up correctly, because the fields don't all have the same length, but at least the first part here is correct.
Then I tried to switch to printf() instead of cout , got a little closer to the result but still not the same :
// Prints the heading, then one numbered line per event with the text
// columns padded to fixed widths.
//
// The '-' flag left-justifies each field; without it printf pads on the
// left, which pushed the text to the right edge of each column. The last
// field is printed with plain %s so lines carry no trailing padding.
// NOTE(review): the widths 10/30/20 must be at least as long as the longest
// weekday, event name and teacher name — widen them if columns still
// collide.
void CEvents::print()
{
    cout << "Veranstaltungen:" << endl ;
    for (int i = 0 ; i < counter;i++)
    {
        CWeekday Day = Events[i]->WeekDay;
        printf("%d: %-10s ",i+1,Events[i]->getDay(Day).c_str());
        Events[i]->Block->print();
        printf(" %-30s %-20s %s\n",Events[i]->Name.c_str(),Events[i]->Teacher->Name.c_str(),Events[i]->Room->Name.c_str());
    }
}
For this code I got this output :
1: Freitag 08:00 - 09:30 SU Informatik III Kevin Kaufmann H3
2: Freitag 10:00 - 11:30 Ueb Informatik III Kevin Kaufmann D 114
3: Freitag 12:15 - 13:45 Ueb Informatik III Kevin Kaufmann D 114
4: Montag 10:00 - 11:30 SU Mathe III Nathan Neuling B 301
5: Mittwoch 10:00 - 11:30 Ueb Mathe III Nathan Neuling D 209
6: Donnerstag 16:00 - 17:30 Englisch Sabine Sauber D 419
7: Dienstag 16:00 - 17:30 SU Digitale Systeme Willi Witzig H5
8: Dienstag 17:45 - 19:15 Ueb Digitale Systeme Willi Witzig D 114
Now second part is correct(the time) and the other parts are not.
How do I make this format correctly ?
Using std::left and std::setw should get you the formatting you want.
I've had to improvise a little as you didn't provide a full example but you should be able to adapt this code into yours:
#include <vector>
#include <string>
#include <iostream>
#include <array>
#include <iomanip>
// Demo: prints a numbered timetable with left-justified, fixed-width
// columns built from std::left and std::setw.
int main()
{
    using Row = std::array<std::string, 5>;

    // Columns: weekday, time block, event, teacher, room.
    const std::vector<Row> data =
    {
        { "Freitag", "08:00 - 09:30", "SU Informatik III", "Kevin Kaufmann", "H3" },
        { "Freitag", "10:00 - 11:30", "Ueb Informatik III", "Kevin Kaufmann", "D 114" },
        { "Freitag", "12:15 - 13:45", "Ueb Informatik III", "Kevin Kaufmann", "D 114" },
        { "Montag", "10:00 - 11:30", "SU Mathe III", "Nathan Neuling", "B 301" },
        { "Mittwoch", "10:00 - 11:30", "Ueb Mathe III", "Nathan Neuling", "D 209" },
        { "Donnerstag", "16:00 - 17:30", "Englisch", "Sabine Sauber", "D 419" },
        { "Dienstag", "16:00 - 17:30", "SU Digitale Systeme", "Willi Witzig", "H5" },
        { "Dienstag", "17:45 - 19:15", "Ueb Digitale Systeme", "Willi Witzig", "D 114" },
    };

    std::cout << "Veranstaltungen:\n";
    std::size_t number = 0;
    for (const Row& row : data)
    {
        ++number;
        std::cout << number << ": ";
        // setw applies only to the next insertion, so it is repeated for
        // every padded column; the room is printed unpadded.
        std::cout << std::left
                  << std::setw(12) << row[0]
                  << std::setw(16) << row[1]
                  << std::setw(22) << row[2]
                  << std::setw(16) << row[3]
                  << row[4] << "\n";
    }
}
https://godbolt.org/z/EqafWs
To fix your problems you need to left justify the text for some fields:
printf("%-30s %-20s %-10s\n",Events[i]->Name.c_str(),Events[i]->Teacher->Name.c_str(),Events[i]->Room->Name.c_str());
Note the minus in the number before s. Additionally I think you will have to make some of the fields wider than this constants
As a side note here is fmt example (note fmt is part of C++20 and currently is not supported by any compiler, so it has to be used as external library):
#include <iostream>
#include <string>
#include <vector>
#include <fmt/core.h>
// Demo: prints the same numbered timetable with fmt::print, using width
// specifiers in the format string for the column layout.
// NOTE(review): std::array is used here, but <array> is not among the
// includes listed above this function.
int main()
{
    // Columns: weekday, time block, event, teacher, room.
    std::vector<std::array<std::string, 5>> data =
    {
        { "Freitag", "08:00 - 09:30", "SU Informatik III", "Kevin Kaufmann", "H3" },
        { "Freitag", "10:00 - 11:30", "Ueb Informatik III", "Kevin Kaufmann", "D 114" },
        { "Freitag", "12:15 - 13:45", "Ueb Informatik III", "Kevin Kaufmann", "D 114" },
        { "Montag", "10:00 - 11:30", "SU Mathe III", "Nathan Neuling", "B 301" },
        { "Mittwoch", "10:00 - 11:30", "Ueb Mathe III", "Nathan Neuling", "D 209" },
        { "Donnerstag", "16:00 - 17:30", "Englisch", "Sabine Sauber", "D 419" },
        { "Dienstag", "16:00 - 17:30", "SU Digitale Systeme", "Willi Witzig", "H5" },
        { "Dienstag", "17:45 - 19:15", "Ueb Digitale Systeme", "Willi Witzig", "D 114" },
    };

    std::cout << "Veranstaltungen:\n";
    for (size_t row = 0; row < data.size(); ++row)
    {
        const auto& x = data[row];
        // Row numbers are 1-based.
        fmt::print("{:2}: {:12} {:16} {:22} {:16} {}\n",
                   row + 1, x[0], x[1], x[2], x[3], x[4]);
    }
}
Live demo

All unique combinations of given length from list of values in Libre Office

I have several, let's say six, different values. Can be numbers from 1 to 6.
I want to quickly list all unique combinations of four, so 1-2-3-4, 1-2-3-5 ... 3-4-5-6, all of them, but without any numbers showing more than once.
I'd like to do it in Libre Office Calc or Libre Office Base, but thus far I haven't had much luck searching for a way to do it. I'd be really grateful for any ideas.
there you go:
1234 1235 1236 1243 1245 1246 1253 1254 1256 1263 1264 1265 1324 1325 1326 1342 1345 1346 1352 1354 1356 1362 1364 1365 1423 1425 1426 1432 1435 1436 1452 1453 1456 1462 1463 1465 1523 1524 1526 1532 1534 1536 1542 1543 1546 1562 1563 1564 1623 1624 1625 1632 1634 1635 1642 1643 1645 1652 1653 1654 2134 2135 2136 2143 2145 2146 2153 2154 2156 2163 2164 2165 2314 2315 2316 2341 2345 2346 2351 2354 2356 2361 2364 2365 2413 2415 2416 2431 2435 2436 2451 2453 2456 2461 2463 2465 2513 2514 2516 2531 2534 2536 2541 2543 2546 2561 2563 2564 2613 2614 2615 2631 2634 2635 2641 2643 2645 2651 2653 2654 3124 3125 3126 3142 3145 3146 3152 3154 3156 3162 3164 3165 3214 3215 3216 3241 3245 3246 3251 3254 3256 3261 3264 3265 3412 3415 3416 3421 3425 3426 3451 3452 3456 3461 3462 3465 3512 3514 3516 3521 3524 3526 3541 3542 3546 3561 3562 3564 3612 3614 3615 3621 3624 3625 3641 3642 3645 3651 3652 3654 4123 4125 4126 4132 4135 4136 4152 4153 4156 4162 4163 4165 4213 4215 4216 4231 4235 4236 4251 4253 4256 4261 4263 4265 4312 4315 4316 4321 4325 4326 4351 4352 4356 4361 4362 4365 4512 4513 4516 4521 4523 4526 4531 4532 4536 4561 4562 4563 4612 4613 4615 4621 4623 4625 4631 4632 4635 4651 4652 4653 5123 5124 5126 5132 5134 5136 5142 5143 5146 5162 5163 5164 5213 5214 5216 5231 5234 5236 5241 5243 5246 5261 5263 5264 5312 5314 5316 5321 5324 5326 5341 5342 5346 5361 5362 5364 5412 5413 5416 5421 5423 5426 5431 5432 5436 5461 5462 5463 5612 5613 5614 5621 5623 5624 5631 5632 5634 5641 5642 5643 6123 6124 6125 6132 6134 6135 6142 6143 6145 6152 6153 6154 6213 6214 6215 6231 6234 6235 6241 6243 6245 6251 6253 6254 6312 6314 6315 6321 6324 6325 6341 6342 6345 6351 6352 6354 6412 6413 6415 6421 6423 6425 6431 6432 6435 6451 6452 6453 6512 6513 6514 6521 6523 6524 6531 6532 6534 6541 6542 6543
PS: I don't think there is a way to generate them in LibreOffice itself, since I'm not aware of any programming language built into that program; however, you can compute them online or with your own script.
If you need the script, save this code in a .html file and open it in a browser
<html>
<body>
<script>
/**
 * Reports whether any element of `arr` differs from `n`.
 *
 * Iterates with for...of: for...in walks the array's index keys ("0",
 * "1", ...), so the comparison was made against positions, not values.
 *
 * @param {Array} arr values to inspect
 * @param {*} n value every element is compared with (loose inequality)
 * @returns {boolean} true if at least one element != n, false otherwise
 *                    (including for an empty array)
 */
function finish(arr, n){
    for (const el of arr)
        if (el != n)
            return true;
    return false;
}
/**
 * Advances `arr` in place to the next tuple, treating it as a base-`n`
 * counter whose least-significant digit is at position 0.
 *
 * @param {number[]} arr digits, each in the range 0..n-1; modified in place
 * @param {number} n number of distinct digit values
 * @returns {boolean} true if `arr` now holds the next tuple; false if every
 *                    digit was at n-1, in which case all digits are reset
 *                    to 0
 */
function updateIndexes(arr, n){
    // `let` keeps the counter local; undeclared, it became an implicit
    // global (a ReferenceError in strict mode).
    for (let i = 0; i < arr.length; i++) {
        if (arr[i] < n - 1) {
            arr[i]++;
            return true;
        }
        arr[i] = 0; // carry into the next position
    }
    return false;
}
// Values to draw from, tuple length, and the text placed between the
// values of one tuple.
let from = [1,2,3,4,5,6].map((el)=>el.toString());
let length = 4;
let separator = '-';

// One index into `from` per output position, all starting at 0.
const indexes = Array(length).fill(0);
const results = [];
do {
    const picked = indexes.map(index => from[index]);
    // Keep only tuples without a repeated value. Comparing whole values
    // (not the characters of the joined string) also works for values
    // longer than one character, such as "10".
    if (new Set(picked).size === picked.length)
        results.push(picked.join(separator));
} while (updateIndexes(indexes, from.length));

// Build the markup once instead of re-parsing body.innerHTML per result.
const body = document.getElementsByTagName('body')[0];
body.innerHTML += results.map(el => el + '<br>').join('');
</script>
</body>
</html>
what you can customize is:
let from = [1,2,3,4,5,6]; to what numbers/letters you want
let length = 4; to the length of the string you want
let separator = '-' to the separator you want (the separator here intended is the one between each sequence generated, so in this case will be 1-2-3-4 for example)
Python has a library called itertools that does this.
import itertools

# Every ordered selection of 4 distinct values taken from 1..6.
orderings = itertools.permutations(range(1, 7), 4)
for ordering in orderings:
    # Each tuple is printed as "a-b-c-d, " with no newline in between.
    print("-".join(map(str, ordering)) + ", ", end='')
Result:
1-2-3-4, 1-2-3-5, 1-2-3-6, 1-2-4-3, 1-2-4-5, 1-2-4-6, 1-2-5-3, 1-2-5-4, 1-2-5-6, 1-2-6-3, 1-2-6-4, 1-2-6-5, 1-3-2-4, 1-3-2-5, 1-3-2-6, 1-3-4-2, 1-3-4-5, 1-3-4-6, 1-3-5-2, 1-3-5-4, 1-3-5-6, 1-3-6-2, 1-3-6-4, 1-3-6-5, 1-4-2-3, 1-4-2-5, 1-4-2-6, 1-4-3-2, 1-4-3-5, 1-4-3-6, 1-4-5-2, 1-4-5-3, 1-4-5-6, 1-4-6-2, 1-4-6-3, 1-4-6-5, 1-5-2-3, 1-5-2-4, 1-5-2-6, 1-5-3-2, 1-5-3-4, 1-5-3-6, 1-5-4-2, 1-5-4-3, 1-5-4-6, 1-5-6-2, 1-5-6-3, 1-5-6-4, 1-6-2-3, 1-6-2-4, 1-6-2-5, 1-6-3-2, 1-6-3-4, 1-6-3-5, 1-6-4-2, 1-6-4-3, 1-6-4-5, 1-6-5-2, 1-6-5-3, 1-6-5-4, 2-1-3-4, 2-1-3-5, 2-1-3-6, 2-1-4-3, 2-1-4-5, 2-1-4-6, 2-1-5-3, 2-1-5-4, 2-1-5-6, 2-1-6-3, 2-1-6-4, 2-1-6-5, 2-3-1-4, 2-3-1-5, 2-3-1-6, 2-3-4-1, 2-3-4-5, 2-3-4-6, 2-3-5-1, 2-3-5-4, 2-3-5-6, 2-3-6-1, 2-3-6-4, 2-3-6-5, 2-4-1-3, 2-4-1-5, 2-4-1-6, 2-4-3-1, 2-4-3-5, 2-4-3-6, 2-4-5-1, 2-4-5-3, 2-4-5-6, 2-4-6-1, 2-4-6-3, 2-4-6-5, 2-5-1-3, 2-5-1-4, 2-5-1-6, 2-5-3-1, 2-5-3-4, 2-5-3-6, 2-5-4-1, 2-5-4-3, 2-5-4-6, 2-5-6-1, 2-5-6-3, 2-5-6-4, 2-6-1-3, 2-6-1-4, 2-6-1-5, 2-6-3-1, 2-6-3-4, 2-6-3-5, 2-6-4-1, 2-6-4-3, 2-6-4-5, 2-6-5-1, 2-6-5-3, 2-6-5-4, 3-1-2-4, 3-1-2-5, 3-1-2-6, 3-1-4-2, 3-1-4-5, 3-1-4-6, 3-1-5-2, 3-1-5-4, 3-1-5-6, 3-1-6-2, 3-1-6-4, 3-1-6-5, 3-2-1-4, 3-2-1-5, 3-2-1-6, 3-2-4-1, 3-2-4-5, 3-2-4-6, 3-2-5-1, 3-2-5-4, 3-2-5-6, 3-2-6-1, 3-2-6-4, 3-2-6-5, 3-4-1-2, 3-4-1-5, 3-4-1-6, 3-4-2-1, 3-4-2-5, 3-4-2-6, 3-4-5-1, 3-4-5-2, 3-4-5-6, 3-4-6-1, 3-4-6-2, 3-4-6-5, 3-5-1-2, 3-5-1-4, 3-5-1-6, 3-5-2-1, 3-5-2-4, 3-5-2-6, 3-5-4-1, 3-5-4-2, 3-5-4-6, 3-5-6-1, 3-5-6-2, 3-5-6-4, 3-6-1-2, 3-6-1-4, 3-6-1-5, 3-6-2-1, 3-6-2-4, 3-6-2-5, 3-6-4-1, 3-6-4-2, 3-6-4-5, 3-6-5-1, 3-6-5-2, 3-6-5-4, 4-1-2-3, 4-1-2-5, 4-1-2-6, 4-1-3-2, 4-1-3-5, 4-1-3-6, 4-1-5-2, 4-1-5-3, 4-1-5-6, 4-1-6-2, 4-1-6-3, 4-1-6-5, 4-2-1-3, 4-2-1-5, 4-2-1-6, 4-2-3-1, 4-2-3-5, 4-2-3-6, 4-2-5-1, 4-2-5-3, 4-2-5-6, 4-2-6-1, 4-2-6-3, 4-2-6-5, 4-3-1-2, 4-3-1-5, 4-3-1-6, 4-3-2-1, 4-3-2-5, 4-3-2-6, 4-3-5-1, 4-3-5-2, 4-3-5-6, 4-3-6-1, 4-3-6-2, 4-3-6-5, 4-5-1-2, 4-5-1-3, 4-5-1-6, 4-5-2-1, 4-5-2-3, 4-5-2-6, 
4-5-3-1, 4-5-3-2, 4-5-3-6, 4-5-6-1, 4-5-6-2, 4-5-6-3, 4-6-1-2, 4-6-1-3, 4-6-1-5, 4-6-2-1, 4-6-2-3, 4-6-2-5, 4-6-3-1, 4-6-3-2, 4-6-3-5, 4-6-5-1, 4-6-5-2, 4-6-5-3, 5-1-2-3, 5-1-2-4, 5-1-2-6, 5-1-3-2, 5-1-3-4, 5-1-3-6, 5-1-4-2, 5-1-4-3, 5-1-4-6, 5-1-6-2, 5-1-6-3, 5-1-6-4, 5-2-1-3, 5-2-1-4, 5-2-1-6, 5-2-3-1, 5-2-3-4, 5-2-3-6, 5-2-4-1, 5-2-4-3, 5-2-4-6, 5-2-6-1, 5-2-6-3, 5-2-6-4, 5-3-1-2, 5-3-1-4, 5-3-1-6, 5-3-2-1, 5-3-2-4, 5-3-2-6, 5-3-4-1, 5-3-4-2, 5-3-4-6, 5-3-6-1, 5-3-6-2, 5-3-6-4, 5-4-1-2, 5-4-1-3, 5-4-1-6, 5-4-2-1, 5-4-2-3, 5-4-2-6, 5-4-3-1, 5-4-3-2, 5-4-3-6, 5-4-6-1, 5-4-6-2, 5-4-6-3, 5-6-1-2, 5-6-1-3, 5-6-1-4, 5-6-2-1, 5-6-2-3, 5-6-2-4, 5-6-3-1, 5-6-3-2, 5-6-3-4, 5-6-4-1, 5-6-4-2, 5-6-4-3, 6-1-2-3, 6-1-2-4, 6-1-2-5, 6-1-3-2, 6-1-3-4, 6-1-3-5, 6-1-4-2, 6-1-4-3, 6-1-4-5, 6-1-5-2, 6-1-5-3, 6-1-5-4, 6-2-1-3, 6-2-1-4, 6-2-1-5, 6-2-3-1, 6-2-3-4, 6-2-3-5, 6-2-4-1, 6-2-4-3, 6-2-4-5, 6-2-5-1, 6-2-5-3, 6-2-5-4, 6-3-1-2, 6-3-1-4, 6-3-1-5, 6-3-2-1, 6-3-2-4, 6-3-2-5, 6-3-4-1, 6-3-4-2, 6-3-4-5, 6-3-5-1, 6-3-5-2, 6-3-5-4, 6-4-1-2, 6-4-1-3, 6-4-1-5, 6-4-2-1, 6-4-2-3, 6-4-2-5, 6-4-3-1, 6-4-3-2, 6-4-3-5, 6-4-5-1, 6-4-5-2, 6-4-5-3, 6-5-1-2, 6-5-1-3, 6-5-1-4, 6-5-2-1, 6-5-2-3, 6-5-2-4, 6-5-3-1, 6-5-3-2, 6-5-3-4, 6-5-4-1, 6-5-4-2, 6-5-4-3,
LibreOffice allows Python scripting, so the code can be added to Calc or Base by including it in a Python-UNO macro.

python: Splitting Main address into primary and secondary addresses

I need help to create a python function to make Main street address (usually house number and street name) in Address field. Additional address information (Suite, Unit, Space, PO Box, other additional details) saved to Address2
Here are few examples of Address format which need to split.
780 Main Street, P.O. Box 4109 -> 780 Main Street / PO Box 4109
438 University Ave. P.O. Box 5 -> 438 University Ave. / PO Box 5
HIGHWAY 10 BOX 39 -> HIGHWAY 10 / PO Box 39
98 LATHROP ROAD - BOX 147 -> 98 LATHROP ROAD / PO Box 147
396 S MAIN/P.O. BOX 820 -> 396 S MAIN / PO Box 820
HWY 18 AND HWY 128 (BOX 1305) -> HWY 18 AND HWY 128 / PO Box 1305
808 Innisfil Beach Rd Box 2 -> 808 Innisfil Beach Rd / PO Box 2
100 St 101 Ave, P.o. Box 1620 -> 100 St 101 Ave / P.O. Box 1620
201 Del Rio (p.O. Box 309 -> 201 Del Rio / PO Box 309
BOX 487 2054 HWY 1 EAST -> 2054 HWY 1 EAST / PO Box 487
P O BOX 2820 41340 BIG BEAR BL -> 41340 BIG BEAR BL / PO Box 2820
2813 HWY 15 - P O BOX 1083 -> 2813 HWY 15 / PO Box 1083
P.o. Box 838 2540 Hwy 43 West -> 2540 Hwy 43 West / POBox 838
I have tried the code below, but it can remove important information from the address and leave PO Box data in the address (instead of moving all PO Box data into Address2).
input_array = [
'780 Main Street, P.O. Box 410',
'438 University Ave. P.O. Box 5 ',
'HIGHWAY 10 BOX 39',
'98 LATHROP ROAD - BOX 147',
'396 S MAIN/P.O. BOX 820 ',
'HWY 18 AND HWY 128 (BOX 1305)',
'808 Innisfil Beach Rd Box 2',
'100 St 101 Ave, P.o. Box 1620',
'201 Del Rio (p.O. Box 309 ',
'BOX 487 2054 HWY 1 EAST ',
'P O BOX 2820 41340 BIG BEAR BL',
'2813 HWY 15 - P O BOX 1083 ',
'P.o. Box 838 2540 Hwy 43 West'
]
# Attempt to split each entry of input_array into a street address and a
# PO Box number, printing one "address => ... address2 => PO Box N" line.
# NOTE(review): the indentation of this snippet was lost, so the nesting
# described below is the most plausible reading, not a certainty.
# `re` is imported but not used anywhere in this snippet.
import re
for inputs in input_array:
# Work on a lower-cased copy, so the printed address loses its original case.
inputs = (inputs).lower()
# Tokens are produced by splitting on a single space.
for a in (inputs.split(' ')):
# Matches any token that contains "box" (e.g. "box", "(box").
if 'box' in a:
# Position of the first token equal to `a`; assigned but not read afterwards.
box_index = (inputs.split(' ').index(a))
# The token after "box"; raises IndexError when "box" is the last token.
box_num = ((inputs.split(' ')[(inputs.split(' ').index(a)) + 1]))
# Only proceed when the following token is purely digits.
if (((inputs.split(' ')[(inputs.split(' ').index(a)) + 1])).isdigit()):
# Treat the previous token as a "P.O." prefix if it contains 'p' or 'o'.
# When "box" is the first token, index -1 wraps around to the last token.
if 'p' in ((inputs.split(' ')[(inputs.split(' ').index(a)) - 1])) or 'o' in ((inputs.split(' ')[(inputs.split(' ').index(a)) - 1])):
# str.replace removes EVERY occurrence of that token's text in the line,
# which is how unrelated parts of the address can be deleted.
inputs = inputs.replace(((inputs.split(' ')[(inputs.split(' ').index(a)) - 1])), '')
else:
# Removes every occurrence of the number text, including inside other numbers.
inputs = inputs.replace(((inputs.split(' ')[(inputs.split(' ').index(a)) + 1])), '')
# Remove the "box" token itself and the separator characters - / ,
inputs = inputs.replace(a, '')
inputs = inputs.replace('-', '')
inputs = inputs.replace('/', '')
inputs = inputs.replace(',', '')
print ('address => ',inputs,' address2 => ', 'PO Box ', box_num)
# Stop after the first "box" token of this entry.
break
Need Improvement in above function to make it more compatible with desired result.
Interesting enough question. Here's regex which works for all of your examples, but I can't say for sure if it will work all the way for your project.
Read more regex documentation and play with regular expressions here.
Here's code:
import re

streets = [
    '780 Main Street, P.O. Box 410',
    '438 University Ave. P.O. Box 5 ',
    'HIGHWAY 10 BOX 39',
    '98 LATHROP ROAD - BOX 147',
    '396 S MAIN/P.O. BOX 820 ',
    'HWY 18 AND HWY 128 (BOX 1305)',
    '808 Innisfil Beach Rd Box 2',
    '100 St 101 Ave, P.o. Box 1620',
    '201 Del Rio (p.O. Box 309 ',
    'BOX 487 2054 HWY 1 EAST ',
    'P O BOX 2820 41340 BIG BEAR BL',
    '2813 HWY 15 - P O BOX 1083 ',
    'P.o. Box 838 2540 Hwy 43 West'
]

# Leading punctuation/whitespace, an optional "P.O."/"P O"/"PO" prefix,
# "box <digits>" (group 3), then trailing punctuation/whitespace.
regex = r'([^a-z0-9]*(p[\s.]?o)?[\s.]*?box (\d+)[^a-z0-9]*)'


def split_po_box(street):
    """Split a raw address into (street address, PO box number).

    Returns ``(address, number)`` where ``number`` is the digit string that
    follows "box", or ``(street.strip(), None)`` when the address contains
    no PO box.
    """
    match = re.search(regex, street, flags=re.IGNORECASE)
    if match is None:
        return street.strip(), None
    # Cut the matched span out by position. str.strip(chunk) would treat
    # the chunk as a SET of characters and could also eat leading/trailing
    # address characters such as a house number made of the same digits.
    remainder = '{} {}'.format(street[:match.start()], street[match.end():])
    return remainder.strip(), match.group(3)


for street in streets:
    cleaned_address, po_box_number = split_po_box(street)
    if po_box_number is None:
        print(cleaned_address)
    else:
        print('{} / PO Box {}'.format(cleaned_address, po_box_number))

How to delete words from a dataframe column that are present in dictionary in Pandas

An extension to :
Removing list of words from a string
I have following dataframe and I want to delete frequently occuring words from df.name column:
df :
name
Bill Hayden
Rock Clinton
Bill Gates
Vishal James
James Cameroon
Micky James
Michael Clark
Tony Waugh
Tom Clark
Tom Bill
Avinash Clinton
Shreyas Clinton
Ramesh Clinton
Adam Clark
I'm creating a new dataframe with words and their frequency with following code :
# Split every name into words, stack them into one Series and count how
# often each word occurs; the counts become a one-column frame.
df = data.name.str.split(expand=True).stack().value_counts().to_frame()
# Move the words out of the index into a regular column.
df = df.reset_index(level=0)
df.columns = ['word', 'freq']
# Keep only the words that occur at least 3 times.
df = df.loc[df['freq'] >= 3]
which will result in
df2 :
word freq
Clinton 4
Bill 3
James 3
Clark 3
Then I'm converting it into a dictionary with following code snippet :
d = dict(zip(df['word'], df['freq']))
Now if I've to remove words from df.name that are in d(which is dictionary, with word : freq), I'm using following code snippet :
def check_thresh_word(merc, d):
    """Return False if any space-separated word of ``merc`` is a key of ``d``.

    Returns True only when none of the words is in ``d``. Every word is
    examined before True is returned, so a frequent word that is not the
    first word is still detected.
    """
    return not any(word in d for word in merc.split(' '))
def rm_freq_occurences(merc, d):
    """Return ``merc`` with every space-separated word that is a key of ``d`` removed.

    The remaining words are re-joined with single spaces. No separate
    "does it contain a frequent word?" pass is needed: splitting on ' '
    and joining with ' ' reproduces the input exactly when nothing is
    filtered out, so one scan per name is enough.
    """
    return ' '.join(word for word in merc.split(' ') if word not in d)
df['new_name'] = df['name'].apply(lambda x: rm_freq_occurences(x,d))
But in reality my dataframe (df) contains nearly 240k rows and I have to use a threshold (thresh=3 in the sample above) greater than 100.
So the above code takes a lot of time to run because of the complex search.
Is there any efficient way to make it faster?
Following is a desired output :
name
Hayden
Rock
Gates
Vishal
Cameroon
Micky
Michael
Tony Waugh
Tom
Tom
Avinash
Shreyas
Ramesh
Adam
Thanks in advance!!!!!!!
Use replace with a regex created by joining all values of the column word, then strip the trailing whitespace:
data.name = data.name.replace('|'.join(df['word']), '', regex=True).str.strip()
Another solution is add \s* for select zero or more whitespaces:
# Raw string: '\s' in a normal string literal is an invalid escape sequence.
pat = '|'.join([r'\s*{}\s*'.format(x) for x in df['word']])
print (pat)
\s*Clinton\s*|\s*James\s*|\s*Bill\s*|\s*Clark\s*
data.name = data.name.replace(pat, '', regex=True)
print (data)
name
0 Hayden
1 Rock
2 Gates
3 Vishal
4 Cameroon
5 Micky
6 Michael
7 Tony Waugh
8 Tom
9 Tom
10 Avinash
11 Shreyas
12 Ramesh
13 Adam

SAS - plot actual and ARIMA model

How to plot in SAS, the estimated ARIMA model with the actual data on the same graph? The plot I've got, using the code below, does not show the actual and the model clearly. The model estimated is MA(15).
data project;
input dj 1-6 aus 7-14;
datalines;
3651 1962.2
3645 1977.1
3626 1968.4
3634 1952.0
3620.5 1962.5
3607 1967.8
3589 1939.5
3590 1931.4
3622 1941.5
3634 1938.3
3616 1912.9
3634 1903.6
3631 1902.6
3613 1925.5
3576 1924.1
3537 1925.2
3547 1919.3
3540 1928.6
3543 1946.5
3568 1943.0
3566 1942.3
3566 1951.4
3555 1964.4
3581 1972.7
3578 1977.0
3587 1998.5
3599 2018.8
3584 2022.5
3585 2026.2
3593 2039.8
3593 2028.0
3603 2038.6
3622 2062.0
3630 2074.1
3642 2085.5
3635 2075.5
3645 2051.7
3636 2060.4
3649 2061.4
3674 2046.9
3672 2055.7
3665 2068.3
3688 2076.3
3681 2112.2
3693 2132.4
3698 2125.3
3662 2108.4
3625 2101.6
3643 2079.9
3648 2054.2
3640 2050.8
3664 2042.9
3662 2052.4
3684 2074.0
3678 2082.9
3711 2083.8
3704 2104.3
3685 2108.0
3694 2083.2
3670 2049.3
3674 2009.6
3688 2032.4
3686 2042.0
3684 2043.1
3678 2010.3
3684 2009.4
3697 2005.4
3702 2047.3
3704 2047.4
3710 2053.7
3719 2073.9
3734 2096.0
3730 2095.7
3741 2084.9
3764 2094.5
3743 2086.6
3717 2069.9
3726 2074.8
3752 2080.2
3755 2076.0
3745 2067.0
3762 2053.2
3758 2068.8
3776 2089.2
3794 2126.9
3776 2154.5
3757 2173.6
3784 2174.3
3799 2193.4
3804 2200.3
3821 2186.0
3866 2198.6
3850 2206.7
3849 2195.6
3842 2177.5
3867 2206.4
3870 2238.2
3870 2232.1
3884 2248.2
3892 2266.2
3914 2250.3
3913 2224.5
3895 2221.9
3926 2250.7
3945 2259.9
3978 2310.8
3964 2310.1
3976 2312.1
3968 2340.6
3871 2332.8
3906 2281.1
3906 2305.4
3932 2270.9
3895 2234.3
3895 2241.4
3904 2238.6
3928 2234.0
3937 2249.0
3923 2240.9
3888 2223.2
3900 2178.5
3912 2202.5
3892 2218.9
3840 2197.0
3839 2148.8
3832 2180.1
3809 2181.7
3832 2154.0
3824 2151.4
3832 2116.8
3856 2144.7
3852 2171.7
3853 2146.8
3831 2155.1
3863 2153.1
3863 2179.3
3850 2172.5
3848 2173.5
3865 2164.4
3896 2163.5
3865 2140.5
3863 2140.8
3869 2180.9
3821 2169.8
3775 2151.6
3762 2108.9
3699 2100.8
3627 2092.4
3636 2053.1
3675 2050.0
3680 2084.1
3693 2087.4
3674 2082.0
3689 2076.0
3682 2095.1
3662 2114.7
3663 2095.0
3662 2080.6
3620 2095.9
3620 2061.4
3599 2046.6
3653 2029.6
3649 2042.5
3700 2069.4
3684 2059.7
3668 2069.1
3682 2066.1
3701 2047.9
3714 2044.2
3698 2018.4
3696 1988.1
3670 2004.3
3629 2009.3
3656 2008.2
3629 2034.6
3653 2041.4
3660 2070.0
3672 2110.9
3721 2096.0
3733 2107.8
3759 2093.7
3766 2103.9
3742 2121.0
3745 2132.4
3755 2105.9
3754 2096.9
3757 2102.2
3757.5 2091.8
3758 2081.8
3761 2097.2
3759 2077.0
3772 2078.6
3768 2072.5
3756 2070.2
3749 2079.7
3753 2076.7
3773 2069.4
3815 2076.6
3790 2074.4
3811 2056.0
3777 2051.2
3742 2024.4
3708 1993.6
3725 2010.9
3699 2022.5
3637 2017.9
3686 1957.4
3670 1974.4
3667 1975.1
3625 1989.1
3647 1965.8
3649.5 1987.1
3652 2003.4
3674 1991.2
3688 1962.2
3709 1964.9
3703 1961.2
3703 1972.9
3704 1978.6
3739 2007.7
3754 2058.0
3755 2072.3
3748 2077.4
3727 2078.6
3732 2049.2
3735 2052.5
3742 2048.3
3736 2041.3
3720 2041.7
3731 2042.1
3764 2061.5
3798 2082.1
3796 2086.9
3793 2072.3
3766 2083.5
3747 2091.9
3754 2081.1
3756 2086.8
3767 2076.5
3751 2062.8
3769 2052.0
3760 2055.9
3785 2040.0
3776 2059.5
3755 2066.8
3755 2061.3
3751 2063.6
3776 2051.6
3847 2061.1
3830 2077.8
3881 2077.2
3899 2111.8
3917 2116.5
3913 2122.1
3901 2105.5
3886 2107
3892.5 2095.5
3899 2103.6
3886 2104.4
3908 2089.1
3875 2070.6
3860 2032.9
3880 2043.6
3895 2050.5
3954 2050.8
3933 2059
3937 2049.1
3869 2045.1
3852 2026.6
3837 2028.2
3832 2027.7
3849 2030
3863 2013.8
3878 2014.2
3855 2030.6
3843 2028.7
3847 2030.9
3801 1998
3787 1979.8
3776 1976.3
3797 1967.5
3821 1988
3877 2003.6
3875 2002.6
3890 1998.9
3910 2006
3924 2014.2
3918 2003.4
3936 2013.4
3911 2016.3
3891 2034.6
3855 2034.2
;
run;
/* List the contents of the project data set to check that it was read in. */
proc print data= project;
run;
/* Identification step for the undifferenced aus series. The IDENTIFY
   statement needs its own terminating semicolon; without it, RUN is parsed
   as part of the statement. */
proc arima data=project;
identify var=aus;
run;
/* Fit the model to aus and write the forecasts to the data set results.
   plots(only)=(forecast(FORECAST)) requests only the forecast plot. */
proc arima data = project plots(only)=(forecast(FORECAST));
/* aus(1) takes the first difference of aus; nlag=20 sets the number of lags. */
identify var=aus(1) nlag=20;
/* NOTE(review): q=(1,15) appears to request moving-average terms at lags 1
   and 15 only, not a full MA(15) — confirm against the intended model. */
estimate q=(1,15);
/* Forecast 90 periods ahead and save the output in results. */
forecast lead = 90 out= results;
run;
I'd suggest using the new SAS GTL (graphics template language) to do it. Plenty of examples can be found in the SAS documentation:
http://support.sas.com/documentation/cdl/en/grstatgraph/65377/HTML/default/viewer.htm#p07ssfftzsass9n1x8lb94xnref5.htm
Note that GTL is only available in newer versions of SAS (9.1 onwards I believe?).