As I am still quite new to web scraping I am currently practicing some basics such as this one. I have scraped the categories from 'th' tag and the players from the 'tr' tag and appended it to a couple empty lists. The categories come out fine from get_text(), but when I try printing the players it has a number rank before the first letter of the name, and the player's team abbreviation letters after the last name.
3 things I am trying to do:
1) Output only the first and last name of each player. I tried slicing the strings in the list, but I cannot figure out an easy way to do it. There is probably a quicker way, such as selecting by class inside the tags or calling soup.findAll again on the HTML, or something else I am unaware of, but I currently do not know how or what I am missing.
2)take the number ranks before the name and append it to an empty list.
3)take the 3 last abbreviated letters and append it to an empty list
Any suggestions would be much appreciated!
from bs4 import BeautifulSoup as bs4
import requests
import pandas as pd
from time import sleep
# Fetch the ESPN scoring-leaders page and parse it.
url ='https://www.espn.com/nba/stats/player/_/table/offensive/sort/avgPoints/dir/desc'
source = requests.get(url)
soup = bs4(source.text, 'lxml')

# Text of every header cell on the page.
categories = [header.get_text() for header in soup.findAll('th')]

# Text of every table row; keep rows 1..50 (row 0 is skipped by the slice).
players = [row.get_text() for row in soup.findAll('tr')][1:51]

print(categories)
print(players)
Apis are always the best way to go in my opinion.
However, this can also be done with pandas .read_html() (it uses beautifulsoup under the hood to parse the table).
import pandas as pd
url = 'https://www.espn.com/nba/stats/player/_/table/offensive/sort/avgPoints/dir/desc'
dfs = pd.read_html(url)

# The first table's 'Name' column holds the player name immediately followed
# by the upper-case team abbreviation; split it into 'Name' and 'Team'.
names = dfs[0]
names[['Name', 'Team']] = names['Name'].str.extract('^(.*?)([A-Z]+)$', expand=True)

# Put the second table's columns next to the name columns, row by row.
df = names.join(dfs[1])
Output:
print (df[['RK','Name','Team','POS']])
RK Name Team POS
0 1 James Harden HOU SG
1 2 Stephen Curry GS PG
2 3 Bradley Beal WSH SG
3 4 Trae Young ATL PG
4 5 Kevin Durant BKN SF
5 6 CJ McCollum POR SG
6 7 Kyrie Irving BKN PG
7 8 Jaylen Brown BOS SG
8 9 Giannis Antetokounmpo MIL PF
9 10 Jayson Tatum BOS PF
10 11 Damian Lillard POR PG
11 12 Luka Doncic DAL PG
12 13 Collin Sexton CLE PG
13 14 Paul George LAC SG
14 15 Brandon Ingram NO SF
15 16 Nikola Jokic DEN C
16 17 LeBron James LAL SF
17 18 Zach LaVine CHI SG
18 19 Christian Wood HOU PF
19 20 Kawhi Leonard LAC SF
20 21 Joel Embiid PHI C
21 22 Jerami Grant DET PF
22 23 Anthony Davis LAL PF
23 24 Jamal Murray DEN PG
24 25 Julius Randle NY PF
25 26 Malcolm Brogdon IND PG
26 27 Fred VanVleet TOR SG
27 28 Nikola Vucevic ORL C
28 28 Donovan Mitchell UTAH SG
29 30 Terry Rozier CHA PG
30 31 Devin Booker PHX SG
31 32 Khris Middleton MIL SF
32 33 Terrence Ross ORL SG
33 33 Victor Oladipo IND SG
34 35 Russell Westbrook WSH PG
35 36 Domantas Sabonis IND PF
36 36 De'Aaron Fox SAC PG
37 38 Zion Williamson NO SF
38 39 Tobias Harris PHI SF
39 40 Bam Adebayo MIA C
40 41 DeMar DeRozan SA SG
41 41 D'Angelo Russell MIN SG
42 43 Gordon Hayward CHA SF
43 44 Kyle Lowry TOR PG
44 44 Shai Gilgeous-Alexander OKC SG
45 46 Mike Conley UTAH PG
46 47 Malik Beasley MIN SG
47 48 RJ Barrett NY SG
48 49 Thomas Bryant WSH C
49 50 Pascal Siakam TOR PF
Is this what you want?
import requests
from bs4 import BeautifulSoup
from tabulate import tabulate
url = "https://www.espn.com/nba/stats/player/_/table/offensive/sort/avgPoints/dir/desc"
page = requests.get(url)
soup = BeautifulSoup(page.text, "html.parser")

# Collect [rank, name, team] for every table cell that holds both a link and
# a span; the rank is derived from the cell's 1-based position on the page.
table_data = []
for position, cell in enumerate(soup.find_all("td", class_="Table__TD"), start=1):
    link = cell.find("a")
    team_tag = cell.find("span")
    if not (link and team_tag):
        continue
    table_data.append([position // 2, link.getText(), team_tag.getText()])

print(tabulate(table_data, headers=["Rank", "Name", "Team"], tablefmt="pretty"))
Output:
| Rank | Name | Team |
+------+-------------------------+------+
| 1 | James Harden | HOU |
| 2 | Stephen Curry | GS |
| 3 | Bradley Beal | WSH |
| 4 | Trae Young | ATL |
| 5 | Kevin Durant | BKN |
| 6 | CJ McCollum | POR |
| 7 | Kyrie Irving | BKN |
| 8 | Jaylen Brown | BOS |
| 9 | Giannis Antetokounmpo | MIL |
| 10 | Jayson Tatum | BOS |
| 11 | Damian Lillard | POR |
| 12 | Luka Doncic | DAL |
| 13 | Collin Sexton | CLE |
| 14 | Paul George | LAC |
| 15 | Brandon Ingram | NO |
| 16 | Nikola Jokic | DEN |
| 17 | LeBron James | LAL |
| 18 | Zach LaVine | CHI |
| 19 | Christian Wood | HOU |
| 20 | Kawhi Leonard | LAC |
| 21 | Joel Embiid | PHI |
| 22 | Jerami Grant | DET |
| 23 | Anthony Davis | LAL |
| 24 | Jamal Murray | DEN |
| 25 | Julius Randle | NY |
| 26 | Malcolm Brogdon | IND |
| 27 | Fred VanVleet | TOR |
| 28 | Nikola Vucevic | ORL |
| 29 | Donovan Mitchell | UTAH |
| 30 | Terry Rozier | CHA |
| 31 | Devin Booker | PHX |
| 32 | Khris Middleton | MIL |
| 33 | Terrence Ross | ORL |
| 34 | Victor Oladipo | IND |
| 35 | Russell Westbrook | WSH |
| 36 | Domantas Sabonis | IND |
| 37 | De'Aaron Fox | SAC |
| 38 | Zion Williamson | NO |
| 39 | Tobias Harris | PHI |
| 40 | Bam Adebayo | MIA |
| 41 | DeMar DeRozan | SA |
| 42 | D'Angelo Russell | MIN |
| 43 | Gordon Hayward | CHA |
| 44 | Kyle Lowry | TOR |
| 45 | Shai Gilgeous-Alexander | OKC |
| 46 | Mike Conley | UTAH |
| 47 | Malik Beasley | MIN |
| 48 | RJ Barrett | NY |
| 49 | Thomas Bryant | WSH |
| 50 | Pascal Siakam | TOR |
+------+-------------------------+------+
Always ask yourself: is there an easier way?
Yes, there is, and you should take it :)
If you want to scrape, first check whether you really have to scrape the content from the website or whether there is an API that provides the information in a well-structured form.
Example requesting api
import requests
import pandas as pd
url = "https://site.web.api.espn.com/apis/common/v3/sports/basketball/nba/statistics/byathlete?region=us&lang=en&contentorigin=espn&isqualified=true&page=1&limit=50&sort=offensive.avgPoints%3Adesc&season=2021&seasontype=2"
headers = {"user-agent": "Mozilla/5.0"}
response = requests.get(url, headers=headers)
response.raise_for_status()

# One record per athlete in the JSON payload; the rank is the 1-based
# position in the order the API returned them.
ranking = [
    {
        'rank': position,
        'name': entry['athlete']['displayName'],
        'team': entry['athlete']['teamShortName'],
        'category': entry['athlete']['position']['abbreviation'],
    }
    for position, entry in enumerate(response.json()['athletes'], start=1)
]
df = pd.DataFrame(ranking)
df
Output data frame
rank name team category
1 James Harden HOU SG
2 Stephen Curry GS PG
3 Bradley Beal WSH SG
4 Trae Young ATL PG
5 Kevin Durant BKN SF
6 CJ McCollum POR SG
7 Kyrie Irving BKN PG
8 Jaylen Brown BOS SG
9 Giannis Antetokounmpo MIL PF
10 Jayson Tatum BOS PF
But to answer your question
You can also do it with BeautifulSoup, but it is much more error-prone in my opinion:
from bs4 import BeautifulSoup
import requests
import pandas as pd
data = []
url ='https://www.espn.com/nba/stats/player/_/table/offensive/sort/avgPoints/dir/desc'
source = requests.get(url)
soup = BeautifulSoup(source.text, 'lxml')

# Skip the first <tr> and build one record per remaining row.
for row in soup.select('tr')[1:]:
    rank_cell = row.select_one('td')
    name_link = row.select_one('div > a')
    team_tag = row.select_one('div > span')
    # Only keep rows that carry all three elements. Looking each one up
    # independently and appending anyway would reuse a value left over from
    # an earlier row (or hit an undefined name on the very first row).
    if rank_cell is None or name_link is None or team_tag is None:
        continue
    data.append({
        'rank': rank_cell.get_text(),
        'player': name_link.get_text(),
        'team': team_tag.get_text(),
    })
pd.DataFrame(data)
If you do not wanna use css selectors, you can also do
# Same extraction using find() instead of CSS selectors.
for row in soup.find_all('tr')[1:]:
    first_cell = row.find('td')
    if first_cell:
        rank = first_cell.get_text()
    link = row.find('a')
    if link:
        player = link.get_text()
    span = row.find('span')
    if span:
        team = span.get_text()
I have a comma-separated string and I need to match all the commas in this string except for the commas inside the double-quotes. I'm using regex for this.
,,,,"8000000,B767-200","B767-200","Boeing 767-200","ACFT",,,,,,,,,,,,,,,,,,,,,,,,,,
I tried the following regex patterns but none of them are working in PL/SQL but working in online regex testers.
,(?=(?:[^\"]*\"[^\"]*\")*(?![^\"]*\"))
(?!\B"[^"]*),(?![^"]*"\B)
I'm using REGEXP_INSTR function inside a procedure in PL/SQL to identify the index of the commas. Can someone suggest me a working regex pattern in PL/SQL for this purpose or help me to write one.
Thank you.
Oracle does not support look-ahead and non-capturing groups so you will need to match the quotes.
Assuming you can either have a non-quoted string or a quoted string (which could contain escaped quotes), then you can use the regular expression:
([^",]*|"(\\"|[^"])*"),
Which you could use like this:
-- Splits each csv value in test_data into its comma-terminated fields using a
-- recursive subquery: one output row per field, with the field's start
-- position, the position of the comma that ends it, and the field text.
WITH matches ( id, csv, start_pos, comma_pos, idx, num_matches ) AS (
-- Anchor member: the first field always starts at position 1.
SELECT id,
csv,
1,
-- return_option 1 gives the position just past the match; minus 1 is the comma itself.
REGEXP_INSTR( csv, '([^",]*|"(\\"|[^"])*"),', 1, 1, 1, NULL ) - 1,
1,
-- Total number of comma-terminated fields; bounds the recursion below.
REGEXP_COUNT( csv, '([^",]*|"(\\"|[^"])*"),' )
FROM test_data
UNION ALL
-- Recursive member: advance to occurrence idx + 1 of the pattern.
SELECT id,
csv,
-- return_option 0: position where the next match (field) begins.
REGEXP_INSTR( csv, '([^",]*|"(\\"|[^"])*"),', 1, idx + 1, 0, NULL ),
-- return_option 1, minus 1: position of the comma ending the next field.
REGEXP_INSTR( csv, '([^",]*|"(\\"|[^"])*"),', 1, idx + 1, 1, NULL ) - 1,
idx + 1,
num_matches
FROM matches
WHERE idx < num_matches
)
SELECT id,
idx,
start_pos,
comma_pos,
-- Field text without its terminating comma (NULL for an empty field).
SUBSTR( csv, start_pos, comma_pos - start_pos ) AS value
FROM matches
so for your test data:
-- Sample table holding one row: id 1 and the CSV line from the question.
CREATE TABLE test_data ( id, csv ) AS
SELECT 1, ',,,,"8000000,B767-200","B767-200","Boeing 767-200","ACFT",,,,,,,,,,,,,,,,,,,,,,,,,,' FROM DUAL
which outputs:
ID | IDX | START_POS | COMMA_POS | VALUE
-: | --: | --------: | --------: | :-----------------
1 | 1 | 1 | 1 | null
1 | 2 | 2 | 2 | null
1 | 3 | 3 | 3 | null
1 | 4 | 4 | 4 | null
1 | 5 | 5 | 23 | "8000000,B767-200"
1 | 6 | 24 | 34 | "B767-200"
1 | 7 | 35 | 51 | "Boeing 767-200"
1 | 8 | 52 | 58 | "ACFT"
1 | 9 | 59 | 59 | null
1 | 10 | 60 | 60 | null
1 | 11 | 61 | 61 | null
1 | 12 | 62 | 62 | null
1 | 13 | 63 | 63 | null
1 | 14 | 64 | 64 | null
1 | 15 | 65 | 65 | null
1 | 16 | 66 | 66 | null
1 | 17 | 67 | 67 | null
1 | 18 | 68 | 68 | null
1 | 19 | 69 | 69 | null
1 | 20 | 70 | 70 | null
1 | 21 | 71 | 71 | null
1 | 22 | 72 | 72 | null
1 | 23 | 73 | 73 | null
1 | 24 | 74 | 74 | null
1 | 25 | 75 | 75 | null
1 | 26 | 76 | 76 | null
1 | 27 | 77 | 77 | null
1 | 28 | 78 | 78 | null
1 | 29 | 79 | 79 | null
1 | 30 | 80 | 80 | null
1 | 31 | 81 | 81 | null
1 | 32 | 82 | 82 | null
1 | 33 | 83 | 83 | null
db<>fiddle here
(Note: you wanted to match the commas and this regular expression does exactly what you ask; it does not match any final value in the comma-delimited list as there is no terminating comma. If you wanted to do that then use the regular expression ([^",]*|"(\\"|[^"])*")(,|$) db<>fiddle.)
If you want it in a procedure then:
-- Returns in o_value the i_index-th field of the comma-separated string i_csv.
-- A field is either unquoted text or a double-quoted string that may contain
-- commas and backslash-escaped quotes; for a quoted field the surrounding
-- quotes are removed and \" is turned back into ".
CREATE PROCEDURE extract_csv_value(
i_csv IN VARCHAR2,
i_index IN INTEGER,
o_value OUT VARCHAR2
)
IS
BEGIN
-- Occurrence i_index of "field followed by a comma or end of string";
-- the final argument 1 selects sub-expression 1, i.e. the field without its delimiter.
o_value := REGEXP_SUBSTR( i_csv, '([^",]*|"(\\"|[^"])*")(,|$)', 1, i_index, NULL, 1 );
IF SUBSTR( o_value, 1, 1 ) = '"' THEN
-- Drop the first and last character (the quotes), then unescape \" to ".
o_value := REPLACE( SUBSTR( o_value, 2, LENGTH( o_value ) - 2 ), '\"', '"' );
END IF;
END;
/
then:
-- Demo: prints the first 10 fields of the sample CSV line, one per line,
-- each prefixed with its index left-padded to width 2.
DECLARE
csv VARCHAR2(4000) := ',,,,"8000000,B767-200","B767-200","Boeing 767-200","ACFT",,,,,,,,,,,,,,,,,,,,,,,,,,';
value VARCHAR2(100);
BEGIN
FOR i IN 1 .. 10 LOOP
extract_csv_value( csv, i, value );
DBMS_OUTPUT.PUT_LINE( LPAD( i, 2, ' ' ) || ' ' || value );
END LOOP;
END;
/
outputs:
1
2
3
4
5 8000000,B767-200
6 B767-200
7 Boeing 767-200
8 ACFT
9
10
db<>fiddle here
I tried to solve it without using a regex, so check whether the following procedure works as expected.
-- Prints, one per line via DBMS_OUTPUT, every double-quoted section of
-- p_string, with the surrounding quotes included.
-- Quotes are paired in order of appearance: 1st with 2nd, 3rd with 4th, ...
-- An opening quote that has no closing partner ends the scan and is ignored.
CREATE OR REPLACE PROCEDURE p_extract(p_string IN VARCHAR) AS
  TYPE table_result IS TABLE OF VARCHAR2(255) INDEX BY PLS_INTEGER;
  t_retval table_result;
  opening  BOOLEAN := FALSE;  -- TRUE while cnt sits on an opening quote
  cnt      INTEGER := 1;      -- current scan position (0 once no quote is left)
  I        INTEGER := 1;      -- position of the matching closing quote
BEGIN
  WHILE cnt <= LENGTH(p_string) AND cnt <> 0 LOOP
    IF substr(p_string, cnt, 1) = '"' THEN
      opening := NOT opening;
    END IF;
    IF opening THEN
      I := instr(p_string, '"', cnt + 1, 1);
      -- No closing quote: stop instead of storing a NULL entry.
      EXIT WHEN I = 0;
      t_retval(t_retval.COUNT + 1) := substr(p_string, cnt, I - cnt + 1);
    END IF;
    -- Jump to the next quote character; instr returns 0 when there is none.
    cnt := instr(p_string, '"', cnt + 1, 1);
  END LOOP;
  -- Entries are stored at 1 .. COUNT. Iterating over that range is safe for an
  -- empty collection, whereas FIRST .. LAST would be NULL .. NULL and raise
  -- VALUE_ERROR when no quoted section was found.
  FOR K IN 1 .. t_retval.COUNT LOOP
    dbms_output.put_line(t_retval(K));
  END LOOP;
END;
Test it.
-- Runs p_extract on the sample CSV line from the question.
BEGIN
p_extract(',,,,"8000000,B767-200","B767-200","Boeing 767-200","ACFT",,,,,,,,,,,,,,,,,,,,,,,,,,');
END;
--OUTPUT
/*
"8000000,B767-200"
"B767-200"
"Boeing 767-200"
"ACFT"
*/
However this won't work if you miss the last or first "
How can I generate all possible bit combinations in an array of bits of length n? If I start with all zeros in my array, then there are n possibilities to place the first bit, and for each of these n possibilities there are n-1 possibilities to place the second bit, and so on until all n bits are set to one. But so far I haven't managed to program it.
Also, many people pointed out that I can do this by counting from 0 to (2^n)-1 and printing each number in binary. This would be an easy way to solve the problem; however, in that case I would just be letting the machine count instead of telling it where to place the ones. I am doing this for learning, so I would like to know how to program the ones-placing approach.
How would you count manually on paper? You would check the last digit. If it is 0, you set it to 1. If it is already 1, you set it back to 0 and continue with the next digit. So it's a recursive process.
The following program generates all possible combinations by mutating a sequence:
#include <iostream>
// Advances the digit sequence [begin, end) to the next binary number in place,
// treating even elements as 0 and odd elements as 1 (so '0'/'1' chars work).
// Returns true if a new number was produced, false if the sequence wrapped
// around to all zeros (or is empty).
template <typename Iter>
bool next(Iter begin, Iter end)
{
    if (begin == end)
    {
        return false;              // every digit was flipped: back at zero
    }

    Iter last = end;
    --last;

    const bool is_one = (*last & 1) != 0;
    if (is_one)
    {
        --*last;                   // 1 -> 0, carry into the digits before it
        return next(begin, last);
    }

    ++*last;                       // 0 -> 1, no carry needed
    return true;
}
// Prints every 4-digit binary string, starting from "0000", one per line.
int main()
{
    char digits[] = "0000";
    bool more = true;
    while (more)
    {
        std::cout << digits << std::endl;
        more = next(digits + 0, digits + 4);
    }
}
The program works with any sequence of any type. If you need all possible combinations at the same time, just put them into a collection instead of printing them out. Of course you need a different element type, because you cannot put C arrays into a vector. Let's use a vector of strings:
#include <string>
#include <vector>
// Collects every 4-digit binary string into a vector instead of printing it.
int main()
{
    std::vector<std::string> combinations;
    std::string digits = "0000";
    bool more = true;
    while (more)
    {
        combinations.push_back(digits);
        more = next(digits.begin(), digits.end());
    }
    // now the vector contains all possible combinations
}
If you don't like recursion, here is an equivalent iterative solution:
// Iterative variant: advances the digit sequence [begin, end) to the next
// binary number in place, treating even elements as 0 and odd elements as 1.
// Returns true if a new number was produced, false once the sequence has
// wrapped around to all zeros (or is empty).
template <typename Iter>
bool next(Iter begin, Iter end)
{
    Iter pos = end;
    while (pos != begin)
    {
        --pos;
        if (*pos & 1)
        {
            --*pos;        // 1 -> 0, keep carrying towards the front
            continue;
        }
        ++*pos;            // 0 -> 1, carry absorbed
        return true;
    }
    return false;          // carried past the first digit
}
Such problems are trivially solved functionally. To find the solutions of length n, you first find the solutions of length n-1 and then append '0' and '1' to those solutions, doubling the solution space.
Here is a simple recursive Haskell program:
-- | All strings of '0'/'1' of length n; every string starting with '0'
--   comes before every string starting with '1'.
comb 0 = [[]]
comb n = [bit : shorter | bit <- "01", shorter <- rest]
  where
    rest = comb (n - 1)
And here is a test run:
> comb 3
["000","001","010","011","100","101","110","111"]
A "truly" recursive approach in C++:
#include <iostream>
#include <string>
// Prints to std::cout every string made of `prefix` followed by n binary
// digits, one per line, in ascending binary order.
//   n      - number of digits still to append; zero or negative means none,
//            so `prefix` is printed as is.
//   prefix - digits chosen so far (empty for the initial call).
void print_digits(int n, std::string const& prefix = "") {
    // `n <= 0` rather than `!n`: a negative n would otherwise never reach the
    // base case and recurse until the stack overflows.
    if (n <= 0) {
        std::cout << prefix << std::endl;
        return;
    }
    print_digits(n - 1, prefix + '0');
    print_digits(n - 1, prefix + '1');
}
// Entry point: prints every 4-digit binary string via print_digits.
int main(int, char**) {
print_digits(4);
}
Optimal solution is here:
http://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation
This is my answer. The advantage is that all the combinations are saved in a two-dimensional array, but the disadvantage is that you can only use it for a string up to 17 digits long!
#include <iostream>
#include <vector>
using namespace std;
int main()
{
long long n,i1=0,i2=0, i=1, j, k=2, z=1;
cin >> n;
while (i<n){
k = 2*k;
i++;
}
bool a[k][n], t = false;
j = n-1;
i1=0;
i2 = 0;
z = 1;
while (j>=0){
if(j!=n-1){
z=z*2;
}
i2++;
t = false;
i = 0;
while (i<k){
i1 = 0;
while (i1<z){
if(t==false){
a[i][j]=false;
}
else {
a[i][j]= true;
}
i1++;
i++;
}
if(t==false){
t = true;
}else {
t = false;
}
}
j--;
}
i = 0;
j = 0;
while (i<k){
j = 0;
while (j<n){
cout << a[i][j];
j++;
}
cout << endl;
i++;
}
return 0;
}
FredOverflow is right in general.
However, for 1s & 0s you'd better just increment an integer from 0:
int need_digits = 10
unsigned int i=0
while (! i>>need_digits){
# convert to binary form: shift & check, make an array, or cast to string, anything.
}
... i guess you won't need more than 32 bits or you'd have to chain multiple integers.. and stick to the previous answer :)
regex is your best friend for these. here's a reasonably fast method not specific to any language (code here is awk, but concept is very portable):
using regex to generate every bit-string combo all the way to 2^28 in less than 2.946 secs
if you only need every 2^16 combo, it's less than 0.5 secs
they're all pre-sorted in big-endian sequential order, in a single string : so just cut out what you need from there.
as you can see from gawk profiler - it only requires 1 loop per bit-level.
strip away the timer() and printf() parts and it all boils down to just :
~
1 ____=_=(_<_)(___^=_)
1 __="."
1 gsub(__,"\\&&",____)
15 while (___++<+BITS_NEEDED) {
15 gsub(__,____,_)
15 __=__"."
}
_
using mawk2
#bits : 2 | 0.00260 secs | 4 segs | 8 bitstr-len | 00011011
#bits : 3 | 0.00504 secs | 6 segs | 18 bitstr-len | 000000011100110111
#bits : 4 | 0.00749 secs | 10 segs | 39 bitstr-len | 001001110001111001100001101111
#bits : 5 | 0.00979 secs | 17 segs | 84 bitstr-len | 101110011100001100000011011111
#bits : 6 | 0.01197 secs | 30 segs | 183 bitstr-len | 001100100001101100000110111111
#bits : 7 | 0.01445 secs | 56 segs | 391 bitstr-len | 011000001101011000001110111111
#bits : 8 | 0.01672 secs | 105 segs | 841 bitstr-len | 100001110111110000011101111111
#bits : 9 | 0.01896 secs | 199 segs | 1793 bitstr-len | 001110111111000000111011111111
#bits : 10 | 0.02119 secs | 379 segs | 3788 bitstr-len | 110001111110000001110111111111
#bits : 11 | 0.02348 secs | 724 segs | 7967 bitstr-len | 110011111100000011101111111111
#bits : 12 | 0.02578 secs | 1390 segs | 16684 bitstr-len | 100111111000000111011111111111
#bits : 13 | 0.02896 secs | 2678 segs | 34809 bitstr-len | 001111110000001110111111111111
#bits : 14 | 0.03210 secs | 5171 segs | 72393 bitstr-len | 011111100000011101111111111111
#bits : 15 | 0.03505 secs | 10009 segs | 150142 bitstr-len | 111111000000111011111111111111
#bits : 16 | 0.03781 secs | 19414 segs | 310629 bitstr-len | 111110000001110111111111111111
#bits : 17 | 0.04070 secs | 37723 segs | 641289 bitstr-len | 111100000011101111111111111111
#bits : 18 | 0.04417 secs | 73414 segs | 1321444 bitstr-len | 111000000111011111111111111111
#bits : 19 | 0.04904 secs | 143073 segs | 2718379 bitstr-len | 110000001111011111111111111111
#bits : 20 | 0.05737 secs | 279184 segs | 5583670 bitstr-len | 100100001111011111111111111111
#bits : 21 | 0.07305 secs | 545413 segs | 11453681 bitstr-len | 001000011110111111111111111111
#bits : 22 | 0.09946 secs | 1066640 segs | 23466075 bitstr-len | 010000111101111111111111111111
#bits : 23 | 0.15100 secs | 2087981 segs | 48023565 bitstr-len | 110000111101111111111111111111
#bits : 24 | 0.25276 secs | 4090896 segs | 98181495 bitstr-len | 100001111011111111111111111111
#bits : 25 | 0.45808 secs | 8021635 segs | 200540878 bitstr-len | 100001111011111111111111111111
#bits : 26 | 0.79723 secs | 15741040 segs | 409267048 bitstr-len | 100001111011111111111111111111
#bits : 27 | 1.49781 secs | 30910510 segs | 834583780 bitstr-len | 000011110111111111111111111111
#bits : 28 | 2.91132 secs | 60737902 segs | 1700661245 bitstr-len | 000011110111111111111111111111
( LC_ALL=C mawk2 -v BITS_NEEDED='28' ; ) 2.54s user 0.35s system 98% cpu 2.946 total
CODE, and output using gawk
#bits : 2 | 0.00272 secs | 4 segs | 8 bitstr-len | 00011011
#bits : 3 | 0.00531 secs | 8 segs | 24 bitstr-len | 000001010011100101110111
#bits : 4 | 0.00906 secs | 16 segs | 64 bitstr-len | 001001101010111100110111101111
#bits : 5 | 0.01170 secs | 32 segs | 160 bitstr-len | 110101101111100111011111011111
#bits : 6 | 0.01425 secs | 64 segs | 384 bitstr-len | 111011111100111101111110111111
#bits : 7 | 0.01687 secs | 128 segs | 896 bitstr-len | 111111100111110111111101111111
#bits : 8 | 0.01943 secs | 256 segs | 2048 bitstr-len | 111100111111011111111011111111
#bits : 9 | 0.02203 secs | 512 segs | 4608 bitstr-len | 100111111101111111110111111111
#bits : 10 | 0.02476 secs | 1024 segs | 10240 bitstr-len | 111111110111111111101111111111
#bits : 11 | 0.02799 secs | 2048 segs | 22528 bitstr-len | 111111011111111111011111111111
#bits : 12 | 0.03193 secs | 4096 segs | 49152 bitstr-len | 111101111111111110111111111111
#bits : 13 | 0.03501 secs | 8192 segs | 106496 bitstr-len | 110111111111111101111111111111
#bits : 14 | 0.03909 secs | 16384 segs | 229376 bitstr-len | 011111111111111011111111111111
#bits : 15 | 0.04370 secs | 32768 segs | 491520 bitstr-len | 111111111111110111111111111111
#bits : 16 | 0.04993 secs | 65536 segs | 1048576 bitstr-len | 111111111111101111111111111111
#bits : 17 | 0.05996 secs | 131072 segs | 2228224 bitstr-len | 111111111111011111111111111111
#bits : 18 | 0.07843 secs | 262144 segs | 4718592 bitstr-len | 111111111110111111111111111111
#bits : 19 | 0.11286 secs | 524288 segs | 9961472 bitstr-len | 111111111101111111111111111111
#bits : 20 | 0.17921 secs | 1048576 segs | 20971520 bitstr-len | 111111111011111111111111111111
#bits : 21 | 0.31018 secs | 2097152 segs | 44040192 bitstr-len | 111111110111111111111111111111
# gawk profile, created Thu Jun 2 05:36:49 2022
# BEGIN rule(s)
BEGIN {
1 ____ = _ = "01"
1 __ = "."
1 srand()
1 ______ = timer()
1 gsub(__, "\\&&", ____)
1 ++___
20 while (___++ < +BITS_NEEDED) {
20 gsub(__, ____, _)
20 print sprintf(" #bits : %2.f | %9.5f secs | %10.f segs | %-13.f bitstr-len | %.30s%.0s", ___, (timer() - ______) / (1E6 - 1E-6), (_____ = length(_)) / ___, _____, substr(_, ++_____ - 30), __ = __ ".")
}
}
# Functions, listed alphabetically
21 function timer(_, __)
{
21 return (substr("", (__ = "gdate +'%s%6N'") | getline _, close(__)) _)
}
( LC_ALL=C gawk -p- -v BITS_NEEDED='21' -b -e ; ) 0.26s user 0.04s system 93% cpu 0.321 total