Originally published October 7, 2021 @ 7:17 am

Books have been written on the subject of awk and sed. Here’s a small sample of commands I put together over the years that are useful for everyday system administration tasks. Most of these tasks involve reformatting strings and files, which is why I decided to bundle tr, cut, and paste into this mess.

Replace newline with comma

# Slurp every line into the pattern space, then turn each embedded newline into a comma (GNU sed).
sed ':a;N;$!ba;s/\n/,/g'

Remove commas inside double-quotes

# Splits on double-quotes, so even-numbered fields are the quoted parts; commas are removed only there.
# When a field is changed, awk rebuilds the line with OFS (empty here), so the double-quote characters are dropped as well.
awk -F'"' -v OFS='' '{ for (i=2; i<=NF; i+=2) gsub(",", "", $i) } 1'

Remove leading spaces and tabs

sed 's/^[ \t]*//'

Remove single spaces only (leave multiple spaces)

# \1 puts back the character captured in front of the space, so only the space itself is dropped.
sed 's/\(.\) /\1/g'

Reduce multiple spaces to one

sed 's/ \+/ /g'

Reduce multiple spaces to one for a line containing a string

iostat | sed -n '/^sd/s/ \+/ /gp'

Remove entire words containing non-alphabetic characters

awk '{ofs=""; for (i=1; i<=NF; i++) if ($i ~ /^[[:alpha:]]+$/) {printf "%s%s", ofs, $i; ofs=OFS} print "" }'

Remove non-alphanumeric characters from words

# Keep letters, digits, dots, spaces and hyphens. The hyphen goes last in the bracket
# expression so it is a literal and not the start of a character range.
sed 's/[^[:alnum:]. -]//g'

Remove non-printable characters from files

# Keep only tab (\11), newline (\12), carriage return (\15) and printable ASCII (\40-\176); delete every other byte.
tr -cd '\11\12\15\40-\176' < infile > outfile

Move the first line to the end of the list

sed '1,1{H;1h;d;};$G'

Remove only the first line matching pattern

sed '0,/pattern/{/pattern/d;}'
awk '!/pattern/ || f++'

Remove all but the first line matching pattern

# GNU sed: the range 0,/pattern/ ends at the first match (even on line 1); negating it
# restricts the delete to matches that come after that first one.
sed '0,/pattern/!{/pattern/d;}'
awk '/pattern/&&f++ {next} 1'

Remove empty lines

sed '/^\s*$/d'

Show allocated disk space:

df -klP -t xfs -t ext2 -t ext3 -t ext4 -t reiserfs | grep -oE ' [0-9]{1,}( +[0-9]{1,})+' | awk '{sum_used += $1} END {printf "%.0f GB\n", sum_used/1024/1024}'

Show used disk space

df -klP -t xfs -t ext2 -t ext3 -t ext4 -t reiserfs | grep -oE ' [0-9]{1,}( +[0-9]{1,})+' | awk '{sum_used += $2} END {printf "%.0f GB\n", sum_used/1024/1024}'

Summarizing line data with awk

Sample data in the temp file:

ID1,223
ID2,124
ID3,125
ID2,400
ID1,345
ID4,876
ID2,243
ID4,287
ID1,376
ID3,765

Add up the values in the second column

awk -F"," '{s+=$2}END{print s}' temp

Add up the values in the second column only for ID2

awk -F, '$1=="ID2"{s+=$2;}END{print s}' temp
v="ID2"; awk -F, -v v="${v}" '$1==v{s+=$2;}END{print s}' temp

List unique values in the first column

awk -F, '{a[$1];}END{for (i in a)print i;}' temp

Add up values in the second column for each ID

awk -F, '{a[$1]+=$2;}END{for(i in a)print i", "a[i];}' temp

Add up values in the second column for each ID and print total

awk -F, '{a[$1]+=$2;x+=$2}END{for(i in a)print i", "a[i];print "Total,"x}' temp

Print the maximum second-column value for each group

awk -F, '{if (a[$1] < $2)a[$1]=$2;}END{for(i in a){print i,a[i];}}' OFS=, temp

Print the number of occurrences for each ID

awk -F, '{a[$1]++;}END{for (i in a)print i, a[i];}' temp

Print the first entry for each ID

awk -F, '!a[$1]++' temp

Concatenate values for each ID

awk -F, '{if(a[$1])a[$1]=a[$1]":"$2; else a[$1]=$2;}END{for (i in a)print i, a[i];}' OFS=, temp

Extract URLs:

# \1 is the captured href value; only lines that contain an href are printed.
sed -n 's/.*href="\([^"]*\).*/\1/p'

Preserve symlinks when using sed -i

# Dots are escaped and the trailing dot is part of the match, so 192.168.10.x and similar are left alone.
cd /etc/httpd/conf.d && sed -i --follow-symlinks 's/192\.168\.1\./192.168.2./g' *.conf

Append each string with a consecutive number

awk -vRS=string '{$0=n$0;ORS=RT}++n'

Flush awk buffers when piping from STDIN for continuous output

| awk '{print $1; fflush();}'

# or for older POSIX-compliant versions of awk
| awk '{print $1; system("");}'

Print fields set in a Shell variable

fields="1 3 4"
command | awk -v fields="${fields}" 'BEGIN{ n = split(fields,f) }
{ for (i=1; i<=n; ++i) printf "%s%s", $f[i], (i<n?OFS:ORS) }'

Show the primary IP of a local machine

# The loopback address is blanked first; \2 is the dotted quad that follows "inet " or "inet addr:".
ifconfig | sed -rn 's/127\.0\.0\.1//;s/.*inet (addr:)?(([0-9]*\.){3}[0-9]*).*/\2/p'

Verify that the local machine’s IP matches DNS

# Prints 0 when the address extracted from ifconfig (\2 = the dotted quad) equals what DNS returns for this host, 1 otherwise.
if [ "$(ifconfig | sed -rn 's/127\.0\.0\.1//;s/.*inet (addr:)?(([0-9]*\.){3}[0-9]*).*/\2/p')" == "$(dig +short $(host -TtA $(hostname -s) | grep "has address" | awk '{print $1}'))" ]; then echo 0 ; else echo 1 ; fi

Show primary NIC

route | grep -m1 ^default | awk '{print $NF}'

Show prefix (netmask in CIDR notation)

ip addr show "$(route | grep -m1 ^default | awk '{print $NF}')" | grep -w inet | grep -v 127.0.0.1 | awk '{ print $2}' | cut -d "/" -f 2

Show broadcast address

ip addr show "$(route | grep -m1 ^default | awk '{print $NF}')" | grep -w inet |grep -v 127.0.0.1|awk '{ print $4}'

Show local machine’s network in CIDR notation

# sed hands ipcalc "<address> <netmask>" (\1 and \2); ipcalc -np emits NETWORK=/PREFIX= assignments that eval loads.
eval $(ipcalc -np $(ifconfig $(route | grep -m1 ^default | awk '{print $NF}') | sed -n "s/inet addr:\([^ ]*\).*Mask:\([^ ]*\).*/\1 \2/p")) ; echo $NETWORK/$PREFIX

Calculate sum from stdout and do math

cat $file | awk '{ SUM += $1} END { print ( SUM/1024 )"MB" }'

Calculate allocated and used local filesystem storage

df -klP -t ext2 -t ext3 -t ext4 -t reiserfs -t xfs | grep -oE ' [0-9]{1,}( +[0-9]{1,})+' | awk '{sum_alloc +=$1; sum_used += $2} END {printf "%.2f / %.2f (GB)\n", sum_alloc/1024/1024, sum_used/1024/1024}'

Find gaps in numerical sequences

awk '$1!=p+1{print p+1"-"$1-1}{p=$1}'

# Only print numbers missing from the sequence
awk 'NR-1{if($1!=(p+1))print p+1}{p=$1}'

Grepping with awk

# The two match() calls form the pattern, so a line is printed only when both succeed.
echo "514/tcp   open  shell" | awk 'match($1,"^[0-9]+/[a-z]+") && match($2,"open") {print $1,$2,$3}'

Grepping with awk on a specific column

# "root" must be a quoted string; a bare root is an unset awk variable and compares as empty.
ls -l | awk '$3 == "root"'
ls -l | awk '$NF ~ "gz"'
ls -l | awk '$5 > 84674560'

Grepping with sed and also printing the headers (first line)

# 1b skips the rest of the script for the header, so it is printed exactly once even when it also matches the pattern.
sed '1b;/pattern/!d'

Replace every other occurrence of a pattern

awk '{for(i=1; i<=NF; i++) if($i=="orig_pattern") if(++count%2==1) $i="new_pattern"}1'

Extract lines between unique tags using sed. Sample input file

cat /tmp/testfile.txt

# Header 1
Line 11
Line 12
# Header 2
Line 21
Line 22
Line 23
# Header 3
Line 31
Line 32
Line 33

sed -n '/# Header 2/{:a;n;/# Header 3/b;p;ba}' /tmp/testfile.txt

Line 21
Line 22
Line 23

Extract lines contained within the second set of <header></header> tags using sed.

Sample input file:

cat /tmp/testfile2.txt

<header>
Line 11
Line 12
</header>
<header>
Line 21
Line 22
Line 23
</header>
<header>
Line 31
Line 32
Line 33
</header>
sed -n '\|<header>|{:n;\|</header>|!{N;bn};y|\n| |;p}' /tmp/testfile2.txt | sed -n '2{p;q}'

<header> Line 21 Line 22 Line 23 </header>

Delete lines between two tags not including the tags

# Inside the range, the empty regex // re-uses the last regex tested, so it matches both tag lines; everything else is deleted.
sed "/<tag open>/,/<\/tag close>/{//!d}"

Delete lines between two tags, including the tags

# Deletes every line from the opening tag through the closing tag, both included.
sed "/<tag open>/,/<\/tag close>/d"

Delete all lines after a tag, not including the tag

# Print up to and including the tag line, then quit, which drops everything after it.
sed '/<\/tag close>/q'

Delete lines 12 through 23

sed "12,23d"

Delete text in a line between two markers {}

# \1 and \2 put the braces back, so only the text between them is removed.
sed -e 's/\({\).*\(}\)/\1\2/'

Delete text in a line between two markers {} including the markers

sed -e 's/://' -e 's/\({\).*\(}\)//'

Delete lines containing a regex match

sed "/${regex}/d"

Remove SSH banner from script output

Sample output from which you want to strip out the banner text:

cat /tmp/testfile2.txt

hostname1
This is the SSH banner that you would like to remove
And this is the second line of the banner
and the third line
2.6.32-696.18.7.el6.x86_64
----------------------------------------------------
hostname2
This is the SSH banner that you would like to remove
And this is the second line of the banner
and the third line
2.6.32-696.18.7.el6.x86_64
----------------------------------------------------

awk '/^This.*SSH.*/{p=1;sub(/[\t]*.*/,"")}!p;/^and.*line$/{p=0}'

Remove dupes, spaces, extra colons from Bash PATH

# One entry per line, strip spaces, drop empty entries and repeats, then re-join with colons.
# awk keeps the first occurrence in place, so lookup order is preserved (sort -u would reorder PATH).
PATH=$(printf '%s\n' "${PATH}" | tr ':' '\n' | sed 's/ //g' | awk 'NF && !seen[$0]++' | paste -sd: -)

Remove duplicate words in a line

# Words go through an explicit "%s" format so a literal % in the data is printed verbatim.
# split("",a) empties the seen-words array and returns 0, which also resets i for the next line.
awk '{ while(++i<=NF) if (!a[$i]++) printf "%s%s", $i, FS; i=split("",a); print "" }'

Remove duplicate lines in a file without sorting

awk '!a[$0]++'

Remove duplicate lines in a file without sorting, except blank lines

awk '/^ *$/ { delete x; }; !x[$0]++'

Show duplicate entries in a column

awk -F, 'a[$5]++{print $5}'

Remove duplicate lines

 awk '!x[$0]++'

Remove duplicate lines, except those matching a string

 awk '/^STRING$/ { delete x; }; !x[$0]++'

Print number of characters for each line in a file

awk '{ print length($0)"\t"$0; }' file.txt

Sort lines by the number of characters

awk '{ print length, $0 }' | sort -n -s | cut -d" " -f2-

Insert a Unicode character into a specific column position in a file

sed -r -e 's/^.{15}/&\xe2\x86\x92\x0/' file.txt

Replace multiple newlines with a single newline

sed '/^$/N;/^\n$/D' file.txt

Preserve the original search string and add to it

(Example: replace every [0-9]. with [0-9]..)

# \1 re-emits the matched "digit." and the replacement then appends one more dot.
ls | sed -e 's/\([0-9]\.\)/\1\./g'

(Example 2: enclose every four-digit number followed by a dot in parentheses, i.e., 2014. becomes (2014).)

# \1 is the captured four-digit number; it is written back wrapped in parentheses, followed by the dot.
sed -e 's/\([0-9]\{4\}\)\./(\1)./g'

Merge every two adjacent lines (sed wins)

# Odd lines are printed without a newline via an explicit "%s " format (safe for lines containing %); even lines print normally.
awk 'NR%2{printf "%s ", $0; next}1'
# or
sed 'N;s/\n/ /'

Get hard drive model and size

for i in $(fdisk -l 2>/dev/null | egrep -o "/dev/sd[a-z]" | sort -u) ; do hdparm -I ${i} 2>/dev/null; done | egrep "Model|size.*1000" | awk -F: '{print $NF}' | awk 'NR%2{printf $0" ";next;}1'

Identify server’s primary IP address

# The loopback address is blanked first; \2 is the dotted quad that follows "inet " or "inet addr:".
/sbin/ifconfig | sed -rn 's/127\.0\.0\.1//;s/.*inet (addr:)?(([0-9]*\.){3}[0-9]*).*/\2/p'

Print all lines but last

# Single quotes keep the shell from expanding $d; sed then reads it as "delete the last line".
sed '$d'

Print all fields but first

awk '{$1=""; print $0}'

Print all fields but last

awk '{$NF=""; print $0}'

Print all fields but last and preserve field delimiters

awk -F'/' -v OFS='/' '{$NF=""; print $0}'

Print all fields but the first two

awk '{$1=$2=""; print $0}'

Print fields from 9th to last

awk '{ s = ""; for (i = 9; i <= NF; i++) s = s $i " "; print s }'

Print last three characters of a string

# The greedy .* consumes everything except the final three characters, which \1 writes back.
sed 's/.*\(...\)/\1/'
# or
grep -o '...$'
grep -o '.\{3\}$'
python3 -c "print(input()[-3:])" # Python 3 form: raw_input() and the print statement exist only in Python 2
grep -oP '.{0,3}$'
tail -c 3 # Works if each character is one byte (non-Unicode)
printf '%s\n' "${STRING:(-3)}"
awk '{ print substr( $0, length($0) - 2, length($0) ) }'

Comment-out a line in a file containing a regex match:

sed -re '/REGEX/ s/^#*/#/' -i /tmp/file

Uncomment a line in a file containing a regex match

sed -re '/REGEX/ s/^#*//' -i /tmp/file

Convert upper- to lower-case with tr and sed

tr '[:upper:]' '[:lower:]'
# or
sed -e 's/\(.*\)/\L\1/' # GNU sed: \L lower-cases what follows, here the whole captured line (\1)

Convert to “Title Case”

sed 's/.*/\L&/; s/[a-z]*/\u&/g'

Capitalize the first letter of the line

sed 's/[[:alpha:]]/\u&/'

Insert “E” into the string at position #3

sed -r -e 's/^.{3}/&E/'

Insert a line before the first match

sed -i "1,/^${match_me}/ {/^${match_me}/i\
      ${insert_me}
      }" /tmp/file
# or in one line
sed "0,/^${match_me}/s/^${match_me}/${insert_me}\n&/"

Print text between the first occurrence of tag “foo” and the last occurrence of tag “bar.”

sed -n '/foo/{:a;N;/^\n/s/^\n//;/bar/{p;s/.*//;};ba};'

Prepend a shell variable to a string using awk

| awk -v var="${shell_var}" '{print var$0}'

Round a number to the nearest multiple of 10

awk '{print sprintf("%.0f",$0/10)*10}'

Combine all columns from two CSV files

The data01.csv sample:

2018-04-01,23,85
2018-04-02,22,34
2018-04-03,33,87
2018-04-04,87,10
2018-04-05,27,72

The data02.csv sample:

2018-04-01,sda,12,sdb,23
2018-04-02,sda,15,sdb,43
2018-04-03,sda,83,sdb,54
2018-04-04,sda,22,sdb,63
2018-04-05,sda,46,sdb,34
paste -d',' data01.csv data02.csv

2018-04-01,23,85,2018-04-01,sda,12,sdb,23
2018-04-02,22,34,2018-04-02,sda,15,sdb,43
2018-04-03,33,87,2018-04-03,sda,83,sdb,54
2018-04-04,87,10,2018-04-04,sda,22,sdb,63
2018-04-05,27,72,2018-04-05,sda,46,sdb,34

Combine all columns from data01.csv and all but the first column from data02.csv

# cut drops the first column of data02.csv outright, so no stray leading comma is created
# and empty fields in data01.csv are left intact.
cut -d',' -f2- data02.csv | paste -d',' data01.csv -

2018-04-01,23,85,sda,12,sdb,23
2018-04-02,22,34,sda,15,sdb,43
2018-04-03,33,87,sda,83,sdb,54
2018-04-04,87,10,sda,22,sdb,63
2018-04-05,27,72,sda,46,sdb,34