-
Notifications
You must be signed in to change notification settings - Fork 3
/
clnduplines.sh
executable file
·53 lines (45 loc) · 1.25 KB
/
clnduplines.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/bin/ksh
# (shebang kept for compatibility; the script now uses only POSIX sh
#  constructs, so ksh arithmetic is no longer required)
#
# clnduplines - clean duplicate lines in a file
#
# Usage:   clnduplines filename
#
# Keeps the first occurrence of every line (exact, whole-line comparison),
# drops all later repeats, and replaces the file with the cleaned version.
# The original file is saved as filename.was; the user is then asked
# (rm -i) whether that backup should be deleted.
# (same as cntduplines but file replaced by cleaned file)
#
# Exit status: 0 after printing the usage text when no filename is given,
#              1 if the file is unreadable or cannot be processed/renamed,
#              otherwise the status of the final `rm -i`.
#
# History: earlier versions ran one grep per input line (quadratic, and the
# line was interpreted as a regular expression, so e.g. "a" was dropped when
# "abc" had already been seen). A single awk pass fixes both problems.
#
### based on:
# Unix Tip #487- May 2, 1998 http://www.ugu.com/sui/ugu/show?tip.today
### see also: uniq file : eliminate adjacent lines which are identical

# Print the usage text (stdout, as the script always did).
usage() {
  echo
  echo "syntax: clnduplines [filename]"
  echo
}

# dedup_lines SRC DST
# Copy SRC to DST keeping only the first occurrence of each line.
# Prints ". " on stdout for every duplicate dropped, then a newline and the
# " N dup lines found" summary. Returns non-zero if SRC cannot be read or
# DST cannot be created.
dedup_lines() {
  # Create/truncate DST up front so it exists even when SRC is empty.
  : > "$2" || return 1
  # DST is handed to awk through the environment: unlike -v, ENVIRON does
  # not interpret backslash escapes in the path. SRC is fed on stdin so a
  # name such as "a=b" or "-x" is never parsed as an awk operand.
  CLNDUP_OUT="$2" awk '
    BEGIN { out = ENVIRON["CLNDUP_OUT"] }
    ($0 in seen) { printf ". "; dups++; next }
    { seen[$0] = 1; print > out }
    END { printf "\n %d dup lines found\n", dups }
  ' < "$1"
}

# main [filename]
# Entry point: validate the argument, de-duplicate, swap the files and offer
# to delete the backup.
main() {
  if [ -z "$1" ]; then
    usage
    return 0
  fi
  file=$1

  # Refuse early: otherwise a missing file would be "cleaned" into a new,
  # empty file of that name.
  if [ ! -f "$file" ] || [ ! -r "$file" ]; then
    printf 'clnduplines: cannot read file: %s\n' "$file" >&2
    return 1
  fi

  printf '%s\n' " --- clnduplines: Searching for duplicate lines in $file ---"

  if ! dedup_lines "$file" "$file.clean"; then
    printf 'clnduplines: failed to process %s\n' "$file" >&2
    /bin/rm -f -- "$file.clean"
    return 1
  fi
  echo ""

  # Only replace the original once the backup rename has succeeded.
  /bin/mv -- "$file" "$file.was" || return 1
  /bin/mv -- "$file.clean" "$file" || return 1

  printf '%s\n' " --- $file is now clean, original in $file.was --- delete it ?"
  /bin/rm -i -- "$file.was"
}

main "$@"