#!/bin/bash
# Word frequencies
# $Id: unix9-wordfreq.sh 191 2006-03-29 11:07:00Z cactus $
#
# (C) 2005 Dr. ERDI Gergo
#
# See http://cactus.rulez.org/elte/2005-1-unix/#9 for a description of what it does
#
# Licensed under the GNU General Public License, version 2
#
# Usage: unix9-wordfreq.sh FILE1 [FILE2...]
#
# Prints one line per distinct word (lower-cased, sorted), followed by one
# count column per input file, in the order the files were given.  A word
# that does not occur in a file gets 0 in that file's column.
#
# NOTE(review): the accented letters in the AWK character classes are matched
# byte-wise because the pipeline runs in the C locale.  The helper names
# suggest ISO-8859-2 input; the script file and the inputs presumably have to
# share that single-byte encoding -- confirm before feeding it UTF-8 text.

# Print the usage text on stdout and terminate successfully.
help() {
    local self
    self=$(basename -- "$0")
    cat << EOF
Usage: $self FILE1 [FILE2...]
Creates statistics about the words occuring in the files.

Options:
        -help   Display this help message

(C) 2005 Dr. ERDI Gergo
Version: \$Id: unix9-wordfreq.sh 191 2006-03-29 11:07:00Z cactus $
EOF
    exit 0
}

# Print "ERROR: <message>!" on stderr and terminate with status 1.
# Arguments: the words of the message (joined with single spaces).
error() {
    # printf with a quoted "$*" keeps the message intact; an unquoted echo
    # would collapse whitespace and expand glob characters in file names.
    printf 'ERROR: %s!\n' "$*" >&2
    exit 1
}

# This works differently from the usual option parsers: it only walks over
# the arguments, in order, to see whether -help is among them; every other
# argument must be a readable regular file.
# Arguments: the script's command-line arguments.
options() {
    local arg
    # Iterate by position rather than stopping at the first empty string, so
    # an empty argument is reported instead of silently ending validation.
    for arg in "$@"; do
        if [[ "$arg" == "-help" ]]; then
            help
        fi
        [[ -f "$arg" && -r "$arg" ]] || error "$arg: Unable to open file"
    done
}

# Emit (on stdout) the text of a shell command that prints "word count"
# lines, sorted, for one file.  The text is meant to be passed to eval.
# Arguments: $1 - path of the file to count words in.
awk_count() {
    # The program is embedded between single quotes in the generated
    # command, so it must never contain a single quote itself.
    local prog='
BEGIN {
    # Everything that is not a letter separates words.
    FS = "[^a-zA-ZáÁéÉíÍóÓöÖõÕúÚüÜûÛ]"

    # tolower() is only relied on for ASCII here; accented capitals are
    # mapped by hand.
    ekezet_lower["Á"] = "á"
    ekezet_lower["É"] = "é"
    ekezet_lower["Í"] = "í"
    ekezet_lower["Ó"] = "ó"
    ekezet_lower["Ö"] = "ö"
    ekezet_lower["Õ"] = "õ"
    ekezet_lower["Ú"] = "ú"
    ekezet_lower["Ü"] = "ü"
    ekezet_lower["Û"] = "û"
}

# Lower-case a whole word, one character at a time (j, ret are locals).
function iso88592_tolower(s,    j, ret) {
    ret = ""
    for (j = 1; j <= length(s); j++)
        ret = ret iso88592_tolower_c(substr(s, j, 1))
    return ret
}

# Lower-case a single character; unknown characters pass through.
function iso88592_tolower_c(c) {
    if (match(c, "[a-zA-Z]"))
        return tolower(c)
    else if (c in ekezet_lower)
        return ekezet_lower[c]
    return c
}

# Only lines containing at least one letter can contribute words.
/[a-zA-ZáÁéÉíÍóÓöÖõÕúÚüÜûÛ]/ {
    for (i = 1; i <= NF; i++) {
        # Consecutive separators produce empty fields; skip those.
        if (match($i, "^[a-zA-ZáÁéÉíÍóÓöÖõÕúÚüÜûÛ]+$"))
            words[iso88592_tolower($i)]++
    }
}

END {
    for (w in words)
        printf "%s %s\n", w, words[w]
}
'
    # %q renders the file name as a single, safely quoted shell word, so
    # names containing spaces or quotes survive the later eval unchanged.
    printf "awk '%s' %q 2>/dev/null|sort\n" "$prog" "$1"
}

# Element-wise processing of two sorted sequences (a merge join).
# Emit (on stdout) the text of a shell command that reads a sorted table
# "word c1 ... cN" on stdin, reads sorted "word count" lines from the
# command given in $1, and prints the merged table "word c1 ... cN count",
# with 0 standing in for a missing side.
# Arguments: $1 - command text producing the right-hand "word count" lines
#                 (as emitted by awk_count).
#            $2 - optional: number of count columns already present on
#                 stdin (default 0).  Only matters while stdin has yielded
#                 no line yet; afterwards the width is taken from the data.
awk_join() {
    local width=${2:-0}
    # Same single-quote restriction as in awk_count applies to this text.
    local prog='
# Advance the left input (stdin): key1 is the word, val1 all its counts.
function read1(    i) {
    if (eof1)
        return
    eof1 = (getline <= 0)
    if (eof1)
        return
    # Appending "" forces a string, so keys are never compared as numbers.
    key1 = $1 ""
    val1 = $2
    width = NF - 1
    for (i = 3; i <= NF; ++i)
        val1 = val1 " " $i
}

# Advance the right input (file f2): key2 is the word, val2 its count.
function read2() {
    if (eof2)
        return
    eof2 = ((getline < f2) <= 0)
    if (eof2)
        return
    key2 = $1 ""
    val2 = $2
}

# Word present on both sides.
function emit_both() {
    printf "%s %s %s\n", key1, val1, val2
}

# Word only on the left: the new column is 0.
function emit_left() {
    printf "%s %s 0\n", key1, val1
}

# Word only on the right: every existing column is 0.
function emit_right(    i) {
    printf "%s ", key2
    for (i = 0; i < width; ++i)
        printf "0 "
    printf "%s\n", val2
}

BEGIN {
    FS = " "
    read1()
    read2()
    while (!eof1 || !eof2) {
        # The keys of an exhausted side are stale, so only compare for
        # equality while both sides still hold a current record.
        if (!eof1 && !eof2 && key1 == key2) {
            emit_both()
            read1()
            read2()
        } else if (eof2 || (!eof1 && key1 < key2)) {
            emit_left()
            read1()
        } else {
            emit_right()
            read2()
        }
    }
    exit
}
'
    # $1 is command text built by awk_count and is spliced in verbatim so
    # that it runs inside the process substitution when the line is eval-ed.
    printf "awk -v width=%q -v f2=<(%s) '%s' 2>/dev/null\n" \
        "$width" "$1" "$prog"
}

# Validate the arguments, chain one counter per file into a single pipeline
# and run it.
# Arguments: the script's command-line arguments.
main() {
    (( $# >= 1 )) || error "Missing arguments"
    options "$@"

    local fullpipe counter awkline file
    local columns=1     # count columns produced by the pipeline so far

    fullpipe=$(awk_count "$1")
    shift

    for file in "$@"; do
        counter=$(awk_count "$file")
        awkline=$(awk_join "$counter" "$columns")
        fullpipe="$fullpipe | $awkline"
        columns=$((columns + 1))
    done

    # Force the C locale for every category: awk then treats the text as
    # single bytes, and sort orders keys exactly like the byte-wise string
    # comparison the merge relies on (an inherited LC_COLLATE could
    # otherwise make the two disagree).
    export LANG=C
    export LC_ALL=C
    export LC_CTYPE=C

    # Every piece of $fullpipe is either fixed program text or a %q-quoted
    # file name, so evaluating it cannot run caller-supplied shell code.
    eval "$fullpipe"
}

main "$@"