#! /bin/bash
#
#  enver
#
#  enver 3.5.0
#
#  Liefert die Uebersetzung eines englischen Verbs aus einer Woerterbuchdatei von "dict.cc".
#
#  Fuer eine Bedienungsanleitung "source enver --help" aufrufen!
#
#  Stand: 2020-03-21
#  Autor: Bernd Storck
#
# Veroeffentlicht via "https://www.facebook.com/BStLinux/"
#


# TECHNICAL COMMENT FOR PROGRAMMERS
#
# enver (for "English verbs") filters a flat-file database, which is a list of English verbs and
# their German meanings.
#
# Every data record is one line. Each record consists of exactly two fields, which are separated
# by a tab character. The first field contains the English part, the second field the German equivalent.
#
# The fields have a substructure, containing for example tags like "[idiom]".


# Static values:
script_name="enver"
script_version="3.5.0"
Tab=$'\t'   # Field separator of the vocabulary file (one tab character).


# Default values:
linecounter="none"   # Tool used to query the terminal height; "none" means no such tool was found.


# Current dynamic values:
#
# The vocabulary file is expected in the directory of the "enver" executable
# found via PATH. If "enver" is not in PATH, the inner command prints nothing
# and the file is looked up relative to the current directory.
vocabulary="$(dirname "$(command -v enver)")/dictverbs.txt"
# vocabulary="$(dirname "$(which enver)")/cmcgnoosos-916510172-e6o859.txt"

# Number of data records (lines). Always a plain integer, even if the file
# cannot be read, because get_terminal_height does arithmetic with it.
if [ -r "$vocabulary" ]; then
	quantity_of_records=$(( $(wc -l < "$vocabulary") ))
else
	quantity_of_records=0
	echo "$script_name: Die Wörterbuchdatei \"$vocabulary\" ist nicht lesbar." >&2
fi

# Pick the first available tool that can report the terminal height.
for enver_tool in tput stty; do
	if command -v "$enver_tool" > /dev/null 2>&1; then
		linecounter="$enver_tool"
		break
	fi
done
unset enver_tool


# Determines the height of the terminal in lines.
#
# Globals:  linecounter          (read)    - "tput", "stty" or anything else
#           quantity_of_records  (read)    - number of records in the vocabulary
#           terminal_height      (written) - resulting height
#
# If no tool is available, or the tool delivers no usable number (for example
# because standard input is not a terminal), the height is set to one more than
# the number of data records. That value is greater than any possible result
# list, so no pager is ever started.
function get_terminal_height() {
	local rows=""

	case "$linecounter" in
		tput)
			rows=$(tput lines)
			;;
		stty)
			# stty complains on stderr if stdin is not a terminal; the
			# fallback below handles that case, so the message is dropped.
			rows=$(stty size 2> /dev/null | cut -d" " -f1)
			;;
	esac

	if [[ "$rows" =~ ^[0-9]+$ ]] && (( 10#$rows > 0 )); then
		terminal_height=$(( 10#$rows ))
	else
		terminal_height=$(( quantity_of_records + 1 ))
	fi
}


# Writes a file to the terminal, through a pager if it does not fit on screen.
#
# Arguments: $1 - path of the file to display
# Globals:   quantity_of_lines (written) - number of lines of the file
#            terminal_height   (written, via get_terminal_height)
# Returns:   the exit status of the program that displayed the file
#
# The file is shown with "cat" unless it has more lines than the terminal
# height minus one. In that case the first available program of
# less, most, more, pg is used; "less" is started with the options -SX.
function display_page_by_page() {

	local inptfile="$1"
	local -a cmd=(cat)   # default program to display the results.
	local pager

	# Current values:
	quantity_of_lines="$(wc -l < "$inptfile")"
	get_terminal_height   # sets the value of the script global var terminal_height.

	if [[ "$quantity_of_lines" -gt $(( terminal_height - 1 )) ]]; then
		# Defines the pager program to use:
		for pager in less most more pg; do
			if command -v "$pager" > /dev/null 2>&1; then
				if [[ "$pager" == "less" ]]; then
					cmd=(less -SX)
				else
					cmd=("$pager")
				fi
				break
			fi
		done
	fi

	# The command is kept in an array and run directly (no eval), so that a
	# file name containing blanks or shell metacharacters is passed unchanged.
	"${cmd[@]}" "$inptfile"
}


# Chooses the singular or plural form of a noun for a given amount.
#
# Arguments: $1 - amount
#            $2 - singular form
#            $3 - plural form
# Globals:   object_name (written) - $3 if the amount is greater than 1, else $2
function define_numerus() {
	local amount="$1"
	local singular="$2"
	local plural="$3"

	object_name="$singular"
	[ "$amount" -gt 1 ] && object_name="$plural"
	return 0
}


# Prints the short answer: all records whose English field is exactly
# "to <searched>", formatted as aligned columns.
#
# Arguments: $1 - searched verb (used as part of an extended regular expression)
# Globals:   vocabulary (read), Tab (read)
# Outputs:   the matching records on stdout
# Returns:   the number of matching records, limited to 255; 0 means "no match".
#
# The limit is necessary because a shell function can only return values from
# 0 to 255: an unlimited count of 256 would wrap around to 0 and be mistaken by
# the callers for "no match".
function default_output() {
	local searched="$1"
	local matches
	local count

	# The data file is scanned only once; the result is printed and counted.
	matches=$(grep -E "^[[:blank:]]*(to )\<$searched\>$Tab" "$vocabulary")
	if [[ -z "$matches" ]]; then
		return 0
	fi

	printf '%s\n' "$matches" | tr "\t" "#" | column -s"#" -t

	count=$(grep -c '' <<< "$matches")
	if (( count > 255 )); then
		count=255
	fi
	return "$count"
}

# Prints the long list for a verb: all records whose English field starts with
# "to <searched>", optionally followed by further text.
#
# If that finds nothing, a second search looks for records whose English field
# starts directly with the searched word or with "[you] " followed by it. These
# are expressions with auxiliary verbs; according to the original note
# (2020-03-21) the data base was extended for them via mkbase.sh, and such
# records start with "[you]" in the data file of "dict.cc".
#
# Arguments: $1 - searched verb (used as part of an extended regular expression)
# Globals:   vocabulary (read), Tab (read)
#            quantity_of_lines (written) - number of lines displayed, 0 if none
# Outputs:   the result list via display_page_by_page; a message on stderr if
#            nothing was found
# Returns:   the number of lines found, limited to 255 (a function cannot return
#            more, and 256 would wrap around to 0, i.e. "nothing found")
function print_all_verbs() {
	local searched="$1"
	local tmpfile

	# A private temporary file instead of a fixed, predictable name in /tmp.
	tmpfile=$(mktemp "${TMPDIR:-/tmp}/enver.XXXXXX")
	if [[ -z "$tmpfile" ]]; then
		quantity_of_lines=0
		echo "$script_name: mktemp failed." >&2
		return 0
	fi

	grep -E "^[[:blank:]]*(to )\<$searched\>.*$Tab" "$vocabulary" \
		| tr "\t" "#" | column -s"#" -t > "$tmpfile"

	if [[ ! -s "$tmpfile" ]]; then
		grep -E "^[[:blank:]]*(|\[you\] )\<$searched\>.*$Tab" "$vocabulary" \
			| tr "\t" "#" | column -s"#" -t > "$tmpfile"
	fi

	if [[ -s "$tmpfile" ]]; then
		display_page_by_page "$tmpfile"    # sets also quantity_of_lines
	else
		quantity_of_lines=0
		echo "Kein Datensatz, keine Erklärung zu \"$searched\" gefunden." >&2
	fi

	rm -f "$tmpfile"

	if [[ "$quantity_of_lines" -gt 255 ]]; then
		return 255
	fi
	return $(( quantity_of_lines ))
}

# Finds the searched string in the English field even if it is only a part of
# a word.
#
# In contrast to the function "print_full", this function only looks at lines
# that begin with "to " followed by a word containing the search string.
#
# Arguments: $1 - searched string
# Globals:   vocabulary (read)
# Outputs:   matching records on stdout, the searched string highlighted
# Returns:   0 if at least one record was printed, non-zero otherwise
function print_verbs_full() {
	local searched="$1"

	# Both stages ignore case. The second grep only adds the highlighting;
	# if it were case-sensitive it would drop every record found for a
	# search string whose case differs from the data.
	grep -iE "^to +([[:alpha:]]-?)*$searched" "$vocabulary" \
		| grep -iF --color=always "$searched"
}

# Finds the searched string in the English field even if it is only a part of
# a word, but ignores occurrences in the annotations or tags at the end of the
# English field. Those annotations are written between square brackets, like
# "[annotation or tag]".
#
# Arguments: $1 - searched string
# Globals:   vocabulary (read), Tab (read)
# Outputs:   matching records on stdout, the searched string highlighted
# Returns:   0 if at least one record was printed, non-zero otherwise
function print_full() {
	local searched="$1"

	# The first grep only accepts a match that is preceded by neither a tab
	# nor an opening square bracket. The second grep adds the highlighting
	# and ignores case as well, so that it does not drop records the first
	# stage found for a search string in a different case.
	grep -iE "^[^[$Tab]*$searched" "$vocabulary" \
		| grep -iF --color=always "$searched"
}

# Prints every record that contains the searched string anywhere in its first
# field. Fields are separated by tabs; the first field is English, the second
# German. A match may be part of a word or part of an annotation between
# square brackets.
#
# Arguments: $1 - searched string (used as part of an extended regular expression)
# Globals:   vocabulary (read), Tab (read)
# Outputs:   matching records on stdout, the match highlighted
# Returns:   the exit status of grep (0 if at least one record matched)
function print_all() {
	local term="$1"
	# The match has to end at the first tab of the line, i.e. inside field one.
	local first_field_pattern="^[^${Tab}]*${term}[^${Tab}]*${Tab}"

	grep --color=always -iE "$first_field_pattern" "$vocabulary"
}


# Error-tolerant search with "agrep", performed in up to four levels. The
# first level that finds something displays its result and ends the script.
#
# Level 1: approximate match (one error) in the English field, filtered for an
#          approximate match as a whole word.
# Level 2: a simplification of level 1, because agrep has limitations for the
#          length of complex search patterns.
# Level 3: accepts that the searched string is only a part of a word.
# Level 4: tries to handle agrep's problem with long patterns by shortening the
#          searched word character by character, at most by half of its
#          length, and accepts two errors instead of one.
#
# Arguments: $1 - searched string
# Globals:   vocabulary (read), Tab (read)
# Outputs:   the result list via display_page_by_page; messages on stderr;
#            the "agrep not found" message on stdout
# Exits:     the script with 0 after displaying a result, with 1 if no level
#            found anything. Returns normally only if agrep is not installed.
function approximate_search() {
	local searched="$1"
	local search_level
	local match_found=1   # flag for "no match".
	local len
	local max
	local step
	local raw_file
	local out_file

	echo -e "\nVersuch einer fehlertoleranten Suche startet." >&2

	if ! command -v agrep > /dev/null 2>&1; then
		echo "FEHLER: Das Programm \"agrep\" wurde nicht gefunden."
		echo -e "\nFür eine ungefähre, fehlertolerante Suche muss agrep vorhanden sein."
		echo "agrep bekommen Sie, wenn Sie das Paket \"glimpse\" installieren."
		return
	fi

	# Private temporary files: raw_file takes the records found, out_file the
	# column-formatted list. Both are removed before the script is left.
	raw_file=$(mktemp "${TMPDIR:-/tmp}/enver.XXXXXX") || exit 1
	out_file=$(mktemp "${TMPDIR:-/tmp}/enver.XXXXXX") || { rm -f "$raw_file"; exit 1; }

	for search_level in 1 2 3 4; do

		case "$search_level" in
			1)
				agrep -1 "^[^$Tab]*$searched[^$Tab]*$Tab" "$vocabulary" \
					| agrep -1 -w "$searched" > "$raw_file"
				match_found="$?"
				;;
			2)
				agrep -1 "$searched[^$Tab]*$Tab" "$vocabulary" \
					| agrep -1 -w "$searched" > "$raw_file"
				match_found="$?"
				;;
			3)
				agrep -1 "$searched[^$Tab]*$Tab" "$vocabulary" > "$raw_file"
				match_found="$?"
				;;
			4)
				len=${#searched}
				max=$(( len / 2 ))
				for (( step = 1; step <= max; step++ )); do
					len=$(( len - 1 ))
					searched="${searched:0:$len}"
					if agrep -2 "^[^$Tab]*$searched[^$Tab]*$Tab" "$vocabulary" > "$raw_file"; then
						match_found=0
						break
					fi
				done
				# Any status other than 0 counts as "nothing found", also an
				# error status left over from level 3.
				if [ "$match_found" -ne 0 ]; then
					echo "Auch die fehlertolerante Suche hat kein ähnliches Wort gefunden." >&2
					rm -f "$raw_file" "$out_file"
					exit 1
				fi
				;;
		esac

		if [ "$match_found" -eq 0 ]; then
			# If the column formatting fails, the unformatted records are shown.
			if ! tr "\t" "#" < "$raw_file" | column -s"#" -t > "$out_file"; then
				cp "$raw_file" "$out_file"
			fi
			echo
			display_page_by_page "$out_file"
			rm -f "$raw_file" "$out_file"
			exit 0
		fi
	done

	rm -f "$raw_file" "$out_file"
}


# Extended search: tries a cascade of searches and stops as soon as one of
# them has found a matching data record.
#
# Order of the cascade: print_verbs_full, print_full, print_all and finally
# approximate_search. Progress messages are written to stderr.
#
# Arguments: $1 - searched string
function deep_search_cascade() {
	local term="$1"

	echo "Weitergehende Suche auch nach Wortteilen startet." > /dev/stderr
	# sleep .75
	print_verbs_full "$term" && return 0
	print_full "$term" && return 0

	echo "Weitergehende Suche ohne Ergebnis." > /dev/stderr
	echo
	# sleep .5
	echo "Suche nach jedem Vorkommen des Such-Strings startet." > /dev/stderr
	print_all "$term" && return 0

	echo "Die Zeichenkette \"$term\" kommt im englischen Teil der Datenbasis nicht vor." > /dev/stderr
	approximate_search "$term"
}

# Prints the phrases that contain the searched word.
#
# Lines are treated as phrases if they begin with an upper case letter or
# contain "[Redewendung]" or "[idiom]". The searched word has to occur, as a
# whole word, in the first field, which is the English part.
#
# Arguments: $1 - searched word (used as part of an extended regular expression)
# Globals:   vocabulary (read)
# Outputs:   the phrases as aligned columns on stdout, the word highlighted
function phrases() {
	local term="$1"
	local tab=$'\t'
	local english_field_pattern="^[[:blank:]]*[^${tab}]*\<${term}\>.*${tab}"

	grep -e "^[[:blank:]]*[A-Z]" -e "\[Redewendung\]" -e "\[idiom\]" "$vocabulary" \
		| sed "s/\[Redewendung\]//" \
		| grep -iE "$english_field_pattern" \
		| grep --color=always -wi "$term" \
		| tr "\t" "#" \
		| column -s"#" -t
}


# Tells the user how many records an approximate search ("-c") would deliver.
#
# Arguments: $1 - searched string
# Globals:   vocabulary (read), Tab (read), script_name (read),
#            object_name (written, via define_numerus)
# Outputs:   one hint on stderr if agrep counts at least one record;
#            nothing if agrep is not installed or counts no record
# Returns:   always 0
function get_amount_of_approximate_matches() {

	local searched="$1"
	local quantity

	# Without agrep there is nothing to report.
	command -v agrep > /dev/null 2>&1 || return 0

	quantity=$(agrep -c1 "^[^$Tab]*$searched[^$Tab]*$Tab" "$vocabulary")

	# agrep may fail without printing a count; only a number is compared.
	if [[ "$quantity" =~ ^[0-9]+$ ]] && [ "$quantity" -gt 0 ]; then
		define_numerus "$quantity" "Datensatz" "Datensätze"
		echo -e "\n\"$script_name -c $searched\" liefert $quantity $object_name. (Suche nach ungefähr übereinstimmenden Wörtern)" >&2
	fi
	return 0
}


# Tells the user how many phrases the call with "-P" would find.
#
# Phrase lines are those that begin with an upper case letter or contain
# "[Redewendung]" or "[idiom]"; of these, the ones with the searched word as a
# whole word in the English field are counted.
#
# Arguments: $1 - searched word (used as part of an extended regular expression)
# Globals:   vocabulary (read), script_name (read),
#            object_name (written, via define_numerus)
# Outputs:   one hint on stderr if the count is greater than 0
function write_amount_of_phrases() {

	local term="$1"
	local tab=$'\t'
	local hits

	hits=$(
		grep -e "^[[:blank:]]*[A-Z]" -e "\[Redewendung\]" -e "\[idiom\]" "$vocabulary" \
			| grep -c -iE "^[[:blank:]]*[^${tab}]*\<${term}\>.*${tab}"
	)

	# Nothing to report without any phrase.
	[ "$hits" -gt 0 ] || return 0

	define_numerus "$hits" "Redewendung" "Redewendungen"
	echo "\"$script_name -P $term\" findet $hits $object_name." > /dev/stderr
}

# Tells the user how many data records the command "enver -V" finds, i.e. how
# many records have an English field starting with "to <searched>".
#
# Arguments: $1 - searched verb (used as part of an extended regular expression)
# Globals:   vocabulary (read), script_name (read),
#            object_name (written, via define_numerus)
# Outputs:   one hint on stderr if the count is greater than 0
function write_total_amount() {

	local term="$1"
	local tab=$'\t'
	local hits

	hits=$(grep -c -E "^[[:blank:]]*(to )\<${term}\>.*${tab}" "$vocabulary")

	# Nothing to report without any record.
	[ "$hits" -gt 0 ] || return 0

	define_numerus "$hits" "Datensatz" "Datensätze"
	echo -e  "\n\"$script_name -V $term\" liefert $hits $object_name." > /dev/stderr
}

# Counts how many data records the function "deep_search_cascade" for the
# extended search would find, and tells the user the result.
#
# The extended search tries to find records by a cascade of three regular
# expressions; the cascade stops as soon as one expression has found matching
# records. The three expressions are the ones used by print_verbs_full(),
# print_full() and print_all().
#
# Arguments: $1 - searched string (used as part of an extended regular expression)
# Globals:   vocabulary (read), Tab (read), script_name (read)
# Outputs:   one message on stdout
# Returns:   the number of records, limited to 255 (a function cannot return
#            more, and 256 would wrap around to 0, i.e. "nothing found")
function count_cascade() {
	local searched="$1"
	local amount

	amount=$(grep -ciE "^to +([[:alpha:]]-?)*$searched" "$vocabulary") # Counts print_verbs_full().
	amount=${amount:-0}   # grep prints no count if it cannot read the file.
	if [ "$amount" -eq 0 ]; then
		amount=$(grep -ciE "^[^[$Tab]*$searched" "$vocabulary") # Counts print_full().
		amount=${amount:-0}
		if [ "$amount" -eq 0 ]; then  # Counts print_all():
			amount=$(grep -ciE "^[^$Tab]*$searched[^$Tab]*$Tab" "$vocabulary")
			amount=${amount:-0}
		fi
	fi

	if [ "$amount" -gt 1 ]; then
		echo -e "\nDie vertiefte Suche mit \"$script_name -e $searched\" findet $amount Datensätze."
	elif [ "$amount" -eq 1 ]; then
		echo -e "\nDie vertiefte Suche mit \"$script_name -e $searched\" findet einen Datensatz."
	else
		echo "Die Wörterbuchdatei enthält keinen Datensatz mit der Zeichenkette \"$searched\" in Englisch."
	fi

	if [ "$amount" -gt 255 ]; then
		return 255
	fi
	return "$amount"
}


# Writes the help page to stdout: name and version of the script, call format,
# examples, the short options and their alternative long names.
#
# Globals: script_name (read), script_version (read)
#
# The page starts and ends with an empty line.
display_help_screen() {
	cat <<EOF

$script_name $script_version
    "$script_name" zeigt Übersetzungen englischer Verben ins Deutsche an.

AUFRUFFORMAT:
    $script_name [-v|-e|-V|-c|-D|-P|-p|-f|-a|-h] VOKABEL

BEISPIELE:
    $script_name -v translate
    $script_name translate

    (Beide Aufrufe bewirken das selbe. "translate" ist hier das Wort, nach dem gefragt wird.)

AUFRUFPARAMETER:
    -v   Gibt schnell eine kurze Auskunft über ein englisches Verb.
    -e   Kann für eine erweiterte Suche verwendet werden, nachdem "$script_name -v" erfolglos war.
    -V   Sucht gründlich nach englischen Verben und listet alle dazu verfügbaren Datensätze auf.
    -c   Sucht nach englischen Wörtern, die dem Suchwort ähneln.
    -D   Sucht von einem deutschen Wort ausgehend nach englischen Verben.
    -P   Findet Redewendungen (engl. "phrases") und Sätze mit der Zeichenkette.
    -p   Kombiniert "-v" (kurze Auskunft) mit "-P" (Liste von Redewendungen).
    -f   Zeigt Datensätze mit der gesuchten Zeichenkette im englischen Vokabeltext.
    -a   Zeigt alle Datensätze mit der gesuchten Zeichenkette im englischen Datenfeld.
    -h   Zeigt diese Hilfeseite an.

ALTERNATIVE SPRECHENDE AUFRUFPARAMETER:
    -v    --verb
    -e    --weiter, --erweitere, --extend, --deep-search
    -V    --verbs
    -c    --circa, --etwa, --ungefähr, --proximate
    -D    --deutsch, --de-en
    -P    --phrases
    -p    --phrases-too
    -f    --full
    -a    --all, --alle, --alles
    -h    --help

EOF
}


# Short list, simple info about a verb. Used by "-v" and by a call without
# any option.
#
# default_output returns the number of records it printed. If it printed any,
# hints about the longer lists are added; otherwise the basic function of
# "enver -V" is executed automatically, and if that finds nothing either, the
# user is told what the extended search would find.
#
# Arguments: $1 - searched verb
function short_verb_lookup() {
	local searched="$1"

	default_output "$searched"
	if [ "$?" -ne 0 ]; then
		write_total_amount "$searched"
		write_amount_of_phrases "$searched"
	else
		print_all_verbs "$searched" # returns the number of found datasets. 0 means no success.
		if [ "$?" -eq 0 ]; then
			count_cascade "$searched"
		fi
	fi
}

# Evaluates the command line and starts the requested kind of search.
#
# Arguments: $1 - option, or the search term if no option is given
#            $2 - search term if $1 is an option
# Returns:   0 after a search or after showing the help page,
#            2 if the search term is missing.
#
# A call without any argument shows the help page. A call with an option but
# without a search term is rejected, because an empty search term would turn
# the regular expressions of the search functions into patterns that match
# unrelated records.
function main() {
	local mode
	local searched="$2"

	if [ "$#" -eq 0 ] || [ -z "$1" ]; then
		display_help_screen
		return 0
	fi

	# Step 1: translate the option into a search mode.
	case "$1" in
	  -v|--verb)
		mode="verb"
		;;
	  -V|--verbs)
		mode="verbs"
		;;
	  -[eE]|--weiter|--erweitere|--erweitert|--extend|--deep|--deep-search|--deep_search)
		mode="deep"
		;;
	  -[cC]|--circa|--etwa|--ungefähr|--proximate)
		mode="approximate"
		;;
	  -[gG]|-[dD]|--de-en|--german|--deutsch)
		mode="german"
		;;
	  -p|--phrases-too|--phrases_too|-Pv|-vP)
		mode="phrases_too"
		;;
	  -P|--phrases)
		mode="phrases"
		;;
	  -pvf|--verbs-full)
		mode="verbs_full"
		;;
	  -f|--full)
		mode="full"
		;;
	  -a|--all|--alle|--alles)
		mode="all"
		;;
	  -h|--help)
		display_help_screen
		return 0
		;;
	  *)
		# No known option: the first argument itself is the search term.
		mode="verb"
		searched="$1"
		;;
	esac

	if [ -z "$searched" ]; then
		echo "FEHLER: Es wurde kein Suchwort angegeben." >&2
		display_help_screen >&2
		return 2
	fi

	# Step 2: run the search.
	case "$mode" in

	  verb)
		# Short list, simple info about a verb:
		short_verb_lookup "$searched"
		;;

	  verbs)
		# Long list about the searched verb:
		print_all_verbs "$searched"
		if [ "$?" -eq 0 ]; then
			echo -e "\nWeitergehende Suche auch nach Wortteilen startet." >&2
			print_verbs_full "$searched"
			if [ "$?" -ne 0 ]; then
				print_full "$searched"
				if [ "$?" -ne 0 ]; then
					echo "Weitergehende Suche ohne Ergebnis." >&2
					echo -e "\nSuche nach jedem Vorkommen des Such-Strings startet." >&2
					print_all "$searched"
					if [ "$?" -ne 0 ]; then
						echo "Die gesuchte Zeichenkette kommt im englischen Teil der Datenbasis nicht vor." >&2
						get_amount_of_approximate_matches "$searched"
					fi
				fi
			fi
		fi
		;;

	  deep)
		# Starts the search cascade, which uses up to three different regular
		# expressions step by step to find any records matching the searched string.
		deep_search_cascade "$searched"
		;;

	  approximate)
		approximate_search "$searched"
		;;

	  german)
		# Find a verb to a German word:
		grep -E "^[[:blank:]]*(to ).*$Tab.*\<$searched\>" "$vocabulary" | tr "\t" "#" | column -s"#" -t
		;;

	  phrases_too)
		# Find also phrases, combines "-v" with "-P":
		default_output "$searched"
		echo
		phrases "$searched"
		write_total_amount "$searched"
		;;

	  phrases)
		# Find and write only phrases:
		phrases "$searched"
		;;

	  verbs_full)
		# Only for testing purposes, call not mentioned in help documentation.
		# Finds a string even if it is a part of a word, but only in words directly following "^to ".
		print_verbs_full "$searched"
		;;

	  full)
		# Finds a string even if it is a part of a word.
		print_full "$searched"
		;;

	  all)
		# Finds all records/lines with the searched string in English.
		print_all "$searched"
		;;

	esac

	return 0
}

main "$@" || exit $?

# Reaching the end of the script always reports success to the calling shell.
exit 0