-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathenver
executable file
·527 lines (441 loc) · 15.3 KB
/
enver
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
#! /bin/bash
#
# enver 3.5.0
#
# Returns the translation of an English verb from a dictionary file of "dict.cc".
#
# For usage instructions call "source enver --help"!
#
# Stand: 2020-03-21
# Autor: Bernd Storck
#
# Veroeffentlicht via "https://www.facebook.com/BStLinux/"
#
# TECHNICAL COMMENT FOR PROGRAMMERS
#
# enver (for "English verbs") filters a flat-file database, which is a list of English verbs and
# their German meanings.
#
# Every data record is one line. Each record consists of exactly two fields, which are separated
# by a tab character. The first field contains the English part, the second field the German equivalent.
#
# The fields have a substructure, containing for example tags like "[idiom]".
# Static values:
script_name="enver"
script_version="3.5.0"
# Field separator of the vocabulary file (one tab character). It is written as
# an ANSI-C quoted escape so that an editor or a copy/paste cannot silently
# turn the tab into a space.
Tab=$'\t'

# Default values:
# Name of the tool used to ask for the terminal height; "none" = no tool found.
linecounter="none"

# Current dynamic values:
# The vocabulary file is looked up in the directory of the "enver" executable
# that is found on PATH.
vocabulary="$(dirname "$(which enver)")/dictverbs.txt"
# vocabulary="$(dirname "$(which enver)")/cmcgnoosos-916510172-e6o859.txt"
quantity_of_records=$(wc -l < "$vocabulary")

# Use the first available tool that can report the terminal height.
for i in tput stty; do
  if command -v "$i" > /dev/null 2>&1; then
    linecounter="$i"
    break
  fi
done
# Stores the number of terminal rows in the script-global "terminal_height".
#
# Globals read:    linecounter ("tput", "stty" or anything else),
#                  quantity_of_records (only for the fallback)
# Globals written: terminal_height
get_terminal_height() {
  if [ "$linecounter" = "tput" ]; then
    terminal_height=$(tput lines)
  elif [ "$linecounter" = "stty" ]; then
    # "stty size" prints "<rows> <columns>"; only the rows are of interest.
    terminal_height=$(stty size | cut -d" " -f1)
  else
    # No tool to ask: pretend the terminal is taller than the whole database,
    # so that no result list can ever trigger the pager.
    terminal_height=$(( quantity_of_records + 1 ))
  fi
}
# Displays a result file, through a pager if it does not fit on the screen.
#
# Arguments:       $1 - path of the file to display
# Globals written: quantity_of_lines (number of lines of the file),
#                  terminal_height (via get_terminal_height)
# Returns:         exit status of the program used for displaying
#
# The command is kept in an array and executed directly instead of being
# assembled into a string for "eval", so file names containing blanks or
# shell metacharacters are passed on unchanged.
display_page_by_page() {
  local inptfile="$1"
  local pager
  local -a cmd=(cat) # default program to display the results

  quantity_of_lines="$(wc -l < "$inptfile")"
  get_terminal_height # sets the script-global variable terminal_height

  if [[ "$quantity_of_lines" -gt $(( terminal_height - 1 )) ]]; then
    # The output does not fit on one screen: use the first pager available.
    for pager in less most more pg; do
      if command -v "$pager" > /dev/null 2>&1; then
        if [[ "$pager" == "less" ]]; then
          cmd=(less -SX)
        else
          cmd=("$pager")
        fi
        break
      fi
    done
  fi

  "${cmd[@]}" "$inptfile"
}
# Chooses the singular or plural form of a noun for a given amount.
#
# Arguments:       $1 - amount, $2 - singular form, $3 - plural form
# Globals written: object_name (plural if the amount is greater than 1,
#                  otherwise singular)
define_numerus() {
  object_name="$2"
  if [ "$1" -gt 1 ]; then
    object_name="$3"
  fi
}
# Prints the short list for a verb: all records whose English field is exactly
# "to <searched>", formatted as a two-column table.
#
# Arguments:    $1 - the searched verb (used inside an extended regular expression)
# Globals read: Tab, vocabulary
# Returns:      the number of records found, limited to 255. 0 means "nothing
#               found". The limit is needed because an exit status is taken
#               modulo 256: without it, 256 matches would be reported as 0.
default_output() {
  local searched="$1"
  local matches
  local count

  # The database is scanned only once; the result is used for both the
  # output and the counting.
  matches=$(grep -E "^[[:blank:]]*(to )\<${searched}\>${Tab}" "$vocabulary")
  if [[ -z "$matches" ]]; then
    return 0
  fi

  printf '%s\n' "$matches" | tr "\t" "#" | column -s"#" -t

  count=$(grep -c '' <<< "$matches")
  if (( count > 255 )); then
    count=255
  fi
  return "$count"
}
# Prints the long list for a verb: every record whose English field starts
# with "to <searched>". If there is none, records with auxiliary verbs are
# tried, which start directly with the word or with "[you] ".
#
# Arguments:       $1 - the searched verb (used inside an extended regular expression)
# Globals read:    Tab, vocabulary
# Globals written: quantity_of_lines (set by display_page_by_page, or 0)
# Returns:         the number of displayed lines, limited to 255. 0 means
#                  "nothing found". The limit is needed because an exit status
#                  is taken modulo 256: without it, 256 lines would be
#                  reported as 0.
#
# The formatted result is buffered in a private temporary file made by mktemp
# instead of a fixed, predictable name in /tmp. The message goes to file
# descriptor 2 directly, because reopening /dev/stderr would truncate a file
# that stderr has been redirected to.
print_all_verbs() {
  local searched="$1"
  local tmpfile
  local pattern
  local found=0

  if ! tmpfile=$(mktemp); then
    quantity_of_lines=0
    return 0
  fi

  # The second pattern covers expressions with auxiliary verbs; in the data
  # file of "dict.cc" those records start with "[you]".
  for pattern in \
    "^[[:blank:]]*(to )\<${searched}\>.*${Tab}" \
    "^[[:blank:]]*(|\[you\] )\<${searched}\>.*${Tab}"
  do
    grep -E "$pattern" "$vocabulary" | tr "\t" "#" | column -s"#" -t > "$tmpfile"
    if [[ -s "$tmpfile" ]]; then
      display_page_by_page "$tmpfile" # also sets quantity_of_lines
      found=1
      break
    fi
  done
  rm -f -- "$tmpfile"

  if (( ! found )); then
    quantity_of_lines=0
    echo "Kein Datensatz, keine Erklärung zu \"$searched\" gefunden." >&2
    return 0
  fi

  if (( quantity_of_lines > 255 )); then
    return 255
  fi
  return $(( quantity_of_lines ))
}
# Finds the searched string in the English field even if it is only a part
# of a word.
#
# In contrast to "print_full", this function looks only at lines which begin
# with "to " followed by a word containing the search string.
#
# Arguments:    $1 - the searched string (regular expression for the search,
#                    fixed string for the highlighting)
# Globals read: vocabulary
# Returns:      0 if at least one record was printed, non-zero otherwise
#
# The search ignores case, so the highlighting filter has to ignore case as
# well. Otherwise it would drop records found by the first stage, and the
# result would not agree with the amount reported by count_cascade.
print_verbs_full() {
  local searched="$1"
  grep -iE "^to +([[:alpha:]]-?)*$searched" "$vocabulary" \
    | grep -iF --color=always "$searched"
}
# Finds the searched string in the English field even if it is only a part
# of a word. Occurrences in the annotations or tags at the end of the English
# field are ignored; those are written between square brackets, for example
# "[idiom]".
#
# Arguments:    $1 - the searched string (regular expression for the search,
#                    fixed string for the highlighting)
# Globals read: Tab, vocabulary
# Returns:      0 if at least one record was printed, non-zero otherwise
#
# The search ignores case, so the highlighting filter has to ignore case as
# well. Otherwise it would drop records found by the first stage, and the
# result would not agree with the amount reported by count_cascade.
print_full() {
  local searched="$1"
  # "[^[$Tab]*" = any text that contains neither "[" nor the field separator.
  grep -iE "^[^[$Tab]*$searched" "$vocabulary" \
    | grep -iF --color=always "$searched"
}
# Prints every record which contains the searched string anywhere in its
# first (English) field, even as a part of a word or inside an annotation
# between square brackets. Case is ignored and the hit is highlighted.
#
# Fields are separated by tabs: the first field is English, the second German.
#
# Arguments:    $1 - the searched string (used inside an extended regular expression)
# Globals read: Tab, vocabulary
# Returns:      the exit status of grep (0 if at least one record matched)
print_all() {
  local searched="$1"
  # Searched string, surrounded by any non-separator text, then the separator.
  local first_field_pattern="^[^$Tab]*${searched}[^$Tab]*$Tab"

  grep --color=always -iE "$first_field_pattern" "$vocabulary"
}
# Error-tolerant search with "agrep" in four levels of increasing tolerance.
# The first level that finds something displays its result and ends the script.
#
#   Level 1: approximate match (one error) in the English field; the searched
#            string has to be a word.
#   Level 2: simplified pattern of level 1, because agrep has limitations for
#            the length of complex search patterns.
#   Level 3: like level 2, but the searched string may be a part of a word.
#   Level 4: works around agrep's problem with long patterns by shortening the
#            searched word step by step (down to half of its length) and
#            accepts two errors instead of one.
#
# Arguments:    $1 - the searched string
# Globals read: Tab, vocabulary
# Exits:        with 0 after displaying a result, with 1 if no level found a
#               similar word or no temporary file could be created
# Returns:      0 without searching if agrep is not installed
#
# Results are buffered in private files made by mktemp and removed before
# leaving. Diagnostics are written to file descriptor 2 directly, because
# reopening /dev/stderr would truncate a file that stderr was redirected to.
approximate_search() {
  local searched="$1"
  local search_level
  local match_found=1 # 0 = match found, anything else = no match
  local len
  local max
  local step
  local raw_file
  local table_file

  echo -e "\nVersuch einer fehlertoleranten Suche startet." >&2

  if ! command -v agrep > /dev/null 2>&1; then
    {
      echo "FEHLER: Das Programm \"agrep\" wurde nicht gefunden."
      echo -e "\nFür eine ungefähre, fehlertolerante Suche muss agrep vorhanden sein."
      echo "agrep bekommen Sie, wenn Sie das Paket \"glimpse\" installieren."
    } >&2
    return 0
  fi

  if ! raw_file=$(mktemp); then
    echo "FEHLER: Es konnte keine temporäre Datei angelegt werden." >&2
    exit 1
  fi
  if ! table_file=$(mktemp); then
    rm -f -- "$raw_file"
    echo "FEHLER: Es konnte keine temporäre Datei angelegt werden." >&2
    exit 1
  fi

  for search_level in 1 2 3 4; do
    case "$search_level" in
      1)
        agrep -1 "^[^$Tab]*$searched[^$Tab]*$Tab" "$vocabulary" \
          | agrep -1 -w "$searched" > "$raw_file"
        match_found=$?
        ;;
      2)
        agrep -1 "$searched[^$Tab]*$Tab" "$vocabulary" \
          | agrep -1 -w "$searched" > "$raw_file"
        match_found=$?
        ;;
      3)
        agrep -1 "$searched[^$Tab]*$Tab" "$vocabulary" > "$raw_file"
        match_found=$?
        ;;
      4)
        len=${#searched}
        max=$(( len / 2 ))
        for (( step = 1; step <= max; step++ )); do
          # Cut one more character off the end of the searched word.
          len=$(( len - 1 ))
          searched="${searched:0:len}"
          if agrep -2 "^[^$Tab]*$searched[^$Tab]*$Tab" "$vocabulary" > "$raw_file"; then
            match_found=0
            break
          fi
        done
        ;;
    esac

    if [[ "$match_found" -eq 0 ]]; then
      # Format the records as a table; show them unformatted if that fails.
      if ! tr "\t" "#" < "$raw_file" | column -s"#" -t > "$table_file"; then
        cp -- "$raw_file" "$table_file"
      fi
      echo
      display_page_by_page "$table_file"
      rm -f -- "$raw_file" "$table_file"
      exit 0
    fi
  done

  # No level was successful, whatever non-zero status agrep ended with.
  rm -f -- "$raw_file" "$table_file"
  echo "Auch die fehlertolerante Suche hat kein ähnliches Wort gefunden." >&2
  exit 1
}
# Extended search: tries a cascade of searches, each one less strict than the
# one before, and stops as soon as one of them has found a record.
#
# Order: print_verbs_full, print_full, print_all and finally approximate_search.
#
# Arguments: $1 - the searched string
deep_search_cascade() {
  local searched="$1"

  echo "Weitergehende Suche auch nach Wortteilen startet." > /dev/stderr

  # Steps 1 and 2: the string as a part of a word in the English field.
  if print_verbs_full "$searched"; then
    return 0
  fi
  if print_full "$searched"; then
    return 0
  fi

  echo "Weitergehende Suche ohne Ergebnis." > /dev/stderr
  echo
  echo "Suche nach jedem Vorkommen des Such-Strings startet." > /dev/stderr

  # Step 3: any occurrence of the string in the English field.
  if print_all "$searched"; then
    return 0
  fi

  # Step 4: the string does not occur at all, so look for similar words.
  echo "Die Zeichenkette \"$searched\" kommt im englischen Teil der Datenbasis nicht vor." > /dev/stderr
  approximate_search "$searched"
}
# Prints phrases which contain the searched word in the English field.
#
# Lines are treated as phrases if they begin with an upper case letter or
# contain "[Redewendung]" or "[idiom]". The tag "[Redewendung]" is removed
# from the output.
#
# Arguments:    $1 - the searched word (used inside an extended regular expression)
# Globals read: Tab, vocabulary
# Returns:      the exit status of the last stage of the pipeline (column)
#
# The field separator is referenced as $Tab, like in the other search
# functions, instead of a literal whitespace character inside the pattern.
# With a space in that place, a phrase with several words in front of the
# searched word cannot match.
phrases() {
  local searched="$1"

  grep -e "^[[:blank:]]*[A-Z]" -e "\[Redewendung\]" -e "\[idiom\]" "$vocabulary" \
    | sed "s/\[Redewendung\]//" \
    | grep -iE "^[[:blank:]]*[^$Tab]*\<$searched\>.*$Tab" \
    | grep --color=always -wi "$searched" \
    | tr "\t" "#" \
    | column -s"#" -t
}
# Tells the user how many records the approximate search ("-c") would find.
# Nothing is written if agrep is not installed or if there is no such record.
#
# Arguments:       $1 - the searched string
# Globals read:    Tab, vocabulary, script_name
# Globals written: object_name (via define_numerus)
get_amount_of_approximate_matches() {
  local searched="$1"
  local quantity

  # Without agrep there is nothing to count.
  [ $(which agrep) ] || return 0

  # "-c1": count the records matching with at most one error.
  quantity=$(agrep -c1 "^[^$Tab]*$searched[^$Tab]*$Tab" "$vocabulary")
  [ "$quantity" -gt 0 ] || return 0

  define_numerus $quantity "Datensatz" "Datensätze"
  echo -e "\n\"$script_name -c $searched\" liefert $quantity $object_name. (Suche nach ungefähr übereinstimmenden Wörtern)" > /dev/stderr
}
# Tells the user how many phrases the call "enver -P" would find for the
# searched word. Nothing is written if there is no phrase.
#
# Lines are treated as phrases if they begin with an upper case letter or
# contain "[Redewendung]" or "[idiom]"; the searched word has to be in the
# English field.
#
# Arguments:       $1 - the searched word (used inside an extended regular expression)
# Globals read:    Tab, vocabulary, script_name
# Globals written: object_name (via define_numerus)
#
# The field separator is referenced as $Tab instead of a literal whitespace
# character inside the pattern. The message is written to file descriptor 2
# directly, because reopening /dev/stderr would truncate a file that stderr
# has been redirected to.
write_amount_of_phrases() {
  local searched="$1"
  local quantity_of_phrases

  quantity_of_phrases=$(
    grep -e "^[[:blank:]]*[A-Z]" -e "\[Redewendung\]" -e "\[idiom\]" "$vocabulary" \
      | grep -c -iE "^[[:blank:]]*[^$Tab]*\<$searched\>.*$Tab"
  )

  if [[ "$quantity_of_phrases" -gt 0 ]]; then
    define_numerus "$quantity_of_phrases" "Redewendung" "Redewendungen"
    echo "\"$script_name -P $searched\" findet $quantity_of_phrases $object_name." >&2
  fi
}
# Tells the user how many records the call "enver -V" would find for the
# searched verb. Nothing is written if there is no record.
#
# Arguments:       $1 - the searched verb (used inside an extended regular expression)
# Globals read:    Tab, vocabulary, script_name
# Globals written: object_name (via define_numerus)
#
# The pattern is the one used by print_all_verbs, with the field separator
# referenced as $Tab instead of a literal whitespace character. The message
# is written to file descriptor 2 directly, because reopening /dev/stderr
# would truncate a file that stderr has been redirected to.
write_total_amount() {
  local searched="$1"
  local quantity

  quantity=$(grep -c -E "^[[:blank:]]*(to )\<$searched\>.*$Tab" "$vocabulary")

  if [[ "$quantity" -gt 0 ]]; then
    define_numerus "$quantity" "Datensatz" "Datensätze"
    echo -e "\n\"$script_name -V $searched\" liefert $quantity $object_name." >&2
  fi
}
# Counts how many records the extended search ("deep_search_cascade") finds
# and writes a matching message to stdout.
#
# The extended search uses a cascade of three regular expressions and stops
# at the first one which finds something. The same expressions are counted
# here, in the same order: print_verbs_full, print_full, print_all.
#
# Arguments:    $1 - the searched string (used inside extended regular expressions)
# Globals read: Tab, vocabulary, script_name
# Returns:      the number of records, limited to 255. The limit is needed
#               because an exit status is taken modulo 256: without it,
#               256 records would be reported as 0.
count_cascade() {
  local searched="$1"
  local amount=0
  local pattern

  for pattern in \
    "^to +([[:alpha:]]-?)*$searched" \
    "^[^[$Tab]*$searched" \
    "^[^$Tab]*$searched[^$Tab]*$Tab"
  do
    amount=$(grep -ciE "$pattern" "$vocabulary")
    amount=${amount:-0} # grep prints nothing if the file cannot be read
    if (( amount > 0 )); then
      break
    fi
  done

  if (( amount > 1 )); then
    echo -e "\nDie vertiefte Suche mit \"$script_name -e $searched\" findet $amount Datensätze."
  elif (( amount == 1 )); then
    echo -e "\nDie vertiefte Suche mit \"$script_name -e $searched\" findet einen Datensatz."
  else
    echo "Die Wörterbuchdatei enthält keinen Datensatz mit der Zeichenkette \"$searched\" in Englisch."
  fi

  if (( amount > 255 )); then
    return 255
  fi
  return "$amount"
}
# Writes the help screen to stdout: call format, examples, the short options
# and their long alternatives.
#
# Globals read: script_name, script_version
display_help_screen() {
  local help_text

  help_text="
$script_name $script_version
\"$script_name\" zeigt Übersetzungen englischer Verben ins Deutsche an.
AUFRUFFORMAT:
$script_name [-v|-e|-V|-c|-D|-P|-p|-f|-a|-h] VOKABEL
BEISPIELE:
$script_name -v translate
$script_name translate
(Beide Aufrufe bewirken das selbe. \"translate\" ist hier das Wort, nach dem gefragt wird.)
AUFRUFPARAMETER:
-v Gibt schnell eine kurze Auskunft über ein englisches Verb.
-e Kann für eine erweiterte Suche verwendet werden, nachdem \"$script_name -v\" erfolglos war.
-V Sucht gründlich nach englischen Verben und listet alle dazu verfügbaren Datensätze auf.
-c Sucht nach englischen Wörtern, die dem Suchwort ähneln.
-D Sucht von einem deutschen Wort ausgehend nach englischen Verben.
-P Findet Redewendungen (engl. \"phrases\") und Sätze mit der Zeichenkette.
-p Kombiniert \"-v\" (kurze Auskunft) mit \"-P\" (Liste von Redewendungen).
-f Zeigt Datensätze mit der gesuchten Zeichenkette im englischen Vokabeltext.
-a Zeigt alle Datensätze mit der gesuchten Zeichenkette im englischen Datenfeld.
-h Zeigt diese Hilfeseite an.
ALTERNATIVE SPRECHENDE AUFRUFPARAMETER:
-v --verb
-e --weiter, --erweitere, --extend, --deep-search
-V --verbs
-c --circa, --etwa, --ungefähr, --proximate
-D --deutsch, --de-en
-P --phrases
-p --phrases-too
-f --full
-a --all, --alle, --alles
-h --help
"

  printf '%s\n' "$help_text"
}
# Short info about a verb; shared by the option "-v" and the call without
# any option.
#
# Arguments: $1 - the searched verb
short_verb_lookup() {
  local word="$1"

  # default_output returns the number of records found; 0 means no success.
  if default_output "$word"; then
    # The short search did not succeed: automatically execute the basic
    # function of "enver -V". print_all_verbs returns the number of records
    # found as well, 0 means no success.
    if print_all_verbs "$word"; then
      count_cascade "$word"
    fi
  else
    # Tell the user how much more the other options would deliver.
    write_total_amount "$word"
    write_amount_of_phrases "$word"
  fi
}

# Long list about a verb (option "-V"). If nothing is found, the search goes
# on with less strict patterns, step by step, and finally reports how many
# records an approximate search would find.
#
# Arguments: $1 - the searched verb
full_verb_lookup() {
  local word="$1"

  # A status other than 0 means that records were found and displayed.
  if ! print_all_verbs "$word"; then
    return
  fi

  echo -e "\nWeitergehende Suche auch nach Wortteilen startet." > /dev/stderr
  if print_verbs_full "$word"; then
    return
  fi
  if print_full "$word"; then
    return
  fi

  echo "Weitergehende Suche ohne Ergebnis." > /dev/stderr
  echo -e "\nSuche nach jedem Vorkommen des Such-Strings startet." > /dev/stderr
  if print_all "$word"; then
    return
  fi

  echo "Die gesuchte Zeichenkette kommt im englischen Teil der Datenbasis nicht vor." > /dev/stderr
  get_amount_of_approximate_matches "$word"
}

# Dispatch: the first argument selects the kind of search, the second one is
# the searched word. Without a known option the first argument is the word.
case "$1" in
  -v|--verb)
    # Short list, simple info about a verb.
    short_verb_lookup "$2"
    ;;
  -V|--verbs)
    # Long list about the searched verb.
    full_verb_lookup "$2"
    ;;
  -[eE]|--weiter|--erweitere|--erweitert|--extend|--deep|--deep-search|--deep_search)
    # Starts the search cascade, which uses up to three different regular
    # expressions, step by step, to find any record matching the string.
    deep_search_cascade "$2"
    ;;
  -[cC]|--circa|--etwa|--ungefähr|--proximate)
    # Error-tolerant search for similar words.
    approximate_search "$2"
    ;;
  -[gG]|-[dD]|--de-en|--german|--deutsch)
    # Finds verbs for a German word: the word has to be in the second field
    # of a record starting with "to ".
    grep -E "^[[:blank:]]*(to ).*$Tab.*\<$2\>" "$vocabulary" | tr "\t" "#" | column -s"#" -t
    ;;
  -p|--phrases-too|--phrases_too|-Pv|-vP)
    # Finds phrases as well, combines "-v" with "-P".
    default_output "$2"
    echo
    phrases "$2"
    write_total_amount "$2"
    ;;
  -P|--phrases)
    # Finds and writes only phrases.
    phrases "$2"
    ;;
  -pvf|--verbs-full)
    # Only for testing purposes, not mentioned in the help screen.
    # Finds a string even if it is a part of a word, but only in words
    # directly following "^to ".
    print_verbs_full "$2"
    ;;
  -f|--full)
    # Finds a string even if it is a part of a word.
    print_full "$2"
    ;;
  -a|--all|--alle|--alles)
    # Finds all records with the searched string in English.
    print_all "$2"
    ;;
  -h|--help)
    display_help_screen
    ;;
  *)
    # No option given: same action as with "-v", the word is the first argument.
    short_verb_lookup "$1"
    ;;
esac
exit 0