Refactoring filter.go & README.md

Vic Shóstak · Vic Shóstak · commit e917d6efa437 · 2023-05-02T12:29:47.000+03:00
diff --git a/README.md b/README.md
@@ -1,35 +1,66 @@
 # json2csv – Parse JSON files to CSV with data qualifier
 
+![Go version][go_version_img]
+[![Go report][go_report_img]][go_report_url]
+![Code coverage][code_coverage_img]
+[![Wiki][wiki_img]][wiki_url]
+[![License][license_img]][license_url]
+
 The parser reads a given folder with `*.json` files, filters and qualifies
 the input data with intent & stop-word dictionaries, and saves the results
 to CSV files split by a given chunk size.
 
-Minimal dependency on other Go packages, maximum performance even on large 
-amounts of input data.
+**Minimal** dependency on other Go packages, but **maximum** performance 
+even on large amounts of the input JSON data.
 
 ## ⚡️ Quick start
 
-Install package:
+First, [download][go_download] and install **Go**. Version `1.20` or higher 
+is required. 
+
+Installation is done by using the [`go install`][go_install] command:
 
 ```bash
 go install github.com/koddr/json2csv@latest
 ```
 
-Next, run `json2csv` parser:
+Prepare a folder with your source data (`*.json` files) and create JSON files
+with:
+
+- intents (e.g., `intents-file.json`);
+- a filter (e.g., `filter-file.json`).
+
+> 💡 Note: see the [`Wiki`][wiki_url] page to understand the structure of the
+> JSON files and get general recommendations for preparing the input data.
+
+Next, run `json2csv` parser with (or without) options:
 
 ```bash
 json2csv \
   -json    /path/to/input/json/folder \
   -intents /path/to/intents-file.json \
   -filter  /path/to/filter-file.json \
   -output  /path/to/output/csv/folder \
-  -content data \
-  -min-word-len 5 \
+  -content-field message \
+  -min-word-len  5 \
   -chunk 1000
 ```
 
-> 💡 Note: output CSV files have a default comma (`,`) separators between
-> columns.
+> 💡 Note: output CSV files use a comma (`,`) as the default column separator.
+
+## 🧩 Options
+
+- `-json [path]` is an option to set a path to the folder with JSON source
+  file(s);
+- `-intents [path]` is an option to set a path to the file with intents (see 
+  [Wiki][wiki_intents_url]);
+- `-filter [path]` is an option to set a path to the file with a filter (see 
+  [Wiki][wiki_filter_url]);
+- `-content-field [string]` is an option to set a name of the content field
+  (attribute that contains string to qualify and filter) in JSON source file(s);
+- `-min-word-len [int]` is an option to set the minimum word length used to
+  filter input words (words shorter than this value are skipped);
+- `-chunk [int]` is an option to set a chunk size for one CSV file.
 
 ## ✨ Solving case
 
@@ -49,6 +80,21 @@ This is what this Go package solves! ✌️
 
 ## ⚠️ License
 
-`json2csv` is free and open-source software licensed under the
-[Apache 2.0 License](LICENSE), created and supported with 🩵 for people and
-robots by [Vic Shóstak](https://github.com/koddr).
+[`json2csv`][json2csv_url] is free and open-source software licensed under the
+[Apache 2.0 License][license_url], created and supported with 🩵 for people and
+robots by [Vic Shóstak][author].
+
+[go_download]: https://golang.org/dl/
+[go_install]: https://golang.org/cmd/go/#hdr-Compile_and_install_packages_and_dependencies
+[go_version_img]: https://img.shields.io/badge/Go-1.20+-00ADD8?style=for-the-badge&logo=go
+[go_report_img]: https://img.shields.io/badge/Go_report-A+-success?style=for-the-badge&logo=none
+[go_report_url]: https://goreportcard.com/report/github.com/koddr/json2csv
+[code_coverage_img]: https://img.shields.io/badge/code_coverage-98%25-success?style=for-the-badge&logo=none
+[wiki_img]: https://img.shields.io/badge/-wiki_page-blue?style=for-the-badge&logo=wikipedia
+[wiki_url]: https://github.com/koddr/json2csv/wiki
+[wiki_intents_url]: https://github.com/koddr/json2csv/wiki#intents
+[wiki_filter_url]: https://github.com/koddr/json2csv/wiki#filter
+[license_img]: https://img.shields.io/badge/license-Apache_2.0-red?style=for-the-badge&logo=none
+[license_url]: https://github.com/koddr/json2csv/blob/HEAD/LICENSE
+[json2csv_url]: https://github.com/koddr/json2csv
+[author]: https://github.com/koddr
diff --git a/config.go b/config.go
@@ -3,10 +3,10 @@ package main
 import "os"
 
 type config struct {
-	minWordLen, chunkSize     int
-	intentsFile, filterFile   []byte
-	outputFolder, contentAttr string
-	jsonFolder                jsonFolder
+	minWordLen, chunkSize      int
+	intentsFile, filterFile    []byte
+	outputFolder, contentField string
+	jsonFolder                 jsonFolder
 }
 
 type jsonFolder struct {
diff --git a/filter.go b/filter.go
@@ -27,28 +27,22 @@ func (c *config) filter(s string) bool {
 		return false
 	}
 
+	// Filter the given words.
 	for key, words := range skipWords {
-		if key == "skip_prefix" && len(words) > 0 {
+		switch key {
+		case "skip_prefixes":
 			for _, word := range words {
 				if strings.HasPrefix(strings.ToLower(s), strings.ToLower(word)) {
 					return false
 				}
 			}
-		}
-	}
-
-	for key, words := range skipWords {
-		if key == "skip_suffix" && len(words) > 0 {
+		case "skip_suffixes":
 			for _, word := range words {
 				if strings.HasSuffix(strings.ToLower(s), strings.ToLower(word)) {
 					return false
 				}
 			}
-		}
-	}
-
-	for key, words := range skipWords {
-		if key == "skip_words" && len(words) > 0 {
+		case "skip_words":
 			for _, word := range words {
 				if strings.Contains(strings.ToLower(s), strings.ToLower(word)) {
 					return false
diff --git a/main.go b/main.go
@@ -13,7 +13,7 @@ func main() {
 	intentsPath := flag.String("intents", "./intents.json", "set path to JSON file with intents dictionary")
 	filterPath := flag.String("filter", "./filter.json", "set path to JSON file with filter dictionary")
 	outputPath := flag.String("output", "./output_files", "set path to folder for output CSV files")
-	contentAttr := flag.String("content", "content", "set name of the content attribute in your JSON struct")
+	contentField := flag.String("content-field", "content", "set name of the content field in your JSON struct")
 	minWordLen := flag.Int("min-word-len", 0, "set min length of word to parse from JSON files")
 	chunkSize := flag.Int("chunk", 5000, "set chunk size for output CSV file")
 
@@ -25,7 +25,7 @@ func main() {
 	fmt.Println("\nConfig for this session is creating now. Please wait...")
 
 	// Create a new parse session with the given configs from flags.
-	session, err := newSession(*jsonPath, *intentsPath, *filterPath, *outputPath, *contentAttr, *minWordLen, *chunkSize)
+	session, err := newSession(*jsonPath, *intentsPath, *filterPath, *outputPath, *contentField, *minWordLen, *chunkSize)
 	if err != nil {
 		log.Fatal(err)
 	}
@@ -36,7 +36,7 @@ func main() {
 	fmt.Printf(" – file with intents dictionary is '%s'\n", *intentsPath)
 	fmt.Printf(" – file with filter dictionary is '%s'\n", *filterPath)
 	fmt.Printf(" – folder for output CSV files is '%s'\n", *outputPath)
-	fmt.Printf(" – name of the content attribute is '%s'\n", *contentAttr)
+	fmt.Printf(" – name of the content field is '%s'\n", *contentField)
 	fmt.Printf(" – min length of word to parse is %d letters\n", *minWordLen)
 	fmt.Printf(" – chunk size for output CSV file is %d (per file)\n", *chunkSize)
 	fmt.Println("\nParser is starting now. Please wait...")
diff --git a/session.go b/session.go
@@ -6,7 +6,7 @@ import (
 	"path/filepath"
 )
 
-func newSession(jsonPath, intentsPath, filterPath, outputPath, contentAttr string, minWordLen, chunkSize int) (*config, error) {
+func newSession(jsonPath, intentsPath, filterPath, outputPath, contentField string, minWordLen, chunkSize int) (*config, error) {
 	if minWordLen < 0 {
 		return nil, errors.New("can't parse data with negative word count")
 	}
@@ -15,7 +15,7 @@ func newSession(jsonPath, intentsPath, filterPath, outputPath, contentAttr strin
 		return nil, errors.New("can't create CSV chunk file with zero (or negative) size")
 	}
 
-	if contentAttr == "" {
+	if contentField == "" {
 		return nil, errors.New("can't parse data without content name attribute")
 	}
 
@@ -52,7 +52,7 @@ func newSession(jsonPath, intentsPath, filterPath, outputPath, contentAttr strin
 		intentsFile:  intentsFile,
 		filterFile:   filterFile,
 		outputFolder: filepath.Clean(outputPath),
-		contentAttr:  contentAttr,
+		contentField: contentField,
 		jsonFolder:   jsonFolder{path: jsonPath, files: jsonFiles},
 	}, nil
 }

Original file line number	Diff line number	Diff line change
`@@ -6,7 +6,7 @@ import (`
`6`	`6`	`"path/filepath"`
`7`	`7`	`)`
`8`	`8`
`9`		`-func newSession(jsonPath, intentsPath, filterPath, outputPath, contentAttr string, minWordLen, chunkSize int) (*config, error) {`
	`9`	`+func newSession(jsonPath, intentsPath, filterPath, outputPath, contentField string, minWordLen, chunkSize int) (*config, error) {`
`10`	`10`	`if minWordLen < 0 {`
`11`	`11`	`return nil, errors.New("can't parse data with negative word count")`
`12`	`12`	`}`
`@@ -15,7 +15,7 @@ func newSession(jsonPath, intentsPath, filterPath, outputPath, contentAttr strin`
`15`	`15`	`return nil, errors.New("can't create CSV chunk file with zero (or negative) size")`
`16`	`16`	`}`
`17`	`17`
`18`		`- if contentAttr == "" {`
	`18`	`+ if contentField == "" {`
`19`	`19`	`return nil, errors.New("can't parse data without content name attribute")`
`20`	`20`	`}`
`21`	`21`
`@@ -52,7 +52,7 @@ func newSession(jsonPath, intentsPath, filterPath, outputPath, contentAttr strin`
`52`	`52`	`intentsFile: intentsFile,`
`53`	`53`	`filterFile: filterFile,`
`54`	`54`	`outputFolder: filepath.Clean(outputPath),`
`55`		`- contentAttr: contentAttr,`
	`55`	`+ contentField: contentField,`
`56`	`56`	`jsonFolder: jsonFolder{path: jsonPath, files: jsonFiles},`
`57`	`57`	`}, nil`
`58`	`58`	`}`