Skip to content

Commit e917d6e

Browse files
author
Vic Shóstak
committed
Refactoring filter.go & README.md
1 parent 145bbf0 commit e917d6e

File tree

5 files changed

+72
-32
lines changed

5 files changed

+72
-32
lines changed

README.md

+57-11
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,66 @@
11
# json2csv – Parse JSON files to CSV with data qualifier
22

3+
![Go version][go_version_img]
4+
[![Go report][go_report_img]][go_report_url]
5+
![Code coverage][code_coverage_img]
6+
[![Wiki][wiki_img]][wiki_url]
7+
[![License][license_img]][license_url]
8+
39
The parser can read a given folder with `*.json` files, filtering and
410
qualifying input data with intent & stop words dictionaries and save results
511
to CSV files by given chunk size.
612

7-
Minimal dependency on other Go packages, maximum performance even on large
8-
amounts of input data.
13+
**Minimal** dependency on other Go packages, but **maximum** performance
14+
even on large amounts of the input JSON data.
915

1016
## ⚡️ Quick start
1117

12-
Install package:
18+
First, [download][go_download] and install **Go**. Version `1.20` or higher
19+
is required.
20+
21+
Installation is done by using the [`go install`][go_install] command:
1322

1423
```bash
1524
go install github.com/koddr/json2csv@latest
1625
```
1726

18-
Next, run `json2csv` parser:
27+
Prepare a folder with your data source (format `*.json`) and create JSON files
28+
with:
29+
30+
- intents (for ex., `intents-file.json`);
31+
- filter (for ex., `filter-file.json`);
32+
33+
> 💡 Note: see the [`Wiki`][wiki_url] page to understand the structure of these
34+
> JSON files and get general recommendations for preparing the input data.
35+
36+
Next, run `json2csv` parser with (or without) options:
1937

2038
```bash
2139
json2csv \
2240
-json /path/to/input/json/folder \
2341
-intents /path/to/intents-file.json \
2442
-filter /path/to/filter-file.json \
2543
-output /path/to/output/csv/folder \
26-
-content data \
27-
-min-word-len 5 \
44+
-content-field message \
45+
-min-word-len 5 \
2846
-chunk 1000
2947
```
3048

31-
> 💡 Note: output CSV files have a default comma (`,`) separators between
32-
> columns.
49+
> 💡 Note: output CSV files use a comma (`,`) as the default separator between columns.
50+
51+
## 🧩 Options
52+
53+
- `-json [path]` is an option to set a path to the folder with JSON source
54+
file(s);
55+
- `-intents [path]` is an option to set a path to the file with intents (see
56+
[Wiki][wiki_intents_url]);
57+
- `-filter [path]` is an option to set a path to the file with a filter (see
58+
[Wiki][wiki_filter_url]);
59+
- `-content-field [string]` is an option to set a name of the content field
60+
(attribute that contains string to qualify and filter) in JSON source file(s);
61+
- `-min-word-len [int]` is an option to set a minimum word length to filter
62+
input words (if a word is shorter than this value, it will be skipped);
63+
- `-chunk [int]` is an option to set a chunk size for one CSV file;
3364

3465
## ✨ Solving case
3566

@@ -49,6 +80,21 @@ This is what this Go package solves! ✌️
4980

5081
## ⚠️ License
5182

52-
`json2csv` is free and open-source software licensed under the
53-
[Apache 2.0 License](LICENSE), created and supported with 🩵 for people and
54-
robots by [Vic Shóstak](https://github.com/koddr).
83+
[`json2csv`][json2csv_url] is free and open-source software licensed under the
84+
[Apache 2.0 License][license_url], created and supported with 🩵 for people and
85+
robots by [Vic Shóstak][author].
86+
87+
[go_download]: https://golang.org/dl/
88+
[go_install]: https://golang.org/cmd/go/#hdr-Compile_and_install_packages_and_dependencies
89+
[go_version_img]: https://img.shields.io/badge/Go-1.20+-00ADD8?style=for-the-badge&logo=go
90+
[go_report_img]: https://img.shields.io/badge/Go_report-A+-success?style=for-the-badge&logo=none
91+
[go_report_url]: https://goreportcard.com/report/github.com/koddr/json2csv
92+
[code_coverage_img]: https://img.shields.io/badge/code_coverage-98%25-success?style=for-the-badge&logo=none
93+
[wiki_img]: https://img.shields.io/badge/-wiki_page-blue?style=for-the-badge&logo=wikipedia
94+
[wiki_url]: https://github.com/koddr/json2csv/wiki
95+
[wiki_intents_url]: https://github.com/koddr/json2csv/wiki#intents
96+
[wiki_filter_url]: https://github.com/koddr/json2csv/wiki#filter
97+
[license_img]: https://img.shields.io/badge/license-Apache_2.0-red?style=for-the-badge&logo=none
98+
[license_url]: https://github.com/koddr/json2csv/LICENSE
99+
[json2csv_url]: https://github.com/koddr/json2csv
100+
[author]: https://github.com/koddr

config.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ package main
33
import "os"
44

55
type config struct {
6-
minWordLen, chunkSize int
7-
intentsFile, filterFile []byte
8-
outputFolder, contentAttr string
9-
jsonFolder jsonFolder
6+
minWordLen, chunkSize int
7+
intentsFile, filterFile []byte
8+
outputFolder, contentField string
9+
jsonFolder jsonFolder
1010
}
1111

1212
type jsonFolder struct {

filter.go

+5-11
Original file line numberDiff line numberDiff line change
@@ -27,28 +27,22 @@ func (c *config) filter(s string) bool {
2727
return false
2828
}
2929

30+
// Filter the given words.
3031
for key, words := range skipWords {
31-
if key == "skip_prefix" && len(words) > 0 {
32+
switch key {
33+
case "skip_prefixes":
3234
for _, word := range words {
3335
if strings.HasPrefix(strings.ToLower(s), strings.ToLower(word)) {
3436
return false
3537
}
3638
}
37-
}
38-
}
39-
40-
for key, words := range skipWords {
41-
if key == "skip_suffix" && len(words) > 0 {
39+
case "skip_suffixes":
4240
for _, word := range words {
4341
if strings.HasSuffix(strings.ToLower(s), strings.ToLower(word)) {
4442
return false
4543
}
4644
}
47-
}
48-
}
49-
50-
for key, words := range skipWords {
51-
if key == "skip_words" && len(words) > 0 {
45+
case "skip_words":
5246
for _, word := range words {
5347
if strings.Contains(strings.ToLower(s), strings.ToLower(word)) {
5448
return false

main.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ func main() {
1313
intentsPath := flag.String("intents", "./intents.json", "set path to JSON file with intents dictionary")
1414
filterPath := flag.String("filter", "./filter.json", "set path to JSON file with filter dictionary")
1515
outputPath := flag.String("output", "./output_files", "set path to folder for output CSV files")
16-
contentAttr := flag.String("content", "content", "set name of the content attribute in your JSON struct")
16+
contentField := flag.String("content-field", "content", "set name of the content field in your JSON struct")
1717
minWordLen := flag.Int("min-word-len", 0, "set min length of word to parse from JSON files")
1818
chunkSize := flag.Int("chunk", 5000, "set chunk size for output CSV file")
1919

@@ -25,7 +25,7 @@ func main() {
2525
fmt.Println("\nConfig for this session is creating now. Please wait...")
2626

2727
// Create a new parse session with the given configs from flags.
28-
session, err := newSession(*jsonPath, *intentsPath, *filterPath, *outputPath, *contentAttr, *minWordLen, *chunkSize)
28+
session, err := newSession(*jsonPath, *intentsPath, *filterPath, *outputPath, *contentField, *minWordLen, *chunkSize)
2929
if err != nil {
3030
log.Fatal(err)
3131
}
@@ -36,7 +36,7 @@ func main() {
3636
fmt.Printf(" – file with intents dictionary is '%s'\n", *intentsPath)
3737
fmt.Printf(" – file with filter dictionary is '%s'\n", *filterPath)
3838
fmt.Printf(" – folder for output CSV files is '%s'\n", *outputPath)
39-
fmt.Printf(" – name of the content attribute is '%s'\n", *contentAttr)
39+
fmt.Printf(" – name of the content field is '%s'\n", *contentField)
4040
fmt.Printf(" – min length of word to parse is %d letters\n", *minWordLen)
4141
fmt.Printf(" – chunk size for output CSV file is %d (per file)\n", *chunkSize)
4242
fmt.Println("\nParser is starting now. Please wait...")

session.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import (
66
"path/filepath"
77
)
88

9-
func newSession(jsonPath, intentsPath, filterPath, outputPath, contentAttr string, minWordLen, chunkSize int) (*config, error) {
9+
func newSession(jsonPath, intentsPath, filterPath, outputPath, contentField string, minWordLen, chunkSize int) (*config, error) {
1010
if minWordLen < 0 {
1111
return nil, errors.New("can't parse data with negative word count")
1212
}
@@ -15,7 +15,7 @@ func newSession(jsonPath, intentsPath, filterPath, outputPath, contentAttr strin
1515
return nil, errors.New("can't create CSV chunk file with zero (or negative) size")
1616
}
1717

18-
if contentAttr == "" {
18+
if contentField == "" {
1919
return nil, errors.New("can't parse data without content name attribute")
2020
}
2121

@@ -52,7 +52,7 @@ func newSession(jsonPath, intentsPath, filterPath, outputPath, contentAttr strin
5252
intentsFile: intentsFile,
5353
filterFile: filterFile,
5454
outputFolder: filepath.Clean(outputPath),
55-
contentAttr: contentAttr,
55+
contentField: contentField,
5656
jsonFolder: jsonFolder{path: jsonPath, files: jsonFiles},
5757
}, nil
5858
}

0 commit comments

Comments
 (0)