|  | 
|  | 1 | +package org.jetbrains.kotlinx.dataframe.documentation | 
|  | 2 | + | 
|  | 3 | +import io.deephaven.csv.CsvSpecs | 
|  | 4 | +import org.apache.commons.csv.CSVFormat | 
|  | 5 | +import org.jetbrains.kotlinx.dataframe.DataFrame | 
|  | 6 | +import org.jetbrains.kotlinx.dataframe.api.ParserOptions | 
|  | 7 | +import org.jetbrains.kotlinx.dataframe.api.parser | 
|  | 8 | +import org.jetbrains.kotlinx.dataframe.impl.io.typesDeephavenAlreadyParses | 
|  | 9 | +import org.jetbrains.kotlinx.dataframe.io.AdjustCSVFormat | 
|  | 10 | +import org.jetbrains.kotlinx.dataframe.io.AdjustCsvSpecs | 
|  | 11 | +import org.jetbrains.kotlinx.dataframe.io.ColType | 
|  | 12 | +import org.jetbrains.kotlinx.dataframe.io.Compression | 
|  | 13 | +import org.jetbrains.kotlinx.dataframe.io.QuoteMode | 
|  | 14 | + | 
|  | 15 | +/** | 
|  | 16 | + * Contains both the default values of csv/tsv parameters and the parameter KDocs. | 
|  | 17 | + */ | 
|  | 18 | +@Suppress("ktlint:standard:class-naming", "ClassName", "KDocUnresolvedReference") | 
|  | 19 | +internal object DelimParams { | 
|  | 20 | + | 
|  | 21 | +    /** @param path The file path to read. Can also be compressed as `.gz` or `.zip`, see [Compression]. */ | 
|  | 22 | +    interface PATH_READ | 
|  | 23 | + | 
|  | 24 | +    /** @param file The file to read. Can also be compressed as `.gz` or `.zip`, see [Compression]. */ | 
|  | 25 | +    interface FILE_READ | 
|  | 26 | + | 
|  | 27 | +    /** @param url The URL from which to fetch the data. Can also be compressed as `.gz` or `.zip`, see [Compression]. */ | 
|  | 28 | +    interface URL_READ | 
|  | 29 | + | 
|  | 30 | +    /** @param fileOrUrl The file path or URL to read the data from. Can also be compressed as `.gz` or `.zip`, see [Compression]. */ | 
|  | 31 | +    interface FILE_OR_URL_READ | 
|  | 32 | + | 
|  | 33 | +    /** @param inputStream Represents the file to read. */ | 
|  | 34 | +    interface INPUT_STREAM_READ | 
|  | 35 | + | 
|  | 36 | +    /** @param text The raw data to read in the form of a [String]. */ | 
|  | 37 | +    interface TEXT_READ | 
|  | 38 | + | 
|  | 39 | +    /** @param file The file to write to. */ | 
|  | 40 | +    interface FILE_WRITE | 
|  | 41 | + | 
|  | 42 | +    /** @param path The path pointing to a file to write to. */ | 
|  | 43 | +    interface PATH_WRITE | 
|  | 44 | + | 
|  | 45 | +    /** @param writer The [Appendable] to write to. */ | 
|  | 46 | +    interface WRITER_WRITE | 
|  | 47 | + | 
|  | 48 | +    /** | 
|  | 49 | +     * @param delimiter The field delimiter character. Default: `','`. | 
|  | 50 | +     * | 
|  | 51 | +     *   Ignored if [hasFixedWidthColumns] is `true`. | 
|  | 52 | +     */ | 
|  | 53 | +    const val CSV_DELIMITER: Char = ',' | 
|  | 54 | + | 
|  | 55 | +    /** | 
|  | 56 | +     * @param delimiter The field delimiter character. Default: `'\t'`. | 
|  | 57 | +     * | 
|  | 58 | +     *   Ignored if [hasFixedWidthColumns] is `true`. | 
|  | 59 | +     */ | 
|  | 60 | +    const val TSV_DELIMITER: Char = '\t' | 
|  | 61 | + | 
|  | 62 | +    /** | 
|  | 63 | +     * @param delimiter The field delimiter character. Default: `','`. | 
|  | 64 | +     * | 
|  | 65 | +     *   Ignored if [hasFixedWidthColumns] is `true`. | 
|  | 66 | +     */ | 
|  | 67 | +    const val DELIM_DELIMITER: Char = ',' | 
|  | 68 | + | 
|  | 69 | +    /** | 
|  | 70 | +     * @param header Optional column titles. Default: empty list. | 
|  | 71 | +     * | 
|  | 72 | +     *   If non-empty, the data will be read with [header] as the column titles | 
|  | 73 | +     *   (use [skipLines] if there's a header in the data). | 
|  | 74 | +     *   If empty (default), the header will be read from the data. | 
|  | 75 | +     */ | 
|  | 76 | +    val HEADER: List<String> = emptyList() | 
|  | 77 | + | 
|  | 78 | +    /** | 
|  | 79 | +     * @param hasFixedWidthColumns Whether the data has fixed-width columns instead of a single delimiter. | 
|  | 80 | +     *   Default: `false`. | 
|  | 81 | +     * | 
|  | 82 | +     *   Fixed-width columns can occur, for instance, in multi-space delimited data, where the columns are separated | 
|  | 83 | +     *   by multiple spaces instead of a single delimiter, so columns are visually aligned. | 
|  | 84 | +     *   Column widths are determined by the header in the data (if present), or manually by setting | 
|  | 85 | +     *   [fixedColumnWidths]. | 
|  | 86 | +     */ | 
|  | 87 | +    const val HAS_FIXED_WIDTH_COLUMNS: Boolean = false | 
|  | 88 | + | 
|  | 89 | +    /** | 
|  | 90 | +     * @param fixedColumnWidths The fixed column widths. Default: empty list. | 
|  | 91 | +     * | 
|  | 92 | +     *   Requires [hasFixedWidthColumns]. If empty, the column widths will be determined by the header in the data | 
|  | 93 | +     *   (if present), else, this manually sets the column widths. | 
|  | 94 | +     *   The number of widths should match the number of columns. | 
|  | 95 | +     */ | 
|  | 96 | +    val FIXED_COLUMN_WIDTHS: List<Int> = emptyList() | 
|  | 97 | + | 
|  | 98 | +    /** | 
|  | 99 | +     * @param compression The compression of the data. | 
|  | 100 | +     *   Default: [Compression.None], unless detected otherwise from the input file or url. | 
|  | 101 | +     */ | 
|  | 102 | +    val COMPRESSION: Compression<*> = Compression.None | 
|  | 103 | + | 
|  | 104 | +    /** | 
|  | 105 | +     * @param colTypes The expected [ColType] per column name. Default: empty map, a.k.a. infer every column type. | 
|  | 106 | +     * | 
|  | 107 | +     *   If supplied for a certain column name (inferred from data or given by [header]), | 
|  | 108 | +     *   the parser will parse the column with the specified name as the specified type, else it will infer the type. | 
|  | 109 | +     * | 
|  | 110 | +     *   e.g. `colTypes = `[mapOf][mapOf]`("colName" `[to][to]` `[ColType][ColType]`.`[Int][ColType.Int]`)`. | 
|  | 111 | +     *   You can also set [ColType][ColType]`.`[DEFAULT][ColType.DEFAULT]` `[to][to]` `[ColType][ColType]`.X` | 
|  | 112 | +     *   to set a _default_ column type, like [ColType.String]. | 
|  | 113 | +     */ | 
|  | 114 | +    val COL_TYPES: Map<String, ColType> = emptyMap() | 
|  | 115 | + | 
|  | 116 | +    /** | 
|  | 117 | +     * @param skipLines The number of lines to skip before reading the header and data. Default: `0`. | 
|  | 118 | +     * | 
|  | 119 | +     *   Useful for files with metadata, or comments at the beginning, or to give a custom [header]. | 
|  | 120 | +     */ | 
|  | 121 | +    const val SKIP_LINES: Long = 0L | 
|  | 122 | + | 
|  | 123 | +    /** | 
|  | 124 | +     * @param readLines The maximum number of lines to read from the data. Default: `null`. | 
|  | 125 | +     * | 
|  | 126 | +     *   If `null`, all lines will be read. | 
|  | 127 | +     */ | 
|  | 128 | +    val READ_LINES: Long? = null | 
|  | 129 | + | 
|  | 130 | +    /** | 
|  | 131 | +     * @param parserOptions Optional [parsing options][ParserOptions] for columns initially read as [String]. | 
|  | 132 | +     *   Default: `null`. | 
|  | 133 | +     * | 
|  | 134 | +     *   Can configure locale, date format, double parsing, skipping types, etc. | 
|  | 135 | +     * | 
|  | 136 | +     *   If [parserOptions] or any of the arguments are `null`, the global parser configuration | 
|  | 137 | +     *   ([DataFrame.parser][DataFrame.Companion.parser]) will be queried. | 
|  | 138 | +     * | 
|  | 139 | +     *   The only exceptions are: | 
|  | 140 | +     *   - [useFastDoubleParser][ParserOptions.useFastDoubleParser], which will default to `true`, | 
|  | 141 | +     *   regardless of the global setting. | 
|  | 142 | +     *   - [nullStrings][ParserOptions.nullStrings], which, if `null`, | 
|  | 143 | +     *   will take the global setting + [["", "NA", "N/A", "null", "NULL", "None", "none", "NIL", "nil"]][org.jetbrains.kotlinx.dataframe.io.DEFAULT_DELIM_NULL_STRINGS]. | 
|  | 144 | +     *   - [skipTypes][ParserOptions.skipTypes], which will always add [typesDeephavenAlreadyParses] to | 
|  | 145 | +     *   the given types or the global setting. | 
|  | 146 | +     */ | 
|  | 147 | +    val PARSER_OPTIONS: ParserOptions? = null | 
|  | 148 | + | 
|  | 149 | +    /** | 
|  | 150 | +     * @param ignoreEmptyLines Whether to skip intermediate empty lines. Default: `false`. | 
|  | 151 | +     * | 
|  | 152 | +     *   If `false`, empty lines will be interpreted as having _empty_ values if [allowMissingColumns] is `true`. | 
|  | 153 | +     */ | 
|  | 154 | +    const val IGNORE_EMPTY_LINES: Boolean = false | 
|  | 155 | + | 
|  | 156 | +    /** | 
|  | 157 | +     * @param allowMissingColumns Whether to allow rows with fewer columns than the header. Default: `true`. | 
|  | 158 | +     * | 
|  | 159 | +     *   If `true`, the missing values of rows that are too short will be interpreted as _empty_ values. | 
|  | 160 | +     */ | 
|  | 161 | +    const val ALLOW_MISSING_COLUMNS: Boolean = true | 
|  | 162 | + | 
|  | 163 | +    /** | 
|  | 164 | +     * @param ignoreExcessColumns Whether to ignore the excess columns of rows with more columns than the header. Default: `true`. | 
|  | 165 | +     * | 
|  | 166 | +     *   If `true`, rows that are too long will have those columns dropped. | 
|  | 167 | +     */ | 
|  | 168 | +    const val IGNORE_EXCESS_COLUMNS: Boolean = true | 
|  | 169 | + | 
|  | 170 | +    /** | 
|  | 171 | +     * @param quote The quote character. Default: `"`. | 
|  | 172 | +     * | 
|  | 173 | +     *   Used when field- or line delimiters should be interpreted as literal text. | 
|  | 174 | +     * | 
|  | 175 | +     *   For example: `123,"hello, there",456,` would correspond to: `123`; `hello, there`; `456`. | 
|  | 176 | +     */ | 
|  | 177 | +    const val QUOTE: Char = '"' | 
|  | 178 | + | 
|  | 179 | +    /** | 
|  | 180 | +     * @param ignoreSurroundingSpaces Whether to ignore leading and trailing blanks around non-quoted fields. | 
|  | 181 | +     *   Default: `true`. | 
|  | 182 | +     */ | 
|  | 183 | +    const val IGNORE_SURROUNDING_SPACES: Boolean = true | 
|  | 184 | + | 
|  | 185 | +    /** | 
|  | 186 | +     * @param trimInsideQuoted Whether to ignore leading and trailing blanks inside quoted fields. | 
|  | 187 | +     *   Default: `false`. | 
|  | 188 | +     */ | 
|  | 189 | +    const val TRIM_INSIDE_QUOTED: Boolean = false | 
|  | 190 | + | 
|  | 191 | +    /** | 
|  | 192 | +     * @param parseParallel Whether to parse the data in parallel. Default: `true`. | 
|  | 193 | +     * | 
|  | 194 | +     *   If `true`, the data will be read and parsed in parallel by the Deephaven parser. | 
|  | 195 | +     *   This is usually faster, but can be turned off for debugging. | 
|  | 196 | +     */ | 
|  | 197 | +    const val PARSE_PARALLEL: Boolean = true | 
|  | 198 | + | 
|  | 199 | +    /** | 
|  | 200 | +     * @param adjustCsvSpecs Optional extra [CsvSpecs] configuration. Default: `{ it }`. | 
|  | 201 | +     * | 
|  | 202 | +     *   Before instantiating the [CsvSpecs], the [CsvSpecs.Builder] will be passed to this lambda. | 
|  | 203 | +     *   This will allow you to configure/overwrite any CSV / TSV parsing options. | 
|  | 204 | +     */ | 
|  | 205 | +    val ADJUST_CSV_SPECS: AdjustCsvSpecs = { it } | 
|  | 206 | + | 
|  | 207 | +    /** @param includeHeader Whether to include the header in the output. Default: `true`. */ | 
|  | 208 | +    const val INCLUDE_HEADER: Boolean = true | 
|  | 209 | + | 
|  | 210 | +    /** | 
|  | 211 | +     * @param quoteMode The [QuoteMode] to use when writing CSV / TSV files. | 
|  | 212 | +     *   Default: [QuoteMode.MINIMAL]. | 
|  | 213 | +     */ | 
|  | 214 | +    val QUOTE_MODE: QuoteMode = QuoteMode.MINIMAL | 
|  | 215 | + | 
|  | 216 | +    /** | 
|  | 217 | +     * @param escapeChar The escape character to use when writing CSV / TSV files with [QuoteMode.NONE]. | 
|  | 218 | +     *   Default: `null`. This will double-quote the value. | 
|  | 219 | +     */ | 
|  | 220 | +    val ESCAPE_CHAR: Char? = null | 
|  | 221 | + | 
|  | 222 | +    /** | 
|  | 223 | +     * @param commentChar The character that indicates a comment line in a CSV / TSV file. | 
|  | 224 | +     *   Default: `'#'`. | 
|  | 225 | +     */ | 
|  | 226 | +    const val COMMENT_CHAR: Char = '#' | 
|  | 227 | + | 
|  | 228 | +    /** | 
|  | 229 | +     * @param recordSeparator The string that separates records in a CSV / TSV file. | 
|  | 230 | +     *   Default: `"\n"`, a Unix-newline. | 
|  | 231 | +     */ | 
|  | 232 | +    const val RECORD_SEPARATOR: String = "\n" | 
|  | 233 | + | 
|  | 234 | +    /** | 
|  | 235 | +     * @param headerComments A list of comments to include at the beginning of the CSV / TSV file. | 
|  | 236 | +     *   Default: empty list. | 
|  | 237 | +     */ | 
|  | 238 | +    val HEADER_COMMENTS: List<String> = emptyList() | 
|  | 239 | + | 
|  | 240 | +    /** | 
|  | 241 | +     * @param adjustCsvFormat Optional extra [CSVFormat] configuration. Default: `{ it }`. | 
|  | 242 | +     * | 
|  | 243 | +     *   Before instantiating the [CSVFormat], the [CSVFormat.Builder] will be passed to this lambda. | 
|  | 244 | +     *   This will allow you to configure/overwrite any CSV / TSV writing options. | 
|  | 245 | +     */ | 
|  | 246 | +    val ADJUST_CSV_FORMAT: AdjustCSVFormat = { it } | 
|  | 247 | +} | 
0 commit comments