@@ -2,6 +2,7 @@ import React, { useRef, useEffect } from 'react'
2
2
import PropTypes from 'prop-types'
3
3
import { useDispatch , useSelector } from 'react-redux'
4
4
import fileTypeChecker from 'file-type-checker'
5
+ import languageEncoding from 'detect-file-encoding-and-language'
5
6
6
7
import {
7
8
clearError ,
@@ -34,6 +35,87 @@ const INVALID_EXT_ERROR = (
34
35
</ >
35
36
)
36
37
38
+ // The package author suggests using a minimum of 500 words to determine the encoding. However, datafiles don't have
39
+ // "words" so we're using bytes instead to determine the encoding. See: https://www.npmjs.com/package/detect-file-encoding-and-language
40
+ const MIN_BYTES = 500
41
+
42
/**
 * Best-effort conversion of an uploaded file to UTF-8 without a byte-order mark.
 *
 * Only the first MIN_BYTES bytes are handed to `languageEncoding` to detect the
 * encoding. When the detected encoding is not UTF-8, or the content starts with
 * a UTF-8 BOM, the whole content is decoded and re-encoded as UTF-8 into a new
 * File that keeps the original name, MIME type and last-modified timestamp.
 *
 * @param {ArrayBuffer} fileBytes - Full content of the file.
 * @param {File} file - The file the bytes were read from.
 * @returns {Promise<File>} A re-encoded copy, or `file` itself when no
 *   conversion is needed or when detection/decoding fails.
 */
/* istanbul ignore next */
const tryGetUTF8EncodedFile = async function (fileBytes, file) {
  // Create a small view of the file to determine the encoding.
  // NOTE(review): the cut at MIN_BYTES may split a multi-byte character; confirm
  // the detector tolerates a truncated trailing sequence.
  const bytesView = new Uint8Array(fileBytes.slice(0, MIN_BYTES))
  const blobView = new Blob([bytesView], { type: 'text/plain' })
  try {
    const fileInfo = await languageEncoding(blobView)
    const hasBom =
      bytesView[0] === 0xef && bytesView[1] === 0xbb && bytesView[2] === 0xbf

    // Trust the detector when it reports an encoding; otherwise a UTF-8 BOM is
    // enough on its own to know how to decode the content.
    const encoding =
      (fileInfo && fileInfo.encoding) || (hasBom ? 'UTF-8' : null)

    // Nothing to do when the encoding is unknown or already BOM-less UTF-8.
    if (!encoding || (encoding === 'UTF-8' && !hasBom)) {
      return file
    }

    const decoder = new TextDecoder(encoding)
    const decodedString = decoder.decode(
      hasBom ? fileBytes.slice(3) : fileBytes
    )
    const utf8Bytes = new TextEncoder().encode(decodedString)

    // `File` exposes no `options` property, so carry the metadata explicitly.
    return new File([utf8Bytes], file.name, {
      type: file.type,
      lastModified: file.lastModified,
    })
  } catch (error) {
    // This is a last ditch fallback to ensure consistent functionality and also allows the unit tests to work in the
    // same way they did before this change. When the unit tests (i.e. Node environment) call `languageEncoding` it
    // expects a Buffer/string/URL object. When the browser calls `languageEncoding`, it expects a Blob/File object.
    // There is not a convenient way or universal object to handle both cases. Thus, when the tests run the call to
    // `languageEncoding`, it raises an exception and we return the file as is which is then dispatched as it would
    // have been before this change.
    console.error('Caught error while handling file encoding. Error:', error)
    return file
  }
}
72
+
73
/**
 * Reads `file` into memory and validates it before upload.
 *
 * After the read completes, the file name must end in `.txt`, `.msNN` or
 * `.tsNN`/`.tsNNN` (case-insensitive) and the content must not be recognised as
 * a png, gif or jpeg image. Each validation failure dispatches the matching
 * error action for `section` before the promise is rejected.
 *
 * @param {File} file - The file selected by the user.
 * @param {string} section - Section the file belongs to; echoed in error payloads.
 * @param {HTMLElement} input - File input element, passed to `createFileInputErrorState`.
 * @param {HTMLElement} dropTarget - Drop target element, passed to `createFileInputErrorState`.
 * @param {Function} dispatch - Redux dispatch function.
 * @returns {Promise<{result: ArrayBuffer}>} Resolves with the file content;
 *   rejects with an Error when the read or a validation step fails.
 */
const load = (file, section, input, dropTarget, dispatch) => {
  const filereader = new FileReader()
  const types = ['png', 'gif', 'jpeg']
  const allowedExtension = /(\.txt|\.ms\d{2}|\.ts\d{2,3})$/i

  return new Promise((resolve, reject) => {
    filereader.onerror = () => {
      // Capture the reader's error before aborting so the reason is not lost.
      const readError = filereader.error
      filereader.abort()
      reject(readError || new Error('Unable to read the selected file'))
    }

    filereader.onload = () => {
      if (!allowedExtension.test(file.name)) {
        dispatch({
          type: FILE_EXT_ERROR,
          payload: {
            error: { message: INVALID_EXT_ERROR },
            section,
          },
        })
        reject(new Error('Invalid file extension'))
        return
      }

      const isImg = fileTypeChecker.validateFileType(filereader.result, types)

      if (isImg) {
        createFileInputErrorState(input, dropTarget)

        dispatch({
          type: SET_FILE_ERROR,
          payload: {
            error: { message: INVALID_FILE_ERROR },
            section,
          },
        })
        reject(new Error('Invalid file type'))
        return
      }

      resolve({ result: filereader.result })
    }

    filereader.readAsArrayBuffer(file)
  })
}
118
+
37
119
function FileUpload ( { section, setLocalAlertState } ) {
38
120
// e.g. 'Aggregate Case Data' => 'aggregate-case-data'
39
121
// The set of uploaded files in our Redux state
@@ -86,7 +168,7 @@ function FileUpload({ section, setLocalAlertState }) {
86
168
}
87
169
const inputRef = useRef ( null )
88
170
89
- const validateAndUploadFile = ( event ) => {
171
+ const validateAndUploadFile = async ( event ) => {
90
172
setLocalAlertState ( {
91
173
active : false ,
92
174
type : null ,
@@ -101,51 +183,14 @@ function FileUpload({ section, setLocalAlertState }) {
101
183
dispatch ( clearError ( { section } ) )
102
184
dispatch ( clearFile ( { section } ) )
103
185
104
- // Get the the first 4 bytes of the file with which to check file signatures
105
- const blob = file . slice ( 0 , 4 )
106
-
107
186
const input = inputRef . current
108
187
const dropTarget = inputRef . current . parentNode
109
188
110
- const filereader = new FileReader ( )
111
-
112
- const types = [ 'png' , 'gif' , 'jpeg' ]
113
- filereader . onload = ( ) => {
114
- const re = / ( \. t x t | \. m s \d { 2 } | \. t s \d { 2 , 3 } ) $ / i
115
- if ( ! re . exec ( file . name ) ) {
116
- dispatch ( {
117
- type : FILE_EXT_ERROR ,
118
- payload : {
119
- error : { message : INVALID_EXT_ERROR } ,
120
- section,
121
- } ,
122
- } )
123
- return
124
- }
125
-
126
- const isImg = fileTypeChecker . validateFileType ( filereader . result , types )
127
-
128
- if ( isImg ) {
129
- createFileInputErrorState ( input , dropTarget )
130
-
131
- dispatch ( {
132
- type : SET_FILE_ERROR ,
133
- payload : {
134
- error : { message : INVALID_FILE_ERROR } ,
135
- section,
136
- } ,
137
- } )
138
- } else {
139
- dispatch (
140
- upload ( {
141
- section,
142
- file,
143
- } )
144
- )
145
- }
146
- }
189
+ const { result } = await load ( file , section , input , dropTarget , dispatch )
147
190
148
- filereader . readAsArrayBuffer ( blob )
191
+ // Get the correctly encoded file
192
+ const encodedFile = await tryGetUTF8EncodedFile ( result , file )
193
+ dispatch ( upload ( { file : encodedFile , section } ) )
149
194
}
150
195
151
196
return (
0 commit comments