@@ -67,6 +67,7 @@ public class GCSBatchSink extends AbstractFileSink<GCSBatchSink.GCSBatchSinkConf
6767 private static final String RECORDS_UPDATED_METRIC = "records.updated" ;
6868 public static final String AVRO_NAMED_OUTPUT = "avro.mo.config.namedOutput" ;
6969 public static final String COMMON_NAMED_OUTPUT = "mapreduce.output.basename" ;
70+ public static final String CONTENT_TYPE = "io.cdap.gcs.batch.sink.content.type" ;
7071
7172 private final GCSBatchSinkConfig config ;
7273 private String outputPath ;
@@ -125,6 +126,7 @@ public void prepareRun(BatchSinkContext context) throws Exception {
125126 @ Override
126127 protected Map <String , String > getFileSystemProperties (BatchSinkContext context ) {
127128 Map <String , String > properties = GCPUtils .getFileSystemProperties (config , config .getPath (), new HashMap <>());
129+ properties .put (GCSBatchSink .CONTENT_TYPE , config .getContentType ());
128130 properties .putAll (config .getFileSystemProperties ());
129131 String outputFileBaseName = config .getOutputFileNameBase ();
130132 if (outputFileBaseName == null || outputFileBaseName .isEmpty ()) {
@@ -242,6 +244,23 @@ public static class GCSBatchSinkConfig extends GCPReferenceSinkConfig implements
242244 private static final String NAME_LOCATION = "location" ;
243245 private static final String NAME_FS_PROPERTIES = "fileSystemProperties" ;
244246 private static final String NAME_FILE_NAME_BASE = "outputFileNameBase" ;
247+ private static final String NAME_CONTENT_TYPE = "contentType" ;
248+ private static final String NAME_CUSTOM_CONTENT_TYPE = "customContentType" ;
249+ private static final String DEFAULT_CONTENT_TYPE = "application/octet-stream" ;
250+ private static final String CONTENT_TYPE_OTHER = "other" ;
251+ private static final String CONTENT_TYPE_APPLICATION_JSON = "application/json" ;
252+ private static final String CONTENT_TYPE_APPLICATION_AVRO = "application/avro" ;
253+ private static final String CONTENT_TYPE_APPLICATION_CSV = "application/csv" ;
254+ private static final String CONTENT_TYPE_TEXT_PLAIN = "text/plain" ;
255+ private static final String CONTENT_TYPE_TEXT_CSV = "text/csv" ;
256+ private static final String CONTENT_TYPE_TEXT_TSV = "text/tab-separated-values" ;
257+ private static final String FORMAT_AVRO = "avro" ;
258+ private static final String FORMAT_CSV = "csv" ;
259+ private static final String FORMAT_JSON = "json" ;
260+ private static final String FORMAT_TSV = "tsv" ;
261+ private static final String FORMAT_DELIMITED = "delimited" ;
262+ private static final String FORMAT_ORC = "orc" ;
263+ private static final String FORMAT_PARQUET = "parquet" ;
245264
246265 private static final String SCHEME = "gs://" ;
247266 @ Name (NAME_PATH )
@@ -279,6 +298,18 @@ public static class GCSBatchSinkConfig extends GCPReferenceSinkConfig implements
279298 "This value is ignored if the bucket already exists" )
280299 protected String location ;
281300
301+ @ Macro
302+ @ Description ("The Content Type property is used to indicate the media type of the resource." +
303+ "Defaults to 'application/octet-stream'." )
304+ @ Nullable
305+ protected String contentType ;
306+
307+ @ Macro
308+ @ Description ("The Custom Content Type is used when the value of Content-Type is set to other." +
309+ "User can provide specific Content-Type, different from the options in the dropdown." )
310+ @ Nullable
311+ protected String customContentType ;
312+
282313 @ Name (NAME_FS_PROPERTIES )
283314 @ Macro
284315 @ Nullable
@@ -321,10 +352,19 @@ public void validate(FailureCollector collector) {
321352 collector .addFailure (e .getMessage (), null ).withConfigProperty (NAME_FORMAT ).withStacktrace (e .getStackTrace ());
322353 }
323354
355+ if (!containsMacro (NAME_CONTENT_TYPE ) && !containsMacro (NAME_CUSTOM_CONTENT_TYPE )
356+ && !Strings .isNullOrEmpty (contentType ) && !contentType .equalsIgnoreCase (CONTENT_TYPE_OTHER )
357+ && !containsMacro (NAME_FORMAT )) {
358+ if (!contentType .equalsIgnoreCase (DEFAULT_CONTENT_TYPE )) {
359+ validateContentType (collector );
360+ }
361+ }
362+
324363 try {
325364 getSchema ();
326365 } catch (IllegalArgumentException e ) {
327- collector .addFailure (e .getMessage (), null ).withConfigProperty (NAME_SCHEMA ).withStacktrace (e .getStackTrace ());
366+ collector .addFailure (e .getMessage (), null ).withConfigProperty (NAME_SCHEMA )
367+ .withStacktrace (e .getStackTrace ());
328368 }
329369
330370 try {
@@ -335,6 +375,69 @@ public void validate(FailureCollector collector) {
335375 }
336376 }
337377
378+ //This method validates the specified content type for the used format.
379+ public void validateContentType (FailureCollector failureCollector ) {
380+ switch (format ) {
381+ case FORMAT_AVRO :
382+ if (!contentType .equalsIgnoreCase (CONTENT_TYPE_APPLICATION_AVRO )) {
383+ failureCollector .addFailure (String .format ("Valid content types for avro are %s, %s." ,
384+ CONTENT_TYPE_APPLICATION_AVRO , DEFAULT_CONTENT_TYPE ), null )
385+ .withConfigProperty (NAME_CONTENT_TYPE );
386+ }
387+ break ;
388+ case FORMAT_JSON :
389+ if (!contentType .equalsIgnoreCase (CONTENT_TYPE_APPLICATION_JSON )
390+ && !contentType .equalsIgnoreCase (CONTENT_TYPE_TEXT_PLAIN )) {
391+ failureCollector .addFailure (String .format (
392+ "Valid content types for json are %s, %s, %s." , CONTENT_TYPE_APPLICATION_JSON ,
393+ CONTENT_TYPE_TEXT_PLAIN , DEFAULT_CONTENT_TYPE ), null
394+ ).withConfigProperty (NAME_CONTENT_TYPE );
395+ }
396+ break ;
397+ case FORMAT_CSV :
398+ if (!contentType .equalsIgnoreCase (CONTENT_TYPE_APPLICATION_CSV )
399+ && !contentType .equalsIgnoreCase (CONTENT_TYPE_TEXT_CSV )
400+ && !contentType .equalsIgnoreCase (CONTENT_TYPE_TEXT_PLAIN )) {
401+ failureCollector .addFailure (String .format (
402+ "Valid content types for csv are %s, %s, %s, %s." , CONTENT_TYPE_APPLICATION_CSV ,
403+ CONTENT_TYPE_TEXT_PLAIN , CONTENT_TYPE_TEXT_CSV , DEFAULT_CONTENT_TYPE ), null
404+ ).withConfigProperty (NAME_CONTENT_TYPE );
405+ }
406+ break ;
407+ case FORMAT_DELIMITED :
408+ if (!contentType .equalsIgnoreCase (CONTENT_TYPE_TEXT_PLAIN )
409+ && !contentType .equalsIgnoreCase (CONTENT_TYPE_TEXT_CSV )
410+ && !contentType .equalsIgnoreCase (CONTENT_TYPE_APPLICATION_CSV )
411+ && !contentType .equalsIgnoreCase (CONTENT_TYPE_TEXT_TSV )) {
412+ failureCollector .addFailure (String .format (
413+ "Valid content types for delimited are %s, %s, %s, %s, %s." , CONTENT_TYPE_TEXT_PLAIN ,
414+ CONTENT_TYPE_TEXT_CSV , CONTENT_TYPE_APPLICATION_CSV , CONTENT_TYPE_TEXT_TSV , DEFAULT_CONTENT_TYPE ), null
415+ ).withConfigProperty (NAME_CONTENT_TYPE );
416+ }
417+ break ;
418+ case FORMAT_PARQUET :
419+ if (!contentType .equalsIgnoreCase (DEFAULT_CONTENT_TYPE )) {
420+ failureCollector .addFailure (String .format ("Valid content type for parquet is %s." , DEFAULT_CONTENT_TYPE ),
421+ null ).withConfigProperty (NAME_CONTENT_TYPE );
422+ }
423+ break ;
424+ case FORMAT_ORC :
425+ if (!contentType .equalsIgnoreCase (DEFAULT_CONTENT_TYPE )) {
426+ failureCollector .addFailure (String .format ("Valid content type for orc is %s." , DEFAULT_CONTENT_TYPE ),
427+ null ).withConfigProperty (NAME_CONTENT_TYPE );
428+ }
429+ break ;
430+ case FORMAT_TSV :
431+ if (!contentType .equalsIgnoreCase (CONTENT_TYPE_TEXT_PLAIN )
432+ && !contentType .equalsIgnoreCase (CONTENT_TYPE_TEXT_TSV )) {
433+ failureCollector .addFailure (String .format (
434+ "Valid content types for tsv are %s, %s, %s." , CONTENT_TYPE_TEXT_TSV , CONTENT_TYPE_TEXT_PLAIN ,
435+ DEFAULT_CONTENT_TYPE ), null ).withConfigProperty (NAME_CONTENT_TYPE );
436+ }
437+ break ;
438+ }
439+ }
440+
338441 public String getBucket () {
339442 return GCSPath .from (path ).getBucket ();
340443 }
@@ -378,6 +481,30 @@ public String getLocation() {
378481 return location ;
379482 }
380483
484+ /* This method gets the value of content type. Valid content types for each format are:
485+ *
486+ * avro -> application/avro, application/octet-stream
487+ * json -> application/json, text/plain, application/octet-stream
488+ * csv -> application/csv, text/csv, text/plain, application/octet-stream
489+ * delimited -> application/csv, text/csv, text/plain, text/tsv, application/octet-stream
490+ * orc -> application/octet-stream
491+ * parquet -> application/octet-stream
492+ * tsv -> text/tab-separated-values, application/octet-stream
493+ */
494+ @ Nullable
495+ public String getContentType () {
496+ if (!Strings .isNullOrEmpty (contentType )) {
497+ if (contentType .equals (CONTENT_TYPE_OTHER )) {
498+ if (Strings .isNullOrEmpty (customContentType )) {
499+ return DEFAULT_CONTENT_TYPE ;
500+ }
501+ return customContentType ;
502+ }
503+ return contentType ;
504+ }
505+ return DEFAULT_CONTENT_TYPE ;
506+ }
507+
381508 public Map <String , String > getFileSystemProperties () {
382509 if (fileSystemProperties == null || fileSystemProperties .isEmpty ()) {
383510 return Collections .emptyMap ();
0 commit comments