18
18
package org .apache .tika .server .core .resource ;
19
19
20
20
import static java .nio .charset .StandardCharsets .UTF_8 ;
21
+ import static org .apache .tika .server .core .resource .RecursiveMetadataResource .DEFAULT_HANDLER_TYPE ;
22
+ import static org .apache .tika .server .core .resource .RecursiveMetadataResource .HANDLER_TYPE_PARAM ;
21
23
22
24
import java .io .IOException ;
23
25
import java .io .InputStream ;
36
38
import javax .ws .rs .POST ;
37
39
import javax .ws .rs .PUT ;
38
40
import javax .ws .rs .Path ;
41
+ import javax .ws .rs .PathParam ;
39
42
import javax .ws .rs .Produces ;
40
43
import javax .ws .rs .WebApplicationException ;
41
44
import javax .ws .rs .core .Context ;
62
65
import org .apache .tika .Tika ;
63
66
import org .apache .tika .config .TikaConfig ;
64
67
import org .apache .tika .exception .EncryptedDocumentException ;
68
+ import org .apache .tika .exception .TikaException ;
65
69
import org .apache .tika .metadata .Metadata ;
66
70
import org .apache .tika .metadata .TikaCoreProperties ;
67
71
import org .apache .tika .parser .AutoDetectParser ;
68
72
import org .apache .tika .parser .DigestingParser ;
69
73
import org .apache .tika .parser .ParseContext ;
70
74
import org .apache .tika .parser .Parser ;
75
+ import org .apache .tika .sax .BasicContentHandlerFactory ;
71
76
import org .apache .tika .sax .BodyContentHandler ;
72
77
import org .apache .tika .sax .ExpandedTitleContentHandler ;
73
78
import org .apache .tika .sax .RichTextContentHandler ;
76
81
import org .apache .tika .server .core .InputStreamFactory ;
77
82
import org .apache .tika .server .core .ParseContextConfig ;
78
83
import org .apache .tika .server .core .ServerStatus ;
84
+ import org .apache .tika .server .core .TikaServerConfig ;
79
85
import org .apache .tika .server .core .TikaServerParseException ;
86
+ import org .apache .tika .utils .ExceptionUtils ;
80
87
81
88
@ Path ("/tika" )
82
89
public class TikaResource {
@@ -87,16 +94,19 @@ public class TikaResource {
87
94
private static final Logger LOG = LoggerFactory .getLogger (TikaResource .class );
88
95
private static Pattern ALLOWABLE_HEADER_CHARS = Pattern .compile ("(?i)^[-/_+\\ .A-Z0-9 ]+$" );
89
96
private static TikaConfig tikaConfig ;
97
+ private static TikaServerConfig tikaServerConfig ;
90
98
private static DigestingParser .Digester digester = null ;
91
99
private static InputStreamFactory inputStreamFactory = null ;
92
100
private static ServerStatus SERVER_STATUS = null ;
93
101
94
102
private static ParseContextConfig PARSE_CONTEXT_CONFIG = new CompositeParseContextConfig ();
95
103
96
104
97
- public static void init (TikaConfig config , DigestingParser .Digester digestr ,
105
+ public static void init (TikaConfig config , TikaServerConfig tikaServerConfg ,
106
+ DigestingParser .Digester digestr ,
98
107
InputStreamFactory iSF , ServerStatus serverStatus ) {
99
108
tikaConfig = config ;
109
+ tikaServerConfig = tikaServerConfg ;
100
110
digester = digestr ;
101
111
inputStreamFactory = iSF ;
102
112
SERVER_STATUS = serverStatus ;
@@ -508,6 +518,86 @@ public StreamingOutput getXML(final InputStream is, @Context HttpHeaders httpHea
508
518
httpHeaders .getRequestHeaders (), info , "xml" );
509
519
}
510
520
521
+ @ POST
522
+ @ Consumes ("multipart/form-data" )
523
+ @ Produces ("application/json" )
524
+ @ Path ("form{" + HANDLER_TYPE_PARAM + " : (\\ w+)?}" )
525
+ public Metadata getJsonFromMultipart (Attachment att ,
526
+ @ Context HttpHeaders httpHeaders ,
527
+ @ Context final UriInfo info ,
528
+ @ PathParam (HANDLER_TYPE_PARAM )
529
+ String handlerTypeName )
530
+ throws IOException , TikaException {
531
+ Metadata metadata = new Metadata ();
532
+ parseToMetadata (getInputStream (att .getObject (InputStream .class ), metadata , httpHeaders ),
533
+ metadata , preparePostHeaderMap (att , httpHeaders ), info , handlerTypeName );
534
+ TikaResource .getConfig ().getMetadataFilter ().filter (metadata );
535
+ return metadata ;
536
+ }
537
+
538
+ @ PUT
539
+ @ Consumes ("*/*" )
540
+ @ Produces ("application/json" )
541
+ @ Path ("{" + HANDLER_TYPE_PARAM + " : (\\ w+)?}" )
542
+ public Metadata getJson (final InputStream is , @ Context
543
+ HttpHeaders httpHeaders ,
544
+ @ Context final UriInfo info , @ PathParam (HANDLER_TYPE_PARAM )
545
+ String handlerTypeName )
546
+ throws IOException , TikaException {
547
+ Metadata metadata = new Metadata ();
548
+ parseToMetadata (getInputStream (is , metadata , httpHeaders ), metadata ,
549
+ httpHeaders .getRequestHeaders (), info , handlerTypeName );
550
+ TikaResource .getConfig ().getMetadataFilter ().filter (metadata );
551
+ return metadata ;
552
+ }
553
+
554
+ private void parseToMetadata (InputStream inputStream ,
555
+ Metadata metadata ,
556
+ MultivaluedMap <String , String > httpHeaders ,
557
+ UriInfo info , String handlerTypeName ) throws IOException {
558
+ final Parser parser = createParser ();
559
+ final ParseContext context = new ParseContext ();
560
+
561
+ fillMetadata (parser , metadata , httpHeaders );
562
+ fillParseContext (httpHeaders , metadata , context );
563
+
564
+ logRequest (LOG , "/tika" , metadata );
565
+ int writeLimit = -1 ;
566
+ if (httpHeaders .containsKey ("writeLimit" )) {
567
+ writeLimit = Integer .parseInt (httpHeaders .getFirst ("writeLimit" ));
568
+ }
569
+ BasicContentHandlerFactory .HANDLER_TYPE type =
570
+ BasicContentHandlerFactory .parseHandlerType (handlerTypeName , DEFAULT_HANDLER_TYPE );
571
+ BasicContentHandlerFactory fact = new BasicContentHandlerFactory (type , writeLimit );
572
+ ContentHandler contentHandler = fact .getNewContentHandler ();
573
+
574
+ try {
575
+ parse (parser , LOG , info .getPath (), inputStream , contentHandler , metadata , context );
576
+ } catch (TikaServerParseException e ) {
577
+ if (tikaServerConfig .isReturnStackTrace ()) {
578
+ Throwable cause = e .getCause ();
579
+ if (cause != null ) {
580
+ metadata .add (TikaCoreProperties .CONTAINER_EXCEPTION ,
581
+ ExceptionUtils .getStackTrace (cause ));
582
+ } else {
583
+ metadata .add (TikaCoreProperties .CONTAINER_EXCEPTION ,
584
+ ExceptionUtils .getStackTrace (e ));
585
+ }
586
+ } else {
587
+ throw e ;
588
+ }
589
+ } catch (OutOfMemoryError e ) {
590
+ if (tikaServerConfig .isReturnStackTrace ()) {
591
+ metadata .add (TikaCoreProperties .CONTAINER_EXCEPTION ,
592
+ ExceptionUtils .getStackTrace (e ));
593
+ } else {
594
+ throw e ;
595
+ }
596
+ } finally {
597
+ metadata .add (TikaCoreProperties .TIKA_CONTENT , contentHandler .toString ());
598
+ }
599
+ }
600
+
511
601
private StreamingOutput produceOutput (final InputStream is , Metadata metadata ,
512
602
final MultivaluedMap <String , String > httpHeaders ,
513
603
final UriInfo info , final String format ) {
0 commit comments