001/* 002 * Copyright 2014 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.plugins.explorer.resources.metadata.parsing; 017 018import java.io.ByteArrayInputStream; 019import java.io.ByteArrayOutputStream; 020import java.io.IOException; 021import java.io.InputStream; 022 023import org.apache.commons.io.IOUtils; 024import org.apache.tika.exception.TikaException; 025import org.apache.tika.metadata.Metadata; 026import org.apache.tika.parser.ParseContext; 027import org.apache.tika.parser.pdf.PDFParser; 028import org.xml.sax.ContentHandler; 029import org.xml.sax.SAXException; 030 031/** 032 * {@link PDFParser} using the custom {@link AmetysJempboxExtractor} to extract all dublin core metadata. 033 */ 034public class AmetysPdfParser extends PDFParser 035{ 036 037 @Override 038 public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException 039 { 040 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 041 042 IOUtils.copy(stream, bos); 043 044 ByteArrayInputStream is = new ByteArrayInputStream(bos.toByteArray()); 045 046 super.parse(is, handler, metadata, context); 047 048 is = new ByteArrayInputStream(bos.toByteArray()); 049 050 new AmetysJempboxExtractor(metadata).parse(is); 051 } 052 053}