001/*
002 *  Copyright 2014 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.explorer.resources.metadata.parsing;
017
018import java.io.ByteArrayInputStream;
019import java.io.ByteArrayOutputStream;
020import java.io.IOException;
021import java.io.InputStream;
022
023import org.apache.commons.io.IOUtils;
024import org.apache.tika.exception.TikaException;
025import org.apache.tika.metadata.Metadata;
026import org.apache.tika.parser.ParseContext;
027import org.apache.tika.parser.pdf.PDFParser;
028import org.xml.sax.ContentHandler;
029import org.xml.sax.SAXException;
030
031/**
032 * {@link PDFParser} using the custom {@link AmetysJempboxExtractor} to extract all dublin core metadata.
033 */
034public class AmetysPdfParser extends PDFParser
035{
036    
037    @Override
038    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException
039    {
040        ByteArrayOutputStream bos = new ByteArrayOutputStream();
041        
042        IOUtils.copy(stream, bos);
043        
044        ByteArrayInputStream is = new ByteArrayInputStream(bos.toByteArray());
045        
046        super.parse(is, handler, metadata, context);
047        
048        is = new ByteArrayInputStream(bos.toByteArray());
049        
050        new AmetysJempboxExtractor(metadata).parse(is);
051    }
052    
053}