001/*
002 *  Copyright 2020 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.plugins.contentio.csv;
017
018import java.io.BufferedReader;
019import java.io.File;
020import java.io.FileInputStream;
021import java.io.IOException;
022import java.io.InputStreamReader;
023import java.nio.charset.Charset;
024import java.nio.file.Files;
025import java.nio.file.Path;
026import java.nio.file.Paths;
027import java.nio.file.StandardCopyOption;
028import java.util.ArrayList;
029import java.util.Arrays;
030import java.util.HashMap;
031import java.util.List;
032import java.util.Map;
033import java.util.UUID;
034import java.util.function.Function;
035import java.util.stream.Collectors;
036
037import org.apache.avalon.framework.parameters.Parameters;
038import org.apache.avalon.framework.service.ServiceException;
039import org.apache.avalon.framework.service.ServiceManager;
040import org.apache.cocoon.acting.ServiceableAction;
041import org.apache.cocoon.environment.ObjectModelHelper;
042import org.apache.cocoon.environment.Redirector;
043import org.apache.cocoon.environment.Request;
044import org.apache.cocoon.environment.SourceResolver;
045import org.apache.cocoon.servlet.multipart.Part;
046import org.apache.cocoon.servlet.multipart.PartOnDisk;
047import org.apache.cocoon.servlet.multipart.RejectedPart;
048import org.apache.commons.io.FileUtils;
049import org.apache.commons.io.FilenameUtils;
050import org.apache.commons.lang3.StringUtils;
051import org.apache.tika.detect.DefaultEncodingDetector;
052import org.apache.tika.io.TikaInputStream;
053import org.apache.tika.metadata.Metadata;
054import org.supercsv.io.CsvMapReader;
055import org.supercsv.io.ICsvMapReader;
056import org.supercsv.prefs.CsvPreference;
057
058import org.ametys.cms.contenttype.ContentType;
059import org.ametys.cms.contenttype.ContentTypeExtensionPoint;
060import org.ametys.cms.data.type.ModelItemTypeConstants;
061import org.ametys.core.cocoon.JSonReader;
062import org.ametys.plugins.workflow.support.WorkflowProvider;
063import org.ametys.runtime.i18n.I18nizableText;
064import org.ametys.runtime.model.ModelItem;
065import org.ametys.runtime.servlet.RuntimeConfig;
066
067/**
068 * Import contents from an uploaded CSV file.
069 */
070public class ImportCSVFileAction extends ServiceableAction
071{
072
073    private static final String[] _ALLOWED_EXTENSIONS = new String[] {"txt", "csv"};
074
075    private static final String CONTENTIO_STORAGE_DIRECTORY = "contentio/temp";
076
077    private ContentTypeExtensionPoint _contentTypeEP;
078    
079    private WorkflowProvider _workflowProvider;
080    
081    @Override
082    public void service(ServiceManager serviceManager) throws ServiceException
083    {
084        super.service(serviceManager);
085        _contentTypeEP = (ContentTypeExtensionPoint) serviceManager.lookup(ContentTypeExtensionPoint.ROLE);
086        _workflowProvider = (WorkflowProvider) serviceManager.lookup(WorkflowProvider.ROLE);
087    }
088    
089    @Override
090    public Map act(Redirector redirector, SourceResolver resolver, Map objectModel, String source, Parameters parameters) throws Exception
091    {
092        Request request = ObjectModelHelper.getRequest(objectModel);
093        
094        Map<String, Object> result = new HashMap<>();
095        String contentTypeId = (String) request.get("contentType");
096        ContentType contentType = _contentTypeEP.getExtension(contentTypeId);
097        Part part = (Part) request.get("file");
098        if (part instanceof RejectedPart || part == null)
099        {
100            result.put("success", false);
101            result.put("error", "rejected");
102        }
103        else
104        {
105            PartOnDisk uploadedFilePart = (PartOnDisk) part;
106            File uploadedFile = uploadedFilePart.getFile();
107            
108            String filename = uploadedFilePart.getFileName().toLowerCase();
109            
110            if (!FilenameUtils.isExtension(filename, _ALLOWED_EXTENSIONS))
111            {
112                result.put("error", "invalid-extension");
113                request.setAttribute(JSonReader.OBJECT_TO_READ, result);
114                return EMPTY_MAP;
115            }
116            TikaInputStream stream = TikaInputStream.get(uploadedFile.toPath());
117            DefaultEncodingDetector defaultEncodingDetector = new DefaultEncodingDetector();
118            Charset charset = defaultEncodingDetector.detect(stream, new Metadata());
119            result.put("charset", charset);
120            String[] headers = _extractHeaders(uploadedFile, request, charset);
121          
122            if (headers == null)
123            {
124                result.put("error", "no-header");
125                request.setAttribute(JSonReader.OBJECT_TO_READ, result);
126                return EMPTY_MAP;
127            }
128            
129            List<Map<String, String>> mapping = Arrays.asList(headers)
130                    .stream()
131                    .filter(StringUtils::isNotEmpty)
132                    .map(header -> new HashMap<>(Map.of("header", header, 
133                            "attributePath", contentType.hasModelItem(header.replace("*", "").replace(".", "/")) ? header.replace("/", ".").replace("*", "") : "", 
134                            "isId", Boolean.toString(header.endsWith("*")))))
135                    .collect(Collectors.toList());
136            result.put("mapping", mapping);
137            
138            // Build a map to count how many attribute are there for each group
139            Map<String, Long> attributeCount = mapping.stream()
140                    .map(map -> map.get("attributePath"))
141                    .filter(StringUtils::isNotEmpty)
142                    .map(attributePath -> {
143                        String attributePrefix = "";
144                        int endIndex = attributePath.lastIndexOf(".");
145                        if (endIndex != -1)  
146                        {
147                            attributePrefix = attributePath.substring(0, endIndex);
148                        }
149                        return attributePrefix.replace(".", "/");
150                    })
151                    .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
152            
153            // If an attribute is the only for his group, it is the identifier
154            for (Map<String, String> map : mapping)
155            {
156                String attributePath = map.get("attributePath").replace(".", "/");
157                String attributePrefix = StringUtils.EMPTY;
158                int endIndex = attributePath.lastIndexOf("/");
159                if (endIndex != -1)  
160                {
161                    attributePrefix = attributePath.substring(0, endIndex);
162                }
163
164                boolean parentIsContent;
165                // If there is no prefix, it is an attribute of the content we want to import
166                if (attributePrefix.equals(StringUtils.EMPTY))
167                {
168                    parentIsContent = true;
169                }
170                // Otherwise, check the modelItem 
171                else if (contentType.hasModelItem(attributePrefix))
172                {
173                    ModelItem modelItem = contentType.getModelItem(attributePrefix);
174                    String modelTypeId = modelItem.getType().getId();
175                    parentIsContent = ModelItemTypeConstants.CONTENT_ELEMENT_TYPE_ID.equals(modelTypeId);
176                }
177                else
178                {
179                    parentIsContent = false;
180                }
181                
182                // If an attribute is the only one of its level, and is part of a content, consider it as the identifier
183                if (attributeCount.getOrDefault(attributePrefix, 0L).equals(1L) && parentIsContent && StringUtils.isNotBlank(attributePath))
184                {
185                    map.put("isId", "true");
186                }  
187            }
188            
189            String newPath = _copyFile(uploadedFile.toPath());
190            result.put("path", newPath);
191            result.put("success", true);
192            result.put("workflows", _getWorkflows());
193            result.put("defaultWorkflow", contentType.getDefaultWorkflowName().orElse(null));
194            result.put("fileName", uploadedFile.getName());
195        }
196        
197        request.setAttribute(JSonReader.OBJECT_TO_READ, result);
198        return EMPTY_MAP;
199    }
200        
201    private String _copyFile(Path path) throws IOException
202    {
203        File contentIOStorageDir = FileUtils.getFile(RuntimeConfig.getInstance().getAmetysHome(), CONTENTIO_STORAGE_DIRECTORY);
204        
205        if (!contentIOStorageDir.exists())
206        {
207            if (!contentIOStorageDir.mkdirs())
208            {
209                throw new IOException("Unable to create monitoring directory: " + contentIOStorageDir);
210            }
211        }
212        String id = UUID.randomUUID().toString() + ".csv";
213        Path copy = Files.copy(path, Paths.get(contentIOStorageDir.getPath(), id), StandardCopyOption.REPLACE_EXISTING);
214        
215        return copy.toString();
216        
217    }
218    
219    private CsvPreference _getCSVPreference(Request request)
220    {
221        String escapingChar = (String) request.get("escaping-char");
222        String separatingChar = (String) request.get("separating-char");
223        char separating = separatingChar.charAt(0);
224        char escaping = escapingChar.charAt(0);
225        
226        if (separating == escaping)
227        {
228            throw new IllegalArgumentException("Separating character can not be equals to escaping character");
229        }
230        return new CsvPreference.Builder(escaping, separating, "\r\n").build();
231    }
232    
233    private String[] _extractHeaders(File uploadedFile, Request request, Charset charset) throws IOException
234    {
235        
236        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(uploadedFile), charset)))
237        {
238            reader.mark(8192);
239            
240            CsvPreference preference = _getCSVPreference(request);
241            String headerLine = reader.readLine();
242            if (headerLine != null)
243            {
244                reader.reset();
245                try (ICsvMapReader mapReader = new CsvMapReader(reader, preference))
246                {
247                    String[] headers = mapReader.getHeader(true);
248                    return headers;
249                }
250            }
251            else
252            {
253                return null;
254            } 
255        }
256    }
257
258    /**
259     * getWorkflows
260     * @return map of workflows
261     */
262    private List<Map<String, Object>> _getWorkflows()
263    {
264        List<Map<String, Object>> workflows = new ArrayList<>();
265        String[] workflowNames = _workflowProvider.getAmetysObjectWorkflow().getWorkflowNames();
266        for (String workflowName : workflowNames)
267        {
268            Map<String, Object> workflowMap = new HashMap<>();
269            workflowMap.put("value", workflowName);
270            workflowMap.put("label", new I18nizableText("application", "WORKFLOW_" + workflowName));
271            workflows.add(workflowMap);
272        }
273        return workflows;
274    }
275}