001/*
002 *  Copyright 2010 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.robots;
017
018import java.io.IOException;
019
020import org.apache.avalon.framework.parameters.ParameterException;
021import org.apache.cocoon.ProcessingException;
022import org.apache.cocoon.xml.XMLUtils;
023import org.xml.sax.SAXException;
024
025import org.ametys.plugins.repository.AmetysObjectIterable;
026import org.ametys.plugins.repository.metadata.CompositeMetadata;
027import org.ametys.web.repository.page.Page;
028import org.ametys.web.repository.page.jcr.DefaultPage;
029import org.ametys.web.repository.site.Site;
030import org.ametys.web.repository.sitemap.Sitemap;
031
032/**
033 * Generates sitemap.xml file.
034 */
035public class RobotsSitemapGenerator extends AbstractRobotsGenerator
036{
037    private static final String LOC_TAG = "loc";
038
039    private static final String URL_TAG = "url";
040
041    private static final String SITEMAPS_ORG_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9";
042
043    private static final String URL_SET_TAG = "urlset";
044
045    @Override
046    public void generate() throws IOException, SAXException, ProcessingException
047    {
048        String siteName;
049        try
050        {
051            siteName = parameters.getParameter("siteName");
052
053            Site site = getSite(siteName);
054            
055            String siteUrl = site.getUrl();
056            if (!siteUrl.endsWith("/"))
057            {
058                siteUrl = siteUrl + "/";
059            }
060            
061            contentHandler.startDocument();
062            XMLUtils.startElement(contentHandler, SITEMAPS_ORG_NAMESPACE, URL_SET_TAG);
063            
064            // Pages informations
065            for (Sitemap sitemap : site.getSitemaps())
066            {
067                for (Page page : sitemap.getChildrenPages())
068                {
069                    _saxUrl(page, siteUrl);
070                }
071            }
072            
073            // Sitemap URL
074            XMLUtils.endElement(contentHandler, SITEMAPS_ORG_NAMESPACE, URL_SET_TAG);
075            contentHandler.endDocument();
076        }
077        catch (ParameterException e)
078        {
079            getLogger().error("Error when getting site name", e);
080        }
081    }
082
083    /**
084     * Serialize information about a page in sitemap.xml
085     * @param page The page to serialize
086     * @param siteUrl the site public URL (ending with a '/').
087     * @throws SAXException if an error occurs while saxing
088     */
089    protected void _saxUrl(Page page, String siteUrl) throws SAXException
090    {
091        CompositeMetadata metadataHolder = page.getMetadataHolder();
092        boolean disallow = metadataHolder.getBoolean(DefaultPage.METADATA_ROBOTS_DISALLOW, false);
093
094        if (!disallow)
095        {
096            XMLUtils.startElement(contentHandler, URL_TAG);
097            XMLUtils.createElement(contentHandler, LOC_TAG, siteUrl + page.getSitemapName() + "/" + page.getPathInSitemap() + ".html");
098            XMLUtils.endElement(contentHandler, URL_TAG);
099            
100            AmetysObjectIterable< ? extends Page> childrenPages = page.getChildrenPages();
101            for (Page child : childrenPages)
102            {
103                _saxUrl(child, siteUrl);
104            }
105        }
106    }
107}