001/*
002 *  Copyright 2010 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.robots;
017
018import java.io.IOException;
019
020import org.apache.avalon.framework.parameters.ParameterException;
021import org.apache.cocoon.ProcessingException;
022import org.apache.cocoon.xml.XMLUtils;
023import org.xml.sax.SAXException;
024
025import org.ametys.plugins.repository.AmetysObjectIterable;
026import org.ametys.web.repository.page.Page;
027import org.ametys.web.repository.page.jcr.DefaultPage;
028import org.ametys.web.repository.site.Site;
029import org.ametys.web.repository.sitemap.Sitemap;
030
031/**
032 * Generates sitemap.xml file.
033 */
034public class RobotsSitemapGenerator extends AbstractRobotsGenerator
035{
036    private static final String LOC_TAG = "loc";
037
038    private static final String URL_TAG = "url";
039
040    private static final String SITEMAPS_ORG_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9";
041
042    private static final String URL_SET_TAG = "urlset";
043
044    @Override
045    public void generate() throws IOException, SAXException, ProcessingException
046    {
047        String siteName;
048        try
049        {
050            siteName = parameters.getParameter("siteName");
051
052            Site site = getSite(siteName);
053            
054            String siteUrl = site.getUrl();
055            if (!siteUrl.endsWith("/"))
056            {
057                siteUrl = siteUrl + "/";
058            }
059            
060            contentHandler.startDocument();
061            XMLUtils.startElement(contentHandler, SITEMAPS_ORG_NAMESPACE, URL_SET_TAG);
062            
063            // Pages informations
064            for (Sitemap sitemap : site.getSitemaps())
065            {
066                for (Page page : sitemap.getChildrenPages())
067                {
068                    _saxUrl(page, siteUrl);
069                }
070            }
071            
072            // Sitemap URL
073            XMLUtils.endElement(contentHandler, SITEMAPS_ORG_NAMESPACE, URL_SET_TAG);
074            contentHandler.endDocument();
075        }
076        catch (ParameterException e)
077        {
078            getLogger().error("Error when getting site name", e);
079        }
080    }
081
082    /**
083     * Serialize information about a page in sitemap.xml
084     * @param page The page to serialize
085     * @param siteUrl the site public URL (ending with a '/').
086     * @throws SAXException if an error occurs while saxing
087     */
088    protected void _saxUrl(Page page, String siteUrl) throws SAXException
089    {
090        boolean disallow = page.getValue(DefaultPage.METADATA_ROBOTS_DISALLOW, false);
091
092        if (!disallow)
093        {
094            XMLUtils.startElement(contentHandler, URL_TAG);
095            XMLUtils.createElement(contentHandler, LOC_TAG, siteUrl + page.getSitemapName() + "/" + page.getPathInSitemap() + ".html");
096            XMLUtils.endElement(contentHandler, URL_TAG);
097            
098            AmetysObjectIterable< ? extends Page> childrenPages = page.getChildrenPages();
099            for (Page child : childrenPages)
100            {
101                _saxUrl(child, siteUrl);
102            }
103        }
104    }
105}