001/*
002 *  Copyright 2010 Anyware Services
003 *
004 *  Licensed under the Apache License, Version 2.0 (the "License");
005 *  you may not use this file except in compliance with the License.
006 *  You may obtain a copy of the License at
007 *
008 *      http://www.apache.org/licenses/LICENSE-2.0
009 *
010 *  Unless required by applicable law or agreed to in writing, software
011 *  distributed under the License is distributed on an "AS IS" BASIS,
012 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 *  See the License for the specific language governing permissions and
014 *  limitations under the License.
015 */
016package org.ametys.web.robots;
017
018import java.io.IOException;
019
020import org.apache.avalon.framework.parameters.ParameterException;
021import org.apache.cocoon.ProcessingException;
022import org.apache.cocoon.xml.XMLUtils;
023import org.xml.sax.SAXException;
024
025import org.ametys.plugins.repository.AmetysObjectIterable;
026import org.ametys.web.repository.page.Page;
027import org.ametys.web.repository.page.jcr.DefaultPage;
028import org.ametys.web.repository.site.Site;
029import org.ametys.web.repository.sitemap.Sitemap;
030
031/**
032 * Generates sitemap.xml file.
033 */
034public class RobotsSitemapGenerator extends AbstractRobotsGenerator
035{
036    private static final String LOC_TAG = "loc";
037
038    private static final String URL_TAG = "url";
039
040    private static final String SITEMAPS_ORG_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9";
041
042    private static final String URL_SET_TAG = "urlset";
043
044    
045    @Override
046    public void generate() throws IOException, SAXException, ProcessingException
047    {
048        String siteName;
049        try
050        {
051            siteName = parameters.getParameter("siteName");
052
053            Site site = getSite(siteName);
054            
055            String siteUrl = site.getUrl();
056            if (!siteUrl.endsWith("/"))
057            {
058                siteUrl = siteUrl + "/";
059            }
060            
061            contentHandler.startDocument();
062            XMLUtils.startElement(contentHandler, SITEMAPS_ORG_NAMESPACE, URL_SET_TAG);
063            
064            // Pages informations
065            for (Sitemap sitemap : site.getSitemaps())
066            {
067                boolean disallow = sitemap.getValue(DefaultPage.METADATA_ROBOTS_DISALLOW, false);
068                if (!disallow)
069                {
070                    for (Page page : sitemap.getChildrenPages())
071                    {
072                        _saxUrl(page, siteUrl);
073                    }
074                }
075            }
076            
077            // Sitemap URL
078            XMLUtils.endElement(contentHandler, SITEMAPS_ORG_NAMESPACE, URL_SET_TAG);
079            contentHandler.endDocument();
080        }
081        catch (ParameterException e)
082        {
083            getLogger().error("Error when getting site name", e);
084        }
085    }
086
087    /**
088     * Serialize information about a page in sitemap.xml
089     * @param page The page to serialize
090     * @param siteUrl the site public URL (ending with a '/').
091     * @throws SAXException if an error occurs while saxing
092     */
093    protected void _saxUrl(Page page, String siteUrl) throws SAXException
094    {
095        boolean disallow = page.getValue(DefaultPage.METADATA_ROBOTS_DISALLOW, false);
096
097        if (!disallow)
098        {
099            if (!hasRestrictedAccess(page)) // exclude restricted pages from sitemap.xml
100            {
101                XMLUtils.startElement(contentHandler, URL_TAG);
102                XMLUtils.createElement(contentHandler, LOC_TAG, siteUrl + page.getSitemapName() + "/" + page.getPathInSitemap() + ".html");
103                XMLUtils.endElement(contentHandler, URL_TAG);
104            }
105            
106            AmetysObjectIterable< ? extends Page> childrenPages = page.getChildrenPages();
107            for (Page child : childrenPages)
108            {
109                _saxUrl(child, siteUrl);
110            }
111        }
112    }
113}