001/* 002 * Copyright 2010 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.web.robots; 017 018import java.io.IOException; 019 020import org.apache.avalon.framework.parameters.ParameterException; 021import org.apache.cocoon.ProcessingException; 022import org.apache.cocoon.xml.XMLUtils; 023import org.xml.sax.SAXException; 024 025import org.ametys.plugins.repository.AmetysObjectIterable; 026import org.ametys.web.repository.page.Page; 027import org.ametys.web.repository.page.jcr.DefaultPage; 028import org.ametys.web.repository.site.Site; 029import org.ametys.web.repository.sitemap.Sitemap; 030 031/** 032 * Generates sitemap.xml file. 033 */ 034public class RobotsSitemapGenerator extends AbstractRobotsGenerator 035{ 036 private static final String LOC_TAG = "loc"; 037 038 private static final String URL_TAG = "url"; 039 040 private static final String SITEMAPS_ORG_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9"; 041 042 private static final String URL_SET_TAG = "urlset"; 043 044 045 @Override 046 public void generate() throws IOException, SAXException, ProcessingException 047 { 048 String siteName; 049 try 050 { 051 siteName = parameters.getParameter("siteName"); 052 053 Site site = getSite(siteName); 054 055 String siteUrl = site.getUrl(); 056 if (!siteUrl.endsWith("/")) 057 { 058 siteUrl = siteUrl + "/"; 059 } 060 061 contentHandler.startDocument(); 062 XMLUtils.startElement(contentHandler, SITEMAPS_ORG_NAMESPACE, URL_SET_TAG); 063 064 // Pages informations 065 for (Sitemap sitemap : site.getSitemaps()) 066 { 067 boolean disallow = sitemap.getValue(DefaultPage.METADATA_ROBOTS_DISALLOW, false); 068 if (!disallow) 069 { 070 for (Page page : sitemap.getChildrenPages()) 071 { 072 _saxUrl(page, siteUrl); 073 } 074 } 075 } 076 077 // Sitemap URL 078 XMLUtils.endElement(contentHandler, SITEMAPS_ORG_NAMESPACE, URL_SET_TAG); 079 contentHandler.endDocument(); 080 } 081 catch (ParameterException e) 082 { 083 getLogger().error("Error when getting site name", e); 084 } 085 } 086 087 /** 088 * Serialize information about a page in sitemap.xml 089 * @param page The page to serialize 090 * @param siteUrl the site public URL (ending with a '/'). 091 * @throws SAXException if an error occurs while saxing 092 */ 093 protected void _saxUrl(Page page, String siteUrl) throws SAXException 094 { 095 boolean disallow = page.getValue(DefaultPage.METADATA_ROBOTS_DISALLOW, false); 096 097 if (!disallow) 098 { 099 if (!hasRestrictedAccess(page)) // exclude restricted pages from sitemap.xml 100 { 101 XMLUtils.startElement(contentHandler, URL_TAG); 102 XMLUtils.createElement(contentHandler, LOC_TAG, siteUrl + page.getSitemapName() + "/" + page.getPathInSitemap() + ".html"); 103 XMLUtils.endElement(contentHandler, URL_TAG); 104 } 105 106 AmetysObjectIterable< ? extends Page> childrenPages = page.getChildrenPages(); 107 for (Page child : childrenPages) 108 { 109 _saxUrl(child, siteUrl); 110 } 111 } 112 } 113}