001/* 002 * Copyright 2010 Anyware Services 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016package org.ametys.web.robots; 017 018import java.io.IOException; 019 020import org.apache.avalon.framework.parameters.ParameterException; 021import org.apache.cocoon.ProcessingException; 022import org.apache.cocoon.xml.XMLUtils; 023import org.xml.sax.SAXException; 024 025import org.ametys.plugins.repository.AmetysObjectIterable; 026import org.ametys.web.repository.page.Page; 027import org.ametys.web.repository.page.jcr.DefaultPage; 028import org.ametys.web.repository.site.Site; 029import org.ametys.web.repository.sitemap.Sitemap; 030 031/** 032 * Generates robots.txt file 033 */ 034public class RobotsGenerator extends AbstractRobotsGenerator 035{ 036 037 private static final String DISALLOW_LABEL = "Disallow:"; 038 039 private static final String NEW_LINE = System.getProperty("line.separator"); 040 041 private static final String SITEMAP_LABEL = "Sitemap:"; 042 043 private static final String SPACE_CHAR = " "; 044 045 private static final String TEXT_TAG = "text"; 046 047 private static final String USER_AGENT_LABEL = "User-agent:"; 048 049 private void _serializePageInformation(Page page, StringBuilder builder) 050 { 051 boolean disallow = page.getValue(DefaultPage.METADATA_ROBOTS_DISALLOW, false); 052 053 if (disallow) 054 { 055 builder.append(DISALLOW_LABEL).append(SPACE_CHAR).append("/" + page.getSitemapName() + "/" + page.getPathInSitemap() + ".html" + NEW_LINE); 056 builder.append(DISALLOW_LABEL).append(SPACE_CHAR).append("/" + page.getSitemapName() + "/" + page.getPathInSitemap() + "/" + NEW_LINE); 057 } 058 else 059 { 060 AmetysObjectIterable< ? extends Page> childrenPages = page.getChildrenPages(); 061 for (Page child : childrenPages) 062 { 063 _serializePageInformation(child, builder); 064 } 065 } 066 067 } 068 069 private void _serializeSitemapProperty(Site site, StringBuilder builder) 070 { 071 builder.append(SITEMAP_LABEL).append(" ").append(site.getUrl()).append("/sitemap.xml"); 072 } 073 074 @Override 075 public void generate() throws IOException, SAXException, ProcessingException 076 { 077 String siteName; 078 try 079 { 080 siteName = parameters.getParameter("siteName"); 081 082 Site site = getSite(siteName); 083 084 StringBuilder builder = new StringBuilder(); 085 // User agent information 086 builder.append(USER_AGENT_LABEL).append(" *").append(NEW_LINE); 087 // Pages informations 088 for (Sitemap sitemap : site.getSitemaps()) 089 { 090 boolean disallow = sitemap.getValue(DefaultPage.METADATA_ROBOTS_DISALLOW, false); 091 092 if (disallow) 093 { 094 builder.append(DISALLOW_LABEL).append(SPACE_CHAR).append("/" + sitemap.getSitemapName() + "/" + NEW_LINE); 095 } 096 else 097 { 098 for (Page page : sitemap.getChildrenPages()) 099 { 100 _serializePageInformation(page, builder); 101 } 102 } 103 104 } 105 // Sitemap URL 106 _serializeSitemapProperty(site, builder); 107 contentHandler.startDocument(); 108 XMLUtils.createElement(contentHandler, TEXT_TAG, builder.toString()); 109 contentHandler.endDocument(); 110 } 111 catch (ParameterException e) 112 { 113 getLogger().error("Error when getting site name", e); 114 } 115 } 116}