Add a robots.txt URL to the site root

Adds a robots.txt file to the site root. Defaults defined by
'robotstxt' section of config. New events StartRobotsTxt and
EndRobotsTxt to let plugins add information. Probably not
useful if path is not /, but won't hurt anything, either.
This commit is contained in:
Evan Prodromou 2010-01-31 10:12:26 -05:00
parent fec8066bf7
commit dc62246443
6 changed files with 129 additions and 2 deletions

View File

@ -708,3 +708,9 @@ EndUserRegister: When a new user has been registered
- &$profile: new profile data - &$profile: new profile data
- &$user: new user account - &$user: new user account
StartRobotsTxt: Before outputting the robots.txt page
- &$action: RobotstxtAction being shown
EndRobotsTxt: After the default robots.txt page (good place for customization)
- &$action: RobotstxtAction being shown

14
README
View File

@ -1496,6 +1496,20 @@ interface. It also makes the user's profile the root URL.
enabled: Whether to run in "single user mode". Default false. enabled: Whether to run in "single user mode". Default false.
nickname: nickname of the single user. nickname: nickname of the single user.
robotstxt
---------
We put out a default robots.txt file to guide the processing of
Web crawlers. See http://www.robotstxt.org/ for more information
on the format of this file.
crawldelay: if non-empty, this value is provided as the Crawl-Delay:
for the robots.txt file. See http://ur1.ca/l5a0
for more information. Default is zero, no explicit delay.
disallow: Array of (virtual) directories to disallow. Default is 'main',
'search', 'message', 'settings', 'admin'. Ignored when site
is private, in which case the entire site ('/') is disallowed.
Plugins Plugins
======= =======

100
actions/robotstxt.php Normal file
View File

@ -0,0 +1,100 @@
<?php
/**
* StatusNet - the distributed open-source microblogging tool
* Copyright (C) 2010, StatusNet, Inc.
*
* robots.txt generator
*
* PHP version 5
*
* @category Action
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
* @link http://status.net/
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// Abort when accessed directly; this file must be loaded by the
// StatusNet framework, which defines the STATUSNET constant.
if (!defined('STATUSNET')) {
    exit(1);
}
/**
* Prints out a static robots.txt
*
* @category Action
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
* @link http://status.net/
*/
/**
 * Prints out a static robots.txt
 *
 * @category Action
 * @package  StatusNet
 * @author   Evan Prodromou <evan@status.net>
 * @license  http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
 * @link     http://status.net/
 */
class RobotstxtAction extends Action
{
    /**
     * Handles requests
     *
     * Emits the robots.txt document. The output is mostly static, so
     * there is no prepare() step; plugins can pre-empt or extend the
     * output via the StartRobotsTxt and EndRobotsTxt events.
     *
     * @param array $args GET, POST, and URL params; unused.
     *
     * @return void
     */
    function handle($args)
    {
        // A plugin returning false from StartRobotsTxt takes over
        // the whole response; nothing more to do here.
        if (!Event::handle('StartRobotsTxt', array($this))) {
            return;
        }

        header('Content-Type: text/plain');

        print "User-Agent: *\n";

        if (common_config('site', 'private')) {
            // Private sites are hidden from crawlers entirely.
            print "Disallow: /\n";
        } else {
            // Block each configured (virtual) directory.
            foreach (common_config('robotstxt', 'disallow') as $path) {
                print "Disallow: /$path/\n";
            }

            $delay = common_config('robotstxt', 'crawldelay');

            // Default of 0 means no explicit Crawl-delay line.
            if (!empty($delay)) {
                print "Crawl-delay: " . $delay . "\n";
            }
        }

        Event::handle('EndRobotsTxt', array($this));
    }

    /**
     * Return true; this page doesn't touch the DB.
     *
     * @param array $args other arguments
     *
     * @return boolean is read only action?
     */
    function isReadOnly($args)
    {
        return true;
    }
}

View File

@ -285,8 +285,9 @@ function main()
if (!$user && common_config('site', 'private') if (!$user && common_config('site', 'private')
&& !isLoginAction($action) && !isLoginAction($action)
&& !preg_match('/rss$/', $action) && !preg_match('/rss$/', $action)
&& !preg_match('/^Api/', $action) && $action != 'robotstxt'
) { && !preg_match('/^Api/', $action)) {
// set returnto // set returnto
$rargs =& common_copy_args($args); $rargs =& common_copy_args($args);
unset($rargs['action']); unset($rargs['action']);

View File

@ -270,4 +270,8 @@ $default =
'singleuser' => 'singleuser' =>
array('enabled' => false, array('enabled' => false,
'nickname' => null), 'nickname' => null),
'robotstxt' =>
array('crawldelay' => 0,
'disallow' => array('main', 'settings', 'admin', 'search', 'message')
),
); );

View File

@ -73,6 +73,8 @@ class Router
if (Event::handle('StartInitializeRouter', array(&$m))) { if (Event::handle('StartInitializeRouter', array(&$m))) {
$m->connect('robots.txt', array('action' => 'robotstxt'));
$m->connect('opensearch/people', array('action' => 'opensearch', $m->connect('opensearch/people', array('action' => 'opensearch',
'type' => 'people')); 'type' => 'people'));
$m->connect('opensearch/notice', array('action' => 'opensearch', $m->connect('opensearch/notice', array('action' => 'opensearch',