Add a robots.txt URL to the site root
Adds a robots.txt file to the site root. Defaults defined by 'robotstxt' section of config. New events StartRobotsTxt and EndRobotsTxt to let plugins add information. Probably not useful if path is not /, but won't hurt anything, either.
This commit is contained in:
parent
fec8066bf7
commit
dc62246443
|
@ -708,3 +708,9 @@ EndUserRegister: When a new user has been registered
|
||||||
- &$profile: new profile data
|
- &$profile: new profile data
|
||||||
- &$user: new user account
|
- &$user: new user account
|
||||||
|
|
||||||
|
StartRobotsTxt: Before outputting the robots.txt page
|
||||||
|
- &$action: RobotstxtAction being shown
|
||||||
|
|
||||||
|
EndRobotsTxt: After the default robots.txt page (good place for customization)
|
||||||
|
- &$action: RobotstxtAction being shown
|
||||||
|
|
||||||
|
|
14
README
14
README
|
@ -1496,6 +1496,20 @@ interface. It also makes the user's profile the root URL.
|
||||||
enabled: Whether to run in "single user mode". Default false.
|
enabled: Whether to run in "single user mode". Default false.
|
||||||
nickname: nickname of the single user.
|
nickname: nickname of the single user.
|
||||||
|
|
||||||
|
robotstxt
|
||||||
|
---------
|
||||||
|
|
||||||
|
We put out a default robots.txt file to guide the processing of
|
||||||
|
Web crawlers. See http://www.robotstxt.org/ for more information
|
||||||
|
on the format of this file.
|
||||||
|
|
||||||
|
crawldelay: if non-empty, this value is provided as the Crawl-Delay:
|
||||||
|
for the robots.txt file. See http://ur1.ca/l5a0
|
||||||
|
for more information. Default is zero, no explicit delay.
|
||||||
|
disallow: Array of (virtual) directories to disallow. Default is 'main',
|
||||||
|
'search', 'message', 'settings', 'admin'. Ignored when site
|
||||||
|
is private, in which case the entire site ('/') is disallowed.
|
||||||
|
|
||||||
Plugins
|
Plugins
|
||||||
=======
|
=======
|
||||||
|
|
||||||
|
|
100
actions/robotstxt.php
Normal file
100
actions/robotstxt.php
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
<?php
|
||||||
|
/**
|
||||||
|
* StatusNet - the distributed open-source microblogging tool
|
||||||
|
* Copyright (C) 2010, StatusNet, Inc.
|
||||||
|
*
|
||||||
|
* robots.txt generator
|
||||||
|
*
|
||||||
|
* PHP version 5
|
||||||
|
*
|
||||||
|
* @category Action
|
||||||
|
* @package StatusNet
|
||||||
|
* @author Evan Prodromou <evan@status.net>
|
||||||
|
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
|
||||||
|
* @link http://status.net/
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (!defined('STATUSNET')) {
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prints out a static robots.txt
|
||||||
|
*
|
||||||
|
* @category Action
|
||||||
|
* @package StatusNet
|
||||||
|
* @author Evan Prodromou <evan@status.net>
|
||||||
|
* @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3
|
||||||
|
* @link http://status.net/
|
||||||
|
*/
|
||||||
|
|
||||||
|
class RobotstxtAction extends Action
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Handles requests
|
||||||
|
*
|
||||||
|
* Since this is a relatively static document, we
|
||||||
|
* don't do a prepare()
|
||||||
|
*
|
||||||
|
* @param array $args GET, POST, and URL params; unused.
|
||||||
|
*
|
||||||
|
* @return void
|
||||||
|
*/
|
||||||
|
|
||||||
|
function handle($args)
|
||||||
|
{
|
||||||
|
if (Event::handle('StartRobotsTxt', array($this))) {
|
||||||
|
|
||||||
|
header('Content-Type: text/plain');
|
||||||
|
|
||||||
|
print "User-Agent: *\n";
|
||||||
|
|
||||||
|
if (common_config('site', 'private')) {
|
||||||
|
|
||||||
|
print "Disallow: /\n";
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
$disallow = common_config('robotstxt', 'disallow');
|
||||||
|
|
||||||
|
foreach ($disallow as $dir) {
|
||||||
|
print "Disallow: /$dir/\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
$crawldelay = common_config('robotstxt', 'crawldelay');
|
||||||
|
|
||||||
|
if (!empty($crawldelay)) {
|
||||||
|
print "Crawl-delay: " . $crawldelay . "\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Event::handle('EndRobotsTxt', array($this));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return true; this page doesn't touch the DB.
|
||||||
|
*
|
||||||
|
* @param array $args other arguments
|
||||||
|
*
|
||||||
|
* @return boolean is read only action?
|
||||||
|
*/
|
||||||
|
|
||||||
|
function isReadOnly($args)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
|
@ -285,8 +285,9 @@ function main()
|
||||||
if (!$user && common_config('site', 'private')
|
if (!$user && common_config('site', 'private')
|
||||||
&& !isLoginAction($action)
|
&& !isLoginAction($action)
|
||||||
&& !preg_match('/rss$/', $action)
|
&& !preg_match('/rss$/', $action)
|
||||||
&& !preg_match('/^Api/', $action)
|
&& $action != 'robotstxt'
|
||||||
) {
|
&& !preg_match('/^Api/', $action)) {
|
||||||
|
|
||||||
// set returnto
|
// set returnto
|
||||||
$rargs =& common_copy_args($args);
|
$rargs =& common_copy_args($args);
|
||||||
unset($rargs['action']);
|
unset($rargs['action']);
|
||||||
|
|
|
@ -270,4 +270,8 @@ $default =
|
||||||
'singleuser' =>
|
'singleuser' =>
|
||||||
array('enabled' => false,
|
array('enabled' => false,
|
||||||
'nickname' => null),
|
'nickname' => null),
|
||||||
|
'robotstxt' =>
|
||||||
|
array('crawldelay' => 0,
|
||||||
|
'disallow' => array('main', 'settings', 'admin', 'search', 'message')
|
||||||
|
),
|
||||||
);
|
);
|
||||||
|
|
|
@ -73,6 +73,8 @@ class Router
|
||||||
|
|
||||||
if (Event::handle('StartInitializeRouter', array(&$m))) {
|
if (Event::handle('StartInitializeRouter', array(&$m))) {
|
||||||
|
|
||||||
|
$m->connect('robots.txt', array('action' => 'robotstxt'));
|
||||||
|
|
||||||
$m->connect('opensearch/people', array('action' => 'opensearch',
|
$m->connect('opensearch/people', array('action' => 'opensearch',
|
||||||
'type' => 'people'));
|
'type' => 'people'));
|
||||||
$m->connect('opensearch/notice', array('action' => 'opensearch',
|
$m->connect('opensearch/notice', array('action' => 'opensearch',
|
||||||
|
|
Loading…
Reference in New Issue
Block a user