Move domain name regular expression into lib/framework.php

cherry-pick-merge
This commit is contained in:
Mikael Nordfeldth 2017-04-22 11:07:38 +02:00
parent eefbfe746f
commit 5e7a7701b9
2 changed files with 13 additions and 2 deletions

View File

@ -57,6 +57,17 @@ define('NOTICE_INBOX_SOURCE_FORWARD', 4);
define('NOTICE_INBOX_SOURCE_PROFILE_TAG', 5);
define('NOTICE_INBOX_SOURCE_GATEWAY', -1);
/**
 * Regular expression fragments for matching URLs and domain names.
 *
 * StatusNet had this string as valid path characters: '\pN\pL\,\!\(\)\.\:\-\_\+\/\=\&\;\%\~\*\$\'\@'
 * Some of those characters can be troublesome when auto-linking plain text, such as the
 * trailing ")" in "http://some.com/)".
 * URL encoding should be used whenever a weird character is used; the following strings are not definitive.
 */
// Unicode numbers (\pN) and letters (\pL) plus a set of escaped punctuation characters.
// Presumably meant to be placed inside a [...] character class; confirm at the call sites.
define('URL_REGEX_VALID_PATH_CHARS', '\pN\pL\,\!\.\:\-\_\+\/\@\=\;\%\~\*');
// The path characters plus "&".
define('URL_REGEX_VALID_QSTRING_CHARS', URL_REGEX_VALID_PATH_CHARS . '\&');
// The query string characters plus "?" and "#".
define('URL_REGEX_VALID_FRAGMENT_CHARS', URL_REGEX_VALID_QSTRING_CHARS . '\?\#');
// The characters ? . , ! # : and ' (escaped for use in a regular expression).
define('URL_REGEX_EXCLUDED_END_CHARS', '\?\.\,\!\#\:\''); // don't include these if they are directly after a URL
// One or more dot-terminated labels of 1-63 characters from [A-Za-z0-9-] that neither
// start nor end with "-", followed by a final label of 2-10 ASCII letters.
// Only non-capturing groups are used, so embedding this fragment in a larger pattern
// does not shift that pattern's capture group numbers.
// NOTE(review): the lookarounds contain "!", so a pattern embedding this fragment must
// not use "!" as its delimiter.
// NOTE(review): final labels longer than 10 letters, or containing digits or hyphens,
// are not matched; confirm that this limit is intended.
define('URL_REGEX_DOMAIN_NAME', '(?:(?!-)[A-Za-z0-9\-]{1,63}(?<!-)\.)+[A-Za-z]{2,10}');
// Append the extlib directory under INSTALLDIR to the end of the include path,
// making it the last-resort place to find libs.
set_include_path(get_include_path() . PATH_SEPARATOR . INSTALLDIR . '/extlib/');

View File

@ -280,7 +280,7 @@ class OStatusPlugin extends Plugin
static function extractWebfingerIds($text)
{
$wmatches = array();
$result = preg_match_all('/(?:^|\s+)@((?:\w+[\w\-\_\.]?)*(?:[\w\-\_\.]*\w+)@(?:(?!-)[A-Za-z0-9\-]{1,63}(?<!-)\.)+[A-Za-z]{2,10})/',
$result = preg_match_all('/(?:^|\s+)@((?:\w+[\w\-\_\.]?)*(?:[\w\-\_\.]*\w+)@'.URL_REGEX_DOMAIN_NAME.')/',
$text,
$wmatches,
PREG_OFFSET_CAPTURE);
@ -300,7 +300,7 @@ class OStatusPlugin extends Plugin
static function extractUrlMentions($text)
{
$wmatches = array();
$result = preg_match_all('!(?:^|\s+)@((?:\w+\.)*\w+(?:\w+\-\w+)*\.\w+(?:/\w+)*)!',
// Find "@domain.tld/optional/path" mentions at the start of the text or after whitespace.
// Group 1 captures the domain name plus any "/word" path segments; its opening
// parenthesis is required to balance the ")" before the closing delimiter.
// "/" is used as the delimiter (with the path slash escaped) because
// URL_REGEX_DOMAIN_NAME contains "!" in its lookarounds, which would end a
// "!"-delimited pattern prematurely.
$result = preg_match_all('/(?:^|\s+)@('.URL_REGEX_DOMAIN_NAME.'(?:\/\w+)*)/',
$text,
$wmatches,
PREG_OFFSET_CAPTURE);