2021-09-14 21:40:50 +09:00
< ? php
2021-10-10 17:26:18 +09:00
declare ( strict_types = 1 );
2021-09-14 21:40:50 +09:00
// {{{ License
// This file is part of GNU social - https://www.gnu.org/software/social
//
// GNU social is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GNU social is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with GNU social. If not, see <http://www.gnu.org/licenses/>.
// }}}
namespace Component\Tag ;
2021-09-21 00:16:42 +09:00
use App\Core\Cache ;
2021-09-14 21:40:50 +09:00
use App\Core\DB\DB ;
use App\Core\Event ;
2021-12-04 21:58:27 +09:00
use function App\Core\I18n\_m ;
2021-09-14 21:40:50 +09:00
use App\Core\Modules\Component ;
2021-09-20 21:08:17 +09:00
use App\Core\Router\Router ;
2021-12-04 21:58:27 +09:00
use App\Entity\Actor ;
2021-09-21 00:16:42 +09:00
use App\Entity\Note ;
2021-12-23 23:04:00 +09:00
use App\Util\Common ;
2021-12-04 22:11:34 +09:00
use App\Util\Exception\ClientException ;
2021-09-20 21:08:17 +09:00
use App\Util\Formatting ;
2021-12-27 02:01:56 +09:00
use App\Util\Functional as GSF ;
2021-09-20 21:08:17 +09:00
use App\Util\HTML ;
2022-01-05 07:20:12 +09:00
use Component\Circle\Entity\ActorTag ;
2021-12-26 18:48:16 +09:00
use Component\Language\Entity\Language ;
2022-01-05 07:20:12 +09:00
use Component\Tag\Entity\NoteTag ;
2021-09-27 18:39:58 +09:00
use Doctrine\Common\Collections\ExpressionBuilder ;
use Doctrine\ORM\Query\Expr ;
use Doctrine\ORM\QueryBuilder ;
2021-12-10 07:22:31 +09:00
use Functional as F ;
2021-12-04 21:58:27 +09:00
use Symfony\Component\Form\Extension\Core\Type\CheckboxType ;
use Symfony\Component\HttpFoundation\Request ;
2021-09-14 21:40:50 +09:00
/**
 * Component responsible for extracting tags from posted notes, as well as normalizing them
 *
 * @author    Hugo Sales <hugo@hsal.es>
 * @author    Diogo Peralta Cordeiro <@diogo.site>
 * @copyright 2021 Free Software Foundation, Inc http://www.fsf.org
 * @license   https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
 */
class Tag extends Component
{
2022-01-05 07:20:12 +09:00
// Maximum number of characters ensureLength() keeps when truncating a tag
public const MAX_TAG_LENGTH = 64 ;
// Matches a hashtag at the start of the text or right after whitespace.
// Capture group 1 is that leading boundary; group 2 is '#' followed by 1 to 64
// letters, numbers, '_' or '-'. The pattern is not anchored at the end.
public const TAG_REGEX = '/(^|\\s)(#[\\pL\\pN_\\-]{1,64})/u' ; // Brion Vibber 2011-02-23 v2:classes/Notice.php:367 function saveTags
// Pattern embedded in the route paths registered by onAddRoute().
// NOTE(review): this only allows ASCII letters and digits, which is narrower than
// what TAG_REGEX accepts ('_', '-', non-ASCII letters) — confirm that URL
// generation for such tags behaves as intended.
public const TAG_SLUG_REGEX = '[A-Za-z0-9]{1,64}' ;
2021-09-20 21:08:17 +09:00
/**
 * Register the routes for the single-tag and the multi-tag note feeds.
 *
 * @param mixed $r route loader; only its `connect(id, path, target)` method is used
 *
 * @return bool always Event::next
 */
public function onAddRoute($r): bool
{
    $slug = self::TAG_SLUG_REGEX;

    // Exactly one tag slug
    $single_path = sprintf('/note-tag/{tag<%s>}', $slug);
    // Two or more comma-separated tag slugs
    $multi_path = sprintf('/note-tags/{tags<(%s,)+%s>}', $slug, $slug);

    $r->connect('single_note_tag', $single_path, [Controller\Tag::class, 'single_note_tag']);
    $r->connect('multi_note_tags', $multi_path, [Controller\Tag::class, 'multi_note_tags']);

    return Event::next;
}
2021-09-14 21:40:50 +09:00
/**
 * Process note by extracting any tags present
 *
 * Every distinct hashtag found in $content is persisted as a NoteTag for
 * $note, the note is pushed onto the "tag-<canonical>" cache list, and the
 * feed cache keys of that canonical tag are deleted.
 *
 * @param Note   $note         note the tags belong to
 * @param string $content      note content to scan for hashtags
 * @param string $content_type not used by this handler
 * @param array  $extra_args   reads 'TagProcessed' (handler is skipped when truthy)
 *                             and 'tag_use_canonical' (defaults to false)
 *
 * @return bool always Event::next
 */
public function onProcessNoteContent(Note $note, string $content, string $content_type, array $extra_args): bool
{
    if ($extra_args['TagProcessed'] ?? false) {
        return Event::next;
    }

    // XXX: We remove <span> because when content is in html the tag comes as #<span>hashtag</span>
    $content      = str_replace('<span>', '', $content);
    $matched_tags = [];
    preg_match_all(self::TAG_REGEX, $content, $matched_tags, \PREG_SET_ORDER);
    // Group 2 of TAG_REGEX is the hashtag itself; group 1 is only the leading boundary
    $matched_tags = array_unique(array_column($matched_tags, 2));
    if ($matched_tags === []) {
        return Event::next;
    }

    // The note's language is the same for every tag, so resolve it once instead of per tag
    $lang_id       = $note->getLanguageId();
    $locale        = \is_null($lang_id) ? null : Language::getById($lang_id)->getLocale();
    $use_canonical = $extra_args['tag_use_canonical'] ?? false;

    foreach ($matched_tags as $match) {
        $tag = self::extract($match);
        if (!self::validate($tag)) {
            continue; // Ignore invalid tag candidates
        }
        $canonical_tag = self::canonicalTag($tag, $locale);
        DB::persist(NoteTag::create([
            'tag'           => $tag,
            'canonical'     => $canonical_tag,
            'note_id'       => $note->getId(),
            'use_canonical' => $use_canonical,
            'language_id'   => $lang_id,
        ]));
        Cache::pushList("tag-{$canonical_tag}", $note);
        // Drop the cached feeds for this tag so they get rebuilt with the new note
        foreach (self::cacheKeys($canonical_tag) as $key) {
            Cache::delete($key);
        }
    }

    return Event::next;
}
2022-01-05 07:20:12 +09:00
/**
 * Replace every hashtag in $text with the markup produced by tagLink().
 *
 * @param string      $text   plain text content, modified in place
 * @param null|string $locale optional locale forwarded to tagLink()
 *
 * @return bool always Event::next
 */
public function onRenderPlainTextNoteContent(string &$text, ?string $locale = null): bool
{
    $linked = preg_replace_callback(
        self::TAG_REGEX,
        // $m[1] is the boundary before the tag (kept as is), $m[2] the hashtag itself
        static fn (array $m): string => $m[1] . self::tagLink($m[2], $locale),
        $text,
    );

    // preg_replace_callback() returns null on a PCRE error (e.g. malformed UTF-8
    // with the /u modifier); keep the caller's text instead of replacing it with null
    if (!\is_null($linked)) {
        $text = $linked;
    }

    return Event::next;
}
2022-01-05 07:20:12 +09:00
/**
 * Build the feed cache keys associated with a tag (or a list of tags).
 *
 * @param string $tag_single_or_multi suffix appended to every key
 *
 * @return array<string, string> indexed by 'note_single', 'note_multi', 'actor_single' and 'actor_multi'
 */
public static function cacheKeys(string $tag_single_or_multi): array
{
    $prefixes = [
        'note_single'  => 'note-tag-feed-',
        'note_multi'   => 'note-tags-feed-',
        'actor_single' => 'actor-tag-feed-',
        'actor_multi'  => 'actor-tags-feed-',
    ];

    // array_map() keeps the string keys when given a single array
    return array_map(
        static fn (string $prefix): string => $prefix . $tag_single_or_multi,
        $prefixes,
    );
}
2022-01-05 07:20:12 +09:00
/**
 * Build the markup for a tag: a 'span' with class 'tag' containing '#'
 * followed by an 'a' element pointing at the 'single_note_tag' route.
 *
 * @param string      $tag    tag, with or without the leading '#'
 * @param null|string $locale added to the route parameters when not null
 */
private static function tagLink(string $tag, ?string $locale): string
{
    $tag = self::extract($tag);

    $route_params = ['tag' => $tag];
    if (!\is_null($locale)) {
        $route_params['locale'] = $locale;
    }
    $url = Router::url('single_note_tag', $route_params);

    $anchor = HTML::html(
        ['a' => [
            'attrs' => [
                'href' => $url,
                'rel'  => 'tag', // https://microformats.org/wiki/rel-tag
            ],
            $tag,
        ]],
        options: ['indent' => false],
    );

    // The anchor is already rendered markup, hence the 'raw' option on the wrapper
    return HTML::html(
        ['span' => [
            'attrs' => ['class' => 'tag'],
            '#' . $anchor,
        ]],
        options: ['indent' => false, 'raw' => true],
    );
}
2022-01-05 07:20:12 +09:00
/**
 * Strip the '#' prefix from a tag and truncate the result with ensureLength().
 */
public static function extract(string $tag): string
{
    $without_hash = Formatting::removePrefix($tag, '#');

    return self::ensureLength($without_hash);
}
/**
 * Check whether $tag (given without the leading '#') is a well-formed tag.
 *
 * TAG_REGEX is not anchored at the end, so a bare preg_match() would accept
 * any string that merely starts with a valid tag (e.g. 'abc def'). The
 * matched hashtag is therefore required to span the whole candidate.
 *
 * @param string $tag tag without the '#' prefix
 *
 * @return bool true only when the entire string is a valid tag
 */
public static function validate(string $tag): bool
{
    $candidate = '#' . $tag;
    $match     = [];

    return preg_match(self::TAG_REGEX, $candidate, $match) === 1
        && $match[2] === $candidate;
}
/**
 * Normalise a tag with extract() and make sure the result passes validate().
 *
 * @throws ClientException when the extracted tag is not valid
 *
 * @return string the extracted tag
 */
public static function sanitize(string $tag): string
{
    $candidate = self::extract($tag);

    if (self::validate($candidate)) {
        return $candidate;
    }

    throw new ClientException(_m('Invalid tag given: {tag}', ['{tag}' => $candidate]));
}
2021-11-25 00:51:01 +09:00
/**
 * Truncate a tag to at most MAX_TAG_LENGTH characters (multibyte aware).
 */
public static function ensureLength(string $tag): string
{
    return mb_substr($tag, start: 0, length: self::MAX_TAG_LENGTH);
}
2021-12-01 21:24:22 +09:00
/**
 * Convert a tag to its canonical representation, by splitting it
 * into words, stemming it in the given language (if enabled) and
 * sluggifying it (turning it into an ASCII representation)
 *
 * A word is only replaced by its stem when a language is given and a
 * 'StemWord' handler stops the event; otherwise the word is used unchanged.
 *
 * @param string      $tag      tag to canonicalise; every '#' is removed first
 * @param null|string $language language handed to the 'StemWord' event, or null to skip stemming
 *
 * @return string the concatenated slugs, truncated with ensureLength()
 */
public static function canonicalTag(string $tag, ?string $language = null): string
{
    $words     = Formatting::splitWords(str_replace('#', '', $tag));
    $canonical = '';

    foreach ($words as $word) {
        $piece = $word;
        if (!\is_null($language)) {
            $stemmed = null;
            if (Event::handle('StemWord', [$language, $word, &$stemmed]) === Event::stop) {
                $piece = $stemmed;
            }
        }
        $canonical .= Formatting::slugify($piece);
    }

    return self::ensureLength($canonical);
}
2021-09-27 18:39:58 +09:00
2021-10-10 13:44:10 +09:00
/**
 * Populate $note_expr and/or $actor_expr with an expression to match a tag, if the term looks like a tag
 *
 * $term /^(note|tag|people|actor)/ means we want to match only either a note or an actor
 *
 * @param ExpressionBuilder $eb         builder used to create the expressions
 * @param string            $term       search term, expected as '<type>:#<tag>'
 * @param null|string       $locale     locale used to canonicalise the tag; falls back to Common::currentLanguage()
 * @param null|Actor        $actor      actor doing the search, if any; used to also match their own circles
 * @param mixed             $note_expr  receives the expression restricting notes
 * @param mixed             $actor_expr receives the expression restricting actors
 *
 * @throws ClientException from sanitize() when the part after ':' starts with '#' but is not a valid tag
 *
 * @return bool Event::next when the term has no ':' or its prefix is not one handled here, Event::stop otherwise
 */
public function onCollectionQueryCreateExpression(ExpressionBuilder $eb, string $term, ?string $locale, ?Actor $actor, &$note_expr, &$actor_expr): bool
{
    if (!str_contains($term, ':')) {
        return Event::next;
    }

    if (\is_null($locale)) {
        // NOTE(review): the result is passed as the ?string $language of canonicalTag() — confirm currentLanguage() returns a string
        $locale = Common::currentLanguage();
    }

    // Only the part after the first ':' is needed; the prefix is matched against $term below
    [, $search_term] = explode(':', $term);

    if (str_starts_with($search_term, '#')) {
        $search_term           = self::sanitize($search_term);
        $canonical_search_term = self::canonicalTag($search_term, $locale);
        $temp_note_expr        = $eb->eq('note_tag.canonical', $canonical_search_term);
        $temp_actor_expr       = $eb->eq('actor_tag.canonical', $canonical_search_term);

        // NOTE(review): 'people:' is listed both here and in the next branch; assuming startsWith()
        // matches any of the given prefixes, 'people:' terms never reach the actor branch — confirm intent
        if (Formatting::startsWith($term, ['note:', 'tag:', 'people:'])) {
            $note_expr = $temp_note_expr;
        } elseif (Formatting::startsWith($term, ['people:', 'actor:'])) {
            $actor_expr = $temp_actor_expr;
        } elseif (Formatting::startsWith($term, GSF::cartesianProduct([['people', 'actor'], ['circle', 'list'], [':']], separator: ['-', '_']))) {
            $null_tagger_expr = $eb->isNull('actor_circle.tagger');
            // The nullable value dereferenced below is $actor, so that is what must be checked
            // (checking $actor_expr could call getId() on null)
            $tagger_expr = \is_null($actor)
                ? $null_tagger_expr
                : $eb->orX($null_tagger_expr, $eb->eq('actor_circle.tagger', $actor->getId()));
            // array_unique() keeps the first occurrence, so index 0 always exists
            $tags     = array_unique([$search_term, $canonical_search_term]);
            $tag_expr = \count($tags) === 1
                ? $eb->eq('actor_circle.tag', $tags[0])
                : $eb->in('actor_circle.tag', $tags);
            $search_expr = $eb->andX(
                $tagger_expr,
                $tag_expr,
            );
            $note_expr  = $search_expr;
            $actor_expr = $search_expr;
        } else {
            // Unknown prefix: offer both expressions and let other handlers run
            $note_expr  = $temp_note_expr;
            $actor_expr = $temp_actor_expr;
            return Event::next;
        }
    }

    // NOTE(review): also reached when the term contains ':' but the part after it is not a
    // hashtag, in which case the event is stopped without setting either expression — confirm intended
    return Event::stop;
}
2022-01-09 00:10:39 +09:00
/**
 * Left-join the tag entities to the note and actor queries, under the
 * aliases 'note_tag' and 'actor_tag'.
 *
 * @return bool always Event::next
 */
public function onCollectionQueryAddJoins(QueryBuilder &$note_qb, QueryBuilder &$actor_qb): bool
{
    $condition_type = Expr\Join::WITH;

    $note_qb->leftJoin(NoteTag::class, 'note_tag', $condition_type, 'note_tag.note_id = note.id');
    $actor_qb->leftJoin(ActorTag::class, 'actor_tag', $condition_type, 'actor_tag.tagger = actor.id');

    return Event::next;
}
2021-12-04 21:58:27 +09:00
2022-01-05 07:20:12 +09:00
/**
 * Append the 'tag_use_canonical' checkbox (optional, checked by default)
 * to the posting form entries.
 *
 * @param Request $request     not used by this handler
 * @param Actor   $actor       not used by this handler
 * @param array   $form_params form entries, extended in place
 *
 * @return bool always Event::next
 */
public function onPostingAddFormEntries(Request $request, Actor $actor, array &$form_params): bool
{
    $checkbox_options = [
        'required' => false,
        'data'     => true,
        'label'    => _m('Make note tags canonical'),
        'help'     => _m('Canonical tags will be treated as a version of an existing tag with the same root/stem (e.g. \'#great_tag\' will be considered as a version of \'#great\', if it already exists)'),
    ];

    $form_params[] = ['tag_use_canonical', CheckboxType::class, $checkbox_options];

    return Event::next;
}
2022-01-05 07:20:12 +09:00
/**
 * Copy the 'tag_use_canonical' form value into the extra arguments of the note content.
 *
 * @param Request $request    not used by this handler
 * @param Actor   $actor      not used by this handler
 * @param array   $data       submitted form data
 * @param array   $extra_args extra arguments, extended in place
 *
 * @throws ClientException when 'tag_use_canonical' is missing (or null) in $data
 *
 * @return bool always Event::next
 */
public function onAddExtraArgsToNoteContent(Request $request, Actor $actor, array $data, array &$extra_args): bool
{
    // `??` falls through exactly when isset() would be false: key absent or null
    $extra_args['tag_use_canonical'] = $data['tag_use_canonical']
        ?? throw new ClientException(_m('Missing Use Canonical preference for Tags.'));

    return Event::next;
}
2021-09-14 21:40:50 +09:00
}