2021-09-14 21:40:50 +09:00
< ? php
2021-10-10 17:26:18 +09:00
declare ( strict_types = 1 );
2021-09-14 21:40:50 +09:00
// {{{ License
// This file is part of GNU social - https://www.gnu.org/software/social
//
// GNU social is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GNU social is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with GNU social. If not, see <http://www.gnu.org/licenses/>.
// }}}
namespace Component\Tag ;
2021-09-21 00:16:42 +09:00
use App\Core\Cache ;
2021-09-14 21:40:50 +09:00
use App\Core\DB\DB ;
use App\Core\Event ;
2021-12-04 21:58:27 +09:00
use function App\Core\I18n\_m ;
2021-09-14 21:40:50 +09:00
use App\Core\Modules\Component ;
2021-09-20 21:08:17 +09:00
use App\Core\Router\Router ;
2021-12-04 21:58:27 +09:00
use App\Entity\Actor ;
2021-12-12 05:56:47 +09:00
use App\Entity\ActorTag ;
2021-11-25 00:51:01 +09:00
use App\Entity\Language ;
2021-09-21 00:16:42 +09:00
use App\Entity\Note ;
2021-09-14 21:40:50 +09:00
use App\Entity\NoteTag ;
2021-12-04 22:11:34 +09:00
use App\Util\Exception\ClientException ;
2021-09-20 21:08:17 +09:00
use App\Util\Formatting ;
use App\Util\HTML ;
2021-09-27 18:39:58 +09:00
use Doctrine\Common\Collections\ExpressionBuilder ;
use Doctrine\ORM\Query\Expr ;
use Doctrine\ORM\QueryBuilder ;
2021-12-10 07:22:31 +09:00
use Functional as F ;
2021-12-04 21:58:27 +09:00
use Symfony\Component\Form\Extension\Core\Type\CheckboxType ;
use Symfony\Component\HttpFoundation\Request ;
2021-09-14 21:40:50 +09:00
/**
 * Component responsible for extracting tags from posted notes, as well as normalizing them
 *
 * @author    Hugo Sales <hugo@hsal.es>
 * @copyright 2021 Free Software Foundation, Inc http://www.fsf.org
 * @license   https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
 */
class Tag extends Component
{
2021-10-10 17:26:18 +09:00
// Maximum number of characters kept for a tag; `ensureLength` truncates anything longer
public const MAX_TAG_LENGTH = 64 ;
// Matches a hashtag at the start of the text or after whitespace.
// Group 1 is the leading boundary (empty or one whitespace character), group 2 is the
// tag itself: '#' followed by 1 to 64 Unicode letters, numbers, '_', '-' or '.'
public const TAG_REGEX = '/(^|\\s)(#[\\pL\\pN_\\-\\.]{1,64})/u' ; // Brion Vibber 2011-02-23 v2:classes/Notice.php:367 function saveTags
// Requirement pattern for a canonical tag when used as a route parameter (see `onAddRoute`)
public const TAG_SLUG_REGEX = '[A-Za-z0-9]{1,64}' ;
2021-09-20 21:08:17 +09:00
/**
 * Register the routes for the single- and multi-tag feeds of notes and actors
 *
 * Single-tag routes take one `canon` slug; multi-tag routes take `canons`,
 * a comma-separated list of two or more slugs.
 *
 * @param mixed $r object exposing `connect(id, path, [controller class, method])`
 *                 (presumably the core route loader -- TODO confirm)
 */
public function onAddRoute($r): bool
{
    $slug         = self::TAG_SLUG_REGEX;
    $single_param = '{canon<' . $slug . '>}';
    $multi_param  = '{canons<(' . $slug . ',)+' . $slug . '>}';

    $r->connect('single_note_tag', '/note-tag/' . $single_param, [Controller\Tag::class, 'single_note_tag']);
    $r->connect('multi_note_tags', '/note-tags/' . $multi_param, [Controller\Tag::class, 'multi_note_tags']);
    $r->connect('single_actor_tag', '/actor-tag/' . $single_param, [Controller\Tag::class, 'single_actor_tag']);
    $r->connect('multi_actor_tags', '/actor-tags/' . $multi_param, [Controller\Tag::class, 'multi_actor_tags']);

    return Event::next;
}
2021-09-14 21:40:50 +09:00
/**
 * Process note by extracting any tags present
 *
 * Every distinct hashtag matched by TAG_REGEX in $content is stored as a
 * NoteTag for $note, the note is pushed onto that tag's cached list and the
 * cached tag feeds for the canonical tag are invalidated.
 *
 * @param Note   $note         note the content belongs to
 * @param string $content      raw note content to scan for hashtags
 * @param string $content_type not used by this handler
 * @param array  $extra_args   may carry 'tag_use_canonical'; treated as false when absent
 */
public function onProcessNoteContent(Note $note, string $content, string $content_type, array $extra_args): bool
{
    $matches = [];
    preg_match_all(self::TAG_REGEX, $content, $matches, \PREG_SET_ORDER);
    // Group 2 holds the tag including its leading '#'
    $raw_tags = array_unique(F\map($matches, fn ($m) => $m[2]));
    if ($raw_tags === []) {
        // Nothing to persist, so no locale lookup and no flush are needed
        return Event::next;
    }

    // Both values are the same for every tag of this note: resolve them once
    $locale        = Language::getById($note->getLanguageId())->getLocale();
    $use_canonical = $extra_args['tag_use_canonical'] ?? false;

    foreach ($raw_tags as $raw_tag) {
        $tag           = self::ensureValid($raw_tag);
        $canonical_tag = self::canonicalTag($tag, $locale);
        DB::persist(NoteTag::create([
            'tag'           => $tag,
            'canonical'     => $canonical_tag,
            'note_id'       => $note->getId(),
            'use_canonical' => $use_canonical,
        ]));
        Cache::pushList("tag-{$canonical_tag}", $note);
        // Drop the cached feeds so they are rebuilt with the new note
        foreach (self::cacheKeys($canonical_tag) as $key) {
            Cache::delete($key);
        }
    }
    DB::flush();

    return Event::next;
}
2021-11-28 00:06:46 +09:00
/**
 * Replace every hashtag in a plain-text note with a link to that tag's feed
 *
 * @param string      $text     note content, rewritten in place
 * @param null|string $language locale passed on to the tag canonicalisation, if known
 */
public function onRenderPlainTextNoteContent(string &$text, ?string $language = null): bool
{
    $linked = preg_replace_callback(
        self::TAG_REGEX,
        // Keep the leading boundary (group 1) and swap the tag (group 2) for its link
        fn ($m) => $m[1] . self::tagLink($m[2], $language),
        $text,
    );
    // preg_replace_callback() yields null on failure (e.g. malformed UTF-8 with the
    // `u` modifier); in that case leave the caller's text untouched instead of wiping it
    if (!\is_null($linked)) {
        $text = $linked;
    }

    return Event::next;
}
2021-12-08 04:53:56 +09:00
/**
 * Cache keys of the tag feeds associated with a canonical tag
 *
 * @param string $canon_single_or_multi a canonical tag, or the string identifying a set of them
 *
 * @return array<string, string> keys for the note and actor feeds, indexed by
 *                               'note_single', 'note_multi', 'actor_single' and 'actor_multi'
 */
public static function cacheKeys(string $canon_single_or_multi): array
{
    // Keys are the bare prefix followed by the tag, with no padding or braces
    return [
        'note_single'  => 'note-tag-feed-' . $canon_single_or_multi,
        'note_multi'   => 'note-tags-feed-' . $canon_single_or_multi,
        'actor_single' => 'actor-tag-feed-' . $canon_single_or_multi,
        'actor_multi'  => 'actor-tags-feed-' . $canon_single_or_multi,
    ];
}
2021-11-28 22:09:04 +09:00
/**
 * Build the HTML anchor that links a tag to its single-tag note feed
 *
 * @param string      $tag      tag text as written in the note; used as link text and title
 * @param null|string $language locale used for canonicalisation; added to the URL as 'lang' when given
 */
private static function tagLink(string $tag, ?string $language): string
{
    $tag       = self::ensureLength($tag);
    $canonical = self::canonicalTag($tag, $language);

    // 'lang' is only part of the URL parameters when a language is known
    $params = ['canon' => $canonical];
    if (!\is_null($language)) {
        $params['lang'] = $language;
    }
    $params['tag'] = $tag;

    $href   = Router::url('single_note_tag', $params);
    $anchor = ['a' => ['attrs' => ['href' => $href, 'title' => $tag, 'rel' => 'tag'], $tag]];

    return HTML::html($anchor, options: ['indent' => false]);
}
2021-12-04 21:58:27 +09:00
/**
 * Normalise a raw tag: remove every '#' and truncate the rest to the maximum tag length
 */
public static function ensureValid(string $tag): string
{
    return self::ensureLength(str_replace('#', '', $tag));
}
2021-11-25 00:51:01 +09:00
/**
 * Truncate a tag to at most MAX_TAG_LENGTH characters, counting characters rather than bytes
 */
public static function ensureLength(string $tag): string
{
    return mb_substr($tag, start: 0, length: self::MAX_TAG_LENGTH);
}
2021-12-01 21:24:22 +09:00
/**
 * Convert a tag to its canonical representation: strip any '#', split the
 * rest into words, stem each word in the given language (when a language is
 * given and a 'StemWord' handler stops the event) and slugify it. The
 * concatenated slugs are truncated to the maximum tag length.
 *
 * @param string      $tag      tag to canonicalise, with or without '#'
 * @param null|string $language locale handed to the 'StemWord' event; null skips stemming
 */
public static function canonicalTag(string $tag, ?string $language): string
{
    $words = Formatting::splitWords(str_replace('#', '', $tag));
    $slug  = '';
    foreach ($words as $word) {
        $stemmed = null;
        // The stem is only trusted when a handler claimed the event by stopping it
        $was_stemmed = !\is_null($language)
            && Event::handle('StemWord', [$language, $word, &$stemmed]) === Event::stop;
        $slug .= Formatting::slugify($was_stemmed ? $stemmed : $word);
    }

    return self::ensureLength($slug);
}
2021-09-27 18:39:58 +09:00
2021-10-10 13:44:10 +09:00
/**
 * Populate $note_expr and/or $actor_expr with an expression matching a tag, if the term looks like a tag
 *
 * A term starting with 'note:' or 'tag:' only matches notes, one starting with
 * 'people:' or 'actor:' only matches actors, and an unprefixed term containing
 * '#' matches both.
 *
 * @param ExpressionBuilder $eb         builder used to create the equality expressions
 * @param string            $term       search term, optionally prefixed as described above
 * @param null|string       $language   locale used to canonicalise the tag
 * @param mixed             $note_expr  receives the expression on 'note_tag.canonical'
 * @param mixed             $actor_expr receives the expression on 'actor_tag.canonical'
 *
 * @return bool Event::next for an unprefixed term containing '#', Event::stop otherwise
 */
public function onSearchCreateExpression(ExpressionBuilder $eb, string $term, ?string $language, &$note_expr, &$actor_expr): bool
{
    // For 'prefix:#tag' only the part after the first ':' is the tag
    $search_term       = str_contains($term, ':#') ? explode(':', $term)[1] : $term;
    $canon_search_term = self::canonicalTag($search_term, $language);
    $temp_note_expr    = $eb->eq('note_tag.canonical', $canon_search_term);
    $temp_actor_expr   = $eb->eq('actor_tag.canonical', $canon_search_term);

    // 'people:' belongs to the actor branch only; listing it here as well made
    // the actor branch unreachable for that prefix
    if (Formatting::startsWith($term, ['note:', 'tag:'])) {
        $note_expr = $temp_note_expr;
    } elseif (Formatting::startsWith($term, ['people:', 'actor:'])) {
        $actor_expr = $temp_actor_expr;
    } elseif (str_contains($term, '#')) {
        $note_expr  = $temp_note_expr;
        $actor_expr = $temp_actor_expr;
        return Event::next;
    }

    return Event::stop;
}
2021-12-03 04:17:37 +09:00
/**
 * Left-join the tag tables onto the search queries, under the aliases
 * 'note_tag' and 'actor_tag', so that expressions on those aliases can be applied
 *
 * @param QueryBuilder $note_qb  query over notes, aliased 'note'
 * @param QueryBuilder $actor_qb query over actors, aliased 'actor'
 */
public function onSearchQueryAddJoins(QueryBuilder &$note_qb, QueryBuilder &$actor_qb): bool
{
    $note_condition  = 'note_tag.note_id = note.id';
    $actor_condition = 'actor_tag.tagger = actor.id';

    $note_qb->leftJoin(NoteTag::class, 'note_tag', Expr\Join::WITH, $note_condition);
    $actor_qb->leftJoin(ActorTag::class, 'actor_tag', Expr\Join::WITH, $actor_condition);

    return Event::next;
}
2021-12-04 21:58:27 +09:00
/**
 * Add the 'tag_use_canonical' checkbox (optional, checked by default) to the posting form
 *
 * @param Request $request     not used by this handler
 * @param Actor   $actor       not used by this handler
 * @param array   $form_params list of form entries, each `[name, type class, options]`; appended to in place
 */
public function onPostingAddFormEntries(Request $request, Actor $actor, array &$form_params): bool
{
    $form_params[] = ['tag_use_canonical', CheckboxType::class, [
        'required' => false,
        'data'     => true,
        'label'    => _m('Make note tags canonical'),
        'help'     => _m('Canonical tags will be treated as a version of an existing tag with the same root/stem (e.g. \'#great_tag\' will be considered as a version of \'#great\', if it already exists)'),
    ]];

    return Event::next;
}
2021-12-20 02:43:43 +09:00
/**
 * Copy the 'tag_use_canonical' choice from the submitted form data into the
 * extra arguments handed on to note content processing
 *
 * @param Request $request    not used by this handler
 * @param Actor   $actor      not used by this handler
 * @param array   $data       submitted form data; must contain a non-null 'tag_use_canonical'
 * @param array   $extra_args receives 'tag_use_canonical', modified in place
 *
 * @throws ClientException when 'tag_use_canonical' is missing from (or null in) $data
 */
public function onAddExtraArgsToNoteContent(Request $request, Actor $actor, array $data, array &$extra_args): bool
{
    if (!isset($data['tag_use_canonical'])) {
        throw new ClientException;
    }
    $extra_args['tag_use_canonical'] = $data['tag_use_canonical'];

    return Event::next;
}
2021-09-14 21:40:50 +09:00
}