[TAGS][ENTITY][Note] Properly store the note language, pass it along when rendering content. Add mechanism for stemming tags, with fallback to simply slug-ifying them

This commit is contained in:
Hugo Sales 2021-11-24 15:51:01 +00:00
parent f837df5753
commit 2d057024b9
No known key found for this signature in database
GPG Key ID: 7D0C7EAFC9D835A0
7 changed files with 107 additions and 73 deletions

View File

@ -1,6 +1,6 @@
<?php
declare(strict_types=1);
declare(strict_types = 1);
// {{{ License
@ -28,12 +28,14 @@ use App\Core\DB\DB;
use App\Core\Event;
use App\Core\Form;
use App\Core\GSFile;
use function App\Core\I18n\_m;
use App\Core\Modules\Component;
use App\Core\Security;
use App\Entity\Actor;
use App\Entity\ActorToAttachment;
use App\Entity\Attachment;
use App\Entity\AttachmentToNote;
use App\Entity\Language;
use App\Entity\Note;
use App\Util\Common;
use App\Util\Exception\ClientException;
@ -50,8 +52,6 @@ use Symfony\Component\HttpFoundation\File\Exception\FormSizeFileException;
use Symfony\Component\HttpFoundation\File\UploadedFile;
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\Validator\Constraints\Length;
use function App\Core\I18n\_m;
use function count;
class Posting extends Component
{
@ -69,15 +69,15 @@ class Posting extends Component
return Event::next;
}
$actor = $user->getActor();
$actor = $user->getActor();
$actor_id = $user->getId();
$to_tags = [];
$tags = Cache::get(
$to_tags = [];
$tags = Cache::get(
"actor-circle-{$actor_id}",
fn() => DB::dql('select c.tag from App\Entity\ActorCircle c where c.tagger = :tagger', ['tagger' => $actor_id]),
fn () => DB::dql('select c.tag from App\Entity\ActorCircle c where c.tagger = :tagger', ['tagger' => $actor_id]),
);
foreach ($tags as $t) {
$t = $t['tag'];
$t = $t['tag'];
$to_tags[$t] = $t;
}
@ -94,7 +94,7 @@ class Posting extends Component
Event::handle('PostingAvailableContentTypes', [&$available_content_types]);
$context_actor = null; // This is where we'd plug in the group in which the actor is posting, or whom they're replying to
$form_params = [
$form_params = [
['to', ChoiceType::class, ['label' => _m('To:'), 'multiple' => false, 'expanded' => false, 'choices' => $to_tags]],
['visibility', ChoiceType::class, ['label' => _m('Visibility:'), 'multiple' => false, 'expanded' => false, 'data' => 'public', 'choices' => [_m('Public') => 'public', _m('Instance') => 'instance', _m('Private') => 'private']]],
['content', TextareaType::class, ['label' => _m('Content:'), 'data' => $initial_content, 'attr' => ['placeholder' => _m($placeholder)], 'constraints' => [new Length(['max' => Common::config('site', 'text_limit')])]]],
@ -102,25 +102,25 @@ class Posting extends Component
FormFields::language($actor, $context_actor, label: 'Note language:', help: 'The language in which you wrote this note, so others can see it'),
];
if (count($available_content_types) > 1) {
if (\count($available_content_types) > 1) {
$form_params[] = ['content_type', ChoiceType::class,
[
'label' => _m('Text format:'), 'multiple' => false, 'expanded' => false,
'data' => $available_content_types[array_key_first($available_content_types)],
'label' => _m('Text format:'), 'multiple' => false, 'expanded' => false,
'data' => $available_content_types[array_key_first($available_content_types)],
'choices' => $available_content_types,
],
];
}
$form_params[] = ['post_note', SubmitType::class, ['label' => _m('Post')]];
$form = Form::create($form_params);
$form = Form::create($form_params);
$form->handleRequest($request);
if ($form->isSubmitted()) {
try {
if ($form->isValid()) {
$data = $form->getData();
$data = $form->getData();
$content_type = $data['content_type'] ?? $available_content_types[array_key_first($available_content_types)];
self::storeLocalNote($user->getActor(), $data['content'], $content_type, $data['attachments']);
self::storeLocalNote($user->getActor(), $data['content'], $content_type, $data['language'], $data['attachments']);
throw new RedirectException();
}
} catch (FormSizeFileException $sizeFileException) {
@ -140,35 +140,35 @@ class Posting extends Component
* $actor_id, possibly as a reply to note $reply_to and with flag
* $is_local. Sanitizes $content and $attachments
*
* @param Actor $actor
* @param string $content
* @param string $content_type
* @param array $attachments Array of UploadedFile to be stored as GSFiles associated to this note
* @param array $attachments Array of UploadedFile to be stored as GSFiles associated to this note
* @param array $processed_attachments Array of [Attachment, Attachment's name] to be associated to this $actor and Note
* @return \App\Core\Entity|mixed
*
* @throws \App\Util\Exception\DuplicateFoundException
* @throws ClientException
* @throws ServerException
* @throws \App\Util\Exception\DuplicateFoundException
*
* @return \App\Core\Entity|mixed
*/
public static function storeLocalNote(Actor $actor, string $content, string $content_type, array $attachments = [], $processed_attachments = [])
public static function storeLocalNote(Actor $actor, string $content, string $content_type, string $language, array $attachments = [], $processed_attachments = [])
{
$rendered = null;
Event::handle('RenderNoteContent', [$content, $content_type, &$rendered, $actor]);
Event::handle('RenderNoteContent', [$content, $content_type, &$rendered, $actor, $language]);
$note = Note::create([
'actor_id' => $actor->getId(),
'content' => $content,
'actor_id' => $actor->getId(),
'content' => $content,
'content_type' => $content_type,
'rendered' => $rendered,
'is_local' => true,
'rendered' => $rendered,
'language_id' => Language::getFromLocale($language)->getId(),
'is_local' => true,
]);
/** @var UploadedFile[] $attachments */
foreach ($attachments as $f) {
$filesize = $f->getSize();
$filesize = $f->getSize();
$max_file_size = Common::getUploadLimit();
if ($max_file_size < $filesize) {
throw new ClientException(_m('No file may be larger than {quota} bytes and the file you sent was {size} bytes. '
. 'Try to upload a smaller version.', ['quota' => $max_file_size, 'size' => $filesize],));
. 'Try to upload a smaller version.', ['quota' => $max_file_size, 'size' => $filesize], ));
}
Event::handle('EnforceUserFileQuota', [$filesize, $actor->getId()]);
$processed_attachments[] = [GSFile::storeFileAsAttachment($f), $f->getClientOriginalName()];
@ -193,12 +193,12 @@ class Posting extends Component
return $note;
}
public function onRenderNoteContent(string $content, string $content_type, ?string &$rendered, Actor $author, ?Note $reply_to = null)
public function onRenderNoteContent(string $content, string $content_type, ?string &$rendered, Actor $author, string $language, ?Note $reply_to = null)
{
switch ($content_type) {
case 'text/plain':
$rendered = Formatting::renderPlainText($content);
$rendered = Formatting::linkifyMentions($rendered, $author, $reply_to);
$rendered = Formatting::renderPlainText($content, $language);
$rendered = Formatting::linkifyMentions($rendered, $author, $language, $reply_to);
return Event::stop;
case 'text/html':
// TODO: It has to linkify and stuff as well

View File

@ -13,14 +13,19 @@ class Tag extends Controller
{
public function tag(string $tag)
{
$user = Common::user();
$page = $this->int('page') ?: 1;
$canonical = CompTag::canonicalTag($tag);
$actor = Common::actor();
$page = $this->int('page') ?: 1;
$lang = $this->string('lang');
if (\is_null($lang)) {
$langs = $actor->getPreferredLanguageChoices();
$lang = $langs[array_key_first($langs)];
}
$canonical = CompTag::canonicalTag($tag, $lang);
$notes = Cache::pagedStream(
key: "tag-{$canonical}",
query: 'select n from note n join note_tag nt with n.id = nt.note_id where nt.canonical = :canon order by nt.created DESC, nt.note_id DESC',
query_args: ['canon' => $canonical],
actor: $user,
actor: $actor,
page: $page,
);

View File

@ -28,6 +28,7 @@ use App\Core\DB\DB;
use App\Core\Event;
use App\Core\Modules\Component;
use App\Core\Router\Router;
use App\Entity\Language;
use App\Entity\Note;
use App\Entity\NoteTag;
use App\Util\Formatting;
@ -64,8 +65,8 @@ class Tag extends Component
$processed_tags = false;
preg_match_all(self::TAG_REGEX, $content, $matched_tags, \PREG_SET_ORDER);
foreach ($matched_tags as $match) {
$tag = $match[2];
$canonical_tag = self::canonicalTag($tag);
$tag = self::ensureLength($match[2]);
$canonical_tag = self::canonicalTag($tag, Language::getFromId($note->getLanguageId())->getLocale());
DB::persist(NoteTag::create(['tag' => $tag, 'canonical' => $canonical_tag, 'note_id' => $note->getId()]));
Cache::pushList("tag-{$canonical_tag}", $note);
$processed_tags = true;
@ -75,21 +76,32 @@ class Tag extends Component
}
}
public function onRenderContent(string &$text)
public function onRenderContent(string &$text, string $language)
{
$text = preg_replace_callback(self::TAG_REGEX, fn ($m) => $m[1] . $this->tagLink($m[2]), $text);
$text = preg_replace_callback(self::TAG_REGEX, fn ($m) => $m[1] . self::tagLink($m[2], $language), $text);
}
private function tagLink(string $tag): string
private static function tagLink(string $tag, string $language): string
{
$canonical = self::canonicalTag($tag);
$url = Router::url('tag', ['tag' => $canonical]);
$tag = self::ensureLength($tag);
$canonical = self::canonicalTag($tag, $language);
$url = Router::url('tag', ['tag' => $canonical, 'lang' => $language]);
return HTML::html(['a' => ['attrs' => ['href' => $url, 'title' => $tag, 'rel' => 'tag'], $tag]], options: ['indent' => false]);
}
public static function canonicalTag(string $tag): string
public static function ensureLength(string $tag): string
{
return mb_substr(Formatting::slugify($tag), 0, self::MAX_TAG_LENGTH);
return mb_substr($tag, 0, self::MAX_TAG_LENGTH);
}
/**
 * Compute the canonical form of a tag for the given language.
 *
 * Dispatches the 'StemWord' event with the language, the tag and a
 * by-reference output slot. When the event reports Event::stop, whatever
 * the handler wrote into that slot is used; in every other case the tag is
 * run through Formatting::slugify instead. Any '#' characters are then
 * removed and the value is passed through self::ensureLength.
 *
 * @param string $tag      the tag text to canonicalise
 * @param string $language language identifier forwarded to 'StemWord' handlers
 *
 * @return string the canonical tag
 */
public static function canonicalTag(string $tag, string $language): string
{
    $stemmed         = '';
    $stemmer_handled = Event::handle('StemWord', [$language, $tag, &$stemmed]) === Event::stop;

    // Fall back to a plain slug when no handler claimed the event
    $canonical = $stemmer_handled ? $stemmed : Formatting::slugify($tag);

    return self::ensureLength(str_replace('#', '', $canonical));
}
/**

View File

@ -57,7 +57,7 @@ class Actor extends Entity
private int $id;
private string $nickname;
private ?string $fullname = null;
private int $roles = 4;
private int $roles = 4;
private ?string $homepage;
private ?string $bio;
private ?string $location;
@ -389,8 +389,7 @@ class Actor extends Entity
fn (Language $l) => $l->getLocale(),
),
) ?: [
Common::config('site', 'language') => (Cache::getHashMapKey('languages', Common::config('site', 'language'))
?: DB::findOneBy('language', ['locale' => Common::config('site', 'language')])),
Common::config('site', 'language') => Language::getFromLocale(Common::config('site', 'language')),
];
return array_merge(...F\map(array_values($langs), fn ($l) => $l->toChoiceFormat()));
}

View File

@ -108,6 +108,24 @@ class Language extends Entity
// @codeCoverageIgnoreEnd
// }}} Autocode
/**
 * Look up a Language by its numeric id through the 'languages-id' cache hash map.
 *
 * The id is cast to a string and used as the key. The callback handed over as
 * `calculate_map` selects every language and reindexes the result by the
 * stringified id; presumably the cache invokes it only when the map has to be
 * (re)built -- confirm against Cache::getHashMapKey.
 *
 * NOTE(review): the return type is `self`; what happens for an id that is not
 * in the map depends on Cache::getHashMapKey -- verify.
 */
public static function getFromId(int $id): self
{
    $key       = (string) $id;
    $build_map = fn () => F\reindex(
        DB::dql('select l from language l'),
        fn (self $l) => (string) $l->getId(),
    );

    return Cache::getHashMapKey('languages-id', $key, calculate_map: $build_map);
}
/**
 * Look up a Language by its locale string through the 'languages' cache hash map.
 *
 * The locale is used as the key as-is. The callback handed over as
 * `calculate_map` selects every language and reindexes the result by
 * getLocale(); presumably the cache invokes it only when the map has to be
 * (re)built -- confirm against Cache::getHashMapKey.
 *
 * NOTE(review): the return type is `self`; what happens for a locale that is
 * not in the map depends on Cache::getHashMapKey -- verify.
 */
public static function getFromLocale(string $locale): self
{
    $build_map = fn () => F\reindex(
        DB::dql('select l from language l'),
        fn (self $l) => $l->getLocale(),
    );

    return Cache::getHashMapKey('languages', $locale, calculate_map: $build_map);
}
public static function getLanguageChoices(): array
{
$langs = Cache::getHashMap(
@ -136,7 +154,7 @@ class Language extends Entity
$key = array_key_first($preferred_language_choices);
$locale = $preferred_language_choices[$key];
unset($preferred_language_choices[$key], $language_choices[$key]);
$short_display = Cache::getHashMapKey('languages', $locale)->getShortDisplay();
$short_display = self::getFromLocale($locale)->getShortDisplay();
$preferred_language_choices[$short_display] = trim($locale);
$language_choices[$short_display] = trim($locale);
}

View File

@ -53,7 +53,7 @@ class Note extends Entity
private ?string $source;
private int $scope = VisibilityScope::PUBLIC;
private string $url;
private string $language;
private int $language_id;
private DateTimeInterface $created;
private DateTimeInterface $modified;
@ -159,14 +159,14 @@ class Note extends Entity
return $this;
}
public function getLanguage(): string
public function getLanguageId(): int
{
return $this->language;
return $this->language_id;
}
public function setLanguage(string $language): self
public function setLanguageId(int $language_id): self
{
$this->language = $language;
$this->language_id = $language_id;
return $this;
}
@ -284,18 +284,18 @@ class Note extends Entity
$scope = VisibilityScope::create($this->scope);
return $scope->public
|| (!\is_null($a) && (
($scope->subscriber && 0 != DB::count('subscription', ['subscriber' => $a->getId(), 'subscribed' => $this->actor_id]))
($scope->subscriber && 0 != DB::count('subscription', ['subscriber' => $a->getId(), 'subscribed' => $this->actor_id]))
|| ($scope->addressee && 0 != DB::count('notification', ['activity_id' => $this->id, 'actor_id' => $a->getId()]))
|| ($scope->group && [] != DB::dql(
<<<'EOF'
select m from group_member m
join group_inbox i with m.group_id = i.group_id
join note n with i.activity_id = n.id
where n.id = :note_id and m.actor_id = :actor_id
EOF,
['note_id' => $this->id, 'actor_id' => $a->getId()],
))
));
<<<'EOF'
select m from group_member m
join group_inbox i with m.group_id = i.group_id
join note n with i.activity_id = n.id
where n.id = :note_id and m.actor_id = :actor_id
EOF,
['note_id' => $this->id, 'actor_id' => $a->getId()],
))
));
}
public static function schemaDef(): array
@ -310,11 +310,11 @@ class Note extends Entity
'rendered' => ['type' => 'text', 'description' => 'rendered note content, so we can keep the microtags (if not local)'],
'is_local' => ['type' => 'bool', 'not null' => true, 'description' => 'was this note generated by a local actor'],
'source' => ['type' => 'varchar', 'foreign key' => true, 'length' => 32, 'target' => 'NoteSource.code', 'multiplicity' => 'many to one', 'description' => 'fkey to source of note, like "web", "im", or "clientname"'],
'scope' => ['type' => 'int', 'not null' => true, 'default' => VisibilityScope::PUBLIC, 'description' => 'bit map for distribution scope; 0 = everywhere; 1 = this server only; 2 = addressees; 4 = groups; 8 = subscribers; 16 = messages; null = default'],
'url' => ['type' => 'text', 'description' => 'Permalink to Note'],
'language' => ['type' => 'int', 'foreign key' => true, 'target' => 'Language.id', 'multiplicity' => 'one to many', 'description' => 'The language for this note'],
'created' => ['type' => 'datetime', 'not null' => true, 'default' => 'CURRENT_TIMESTAMP', 'description' => 'date this record was created'],
'modified' => ['type' => 'timestamp', 'not null' => true, 'default' => 'CURRENT_TIMESTAMP', 'description' => 'date this record was modified'],
'scope' => ['type' => 'int', 'not null' => true, 'default' => VisibilityScope::PUBLIC, 'description' => 'bit map for distribution scope; 0 = everywhere; 1 = this server only; 2 = addressees; 4 = groups; 8 = subscribers; 16 = messages; null = default'],
'url' => ['type' => 'text', 'description' => 'Permalink to Note'],
'language_id' => ['type' => 'int', 'foreign key' => true, 'target' => 'Language.id', 'multiplicity' => 'one to many', 'description' => 'The language for this note'],
'created' => ['type' => 'datetime', 'not null' => true, 'default' => 'CURRENT_TIMESTAMP', 'description' => 'date this record was created'],
'modified' => ['type' => 'timestamp', 'not null' => true, 'default' => 'CURRENT_TIMESTAMP', 'description' => 'date this record was modified'],
],
'primary key' => ['id'],
'indexes' => [

View File

@ -230,14 +230,14 @@ abstract class Formatting
/**
* Render a plain text note content into HTML, extracting links and tags
*/
public static function renderPlainText(string $text): string
public static function renderPlainText(string $text, ?string $language = null): string
{
$text = self::quoteAndRemoveControlCodes($text);
// Split \n\n into paragraphs, process each paragraph and merge
return implode("\n", F\map(explode("\n\n", $text), function (string $paragraph) {
return implode("\n", F\map(explode("\n\n", $text), function (string $paragraph) use ($language) {
$paragraph = nl2br($paragraph, use_xhtml: false);
Event::handle('RenderContent', [&$paragraph]);
Event::handle('RenderContent', [&$paragraph, $language]);
return HTML::html(['p' => [$paragraph]], options: ['raw' => true, 'indent' => false]);
}));
@ -458,7 +458,7 @@ abstract class Formatting
*
* @return string partially-rendered HTML
*/
public static function linkifyMentions(string $text, Actor $author, ?Note $parent = null): string
public static function linkifyMentions(string $text, Actor $author, string $language, ?Note $parent = null): string
{
$mentions = self::findMentions($text, $author, $parent);