2014-05-07 06:00:30 +09:00
< ? php
2021-04-20 03:51:05 +09:00
// {{{ License
2018-07-18 13:31:24 +09:00
// This file is part of GNU social - https://www.gnu.org/software/social
//
// GNU social is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// GNU social is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with GNU social. If not, see <http://www.gnu.org/licenses/>.
2021-04-20 03:51:05 +09:00
// }}}
2018-07-18 13:31:24 +09:00
/**
2019-07-06 12:31:02 +09:00
* OEmbed and OpenGraph implementation for GNU social
2018-07-18 13:31:24 +09:00
*
* @ package GNUsocial
2021-04-15 00:27:37 +09:00
*
2021-02-19 19:34:21 +09:00
* @ author Mikael Nordfeldth
2018-07-18 13:31:24 +09:00
* @ author Stephen Paul Weber
* @ author hannes
* @ author Mikael Nordfeldth
2021-02-19 19:34:21 +09:00
* @ author Miguel Dantas
2021-08-12 08:41:57 +09:00
* @ author Hugo Sales < hugo @ hsal . es >
2021-04-20 03:51:05 +09:00
* @ author Diogo Peralta Cordeiro < mail @ diogo . site >
2021-02-19 19:34:21 +09:00
* @ copyright 2014 - 2021 Free Software Foundation , Inc http :// www . fsf . org
2018-07-18 13:31:24 +09:00
* @ license https :// www . gnu . org / licenses / agpl . html GNU AGPL v3 or later
*/
2021-04-15 00:27:37 +09:00
namespace Plugin\Embed ;
2018-07-18 13:31:24 +09:00
2021-04-26 06:23:46 +09:00
use App\Core\Cache ;
use App\Core\DB\DB ;
use App\Core\Event ;
2021-04-28 05:56:50 +09:00
use App\Core\GSFile ;
2021-04-26 06:23:46 +09:00
use App\Core\HTTPClient ;
2021-08-12 08:41:57 +09:00
use function App\Core\I18n\_m ;
2021-04-26 06:23:46 +09:00
use App\Core\Log ;
2021-04-20 03:51:05 +09:00
use App\Core\Modules\Plugin ;
2021-04-26 06:23:46 +09:00
use App\Core\Router\RouteLoader ;
use App\Core\Router\Router ;
use App\Entity\Attachment ;
2021-08-12 08:41:57 +09:00
use App\Entity\Note ;
use App\Entity\RemoteURL ;
2021-04-28 05:56:50 +09:00
use App\Util\Common ;
2021-04-26 06:23:46 +09:00
use App\Util\Exception\DuplicateFoundException ;
use App\Util\Exception\NotFoundException ;
2021-08-12 08:41:57 +09:00
use App\Util\Exception\ServerException ;
2021-04-29 00:03:17 +09:00
use App\Util\Formatting ;
2021-04-28 05:56:50 +09:00
use App\Util\TemporaryFile ;
use Embed\Embed as LibEmbed ;
2021-05-02 06:03:54 +09:00
use Exception ;
2021-04-26 06:23:46 +09:00
use Symfony\Component\HttpFoundation\Request ;
2019-07-07 21:26:10 +09:00
2018-07-18 13:31:24 +09:00
/**
2019-07-06 12:31:02 +09:00
* Base class for the Embed plugin that does most of the heavy lifting to get
2018-07-18 13:31:24 +09:00
* and display representations for remote content .
*
2021-02-19 19:34:21 +09:00
* @ copyright 2014 - 2021 Free Software Foundation , Inc http :// www . fsf . org
2018-07-18 13:31:24 +09:00
* @ license https :// www . gnu . org / licenses / agpl . html GNU AGPL v3 or later
*/
2021-04-20 03:51:05 +09:00
class Embed extends Plugin
2014-05-07 06:00:30 +09:00
{
2021-08-12 08:41:57 +09:00
/**
 * Report the version of this plugin.
 *
 * @return string the plugin's version string
 */
public function version(): string
{
    $plugin_version = '3.0.0';

    return $plugin_version;
}
2021-04-26 06:23:46 +09:00
/**
 * Host whitelist; may be overridden in social.local.yaml.
 *
 * Each key is a regular expression matched against a hostname (the
 * delimiting slashes are added by the code that uses it), each value is
 * the service provider for that host.
 * WARNING: because keys are regexps, always escape your dots and
 * terminate ('$') your strings.
 */
public array $domain_whitelist = ['.*' => '']; // hostname => service provider; the default allows any host
2019-07-07 00:52:30 +09:00
2018-07-18 13:31:24 +09:00
/**
 * Hook executed when GNU social creates the page routing: registers the
 * routes that are served by this plugin's controller.
 *
 * @param RouteLoader $m the route loader that was initialized
 *
 * @throws Exception
 *
 * @return bool Event::next
 */
public function onAddRoute(RouteLoader $m): bool
{
    // route id => path; both are handled by the same controller
    $plugin_routes = [
        'oembed' => 'main/oembed',
        'embed'  => 'main/embed',
    ];
    foreach ($plugin_routes as $route_id => $route_path) {
        $m->connect($route_id, $route_path, Controller\Embed::class);
    }

    return Event::next;
}
2021-04-26 06:23:46 +09:00
/**
 * Insert oEmbed discovery links (XML and JSON) into the HTML head of
 * attachment pages.
 *
 * The first segment of the request path decides whether the current page
 * is an attachment page; only in that case is there a URL to advertise.
 *
 * @param Request $request the request being answered
 * @param array   $result  head elements collected so far, appended to in place
 *
 * @return bool Event::next
 */
public function onShowHeadElements(Request $request, array &$result): bool
{
    $matches = [];
    preg_match(',/?([^/]+)/?(.*),', $request->getPathInfo(), $matches);

    // The pattern does not match a path such as "/", which leaves $matches
    // empty; fall back to null instead of reading an undefined offset
    $url = match ($matches[1] ?? null) {
        'attachment' => "{$matches[1]}/{$matches[2]}",
        default      => null,
    };

    // Advertise the alternates only when there is a URL to describe
    // (the previous check was inverted and emitted links with a null URL)
    if (!is_null($url)) {
        foreach (['xml', 'json'] as $format) {
            $result[] = [
                'link' => [
                    'rel'   => 'alternate',
                    'type'  => "application/{$format}+oembed",
                    'href'  => Router::url('embed', ['format' => $format, 'url' => $url]),
                    'title' => 'oEmbed',
                ],
            ];
        }
    }

    return Event::next;
}
2021-08-14 03:36:34 +09:00
/**
 * Show this remote URL enhanced with the corresponding Embed data, if available.
 *
 * The embed entity is looked up through the cache (falling back to the
 * database), rendered with the `embed/embedView.html.twig` template and
 * the rendered markup is appended to $res.
 *
 * @param array $vars must contain the entity under the key 'remote_url'
 * @param array $res  rendered views collected so far, appended to in place
 *
 * @return bool Event::stop when a view was rendered, Event::next when no
 *              (unique) embed representation exists for the remote URL
 */
public function onViewRemoteUrl(array $vars, array &$res): bool
{
    $remote_url = $vars['remote_url'];
    try {
        $embed = Cache::get(
            'attachment-embed-' . $remote_url->getId(),
            fn () => DB::findOneBy('attachment_embed', ['remoteurl_id' => $remote_url->getId()]),
        );
    } catch (DuplicateFoundException $e) {
        Log::warning($e);
        return Event::next;
    } catch (NotFoundException) {
        // The message previously carried a stray backslash ("doesn\'t")
        Log::debug("Embed doesn't have a representation for the remote_url id={$remote_url->getId()}. Must have been stored before the plugin was enabled.");
        return Event::next;
    }

    $attributes = $embed->getImageHTMLAttributes(['class' => 'u-photo embed']);
    $res[]      = Formatting::twigRenderFile(
        'embed/embedView.html.twig',
        ['embed' => $embed, 'attributes' => $attributes, 'remote_url' => $remote_url],
    );

    return Event::stop;
}
2014-05-07 06:00:30 +09:00
/**
 * Save embedding information for a remote URL found in a note, if applicable.
 *
 * Nothing is done unless the URL's mimetype is (X)HTML, no embed exists
 * for it yet and no attachment is already linked to it. Otherwise the
 * page metadata is fetched, its thumbnail is downloaded when possible,
 * and an attachment plus an AttachmentEmbed entity are persisted.
 *
 * @param RemoteURL $remote_url the remote URL to create an embed for
 * @param Note      $note       the note the URL was found in (not used here)
 *
 * @throws DuplicateFoundException
 *
 * @return bool Event::stop when an embed was created, Event::next otherwise
 */
public function onNewRemoteURLFromNote(RemoteURL $remote_url, Note $note): bool
{
    // Only handle (X)HTML documents
    $mimetype = $remote_url->getMimetype();
    $is_html  = Formatting::startsWith($mimetype, 'text/html')
        || Formatting::startsWith($mimetype, 'application/xhtml+xml');
    if (!$is_html) {
        return Event::next;
    }

    $remote_url_id = $remote_url->getId();

    // Ignore if already handled
    $attachment_embed = DB::find('attachment_embed', ['remoteurl_id' => $remote_url_id]);
    if (!is_null($attachment_embed)) {
        return Event::next;
    }

    // If an attachment already exists, do not create an Embed for it.
    // Some other plugin must have handled this URL
    $remote_url_to_attachment = DB::find('remoteurl_to_attachment', ['remoteurl_id' => $remote_url_id]);
    if (!is_null($remote_url_to_attachment)) {
        $attachment_id = $remote_url_to_attachment->getAttachmentId();
        try {
            $attachment = DB::findOneBy('attachment', ['id' => $attachment_id]);
            $attachment->livesIncrementAndGet();
            return Event::next;
        } catch (DuplicateFoundException | NotFoundException $e) {
            // The link points to an attachment we cannot resolve: log it
            // and carry on creating an embed, as before
            Log::error($e);
        }
    }

    // Create an Embed representation for this URL
    $embed_data                 = $this->getEmbedLibMetadata($remote_url->getRemoteUrl());
    $embed_data['remoteurl_id'] = $remote_url_id;

    // The normalized metadata may carry no thumbnail at all, while
    // downloadThumbnail() only accepts strings
    $thumbnail_url = $embed_data['thumbnail_url'] ?? null;
    $img_data      = is_string($thumbnail_url) ? $this->downloadThumbnail($thumbnail_url) : null;

    // Strict checks on purpose: a loose `switch` treats false as null and
    // would discard the thumbnail URL of a merely unacceptable thumbnail
    if (is_string($img_data) && $img_data !== '') {
        // Valid image data: store it as the attachment
        $temp_file = new TemporaryFile();
        $temp_file->write($img_data);
        $attachment = GSFile::sanitizeAndStoreFileAsAttachment($temp_file);
    } else {
        if ($img_data !== false) {
            // URL isn't usable (false means usable but not acceptable)
            $embed_data['thumbnail_url'] = null;
        }
        DB::persist($attachment = Attachment::create(['mimetype' => $remote_url->getMimetype()]));
        Event::handle('AttachmentStoreNew', [&$attachment]);
    }

    $embed_data['attachment_id'] = $attachment->getId();
    DB::persist(Entity\AttachmentEmbed::create($embed_data));
    DB::flush();

    return Event::stop;
}
2021-04-26 06:23:46 +09:00
/**
 * Look up embed metadata for the given $url with the 'oscarotero/Embed'
 * library and return it as a normalized array.
 *
 * The returned array has the keys title, description, author_name,
 * author_url, provider_name, provider_url and thumbnail_url. The
 * thumbnail is the page image when the library reports one, otherwise
 * the favicon.
 *
 * Nothing is caught here, so whatever the library throws on failure
 * propagates to the caller.
 *
 * @param string $url URL of the page to inspect
 *
 * @return array normalized metadata, see normalizeEmbedLibMetadata()
 */
private function getEmbedLibMetadata(string $url): array
{
    Log::info("Trying to find Embed data for {$url} with 'oscarotero/Embed'");
    $embed = new LibEmbed();
    $info  = $embed->get($url);

    // Explicit null test rather than `??`: these are read through the
    // library's property access and `??` would consult isset() first
    $image     = $info->image;
    $thumbnail = !is_null($image) ? $image : $info->favicon;

    $metadata = [
        'title'         => $info->title,
        'description'   => $info->description,
        'author_name'   => $info->authorName,
        'author_url'    => (string) $info->authorUrl,
        'provider_name' => $info->providerName,
        'provider_url'  => (string) $info->providerUrl,
        'thumbnail_url' => (string) $thumbnail,
    ];

    // The page URL is needed to resolve a relative thumbnail path
    return self::normalizeEmbedLibMetadata($metadata, $url);
}

/**
 * Normalize fetched info.
 *
 * - A thumbnail given as a path ("/img.png") or protocol-relative URL
 *   ("//host/img.png") is completed with the scheme and host of the page
 *   it was found on. It is left untouched when that page URL is unknown
 *   or cannot be parsed.
 * - The blank placeholder image served by some WordPress OpenGraph
 *   implementations is dropped (thumbnail_url becomes null).
 *
 * @param array       $metadata metadata as built by getEmbedLibMetadata()
 * @param null|string $base_url URL of the page the metadata belongs to;
 *                              defaults to $metadata['url'] when present
 *
 * @return array the metadata with a normalized 'thumbnail_url'
 */
private static function normalizeEmbedLibMetadata(array $metadata, ?string $base_url = null): array
{
    $thumbnail_url = $metadata['thumbnail_url'] ?? null;
    if (!is_string($thumbnail_url) || $thumbnail_url === '') {
        // Nothing to normalize
        return $metadata;
    }

    // Sometimes sites serve the path, not the full URL, for images.
    // Let's "be liberal in what you accept from others"!
    if (str_starts_with($thumbnail_url, '/')) {
        $base = parse_url((string) ($base_url ?? $metadata['url'] ?? ''));
        if (is_array($base) && isset($base['scheme'], $base['host'])) {
            if (str_starts_with($thumbnail_url, '//')) {
                // Protocol-relative: only the scheme is missing
                $thumbnail_url = "{$base['scheme']}:{$thumbnail_url}";
            } else {
                $port          = isset($base['port']) ? ":{$base['port']}" : '';
                $thumbnail_url = "{$base['scheme']}://{$base['host']}{$port}{$thumbnail_url}";
            }
        }
    }

    // Some wordpress opengraph implementations sometimes return a white
    // blank image, no need for us to save that!
    if ($thumbnail_url === 'https://s0.wp.com/i/blank.jpg') {
        $thumbnail_url = null;
    }

    $metadata['thumbnail_url'] = $thumbnail_url;

    return $metadata;
}
2016-03-17 08:31:45 +09:00
2021-04-29 00:03:17 +09:00
/**
 * Decide whether a remote URL may be fetched, according to the domain
 * whitelist.
 *
 * The whitelist is only enforced when `$this->check_whitelist` is set to
 * a truthy value; otherwise every URL is allowed. When enforced, the
 * URL's host must match at least one of the regular expressions used as
 * keys of `$this->domain_whitelist`.
 *
 * @param string $url the remote URL to check
 *
 * @return bool true if allowed by the lists, false otherwise
 */
private function allowedRemoteUrl(string $url): bool
{
    // No check requested: everything is allowed
    if (!($this->check_whitelist ?? false)) {
        return true;
    }

    $host = parse_url($url, PHP_URL_HOST);
    if (!is_string($host) || $host === '') {
        // Without a host there is nothing to match against the whitelist
        return false;
    }

    foreach (array_keys($this->domain_whitelist) as $regex) {
        // Whitelist keys are regexps without delimiters, add them here
        if (preg_match("/{$regex}/", $host) === 1) {
            return true; // we trust this source
        }
    }

    return false;
}
2018-07-18 13:31:24 +09:00
/**
 * Private helper that:
 * - checks if the given URL is valid and is in fact an image (basic test
 *   on the content-type of a HEAD request), returns null if not;
 * - checks if it respects the whitelist and the file quota, returns false if not;
 * - downloads the thumbnail, returns its content as a string if successful.
 *
 * The quota is checked against the announced content-length when the
 * server sends one, and against the downloaded content in any case.
 *
 * @param string $remote_url URL to the remote thumbnail
 *
 * @return null|bool|string null: unusable URL, false: rejected thumbnail,
 *                          string: the image data
 */
private function downloadThumbnail(string $remote_url): bool|string|null
{
    // Is this a valid URL?
    if (!Common::isValidHttpUrl($remote_url)) {
        Log::debug("Invalid URL ({$remote_url}) in Embed->downloadThumbnail.");
        return null;
    }

    // Is this URL trusted?
    if (!$this->allowedRemoteUrl($remote_url)) {
        Log::info("Blocked URL ({$remote_url}) in Embed->downloadThumbnail.");
        return false;
    }

    // Validate if the URL really does point to a remote image
    $headers      = array_change_key_case(HTTPClient::head($remote_url)->getHeaders(), CASE_LOWER);
    $content_type = $headers['content-type'][0] ?? null;
    if (empty($content_type) || GSFile::mimetypeMajor($content_type) !== 'image') {
        Log::debug("URL ({$remote_url}) doesn't point to an image (content-type: " . (!empty($content_type) ? $content_type : 'not available') . ') in Embed->downloadThumbnail.');
        return null;
    }

    // Does it respect the file quota? The header is optional, so only
    // trust it when it is there
    $max_size  = Common::config('attachments', 'file_quota');
    $file_size = $headers['content-length'][0] ?? null;
    if (!is_null($file_size) && (int) $file_size > $max_size) {
        Log::debug("Went to download remote thumbnail of size {$file_size} but the upload limit is {$max_size} so we aborted in Embed->downloadThumbnail.");
        return false;
    }

    // Download the file
    Log::debug("Downloading remote thumbnail from URL: {$remote_url} in Embed->downloadThumbnail.");
    $content = HTTPClient::get($remote_url)->getContent();

    // The announced size may have been missing or wrong: enforce the quota
    // on what was actually received
    $file_size = strlen($content);
    if ($file_size > $max_size) {
        Log::debug("Went to download remote thumbnail of size {$file_size} but the upload limit is {$max_size} so we aborted in Embed->downloadThumbnail.");
        return false;
    }

    return $content;
}
/**
 * Event raised when GNU social polls the plugin for information about it.
 * Appends this plugin's version information to the $versions array.
 *
 * @param array $versions version entries collected so far, appended to in place
 *
 * @throws ServerException NOTE(review): nothing in this method throws it
 *                         directly — presumably raised by a callee, confirm
 *
 * @return bool Event::next
 */
public function onPluginVersion(array &$versions): bool
{
    $plugin_info = [
        'name'     => 'Embed',
        'version'  => $this->version(),
        'author'   => 'Mikael Nordfeldth, Hugo Sales, Diogo Peralta Cordeiro',
        'homepage' => GNUSOCIAL_PROJECT_URL,
    ];
    // TRANS: Plugin description.
    $plugin_info['description'] = _m('Plugin for using and representing oEmbed, OpenGraph and other data.');

    $versions[] = $plugin_info;

    return Event::next;
}
}