[Media] Fix several issues

[StoreRemoteMedia] Upgrade plugin to use the new Media system

API Changes:
- Added getters to File to better formalize the ideas of the commit "[Media] Fix issues with database file storage"

UI Changes:
- Presented thumbnails are now actual thumbnails (bug fix)
- Attachment actions have a slightly more extended behaviour

Many other minor bug fixes...
This commit is contained in:
Diogo Peralta Cordeiro 2021-02-16 18:30:21 +00:00
parent f9290705f8
commit 22b5dd8567
16 changed files with 421 additions and 286 deletions

View File

@ -76,14 +76,7 @@ class AttachmentAction extends ManagedAction
$this->mimetype = $this->attachment->mimetype;
$this->filename = $this->attachment->filename;
if ($this->attachment->isLocal()) {
$this->filepath = $this->attachment->getFileOrThumbnailPath();
if (empty($this->filepath)) {
$this->clientError(
_m('Requested local URL for a file that is not stored locally.'),
404
);
}
if ($this->attachment->isLocal() || $this->attachment->isFetchedRemoteFile()) {
$this->filesize = $this->attachment->getFileOrThumbnailSize();
$this->mimetype = $this->attachment->getFileOrThumbnailMimetype();
$this->filename = MediaFile::getDisplayName($this->attachment);
@ -115,15 +108,6 @@ class AttachmentAction extends ManagedAction
public function showPage(): void
{
if (
!$this->attachment->isLocal()
|| empty($this->filepath)
|| !file_exists($this->filepath)
) {
// If it's not a locally stored file, get lost
common_redirect($this->attachment->getUrl(), 303);
}
parent::showPage();
}

View File

@ -35,6 +35,14 @@ class Attachment_downloadAction extends AttachmentAction
@ini_set('display_errors', 0);
if ($this->attachment->isLocal()) {
try {
$this->filepath = $this->attachment->getFileOrThumbnailPath();
} catch (Exception $e) {
$this->clientError(
_m('Requested local URL for a file that is not stored locally.'),
404
);
}
common_send_file(
$this->filepath,
$this->mimetype,

View File

@ -20,8 +20,8 @@ defined('GNUSOCIAL') || die();
* View notice attachment
*
* @package GNUsocial
* @author Miguel Dantas <biodantasgs@gmail.com>
* @copyright 2019 Free Software Foundation, Inc http://www.fsf.org
* @author Mikael Nordfeldth <mmn@hethane.se>
* @copyright 2016 Free Software Foundation, Inc http://www.fsf.org
* @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
*/
class Attachment_viewAction extends AttachmentAction
@ -33,13 +33,20 @@ class Attachment_viewAction extends AttachmentAction
// script execution, and we don't want to have any more errors until then, so don't reset it
@ini_set('display_errors', 0);
if ($this->attachment->isLocal()) {
if ($this->attachment->isLocal() || $this->attachment->isFetchedRemoteFile()) {
try {
$this->filepath = $this->attachment->getFileOrThumbnailPath();
} catch (Exception $e) {
$this->clientError(
_m('Requested local URL for a file that is not stored locally.'),
404
);
}
$disposition = 'attachment';
if (in_array(common_get_mime_media($this->mimetype), ['image', 'video'])) {
$disposition = 'inline';
}
common_send_file($this->filepath, $this->mimetype,
$this->filename, $disposition);
common_send_file($this->filepath, $this->mimetype, $this->filename, $disposition);
} else {
common_redirect($this->attachment->getUrl(), 303);
}

View File

@ -204,7 +204,9 @@ class NewnoticeAction extends FormAction
$this->stored = Notice::saveActivity($act, $this->scoped, $options);
$upload->attachToNotice($this->stored);
if ($upload instanceof MediaFile) {
$upload->attachToNotice($this->stored);
}
Event::handle('EndNoticeSaveWeb', array($this, $this->stored));
}

View File

@ -557,7 +557,9 @@ class File extends Managed_DataObject
// This means we either don't know what it is, so it can't
// be shown as an enclosure, or it is an HTML link which
// does not link to a resource with further metadata.
throw new ServerException('Unknown enclosure mimetype, not enough metadata');
// throw new ServerException('Unknown enclosure mimetype, not enough metadata');
// It's not really an error that must be shown or handled...
common_debug('Unknown enclosure mimetype, not enough metadata');
}
self::$_enclosures[$this->getID()] = $enclosure;
@ -830,11 +832,36 @@ class File extends Managed_DataObject
return $count;
}
public function isLocal()
/**
 * Whether this is a local file: it has a filename but no url.
 *
 * @return bool true when url is empty and filename is not empty
 */
public function isLocal(): bool
{
    // Anything carrying a url is not considered local.
    if (!empty($this->url)) {
        return false;
    }
    return !empty($this->filename);
}
/**
 * Whether this is a remote file that was not fetched (not even a thumbnail
 * of it): it has a url but no filename.
 *
 * @return bool true when url is not empty and filename is empty
 */
public function isNonFetchedRemoteFile(): bool
{
    // Without a url it cannot be a remote file at all.
    if (empty($this->url)) {
        return false;
    }
    return empty($this->filename);
}
/**
 * Whether this is a fetched remote file (maybe just a thumbnail of it):
 * it has both a url and a filename.
 *
 * @return bool true when neither url nor filename is empty
 */
public function isFetchedRemoteFile(): bool
{
    // Either field missing means it is not a fetched remote file.
    if (empty($this->url) || empty($this->filename)) {
        return false;
    }
    return true;
}
/**
 * Whether this file is a redirect: it has neither a filename nor a url.
 *
 * @return bool true when both url and filename are empty
 */
public function isRedirect(): bool
{
    // Any of the two fields being set rules out a redirect.
    if (!empty($this->url)) {
        return false;
    }
    return empty($this->filename);
}
/**
 * Whether the file is in a remote location, which is decided solely by the
 * absence of a filename.
 *
 * @return bool true when filename is empty
 */
public function isStoredRemotely(): bool
{
    $hasFilename = !empty($this->filename);
    return !$hasFilename;
}
public function unlink() {
// Delete the file, if it exists locally
if (!empty($this->filename) && file_exists(self::path($this->filename))) {

View File

@ -88,8 +88,8 @@ class File_thumbnail extends Managed_DataObject
bool $force_still = true,
?bool $upscale = null
): File_thumbnail {
if (is_null($file->filename)) { // Remote file
// If StoreRemoteMedia is enabled...
if ($file->isStoredRemotely()) { // Remote file
// If StoreRemoteMedia or Embed are enabled...
if (Event::handle('CreateFileImageThumbnailSource', [$file, &$imgPath, 'image'])) {
if (!file_exists($imgPath)) {
throw new FileNotFoundException($imgPath);
@ -101,7 +101,7 @@ class File_thumbnail extends Managed_DataObject
throw new UseFileAsThumbnailException($file);
}
}
throw new FileNotFoundException("This remote file has no local thumbnail.");
throw new ServerException("This remote file has no local thumbnail.");
}
$image = ImageFile::fromFileObject($file);
$imgPath = $image->getPath();
@ -215,7 +215,7 @@ class File_thumbnail extends Managed_DataObject
return $tn;
}
public static function path($filename)
public static function path($filename): string
{
File::tryFilename($filename);
@ -239,7 +239,7 @@ class File_thumbnail extends Managed_DataObject
* @throws FileNotFoundException
* @throws ServerException
*/
public function getPath()
public function getPath(): string
{
$oldpath = File::path($this->getFilename());
$thumbpath = self::path($this->getFilename());

View File

@ -103,7 +103,8 @@ class AttachmentListItem extends Widget
$this->showRepresentation();
}
function showRepresentation() {
function showRepresentation()
{
$enclosure = $this->attachment->getEnclosure();
if (Event::handle('StartShowAttachmentRepresentation', [$this->out, $this->attachment])) {

View File

@ -201,6 +201,7 @@ class ImageFile extends MediaFile
if ($mediafile instanceof self) {
return $mediafile;
} else {
$mediafile->delete();
// We can conclude that we have failed to get the MIME type
// TRANS: Client exception thrown trying to upload an invalid image type.
// TRANS: %s is the file type that was denied
@ -219,21 +220,23 @@ class ImageFile extends MediaFile
* @param string $url Remote image URL
* @param Profile|null $scoped
* @param string|null $name
* @param int|null $file_id same as in this class constructor
* @return ImageFile
* @throws ClientException
* @throws FileNotFoundException
* @throws HTTP_Request2_Exception
* @throws InvalidFilenameException
* @throws NoResultException
* @throws ServerException
* @throws UnsupportedMediaException
* @throws UseFileAsThumbnailException
*/
public static function fromUrl(string $url, ?Profile $scoped = null, ?string $name = null): self
public static function fromUrl(string $url, ?Profile $scoped = null, ?string $name = null, ?int $file_id = null): self
{
$mediafile = parent::fromUrl($url, $scoped, $name);
$mediafile = parent::fromUrl($url, $scoped, $name, $file_id);
if ($mediafile instanceof self) {
return $mediafile;
} else {
$mediafile->delete();
// We can conclude that we have failed to get the MIME type
// TRANS: Client exception thrown trying to upload an invalid image type.
// TRANS: %s is the file type that was denied

View File

@ -252,7 +252,7 @@ class MediaFile
// video support plugin or something.
// FIXME: Do this more automagically.
// Honestly, I think this is unlikely these days,
// but better be safe than sure, I guess
// but better be safe than sorry, I guess
if ($image->getPath() != $file->getPath()) {
$image->unlink();
}
@ -506,16 +506,17 @@ class MediaFile
* @param string $url Remote media URL
* @param Profile|null $scoped
* @param string|null $name
* @param int|null $file_id same as in this class constructor
* @return ImageFile|MediaFile
* @throws ClientException
* @throws FileNotFoundException
* @throws HTTP_Request2_Exception
* @throws InvalidFilenameException
* @throws NoResultException
* @throws ServerException
* @throws UnsupportedMediaException
* @throws UseFileAsThumbnailException
*/
public static function fromUrl(string $url, ?Profile $scoped = null, ?string $name = null)
public static function fromUrl(string $url, ?Profile $scoped = null, ?string $name = null, ?int $file_id = null)
{
if (!common_valid_http_url($url)) {
// TRANS: Server exception. %s is a URL.
@ -631,10 +632,10 @@ class MediaFile
}
if ($media === 'image') {
return new ImageFile(null, $filepath, $filehash, $url);
return new ImageFile($file_id, $filepath, $filehash, $url);
}
}
return new self($filepath, $mimetype, $filehash, null, $url);
return new self($filepath, $mimetype, $filehash, $file_id, $url);
}
public static function fromFileInfo(SplFileInfo $finfo, Profile $scoped = null)

View File

@ -403,7 +403,7 @@ class HTTPClient extends HTTP_Request2
return new GNUsocial_HTTPResponse($response, $this->getUrl(), $redirs);
}
public static function get_filename(string $url, array $headers = null) : string {
public static function get_filename(string $url, array $headers = null) : ?string {
if ($headers === null) {
$head = (new HTTPClient())->head($url);
$headers = $head->getHeader();
@ -414,7 +414,7 @@ class HTTPClient extends HTTP_Request2
return $matches[1];
} else {
common_log(LOG_INFO, "Couldn't determine filename for url: {$url}");
return _('Untitled attachment');
return null;
}
}
}

View File

@ -227,10 +227,10 @@ class Activitypub_notice
$act->context->location = Location::fromLatLon($settings['latitude'], $settings['longitude']);
}
/* Reject notice if it is too long (without the HTML)
// Reject notice if it is too long (without the HTML)
if (Notice::contentTooLong($content)) {
throw new Exception('That\'s too long. Maximum notice size is %d character.');
}*/
}
// Attachments (first part)
$attachments = [];

View File

@ -40,7 +40,7 @@ use Embed\Embed;
*/
class EmbedPlugin extends Plugin
{
const PLUGIN_VERSION = '0.1.0';
const PLUGIN_VERSION = '2.0.0';
// settings which can be set in config.php with addPlugin('Embed', ['param'=>'value', ...]);
// WARNING, these are _regexps_ (slashes added later). Always escape your dots and end ('$') your strings
@ -141,7 +141,7 @@ class EmbedPlugin extends Plugin
}
} catch (Exception $e) {
common_log(LOG_INFO, "Failed to find Embed data for {$url} with 'oscarotero/Embed'" .
", got exception: " . get_class($e));
", got exception: " . get_class($e));
}
if (isset($metadata->thumbnail_url)) {
@ -151,7 +151,7 @@ class EmbedPlugin extends Plugin
if ($metadata->thumbnail_url[0] == '/') {
$thumbnail_url_parsed = parse_url($metadata->url);
$metadata->thumbnail_url = "{$thumbnail_url_parsed['scheme']}://".
"{$thumbnail_url_parsed['host']}{$metadata->thumbnail_url}";
"{$thumbnail_url_parsed['host']}{$metadata->thumbnail_url}";
}
// some wordpress opengraph implementations sometimes return a white blank image
@ -168,21 +168,21 @@ class EmbedPlugin extends Plugin
public function onEndShowHeadElements(Action $action)
{
switch ($action->getActionName()) {
case 'attachment':
$url = common_local_url('attachment', ['attachment' => $action->attachment->getID()]);
break;
case 'shownotice':
if (!$action->notice->isLocal()) {
return true;
}
try {
$url = $action->notice->getUrl();
} catch (InvalidUrlException $e) {
// The notice is probably a share or similar, which don't
// have a representational URL of their own.
return true;
}
break;
case 'attachment':
$url = common_local_url('attachment', ['attachment' => $action->attachment->getID()]);
break;
case 'shownotice':
if (!$action->notice->isLocal()) {
return true;
}
try {
$url = $action->notice->getUrl();
} catch (InvalidUrlException $e) {
// The notice is probably a share or similar, which don't
// have a representational URL of their own.
return true;
}
break;
}
if (isset($url)) {
@ -226,7 +226,7 @@ class EmbedPlugin extends Plugin
if (isset($file->mimetype)
&& (('text/html' === substr($file->mimetype, 0, 9) ||
'application/xhtml+xml' === substr($file->mimetype, 0, 21)))) {
'application/xhtml+xml' === substr($file->mimetype, 0, 21)))) {
try {
$embed_data = File_embed::getEmbed($file->url);
if ($embed_data === false) {
@ -255,7 +255,7 @@ class EmbedPlugin extends Plugin
}
$out->elementStart('div', ['id'=>'oembed_info', 'class'=>'e-content']);
foreach (['author_name' => ['class' => ' author', 'url' => 'author_url'],
'provider' => ['class' => '', 'url' => 'provider_url']]
'provider' => ['class' => '', 'url' => 'provider_url']]
as $field => $options) {
if (!empty($embed->{$field})) {
$out->elementStart('div', "fn vcard" . $options['class']);
@ -265,7 +265,7 @@ class EmbedPlugin extends Plugin
$out->element(
'a',
['href' => $embed->{$options['url']},
'class' => 'url'],
'class' => 'url'],
$embed->{$field}
);
}
@ -367,16 +367,16 @@ class EmbedPlugin extends Plugin
// the 'photo' type is shown through ordinary means, using StartShowAttachmentRepresentation!
switch ($embed->type) {
case 'video':
case 'link':
if (!empty($embed->html)
case 'video':
case 'link':
if (!empty($embed->html)
&& (GNUsocial::isAjax() || common_config('attachments', 'show_html'))) {
$purifier = new HTMLPurifier();
// FIXME: do we allow <object> and <embed> here? we did that when we used htmLawed,
// but I'm not sure anymore...
$out->raw($purifier->purify($embed->html));
}
return false;
$purifier = new HTMLPurifier();
// FIXME: do we allow <object> and <embed> here? we did that when we used htmLawed,
// but I'm not sure anymore...
$out->raw($purifier->purify($embed->html));
}
return false;
}
return true;
@ -389,11 +389,15 @@ class EmbedPlugin extends Plugin
*
* @param $file File the file of the created thumbnail
* @param &$imgPath string = the path to the created thumbnail
* @param $media string = media type
* @return bool true if it succeeds (including non-action
* states where it isn't oEmbed data, so it doesn't mess up the event handle
* for other things hooked into it), or the exception if it fails.
* @throws FileNotFoundException
* @throws NoResultException
* @throws ServerException
*/
public function onCreateFileImageThumbnailSource(File $file, &$imgPath, $media)
public function onCreateFileImageThumbnailSource(File $file, &$imgPath, string $media): bool
{
// If we are on a private node, we won't do any remote calls (just as a precaution until
// we can configure this from config.php for the private nodes)
@ -402,9 +406,9 @@ class EmbedPlugin extends Plugin
}
// All our remote Embed images lack a local filename property in the File object
if (!is_null($file->filename)) {
common_debug(sprintf('Filename of file id==%d is not null (%s), so nothing Embed '.
'should handle.', $file->getID(), _ve($file->filename)));
if ($file->isLocal()) {
common_debug(sprintf('File of id==%d is local (filename: %s), so nothing Embed '.
'should handle.', $file->getID(), _ve($file->filename)));
return true;
}
@ -486,7 +490,7 @@ class EmbedPlugin extends Plugin
return isset($headers['content-length']) ? $headers['content-length'] : false;
} catch (Exception $err) {
common_log(LOG_ERR, __CLASS__.': getRemoteFileSize on URL : '._ve($url).
' threw exception: '.$err->getMessage());
' threw exception: '.$err->getMessage());
return false;
}
}
@ -521,7 +525,7 @@ class EmbedPlugin extends Plugin
* @param array|null $headers - The headers possible previous request to $url
* @param int|null $file_id - The id of the file this image belongs to, used for logging
*/
protected function validateAndWriteImage(&$imgData, $url = null, $headers = null, $file_id = 0) : array
protected function validateAndWriteImage(&$imgData, ?string $url = null, ?array $headers = null, ?int $file_id = null) : array
{
$info = @getimagesizefromstring($imgData);
// array indexes documented on php.net:
@ -540,7 +544,7 @@ class EmbedPlugin extends Plugin
if (!empty($url)) {
$original_name = HTTPClient::get_filename($url, $headers);
}
$filename = MediaFile::encodeFilename($original_name ?? '', $filehash);
$filename = MediaFile::encodeFilename($original_name ?? _m('Untitled attachment'), $filehash);
$fullpath = File_thumbnail::path($filename);
// Write the file to disk. Throw Exception on failure
if (!file_exists($fullpath)) {
@ -569,13 +573,13 @@ class EmbedPlugin extends Plugin
}
} else {
throw new AlreadyFulfilledException('A thumbnail seems to already exist for remote file' .
($file_id ? 'with id==' . $file_id : '') . ' at path ' . $fullpath);
($file_id ? 'with id==' . $file_id : '') . ' at path ' . $fullpath);
}
} catch (AlreadyFulfilledException $e) {
// Carry on
} catch (Exception $err) {
common_log(LOG_ERR, "Went to write a thumbnail to disk in EmbedPlugin::storeRemoteThumbnail " .
"but encountered error: {$err}");
"but encountered error: {$err}");
throw $err;
} finally {
unset($imgData);
@ -620,7 +624,7 @@ class EmbedPlugin extends Plugin
$file_size = $this->getRemoteFileSize($url, $headers);
if (($file_size!=false) && ($file_size > $max_size)) {
common_debug("Went to store remote thumbnail of size " . $file_size .
" but the upload limit is " . $max_size . " so we aborted.");
" but the upload limit is " . $max_size . " so we aborted.");
return false;
}
} else {
@ -628,7 +632,7 @@ class EmbedPlugin extends Plugin
}
} catch (Exception $err) {
common_debug("Could not determine size of remote image, aborted local storage.");
return $err;
throw $err;
}
// First we download the file to memory and test whether it's actually an image file
@ -667,8 +671,8 @@ class EmbedPlugin extends Plugin
$thumbnail->updateWithKeys($orig);
} catch (Exception $err) {
common_log(LOG_ERR, "Went to write a thumbnail entry to the database in " .
"EmbedPlugin::storeRemoteThumbnail but encountered error: ".$err);
return $err;
"EmbedPlugin::storeRemoteThumbnail but encountered error: ".$err);
throw $err;
}
return true;
}

View File

@ -1,10 +1,10 @@
The Oembed plugin for using and representing oEmbed data.
The Embed plugin for using and representing both Open Graph and oEmbed data.
See: http://www.oembed.com/
See: https://ogp.me/ and https://www.oembed.com/
Installation
============
This plugin is enabled by default
This plugin is enabled by default.
Settings
========
@ -16,14 +16,17 @@ check_whitelist: Whether to check the domain_whitelist.
Example
=======
```
$config['thumbnail']['width'] = 42;
$config['thumbnail']['height'] = 42;
$config['attachments']['show_html'] = true;
addPlugin('Oembed', array(
'domain_whitelist' => array(
addPlugin('Embed', [
'domain_whitelist' => [
'^i\d*\.ytimg\.com$' => 'YouTube',
'^i\d*\.vimeocdn\.com$' => 'Vimeo'
),
],
'check_whitelist' => true
));
]
);
```

View File

@ -1,34 +0,0 @@
The StoreRemoteMedia plugin downloads remotely attached files to local server.
Installation
============
add "addPlugin('StoreRemoteMedia');"
to the bottom of your config.php
Settings
========
domain_blacklist: Array of regular expressions. Always escape your dots and end your strings.
check_blacklist: Whether to check the domain_blacklist.
domain_whitelist: Array of regular expressions. Always escape your dots and end your strings.
check_whitelist: Whether to check the domain_whitelist.
max_image_bytes: Max image size. Anything bigger than this is rejected. 10MiB by default
When check_whitelist is set, only images from URLs matching a regex in the
domain_whitelist array are accepted for local storage. When check_blacklist
is set, images from URLs matching any regex in the domain_blacklist are
denied local storage. When both lists are checked, only images from URLs
that match a regex in the domain_whitelist and that match no regexen in the
domain_blacklist are accepted for local storage.
Example
=======
addPlugin('StoreRemoteMedia', array(
'domain_whitelist' => array(
'^i\d*\.ytimg\.com$' => 'YouTube',
'^i\d*\.vimeocdn\.com$' => 'Vimeo'
),
'check_whitelist' => true,
));

View File

@ -0,0 +1,31 @@
The StoreRemoteMedia plugin downloads remotely attached files to the local server.
IMPORTANT: If using both Embed and StoreRemoteMedia plugins, Embed should be added first.
Installation
============
Add `addPlugin('StoreRemoteMedia');`
to the bottom of your config.php.
Settings
========
domain_whitelist: Array of regular expressions. Always escape your dots and end your strings.
check_whitelist: Whether to check the domain_whitelist.
max_size: Max media size. Anything bigger than this is rejected. 10MiB by default.
When check_whitelist is set, only images from URLs matching a regex in the
domain_whitelist array are accepted for local storage.
Example
=======
```
addPlugin('StoreRemoteMedia', [
'domain_whitelist' => [
'^i\d*\.ytimg\.com$' => 'YouTube',
'^i\d*\.vimeocdn\.com$' => 'Vimeo'
],
'check_whitelist' => true,
]);
```

View File

@ -1,40 +1,61 @@
<?php
if (!defined('GNUSOCIAL')) { exit(1); }
// FIXME: To support remote video/whatever files, this plugin needs reworking.
class StoreRemoteMediaPlugin extends Plugin
{
const PLUGIN_VERSION = '2.0.0';
// settings which can be set in config.php with addPlugin('Embed', array('param'=>'value', ...));
// settings which can be set in config.php with addPlugin('StoreRemoteMedia', array('param'=>'value', ...));
// WARNING, these are _regexps_ (slashes added later). Always escape your dots and end your strings
public $domain_whitelist = [
// hostname => service provider
'^i\d*\.ytimg\.com$' => 'YouTube',
'^i\d*\.vimeocdn\.com$' => 'Vimeo',
];
];
public $append_whitelist = []; // fill this array as domain_whitelist to add more trusted sources
public $check_whitelist = false; // security/abuse precaution
public $domain_blacklist = [];
public $check_blacklist = false;
public $max_image_bytes = 10 * 1024 * 1024; // 10MiB max image size by default
public $thumbnail_width = null;
public $thumbnail_height = 128;
public $thumbnail_crop = true;
public $max_size = 10 * 1024 * 1024; // 10MiB max image size by default
protected $imgData = [];
// these should be declared protected everywhere
/**
 * Initialize the StoreRemoteMedia plugin and set up the environment it needs.
 *
 * Runs the parent initialization, then, when no thumbnail width was set for
 * this plugin, takes the thumbnail width/height/crop from the site's
 * 'thumbnail' configuration. Finally merges append_whitelist into
 * domain_whitelist.
 *
 * This method has no return statement.
 *
 * NOTE(review): max_size is replaced by common_get_preferred_php_upload_limit()
 * in the same branch as the thumbnail fallbacks, so a max_size given to the
 * plugin is overwritten whenever thumbnail_width is left null. Confirm that
 * this coupling is intended.
 */
public function initialize()
{
    parent::initialize();

    if ($this->thumbnail_width === null) {
        // No plugin-specific width: fall back to the site-wide thumbnail settings.
        $this->thumbnail_width = common_config('thumbnail', 'width');
        $this->thumbnail_height = common_config('thumbnail', 'height');
        $this->thumbnail_crop = common_config('thumbnail', 'crop');
        $this->max_size = common_get_preferred_php_upload_limit();
    }

    // Extra trusted sources are appended to (or override same-key entries of) the built-in list.
    $combinedWhitelist = array_merge($this->domain_whitelist, $this->append_whitelist);
    $this->domain_whitelist = $combinedWhitelist;
}
public function onCreateFileImageThumbnailSource(File $file, &$imgPath, $media=null)
/**
* This event executes when GNU social is creating a file thumbnail entry in
* the database. We glom onto this to fetch remote attachments.
*
* @param $file File the file of the created thumbnail
* @param &$imgPath string = the path to the created thumbnail
* @param $media string = media type
* @return bool
* @throws AlreadyFulfilledException
* @throws FileNotFoundException
* @throws FileNotStoredLocallyException
* @throws HTTP_Request2_Exception
* @throws ServerException
*/
public function onCreateFileImageThumbnailSource(File $file, &$imgPath, string $media): bool
{
// If we are on a private node, we won't do any remote calls (just as a precaution until
// we can configure this from config.php for the private nodes)
@ -42,183 +63,260 @@ class StoreRemoteMediaPlugin extends Plugin
return true;
}
if ($media !== 'image') {
return true;
}
// If there is a local filename, it is either a local file already or has already been downloaded.
if (!empty($file->filename)) {
return true;
}
$remoteUrl = $file->getUrl();
if (empty($remoteUrl)) {
return true;
}
if (!$this->checkWhiteList($remoteUrl) ||
!$this->checkBlackList($remoteUrl)) {
return true;
}
// Relative URL, something's off
if (empty(parse_url($remoteUrl, PHP_URL_HOST))) {
common_err("StoreRemoteMedia found a url without host (\"{$remoteUrl}\") for file with id = {$file->id}");
if (!$file->isStoredRemotely()) {
common_debug(sprintf('File id==%d isn\'t a non-fetched remote file (%s), so nothing StoreRemoteMedia '.
'should handle.', $file->getID(), _ve($file->filename)));
return true;
}
try {
File_thumbnail::byFile($file);
// If we don't get the exception `No result found on File_thumbnail lookup.` then Embed has already handled it most likely.
return true;
} catch (NoResultException $e) {
// We can move on
}
$http = new HTTPClient();
common_debug(sprintf('Performing HEAD request for remote file id==%u to avoid '.
'unnecessarily downloading too large files. URL: %s',
$file->getID(), $remoteUrl));
$url = $remoteUrl;
$head = $http->head($remoteUrl);
$remoteUrl = $head->getEffectiveUrl(); // to avoid going through redirects again
if (empty($remoteUrl)) {
common_log(LOG_ERR, "URL after redirects is somehow empty, for URL {$url}");
return true;
}
if (!$this->checkBlackList($remoteUrl)) {
common_log(LOG_WARN, sprintf('%s: Non-blacklisted URL %s redirected to blacklisted URL %s',
__CLASS__, $file->getUrl(), $remoteUrl));
return true;
}
$url = $file->getUrl();
if (substr($url, 0, 7) == 'file://') {
$filename = substr($url, 7);
$info = getimagesize($filename);
$filename = basename($filename);
$width = $info[0];
$height = $info[1];
} else {
$this->checkWhitelist($url);
$head = (new HTTPClient())->head($url);
$headers = $head->getHeader();
$headers = array_change_key_case($headers, CASE_LOWER);
$filesize = isset($headers['content-length']) ?: $file->getSize();
if (empty($filesize)) {
// file size not specified on remote server
common_debug(sprintf('%s: Ignoring remote media because we did not get a ' .
'content length for file id==%u', __CLASS__, $file->getID()));
return true;
} elseif ($filesize > $this->max_image_bytes) {
//FIXME: When we perhaps start fetching videos etc. we'll need to
// differentiate max_image_bytes from that...
// file too big according to plugin configuration
common_debug(sprintf('%s: Skipping remote media because content length (%u) ' .
'is larger than plugin configured max_image_bytes (%u) ' .
'for file id==%u', __CLASS__, intval($filesize),
$this->max_image_bytes, $file->getID()));
return true;
} elseif ($filesize > common_config('attachments', 'file_quota')) {
// file too big according to site configuration
common_debug(sprintf('%s: Skipping remote media because content length (%u) ' .
'is larger than file_quota (%u) for file id==%u',
__CLASS__, intval($filesize),
common_config('attachments', 'file_quota'), $file->getID()));
return true;
try {
$is_image = $this->isRemoteImage($url, $headers);
if ($is_image == true) {
$file_size = $this->getRemoteFileSize($url, $headers);
if (($file_size!=false) && ($file_size > $this->max_size)) {
common_debug("Went to store remote thumbnail of size " . $file_size .
" but the upload limit is " . $this->max_size . " so we aborted.");
return false;
}
} else {
return false;
}
} catch (Exception $err) {
common_debug("Could not determine size of remote image, aborted local storage.");
throw $err;
}
// Then we download the file to memory and test whether it's actually an image file
common_debug(sprintf('Downloading remote file id=%u (should be size %u) ' .
'with effective URL: %s', $file->getID(), $filesize, _ve($remoteUrl)));
$imgData = HTTPClient::quickGet($remoteUrl);
} catch (HTTP_Request2_ConnectionException $e) {
common_log(LOG_ERR, __CLASS__.': '._ve(get_class($e)).' on URL: ' .
_ve($file->getUrl()).' threw exception: '.$e->getMessage());
return true;
}
$info = @getimagesizefromstring($imgData);
if ($info === false) {
throw new UnsupportedMediaException(_('Remote file format was not identified as an image.'), $remoteUrl);
} elseif (!$info[0] || !$info[1]) {
throw new UnsupportedMediaException(_('Image file had impossible geometry (0 width or height)'));
}
$filehash = hash(File::FILEHASH_ALG, $imgData);
try {
// Exception will be thrown before $file is set to anything, so old $file value will be kept
$file = File::getByHash($filehash);
$file->fetch();
//FIXME: Add some code so we don't have to store duplicate File rows for same hash files.
} catch (NoResultException $e) {
$original_name = HTTPClient::get_filename($remoteUrl, $headers);
$filename = MediaFile::encodeFilename($original_name, $filehash);
$fullpath = File::path($filename);
common_debug("StoreRemoteMedia retrieved url {$remoteUrl} for file with id={$file->id} " .
"and will store in {$fullpath}");
// Write the file to disk if it doesn't exist yet. Throw Exception on failure.
if ((!file_exists($fullpath) || substr($fullpath, 0, strlen(INSTALLDIR)) != INSTALLDIR) &&
file_put_contents($fullpath, $imgData) === false) {
throw new ServerException(_('Could not write downloaded file to disk.'));
}
// Updated our database for the file record
$orig = clone($file);
$file->filehash = $filehash;
$file->filename = $filename;
$file->width = $info[0]; // array indexes documented on php.net:
$file->height = $info[1]; // https://php.net/manual/en/function.getimagesize.php
// Throws exception on failure.
$file->updateWithKeys($orig);
}
// Get rid of the file from memory
unset($imgData);
// Output
$imgPath = $file->getPath();
return false;
}
/**
* @return boolean true if given url passes blacklist check
*/
protected function checkBlackList($url)
{
if (!$this->check_blacklist) {
return true;
}
$host = parse_url($url, PHP_URL_HOST);
foreach ($this->domain_blacklist as $regex => $provider) {
if (preg_match("/$regex/", $host)) {
// First we download the file to memory and test whether it's actually an image file
// FIXME: To support remote video/whatever files, this needs reworking.
common_debug(sprintf(
'Downloading remote image for file id==%u with URL: %s',
$file->getID(),
$url
));
try {
$imgData = HTTPClient::quickGet($url);
if (isset($imgData)) {
list($filename, $filehash, $width, $height) = $this->validateAndWriteImage(
$imgData,
$url,
$headers,
$file->getID()
);
} else {
throw new UnsupportedMediaException('HTTPClient returned an empty result');
}
} catch (UnsupportedMediaException $e) {
// Couldn't find anything that looks like an image, nothing to do
common_debug("StoreRemoteMedia was not able to find an image for URL `{$url}`: " . $e->getMessage());
return false;
}
}
return true;
try {
// Update our database for the file record
$orig = clone($file);
$file->filename = $filename;
$file->filehash = $filehash;
$file->width = $width;
$file->height = $height;
// Throws exception on failure.
$file->updateWithKeys($orig);
} catch (Exception $err) {
common_log(LOG_ERR, "Went to update a file entry to the database in " .
"StoreRemoteMediaPlugin::storeRemoteThumbnail but encountered error: ".$err);
throw $err;
}
// Out
$imgPath = $file->getPath();
return !file_exists($imgPath);
}
/**
 * Check the file size of a remote file using a HEAD request and checking
 * the content-length variable returned. This isn't 100% foolproof but is
 * reliable enough for our purposes.
 *
 * @param string     $url     URL of the remote file
 * @param array|null $headers Headers from an earlier request to $url. They are looked up
 *                            by the lower-case key 'content-length' and are NOT normalised
 *                            here; only headers fetched by this method are lower-cased.
 *                            When null, a HEAD request is issued.
 *
 * @return string|bool the file size if it succeeds, false otherwise.
 */
private function getRemoteFileSize($url, $headers = null)
{
    try {
        if ($headers === null) {
            if (!common_valid_http_url($url)) {
                common_log(LOG_ERR, "Invalid URL in StoreRemoteMedia::getRemoteFileSize()");
                return false;
            }
            $head = (new HTTPClient())->head($url);
            // Header names are case-insensitive on the wire; normalise before the lookup
            $headers = array_change_key_case($head->getHeader(), CASE_LOWER);
        }
        return $headers['content-length'] ?? false;
    } catch (Exception $err) {
        // Best effort: any failure while probing is logged and reported as "unknown size"
        common_log(LOG_ERR, __CLASS__.': getRemoteFileSize on URL : '._ve($url).
            ' threw exception: '.$err->getMessage());
        return false;
    }
}
/**
 * Tell whether a remote URL serves an image, judging by its Content-Type header.
 *
 * When no headers are supplied, a HEAD request is made against $url and the
 * response header names are lower-cased before the lookup.
 *
 * @param string     $url     URL of the remote resource
 * @param array|null $headers Headers from an earlier request to $url, read through the
 *                            lower-case key 'content-type'; fetched when empty
 *
 * @return bool true if the remote URL is an image, or false otherwise.
 */
private function isRemoteImage($url, $headers = null)
{
    if (empty($headers)) {
        if (!common_valid_http_url($url)) {
            common_log(LOG_ERR, "Invalid URL in StoreRemoteMedia::isRemoteImage()");
            return false;
        }
        $response = (new HTTPClient())->head($url);
        $headers = array_change_key_case($response->getHeader(), CASE_LOWER);
    }

    // No (or empty) content type means we cannot call it an image
    if (empty($headers['content-type'])) {
        return false;
    }

    return common_get_mime_media($headers['content-type']) === 'image';
}
/**
 * Validate that $imgData is a valid image before writing it to
 * disk, as well as resizing it to at most $this->thumbnail_width
 * by $this->thumbnail_height
 *
 * The file name is derived from the hash of the downloaded data, so the same
 * data always maps to the same path; when that path already exists nothing is
 * written and the dimensions of the stored file are reported.
 *
 * @param string      $imgData - The image data to validate. Taken by reference to avoid copying
 * @param string|null $url     - The url where the image came from, to fetch metadata
 * @param array|null  $headers - The headers possible previous request to $url
 * @param int|null    $file_id - The id of the file this image belongs to, used for logging
 *
 * @return array [$filename, $filehash, $width, $height] describing the stored image
 *
 * @throws UnsupportedMediaException if the data is not recognised as an image or has a zero dimension
 * @throws ServerException           if the downloaded data could not be written to disk
 * @throws ClientException           if the resized image could not be moved into place
 * @throws Exception                 any other failure from the helpers; logged and rethrown
 */
protected function validateAndWriteImage(&$imgData, ?string $url = null, ?array $headers = null, ?int $file_id = null) : array
{
    $info = @getimagesizefromstring($imgData);
    // array indexes documented on php.net:
    // https://php.net/manual/en/function.getimagesize.php
    if ($info === false) {
        throw new UnsupportedMediaException(_m('Remote file format was not identified as an image.'), $url);
    } elseif (!$info[0] || !$info[1]) {
        throw new UnsupportedMediaException(_m('Image file had impossible geometry (0 width or height)'));
    }

    // Provisional dimensions; replaced below by the real ones when the image is
    // resized or when an already stored file is found
    $width = min($info[0], $this->thumbnail_width);
    $height = min($info[1], $this->thumbnail_height);
    $filehash = hash(File::FILEHASH_ALG, $imgData);

    // Set once this call has created $filepath, so a later failure can clean it up
    $written = false;

    try {
        if (!empty($url)) {
            $original_name = HTTPClient::get_filename($url, $headers);
        }
        $filename = MediaFile::encodeFilename($original_name ?? _m('Untitled attachment'), $filehash);
        $filepath = File::path($filename);

        if (file_exists($filepath)) {
            // Already stored by an earlier call. That file may have been scaled
            // with its aspect ratio preserved, so clamping each side independently
            // would report the wrong geometry: read it from the file instead.
            $stored = @getimagesize($filepath);
            if ($stored !== false && $stored[0] && $stored[1]) {
                $width = $stored[0];
                $height = $stored[1];
            }
        } else {
            // Write the file to disk. Throw Exception on failure
            if (strpos($filepath, INSTALLDIR) !== 0 || file_put_contents($filepath, $imgData) === false) {
                throw new ServerException(_m('Could not write downloaded file to disk.'));
            }
            $written = true;

            if (common_get_mime_media(MediaFile::getUploadedMimeType($filepath)) !== 'image') {
                // The file is removed by the cleanup in the catch block below
                throw new UnsupportedMediaException(
                    _m('Remote file format was not identified as an image.'),
                    $url
                );
            }

            // If the image is not of the desired size, resize it
            if ($info[0] > $this->thumbnail_width || $info[1] > $this->thumbnail_height) {
                // Temporary object, not stored in DB
                $img = new ImageFile(-1, $filepath);
                $box = $img->scaleToFit($this->thumbnail_width, $this->thumbnail_height, $this->thumbnail_crop);
                $width = $box['width'];
                $height = $box['height'];
                $outpath = $img->resizeTo($filepath, $box);
                if (!rename($outpath, $filepath)) {
                    // TRANS: Client exception thrown when a file upload operation fails because the file could
                    // TRANS: not be moved from the temporary folder to the permanent file location.
                    // UX: too specific
                    throw new ClientException(_m('File could not be moved to destination directory.'));
                }
            }
        }
    } catch (Exception $err) {
        // Do not leave a rejected or unresized download behind: the next call
        // would find it and take it for a finished thumbnail
        if ($written) {
            @unlink($filepath);
        }
        common_log(LOG_ERR, "Went to write a thumbnail to disk in StoreRemoteMediaPlugin::storeRemoteThumbnail " .
            "but encountered error: {$err}");
        throw $err;
    } finally {
        // Drops this function's binding to the by-reference argument
        unset($imgData);
    }

    return [$filename, $filehash, $width, $height];
}
/**
 * Check a URL's host against the configured domain whitelist.
 *
 * NOTE(review): the statements removed from this method (returning false for
 * "no check made", returning the provider name on a match and throwing a
 * ServerException on a miss) each followed a return and were unreachable, so
 * the boolean contract documented here is what callers have actually been
 * getting - confirm it is the intended one.
 *
 * @param string $url URL whose host component is tested
 *
 * @return bool true when whitelist checking is disabled or the host matches one of the
 *              patterns used as keys of $this->domain_whitelist, false otherwise
 */
protected function checkWhitelist($url)
{
    if (!$this->check_whitelist) {
        return true;
    }

    // parse_url() yields null/false when no host can be extracted; preg_match() wants a string
    $host = (string) parse_url($url, PHP_URL_HOST);

    // Only the keys (regex bodies) matter for the check
    foreach (array_keys($this->domain_whitelist) as $regex) {
        if (preg_match("/$regex/", $host)) {
            return true;
        }
    }

    return false;
}
/**
 * Event raised when GNU social polls the plugin for information about it.
 * Adds this plugin's version information to $versions array
 *
 * @param array &$versions inherited from parent; exactly one entry is appended
 * @return bool true hook value
 */
public function onPluginVersion(array &$versions): bool
{
    // A single entry for the plugin: appending a second one made it show up twice
    $versions[] = [
        'name' => 'StoreRemoteMedia',
        'version' => self::PLUGIN_VERSION,
        'author' => 'Mikael Nordfeldth, Diogo Peralta Cordeiro',
        'homepage' => GNUSOCIAL_ENGINE_URL,
        'description' =>
            // TRANS: Plugin description.
            _m('Plugin for downloading remotely attached files to local server.'),
    ];
    return true;
}
}