Don't store duplicates of files.
When a new file is uploaded, it is matched by its SHA-256 hash against previously uploaded files, so we don't have to store duplicates. SHA-256 output is well enough distributed that accidental collisions are extremely unlikely.
This commit is contained in:
parent
0d577584c3
commit
325e784ccd
|
@ -28,6 +28,7 @@ class File extends Managed_DataObject
|
||||||
public $id; // int(4) primary_key not_null
|
public $id; // int(4) primary_key not_null
|
||||||
public $urlhash; // varchar(64) unique_key
|
public $urlhash; // varchar(64) unique_key
|
||||||
public $url; // text
|
public $url; // text
|
||||||
|
public $filehash; // varchar(64) indexed
|
||||||
public $mimetype; // varchar(50)
|
public $mimetype; // varchar(50)
|
||||||
public $size; // int(4)
|
public $size; // int(4)
|
||||||
public $title; // varchar(191) not 255 because utf8mb4 takes more space
|
public $title; // varchar(191) not 255 because utf8mb4 takes more space
|
||||||
|
@ -39,6 +40,7 @@ class File extends Managed_DataObject
|
||||||
public $modified; // timestamp() not_null default_CURRENT_TIMESTAMP
|
public $modified; // timestamp() not_null default_CURRENT_TIMESTAMP
|
||||||
|
|
||||||
const URLHASH_ALG = 'sha256';
|
const URLHASH_ALG = 'sha256';
|
||||||
|
const FILEHASH_ALG = 'sha256';
|
||||||
|
|
||||||
public static function schemaDef()
|
public static function schemaDef()
|
||||||
{
|
{
|
||||||
|
@ -47,6 +49,7 @@ class File extends Managed_DataObject
|
||||||
'id' => array('type' => 'serial', 'not null' => true),
|
'id' => array('type' => 'serial', 'not null' => true),
|
||||||
'urlhash' => array('type' => 'varchar', 'length' => 64, 'not null' => true, 'description' => 'sha256 of destination URL (url field)'),
|
'urlhash' => array('type' => 'varchar', 'length' => 64, 'not null' => true, 'description' => 'sha256 of destination URL (url field)'),
|
||||||
'url' => array('type' => 'text', 'description' => 'destination URL after following possible redirections'),
|
'url' => array('type' => 'text', 'description' => 'destination URL after following possible redirections'),
|
||||||
|
'filehash' => array('type' => 'varchar', 'length' => 64, 'not null' => false, 'description' => 'sha256 of the file contents, only for locally stored files of course'),
|
||||||
'mimetype' => array('type' => 'varchar', 'length' => 50, 'description' => 'mime type of resource'),
|
'mimetype' => array('type' => 'varchar', 'length' => 50, 'description' => 'mime type of resource'),
|
||||||
'size' => array('type' => 'int', 'description' => 'size of resource when available'),
|
'size' => array('type' => 'int', 'description' => 'size of resource when available'),
|
||||||
'title' => array('type' => 'varchar', 'length' => 191, 'description' => 'title of resource when available'),
|
'title' => array('type' => 'varchar', 'length' => 191, 'description' => 'title of resource when available'),
|
||||||
|
@ -62,6 +65,9 @@ class File extends Managed_DataObject
|
||||||
'unique keys' => array(
|
'unique keys' => array(
|
||||||
'file_urlhash_key' => array('urlhash'),
|
'file_urlhash_key' => array('urlhash'),
|
||||||
),
|
),
|
||||||
|
'indexes' => array(
|
||||||
|
'file_filehash_idx' => array('filehash'),
|
||||||
|
),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -247,12 +253,7 @@ class File extends Managed_DataObject
|
||||||
|
|
||||||
static function filename(Profile $profile, $origname, $mimetype)
|
static function filename(Profile $profile, $origname, $mimetype)
|
||||||
{
|
{
|
||||||
try {
|
$ext = self::guessMimeExtension($mimetype);
|
||||||
$ext = common_supported_mime_to_ext($mimetype);
|
|
||||||
} catch (Exception $e) {
|
|
||||||
// We don't support this mimetype, but let's guess the extension
|
|
||||||
$ext = substr(strrchr($mimetype, '/'), 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Normalize and make the original filename more URL friendly.
|
// Normalize and make the original filename more URL friendly.
|
||||||
$origname = basename($origname, ".$ext");
|
$origname = basename($origname, ".$ext");
|
||||||
|
@ -273,6 +274,17 @@ class File extends Managed_DataObject
|
||||||
return $filename;
|
return $filename;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Pick a lower-case filename extension for a MIME type.
 *
 * common_supported_mime_to_ext() is asked first. If it throws, the
 * extension is guessed as whatever follows the last '/' in the MIME
 * type string.
 *
 * @param string $mimetype MIME type of the file
 *
 * @return string extension, lower-cased
 */
static function guessMimeExtension($mimetype)
{
    try {
        $extension = common_supported_mime_to_ext($mimetype);
    } catch (Exception $e) {
        // We don't support this mimetype, but let's guess the extension
        $fromLastSlash = strrchr($mimetype, '/');
        $extension = substr($fromLastSlash, 1);
    }

    return strtolower($extension);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validation for as-saved base filenames
|
* Validation for as-saved base filenames
|
||||||
*/
|
*/
|
||||||
|
@ -464,7 +476,11 @@ class File extends Managed_DataObject
|
||||||
|
|
||||||
public function getPath()
|
public function getPath()
|
||||||
{
|
{
|
||||||
return self::path($this->filename);
|
$filepath = self::path($this->filename);
|
||||||
|
if (!file_exists($filepath)) {
|
||||||
|
throw new FileNotFoundException($filepath);
|
||||||
|
}
|
||||||
|
return $filepath;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getUrl()
|
public function getUrl()
|
||||||
|
@ -494,6 +510,19 @@ class File extends Managed_DataObject
|
||||||
return $file;
|
return $file;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Look up the File row whose filehash column equals the given hash.
 *
 * @param string $hashstr String of (preferably lower case) hexadecimal characters, same as result of 'hash_file(...)'
 * @param string $alg     Hash algorithm name; accepted but not used by the lookup itself
 *
 * @return File the row that was found
 *
 * @throws NoResultException when the lookup finds nothing
 */
static public function getByHash($hashstr, $alg=File::FILEHASH_ALG)
{
    $match = new File();
    // Normalize to lower case before comparing against the stored value.
    $match->filehash = strtolower($hashstr);

    if ($match->find(true)) {
        return $match;
    }

    throw new NoResultException($match);
}
|
||||||
|
|
||||||
public function updateUrl($url)
|
public function updateUrl($url)
|
||||||
{
|
{
|
||||||
$file = File::getKV('urlhash', self::hashurl($url));
|
$file = File::getKV('urlhash', self::hashurl($url));
|
||||||
|
|
|
@ -119,7 +119,11 @@ class File_thumbnail extends Managed_DataObject
|
||||||
|
|
||||||
public function getPath()
|
public function getPath()
|
||||||
{
|
{
|
||||||
return self::path($this->filename);
|
$filepath = self::path($this->filename);
|
||||||
|
if (!file_exists($filepath)) {
|
||||||
|
throw new FileNotFoundException($filepath);
|
||||||
|
}
|
||||||
|
return $filepath;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function getUrl()
|
public function getUrl()
|
||||||
|
|
|
@ -253,6 +253,7 @@ $default =
|
||||||
'user_quota' => 50000000,
|
'user_quota' => 50000000,
|
||||||
'monthly_quota' => 15000000,
|
'monthly_quota' => 15000000,
|
||||||
'uploads' => true,
|
'uploads' => true,
|
||||||
|
'filename_base' => 'hash', // for new files, choose one: 'upload', 'hash'
|
||||||
'show_html' => false, // show (filtered) text/html attachments (and oEmbed HTML etc.). Doesn't affect AJAX calls.
|
'show_html' => false, // show (filtered) text/html attachments (and oEmbed HTML etc.). Doesn't affect AJAX calls.
|
||||||
'show_thumbs' => true, // show thumbnails in notice lists for uploaded images, and photos and videos linked remotely that provide oEmbed info
|
'show_thumbs' => true, // show thumbnails in notice lists for uploaded images, and photos and videos linked remotely that provide oEmbed info
|
||||||
'process_links' => true, // check linked resources for embeddable photos and videos; this will hit referenced external web sites when processing new messages.
|
'process_links' => true, // check linked resources for embeddable photos and videos; this will hit referenced external web sites when processing new messages.
|
||||||
|
|
|
@ -42,12 +42,13 @@ class MediaFile
|
||||||
var $short_fileurl = null;
|
var $short_fileurl = null;
|
||||||
var $mimetype = null;
|
var $mimetype = null;
|
||||||
|
|
||||||
function __construct(Profile $scoped, $filename = null, $mimetype = null)
|
function __construct(Profile $scoped, $filename = null, $mimetype = null, $filehash = null)
|
||||||
{
|
{
|
||||||
$this->scoped = $scoped;
|
$this->scoped = $scoped;
|
||||||
|
|
||||||
$this->filename = $filename;
|
$this->filename = $filename;
|
||||||
$this->mimetype = $mimetype;
|
$this->mimetype = $mimetype;
|
||||||
|
$this->filehash = $filehash;
|
||||||
$this->fileRecord = $this->storeFile();
|
$this->fileRecord = $this->storeFile();
|
||||||
|
|
||||||
$this->fileurl = common_local_url('attachment',
|
$this->fileurl = common_local_url('attachment',
|
||||||
|
@ -90,6 +91,24 @@ class MediaFile
|
||||||
|
|
||||||
protected function storeFile()
|
protected function storeFile()
|
||||||
{
|
{
|
||||||
|
$filepath = File::path($this->filename);
|
||||||
|
if (!empty($this->filename) && $this->filehash === null) {
|
||||||
|
// Calculate if we have an older upload method somewhere (Qvitter) that
|
||||||
|
// doesn't do this before calling new MediaFile on its local files...
|
||||||
|
$this->filehash = hash_file(File::FILEHASH_ALG, $filepath);
|
||||||
|
if ($this->filehash === false) {
|
||||||
|
throw new ServerException('Could not read file for hashing');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
$file = File::getByHash($this->filehash);
|
||||||
|
// We're done here. Yes. Already. We assume sha256 won't collide on us anytime soon.
|
||||||
|
return $file;
|
||||||
|
} catch (NoResultException $e) {
|
||||||
|
// Well, let's just continue below.
|
||||||
|
}
|
||||||
|
|
||||||
$fileurl = File::url($this->filename);
|
$fileurl = File::url($this->filename);
|
||||||
|
|
||||||
$file = new File;
|
$file = new File;
|
||||||
|
@ -97,11 +116,15 @@ class MediaFile
|
||||||
$file->filename = $this->filename;
|
$file->filename = $this->filename;
|
||||||
$file->urlhash = File::hashurl($fileurl);
|
$file->urlhash = File::hashurl($fileurl);
|
||||||
$file->url = $fileurl;
|
$file->url = $fileurl;
|
||||||
$filepath = File::path($this->filename);
|
$file->filehash = $this->filehash;
|
||||||
$file->size = filesize($filepath);
|
$file->size = filesize($filepath);
|
||||||
|
if ($file->size === false) {
|
||||||
|
throw new ServerException('Could not read file to get its size');
|
||||||
|
}
|
||||||
$file->date = time();
|
$file->date = time();
|
||||||
$file->mimetype = $this->mimetype;
|
$file->mimetype = $this->mimetype;
|
||||||
|
|
||||||
|
|
||||||
$file_id = $file->insert();
|
$file_id = $file->insert();
|
||||||
|
|
||||||
if ($file_id===false) {
|
if ($file_id===false) {
|
||||||
|
@ -206,14 +229,35 @@ class MediaFile
|
||||||
throw new ClientException(_('System error uploading file.'));
|
throw new ClientException(_('System error uploading file.'));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Make documentation clearer that this won't work for files >2GiB because
|
||||||
|
// PHP is stupid in its 32bit head. But noone accepts 2GiB files with PHP
|
||||||
|
// anyway... I hope.
|
||||||
|
$filehash = hash_file(File::FILEHASH_ALG, $_FILES[$param]['tmp_name']);
|
||||||
|
|
||||||
|
try {
|
||||||
|
$file = File::getByHash($filehash);
|
||||||
|
// If no exception is thrown the file exists locally, so we'll use that and just add redirections.
|
||||||
|
$filename = $file->filename;
|
||||||
|
$mimetype = $file->mimetype;
|
||||||
|
|
||||||
|
} catch (NoResultException $e) {
|
||||||
|
// We have to save the upload as a new local file. This is the normal course of action.
|
||||||
|
|
||||||
// Throws exception if additional size does not respect quota
|
// Throws exception if additional size does not respect quota
|
||||||
|
// This test is only needed, of course, if we're uploading something new.
|
||||||
File::respectsQuota($scoped, $_FILES[$param]['size']);
|
File::respectsQuota($scoped, $_FILES[$param]['size']);
|
||||||
|
|
||||||
$mimetype = self::getUploadedMimeType($_FILES[$param]['tmp_name'],
|
$mimetype = self::getUploadedMimeType($_FILES[$param]['tmp_name'], $_FILES[$param]['name']);
|
||||||
$_FILES[$param]['name']);
|
|
||||||
|
|
||||||
|
switch (common_config('attachments', 'filename_base')) {
|
||||||
|
case 'upload':
|
||||||
$basename = basename($_FILES[$param]['name']);
|
$basename = basename($_FILES[$param]['name']);
|
||||||
$filename = File::filename($scoped, $basename, $mimetype);
|
$filename = File::filename($scoped, $basename, $mimetype);
|
||||||
|
break;
|
||||||
|
case 'hash':
|
||||||
|
default:
|
||||||
|
$filename = strtolower($filehash) . '.' . File::guessMimeExtension($mimetype);
|
||||||
|
}
|
||||||
$filepath = File::path($filename);
|
$filepath = File::path($filename);
|
||||||
|
|
||||||
$result = move_uploaded_file($_FILES[$param]['tmp_name'], $filepath);
|
$result = move_uploaded_file($_FILES[$param]['tmp_name'], $filepath);
|
||||||
|
@ -223,20 +267,35 @@ class MediaFile
|
||||||
// TRANS: not be moved from the temporary folder to the permanent file location.
|
// TRANS: not be moved from the temporary folder to the permanent file location.
|
||||||
throw new ClientException(_('File could not be moved to destination directory.'));
|
throw new ClientException(_('File could not be moved to destination directory.'));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return new MediaFile($scoped, $filename, $mimetype);
|
return new MediaFile($scoped, $filename, $mimetype, $filehash);
|
||||||
}
|
}
|
||||||
|
|
||||||
static function fromFilehandle($fh, Profile $scoped) {
|
static function fromFilehandle($fh, Profile $scoped) {
|
||||||
|
|
||||||
$stream = stream_get_meta_data($fh);
|
$stream = stream_get_meta_data($fh);
|
||||||
|
// So far we're only handling filehandles originating from tmpfile(),
|
||||||
|
// so we can always do hash_file on $stream['uri'] as far as I can tell!
|
||||||
|
$filehash = hash_file(File::FILEHASH_ALG, $stream['uri']);
|
||||||
|
|
||||||
|
try {
|
||||||
|
$file = File::getByHash($filehash);
|
||||||
|
// Already have it, so let's reuse the locally stored File
|
||||||
|
$filename = $file->filename;
|
||||||
|
$mimetype = $file->mimetype;
|
||||||
|
} catch (NoResultException $e) {
|
||||||
File::respectsQuota($scoped, filesize($stream['uri']));
|
File::respectsQuota($scoped, filesize($stream['uri']));
|
||||||
|
|
||||||
$mimetype = self::getUploadedMimeType($stream['uri']);
|
$mimetype = self::getUploadedMimeType($stream['uri']);
|
||||||
|
|
||||||
|
switch (common_config('attachments', 'filename_base')) {
|
||||||
|
case 'upload':
|
||||||
$filename = File::filename($scoped, "email", $mimetype);
|
$filename = File::filename($scoped, "email", $mimetype);
|
||||||
|
break;
|
||||||
|
case 'hash':
|
||||||
|
default:
|
||||||
|
$filename = strtolower($filehash) . '.' . File::guessMimeExtension($mimetype);
|
||||||
|
}
|
||||||
$filepath = File::path($filename);
|
$filepath = File::path($filename);
|
||||||
|
|
||||||
$result = copy($stream['uri'], $filepath) && chmod($filepath, 0664);
|
$result = copy($stream['uri'], $filepath) && chmod($filepath, 0664);
|
||||||
|
@ -247,8 +306,9 @@ class MediaFile
|
||||||
throw new ClientException(_('File could not be moved to destination directory.' .
|
throw new ClientException(_('File could not be moved to destination directory.' .
|
||||||
$stream['uri'] . ' ' . $filepath));
|
$stream['uri'] . ' ' . $filepath));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return new MediaFile($scoped, $filename, $mimetype);
|
return new MediaFile($scoped, $filename, $mimetype, $filehash);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -48,6 +48,7 @@ function main()
|
||||||
fixupFileGeometry();
|
fixupFileGeometry();
|
||||||
deleteLocalFileThumbnailsWithoutFilename();
|
deleteLocalFileThumbnailsWithoutFilename();
|
||||||
deleteMissingLocalFileThumbnails();
|
deleteMissingLocalFileThumbnails();
|
||||||
|
setFilehashOnLocalFiles();
|
||||||
|
|
||||||
initGroupProfileId();
|
initGroupProfileId();
|
||||||
initLocalGroup();
|
initLocalGroup();
|
||||||
|
@ -490,7 +491,9 @@ function deleteMissingLocalFileThumbnails()
|
||||||
// Checking if there were any File_thumbnail entries without filename
|
// Checking if there were any File_thumbnail entries without filename
|
||||||
if ($thumbs->find()) {
|
if ($thumbs->find()) {
|
||||||
while ($thumbs->fetch()) {
|
while ($thumbs->fetch()) {
|
||||||
if (!file_exists(File_thumbnail::path($thumbs->filename))) {
|
try {
|
||||||
|
$thumbs->getPath();
|
||||||
|
} catch (FileNotFoundException $e) {
|
||||||
$thumbs->delete();
|
$thumbs->delete();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -499,4 +502,30 @@ function deleteMissingLocalFileThumbnails()
|
||||||
printfnq("DONE.\n");
|
printfnq("DONE.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Files are now stored with their hash, so let's generate it for previously uploaded files.
 *
 * Visits every File row that has a filename but no filehash, hashes the
 * file on disk with File::FILEHASH_ALG and saves the result. Rows whose
 * file is missing, or whose file cannot be hashed, get a warning and are
 * left unchanged so a later run can retry them.
 */
function setFilehashOnLocalFiles()
{
    printfnq('Ensuring all local files have the filehash field set...');

    $file = new File();
    $file->whereAdd('filename IS NOT NULL');        // local files
    $file->whereAdd('filehash IS NULL', 'AND');     // without filehash value

    if ($file->find()) {
        while ($file->fetch()) {
            try {
                $filehash = hash_file(File::FILEHASH_ALG, $file->getPath());
                if ($filehash === false) {
                    // hash_file() returns false when the file can't be read;
                    // never write that into the filehash column.
                    echo "\n WARNING: could not calculate the hash of file ID {$file->id}. Leaving filehash unset.";
                    continue;
                }

                $orig = clone($file);
                $file->filehash = $filehash;
                $file->update($orig);
            } catch (FileNotFoundException $e) {
                echo "\n WARNING: file ID {$file->id} does not exist on path '{$e->path}'. Clean up the file table?";
            }
        }
    }

    printfnq("DONE.\n");
}
|
||||||
|
|
||||||
main();
|
main();
|
||||||
|
|
Loading…
Reference in New Issue
Block a user