[DATABASE] Change collation handling
Until now, table definitions could define collations only for MariaDB, using MariaDB's collation names directly. Definitions now use a slightly more abstract collation name syntax instead, which supports only the collations utf8mb4_bin and utf8mb4_unicode_(cs|ci) (exposed as utf8_bin and utf8_general_(cs|ci)), because those are the ones that have practical use for GNU social. This also means that on MariaDB the formerly used utf8mb4_general_(cs|ci) have been superseded by utf8mb4_unicode_(cs|ci), as they are the more modern replacement. This commit also introduces collation support on PostgreSQL, where utf8_bin maps to the C (POSIX) collation and utf8_general_cs maps to the und-x-icu collation. utf8_general_ci is also mapped to und-x-icu, which unfortunately makes it case-sensitive.
This commit is contained in:
parent
86826a1a68
commit
3075cffcd7
|
@ -79,7 +79,7 @@ class Notice extends Managed_DataObject
|
|||
'id' => array('type' => 'serial', 'not null' => true, 'description' => 'unique identifier'),
|
||||
'profile_id' => array('type' => 'int', 'not null' => true, 'description' => 'who made the update'),
|
||||
'uri' => array('type' => 'varchar', 'length' => 191, 'description' => 'universally unique identifier, usually a tag URI'),
|
||||
'content' => array('type' => 'text', 'description' => 'update content', 'collate' => 'utf8mb4_general_ci'),
|
||||
'content' => array('type' => 'text', 'description' => 'update content', 'collate' => 'utf8_general_ci'),
|
||||
'rendered' => array('type' => 'text', 'description' => 'HTML version of the content'),
|
||||
'url' => array('type' => 'varchar', 'length' => 191, 'description' => 'URL of any attachment (image, video, bookmark, whatever)'),
|
||||
'created' => array('type' => 'datetime', 'description' => 'date this record was created'),
|
||||
|
|
|
@ -46,12 +46,12 @@ class Profile extends Managed_DataObject
|
|||
'description' => 'local and remote users have profiles',
|
||||
'fields' => array(
|
||||
'id' => array('type' => 'serial', 'not null' => true, 'description' => 'unique identifier'),
|
||||
'nickname' => array('type' => 'varchar', 'length' => 64, 'not null' => true, 'description' => 'nickname or username', 'collate' => 'utf8mb4_general_ci'),
|
||||
'fullname' => array('type' => 'text', 'description' => 'display name', 'collate' => 'utf8mb4_general_ci'),
|
||||
'nickname' => array('type' => 'varchar', 'length' => 64, 'not null' => true, 'description' => 'nickname or username', 'collate' => 'utf8_general_ci'),
|
||||
'fullname' => array('type' => 'text', 'description' => 'display name', 'collate' => 'utf8_general_ci'),
|
||||
'profileurl' => array('type' => 'text', 'description' => 'URL, cached so we dont regenerate'),
|
||||
'homepage' => array('type' => 'text', 'description' => 'identifying URL', 'collate' => 'utf8mb4_general_ci'),
|
||||
'bio' => array('type' => 'text', 'description' => 'descriptive biography', 'collate' => 'utf8mb4_general_ci'),
|
||||
'location' => array('type' => 'text', 'description' => 'physical location', 'collate' => 'utf8mb4_general_ci'),
|
||||
'homepage' => array('type' => 'text', 'description' => 'identifying URL', 'collate' => 'utf8_general_ci'),
|
||||
'bio' => array('type' => 'text', 'description' => 'descriptive biography', 'collate' => 'utf8_general_ci'),
|
||||
'location' => array('type' => 'text', 'description' => 'physical location', 'collate' => 'utf8_general_ci'),
|
||||
'lat' => array('type' => 'numeric', 'precision' => 10, 'scale' => 7, 'description' => 'latitude'),
|
||||
'lon' => array('type' => 'numeric', 'precision' => 10, 'scale' => 7, 'description' => 'longitude'),
|
||||
'location_id' => array('type' => 'int', 'description' => 'location id if possible'),
|
||||
|
|
|
@ -147,10 +147,8 @@ class MysqlSchema extends Schema
|
|||
}
|
||||
}
|
||||
|
||||
$table_props = $this->getTableProperties($table, ['TABLE_COLLATION']);
|
||||
$collate = $row['COLLATION_NAME'];
|
||||
if (!empty($collate) && $collate !== $table_props['TABLE_COLLATION']) {
|
||||
$field['collate'] = $collate;
|
||||
if (!empty($row['COLLATION_NAME'])) {
|
||||
$field['collate'] = $row['COLLATION_NAME'];
|
||||
}
|
||||
|
||||
$def['fields'][$name] = $field;
|
||||
|
@ -471,17 +469,6 @@ class MysqlSchema extends Schema
|
|||
return in_array(strtolower($cd['type']), $ints);
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this column a string type?
|
||||
* @param array $cd
|
||||
* @return bool
|
||||
*/
|
||||
private function isStringType(array $cd): bool
{
    // A definition without a usable 'type' entry cannot describe a string
    // column: answer false instead of triggering an undefined-key warning
    // and handing null to strtolower().
    if (!isset($cd['type']) || !is_string($cd['type'])) {
        return false;
    }

    // Type names are matched case-insensitively against the known
    // character types; strict comparison avoids loose-equality surprises.
    $strings = ['char', 'varchar', 'text'];

    return in_array(strtolower($cd['type']), $strings, true);
}
|
||||
|
||||
/**
|
||||
* Return the proper SQL for creating or
|
||||
* altering a column.
|
||||
|
@ -547,6 +534,34 @@ class MysqlSchema extends Schema
|
|||
return $type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Collation in MariaDB format from our format
|
||||
*
|
||||
* @param string $collate
|
||||
* @return string
|
||||
*/
|
||||
protected function collationToMySQL(string $collate): string
{
    // Abstract collation name => MariaDB collation name.
    // NOTE(review): 'utf8mb4_unicode_cs' is produced for 'utf8_general_cs';
    // confirm that the target server actually ships a collation by that name.
    $native = [
        'utf8_bin'        => 'utf8mb4_bin',
        'utf8_general_cs' => 'utf8mb4_unicode_cs',
        'utf8_general_ci' => 'utf8mb4_unicode_ci',
    ];

    if (isset($native[$collate])) {
        return $native[$collate];
    }

    // Anything else is reported and treated as the binary collation.
    common_log(
        LOG_ERR,
        'Collation not supported: "' . $collate . '"'
    );

    return $native['utf8_bin'];
}
|
||||
|
||||
public function typeAndSize(string $name, array $column)
|
||||
{
|
||||
if ($column['type'] === 'enum') {
|
||||
|
@ -581,15 +596,6 @@ class MysqlSchema extends Schema
|
|||
{
|
||||
$tableDef = parent::filterDef($tableName, $tableDef);
|
||||
|
||||
// Get existing table collation if the table exists.
|
||||
// To know if collation that's been set is unique for the table.
|
||||
try {
|
||||
$table_props = $this->getTableProperties($tableName, ['TABLE_COLLATION']);
|
||||
$table_collate = $table_props['TABLE_COLLATION'];
|
||||
} catch (SchemaTableMissingException $e) {
|
||||
$table_collate = null;
|
||||
}
|
||||
|
||||
foreach ($tableDef['fields'] as $name => &$col) {
|
||||
switch ($col['type']) {
|
||||
case 'serial':
|
||||
|
@ -603,9 +609,8 @@ class MysqlSchema extends Schema
|
|||
break;
|
||||
}
|
||||
|
||||
if (!empty($col['collate'])
|
||||
&& $col['collate'] === $table_collate) {
|
||||
unset($col['collate']);
|
||||
if (!empty($col['collate'])) {
|
||||
$col['collate'] = $this->collationToMySQL($col['collate']);
|
||||
}
|
||||
|
||||
$col['type'] = $this->mapType($col);
|
||||
|
|
|
@ -132,10 +132,13 @@ class PgsqlSchema extends Schema
|
|||
) {
|
||||
$field['auto_increment'] = true;
|
||||
} elseif (array_key_exists($name, $enum_info)) {
|
||||
$field['type'] = $type = 'enum';
|
||||
$field['enum'] = $enum_info[$name];
|
||||
}
|
||||
|
||||
if (!empty($row['collation_name'])) {
|
||||
$field['collate'] = $row['collation_name'];
|
||||
}
|
||||
|
||||
$def['fields'][$name] = $field;
|
||||
}
|
||||
|
||||
|
@ -415,6 +418,7 @@ class PgsqlSchema extends Schema
|
|||
'integer' => 'int',
|
||||
'char' => 'bpchar',
|
||||
'datetime' => 'timestamp',
|
||||
'enum' => 'text',
|
||||
'blob' => 'bytea'
|
||||
];
|
||||
|
||||
|
@ -442,6 +446,49 @@ class PgsqlSchema extends Schema
|
|||
return $type;
|
||||
}
|
||||
|
||||
/**
|
||||
* Collation in PostgreSQL format from our format
|
||||
*
|
||||
* @param string $collate
|
||||
* @return string
|
||||
*/
|
||||
protected function collationToPostgreSQL(string $collate): string
{
    // Abstract collation name => PostgreSQL collation name.
    // @fixme No case-insensitivity support: both utf8_general_* variants
    // resolve to the same collation.
    $native = [
        'utf8_bin'        => 'C',
        'utf8_general_cs' => 'und-x-icu',
        'utf8_general_ci' => 'und-x-icu',
    ];

    if (isset($native[$collate])) {
        return $native[$collate];
    }

    // Anything else is reported and treated as the binary collation.
    common_log(
        LOG_ERR,
        'Collation not supported: "' . $collate . '"'
    );

    return $native['utf8_bin'];
}
|
||||
|
||||
public function typeAndSize(string $name, array $column)
{
    // Start from the generic type/size SQL built by the parent schema class.
    $sql = parent::typeAndSize($name, $column);

    // Only string columns carrying a non-empty collation get a COLLATE clause.
    if (!$this->isStringType($column) || empty($column['collate'])) {
        return $sql;
    }

    // The collation name is emitted as a double-quoted identifier.
    return $sql . ' COLLATE "' . $column['collate'] . '"';
}
|
||||
|
||||
/**
|
||||
* Append an SQL statement with an index definition for a full-text search
|
||||
* index over one or more columns on a table.
|
||||
|
@ -475,14 +522,16 @@ class PgsqlSchema extends Schema
|
|||
foreach ($tableDef['fields'] as $name => &$col) {
|
||||
// No convenient support for field descriptions
|
||||
unset($col['description']);
|
||||
// @fixme Nor for MariaDB-specific collations
|
||||
unset($col['collate']);
|
||||
|
||||
if ($col['type'] === 'serial') {
|
||||
$col['type'] = 'int';
|
||||
$col['auto_increment'] = true;
|
||||
}
|
||||
|
||||
if (!empty($col['collate'])) {
|
||||
$col['collate'] = $this->collationToPostgreSQL($col['collate']);
|
||||
}
|
||||
|
||||
$col['type'] = $this->mapType($col);
|
||||
unset($col['size']);
|
||||
}
|
||||
|
|
|
@ -891,6 +891,48 @@ class Schema
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this column a string type?
|
||||
*
|
||||
* @param array $cd
|
||||
* @return bool
|
||||
*/
|
||||
protected function isStringType(array $cd): bool
{
    // A definition without a usable 'type' entry cannot describe a string
    // column: answer false instead of triggering an undefined-key warning
    // and handing null to strtolower().
    if (!isset($cd['type']) || !is_string($cd['type'])) {
        return false;
    }

    // Type names are matched case-insensitively.
    $strings = [
        'char',
        'varchar',
        'text',
        'bpchar', // PostgreSQL
        'enum',   // MariaDB
    ];

    return in_array(strtolower($cd['type']), $strings, true);
}
|
||||
|
||||
/**
|
||||
* Collation in our format from MariaDB format
|
||||
*
|
||||
* @param string $collate
|
||||
* @return string
|
||||
*/
|
||||
protected function collationFromMySQL(string $collate): string
{
    // Work on a copy so the name the caller supplied stays available for
    // the error message below.
    $normalised = $collate;

    // Strip MariaDB's "mb4" marker: utf8mb4_* becomes utf8_*.
    if (substr($normalised, 0, 8) === 'utf8mb4_') {
        $normalised = 'utf8_' . substr($normalised, 8);
    }

    // The "unicode" family is exposed under the abstract "general" name.
    if (substr($normalised, 0, 13) === 'utf8_unicode_') {
        $normalised = 'utf8_general_' . substr($normalised, 13);
    }

    $supported = [
        'utf8_bin',
        'utf8_general_cs',
        'utf8_general_ci',
    ];

    if (in_array($normalised, $supported, true)) {
        return $normalised;
    }

    // Report the collation exactly as it was supplied; the rewritten
    // intermediate name never appeared in any definition and would mislead
    // whoever reads the log.
    common_log(
        LOG_ERR,
        'Collation not supported: "' . $collate . '"'
    );

    // Unsupported collations are treated as the binary collation.
    return 'utf8_bin';
}
|
||||
|
||||
/**
|
||||
* Return the proper SQL for creating or
|
||||
* altering a column.
|
||||
|
@ -1059,6 +1101,15 @@ class Schema
|
|||
if (array_key_exists('not null', $col) && $col['not null'] !== true) {
|
||||
unset($col['not null']);
|
||||
}
|
||||
|
||||
if ($this->isStringType($col)) {
|
||||
// Default collation
|
||||
if (empty($col['collate'])) {
|
||||
$col['collate'] = 'utf8_bin';
|
||||
}
|
||||
// Migration from direct MariaDB collations
|
||||
$col['collate'] = $this->collationFromMySQL($col['collate']);
|
||||
}
|
||||
}
|
||||
|
||||
if (common_config('search', 'type') !== 'fulltext') {
|
||||
|
|
Loading…
Reference in New Issue
Block a user