From c12212bd684577be729d60db6783dab5461acfb2 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 01/15] Added a way to create linked resources (fix #162). --- config/module.config.php | 5 ++++ src/Form/MappingForm.php | 18 +++++++++++++++ src/Job/Import.php | 8 ++----- src/Mapping/AbstractMapping.php | 2 +- src/Mapping/PropertyMapping.php | 41 ++++++++++++++++++++++++++++++++- 5 files changed, 66 insertions(+), 8 deletions(-) diff --git a/config/module.config.php b/config/module.config.php index f03154dd..d9cc8e84 100644 --- a/config/module.config.php +++ b/config/module.config.php @@ -178,6 +178,10 @@ 'label' => 'Omeka resource (by ID)', // @translate 'adapter' => 'resource', ], + 'resource_by_property' => [ + 'label' => 'Omeka resource (by property)', // @translate + 'adapter' => 'resource', + ], ], 'media_ingester_adapter' => [ 'url' => MediaIngesterAdapter\UrlMediaIngesterAdapter::class, @@ -192,6 +196,7 @@ 'csv_import_multivalue_separator' => ',', 'csv_import_rows_by_batch' => 20, 'csv_import_global_language' => '', + 'csv_import_property_identifier' => 'dcterms:identifier', 'csv_import_identifier_property' => '', 'csv_import_automap_check_names_alone' => false, ], diff --git a/src/Form/MappingForm.php b/src/Form/MappingForm.php index 52fbc786..3ace4a1a 100644 --- a/src/Form/MappingForm.php +++ b/src/Form/MappingForm.php @@ -265,6 +265,24 @@ public function init() ], ]); + $basicSettingsFieldset->add([ + 'name' => 'property_identifier', + 'type' => PropertySelect::class, + 'options' => [ + 'label' => 'Property used as identifier of linked resources', // @translate + 'info' => 'Allows to create a link to the resources identified by the values in a cell.', // @translate + 'empty_option' => 'Select below', // @translate + 'term_as_value' => true, + ], + 'attributes' => [ + 'value' => $userSettings->get( + 'csv_import_property_identifier', + $default['csv_import_property_identifier']), + 'class' => 'chosen-select', + 'data-placeholder' => 'Select a property', // @translate + ], + ]); + $this->add([ 'type' => 'fieldset', 'name' => 'advanced-settings', diff --git a/src/Job/Import.php b/src/Job/Import.php index a36cecbf..afd372fd 100644 --- a/src/Job/Import.php +++ b/src/Job/Import.php @@ -170,12 +170,8 @@ public function perform() $this->rowsByBatch = (int) $args['rows_by_batch']; } - // The core allows batch processes only for creation and deletion. - if (!in_array($args['action'], [self::ACTION_CREATE, self::ACTION_DELETE, self::ACTION_SKIP]) - // It allows to identify resources too, so to use a new resource - // from a previous row. - || ($args['action'] === self::ACTION_CREATE && $this->resourceType === 'resources') - ) { + // The core allows batch processes only for deletion. + if (!in_array($args['action'], [self::ACTION_DELETE, self::ACTION_SKIP])) { $this->rowsByBatch = 1; } diff --git a/src/Mapping/AbstractMapping.php b/src/Mapping/AbstractMapping.php index 8c31111b..c11a512b 100644 --- a/src/Mapping/AbstractMapping.php +++ b/src/Mapping/AbstractMapping.php @@ -57,7 +57,7 @@ public function init(array $args, ServiceLocatorInterface $serviceLocator) $this->args = $args; $this->serviceLocator = $serviceLocator; $this->logger = $serviceLocator->get('Omeka\Logger'); - $this->api = $serviceLocator->get('Omeka\ApiManager'); + $this->api = $serviceLocator->get('ControllerPluginManager')->get('api'); } public function getServiceLocator() diff --git a/src/Mapping/PropertyMapping.php b/src/Mapping/PropertyMapping.php index 4278a68c..2fc03083 100644 --- a/src/Mapping/PropertyMapping.php +++ b/src/Mapping/PropertyMapping.php @@ -1,6 +1,8 @@ csvPropertySelector($view->translate('Properties'), false); } + public function init(array $args, ServiceLocatorInterface $serviceLocator) + { + parent::init($args, $serviceLocator); + $this->findResourceFromIdentifier = $serviceLocator->get('ControllerPluginManager') + ->get('findResourceFromIdentifier'); + + // The main identifier property may be used as term or as id in some + // places, so prepare it one time only. + $propertyIdentifier = $this->args['property_identifier']; + if (is_numeric($propertyIdentifier)) { + $this->propertyIdentifier = (int) $propertyIdentifier; + } else { + $property = $this->api->searchOne('properties', ['term' => $propertyIdentifier])->getContent(); + $this->propertyIdentifier = $property + ? $property->id() + : null; + } + } + public function processRow(array $row) { // Reset the data and the map between rows. @@ -46,6 +77,7 @@ public function processRow(array $row) } $dataTypeAdapters = $this->getDataTypeAdapters(); + $findResourceFromIdentifier = $this->findResourceFromIdentifier; // Get default option values. $globalLanguage = isset($this->args['global_language']) ? $this->args['global_language'] : ''; @@ -98,10 +130,17 @@ public function processRow(array $row) break; case 'resource': + if ($type === 'resource_by_property' && $this->propertyIdentifier) { + $linkedResource = $findResourceFromIdentifier($value, $this->propertyIdentifier); + if (!$linkedResource) { + break; + } + $value = $linkedResource; + } $valueData = [ 'value_resource_id' => $value, 'property_id' => $propertyId, - 'type' => $type, + 'type' => $typeAdapter, ]; break; From 479f78ff852c15123281ac51d4509100788d62c3 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 02/15] Cleaned plugin "FindResourcesFromIdentifiers". --- .../Plugin/FindResourcesFromIdentifiers.php | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php index 54a5c072..dc2fb8ee 100644 --- a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php +++ b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php @@ -56,25 +56,34 @@ public function __construct(Connection $connection, ApiManager $apiManager) } /** - * Find a list of resource ids from a list of identifiers. + * Find a list of resource ids from a list of identifiers (or one id). * * When there are true duplicates and case insensitive duplicates, the first * case sensitive is returned, else the first case insensitive resource. * + * All identifiers are returned, even without id. + * * @todo Manage Media source html. * * @param array|string $identifiers Identifiers should be unique. If a * string is sent, the result will be the resource. * @param string|int|array $identifierName Property as integer or term, - * media ingester or "internal_id", or an array with multiple conditions. + * "internal_id", a media ingester (url or file), or an associative array with + * multiple conditions (for media source). * @param string $resourceType The resource type if any. - * @return array|int|null Associative array with the identifiers as key and the ids - * or null as value. Order is kept, but duplicate identifiers are removed. - * If $identifiers is a string, return directly the resource id, or null. + * @return array|int|null|Object Associative array with the identifiers as key + * and the ids or null as value. Order is kept, but duplicate identifiers + * are removed. If $identifiers is a string, return directly the resource + * id, or null. */ public function __invoke($identifiers, $identifierName, $resourceType = null) { - $isSingle = is_string($identifiers); + $isSingle = !is_array($identifiers); + + if (empty($identifierName)) { + return $isSingle ? null : []; + } + if ($isSingle) { $identifiers = [$identifiers]; } @@ -111,9 +120,6 @@ public function __invoke($identifiers, $identifierName, $resourceType = null) $identifierType = 'property'; $identifierName = $result ? $result[0]->id() : null; } - if (empty($identifierName)) { - return $isSingle ? null : []; - } if (!empty($resourceType)) { $resourceTypes = [ @@ -238,9 +244,7 @@ protected function findResourcesFromMediaSource($identifiers, $ingesterName, $it /** * Reorder the result according to the input (simpler in php and there is no - * duplicated identifiers). When there are true duplicates, it returns the - * first. When there are case insensitive duplicates, it returns the first - * too. + * duplicated identifiers). * * @param array $identifiers * @param array $result From 365f8cc20d98b764f13a28a5f7777aaf8030703a Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 03/15] Merged the two options to set the datatype "resource". --- config/module.config.php | 6 +---- src/Mapping/PropertyMapping.php | 41 +++++++++++++++++++-------------- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/config/module.config.php b/config/module.config.php index d9cc8e84..501d1ae5 100644 --- a/config/module.config.php +++ b/config/module.config.php @@ -175,11 +175,7 @@ 'adapter' => 'uri', ], 'resource' => [ - 'label' => 'Omeka resource (by ID)', // @translate - 'adapter' => 'resource', - ], - 'resource_by_property' => [ - 'label' => 'Omeka resource (by property)', // @translate + 'label' => 'Omeka resource', // @translate 'adapter' => 'resource', ], ], diff --git a/src/Mapping/PropertyMapping.php b/src/Mapping/PropertyMapping.php index 2fc03083..ef656605 100644 --- a/src/Mapping/PropertyMapping.php +++ b/src/Mapping/PropertyMapping.php @@ -2,6 +2,7 @@ namespace CSVImport\Mapping; use CSVImport\Mvc\Controller\Plugin\FindResourcesFromIdentifiers; +use Omeka\Stdlib\Message; use Zend\ServiceManager\ServiceLocatorInterface; use Zend\View\Renderer\PhpRenderer; @@ -16,7 +17,7 @@ class PropertyMapping extends AbstractMapping protected $findResourceFromIdentifier; /** - * @var int + * @var int|string */ protected $propertyIdentifier; @@ -33,14 +34,14 @@ public function init(array $args, ServiceLocatorInterface $serviceLocator) // The main identifier property may be used as term or as id in some // places, so prepare it one time only. - $propertyIdentifier = $this->args['property_identifier']; - if (is_numeric($propertyIdentifier)) { - $this->propertyIdentifier = (int) $propertyIdentifier; + if (empty($args['property_identifier']) || $args['property_identifier'] === 'o:id') { + $this->propertyIdentifier = 'o:id'; + } elseif (is_numeric($args['property_identifier'])) { + $this->propertyIdentifier = (int) $args['property_identifier']; } else { - $property = $this->api->searchOne('properties', ['term' => $propertyIdentifier])->getContent(); - $this->propertyIdentifier = $property - ? $property->id() - : null; + $result = $this->api + ->searchOne('properties', ['term' => $args['property_identifier']])->getContent(); + $this->propertyIdentifier = $result ? $result->id() : 'o:id'; } } @@ -130,17 +131,11 @@ public function processRow(array $row) break; case 'resource': - if ($type === 'resource_by_property' && $this->propertyIdentifier) { - $linkedResource = $findResourceFromIdentifier($value, $this->propertyIdentifier); - if (!$linkedResource) { - break; - } - $value = $linkedResource; - } + $identifier = $this->findResource($value, $this->propertyIdentifier); $valueData = [ - 'value_resource_id' => $value, + 'value_resource_id' => $identifier, 'property_id' => $propertyId, - 'type' => $typeAdapter, + 'type' => $type, ]; break; @@ -187,4 +182,16 @@ protected function getDataTypeAdapters() } return $dataTypeAdapters; } + + protected function findResource($identifier, $propertyIdentifier = 'o:id') + { + $resourceType = $this->args['resource_type']; + $findResourceFromIdentifier = $this->findResourceFromIdentifier; + $resourceId = $findResourceFromIdentifier($identifier, $propertyIdentifier, $resourceType); + if (empty($resourceId)) { + $this->setHasErr(true); + return false; + } + return $resourceId; + } } From 586cbb8dcd09ab34081b93ed8462a49d75bcc773 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 04/15] Moved the normalization of the check of the resource type in its own method. --- .../Plugin/FindResourcesFromIdentifiers.php | 51 +++++++++++++------ 1 file changed, 36 insertions(+), 15 deletions(-) diff --git a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php index dc2fb8ee..24c6dd78 100644 --- a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php +++ b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php @@ -121,22 +121,11 @@ public function __invoke($identifiers, $identifierName, $resourceType = null) $identifierName = $result ? $result[0]->id() : null; } - if (!empty($resourceType)) { - $resourceTypes = [ - 'item_sets' => \Omeka\Entity\ItemSet::class, - 'items' => \Omeka\Entity\Item::class, - 'media' => \Omeka\Entity\Media::class, - 'resources' => '', - // Avoid a check and make the plugin more flexible. - 'Omeka\Entity\ItemSet' => \Omeka\Entity\ItemSet::class, - 'Omeka\Entity\Item' => \Omeka\Entity\Item::class, - 'Omeka\Entity\Media' => \Omeka\Entity\Media::class, - 'Omeka\Entity\Resource' => '', - ]; - if (!isset($resourceTypes[$resourceType])) { + if ($resourceType) { + $resourceType = $this->normalizeResourceType($resourceType); + if (is_null($resourceType)) { return $isSingle ? null : []; } - $resourceType = $resourceTypes[$resourceType]; } switch ($identifierType) { @@ -154,6 +143,38 @@ public function __invoke($identifiers, $identifierName, $resourceType = null) return $isSingle ? ($result ? reset($result) : null) : $result; } + protected function normalizeResourceType($resourceType) + { + $resourceTypes = [ + 'items' => \Omeka\Entity\Item::class, + 'item_sets' => \Omeka\Entity\ItemSet::class, + 'media' => \Omeka\Entity\Media::class, + 'resources' => '', + 'resource' => '', + 'resource:item' => \Omeka\Entity\Item::class, + 'resource:itemset' => \Omeka\Entity\ItemSet::class, + 'resource:media' => \Omeka\Entity\Media::class, + // Avoid a check and make the plugin more flexible. + \Omeka\Entity\Item::class => \Omeka\Entity\Item::class, + \Omeka\Entity\ItemSet::class => \Omeka\Entity\ItemSet::class, + \Omeka\Entity\Media::class => \Omeka\Entity\Media::class, + \Omeka\Entity\Resource::class => '', + 'o:item' => \Omeka\Entity\Item::class, + 'o:item_set' => \Omeka\Entity\ItemSet::class, + 'o:media' => \Omeka\Entity\Media::class, + // Other resource types. + 'item' => \Omeka\Entity\Item::class, + 'item_set' => \Omeka\Entity\ItemSet::class, + 'item-set' => \Omeka\Entity\ItemSet::class, + 'itemset' => \Omeka\Entity\ItemSet::class, + 'resource:item_set' => \Omeka\Entity\ItemSet::class, + 'resource:item-set' => \Omeka\Entity\ItemSet::class, + ]; + return isset($resourceTypes[$resourceType]) + ? $resourceTypes[$resourceType] + : null; + } + protected function findResourcesFromInternalIds($identifiers, $resourceType) { // The api manager doesn't manage this type of search. @@ -180,7 +201,7 @@ protected function findResourcesFromInternalIds($identifiers, $resourceType) return array_replace(array_fill_keys($identifiers, null), array_combine($result, $result)); } - protected function findResourcesFromPropertyIds($identifiers, $identifierPropertyId, $resourceType) + protected function findResourcesFromPropertyIds(array $identifiers, $identifierPropertyId, $resourceType) { // The api manager doesn't manage this type of search. $conn = $this->connection; From a8c2a1dcbbd4145987f38a206a34ec68a8bf6554 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 05/15] Fixed deduplication of uri. --- src/Job/Import.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Job/Import.php b/src/Job/Import.php index a36cecbf..2f310e50 100644 --- a/src/Job/Import.php +++ b/src/Job/Import.php @@ -983,7 +983,7 @@ protected function deduplicatePropertyValues($values) $base = []; $base['literal'] = ['property_id' => 0, 'type' => 'literal', '@language' => '', '@value' => '']; $base['resource'] = ['property_id' => 0, 'type' => 'resource', 'value_resource_id' => 0]; - $base['url'] = ['property_id' => 0, 'type' => 'url', '@id' => 0, 'o:label' => '']; + $base['uri'] = ['property_id' => 0, 'type' => 'uri', '@id' => 0, 'o:label' => '']; foreach ($values as $key => $value) { $values[$key] = array_values( // Deduplicate values. From e6e06a15db6add7ae760412172cf95fef7fbdd21 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 06/15] Added a message when an identifier is not identified. --- src/Mapping/PropertyMapping.php | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Mapping/PropertyMapping.php b/src/Mapping/PropertyMapping.php index ef656605..734e0c69 100644 --- a/src/Mapping/PropertyMapping.php +++ b/src/Mapping/PropertyMapping.php @@ -189,6 +189,8 @@ protected function findResource($identifier, $propertyIdentifier = 'o:id') $findResourceFromIdentifier = $this->findResourceFromIdentifier; $resourceId = $findResourceFromIdentifier($identifier, $propertyIdentifier, $resourceType); if (empty($resourceId)) { + $this->logger->err(new Message('"%s" (%s) is not a valid resource identifier.', // @translate + $identifier, $propertyIdentifier)); $this->setHasErr(true); return false; } From f0b04b850cb00d090820c57bcc18ba19aef0da59 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 07/15] Moved the check of the arguments into its own method. --- .../Plugin/FindResourcesFromIdentifiers.php | 81 +++++++++++++------ 1 file changed, 57 insertions(+), 24 deletions(-) diff --git a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php index 24c6dd78..a8fc27c0 100644 --- a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php +++ b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php @@ -94,53 +94,86 @@ public function __invoke($identifiers, $identifierName, $resourceType = null) return $isSingle ? null : []; } + $args = $this->normalizeArgs($identifierName, $resourceType); + if (empty($args)) { + return $isSingle ? null : []; + } + list($identifierType, $identifierName, $resourceType, $itemId) = $args; + + $result = $this->findResources($identifierType, $identifiers, $identifierName, $resourceType, $itemId); + return $isSingle ? ($result ? reset($result) : null) : $result; + } + + protected function findResources($identifierType, array $identifiers, $identifierName, $resourceType, $itemId) + { + switch ($identifierType) { + case 'o:id': + return $this->findResourcesFromInternalIds($identifiers, $resourceType); + case 'property': + return $this->findResourcesFromPropertyIds($identifiers, $identifierName, $resourceType); + case 'media_source': + return $this->findResourcesFromMediaSource($identifiers, $identifierName, $itemId); + } + } + + protected function normalizeArgs($identifierName, $resourceType) + { $identifierType = null; - // Process identifierName as an array. + $identifierTypeName = null; + $itemId = null; + + // Process identifier metadata names as an array. if (is_array($identifierName)) { if (isset($identifierName['o:ingester'])) { // TODO Currently, the media source cannot be html. if ($identifierName['o:ingester'] === 'html') { - return $isSingle ? null : []; + return null; } $identifierType = 'media_source'; + $identifierTypeName = $identifierName['o:ingester']; $resourceType = 'media'; $itemId = empty($identifierName['o:item']['o:id']) ? null : $identifierName['o:item']['o:id']; - $identifierName = $identifierName['o:ingester']; } } - // Here, identifierName is a string or an integer. - elseif (in_array($identifierName, ['internal_id'])) { - $identifierType = 'internal_id'; + // Next, identifierName is a string or an integer. + elseif (in_array($identifierName, ['internal_id', 'o:id'])) { + $identifierType = 'o:id'; + $identifierTypeName = 'o:id'; } elseif (is_numeric($identifierName)) { $identifierType = 'property'; - $identifierName = (int) $identifierName; + // No check of the property id for quicker process. + $identifierTypeName = (int) $identifierName; + } elseif (in_array($identifierName, ['url', 'file'])) { + $identifierType = 'media_source'; + $identifierTypeName = $identifierName; + $resourceType = 'media'; + $itemId = null; } else { - $result = $this->api + $properties = $this->api ->search('properties', ['term' => $identifierName])->getContent(); - $identifierType = 'property'; - $identifierName = $result ? $result[0]->id() : null; + if ($properties) { + $identifierType = 'property'; + $identifierTypeName = $properties[0]->id(); + } + } + + if (empty($identifierTypeName)) { + return null; } if ($resourceType) { $resourceType = $this->normalizeResourceType($resourceType); if (is_null($resourceType)) { - return $isSingle ? null : []; + return null; } } - switch ($identifierType) { - case 'internal_id': - $result = $this->findResourcesFromInternalIds($identifiers, $resourceType); - break; - case 'property': - $result = $this->findResourcesFromPropertyIds($identifiers, $identifierName, $resourceType); - break; - case 'media_source': - $result = $this->findResourcesFromMediaSource($identifiers, $identifierName, $itemId); - break; - } - - return $isSingle ? ($result ? reset($result) : null) : $result; + return [ + $identifierType, + $identifierTypeName, + $resourceType, + $itemId, + ]; } protected function normalizeResourceType($resourceType) From 66292bff25c50271f45e382450497e014d122563 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 08/15] Improved check of duplicate values. --- src/Job/Import.php | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/src/Job/Import.php b/src/Job/Import.php index 2f310e50..d0df7814 100644 --- a/src/Job/Import.php +++ b/src/Job/Import.php @@ -981,17 +981,40 @@ protected function deduplicatePropertyValues($values) { // Base to normalize data in order to deduplicate them in one pass. $base = []; - $base['literal'] = ['property_id' => 0, 'type' => 'literal', '@language' => '', '@value' => '']; - $base['resource'] = ['property_id' => 0, 'type' => 'resource', 'value_resource_id' => 0]; - $base['uri'] = ['property_id' => 0, 'type' => 'uri', '@id' => 0, 'o:label' => '']; + + $base['literal'] = ['is_public' => true, 'property_id' => 0, 'type' => 'literal', '@language' => null, '@value' => '']; + $base['resource'] = ['is_public' => true, 'property_id' => 0, 'type' => 'resource', 'value_resource_id' => 0]; + $base['uri'] = ['is_public' => true, 'o:label' => null, 'property_id' => 0, 'type' => 'uri', '@id' => '']; foreach ($values as $key => $value) { $values[$key] = array_values( // Deduplicate values. - array_map('unserialize', array_unique(array_map('serialize', + array_map('unserialize', array_unique(array_map( + 'serialize', // Normalize values. array_map(function ($v) use ($base) { - return array_replace($base[$v['type']], array_intersect_key($v, $base[$v['type']])); - }, $value))))); + // Data types "resource" and "uri" have "@id" (in json). + $mainType = array_key_exists('value_resource_id', $v) + ? 'resource' + : (array_key_exists('@id', $v) ? 'uri' : 'literal'); + // Keep order and meaning keys. + $r = array_replace($base[$mainType], array_intersect_key($v, $base[$mainType])); + $r['is_public'] = (bool) $r['is_public']; + switch ($mainType) { + case 'literal': + if (empty($r['@language'])) { + $r['@language'] = null; + } + break; + case 'uri': + if (empty($r['o:label'])) { + $r['o:label'] = null; + } + break; + } + return $r; + }, $value) + ))) + ); } return $values; } From 6910447000ef4f6579178bbd92b57ec07ce83eff Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 09/15] Merged the settings "property_identifier" and "identifier_property". --- config/module.config.php | 3 +-- src/Form/MappingForm.php | 34 ++++++++------------------------- src/Job/Import.php | 2 +- src/Mapping/PropertyMapping.php | 12 ++++++------ 4 files changed, 16 insertions(+), 35 deletions(-) diff --git a/config/module.config.php b/config/module.config.php index 501d1ae5..fa1eb3ef 100644 --- a/config/module.config.php +++ b/config/module.config.php @@ -192,8 +192,7 @@ 'csv_import_multivalue_separator' => ',', 'csv_import_rows_by_batch' => 20, 'csv_import_global_language' => '', - 'csv_import_property_identifier' => 'dcterms:identifier', - 'csv_import_identifier_property' => '', + 'csv_import_identifier_property' => 'dcterms:identifier', 'csv_import_automap_check_names_alone' => false, ], ], diff --git a/src/Form/MappingForm.php b/src/Form/MappingForm.php index 3ace4a1a..d74b597f 100644 --- a/src/Form/MappingForm.php +++ b/src/Form/MappingForm.php @@ -266,18 +266,21 @@ public function init() ]); $basicSettingsFieldset->add([ - 'name' => 'property_identifier', + 'name' => 'identifier_property', 'type' => PropertySelect::class, 'options' => [ - 'label' => 'Property used as identifier of linked resources', // @translate - 'info' => 'Allows to create a link to the resources identified by the values in a cell.', // @translate + 'label' => 'Resource identifier property', // @translate + 'info' => 'Use this property, generally "dcterms:identifier", to identify the existing resources to link or to get. In all cases, it is strongly recommended to add one or more unique identifiers to all your resources.', // @translate 'empty_option' => 'Select below', // @translate + 'prepend_value_options' => [ + 'internal_id' => 'Internal ID', // @translate + ], 'term_as_value' => true, ], 'attributes' => [ 'value' => $userSettings->get( - 'csv_import_property_identifier', - $default['csv_import_property_identifier']), + 'csv_import_identifier_property', + $default['csv_import_identifier_property']), 'class' => 'chosen-select', 'data-placeholder' => 'Select a property', // @translate ], @@ -337,27 +340,6 @@ public function init() ]); } - $advancedSettingsFieldset->add([ - 'name' => 'identifier_property', - 'type' => PropertySelect::class, - 'options' => [ - 'label' => 'Resource identifier property', // @translate - 'info' => 'Use this property, generally "dcterms:identifier", to identify the existing resources, so it will be possible to update them. One column of the file must map the selected property. In all cases, it is strongly recommended to add one ore more unique identifiers to all your resources.', // @translate - 'empty_option' => 'Select below', // @translate - 'prepend_value_options' => [ - 'internal_id' => 'Internal ID', // @translate - ], - 'term_as_value' => true, - ], - 'attributes' => [ - 'value' => $userSettings->get( - 'csv_import_identifier_property', - $default['csv_import_identifier_property']), - 'class' => 'action-option chosen-select', - 'data-placeholder' => 'Select a property', // @translate - ], - ]); - $advancedSettingsFieldset->add([ 'name' => 'action_unidentified', 'type' => 'radio', diff --git a/src/Job/Import.php b/src/Job/Import.php index afd372fd..aedbc2d2 100644 --- a/src/Job/Import.php +++ b/src/Job/Import.php @@ -157,7 +157,7 @@ public function perform() // The main identifier property may be used as term or as id in some // places, so prepare it one time only. if (empty($args['identifier_property']) || $args['identifier_property'] === 'internal_id') { - $this->identifierPropertyId = $args['identifier_property']; + $this->identifierPropertyId = 'internal_id'; } elseif (is_numeric($args['identifier_property'])) { $this->identifierPropertyId = (int) $args['identifier_property']; } else { diff --git a/src/Mapping/PropertyMapping.php b/src/Mapping/PropertyMapping.php index 734e0c69..36b8cad4 100644 --- a/src/Mapping/PropertyMapping.php +++ b/src/Mapping/PropertyMapping.php @@ -34,14 +34,14 @@ public function init(array $args, ServiceLocatorInterface $serviceLocator) // The main identifier property may be used as term or as id in some // places, so prepare it one time only. - if (empty($args['property_identifier']) || $args['property_identifier'] === 'o:id') { - $this->propertyIdentifier = 'o:id'; - } elseif (is_numeric($args['property_identifier'])) { - $this->propertyIdentifier = (int) $args['property_identifier']; + if (empty($args['identifier_property']) || $args['identifier_property'] === 'internal_id') { + $this->propertyIdentifier = 'internal_id'; + } elseif (is_numeric($args['identifier_property'])) { + $this->propertyIdentifier = (int) $args['identifier_property']; } else { $result = $this->api - ->searchOne('properties', ['term' => $args['property_identifier']])->getContent(); - $this->propertyIdentifier = $result ? $result->id() : 'o:id'; + ->searchOne('properties', ['term' => $args['identifier_property']])->getContent(); + $this->propertyIdentifier = $result ? $result->id() : 'internal_id'; } } From 98fdcd97088797857359312ef50f1de8b3e3273f Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 10/15] Optimized process for resource ids. --- .../Plugin/FindResourcesFromIdentifiers.php | 47 +++++++++++++++---- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php index a8fc27c0..25b15fc0 100644 --- a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php +++ b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php @@ -208,30 +208,57 @@ protected function normalizeResourceType($resourceType) : null; } - protected function findResourcesFromInternalIds($identifiers, $resourceType) + protected function findResourcesFromInternalIds(array $ids, $resourceType) { + $ids = array_filter(array_map('intval', $ids)); + if (empty($ids)) { + return []; + } + // The api manager doesn't manage this type of search. $conn = $this->connection; - $identifiers = array_map('intval', $identifiers); - $quotedIdentifiers = implode(',', $identifiers); - $qb = $conn->createQueryBuilder() + + $qb = $conn->createQueryBuilder(); + $expr = $qb->expr(); + $qb ->select('resource.id') ->from('resource', 'resource') - // ->andWhere('resource.id in (:ids)') - // ->setParameter(':ids', $identifiers) - ->andWhere("resource.id in ($quotedIdentifiers)") ->addOrderBy('resource.id', 'ASC'); + + $parameters = []; + if (count($ids) === 1) { + $qb + ->andWhere($expr->eq('resource.id', ':id')); + $parameters['id'] = reset($ids); + } else { + // Warning: there is a difference between qb / dbal and qb / orm for + // "in" in qb, when a placeholder is used, there should be one + // placeholder for each value for expr->in(). + $placeholders = []; + foreach (array_values($ids) as $key => $value) { + $placeholder = 'id_' . $key; + $parameters[$placeholder] = $value; + $placeholders[] = ':' . $placeholder; + } + $qb + ->andWhere($expr->in('resource.id', $placeholders)); + } + if ($resourceType) { $qb - ->andWhere('resource.resource_type = :resource_type') - ->setParameter(':resource_type', $resourceType); + ->andWhere($expr->eq('resource.resource_type', ':resource_type')); + $parameters['resource_type'] = $resourceType; } + + $qb + ->setParameters($parameters); + $stmt = $conn->executeQuery($qb, $qb->getParameters()); $result = $stmt->fetchAll(\PDO::FETCH_COLUMN); // Reorder the result according to the input (simpler in php and there // is no duplicated identifiers). - return array_replace(array_fill_keys($identifiers, null), array_combine($result, $result)); + return array_replace(array_fill_keys($ids, null), array_combine($result, $result)); } protected function findResourcesFromPropertyIds(array $identifiers, $identifierPropertyId, $resourceType) From 787919dd674064077febfcbcb106f86ff6036861 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 11/15] Updated some code with last core improvements. --- src/Job/Import.php | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Job/Import.php b/src/Job/Import.php index a36cecbf..127d591e 100644 --- a/src/Job/Import.php +++ b/src/Job/Import.php @@ -804,11 +804,11 @@ protected function updateRevise($resourceType, $id, $data, $action) */ protected function removeEmptyData(array $data) { - // Data are updated in place. - foreach ($data as $name => &$metadata) { + foreach ($data as $name => $metadata) { switch ($name) { case 'o:resource_template': case 'o:resource_class': + case 'o:thumbnail': case 'o:owner': case 'o:item': if (empty($metadata) || empty($metadata['o:id'])) { @@ -827,6 +827,7 @@ protected function removeEmptyData(array $data) case 'o:ingester': case 'o:source': case 'ingest_filename': + case 'o:size': unset($data[$name]); break; case 'o:is_public': @@ -835,12 +836,12 @@ protected function removeEmptyData(array $data) unset($data[$name]); } break; + // Properties. default: - if (is_array($metadata)) { - if (empty($metadata)) { - unset($data[$name]); - } + if (is_array($metadata) && empty($metadata)) { + unset($data[$name]); } + break; } } return $data; From 802fef47bb932e7feb6f5a28b42b19f148a09612 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 12/15] Optimized process for property ids. --- .../Plugin/FindResourcesFromIdentifiers.php | 48 ++++++++++++++----- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php index 25b15fc0..8700d44e 100644 --- a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php +++ b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php @@ -266,25 +266,49 @@ protected function findResourcesFromPropertyIds(array $identifiers, $identifierP // The api manager doesn't manage this type of search. $conn = $this->connection; - // Search in multiple resource types in one time. - $quotedIdentifiers = array_map([$conn, 'quote'], $identifiers); - $quotedIdentifiers = implode(',', $quotedIdentifiers); - $qb = $conn->createQueryBuilder() - ->select('value.value as identifier', 'value.resource_id as id') + $qb = $conn->createQueryBuilder(); + $expr = $qb->expr(); + $qb + ->select('value.value AS identifier', 'value.resource_id AS id') ->from('value', 'value') ->leftJoin('value', 'resource', 'resource', 'value.resource_id = resource.id') - ->andwhere('value.property_id = :property_id') - ->setParameter(':property_id', $identifierPropertyId) - // ->andWhere('value.value in (:values)') - // ->setParameter(':values', $identifiers) - ->andWhere("value.value in ($quotedIdentifiers)") + // ->andWhere($expr->in('value.property_id', $propertyIds)) + // ->andWhere($expr->in('value.value', $identifiers)) ->addOrderBy('resource.id', 'ASC') ->addOrderBy('value.id', 'ASC'); + + $parameters = []; + if (count($identifiers) === 1) { + $qb + ->andWhere($expr->eq('value.value', ':identifier')); + $parameters['identifier'] = reset($identifiers); + } else { + // Warning: there is a difference between qb / dbal and qb / orm for + // "in" in qb, when a placeholder is used, there should be one + // placeholder for each value for expr->in(). + $placeholders = []; + foreach (array_values($identifiers) as $key => $value) { + $placeholder = 'value_' . $key; + $parameters[$placeholder] = $value; + $placeholders[] = ':' . $placeholder; + } + $qb + ->andWhere($expr->in('value.value', $placeholders)); + } + + $qb + ->andWhere($expr->eq('value.property_id', ':property_id')); + $parameters['property_id'] = $identifierPropertyId; + if ($resourceType) { $qb - ->andWhere('resource.resource_type = :resource_type') - ->setParameter(':resource_type', $resourceType); + ->andWhere($expr->eq('resource.resource_type', ':resource_type')); + $parameters['resource_type'] = $resourceType; } + + $qb + ->setParameters($parameters); + $stmt = $conn->executeQuery($qb, $qb->getParameters()); // $stmt->fetchAll(\PDO::FETCH_KEY_PAIR) cannot be used, because it // replaces the first id by later ids in case of true duplicates. From dbd86b1b956398557b4a933d729cdd09c5dbc6e9 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 13/15] Renamed option "internal_id" by "o:id". --- src/Controller/IndexController.php | 2 +- src/Form/MappingForm.php | 2 +- src/Job/Import.php | 4 ++-- src/Mapping/AbstractResourceMapping.php | 4 ++-- .../Controller/Plugin/FindResourcesFromIdentifiers.php | 8 ++++---- view/csv-import/mapping-sidebar/item.phtml | 2 +- view/csv-import/mapping-sidebar/media.phtml | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Controller/IndexController.php b/src/Controller/IndexController.php index 92d3ee78..1c7edcd0 100644 --- a/src/Controller/IndexController.php +++ b/src/Controller/IndexController.php @@ -324,7 +324,7 @@ protected function cleanArgs(array $post) // Check the identifier property. if (array_key_exists('identifier_property', $args)) { $identifierProperty = $args['identifier_property']; - if (empty($identifierProperty) && $identifierProperty !== 'internal_id') { + if (empty($identifierProperty) && $identifierProperty !== 'o:id') { $properties = $api->search('properties', ['term' => $identifierProperty])->getContent(); if (empty($properties)) { $args['identifier_property'] = null; diff --git a/src/Form/MappingForm.php b/src/Form/MappingForm.php index 52fbc786..7afb368b 100644 --- a/src/Form/MappingForm.php +++ b/src/Form/MappingForm.php @@ -327,7 +327,7 @@ public function init() 'info' => 'Use this property, generally "dcterms:identifier", to identify the existing resources, so it will be possible to update them. One column of the file must map the selected property. In all cases, it is strongly recommended to add one ore more unique identifiers to all your resources.', // @translate 'empty_option' => 'Select below', // @translate 'prepend_value_options' => [ - 'internal_id' => 'Internal ID', // @translate + 'o:id' => 'Internal ID', // @translate ], 'term_as_value' => true, ], diff --git a/src/Job/Import.php b/src/Job/Import.php index a36cecbf..5e234c83 100644 --- a/src/Job/Import.php +++ b/src/Job/Import.php @@ -156,7 +156,7 @@ public function perform() // The main identifier property may be used as term or as id in some // places, so prepare it one time only. - if (empty($args['identifier_property']) || $args['identifier_property'] === 'internal_id') { + if (empty($args['identifier_property']) || $args['identifier_property'] === 'o:id') { $this->identifierPropertyId = $args['identifier_property']; } elseif (is_numeric($args['identifier_property'])) { $this->identifierPropertyId = (int) $args['identifier_property']; @@ -712,7 +712,7 @@ protected function filterDataWithoutIdentifier(array $data, array $identifiers) protected function idsForLog($ids, $hasIdentifierKeys = false) { switch ($this->args['identifier_property']) { - case 'internal_id': + case 'o:id': // Nothing to do. break; default: diff --git a/src/Mapping/AbstractResourceMapping.php b/src/Mapping/AbstractResourceMapping.php index dd3d1912..83162043 100644 --- a/src/Mapping/AbstractResourceMapping.php +++ b/src/Mapping/AbstractResourceMapping.php @@ -241,7 +241,7 @@ protected function processCellMedia($index, array $values) // Check params to avoid useless search and improve speed. $action = $this->args['action']; $identifier = reset($values); - $identifierProperty = $this->map['item'][$index] ?: 'internal_id'; + $identifierProperty = $this->map['item'][$index] ?: 'o:id'; $resourceType = 'items'; if (empty($identifier)) { @@ -268,7 +268,7 @@ protected function processCellMedia($index, array $values) } } - protected function findResource($identifier, $identifierProperty = 'internal_id') + protected function findResource($identifier, $identifierProperty = 'o:id') { $resourceType = $this->args['resource_type']; $findResourceFromIdentifier = $this->findResourceFromIdentifier; diff --git a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php index 54a5c072..0bea351d 100644 --- a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php +++ b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php @@ -66,7 +66,7 @@ public function __construct(Connection $connection, ApiManager $apiManager) * @param array|string $identifiers Identifiers should be unique. If a * string is sent, the result will be the resource. * @param string|int|array $identifierName Property as integer or term, - * media ingester or "internal_id", or an array with multiple conditions. + * media ingester or "o:id", or an array with multiple conditions. * @param string $resourceType The resource type if any. * @return array|int|null Associative array with the identifiers as key and the ids * or null as value. Order is kept, but duplicate identifiers are removed. @@ -100,8 +100,8 @@ public function __invoke($identifiers, $identifierName, $resourceType = null) } } // Here, identifierName is a string or an integer. - elseif (in_array($identifierName, ['internal_id'])) { - $identifierType = 'internal_id'; + elseif (in_array($identifierName, ['o:id'])) { + $identifierType = 'o:id'; } elseif (is_numeric($identifierName)) { $identifierType = 'property'; $identifierName = (int) $identifierName; @@ -134,7 +134,7 @@ public function __invoke($identifiers, $identifierName, $resourceType = null) } switch ($identifierType) { - case 'internal_id': + case 'o:id': $result = $this->findResourcesFromInternalIds($identifiers, $resourceType); break; case 'property': diff --git a/view/csv-import/mapping-sidebar/item.phtml b/view/csv-import/mapping-sidebar/item.phtml index b43bf3ca..c258e091 100644 --- a/view/csv-import/mapping-sidebar/item.phtml +++ b/view/csv-import/mapping-sidebar/item.phtml @@ -11,7 +11,7 @@ 'empty_option' => $this->translate('Select below'), 'term_as_value' => false, 'prepend_value_options' => [ - 'internal_id' => $this->translate('Internal ID'), + 'o:id' => $this->translate('Internal ID'), ], 'term_as_value' => true, ], diff --git a/view/csv-import/mapping-sidebar/media.phtml b/view/csv-import/mapping-sidebar/media.phtml index e64dae00..1cbf2899 100644 --- a/view/csv-import/mapping-sidebar/media.phtml +++ b/view/csv-import/mapping-sidebar/media.phtml @@ -11,7 +11,7 @@ 'options' => [ 'empty_option' => $this->translate('Select below'), 'prepend_value_options' => [ - 'internal_id' => $this->translate('Internal ID'), + 'o:id' => $this->translate('Internal ID'), ], 'term_as_value' => true, ], From 312d54a85a54960e8d1d989b75147564e330ba28 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 14/15] Optimized process for media source. --- .../Plugin/FindResourcesFromIdentifiers.php | 48 ++++++++++++++----- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php index 8700d44e..40c51304 100644 --- a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php +++ b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php @@ -317,28 +317,50 @@ protected function findResourcesFromPropertyIds(array $identifiers, $identifierP return $this->cleanResult($identifiers, $result); } - protected function findResourcesFromMediaSource($identifiers, $ingesterName, $itemId = null) + protected function findResourcesFromMediaSource(array $identifiers, $ingesterName, $itemId = null) { // The api manager doesn't manage this type of search. $conn = $this->connection; - // Search in multiple resource types in one time. - $quotedIdentifiers = array_map([$conn, 'quote'], $identifiers); - $quotedIdentifiers = implode(',', $quotedIdentifiers); - $qb = $conn->createQueryBuilder() - ->select('media.source as identifier', 'media.id as id') + $qb = $conn->createQueryBuilder(); + $expr = $qb->expr(); + $qb + ->select('media.source AS identifier', 'media.id AS id') ->from('media', 'media') - ->andwhere('media.ingester = :ingester') - ->setParameter(':ingester', $ingesterName) - // ->andWhere('media.source in (:sources)') - // ->setParameter(':sources', $identifiers) - ->andwhere("media.source in ($quotedIdentifiers)") + ->andWhere('media.ingester = :ingester') + // ->andWhere('media.source IN (' . implode(',', array_map([$conn, 'quote'], $identifiers)) . ')') ->addOrderBy('media.id', 'ASC'); + + $parameters = []; + $parameters['ingester'] = $ingesterName; + + if (count($identifiers) === 1) { + $qb + ->andWhere($expr->eq('media.source', ':identifier')); + $parameters['identifier'] = reset($identifiers); + } else { + // Warning: there is a difference between qb / dbal and qb / orm for + // "in" in qb, when a placeholder is used, there should be one + // placeholder for each value for expr->in(). + $placeholders = []; + foreach (array_values($identifiers) as $key => $value) { + $placeholder = 'value_' . $key; + $parameters[$placeholder] = $value; + $placeholders[] = ':' . $placeholder; + } + $qb + ->andWhere($expr->in('media.source', $placeholders)); + } + if ($itemId) { $qb - ->andWhere('media.item_id = :item_id') - ->setParameter(':item_id', $itemId); + ->andWhere($expr->eq('media.item_id', ':item_id')); + $parameters['item_id'] = $itemId; } + + $qb + ->setParameters($parameters); + $stmt = $conn->executeQuery($qb, $qb->getParameters()); // $stmt->fetchAll(\PDO::FETCH_KEY_PAIR) cannot be used, because it // replaces the first id by later ids in case of true duplicates. From dad3e61f7a7a8291f5a151bba8f9e73977d1bef1 Mon Sep 17 00:00:00 2001 From: Daniel Berthereau Date: Mon, 2 Dec 2019 00:00:00 +0100 Subject: [PATCH 15/15] Allowed to specify multiple properties as identifier. --- config/module.config.php | 5 +- src/Controller/IndexController.php | 17 ++-- src/Form/MappingForm.php | 15 +-- src/Job/Import.php | 54 ++++++----- src/Mapping/PropertyMapping.php | 40 ++++---- .../Plugin/FindResourcesFromIdentifiers.php | 92 ++++++++++++++++--- 6 files changed, 155 insertions(+), 68 deletions(-) diff --git a/config/module.config.php b/config/module.config.php index fa1eb3ef..d2e9f04e 100644 --- a/config/module.config.php +++ b/config/module.config.php @@ -192,7 +192,10 @@ 'csv_import_multivalue_separator' => ',', 'csv_import_rows_by_batch' => 20, 'csv_import_global_language' => '', - 'csv_import_identifier_property' => 'dcterms:identifier', + 'csv_import_identifier_properties' => [ + 'o:id', + 'dcterms:identifier', + ], 'csv_import_automap_check_names_alone' => false, ], ], diff --git a/src/Controller/IndexController.php b/src/Controller/IndexController.php index 1c7edcd0..d56ca841 100644 --- a/src/Controller/IndexController.php +++ b/src/Controller/IndexController.php @@ -321,15 +321,18 @@ protected function cleanArgs(array $post) } } - // Check the identifier property. - if (array_key_exists('identifier_property', $args)) { - $identifierProperty = $args['identifier_property']; - if (empty($identifierProperty) && $identifierProperty !== 'o:id') { - $properties = $api->search('properties', ['term' => $identifierProperty])->getContent(); - if (empty($properties)) { - $args['identifier_property'] = null; + // Check the identifier properties. + if (array_key_exists('identifier_properties', $args)) { + $identifierProperties = $args['identifier_properties'] ? $args['identifier_properties'] : []; + foreach ($identifierProperties as $key => $identifierProperty) { + if ($identifierProperty !== 'o:id') { + $property = $api->searchOne('properties', ['term' => $identifierProperty])->getContent(); + if (empty($property)) { + unset($args['identifier_properties'][$key]); + } } } + $args['identifier_properties'] = array_values($args['identifier_properties']); } if (!array_key_exists('column-multivalue', $post)) { diff --git a/src/Form/MappingForm.php b/src/Form/MappingForm.php index d74b597f..83179f45 100644 --- a/src/Form/MappingForm.php +++ b/src/Form/MappingForm.php @@ -266,21 +266,22 @@ public function init() ]); $basicSettingsFieldset->add([ - 'name' => 'identifier_property', + 'name' => 'identifier_properties', 'type' => PropertySelect::class, 'options' => [ - 'label' => 'Resource identifier property', // @translate - 'info' => 'Use this property, generally "dcterms:identifier", to identify the existing resources to link or to get. In all cases, it is strongly recommended to add one or more unique identifiers to all your resources.', // @translate + 'label' => 'Resource identifier properties', // @translate + 'info' => 'Use these properties, generally "Internal id" or "dcterms:identifier", to identify the existing resources to link or to get. In all cases, it is strongly recommended to add one or more unique identifiers to all your resources.', // @translate 'empty_option' => 'Select below', // @translate 'prepend_value_options' => [ - 'internal_id' => 'Internal ID', // @translate + 'o:id' => 'Internal ID', // @translate ], 'term_as_value' => true, ], 'attributes' => [ + 'multiple' => true, 'value' => $userSettings->get( - 'csv_import_identifier_property', - $default['csv_import_identifier_property']), + 'csv_import_identifier_properties', + $default['csv_import_identifier_properties']), 'class' => 'chosen-select', 'data-placeholder' => 'Select a property', // @translate ], @@ -411,7 +412,7 @@ public function init() 'required' => false, ]); $advancedSettingsInputFilter->add([ - 'name' => 'identifier_property', + 'name' => 'identifier_properties', 'required' => false, ]); $advancedSettingsInputFilter->add([ diff --git a/src/Job/Import.php b/src/Job/Import.php index c6397b1d..6a28eceb 100644 --- a/src/Job/Import.php +++ b/src/Job/Import.php @@ -5,7 +5,7 @@ use CSVImport\Mvc\Controller\Plugin\FindResourcesFromIdentifiers; use CSVImport\Source\SourceInterface; use finfo; -use Omeka\Api\Manager; +use Omeka\Mvc\Controller\Plugin\Api; use Omeka\Job\AbstractJob; use Omeka\Stdlib\Message; use Zend\Log\Logger; @@ -28,7 +28,7 @@ class Import extends AbstractJob protected $rowsByBatch = 20; /** - * @var Manager + * @var Api */ protected $api; @@ -83,9 +83,9 @@ class Import extends AbstractJob protected $identifiers; /** - * @var string|int + * @var array */ - protected $identifierPropertyId; + protected $identifierProperties; /** * @var bool @@ -101,7 +101,7 @@ public function perform() { ini_set('auto_detect_line_endings', true); $services = $this->getServiceLocator(); - $this->api = $services->get('Omeka\ApiManager'); + $this->api = $services->get('ControllerPluginManager')->get('api'); $this->logger = $services->get('Omeka\Logger'); $this->findResourcesFromIdentifiers = $services->get('ControllerPluginManager') ->get('findResourcesFromIdentifiers'); @@ -154,16 +154,24 @@ public function perform() return $this->endJob(); } - // The main identifier property may be used as term or as id in some - // places, so prepare it one time only. - if (empty($args['identifier_property']) || $args['identifier_property'] === 'o:id') { - $this->identifierPropertyId = 'o:id'; - } elseif (is_numeric($args['identifier_property'])) { - $this->identifierPropertyId = (int) $args['identifier_property']; - } else { - $result = $this->api - ->search('properties', ['term' => $args['identifier_property']])->getContent(); - $this->identifierPropertyId = $result ? $result[0]->id() : null; + // The main identifier properties may be used as term or as id in some + // places, so prepare them one time only. + $this->identifierProperties = []; + foreach ($args['identifier_properties'] as $identifierProperty) { + if ($identifierProperty === 'o:id') { + $this->identifierProperties[] = 'o:id'; + } elseif (is_numeric($identifierProperty)) { + $this->identifierProperties[] = (int) $identifierProperty; + } else { + $result = $this->api + ->searchOne('properties', ['term' => $identifierProperty])->getContent(); + if ($result) { + $this->identifierProperties[] = $result->id(); + } + } + } + if (!$this->identifierProperties) { + $this->identifierProperties = ['o:id']; } if (!empty($args['rows_by_batch'])) { @@ -258,7 +266,7 @@ protected function processBatchData(array $data) case self::ACTION_REPLACE: $findResourcesFromIdentifiers = $this->findResourcesFromIdentifiers; $identifiers = $this->extractIdentifiers($data); - $ids = $findResourcesFromIdentifiers($identifiers, $this->identifierPropertyId, $this->resourceType); + $ids = $findResourcesFromIdentifiers($identifiers, $this->identifierProperties, $this->resourceType); $ids = $this->assocIdentifierKeysAndIds($identifiers, $ids); $idsToProcess = array_filter($ids); $idsRemaining = array_diff_key($ids, $idsToProcess); @@ -294,7 +302,7 @@ protected function processBatchData(array $data) case self::ACTION_DELETE: $findResourcesFromIdentifiers = $this->findResourcesFromIdentifiers; $identifiers = $this->extractIdentifiers($data); - $ids = $findResourcesFromIdentifiers($identifiers, $this->identifierPropertyId, $this->resourceType); + $ids = $findResourcesFromIdentifiers($identifiers, $this->identifierProperties, $this->resourceType); $idsToProcess = array_filter($ids); $idsRemaining = array_diff_key($ids, $idsToProcess); @@ -521,11 +529,11 @@ protected function identifyMedias(array $data, array $ids) if (empty($media['o:source']) || empty($media['o:ingester'])) { continue; } - $identifierProperties = []; - $identifierProperties['o:ingester'] = $media['o:ingester']; - $identifierProperties['o:item']['o:id'] = $ids[$key]; + $identifier = []; + $identifier['o:ingester'] = $media['o:ingester']; + $identifier['o:item']['o:id'] = $ids[$key]; $resourceId = $findResourceFromIdentifier( - $media['o:source'], $identifierProperties, 'media'); + $media['o:source'], [$identifier], 'media'); if ($resourceId) { $media['o:id'] = $resourceId; } @@ -948,7 +956,7 @@ protected function extractPropertyValuesFromResource($resourceJson) } /** - * Deduplicate data ids for collections of items set, items, media.... + * Deduplicate data ids for collections of items set, items, media… * * @param array $data * @return array @@ -1091,7 +1099,7 @@ protected function checkOptions() // Specific check when a identifier is required. elseif (!in_array($args['action'], [self::ACTION_CREATE, self::ACTION_SKIP])) { - if (empty($args['identifier_property'])) { + if (empty($args['identifier_properties'])) { $this->hasErr = true; $this->logger->err(new Message('The action "%s" requires a resource identifier property.', // @translate $args['action'])); diff --git a/src/Mapping/PropertyMapping.php b/src/Mapping/PropertyMapping.php index 36b8cad4..03a2f0bc 100644 --- a/src/Mapping/PropertyMapping.php +++ b/src/Mapping/PropertyMapping.php @@ -17,9 +17,9 @@ class PropertyMapping extends AbstractMapping protected $findResourceFromIdentifier; /** - * @var int|string + * @var array */ - protected $propertyIdentifier; + protected $identifierProperties; public function getSidebar(PhpRenderer $view) { @@ -32,16 +32,24 @@ public function init(array $args, ServiceLocatorInterface $serviceLocator) $this->findResourceFromIdentifier = $serviceLocator->get('ControllerPluginManager') ->get('findResourceFromIdentifier'); - // The main identifier property may be used as term or as id in some - // places, so prepare it one time only. - if (empty($args['identifier_property']) || $args['identifier_property'] === 'internal_id') { - $this->propertyIdentifier = 'internal_id'; - } elseif (is_numeric($args['identifier_property'])) { - $this->propertyIdentifier = (int) $args['identifier_property']; - } else { - $result = $this->api - ->searchOne('properties', ['term' => $args['identifier_property']])->getContent(); - $this->propertyIdentifier = $result ? $result->id() : 'internal_id'; + // The main identifier properties may be used as term or as id in some + // places, so prepare them one time only. + $this->identifierProperties = []; + foreach ($args['identifier_properties'] as $identifierProperty) { + if ($identifierProperty === 'o:id') { + $this->identifierProperties[] = 'o:id'; + } elseif (is_numeric($identifierProperty)) { + $this->identifierProperties[] = (int) $identifierProperty; + } else { + $result = $this->api + ->searchOne('properties', ['term' => $identifierProperty])->getContent(); + if ($result) { + $this->identifierProperties[] = $result->id(); + } + } + } + if (!$this->identifierProperties) { + $this->identifierProperties = ['o:id']; } } @@ -131,7 +139,7 @@ public function processRow(array $row) break; case 'resource': - $identifier = $this->findResource($value, $this->propertyIdentifier); + $identifier = $this->findResource($value, $this->identifierProperties); $valueData = [ 'value_resource_id' => $identifier, 'property_id' => $propertyId, @@ -183,14 +191,14 @@ protected function getDataTypeAdapters() return $dataTypeAdapters; } - protected function findResource($identifier, $propertyIdentifier = 'o:id') + protected function findResource($identifier, $identifierProperties = ['o:id']) { $resourceType = $this->args['resource_type']; $findResourceFromIdentifier = $this->findResourceFromIdentifier; - $resourceId = $findResourceFromIdentifier($identifier, $propertyIdentifier, $resourceType); + $resourceId = $findResourceFromIdentifier($identifier, $identifierProperties, $resourceType); if (empty($resourceId)) { $this->logger->err(new Message('"%s" (%s) is not a valid resource identifier.', // @translate - $identifier, $propertyIdentifier)); + $identifier, implode('", "', $identifierProperties))); $this->setHasErr(true); return false; } diff --git a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php index 1dacd014..a67dbc16 100644 --- a/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php +++ b/src/Mvc/Controller/Plugin/FindResourcesFromIdentifiers.php @@ -69,7 +69,9 @@ public function __construct(Connection $connection, ApiManager $apiManager) * string is sent, the result will be the resource. * @param string|int|array $identifierName Property as integer or term, * "o:id", a media ingester (url or file), or an associative array with - * multiple conditions (for media source). + * multiple conditions (for media source). May be a list of identifier + * metadata names, in which case the identifiers are searched in a list of + * properties and/or in internal ids. * @param string $resourceType The resource type if any. * @return array|int|null|Object Associative array with the identifiers as key * and the ids or null as value. Order is kept, but duplicate identifiers @@ -98,10 +100,16 @@ public function __invoke($identifiers, $identifierName, $resourceType = null) if (empty($args)) { return $isSingle ? null : []; } - list($identifierType, $identifierName, $resourceType, $itemId) = $args; + list($identifierTypeNames, $resourceType, $itemId) = $args; - $result = $this->findResources($identifierType, $identifiers, $identifierName, $resourceType, $itemId); - return $isSingle ? ($result ? reset($result) : null) : $result; + foreach ($identifierTypeNames as $identifierType => $identifierName) { + $result = $this->findResources($identifierType, $identifiers, $identifierName, $resourceType, $itemId); + if (empty($result)) { + continue; + } + return $isSingle ? reset($result) : $result; + } + return $isSingle ? null : []; } protected function findResources($identifierType, array $identifiers, $identifierName, $resourceType, $itemId) @@ -110,8 +118,14 @@ protected function findResources($identifierType, array $identifiers, $identifie case 'o:id': return $this->findResourcesFromInternalIds($identifiers, $resourceType); case 'property': + if (!is_array($identifierName)) { + $identifierName = [$identifierName]; + } return $this->findResourcesFromPropertyIds($identifiers, $identifierName, $resourceType); case 'media_source': + if (is_array($identifierName)) { + $identifierName = reset($identifierName); + } return $this->findResourcesFromMediaSource($identifiers, $identifierName, $itemId); } } @@ -133,10 +147,12 @@ protected function normalizeArgs($identifierName, $resourceType) $identifierTypeName = $identifierName['o:ingester']; $resourceType = 'media'; $itemId = empty($identifierName['o:item']['o:id']) ? null : $identifierName['o:item']['o:id']; + } else { + return $this->normalizeMultipleIdentifierMetadata($identifierName, $resourceType); } } // Next, identifierName is a string or an integer. - elseif (in_array($identifierName, ['internal_id', 'o:id'])) { + elseif ($identifierName === 'o:id') { $identifierType = 'o:id'; $identifierTypeName = 'o:id'; } elseif (is_numeric($identifierName)) { @@ -169,13 +185,50 @@ protected function normalizeArgs($identifierName, $resourceType) } return [ - $identifierType, - $identifierTypeName, + [$identifierType => $identifierTypeName], $resourceType, $itemId, ]; } + protected function normalizeMultipleIdentifierMetadata($identifierNames, $resourceType) + { + $identifierTypeNames = []; + foreach ($identifierNames as $identifierName) { + $args = $this->normalizeArgs($identifierName, $resourceType); + if ($args) { + list($identifierTypeName) = $args; + $identifierName = reset($identifierTypeName); + $identifierType = key($identifierTypeName); + switch ($identifierType) { + case 'o:id': + case 'media_source': + $identifierTypeNames[$identifierType] = $identifierName; + break; + default: + $identifierTypeNames[$identifierType][] = $identifierName; + break; + } + } + } + if (!$identifierTypeNames) { + return null; + } + + if ($resourceType) { + $resourceType = $this->normalizeResourceType($resourceType); + if (is_null($resourceType)) { + return null; + } + } + + return [ + $identifierTypeNames, + $resourceType, + null, + ]; + } + protected function normalizeResourceType($resourceType) { $resourceTypes = [ @@ -261,7 +314,7 @@ protected function findResourcesFromInternalIds(array $ids, $resourceType) return array_replace(array_fill_keys($ids, null), array_combine($result, $result)); } - protected function findResourcesFromPropertyIds(array $identifiers, $identifierPropertyId, $resourceType) + protected function findResourcesFromPropertyIds(array $identifiers, array $propertyIds, $resourceType) { // The api manager doesn't manage this type of search. $conn = $this->connection; @@ -296,9 +349,20 @@ protected function findResourcesFromPropertyIds(array $identifiers, $identifierP ->andWhere($expr->in('value.value', $placeholders)); } - $qb - ->andWhere($expr->eq('value.property_id', ':property_id')); - $parameters['property_id'] = $identifierPropertyId; + if (count($propertyIds) === 1) { + $qb + ->andWhere($expr->eq('value.property_id', ':property_id')); + $parameters['property_id'] = reset($propertyIds); + } else { + $placeholders = []; + foreach (array_values($propertyIds) as $key => $value) { + $placeholder = 'property_' . $key; + $parameters[$placeholder] = $value; + $placeholders[] = ':' . $placeholder; + } + $qb + ->andWhere($expr->in('value.property_id', $placeholders)); + } if ($resourceType) { $qb @@ -396,9 +460,9 @@ protected function cleanResult(array $identifiers, array $result) } // Look for the first case insensitive result. $lowerKey = strtolower($key); - foreach ($lowerResult as $resultValue) { - if ($resultValue['identifier'] == $lowerKey) { - $cleanedResult[$key] = $resultValue['id']; + foreach ($lowerResult as $lowerResultValue) { + if ($lowerResultValue['identifier'] == $lowerKey) { + $cleanedResult[$key] = $lowerResultValue['id']; continue 2; } }