Source code for skosprovider.registry

'''This module provides a registry for skos providers.

This registry helps us find providers during runtime. We can also apply some
operations to all or several providers at the same time.
'''
import logging

log = logging.getLogger(__name__)

from .uri import is_uri


class RegistryException(Exception):
    '''
    Error raised by a :class:`Registry` when a provider can not be registered.
    '''
class Registry:
    '''
    This registry collects all skos providers.

    Providers are stored by their vocabulary id. A second index maps the
    :term:`URI` of each provider's concept scheme to that id, so providers
    can be looked up by either value.
    '''

    providers = {}
    '''
    Dictionary containing all providers, keyed by id.
    '''

    concept_scheme_uri_map = {}
    '''
    Dictionary mapping concept scheme uri's to vocabulary id's.
    '''

    metadata = {}
    '''
    Dictionary containing metadata about this registry.
    '''

    instance_scope = 'single'
    '''
    Indicates how the registry is being used. Options:

    - single: The registry is part of a script or a single process. It can be
      assumed to be operational for the entire duration of the process
      and there are no threads involved.
    - threaded_global: The registry is part of a program that uses threads,
      such as a typical web application. It's attached to the global process
      and duplicated to threads, making it not thread safe. Proceed carefully
      with certain providers. Should generally only be used with applications
      that only use read-only providers that load all data in memory at
      startup and use no database connections or other kinds of sessions.
    - threaded_thread: The registry is part of a program that uses threads,
      such as a typical web application. It's attached to a thread, such as
      a web request. The registry is instantiated for this thread/request and
      dies with this thread/request. This is needed for providers such as the
      SQLAlchemyProvider. Providers that use database connections or other
      session handling code generally require this.
    '''

    def __init__(self, instance_scope='single', metadata=None):
        '''
        :param str instance_scope: Indicates how the registry was instantiated.
            Possible values: single, threaded_global, threaded_thread.
        :param dict metadata: Metadata essential to this registry. When
            omitted, the registry starts with its own empty dictionary.
            Possible metadata:

            * `catalog`: A :class:`dict` detailing the catalog all
              conceptschemes are part of.
              Currently the contents of the dictionary are undefined
              except for a :term:`uri` attribute that must be present.
            * `dataset`: A :class:`dict` detailing the dataset all
              conceptschemes are part of.
              Currently the contents of the dictionary are undefined
              except for a :term:`uri` attribute that must be present.
        :raises ValueError: The instance_scope is not one of the
            possible values.
        '''
        self.providers = {}
        self.concept_scheme_uri_map = {}
        # A `{}` default would be created once and shared by every registry
        # that does not pass its own metadata, so build a fresh dict here.
        self.metadata = {} if metadata is None else metadata
        if instance_scope not in ['single', 'threaded_global', 'threaded_thread']:
            raise ValueError('Invalid instance_scope.')
        self.instance_scope = instance_scope

    @staticmethod
    def _get_vocabulary_uri(provider):
        '''
        Determine the concept scheme :term:`URI` of a provider.

        Falls back to `provider.concept_scheme.uri` (and logs a warning) when
        calling `get_vocabulary_uri` raises an :class:`AttributeError`.

        :param provider: The provider to inspect.
        :returns: The :term:`URI` of the provider's concept scheme.
        '''
        try:
            return provider.get_vocabulary_uri()
        except AttributeError as e:
            log.error(e)
            # For providers not compatible with skosprovider >= 0.8.0
            log.warning(
                'New versions of skosprovider (>=0.8.0) require your provider '
                'to have a get_vocabulary_uri method. This fallback mechanism '
                'will be removed in version 2.0.0.'
            )
            return provider.concept_scheme.uri

    def get_metadata(self):
        '''Get some metadata on the registry it represents.

        :rtype: Dict.
        '''
        return self.metadata

    def register_provider(self, provider):
        '''
        Register a :class:`skosprovider.providers.VocabularyProvider`.

        The registry is only modified when every check passes; a rejected
        provider leaves both the provider dictionary and the URI map untouched.

        :param skosprovider.providers.VocabularyProvider provider: The provider
            to register.
        :raises RegistryException: The provider does not support the
            instance_scope of this registry, or a provider with this id or
            uri has already been registered.
        '''
        if (
            provider.allowed_instance_scopes
            and self.instance_scope not in provider.allowed_instance_scopes
        ):
            raise RegistryException(
                'This provider does not support instance_scope %s'
                % self.instance_scope
            )
        vocabulary_id = provider.get_vocabulary_id()
        if vocabulary_id in self.providers:
            raise RegistryException(
                'A provider with this id has already been registered.'
            )
        cs_uri = self._get_vocabulary_uri(provider)
        if cs_uri in self.concept_scheme_uri_map:
            raise RegistryException(
                'A provider with URI %s has already been registered.' % cs_uri
            )
        # Both checks passed: only now touch the two indexes, so they can
        # never disagree with each other.
        self.providers[vocabulary_id] = provider
        self.concept_scheme_uri_map[cs_uri] = vocabulary_id

    def remove_provider(self, id):
        '''
        Remove the provider with the given id or :term:`URI`.

        :param str id: The identifier for the provider.
        :returns: A :class:`skosprovider.providers.VocabularyProvider` or
            `False` if the id is unknown.
        '''
        if id in self.providers:
            provider = self.providers[id]
            # Resolve the URI before deleting anything, so a provider whose
            # URI can not be determined is not left half-removed.
            cs_uri = self._get_vocabulary_uri(provider)
            del self.providers[id]
            del self.concept_scheme_uri_map[cs_uri]
            return provider
        if id in self.concept_scheme_uri_map:
            # A concept scheme URI was passed; translate it to the id.
            return self.remove_provider(self.concept_scheme_uri_map[id])
        return False

    def get_provider(self, id):
        '''
        Get a provider by id or :term:`URI`.

        :param str id: The identifier for the provider. This can either be the
            id with which it was registered or the :term:`URI` of the
            conceptscheme that the provider services.
        :returns: A :class:`skosprovider.providers.VocabularyProvider`
            or `False` if the id or uri is unknown.
        '''
        if id in self.providers:
            return self.providers[id]
        if is_uri(id) and id in self.concept_scheme_uri_map:
            return self.providers.get(self.concept_scheme_uri_map[id], False)
        return False

    def get_providers(self, **kwargs):
        '''Get all providers registered.

        If keyword `ids` is present, get only the providers with these ids.

        If keys `subject` is present, get only the providers that have this
        subject.

        .. code-block:: python

           # Get all providers with subject 'biology'
           registry.get_providers(subject='biology')

           # Get all providers with id 1 or 2
           registry.get_providers(ids=[1,2])

           # Get all providers with id 1 or 2 and subject 'biology'
           registry.get_providers(ids=[1,2], subject='biology')

        :param list ids: Only return providers with one of the Ids
            or :term:`URIs <URI>`.
        :param str subject: Only return providers with this subject.
        :returns: A list of
            :class:`providers <skosprovider.providers.VocabularyProvider>`,
            in registration order.
        '''
        if 'ids' in kwargs:
            # Entries that are known concept scheme URIs are translated to the
            # matching vocabulary id; anything else is used as an id as is.
            wanted = {
                self.concept_scheme_uri_map.get(i, i) for i in kwargs['ids']
            }
            providers = [
                p for key, p in self.providers.items() if key in wanted
            ]
        else:
            providers = list(self.providers.values())
        if 'subject' in kwargs:
            subject = kwargs['subject']
            providers = [
                p for p in providers if subject in p.metadata['subject']
            ]
        return providers

    def find(self, query, **kwargs):
        '''Launch a query across all or a selection of providers.

        .. code-block:: python

            # Find anything that has a label of church in any provider.
            registry.find({'label': 'church'})

            # Find anything that has a label of church with the BUILDINGS provider.
            # Attention, this syntax was deprecated in version 0.3.0
            registry.find({'label': 'church'}, providers=['BUILDINGS'])

            # Find anything that has a label of church with the BUILDINGS provider.
            registry.find({'label': 'church'}, providers={'ids': ['BUILDINGS']})

            # Find anything that has a label of church with a provider
            # marked with the subject 'architecture'.
            registry.find({'label': 'church'}, providers={'subject': 'architecture'})

            # Find anything that has a label of church in any provider.
            # If possible, display the results with a Dutch label.
            registry.find({'label': 'church'}, language='nl')

            # Find anything that has a match with an external concept
            # If possible, display the results with a Dutch label.
            registry.find({
                'matches': {
                    'uri': 'http://id.python.org/different/types/of/trees/nr/1/the/larch'
                }}, language='nl')

            # Find anything that has a label of lariks with a close match to an external concept
            # If possible, display the results with a Dutch label.
            registry.find({
                'matches': {
                    'label': 'lariks',
                    'type': 'close',
                    'uri': 'http://id.python.org/different/types/of/trees/nr/1/the/larch'
                }}, language='nl')

        :param dict query: The query parameters that will be passed on to each
            :meth:`~skosprovider.providers.VocabularyProvider.find` method of
            the selected
            :class:`providers <skosprovider.providers.VocabularyProvider>`.
        :param dict providers: Optional. If present, it should be a dictionary.
            This dictionary can contain any of the keyword arguments available
            to the :meth:`get_providers` method. The query will then only
            be passed to the providers conforming to these arguments. A plain
            list is still accepted and treated as a list of ids.
        :param string language: Optional. If present, it should be a
            :term:`language-tag`. This language-tag is passed on to the
            underlying providers and used when selecting the label to display
            for each concept.
        :returns: a list of :class:`dict`.
            Each dict has two keys: id and concepts.
        '''
        if 'providers' not in kwargs:
            providers = self.get_providers()
        else:
            selection = kwargs['providers']
            if isinstance(selection, list):
                # Deprecated syntax: a bare list of provider ids.
                providers = self.get_providers(ids=selection)
            else:
                providers = self.get_providers(**selection)
        # Only `language` is forwarded to the providers.
        kwarguments = {}
        if 'language' in kwargs:
            kwarguments['language'] = kwargs['language']
        return [
            {
                'id': p.get_vocabulary_id(),
                'concepts': p.find(query, **kwarguments)
            }
            for p in providers
        ]

    def get_all(self, **kwargs):
        '''Get all concepts from all providers.

        .. code-block:: python

            # get all concepts in all providers.
            registry.get_all()

            # get all concepts in all providers.
            # If possible, display the results with a Dutch label.
            registry.get_all(language='nl')

        :param string language: Optional. If present, it should be a
            :term:`language-tag`. This language-tag is passed on to the
            underlying providers and used when selecting the label to display
            for each concept.

        :returns: a list of :class:`dict`.
            Each dict has two keys: id and concepts.
        '''
        # Only `language` is forwarded to the providers.
        kwarguments = {}
        if 'language' in kwargs:
            kwarguments['language'] = kwargs['language']
        return [
            {
                'id': p.get_vocabulary_id(),
                'concepts': p.get_all(**kwarguments)
            }
            for p in self.providers.values()
        ]

    def get_by_uri(self, uri):
        '''Get a concept or collection by its uri.

        Returns a single concept or collection if one exists with this uri.
        Returns False otherwise.

        Providers whose concept scheme uri is a prefix of `uri` are asked
        first. The remaining providers are only asked when none of those has
        an answer; no provider is asked twice.

        :param string uri: The uri to find a concept or collection for.
        :raises ValueError: The uri is invalid.
        :rtype: :class:`skosprovider.skos.Concept` or
            :class:`skosprovider.skos.Collection`
        '''
        if not is_uri(uri):
            raise ValueError('%s is not a valid URI.' % uri)
        # Check if there's a provider that's more likely to have the URI
        likely_ids = [
            provider_id
            for csuri, provider_id in self.concept_scheme_uri_map.items()
            if uri.startswith(csuri)
        ]
        tried = set()
        for provider_id in likely_ids:
            # Look the provider up through the URI map directly instead of
            # going through get_provider, which may return False.
            provider = self.providers.get(provider_id)
            if provider is None:
                continue
            tried.add(provider_id)
            c = provider.get_by_uri(uri)
            if c:
                return c
        # Check all providers that have not been asked yet
        for provider_id, provider in self.providers.items():
            if provider_id in tried:
                continue
            c = provider.get_by_uri(uri)
            if c:
                return c
        return False