zopefoundation / Products.ZCatalog, build 3979545908 (push via GitHub, pending completion)
Drop support for Python 2.7, 3.5, 3.6. (#143)

Coverage: 3144 of 3538 relevant lines covered (88.86%), 0.89 hits per line.
Branches: 817 of 1057 covered (77.29%); branch coverage is included in the aggregate %.
New code: 49 of 49 new or added lines in 12 files covered (100.0%).
Source file: /src/Products/ZCatalog/Catalog.py (85.29% covered)

##############################################################################
#
# Copyright (c) 2002 Zope Foundation and Contributors.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################

import logging
from bisect import bisect
from collections import defaultdict
from functools import cmp_to_key
from operator import itemgetter
from random import randint

import Acquisition
import BTrees.Length
import ExtensionClass
from Acquisition import aq_base
from Acquisition import aq_parent
from BTrees.IIBTree import IISet
from BTrees.IIBTree import intersection
from BTrees.IIBTree import weightedIntersection
from BTrees.IOBTree import IOBTree
from BTrees.OIBTree import OIBTree
from Missing import MV
from Persistence import Persistent
from ZTUtils.Lazy import LazyCat
from ZTUtils.Lazy import LazyMap
from ZTUtils.Lazy import LazyValues

from Products.PluginIndexes.interfaces import ILimitedResultIndex
from Products.PluginIndexes.interfaces import IQueryIndex
from Products.PluginIndexes.interfaces import ITransposeQuery
from Products.PluginIndexes.util import safe_callable
from Products.ZCatalog.CatalogBrains import AbstractCatalogBrain
from Products.ZCatalog.CatalogBrains import NoBrainer
from Products.ZCatalog.plan import CatalogPlan
from Products.ZCatalog.ProgressHandler import ZLogHandler
from Products.ZCatalog.query import IndexQuery


LOG = logging.getLogger('Zope.ZCatalog')


class CatalogError(Exception):
    pass


class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
    """ An Object Catalog

    An Object Catalog maintains a table of object metadata, and a
    series of manageable indexes to quickly search for objects
    (references in the metadata) that satisfy a search query.

    This class is not Zope specific, and can be used in any Python
    program to build catalogs of objects.  Note that it does require
    the objects to be Persistent, and thus must be used with ZODB3.
    """

    _v_brains = NoBrainer

    def __init__(self, vocabulary=None, brains=None):
        # Catalogs no longer care about vocabularies and lexicons,
        # so the vocabulary argument is ignored. (Casey)

        self.schema = {}   # mapping from attribute name to column number
        self.names = ()    # sequence of column names
        self.indexes = {}  # mapping from index name to index object

        # The catalog maintains a BTree of object meta_data for
        # convenient display on result pages.  meta_data attributes
        # are turned into brain objects and returned by
        # searchResults.  The indexing machinery indexes all records
        # by an integer id (rid). self.data is a mapping from the
        # integer id to the meta_data, self.uids is a mapping of the
        # object unique identifier to the rid, and self.paths is a
        # mapping of the rid to the unique identifier.

        self.clear()

        if brains is not None:
            self._v_brains = brains

        self.updateBrains()

    def __len__(self):
        return self._length()

    def clear(self):
        """ clear catalog """

        self.data = IOBTree()  # mapping of rid to meta_data
        self.uids = OIBTree()  # mapping of uid to rid
        self.paths = IOBTree()  # mapping of rid to uid
        self._length = BTrees.Length.Length()

        for index in self.indexes:
            self.getIndex(index).clear()

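    # A minimal sketch (illustrative, with a hypothetical uid) of how the
    # three mappings built by clear() relate once an object is cataloged:
    #
    #   catalog.catalogObject(obj, '/some/uid')
    #   rid = catalog.uids['/some/uid']      # uid -> rid
    #   catalog.paths[rid] == '/some/uid'    # rid -> uid
    #   catalog.data[rid]                    # rid -> metadata tuple
    #   len(catalog) == catalog._length()    # kept in sync via _length
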
    def updateBrains(self):
        self.useBrains(self._v_brains)

    def __getitem__(self, index):
        """
        Returns instances of self._v_brains, or whatever is passed
        into self.useBrains.
        """
        if isinstance(index, tuple):
            # then it contains a score...
            normalized_score, score, key = index
        else:
            # otherwise no score, set all scores to 1
            normalized_score, score, key = (1, 1, index)
        return self.instantiate((key, self.data[key]),
                                score_data=(score, normalized_score))

    def __setstate__(self, state):
        """ initialize your brains.  This method is called when the
        catalog is first activated (from the persistent storage) """
        Persistent.__setstate__(self, state)
        self.updateBrains()

    def useBrains(self, brains):
        """ Sets up the Catalog to return an object (ala ZTables) that
        is created on the fly from the tuple stored in the self.data
        Btree.
        """

        class mybrains(AbstractCatalogBrain, brains):  # NOQA
            pass

        scopy = self.schema.copy()

        schema_len = len(self.schema.keys())
        scopy['data_record_id_'] = schema_len
        scopy['data_record_score_'] = schema_len + 1
        scopy['data_record_normalized_score_'] = schema_len + 2

        mybrains.__record_schema__ = scopy

        self._v_brains = brains
        self._v_result_class = mybrains

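    # For illustration (hypothetical column names): with a schema of
    # {'title': 0, 'modified': 1}, the generated brain class ends up with
    # a record schema like
    #
    #   {'title': 0, 'modified': 1,
    #    'data_record_id_': 2,
    #    'data_record_score_': 3,
    #    'data_record_normalized_score_': 4}
    #
    # so the rid and score columns are appended after the metadata columns.
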
    def addColumn(self, name, default_value=None, threshold=10000):
        """Adds a column to the meta data schema"""
        schema = self.schema
        names = list(self.names)
        threshold = threshold if threshold is not None else 10000

        if name != name.strip():
            # Someone could have mistakenly added a space at the end
            # of the input field.
            LOG.warning('stripped space from new column %r -> %r', name,
                        name.strip())
            name = name.strip()

        if name in schema:
            raise CatalogError('The column %s already exists' % name)

        if name[0] == '_':
            raise CatalogError('Cannot cache fields beginning with "_"')

        values = schema.values()
        if values:
            schema[name] = max(values) + 1
        else:
            schema[name] = 0
        names.append(name)

        if default_value in (None, ''):
            default_value = MV

        if len(self):
            pghandler = ZLogHandler(threshold)
            pghandler.init('Adding %s column' % name, len(self))
            for i, (key, value) in enumerate(self.data.iteritems()):
                pghandler.report(i)
                self.data[key] = value + (default_value, )
            pghandler.finish()

        self.names = tuple(names)
        self.schema = schema

        # new column? update the brain
        self.updateBrains()

    def delColumn(self, name, threshold=10000):
        """Deletes a column from the meta data schema"""
        names = list(self.names)
        threshold = threshold if threshold is not None else 10000

        if name not in self.schema:
            LOG.error('delColumn attempted to delete nonexistent '
                      'column %s.', str(name))
            return

        _index = names.index(name)
        del names[_index]

        # rebuild the schema
        schema = {}
        for i, col in enumerate(names):
            schema[col] = i

        self.schema = schema
        self.names = tuple(names)

        # update the brain
        self.updateBrains()

        # remove the column value from each record
        if len(self):
            _next_index = _index + 1
            pghandler = ZLogHandler(threshold)
            pghandler.init('Deleting %s column' % name, len(self))
            for i, (key, value) in enumerate(self.data.iteritems()):
                pghandler.report(i)
                self.data[key] = value[:_index] + value[_next_index:]
            pghandler.finish()

    def addIndex(self, name, index_type):
        """Create a new index, given a name and an index_type.

        Old format: index_type was a string such as 'FieldIndex',
        'TextIndex' or 'KeywordIndex'. This is no longer valid; the
        actual index must be instantiated and passed in to addIndex.

        New format: index_type is the actual index object to be stored.
        """

        if name in self.indexes:
            raise CatalogError('The index %s already exists' % name)

        if name.startswith('_'):
            raise CatalogError('Cannot index fields beginning with "_"')

        if not name:
            raise CatalogError('Name of index is empty')

        if name != name.strip():
            # Someone could have mistakenly added a space at the end
            # of the input field.
            LOG.warning('stripped space from new index %r -> %r', name,
                        name.strip())
            name = name.strip()

        indexes = self.indexes

        if isinstance(index_type, str):
            raise TypeError("Catalog addIndex now requires the index type "
                            "to be resolved prior to adding; create the "
                            "proper index in the caller.")

        indexes[name] = index_type
        self.indexes = indexes

    def delIndex(self, name):
        """ deletes an index """

        if name not in self.indexes:
            raise CatalogError('The index %s does not exist' % name)

        indexes = self.indexes
        del indexes[name]
        self.indexes = indexes

    def getIndex(self, name):
        """ get an index wrapped in the catalog """
        return self.indexes[name].__of__(self)

    def updateMetadata(self, object, uid, index):
        """ Given an object and a uid, update the column data for the
        uid with the object data iff the object has changed """
        data = self.data
        newDataRecord = self.recordify(object)

        if index is None:
            index = getattr(self, '_v_nextid', 0)
            if index % 4000 == 0:
                index = randint(-2000000000, 2000000000)
            while not data.insert(index, newDataRecord):
                index = randint(-2000000000, 2000000000)

            # We want ids to be somewhat random, but there are
            # advantages for having some ids generated
            # sequentially when many catalog updates are done at
            # once, such as when reindexing or bulk indexing.
            # We allocate ids sequentially using a volatile base,
            # so different threads get different bases. This
            # further reduces conflict and reduces churn in
            # here and in result sets when bulk indexing.
            self._v_nextid = index + 1
        else:
            if data.get(index, 0) != newDataRecord:
                data[index] = newDataRecord
        return index

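    # Sketch of the rid allocation above (illustrative values): a fresh
    # thread has no _v_nextid, so the first rid is drawn from randint();
    # later rids count up from that random base, and every 4000 allocations
    # (index % 4000 == 0) a new random base is chosen:
    #
    #   rid_1 = randint(-2000000000, 2000000000)   # e.g. 123456789
    #   rid_2 = rid_1 + 1                          # sequential from the base
    #   ...until a collision in self.data forces another randint() draw.
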
    # the cataloging API

    def catalogObject(self, object, uid, threshold=None, idxs=None,
                      update_metadata=True):
        """
        Adds an object to the Catalog by iteratively applying it to
        all indexes.

        'object' is the object to be cataloged

        'uid' is the unique Catalog identifier for this object

        If 'idxs' is specified (as a sequence), apply the object only
        to the named indexes.

        If 'update_metadata' is true (the default), also update metadata for
        the object.  If the object is new to the catalog, this flag has
        no effect (metadata is always created for new objects).
        """
        if idxs is None:
            idxs = []

        index = self.uids.get(uid, None)

        if index is None:
            # we are inserting new data
            index = self.updateMetadata(object, uid, None)
            self._length.change(1)
            self.uids[uid] = index
            self.paths[index] = uid
        elif update_metadata:
            # we are updating and we need to update metadata
            self.updateMetadata(object, uid, index)

        # do indexing
        total = 0

        if idxs == []:
            use_indexes = self.indexes.keys()
        else:
            use_indexes = set(idxs)
            for iid in self.indexes:
                x = self.getIndex(iid)
                if ITransposeQuery.providedBy(x):
                    # supported index names for query optimization
                    names = x.getIndexNames()
                    intersec = use_indexes.intersection(names)
                    # add current index for indexing if supported index
                    # names are members of idxs
                    if intersec:
                        use_indexes.update([iid])

            use_indexes = list(use_indexes)

        for name in use_indexes:
            x = self.getIndex(name)
            if hasattr(x, 'index_object'):
                blah = x.index_object(index, object, threshold)
                total = total + blah
            else:
                LOG.error('catalogObject was passed bad index '
                          'object %s.', str(x))

        return total

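    # Hedged usage sketch (uid and index name are hypothetical): cataloging
    # applies the object to every index unless 'idxs' narrows the set, and
    # the return value is the total number of index entries the indexes
    # reported as touched.
    #
    #   total = catalog.catalogObject(doc, '/site/doc1')
    #   total = catalog.catalogObject(doc, '/site/doc1', idxs=['title'])
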
    def uncatalogObject(self, uid):
        """
        Uncatalog an object from the Catalog.  'uid' is a unique
        Catalog identifier.

        Note, the uid must be the same as when the object was
        catalogued, otherwise it will not get removed from the catalog.

        This method should not raise an exception if the uid cannot
        be found in the catalog.
        """
        data = self.data
        uids = self.uids
        paths = self.paths
        indexes = self.indexes.keys()
        rid = uids.get(uid, None)

        if rid is not None:
            for name in indexes:
                x = self.getIndex(name)
                if hasattr(x, 'unindex_object'):
                    x.unindex_object(rid)
            del data[rid]
            del paths[rid]
            del uids[uid]
            self._length.change(-1)

        else:
            LOG.error('uncatalogObject unsuccessfully '
                      'attempted to uncatalog an object '
                      'with a uid of %s. ', str(uid))

    def uniqueValuesFor(self, name):
        """ return unique values for FieldIndex name """
        return tuple(self.getIndex(name).uniqueValues())

    def hasuid(self, uid):
        """ return the rid if catalog contains an object with uid """
        return self.uids.get(uid)

    def recordify(self, object):
        """ turns an object into a record tuple """
        record = []
        # the unique id is always the first element
        for x in self.names:
            attr = getattr(object, x, MV)
            if (attr is not MV and safe_callable(attr)):
                attr = attr()
            record.append(attr)
        return tuple(record)

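    # Illustrative recordify() behavior, assuming names == ('title', 'size'):
    # plain attributes are stored as-is, callables are called, and missing
    # attributes become the MV (missing value) marker.
    #
    #   class Doc:
    #       title = 'hello'
    #       def size(self):
    #           return 42
    #
    #   catalog.recordify(Doc())  ->  ('hello', 42)
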
    def _maintain_zodb_cache(self):
        parent = aq_parent(self)
        if hasattr(aq_base(parent), 'maintain_zodb_cache'):
            parent.maintain_zodb_cache()

    def instantiate(self, record, score_data=None):
        """ internal method: create and initialise search result object.
        record should be a tuple of (document RID, metadata columns tuple),
        score_data can be a tuple of (score, normalized score) or be omitted"""
        self._maintain_zodb_cache()
        key, data = record
        klass = self._v_result_class
        if score_data:
            score, normalized_score = score_data
            schema_len = len(klass.__record_schema__)
            if schema_len == len(data) + 3:
                # if we have complete data, create in a single pass
                data = tuple(data) + (key, score, normalized_score)
                return klass(data).__of__(aq_parent(self))
        r = klass(data)
        r.data_record_id_ = key
        if score_data:
            # preserved during refactoring for compatibility reasons:
            # can only be reached if score_data is present,
            # but schema length is not equal to len(data) + 3
            # no known use cases
            r.data_record_score_ = score
            r.data_record_normalized_score_ = normalized_score
            return r.__of__(aq_parent(self))
        return r.__of__(self)

    def getMetadataForRID(self, rid):
        record = self.data[rid]
        result = {}
        for (key, pos) in self.schema.items():
            result[key] = record[pos]
        return result

    def getIndexDataForRID(self, rid):
        result = {}
        for name in self.indexes:
            result[name] = self.getIndex(name).getEntryForObject(rid, "")
        return result

    def merge_query_args(self, query=None, **kw):
        if not kw and isinstance(query, dict):
            # Short cut for the best practice.
            return query

        merged_query = {}
        if isinstance(query, dict):
            merged_query.update(query)
        merged_query.update(kw)
        return merged_query

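    # Illustrative merging behavior (keyword arguments win on conflict):
    #
    #   catalog.merge_query_args({'title': 'a'})             -> {'title': 'a'}
    #   catalog.merge_query_args({'title': 'a'}, title='b')  -> {'title': 'b'}
    #   catalog.merge_query_args(None, title='b')            -> {'title': 'b'}
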
    def make_query(self, query):
        for iid in self.indexes:
            index = self.getIndex(iid)
            if ITransposeQuery.providedBy(index):
                query = index.make_query(query)

        # Canonicalize tuple/list query arguments.
        new_query = {}
        for key, value in query.items():
            if isinstance(value, (list, tuple)):
                new_query[key] = list(sorted(value))
            else:
                new_query[key] = value

        return new_query

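    # Canonicalization example (hypothetical index name): list and tuple
    # values are sorted so that logically equal queries produce the same
    # catalog plan key.
    #
    #   catalog.make_query({'subject': ('b', 'a')})  ->  {'subject': ['a', 'b']}
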
    def _get_index_query_names(self, index):
        if hasattr(index, 'getIndexQueryNames'):
            return index.getIndexQueryNames()
        return (index.getId(),)

    def _sort_limit_arguments(self, query, sort_index, reverse, limit):
        b_start = int(query.get('b_start', 0))
        b_size = query.get('b_size', None)
        if b_size is not None:
            b_size = int(b_size)

        if b_size is not None:
            limit = b_start + b_size
        elif limit and b_size is None:
            b_size = limit

        if sort_index is None:
            sort_report_name = None
        else:
            if isinstance(sort_index, list):
                sort_name = '-'.join(i.getId() for i in sort_index)
            else:
                sort_name = sort_index.getId()
            if isinstance(reverse, list):
                reverse_name = '-'.join(
                    'desc' if r else 'asc' for r in reverse)
            else:
                reverse_name = 'desc' if reverse else 'asc'
            sort_report_name = 'sort_on#' + sort_name + '#' + reverse_name
            if limit is not None:
                sort_report_name += '#limit-%s' % limit
        return (b_start, b_size, limit, sort_report_name)

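    # Example of the limit deduction above (hypothetical batch arguments):
    # with b_start=20 and b_size=10 in the query, the sort limit becomes
    # 30 (the batch end), and a single sort index named 'modified' with
    # reverse=True yields the report name 'sort_on#modified#desc#limit-30'.
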
    def _sorted_search_indexes(self, query):
        # Simple implementation ordering only by limited result support
        query_keys = query.keys()
        order = []
        for name, index in self.indexes.items():
            for attr in self._get_index_query_names(index):
                if attr in query_keys:
                    order.append((ILimitedResultIndex.providedBy(index), name))
        order.sort()
        return [i[1] for i in order]

    def _limit_sequence(self, sequence, slen, b_start=0, b_size=None,
                        switched_reverse=False):
        if b_size is not None:
            sequence = sequence[b_start:b_start + b_size]
            if slen:
                slen = len(sequence)
        if switched_reverse:
            sequence.reverse()
        return (sequence, slen)

    def _search_index(self, cr, index_id, query, rs):
        cr.start_split(index_id)

        index_rs = None
        index = self.getIndex(index_id)
        limit_result = ILimitedResultIndex.providedBy(index)

        if IQueryIndex.providedBy(index):
            index_query = IndexQuery(query, index.id, index.query_options,
                                     index.operators, index.useOperator)
            if index_query.keys is not None:
                index_rs = index.query_index(index_query, rs)
        else:
            if limit_result:
                index_result = index._apply_index(query, rs)
            else:
                index_result = index._apply_index(query)

            # Parse (resultset, used_attributes) index return value.
            if index_result:
                index_rs, _ = index_result

        if not index_rs:
            # Short circuit if empty index result.
            rs = None
        else:
            # Provide detailed info about the pure intersection time.
            intersect_id = index_id + '#intersection'
            cr.start_split(intersect_id)
            # weightedIntersection preserves the values from any mappings
            # we get, as some indexes don't return simple sets.
            if hasattr(rs, 'items') or hasattr(index_rs, 'items'):
                _, rs = weightedIntersection(rs, index_rs)
            else:
                rs = intersection(rs, index_rs)

            cr.stop_split(intersect_id)

        # Consider the time it takes to intersect the index result
        # with the total result set to be part of the index time.
        cr.stop_split(index_id, result=index_rs, limit=limit_result)

        return rs

    def search(self, query,
               sort_index=None, reverse=False, limit=None, merge=True):
        """Iterate through the indexes, applying the query to each one. If
        merge is true then return a lazy result set (sorted if appropriate),
        otherwise return the raw (possibly scored) results for later merging.
        Limit is used in conjunction with sorting or scored results to inform
        the catalog how many results you are really interested in. The catalog
        can then use optimizations to save time and memory. The number of
        results is not guaranteed to fall within the limit, however; you
        should still slice or batch the results as usual."""

        # Indexes fulfill a fairly large contract here. We hand each
        # index the query mapping we are given (which may be composed
        # of some combination of web request, kw mappings or plain old dicts)
        # and the index decides what to do with it. If the index finds work
        # for itself in the query, it returns the results and a tuple of
        # the attributes that were used. If the index finds nothing for it
        # to do then it returns None.

        # Canonicalize the request into a sensible query before passing it on
        query = self.make_query(query)

        cr = self.getCatalogPlan(query)
        cr.start()

        plan = cr.plan()
        if not plan:
            plan = self._sorted_search_indexes(query)

        rs = None  # result set
        for index_id in plan:
            # The actual core loop over all indices.
            if index_id not in self.indexes:
                # We can have bogus keys or the plan can contain index names
                # that have been removed in the meantime.
                continue

            rs = self._search_index(cr, index_id, query, rs)
            if not rs:
                break

        if not rs:
            # None of the indexes found anything to do with the query.
            result = LazyCat([])
            cr.stop()
            return result

        # Try to deduce the sort limit from batching arguments.
        b_start, b_size, limit, sort_report_name = self._sort_limit_arguments(
            query, sort_index, reverse, limit)

        # We got some results from the indexes, sort and convert to sequences.
        rlen = len(rs)
        if sort_index is None and hasattr(rs, 'items'):
            # Having an 'items' means we have a data structure with
            # scores. Build a new result set, sort it by score, reverse
            # it, compute the normalized score, and Lazify it.

            if not merge:
                # Don't bother to sort here, return a list of
                # three tuples to be passed later to mergeResults.
                # Note that data_record_normalized_score_ cannot be
                # calculated and will always be 1 in this case.
                result = [(score, (1, score, rid), self.__getitem__)
                          for rid, score in rs.items()]
            else:
                cr.start_split('sort_on#score')

                # Sort it by score.
                rs = rs.byValue(0)
                max = float(rs[0][0])

                # Here we define our getter function inline so that
                # we can conveniently store the max value as a default arg
                # and make the normalized score computation lazy
                def getScoredResult(item, max=max, self=self):
                    """
                    Returns instances of self._v_brains, or whatever is
                    passed into self.useBrains.
                    """
                    score, key = item
                    norm_score = int(100.0 * score / max)
                    return self.instantiate((key, self.data[key]),
                                            score_data=(score, norm_score))

                sequence, slen = self._limit_sequence(
                    rs, rlen, b_start, b_size)
                result = LazyMap(getScoredResult, sequence, slen,
                                 actual_result_count=rlen)
                cr.stop_split('sort_on#score', None)

        elif sort_index is None and not hasattr(rs, 'values'):
            # no scores
            if hasattr(rs, 'keys'):
                rs = rs.keys()
            sequence, slen = self._limit_sequence(
                rs, rlen, b_start, b_size)
            result = LazyMap(self.__getitem__, sequence, slen,
                             actual_result_count=rlen)
        else:
            # Sort. If there are scores, then this block is not
            # reached, therefore 'sort-on' does not happen in the
            # context of a text index query.  This should probably
            # sort by relevance first, then the 'sort-on' attribute.
            cr.start_split(sort_report_name)
            result = self.sortResults(
                rs, sort_index, reverse, limit, merge,
                actual_result_count=rlen, b_start=b_start, b_size=b_size)
            cr.stop_split(sort_report_name, None)

        cr.stop()
        return result

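    # Sketch of the merge=False contract (hypothetical query): each raw
    # entry is a (sortkey, docid-or-score-tuple, getter) triple, which the
    # module-level mergeResults() at the end of this file can interleave
    # with raw results from other catalogs:
    #
    #   raw = catalog.search({'title': 'foo'}, merge=False)
    #   merged = mergeResults([raw, other_raw],
    #                         has_sort_keys=True, reverse=False)
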
    def _sort_iterate_index(self, actual_result_count, result, rs,
                            limit, merge, reverse,
                            sort_index, sort_index_length, sort_spec,
                            second_indexes_key_map):
        # The result set is much larger than the sorted index,
        # so iterate over the sorted index for speed.
        # TODO: len(sort_index) isn't actually what we want for a keyword
        # index, as it's only the unique values, not the documents.
        # Don't use this case while using limit, as we return results of
        # non-flattened intsets, and would have to merge/unflatten those
        # before limiting.
        length = 0
        try:
            intersection(rs, IISet(()))
        except TypeError:
            # rs is not an object in the IIBTree family.
            # Try to turn rs into an IISet.
            rs = IISet(rs)

        if sort_index_length == 1:
            for k, intset in sort_index.items():
                # We have an index that has a set of values for
                # each sort key, so we intersect with each set and
                # get a sorted sequence of the intersections.
                intset = intersection(rs, intset)
                if intset:
                    keys = getattr(intset, 'keys', None)
                    if keys is not None:
                        # Is this ever true?
                        intset = keys()
                    length += len(intset)
                    result.append((k, intset, self.__getitem__))
            result.sort(reverse=reverse)
        else:
            for k, intset in sort_index.items():
                # We have an index that has a set of values for
                # each sort key, so we intersect with each set and
                # get a sorted sequence of the intersections.
                intset = intersection(rs, intset)
                if intset:
                    keys = getattr(intset, 'keys', None)
                    if keys is not None:
                        # Is this ever true?
                        intset = keys()
                    length += len(intset)
                    # sort on secondary index
                    keysets = defaultdict(list)
                    for i in intset:
                        full_key = (k, )
                        for km in second_indexes_key_map:
                            try:
                                full_key += (km[i], )
                            except KeyError:
                                pass
                        keysets[full_key].append(i)
                    for k2, v2 in keysets.items():
                        result.append((k2, v2, self.__getitem__))
            result = multisort(result, sort_spec)

        return (actual_result_count, length, result)

    def _sort_iterate_resultset(self, actual_result_count, result, rs,
                                limit, merge, reverse,
                                sort_index, sort_index_length, sort_spec,
                                second_indexes_key_map):
        # Iterate over the result set getting sort keys from the index.
        # If we are interested in 25% or more of the result set,
        # the N-Best algorithm is slower, so we iterate over all.
        index_key_map = sort_index.documentToKeyMap()

        if sort_index_length == 1:
            for did in rs:
                try:
                    key = index_key_map[did]
                except KeyError:
                    # This document is not in the sort key index, skip it.
                    actual_result_count -= 1
                else:
                    # The reference back to __getitem__ is used in case
                    # we do not merge now and need to intermingle the
                    # results with those of other catalogs while avoiding
                    # the cost of instantiating a LazyMap per result
                    result.append((key, did, self.__getitem__))
            if merge:
                result = sorted(
                    result,
                    key=lambda x: (0,) if x[0] is None else x,
                    reverse=reverse)
        else:
            for did in rs:
                try:
                    full_key = (index_key_map[did], )
                    for km in second_indexes_key_map:
                        full_key += (km[did], )
                except KeyError:
                    # This document is not in the sort key index, skip it.
                    actual_result_count -= 1
                else:
                    result.append((full_key, did, self.__getitem__))
            if merge:
                result = multisort(result, sort_spec)

        if merge and limit is not None:
            result = result[:limit]

        return (actual_result_count, 0, result)

    def _sort_nbest(self, actual_result_count, result, rs,
                    limit, merge, reverse,
                    sort_index, sort_index_length, sort_spec,
                    second_indexes_key_map):
        # Limit / sort results using the N-Best algorithm. This is
        # faster for large sets than a full sort, and uses far less memory.
        index_key_map = sort_index.documentToKeyMap()
        keys = []
        n = 0
        worst = None
        if sort_index_length == 1:
            for did in rs:
                try:
                    key = index_key_map[did]
                except KeyError:
                    # This document is not in the sort key index, skip it.
                    actual_result_count -= 1
                else:
                    if n >= limit and key <= worst:
                        continue
                    i = bisect(keys, key)
                    keys.insert(i, key)
                    result.insert(i, (key, did, self.__getitem__))
                    if n == limit:
                        del keys[0], result[0]
                    else:
                        n += 1
                    worst = keys[0]
            result.reverse()
        else:
            for did in rs:
                try:
                    key = index_key_map[did]
                    full_key = (key, )
                    for km in second_indexes_key_map:
                        full_key += (km[did], )
                except KeyError:
                    # This document is not in the sort key index, skip it.
                    actual_result_count -= 1
                else:
                    if n >= limit and key <= worst:
                        continue
                    i = bisect(keys, key)
                    keys.insert(i, key)
                    result.insert(i, (full_key, did, self.__getitem__))
                    if n == limit:
                        del keys[0], result[0]
                    else:
                        n += 1
                    worst = keys[0]
            result = multisort(result, sort_spec)

        return (actual_result_count, 0, result)

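    # The N-Best idea in miniature (standalone sketch, not catalog code):
    # keep a sorted window of the best `limit` keys and skip anything that
    # cannot beat the current worst entry.
    #
    #   from bisect import bisect
    #   best, limit = [], 3
    #   for key in [5, 1, 9, 3, 7]:
    #       if len(best) == limit and key <= best[0]:
    #           continue                     # cannot enter the top 3
    #       best.insert(bisect(best, key), key)
    #       if len(best) > limit:
    #           del best[0]                  # drop the worst
    #   # best == [5, 7, 9]
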
    def _sort_nbest_reverse(self, actual_result_count, result, rs,
                            limit, merge, reverse,
                            sort_index, sort_index_length, sort_spec,
                            second_indexes_key_map):
        # Limit / sort results using N-Best algorithm in reverse (N-Worst?)
        index_key_map = sort_index.documentToKeyMap()
        keys = []
        n = 0
        best = None
        if sort_index_length == 1:
            for did in rs:
                try:
                    key = index_key_map[did]
                except KeyError:
                    # This document is not in the sort key index, skip it.
                    actual_result_count -= 1
                else:
                    if n >= limit and key >= best:
                        continue
                    i = bisect(keys, key)
                    keys.insert(i, key)
                    result.insert(i, (key, did, self.__getitem__))
                    if n == limit:
                        del keys[-1], result[-1]
                    else:
                        n += 1
                    best = keys[-1]
        else:
            for did in rs:
                try:
                    key = index_key_map[did]
                    full_key = (key, )
                    for km in second_indexes_key_map:
                        full_key += (km[did], )
                except KeyError:
                    # This document is not in the sort key index, skip it.
                    actual_result_count -= 1
                else:
                    if n >= limit and key >= best:
                        continue
                    i = bisect(keys, key)
                    keys.insert(i, key)
                    result.insert(i, (full_key, did, self.__getitem__))
                    if n == limit:
                        del keys[-1], result[-1]
                    else:
                        n += 1
                    best = keys[-1]
            result = multisort(result, sort_spec)

        return (actual_result_count, 0, result)

    def sortResults(self, rs, sort_index,
                    reverse=False, limit=None, merge=True,
                    actual_result_count=None, b_start=0, b_size=None):
        # Sort a result set using one or more sort indexes. Both sort_index
        # and reverse can be lists of indexes and reverse specifications.
        # Return a lazy result set in sorted order if merge is true,
        # otherwise return a list of (sortkey, uid, getter_function)
        # tuples, where sortkey can be a tuple on its own.
        second_indexes = None
        second_indexes_key_map = None
        sort_index_length = 1
        if isinstance(sort_index, list):
            sort_index_length = len(sort_index)
            if sort_index_length > 1:
                second_indexes = sort_index[1:]
                second_indexes_key_map = []
                for si in second_indexes:
                    second_indexes_key_map.append(si.documentToKeyMap())
            sort_index = sort_index[0]

        result = []
        if hasattr(rs, 'keys'):
            rs = rs.keys()
        if actual_result_count is None:
            rlen = len(rs)
            actual_result_count = rlen
        else:
            rlen = actual_result_count

        # don't limit to more than what we have
        if limit is not None and limit >= rlen:
            limit = rlen

        # if we want a batch from the end of the result set, reverse sorting
        # order and limit it, then reverse the result set again
        switched_reverse = False
        if b_size and b_start and b_start > rlen / 2:
            if isinstance(reverse, list):
                reverse = [not r for r in reverse]
            else:
                reverse = not reverse
            switched_reverse = True
            b_end = b_start + b_size
            if b_end >= rlen:
                overrun = rlen - b_end
                if b_start >= rlen:
                    # bail out, we are outside the possible range
                    return LazyCat([], 0, actual_result_count)
                else:
                    b_size += overrun
                b_start = 0
            else:
                b_start = rlen - b_end
            limit = b_start + b_size

        # determine sort_spec
        if isinstance(reverse, list):
            sort_spec = [r and -1 or 1 for r in reverse]
            # limit to current maximum of sort indexes
            sort_spec = sort_spec[:sort_index_length]
            # use first sort order for choosing the algorithm
            first_reverse = reverse[0]
        else:
            sort_spec = []
            for i in range(sort_index_length):
                sort_spec.append(reverse and -1 or 1)
            first_reverse = reverse

        # Special first condition, as it changes post-processing.
        iterate_sort_index = (
            merge and limit is None and (
                rlen > (len(sort_index) * (rlen / 100 + 1))))

        # Choose one of the sort algorithms.
        if iterate_sort_index:
            sort_func = self._sort_iterate_index
        elif limit is None or (limit * 4 > rlen) or sort_index_length > 1:
            sort_func = self._sort_iterate_resultset
        elif first_reverse:
            sort_func = self._sort_nbest
        else:
            sort_func = self._sort_nbest_reverse

        actual_result_count, length, result = sort_func(
            actual_result_count, result, rs,
            limit, merge, reverse,
            sort_index, sort_index_length, sort_spec,
            second_indexes_key_map)

        sequence, slen = self._limit_sequence(
            result, length, b_start, b_size, switched_reverse)

        if iterate_sort_index:
            result = LazyCat(LazyValues(sequence), slen, actual_result_count)
        else:
            if not merge:
                return sequence

            result = LazyValues(sequence)
            result.actual_result_count = actual_result_count

        return LazyMap(self.__getitem__, result, len(result),
                       actual_result_count=actual_result_count)

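    # Summary of the algorithm choice above (illustrative numbers): with no
    # limit and a sort index whose unique values are few relative to the
    # result set, iterate over the index; with limit=100 against rlen=10000
    # and a descending sort, _sort_nbest keeps only the best 100 keys
    # instead of sorting all 10000; when limit * 4 > rlen (we want 25% or
    # more of the results), fall back to iterating the whole result set.
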
    def _get_sort_attr(self, attr, kw):
        """Helper function to find sort-on or sort-order."""
        # There are three different ways to find the attribute:
        # 1. kw[sort-attr]
        # 2. self.sort-attr
        # 3. kw[sort_attr]
        # kw may be a dict or an ExtensionClass MultiMapping, which
        # differ in what get() returns with no default value.
        name = "sort-%s" % attr
        val = kw.get(name, None)
        if val is not None:
            return val
        val = getattr(self, name, None)
        if val is not None:
            return val
        return kw.get("sort_%s" % attr, None)

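    # Lookup order in practice (hypothetical query values): for attr 'on',
    # 'sort-on' in the query wins, then a 'sort-on' attribute set on the
    # catalog, then the underscore spelling:
    #
    #   catalog._get_sort_attr('on', {'sort_on': 'title'})   -> 'title'
    #   catalog._get_sort_attr('on', {'sort-on': 'id',
    #                                 'sort_on': 'title'})   -> 'id'
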
    def _getSortIndex(self, args):
        """Returns a sort index object, a list of them, or None."""
        sort_index_names = self._get_sort_attr("on", args)
        if sort_index_names is not None:
            # self.indexes is always a dict, so get() w/ 1 arg works
            sort_indexes = []
            if not isinstance(sort_index_names, (list, tuple)):
                sort_index_names = [sort_index_names]
            for name in sort_index_names:
                sort_index = self.indexes.get(name)
                if sort_index is None:
                    raise CatalogError('Unknown sort_on index: %s' %
                                       repr(name))
                else:
                    if not hasattr(sort_index, 'documentToKeyMap'):
                        raise CatalogError(
                            'The index chosen for sort_on is '
                            'not capable of being used as a sort index: '
                            '%s' % repr(name))
                sort_indexes.append(sort_index)
            if len(sort_indexes) == 1:
                # be nice and keep the old API intact for single sort_on's
                return sort_indexes[0]
            return sort_indexes
        return None

    def searchResults(self, query=None, _merge=True, **kw):
        # You should pass in a simple dictionary as the first argument,
        # which only contains the relevant query.
        query = self.merge_query_args(query, **kw)
        sort_indexes = self._getSortIndex(query)
        sort_limit = self._get_sort_attr('limit', query)
        reverse = False
        if sort_indexes is not None:
            order = self._get_sort_attr("order", query)
            reverse = []
            if order is None:
                order = ['']
            elif isinstance(order, str):
                order = [order]
            for o in order:
                reverse.append(o.lower() in ('reverse', 'descending'))
            if len(reverse) == 1:
                # be nice and keep the old API intact for single sort_order
                reverse = reverse[0]
        # Perform searches with indexes and sort_index
        return self.search(query, sort_indexes, reverse, sort_limit, _merge)

    __call__ = searchResults

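    # Hedged usage sketch (index and column names are hypothetical; getPath()
    # assumes the standard ZCatalog brain API):
    #
    #   results = catalog.searchResults({'portal_type': 'Document',
    #                                    'sort_on': 'modified',
    #                                    'sort_order': 'descending',
    #                                    'sort_limit': 50})
    #   for brain in results[:10]:
    #       print(brain.getPath())
    #
    # Since __call__ is an alias, catalog({'sort_on': 'modified'}) works too.
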
    def getCatalogPlan(self, query=None):
        """Query time reporting and planning.
        """
        parent = aq_base(aq_parent(self))
        threshold = getattr(parent, 'long_query_time', 0.1)
        return CatalogPlan(self, query, threshold)


def mergeResults(results, has_sort_keys, reverse):
    """Sort/merge sub-results, generating a flat sequence.

    results is a list of result set sequences, all with or without sort keys
    """
    if not has_sort_keys:
        return LazyCat(results)
    else:
        # Concatenate the catalog results into one list and sort it
        # Each result record consists of a list of tuples with three values:
        # (sortkey, docid, catalog__getitem__)
        combined = []
        if len(results) > 1:
            for r in results:
                combined.extend(r)
        elif len(results) == 1:
            combined = results[0]
        else:
            return []
        if reverse:
            combined.sort(reverse=True)
        else:
            combined.sort()
        return LazyMap(lambda rec: rec[2](rec[1]), combined, len(combined))


def multisort(items, sort_spec):
    """Sort a list by multiple keys bidirectionally.

    sort_spec is a list of ones and minus ones, with 1 meaning sort normally
    and -1 meaning sort reversed.

    The length of sort_spec must match the length of the first value in each
    list entry given via `items`.
    """
    first = sort_spec[0]
    different = any([True for s in sort_spec if s != first])
    first_reverse = first != 1 and True or False

    # always do an in-place sort. The default sort via key is really fast and
    # the later cmp approach is efficient when getting mostly sorted data.
    items.sort(reverse=first_reverse, key=itemgetter(0))
    if not different:
        # if all keys are sorted in the same direction, we are done
        return items

    comparers = []
    for i, v in enumerate(sort_spec):
        comparers.append((itemgetter(i), v))

    def comparer(left, right):
        for func, order in comparers:
            # emulate cmp
            a = func(left[0])
            b = func(right[0])
            result = ((a > b) - (a < b))
            if result:
                return order * result
        return 0

    items.sort(key=cmp_to_key(comparer))

    return items
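
# Worked example of multisort (standalone, with made-up sort keys): the
# entries are (sortkey_tuple, docid, getter) triples; a sort_spec of
# [1, -1] sorts the first key ascending and the second descending.
#
#   items = [(('a', 1), 10, None), (('a', 2), 11, None), (('b', 1), 12, None)]
#   multisort(items, [1, -1])
#   # -> [(('a', 2), 11, None), (('a', 1), 10, None), (('b', 1), 12, None)]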