zopefoundation / Products.ZCatalog / build 8099143358

29 Feb 2024 04:15PM UTC coverage: 86.152% (-0.05%) from 86.202%

Pull Request #149: feat(catalog): add rids parameter to return only the rids of documents
Merge 3a3dac08c into f2d6ea367

818 of 1059 branches covered (77.24%)

Branch coverage included in aggregate %.

3 of 5 new or added lines in 1 file covered. (60.0%)

22 existing lines in 1 file now uncovered.

3145 of 3541 relevant lines covered (88.82%)

0.89 hits per line
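
The aggregate percentage folds branch coverage into line coverage. A minimal
sketch of that arithmetic (assuming the line and branch totals above are the
only inputs):

covered = 3145 + 818       # covered lines plus covered branches
relevant = 3541 + 1059     # relevant lines plus relevant branches
print(round(100.0 * covered / relevant, 3))   # -> 86.152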

Source File

/src/Products/ZCatalog/Catalog.py (85.06% covered)
1
##############################################################################
2
#
3
# Copyright (c) 2002 Zope Foundation and Contributors.
4
#
5
# This software is subject to the provisions of the Zope Public License,
6
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
7
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
8
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
9
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
10
# FOR A PARTICULAR PURPOSE
11
#
12
##############################################################################
13

14
import logging
1✔
15
from bisect import bisect
1✔
16
from collections import defaultdict
1✔
17
from functools import cmp_to_key
1✔
18
from operator import itemgetter
1✔
19
from random import randint
1✔
20

21
import Acquisition
1✔
22
import BTrees.Length
1✔
23
import ExtensionClass
1✔
24
from Acquisition import aq_base
1✔
25
from Acquisition import aq_parent
1✔
26
from BTrees.IIBTree import IISet
1✔
27
from BTrees.IIBTree import intersection
1✔
28
from BTrees.IIBTree import weightedIntersection
1✔
29
from BTrees.IOBTree import IOBTree
1✔
30
from BTrees.OIBTree import OIBTree
1✔
31
from Missing import MV
1✔
32
from Persistence import Persistent
1✔
33
from ZTUtils.Lazy import LazyCat
1✔
34
from ZTUtils.Lazy import LazyMap
1✔
35
from ZTUtils.Lazy import LazyValues
1✔
36

37
from Products.PluginIndexes.interfaces import ILimitedResultIndex
1✔
38
from Products.PluginIndexes.interfaces import IQueryIndex
1✔
39
from Products.PluginIndexes.interfaces import ITransposeQuery
1✔
40
from Products.PluginIndexes.util import safe_callable
1✔
41
from Products.ZCatalog.CatalogBrains import AbstractCatalogBrain
1✔
42
from Products.ZCatalog.CatalogBrains import NoBrainer
1✔
43
from Products.ZCatalog.plan import CatalogPlan
1✔
44
from Products.ZCatalog.ProgressHandler import ZLogHandler
1✔
45
from Products.ZCatalog.query import IndexQuery
1✔
46

47

48
LOG = logging.getLogger('Zope.ZCatalog')
1✔
49

50

51
class CatalogError(Exception):
1✔
52
    pass
1✔
53

54

55
class Catalog(Persistent, Acquisition.Implicit, ExtensionClass.Base):
1✔
56
    """ An Object Catalog
57

58
    An Object Catalog maintains a table of object metadata, and a
59
    series of manageable indexes to quickly search for objects
60
    (references in the metadata) that satisfy a search query.
61

62
    This class is not Zope specific, and can be used in any python
63
    program to build catalogs of objects.  Note that it does require
64
    the objects to be Persistent, and thus must be used with ZODB3.
65
    """
66

67
    _v_brains = NoBrainer
1✔
68

69
    def __init__(self, vocabulary=None, brains=None):
1✔
70
        # Catalogs no longer care about vocabularies and lexicons
71
        # so the vocabulary argument is ignored. (Casey)
72

73
        self.schema = {}   # mapping from attribute name to column number
1✔
74
        self.names = ()    # sequence of column names
1✔
75
        self.indexes = {}  # mapping from index name to index object
1✔
76

77
        # The catalog maintains a BTree of object meta_data for
78
        # convenient display on result pages.  meta_data attributes
79
        # are turned into brain objects and returned by
80
        # searchResults.  The indexing machinery indexes all records
81
        # by an integer id (rid). self.data is a mapping from the
82
        # integer id to the meta_data, self.uids is a mapping of the
83
        # object unique identifier to the rid, and self.paths is a
84
        # mapping of the rid to the unique identifier.
85

86
        self.clear()
1✔
87

88
        if brains is not None:
1!
89
            self._v_brains = brains
×
90

91
        self.updateBrains()
1✔
92

93
    def __len__(self):
1✔
94
        return self._length()
1✔
95

96
    def clear(self):
1✔
97
        """ clear catalog """
98

99
        self.data = IOBTree()  # mapping of rid to meta_data
1✔
100
        self.uids = OIBTree()  # mapping of uid to rid
1✔
101
        self.paths = IOBTree()  # mapping of rid to uid
1✔
102
        self._length = BTrees.Length.Length()
1✔
103

104
        for index in self.indexes:
1✔
105
            self.getIndex(index).clear()
1✔
106

107
    def updateBrains(self):
1✔
108
        self.useBrains(self._v_brains)
1✔
109

110
    def __getitem__(self, index):
1✔
111
        """
112
        Returns instances of self._v_brains, or whatever is passed
113
        into self.useBrains.
114
        """
115
        if isinstance(index, tuple):
1✔
116
            # then it contains a score...
117
            normalized_score, score, key = index
1✔
118
        else:
119
            # otherwise no score, set all scores to 1
120
            normalized_score, score, key = (1, 1, index)
1✔
121
        return self.instantiate((key, self.data[key]),
1✔
122
                                score_data=(score, normalized_score))
123

124
    def __setstate__(self, state):
1✔
125
        """ initialize your brains.  This method is called when the
126
        catalog is first activated (from the persistent storage) """
127
        Persistent.__setstate__(self, state)
1✔
128
        self.updateBrains()
1✔
129

130
    def useBrains(self, brains):
1✔
131
        """ Sets up the Catalog to return an object (ala ZTables) that
132
        is created on the fly from the tuple stored in the self.data
133
        Btree.
134
        """
135

136
        class mybrains(AbstractCatalogBrain, brains):  # NOQA
1✔
137
            pass
1✔
138

139
        scopy = self.schema.copy()
1✔
140

141
        schema_len = len(self.schema.keys())
1✔
142
        scopy['data_record_id_'] = schema_len
1✔
143
        scopy['data_record_score_'] = schema_len + 1
1✔
144
        scopy['data_record_normalized_score_'] = schema_len + 2
1✔
145

146
        mybrains.__record_schema__ = scopy
1✔
147

148
        self._v_brains = brains
1✔
149
        self._v_result_class = mybrains
1✔
150

151
    def addColumn(self, name, default_value=None, threshold=10000):
1✔
152
        """Adds a row to the meta data schema"""
153
        schema = self.schema
1✔
154
        names = list(self.names)
1✔
155
        threshold = threshold if threshold is not None else 10000
1✔
156

157
        if name != name.strip():
1✔
158
            # Someone could have mistakenly added a space at the end
159
            # of the input field.
160
            LOG.warning('stripped space from new column %r -> %r', name,
1✔
161
                        name.strip())
162
            name = name.strip()
1✔
163

164
        if name in schema:
1!
165
            raise CatalogError('The column %s already exists' % name)
×
166

167
        if name[0] == '_':
1✔
168
            raise CatalogError('Cannot cache fields beginning with "_"')
1✔
169

170
        values = schema.values()
1✔
171
        if values:
1✔
172
            schema[name] = max(values) + 1
1✔
173
        else:
174
            schema[name] = 0
1✔
175
        names.append(name)
1✔
176

177
        if default_value in (None, ''):
1✔
178
            default_value = MV
1✔
179

180
        if len(self):
1✔
181
            pghandler = ZLogHandler(threshold)
1✔
182
            pghandler.init('Adding %s column' % name, len(self))
1✔
183
            for i, (key, value) in enumerate(self.data.iteritems()):
1✔
184
                pghandler.report(i)
1✔
185
                self.data[key] = value + (default_value, )
1✔
186
            pghandler.finish()
1✔
187

188
        self.names = tuple(names)
1✔
189
        self.schema = schema
1✔
190

191
        # new column? update the brain
192
        self.updateBrains()
1✔
193

194
    def delColumn(self, name, threshold=10000):
1✔
195
        """Deletes a row from the meta data schema"""
196
        names = list(self.names)
1✔
197
        _index = names.index(name)
1✔
198
        threshold = threshold if threshold is not None else 10000
1✔
199

200
        if name not in self.schema:
1!
201
            LOG.error('delColumn attempted to delete nonexistent '
×
202
                      'column %s.', str(name))
203
            return
×
204

205
        del names[_index]
1✔
206

207
        # rebuild the schema
208
        schema = {}
1✔
209
        for i, name in enumerate(names):
1✔
210
            schema[name] = i
1✔
211

212
        self.schema = schema
1✔
213
        self.names = tuple(names)
1✔
214

215
        # update the brain
216
        self.updateBrains()
1✔
217

218
        # remove the column value from each record
219
        if len(self):
1✔
220
            _next_index = _index + 1
1✔
221
            pghandler = ZLogHandler(threshold)
1✔
222
            pghandler.init('Deleting %s column' % name, len(self))
1✔
223
            for i, (key, value) in enumerate(self.data.iteritems()):
1✔
224
                pghandler.report(i)
1✔
225
                self.data[key] = value[:_index] + value[_next_index:]
1✔
226
            pghandler.finish()
1✔
227

228
    def addIndex(self, name, index_type):
1✔
229
        """Create a new index, given a name and a index_type.
230

231
        Old format: index_type was a string, 'FieldIndex' 'TextIndex' or
232
        'KeywordIndex' is no longer valid; the actual index must be
233
        instantiated and passed in to addIndex.
234

235
        New format: index_type is the actual index object to be stored.
236
        """
237

238
        if name in self.indexes:
1!
239
            raise CatalogError('The index %s already exists' % name)
×
240

241
        if name.startswith('_'):
1!
242
            raise CatalogError('Cannot index fields beginning with "_"')
×
243

244
        if not name:
1!
245
            raise CatalogError('Name of index is empty')
×
246

247
        if name != name.strip():
1✔
248
            # Someone could have mistakenly added a space at the end
249
            # of the input field.
250
            LOG.warning('stripped space from new index %r -> %r', name,
1✔
251
                        name.strip())
252
            name = name.strip()
1✔
253

254
        indexes = self.indexes
1✔
255

256
        if isinstance(index_type, str):
1!
257
            raise TypeError("Catalog addIndex now requires the index type to"
×
258
                            "be resolved prior to adding; create the proper "
259
                            "index in the caller.")
260

261
        indexes[name] = index_type
1✔
262
        self.indexes = indexes
1✔
263

264
    def delIndex(self, name):
1✔
265
        """ deletes an index """
266

267
        if name not in self.indexes:
1!
268
            raise CatalogError('The index %s does not exist' % name)
×
269

270
        indexes = self.indexes
1✔
271
        del indexes[name]
1✔
272
        self.indexes = indexes
1✔
273

274
    def getIndex(self, name):
1✔
275
        """ get an index wrapped in the catalog """
276
        return self.indexes[name].__of__(self)
1✔
277

278
    def updateMetadata(self, object, uid, index):
1✔
279
        """ Given an object and a uid, update the column data for the
280
        uid with the object data iff the object has changed """
281
        data = self.data
1✔
282
        newDataRecord = self.recordify(object)
1✔
283

284
        if index is None:
1✔
285
            index = getattr(self, '_v_nextid', 0)
1✔
286
            if index % 4000 == 0:
1✔
287
                index = randint(-2000000000, 2000000000)
1✔
288
            while not data.insert(index, newDataRecord):
1!
289
                index = randint(-2000000000, 2000000000)
×
290

291
            # We want ids to be somewhat random, but there are
292
            # advantages for having some ids generated
293
            # sequentially when many catalog updates are done at
294
            # once, such as when reindexing or bulk indexing.
295
            # We allocate ids sequentially using a volatile base,
296
            # so different threads get different bases. This
297
            # further reduces conflict and reduces churn in
298
            # here and in result sets when bulk indexing.
299
            self._v_nextid = index + 1
1✔
300
        else:
301
            if data.get(index, 0) != newDataRecord:
1✔
302
                data[index] = newDataRecord
1✔
303
        return index
1✔
304

305
    # the cataloging API
306

307
    def catalogObject(self, object, uid, threshold=None, idxs=None,
1✔
308
                      update_metadata=True):
309
        """
310
        Adds an object to the Catalog by iteratively applying it to
311
        all indexes.
312

313
        'object' is the object to be cataloged
314

315
        'uid' is the unique Catalog identifier for this object
316

317
        If 'idxs' is specified (as a sequence), apply the object only
318
        to the named indexes.
319

320
        If 'update_metadata' is true (the default), also update metadata for
321
        the object.  If the object is new to the catalog, this flag has
322
        no effect (metadata is always created for new objects).
323
        """
324
        if idxs is None:
1✔
325
            idxs = []
1✔
326

327
        index = self.uids.get(uid, None)
1✔
328

329
        if index is None:
1✔
330
            # we are inserting new data
331
            index = self.updateMetadata(object, uid, None)
1✔
332
            self._length.change(1)
1✔
333
            self.uids[uid] = index
1✔
334
            self.paths[index] = uid
1✔
335
        elif update_metadata:
1✔
336
            # we are updating and we need to update metadata
337
            self.updateMetadata(object, uid, index)
1✔
338

339
        # do indexing
340
        total = 0
1✔
341

342
        if idxs == []:
1✔
343
            use_indexes = self.indexes.keys()
1✔
344
        else:
345
            use_indexes = set(idxs)
1✔
346
            for iid in self.indexes:
1✔
347
                x = self.getIndex(iid)
1✔
348
                if ITransposeQuery.providedBy(x):
1!
349
                    # supported index names for query optimization
350
                    names = x.getIndexNames()
×
351
                    intersec = use_indexes.intersection(names)
×
352
                    # add current index for indexing if supported index
353
                    # names are member of idxs
354
                    if intersec:
×
355
                        use_indexes.update([iid])
×
356

357
            use_indexes = list(use_indexes)
1✔
358

359
        for name in use_indexes:
1✔
360
            x = self.getIndex(name)
1✔
361
            if hasattr(x, 'index_object'):
1!
362
                blah = x.index_object(index, object, threshold)
1✔
363
                total = total + blah
1✔
364
            else:
365
                LOG.error('catalogObject was passed bad index '
×
366
                          'object %s.', str(x))
367

368
        return total
1✔
369

370
    def uncatalogObject(self, uid):
1✔
371
        """
372
        Uncatalog an object from the Catalog.  'uid' is a unique
373
        Catalog identifier
374

375
        Note, the uid must be the same as when the object was
376
        catalogued, otherwise it will not get removed from the catalog
377

378
        This method should not raise an exception if the uid cannot
379
        be found in the catalog.
380
        """
381
        data = self.data
1✔
382
        uids = self.uids
1✔
383
        paths = self.paths
1✔
384
        indexes = self.indexes.keys()
1✔
385
        rid = uids.get(uid, None)
1✔
386

387
        if rid is not None:
1✔
388
            for name in indexes:
1✔
389
                x = self.getIndex(name)
1✔
390
                if hasattr(x, 'unindex_object'):
1!
391
                    x.unindex_object(rid)
1✔
392
            del data[rid]
1✔
393
            del paths[rid]
1✔
394
            del uids[uid]
1✔
395
            self._length.change(-1)
1✔
396

397
        else:
398
            LOG.error('uncatalogObject unsuccessfully '
1✔
399
                      'attempted to uncatalog an object '
400
                      'with a uid of %s. ', str(uid))
401

402
    def uniqueValuesFor(self, name):
1✔
403
        """ return unique values for FieldIndex name """
404
        return tuple(self.getIndex(name).uniqueValues())
1✔
405

406
    def hasuid(self, uid):
1✔
407
        """ return the rid if catalog contains an object with uid """
408
        return self.uids.get(uid)
×
409

410
    def recordify(self, object):
1✔
411
        """ turns an object into a record tuple """
412
        record = []
1✔
413
        # the unique id is always the first element
414
        for x in self.names:
1✔
415
            attr = getattr(object, x, MV)
1✔
416
            if (attr is not MV and safe_callable(attr)):
1✔
417
                attr = attr()
1✔
418
            record.append(attr)
1✔
419
        return tuple(record)
1✔
420

421
    def _maintain_zodb_cache(self):
1✔
422
        parent = aq_parent(self)
1✔
423
        if hasattr(aq_base(parent), 'maintain_zodb_cache'):
1✔
424
            parent.maintain_zodb_cache()
1✔
425

426
    def instantiate(self, record, score_data=None):
1✔
427
        """ internal method: create and initialise search result object.
428
        record should be a tuple of (document RID, metadata columns tuple),
429
        score_data can be a tuple of (score, normalized score) or be omitted"""
430
        self._maintain_zodb_cache()
1✔
431
        key, data = record
1✔
432
        klass = self._v_result_class
1✔
433
        if score_data:
1!
434
            score, normalized_score = score_data
1✔
435
            schema_len = len(klass.__record_schema__)
1✔
436
            if schema_len == len(data) + 3:
1!
437
                # if we have complete data, create in a single pass
438
                data = tuple(data) + (key, score, normalized_score)
1✔
439
                return klass(data).__of__(aq_parent(self))
1✔
440
        r = klass(data)
×
441
        r.data_record_id_ = key
×
442
        if score_data:
×
443
            # preserved during refactoring for compatibility reasons:
444
            # can only be reached if score_data is present,
445
            # but schema length is not equal to len(data) + 3
446
            # no known use cases
447
            r.data_record_score_ = score
×
448
            r.data_record_normalized_score_ = normalized_score
×
449
            return r.__of__(aq_parent(self))
×
450
        return r.__of__(self)
×
451

452
    def getMetadataForRID(self, rid):
1✔
453
        record = self.data[rid]
1✔
454
        result = {}
1✔
455
        for (key, pos) in self.schema.items():
1✔
456
            result[key] = record[pos]
1✔
457
        return result
1✔
458

459
    def getIndexDataForRID(self, rid):
1✔
460
        result = {}
1✔
461
        for name in self.indexes:
1✔
462
            result[name] = self.getIndex(name).getEntryForObject(rid, "")
1✔
463
        return result
1✔
464

465
    def merge_query_args(self, query=None, **kw):
1✔
466
        if not kw and isinstance(query, dict):
1✔
467
            # Short cut for the best practice.
468
            return query
1✔
469

470
        merged_query = {}
1✔
471
        if isinstance(query, dict):
1!
472
            merged_query.update(query)
×
473
        merged_query.update(kw)
1✔
474
        return merged_query
1✔
475

476
    def make_query(self, query):
1✔
477
        for iid in self.indexes:
1✔
478
            index = self.getIndex(iid)
1✔
479
            if ITransposeQuery.providedBy(index):
1!
480
                query = index.make_query(query)
×
481

482
        # Canonicalize tuple/list query arguments.
483
        new_query = {}
1✔
484
        for key, value in query.items():
1✔
485
            if isinstance(value, (list, tuple)):
1✔
486
                new_query[key] = list(sorted(value))
1✔
487
            else:
488
                new_query[key] = value
1✔
489

490
        return new_query
1✔
491

492
    def _get_index_query_names(self, index):
1✔
493
        if hasattr(index, 'getIndexQueryNames'):
1!
494
            return index.getIndexQueryNames()
1✔
495
        return (index.getId(),)
×
496

497
    def _sort_limit_arguments(self, query, sort_index, reverse, limit):
1✔
498
        b_start = int(query.get('b_start', 0))
1✔
499
        b_size = query.get('b_size', None)
1✔
500
        if b_size is not None:
1✔
501
            b_size = int(b_size)
1✔
502

503
        if b_size is not None:
1✔
504
            limit = b_start + b_size
1✔
505
        elif limit and b_size is None:
1✔
506
            b_size = limit
1✔
507

508
        if sort_index is None:
1✔
509
            sort_report_name = None
1✔
510
        else:
511
            if isinstance(sort_index, list):
1✔
512
                sort_name = '-'.join(i.getId() for i in sort_index)
1✔
513
            else:
514
                sort_name = sort_index.getId()
1✔
515
            if isinstance(reverse, list):
1✔
516
                reverse_name = '-'.join(
1✔
517
                    'desc' if r else 'asc' for r in reverse)
518
            else:
519
                reverse_name = 'desc' if reverse else 'asc'
1✔
520
            sort_report_name = 'sort_on#' + sort_name + '#' + reverse_name
1✔
521
            if limit is not None:
1✔
522
                sort_report_name += '#limit-%s' % limit
1✔
523
        return (b_start, b_size, limit, sort_report_name)
1✔
524

525
    def _sorted_search_indexes(self, query):
1✔
526
        # Simple implementation ordering only by limited result support
527
        query_keys = query.keys()
1✔
528
        order = []
1✔
529
        for name, index in self.indexes.items():
1✔
530
            for attr in self._get_index_query_names(index):
1✔
531
                if attr in query_keys:
1✔
532
                    order.append((ILimitedResultIndex.providedBy(index), name))
1✔
533
        order.sort()
1✔
534
        return [i[1] for i in order]
1✔
535

536
    def _limit_sequence(self, sequence, slen, b_start=0, b_size=None,
1✔
537
                        switched_reverse=False):
538
        if b_size is not None:
1✔
539
            sequence = sequence[b_start:b_start + b_size]
1✔
540
            if slen:
1!
541
                slen = len(sequence)
×
542
        if switched_reverse:
1✔
543
            sequence.reverse()
1✔
544
        return (sequence, slen)
1✔
545

546
    def _search_index(self, cr, index_id, query, rs):
1✔
547
        cr.start_split(index_id)
1✔
548

549
        index_rs = None
1✔
550
        index = self.getIndex(index_id)
1✔
551
        limit_result = ILimitedResultIndex.providedBy(index)
1✔
552

553
        if IQueryIndex.providedBy(index):
1!
554
            index_query = IndexQuery(query, index.id, index.query_options,
1✔
555
                                     index.operators, index.useOperator)
556
            if index_query.keys is not None:
1!
557
                index_rs = index.query_index(index_query, rs)
1✔
558
        else:
559
            if limit_result:
×
560
                index_result = index._apply_index(query, rs)
×
561
            else:
562
                index_result = index._apply_index(query)
×
563

564
            # Parse (resultset, used_attributes) index return value.
565
            if index_result:
×
566
                index_rs, _ = index_result
×
567

568
        if not index_rs:
1✔
569
            # Short circuit if empty index result.
570
            rs = None
1✔
571
        else:
572
            # Provide detailed info about the pure intersection time.
573
            intersect_id = index_id + '#intersection'
1✔
574
            cr.start_split(intersect_id)
1✔
575
            # weightedIntersection preserves the values from any mappings
576
            # we get, as some indexes don't return simple sets.
577
            if hasattr(rs, 'items') or hasattr(index_rs, 'items'):
1✔
578
                _, rs = weightedIntersection(rs, index_rs)
1✔
579
            else:
580
                rs = intersection(rs, index_rs)
1✔
581

582
            cr.stop_split(intersect_id)
1✔
583

584
        # Consider the time it takes to intersect the index result
585
        # with the total result set to be part of the index time.
586
        cr.stop_split(index_id, result=index_rs, limit=limit_result)
1✔
587

588
        return rs
1✔
589

590
    def search(self, query,
1✔
591
               sort_index=None, reverse=False, limit=None, merge=True, rids=False):
592
        """Iterate through the indexes, applying the query to each one. If
593
        merge is true then return a lazy result set (sorted if appropriate)
594
        otherwise return the raw (possibly scored) results for later merging.
595
        Limit is used in conjunction with sorting or scored results to inform
596
        the catalog how many results you are really interested in. The catalog
597
        can then use optimizations to save time and memory. The number of
598
        results is not guaranteed to fall within the limit, however; you should
599
        still slice or batch the results as usual."""
600

601
        # Indexes fulfill a fairly large contract here. We hand each
602
        # index the query mapping we are given (which may be composed
603
        # of some combination of web request, kw mappings or plain old dicts)
604
        # and the index decides what to do with it. If the index finds work
605
        # for itself in the query, it returns the results and a tuple of
606
        # the attributes that were used. If the index finds nothing for it
607
        # to do then it returns None.
608

609
        # Canonicalize the request into a sensible query before passing it on
610
        query = self.make_query(query)
1✔
611

612
        cr = self.getCatalogPlan(query)
1✔
613
        cr.start()
1✔
614

615
        plan = cr.plan()
1✔
616
        if not plan:
1✔
617
            plan = self._sorted_search_indexes(query)
1✔
618

619
        rs = None  # result set
1✔
620
        for index_id in plan:
1✔
621
            # The actual core loop over all indices.
622
            if index_id not in self.indexes:
1✔
623
                # We can have bogus keys or the plan can contain index names
624
                # that have been removed in the meantime.
625
                continue
1✔
626

627
            rs = self._search_index(cr, index_id, query, rs)
1✔
628
            if not rs:
1✔
629
                break
1✔
630

631
        if not rs:
1✔
632
            # None of the indexes found anything to do with the query.
633
            result = LazyCat([])
1✔
634
            cr.stop()
1✔
635
            return result
1✔
636

637
        if rids:
1!
NEW
638
            cr.stop()
×
NEW
639
            return rs
×
640

641
        # Try to deduce the sort limit from batching arguments.
642
        b_start, b_size, limit, sort_report_name = self._sort_limit_arguments(
1✔
643
            query, sort_index, reverse, limit)
644

645
        # We got some results from the indexes, sort and convert to sequences.
646
        rlen = len(rs)
1✔
647
        if sort_index is None and hasattr(rs, 'items'):
1✔
648
            # Having a 'items' means we have a data structure with
649
            # scores. Build a new result set, sort it by score, reverse
650
            # it, compute the normalized score, and Lazify it.
651

652
            if not merge:
1✔
653
                # Don't bother to sort here, return a list of
654
                # three tuples to be passed later to mergeResults.
655
                # Note that data_record_normalized_score_ cannot be
656
                # calculated and will always be 1 in this case.
657
                result = [(score, (1, score, rid), self.__getitem__)
1✔
658
                          for rid, score in rs.items()]
659
            else:
660
                cr.start_split('sort_on#score')
1✔
661

662
                # Sort it by score.
663
                rs = rs.byValue(0)
1✔
664
                max = float(rs[0][0])
1✔
665

666
                # Here we define our getter function inline so that
667
                # we can conveniently store the max value as a default arg
668
                # and make the normalized score computation lazy
669
                def getScoredResult(item, max=max, self=self):
1✔
670
                    """
671
                    Returns instances of self._v_brains, or whatever is
672
                    passed into self.useBrains.
673
                    """
674
                    score, key = item
1✔
675
                    norm_score = int(100.0 * score / max)
1✔
676
                    return self.instantiate((key, self.data[key]),
1✔
677
                                            score_data=(score, norm_score))
678

679
                sequence, slen = self._limit_sequence(
1✔
680
                    rs, rlen, b_start, b_size)
681
                result = LazyMap(getScoredResult, sequence, slen,
1✔
682
                                 actual_result_count=rlen)
683
                cr.stop_split('sort_on#score', None)
1✔
684

685
        elif sort_index is None and not hasattr(rs, 'values'):
1✔
686
            # no scores
687
            if hasattr(rs, 'keys'):
1!
688
                rs = rs.keys()
1✔
689
            sequence, slen = self._limit_sequence(
1✔
690
                rs, rlen, b_start, b_size)
691
            result = LazyMap(self.__getitem__, sequence, slen,
1✔
692
                             actual_result_count=rlen)
693
        else:
694
            # Sort. If there are scores, then this block is not
695
            # reached, therefore 'sort-on' does not happen in the
696
            # context of a text index query.  This should probably
697
            # sort by relevance first, then the 'sort-on' attribute.
698
            cr.start_split(sort_report_name)
1✔
699
            result = self.sortResults(
1✔
700
                rs, sort_index, reverse, limit, merge,
701
                actual_result_count=rlen, b_start=b_start, b_size=b_size)
702
            cr.stop_split(sort_report_name, None)
1✔
703

704
        cr.stop()
1✔
705
        return result
1✔
706

707
    def _sort_iterate_index(self, actual_result_count, result, rs,
1✔
708
                            limit, merge, reverse,
709
                            sort_index, sort_index_length, sort_spec,
710
                            second_indexes_key_map):
711
        # The result set is much larger than the sorted index,
712
        # so iterate over the sorted index for speed.
713
        # TODO: len(sort_index) isn't actually what we want for a keyword
714
        # index, as it's only the unique values, not the documents.
715
        # Don't use this case while using limit, as we return results of
716
        # non-flattened intsets, and would have to merge/unflatten those
717
        # before limiting.
718
        length = 0
1✔
719
        try:
1✔
720
            intersection(rs, IISet(()))
1✔
UNCOV
721
        except TypeError:
×
722
            # rs is not an object in the IIBTree family.
723
            # Try to turn rs into an IISet.
UNCOV
724
            rs = IISet(rs)
×
725

726
        if sort_index_length == 1:
1✔
727
            for k, intset in sort_index.items():
1✔
728
                # We have an index that has a set of values for
729
                # each sort key, so we intersect with each set and
730
                # get a sorted sequence of the intersections.
731
                intset = intersection(rs, intset)
1✔
732
                if intset:
1!
733
                    keys = getattr(intset, 'keys', None)
1✔
734
                    if keys is not None:
1!
735
                        # Is this ever true?
736
                        intset = keys()
1✔
737
                    length += len(intset)
1✔
738
                    result.append((k, intset, self.__getitem__))
1✔
739
            result.sort(reverse=reverse)
1✔
740
        else:
741
            for k, intset in sort_index.items():
1✔
742
                # We have an index that has a set of values for
743
                # each sort key, so we intersect with each set and
744
                # get a sorted sequence of the intersections.
745
                intset = intersection(rs, intset)
1✔
746
                if intset:
1!
747
                    keys = getattr(intset, 'keys', None)
1✔
748
                    if keys is not None:
1!
749
                        # Is this ever true?
750
                        intset = keys()
1✔
751
                    length += len(intset)
1✔
752
                    # sort on secondary index
753
                    keysets = defaultdict(list)
1✔
754
                    for i in intset:
1✔
755
                        full_key = (k, )
1✔
756
                        for km in second_indexes_key_map:
1✔
757
                            try:
1✔
758
                                full_key += (km[i], )
1✔
UNCOV
759
                            except KeyError:
×
760
                                pass
×
761
                        keysets[full_key].append(i)
1✔
762
                    for k2, v2 in keysets.items():
1✔
763
                        result.append((k2, v2, self.__getitem__))
1✔
764
            result = multisort(result, sort_spec)
1✔
765

766
        return (actual_result_count, length, result)
1✔
767

768
    def _sort_iterate_resultset(self, actual_result_count, result, rs,
1✔
769
                                limit, merge, reverse,
770
                                sort_index, sort_index_length, sort_spec,
771
                                second_indexes_key_map):
772
        # Iterate over the result set getting sort keys from the index.
773
        # If we are interested in at least 25% or more of the result set,
774
        # the N-Best algorithm is slower, so we iterate over all.
775
        index_key_map = sort_index.documentToKeyMap()
1✔
776

777
        if sort_index_length == 1:
1✔
778
            for did in rs:
1✔
779
                try:
1✔
780
                    key = index_key_map[did]
1✔
UNCOV
781
                except KeyError:
×
782
                    # This document is not in the sort key index, skip it.
UNCOV
783
                    actual_result_count -= 1
×
784
                else:
785
                    # The reference back to __getitem__ is used in case
786
                    # we do not merge now and need to intermingle the
787
                    # results with those of other catalogs while avoiding
788
                    # the cost of instantiating a LazyMap per result
789
                    result.append((key, did, self.__getitem__))
1✔
790
            if merge:
1✔
791
                result = sorted(
1✔
792
                    result,
793
                    key=lambda x: (0,) if x[0] is None else x,
794
                    reverse=reverse)
795
        else:
796
            for did in rs:
1✔
797
                try:
1✔
798
                    full_key = (index_key_map[did], )
1✔
799
                    for km in second_indexes_key_map:
1✔
800
                        full_key += (km[did], )
1✔
UNCOV
801
                except KeyError:
×
802
                    # This document is not in the sort key index, skip it.
UNCOV
803
                    actual_result_count -= 1
×
804
                else:
805
                    result.append((full_key, did, self.__getitem__))
1✔
806
            if merge:
1!
807
                result = multisort(result, sort_spec)
1✔
808

809
        if merge and limit is not None:
1✔
810
            result = result[:limit]
1✔
811

812
        return (actual_result_count, 0, result)
1✔
813

814
    def _sort_nbest(self, actual_result_count, result, rs,
1✔
815
                    limit, merge, reverse,
816
                    sort_index, sort_index_length, sort_spec,
817
                    second_indexes_key_map):
818
        # Limit / sort results using N-Best algorithm
819
        # This is faster for large sets than a full sort
820
        # And uses far less memory
821
        index_key_map = sort_index.documentToKeyMap()
1✔
822
        keys = []
1✔
823
        n = 0
1✔
824
        worst = None
1✔
825
        if sort_index_length == 1:
1!
826
            for did in rs:
1✔
827
                try:
1✔
828
                    key = index_key_map[did]
1✔
UNCOV
829
                except KeyError:
×
830
                    # This document is not in the sort key index, skip it.
UNCOV
831
                    actual_result_count -= 1
×
832
                else:
833
                    if n >= limit and key <= worst:
1✔
834
                        continue
1✔
835
                    i = bisect(keys, key)
1✔
836
                    keys.insert(i, key)
1✔
837
                    result.insert(i, (key, did, self.__getitem__))
1✔
838
                    if n == limit:
1✔
839
                        del keys[0], result[0]
1✔
840
                    else:
841
                        n += 1
1✔
842
                    worst = keys[0]
1✔
843
            result.reverse()
1✔
844
        else:
UNCOV
845
            for did in rs:
×
846
                try:
×
847
                    key = index_key_map[did]
×
848
                    full_key = (key, )
×
849
                    for km in second_indexes_key_map:
×
850
                        full_key += (km[did], )
×
851
                except KeyError:
×
852
                    # This document is not in the sort key index, skip it.
UNCOV
853
                    actual_result_count -= 1
×
854
                else:
UNCOV
855
                    if n >= limit and key <= worst:
×
856
                        continue
×
857
                    i = bisect(keys, key)
×
858
                    keys.insert(i, key)
×
859
                    result.insert(i, (full_key, did, self.__getitem__))
×
860
                    if n == limit:
×
861
                        del keys[0], result[0]
×
862
                    else:
UNCOV
863
                        n += 1
×
864
                    worst = keys[0]
×
865
            result = multisort(result, sort_spec)
×
866

867
        return (actual_result_count, 0, result)
1✔
868

869
    def _sort_nbest_reverse(self, actual_result_count, result, rs,
1✔
870
                            limit, merge, reverse,
871
                            sort_index, sort_index_length, sort_spec,
872
                            second_indexes_key_map):
873
        # Limit / sort results using N-Best algorithm in reverse (N-Worst?)
874
        index_key_map = sort_index.documentToKeyMap()
1✔
875
        keys = []
1✔
876
        n = 0
1✔
877
        best = None
1✔
878
        if sort_index_length == 1:
1!
879
            for did in rs:
1✔
880
                try:
1✔
881
                    key = index_key_map[did]
1✔
882
                except KeyError:
1✔
883
                    # This document is not in the sort key index, skip it.
884
                    actual_result_count -= 1
1✔
885
                else:
886
                    if n >= limit and key >= best:
1✔
887
                        continue
1✔
888
                    i = bisect(keys, key)
1✔
889
                    keys.insert(i, key)
1✔
890
                    result.insert(i, (key, did, self.__getitem__))
1✔
891
                    if n == limit:
1✔
892
                        del keys[-1], result[-1]
1✔
893
                    else:
894
                        n += 1
1✔
895
                    best = keys[-1]
1✔
896
        else:
UNCOV
897
            for did in rs:
×
898
                try:
×
899
                    key = index_key_map[did]
×
900
                    full_key = (key, )
×
901
                    for km in second_indexes_key_map:
×
902
                        full_key += (km[did], )
×
903
                except KeyError:
×
904
                    # This document is not in the sort key index, skip it.
UNCOV
905
                    actual_result_count -= 1
×
906
                else:
UNCOV
907
                    if n >= limit and key >= best:
×
908
                        continue
×
909
                    i = bisect(keys, key)
×
910
                    keys.insert(i, key)
×
911
                    result.insert(i, (full_key, did, self.__getitem__))
×
912
                    if n == limit:
×
913
                        del keys[-1], result[-1]
×
914
                    else:
UNCOV
915
                        n += 1
×
916
                    best = keys[-1]
×
917
            result = multisort(result, sort_spec)
×
918

919
        return (actual_result_count, 0, result)
1✔
920

921
    def sortResults(self, rs, sort_index,
1✔
922
                    reverse=False, limit=None, merge=True,
923
                    actual_result_count=None, b_start=0, b_size=None):
924
        # Sort a result set using one or more sort indexes. Both sort_index
925
        # and reverse can be lists of indexes and reverse specifications.
926
        # Return a lazy result set in sorted order if merge is true otherwise
927
        # returns a list of (sortkey, uid, getter_function) tuples, where
928
        # sortkey can be a tuple on its own.
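        # Editorial sketch (assumed index objects): a two-level sort with mixed
        # directions is expressed through parallel lists, e.g.
        #   catalog.sortResults(rs, [date_index, title_index],
        #                       reverse=[True, False], limit=20)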
929
        second_indexes = None
1✔
930
        second_indexes_key_map = None
1✔
931
        sort_index_length = 1
1✔
932
        if isinstance(sort_index, list):
1✔
933
            sort_index_length = len(sort_index)
1✔
934
            if sort_index_length > 1:
1!
935
                second_indexes = sort_index[1:]
1✔
936
                second_indexes_key_map = []
1✔
937
                for si in second_indexes:
1✔
938
                    second_indexes_key_map.append(si.documentToKeyMap())
1✔
939
            sort_index = sort_index[0]
1✔
940

941
        result = []
1✔
942
        if hasattr(rs, 'keys'):
1!
943
            rs = rs.keys()
1✔
944
        if actual_result_count is None:
1✔
945
            rlen = len(rs)
1✔
946
            actual_result_count = rlen
1✔
947
        else:
948
            rlen = actual_result_count
1✔
949

950
        # don't limit to more than what we have
951
        if limit is not None and limit >= rlen:
1✔
952
            limit = rlen
1✔
953

954
        # if we want a batch from the end of the result set, reverse sorting
955
        # order and limit it, then reverse the result set again
956
        switched_reverse = False
1✔
957
        if b_size and b_start and b_start > rlen / 2:
1✔
958
            if isinstance(reverse, list):
1✔
959
                reverse = [not r for r in reverse]
1✔
960
            else:
961
                reverse = not reverse
1✔
962
            switched_reverse = True
1✔
963
            b_end = b_start + b_size
1✔
964
            if b_end >= rlen:
1✔
965
                overrun = rlen - b_end
1✔
966
                if b_start >= rlen:
1✔
967
                    # bail out, we are outside the possible range
968
                    return LazyCat([], 0, actual_result_count)
1✔
969
                else:
970
                    b_size += overrun
1✔
971
                b_start = 0
1✔
972
            else:
973
                b_start = rlen - b_end
1✔
974
            limit = b_start + b_size
1✔
975

976
        # determine sort_spec
977
        if isinstance(reverse, list):
1✔
978
            sort_spec = [r and -1 or 1 for r in reverse]
1✔
979
            # limit to current maximum of sort indexes
980
            sort_spec = sort_spec[:sort_index_length]
1✔
981
            # use first sort order for choosing the algorithm
982
            first_reverse = reverse[0]
1✔
983
        else:
984
            sort_spec = []
1✔
985
            for i in range(sort_index_length):
1✔
986
                sort_spec.append(reverse and -1 or 1)
1✔
987
            first_reverse = reverse
1✔
988

989
        # Special first condition, as it changes post-processing.
990
        iterate_sort_index = (
1✔
991
            merge and limit is None and (
992
                rlen > (len(sort_index) * (rlen / 100 + 1))))
993

994
        # Choose one of the sort algorithms.
995
        if iterate_sort_index:
1✔
996
            sort_func = self._sort_iterate_index
1✔
997
        elif limit is None or (limit * 4 > rlen) or sort_index_length > 1:
1✔
998
            sort_func = self._sort_iterate_resultset
1✔
999
        elif first_reverse:
1✔
1000
            sort_func = self._sort_nbest
1✔
1001
        else:
1002
            sort_func = self._sort_nbest_reverse
1✔
1003

1004
        actual_result_count, length, result = sort_func(
1✔
1005
            actual_result_count, result, rs,
1006
            limit, merge, reverse,
1007
            sort_index, sort_index_length, sort_spec,
1008
            second_indexes_key_map)
1009

1010
        sequence, slen = self._limit_sequence(
1✔
1011
            result, length, b_start, b_size, switched_reverse)
1012

1013
        if iterate_sort_index:
1✔
1014
            result = LazyCat(LazyValues(sequence), slen, actual_result_count)
1✔
1015
        else:
1016
            if not merge:
1✔
1017
                return sequence
1✔
1018

1019
            result = LazyValues(sequence)
1✔
1020
            result.actual_result_count = actual_result_count
1✔
1021

1022
        return LazyMap(self.__getitem__, result, len(result),
1✔
1023
                       actual_result_count=actual_result_count)
1024

1025
    def _get_sort_attr(self, attr, kw):
1✔
1026
        """Helper function to find sort-on or sort-order."""
1027
        # There are three different ways to find the attribute:
1028
        # 1. kw[sort-attr]
1029
        # 2. self.sort-attr
1030
        # 3. kw[sort_attr]
1031
        # kw may be a dict or an ExtensionClass MultiMapping, which
1032
        # differ in what get() returns with no default value.
1033
        name = "sort-%s" % attr
1✔
1034
        val = kw.get(name, None)
1✔
1035
        if val is not None:
1!
UNCOV
1036
            return val
×
1037
        val = getattr(self, name, None)
1✔
1038
        if val is not None:
1!
UNCOV
1039
            return val
×
1040
        return kw.get("sort_%s" % attr, None)
1✔
1041

1042
    def _getSortIndex(self, args):
1✔
1043
        """Returns a list of search index objects or None."""
1044
        sort_index_names = self._get_sort_attr("on", args)
1✔
1045
        if sort_index_names is not None:
1✔
1046
            # self.indexes is always a dict, so get() w/ 1 arg works
1047
            sort_indexes = []
1✔
1048
            if not isinstance(sort_index_names, (list, tuple)):
1✔
1049
                sort_index_names = [sort_index_names]
1✔
1050
            for name in sort_index_names:
1✔
1051
                sort_index = self.indexes.get(name)
1✔
1052
                if sort_index is None:
1✔
1053
                    raise CatalogError('Unknown sort_on index: %s' %
1✔
1054
                                       repr(name))
1055
                else:
1056
                    if not hasattr(sort_index, 'documentToKeyMap'):
1✔
1057
                        raise CatalogError(
1✔
1058
                            'The index chosen for sort_on is '
1059
                            'not capable of being used as a sort index: '
1060
                            '%s' % repr(name))
1061
                sort_indexes.append(sort_index)
1✔
1062
            if len(sort_indexes) == 1:
1✔
1063
                # be nice and keep the old API intact for single sort_on's
1064
                return sort_indexes[0]
1✔
1065
            return sort_indexes
1✔
1066
        return None
1✔
1067

1068
    def searchResults(self, query=None, _merge=True, rids=False, **kw):
1✔
1069
        # You should pass in a simple dictionary as the first argument,
1070
        # which only contains the relevant query.
1071
        query = self.merge_query_args(query, **kw)
1✔
1072
        sort_indexes = self._getSortIndex(query)
1✔
1073
        sort_limit = self._get_sort_attr('limit', query)
1✔
1074
        reverse = False
1✔
1075
        if sort_indexes is not None:
1✔
1076
            order = self._get_sort_attr("order", query)
1✔
1077
            reverse = []
1✔
1078
            if order is None:
1✔
1079
                order = ['']
1✔
1080
            elif isinstance(order, str):
1✔
1081
                order = [order]
1✔
1082
            for o in order:
1✔
1083
                reverse.append(o.lower() in ('reverse', 'descending'))
1✔
1084
            if len(reverse) == 1:
1✔
1085
                # be nice and keep the old API intact for single sort_order
1086
                reverse = reverse[0]
1✔
1087
        # Perform searches with indexes and sort_index
1088
        return self.search(query, sort_indexes, reverse, sort_limit, _merge, rids)
1✔
1089

1090
    __call__ = searchResults
1✔
1091

1092
    def getCatalogPlan(self, query=None):
1✔
1093
        """Query time reporting and planning.
1094
        """
1095
        parent = aq_base(aq_parent(self))
1✔
1096
        threshold = getattr(parent, 'long_query_time', 0.1)
1✔
1097
        return CatalogPlan(self, query, threshold)
1✔
1098

1099

1100
def mergeResults(results, has_sort_keys, reverse):
1✔
1101
    """Sort/merge sub-results, generating a flat sequence.
1102

1103
    results is a list of result set sequences, all with or without sort keys
1104
    """
1105
    if not has_sort_keys:
1✔
1106
        return LazyCat(results)
1✔
1107
    else:
1108
        # Concatenate the catalog results into one list and sort it
1109
        # Each result record consists of a list of tuples with three values:
1110
        # (sortkey, docid, catalog__getitem__)
1111
        combined = []
1✔
1112
        if len(results) > 1:
1!
1113
            for r in results:
1✔
1114
                combined.extend(r)
1✔
UNCOV
1115
        elif len(results) == 1:
×
UNCOV
1116
            combined = results[0]
×
1117
        else:
1118
            return []
×
1119
        if reverse:
1✔
1120
            combined.sort(reverse=True)
1✔
1121
        else:
1122
            combined.sort()
1✔
1123
        return LazyMap(lambda rec: rec[2](rec[1]), combined, len(combined))
1✔
1124

1125

1126
def multisort(items, sort_spec):
1✔
1127
    """Sort a list by multiple keys bidirectionally.
1128

1129
    sort_spec is a list of ones and minus ones, with 1 meaning sort normally
1130
    and -1 meaning sort reversed.
1131

1132
    The length of sort_spec must match the length of the first value in each
1133
    list entry given via `items`.
1134
    """
1135
    first = sort_spec[0]
1✔
1136
    different = any([True for s in sort_spec if s != first])
1✔
1137
    first_reverse = first != 1 and True or False
1✔
1138

1139
    # always do an in-place sort. The default sort via key is really fast and
1140
    # the later cmp approach is efficient when getting mostly sorted data.
1141
    items.sort(reverse=first_reverse, key=itemgetter(0))
1✔
1142
    if not different:
1✔
1143
        # if all keys are sorted in the same direction, we are done
1144
        return items
1✔
1145

1146
    comparers = []
1✔
1147
    for i, v in enumerate(sort_spec):
1✔
1148
        comparers.append((itemgetter(i), v))
1✔
1149

1150
    def comparer(left, right):
1✔
1151
        for func, order in comparers:
1!
1152
            # emulate cmp
1153
            a = func(left[0])
1✔
1154
            b = func(right[0])
1✔
1155
            result = ((a > b) - (a < b))
1✔
1156
            if result:
1✔
1157
                return order * result
1✔
UNCOV
1158
        return 0
×
1159

1160
    items.sort(key=cmp_to_key(comparer))
1✔
1161

1162
    return items
1✔