• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

zopefoundation / Products.ZCatalog / 3979545908

pending completion
3979545908

push

github

GitHub
Drop support for Python 2.7, 3.5, 3.6. (#143)

817 of 1057 branches covered (77.29%)

Branch coverage included in aggregate %.

49 of 49 new or added lines in 12 files covered. (100.0%)

3144 of 3538 relevant lines covered (88.86%)

0.89 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.34
/src/Products/ZCatalog/plan.py
1
##############################################################################
2
#
3
# Copyright (c) 2010 Zope Foundation and Contributors.
4
#
5
# This software is subject to the provisions of the Zope Public License,
6
# Version 2.1 (ZPL).  A copy of the ZPL should accompany this distribution.
7
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
8
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
9
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
10
# FOR A PARTICULAR PURPOSE
11
#
12
##############################################################################
13

14
import os
1✔
15
import os.path
1✔
16
import time
1✔
17
from _thread import allocate_lock
1✔
18
from collections import namedtuple
1✔
19
from logging import getLogger
1✔
20
from os import environ
1✔
21

22
from Acquisition import aq_base
1✔
23
from Acquisition import aq_parent
1✔
24
from zope.dottedname.resolve import resolve
1✔
25

26
from Products.PluginIndexes.interfaces import IDateRangeIndex
1✔
27
from Products.PluginIndexes.interfaces import ILimitedResultIndex
1✔
28
from Products.PluginIndexes.interfaces import IUniqueValueIndex
1✔
29

30

31
# Indexes holding this many distinct values or more are not treated as
# value indexes (see CatalogPlan.valueindexes).
MAX_DISTINCT_VALUES = 10
# A benchmark's hit counter starts over whenever it is a multiple of this
# number (see CatalogPlan.stop_split).
REFRESH_RATE = 100
# PriorityMap entry under which the names of the value indexes are cached.
VALUE_INDEX_KEY = 'VALUE_INDEXES'

# Start and end timestamp of one measurement.
Duration = namedtuple('Duration', ('start', 'end'))
# One timed step of a query: name, elapsed time and the ``limit`` flag.
IndexMeasurement = namedtuple(
    'IndexMeasurement', ('name', 'duration', 'limit'))
# Averaged duration, number of hits and ``limit`` flag of one index.
Benchmark = namedtuple('Benchmark', ('duration', 'hits', 'limit'))
# Total duration of the latest run of a query plus its measurements.
RecentQuery = namedtuple('RecentQuery', ('duration', 'details'))
# Number of runs, mean duration and latest RecentQuery of a query.
Report = namedtuple('Report', ('hits', 'duration', 'last'))

logger = getLogger('Products.ZCatalog')
43

44

45
class NestedDict:
    """Holds a structure of two nested dicts.

    Subclasses have to provide two class attributes: ``value``, the outer
    dict, and ``lock``, a context manager guarding every write. The
    subclasses in this module use ``allocate_lock()``, which is not
    reentrant, so no method may call another locking method while it holds
    the lock itself.
    """

    @classmethod
    def get(cls, key):
        """Return the inner dict stored under ``key``, creating it if missing.

        A missing (or ``None``) entry is created while holding the lock and
        the created dict itself is returned, so a concurrent ``clear()`` or
        ``set()`` can neither cause a ``KeyError`` nor make two callers
        create competing dicts for the same key.
        """
        # Lock-free fast path for the common case of an existing entry.
        outer = cls.value.get(key, None)
        if outer is None:
            with cls.lock:
                # Re-check: another thread may have created it meanwhile.
                outer = cls.value.get(key, None)
                if outer is None:
                    outer = cls.value[key] = {}
        return outer

    @classmethod
    def set(cls, key, value):
        """Store ``value`` as the inner dict for ``key``."""
        with cls.lock:
            cls.value[key] = value

    @classmethod
    def clear(cls):
        """Drop all entries by rebinding ``value`` to a new empty dict."""
        with cls.lock:
            cls.value = {}

    @classmethod
    def get_entry(cls, key, key2):
        """Return the entry ``key2`` of the inner dict ``key``.

        A missing (or ``None``) entry is replaced by a new empty dict, which
        is created under the lock and returned directly; the result is
        therefore never ``None``.
        """
        # cls.get() takes the lock itself, so call it before locking here.
        outer = cls.get(key)
        inner = outer.get(key2, None)
        if inner is None:
            with cls.lock:
                inner = outer.get(key2, None)
                if inner is None:
                    inner = outer[key2] = {}
        return inner

    @classmethod
    def set_entry(cls, key, key2, value):
        """Store ``value`` as entry ``key2`` of the inner dict ``key``."""
        # cls.get() takes the lock itself, so call it before locking here.
        outer = cls.get(key)
        with cls.lock:
            outer[key2] = value

    @classmethod
    def clear_entry(cls, key):
        """Replace the inner dict of ``key`` with a new empty dict."""
        cls.set(key, {})
84

85

86
class PriorityMap(NestedDict):
    """Registry of query plans, kept as nested dicts.

    The outer dict maps a catalog id to the plans of that catalog. Each
    plan dict maps a query key to the recorded benchmarks: either a set of
    names, stored as is, or a mapping of index name to Benchmark.
    """

    value = {}
    lock = allocate_lock()

    @classmethod
    def get_value(cls):
        """Return a shallow copy of the outer dict."""
        return dict(cls.value)

    @classmethod
    def load_default(cls):
        """Load the priority map named by ``ZCATALOGQUERYPLAN``, if set.

        The environment variable holds a dotted name. An ImportError while
        resolving or loading it is logged as a warning and otherwise
        ignored.
        """
        location = environ.get('ZCATALOGQUERYPLAN')
        if not location:
            return
        try:
            cls.load_pmap(location, resolve(location))
        except ImportError:
            logger.warning('could not load priority map from %s', location)

    @classmethod
    def load_from_path(cls, path):
        """Execute the Python file at ``path`` and load its ``queryplan``.

        The file is run with ``exec``, so it must come from a trusted
        source. It has to bind a top-level name ``queryplan``.
        """
        absolute = os.path.abspath(path)
        exec_globals, exec_locals = {}, {}

        with open(absolute, 'rb') as source:
            exec(source.read(), exec_globals, exec_locals)

        cls.load_pmap(absolute, exec_locals['queryplan'].copy())

    @classmethod
    def load_pmap(cls, location, pmap):
        """Replace the current map with ``pmap``.

        ``location`` is only used for the log message. Plain benchmark
        tuples are converted to Benchmark namedtuples; set and frozenset
        entries are stored unchanged.
        """
        logger.info('loaded priority %d map(s) from %s',
                    len(pmap), location)

        def as_benchmarks(details):
            # Sets carry no benchmarks and are kept as they are.
            if isinstance(details, (frozenset, set)):
                return details
            return {indexname: Benchmark(*benchmark)
                    for indexname, benchmark in details.items()}

        converted = {
            cid: {querykey: as_benchmarks(details)
                  for querykey, details in plan.items()}
            for cid, plan in pmap.items()
        }
        with cls.lock:
            cls.value = converted
140

141

142
class Reports(NestedDict):
    """Registry of query reports, kept as two nested dicts.

    The outer dict maps a catalog id to the reports of that catalog. Each
    inner dict maps a query key to a Report.
    """

    value = {}
    lock = allocate_lock()
151

152

153
class CatalogPlan:
    """Catalog plan class to measure and identify catalog queries and plan
    their execution.
    """

    def __init__(self, catalog, query=None, threshold=0.1):
        """Prepare a plan for running ``query`` against ``catalog``.

        ``threshold`` is the total query duration in seconds below which
        ``log()`` does not record a report.
        """
        self.catalog = catalog
        self.cid = self.get_id()
        # Map every name an index can be queried by to the id of the index.
        querykey_to_index = {}
        for index in self.catalog.indexes.values():
            for querykey in self.catalog._get_index_query_names(index):
                querykey_to_index[querykey] = index.getId()
        self.querykey_to_index = querykey_to_index
        self.query = query
        self.key = self.make_key(query)
        self.benchmark = {}
        self.threshold = threshold
        self.init_timer()

    def get_id(self):
        """Return the id of the catalog as a tuple.

        This is the physical path of the catalog's acquisition parent, or a
        fixed placeholder if the parent has no ``getPhysicalPath``.
        """
        parent = aq_parent(self.catalog)
        if getattr(aq_base(parent), 'getPhysicalPath', None) is None:
            return ('', 'NonPersistentCatalog')
        return tuple(parent.getPhysicalPath())

    def init_timer(self):
        """Reset all measurements of the current run."""
        self.res = []
        self.start_time = None
        self.interim = {}
        # stop() records the end of the run under both names.
        self.stop_time = None
        self.end_time = None
        self.duration = None

    def valueindexes(self):
        """Return the names of the indexes whose values belong in the key.

        The result is cached per catalog in the PriorityMap.
        """
        indexes = self.catalog.indexes

        # This function determines all indexes whose values should be respected
        # in the report key. The number of unique values for the index needs to
        # be lower than the MAX_DISTINCT_VALUES watermark.

        # Ideally we would only consider those indexes with a small
        # number of unique values, where the number of items for each value
        # differs a lot. If the number of items per value is similar, the
        # duration of a query is likely similar as well. However, calculating
        # all the value indexes with the number of items per value is
        # quite slow. Therefore, we do not make this distinction.
        value_indexes = PriorityMap.get_entry(self.cid, VALUE_INDEX_KEY)
        if isinstance(value_indexes, (frozenset, set)):
            # Since this is an optimization only, slightly outdated results
            # based on index changes in the running process can be ignored.
            return value_indexes

        value_indexes = set()
        for name, index in indexes.items():
            if IUniqueValueIndex.providedBy(index):

                # DateRangeIndex is unsuitable for this purpose
                if IDateRangeIndex.providedBy(index):
                    continue

                # the size of an UniqueValueIndex is typically equal to the
                # number of unique values
                isize = index.indexSize()
                if isize >= MAX_DISTINCT_VALUES:
                    continue

                value_indexes.add(name)

        value_indexes = frozenset(value_indexes)
        PriorityMap.set_entry(self.cid, VALUE_INDEX_KEY, value_indexes)
        return value_indexes

    def make_key(self, query):
        """Return a hashable key describing ``query``, or None if it is empty.

        The key is a tuple holding the sorted plain query names followed by
        the sorted ``(name, detail)`` tuples. ``detail`` is the repr of the
        queried value for value indexes and ``"not"`` for dict queries that
        contain a ``not`` key.
        """
        if not query:
            return None

        valueindexes = self.valueindexes()
        key = keys = query.keys()

        values = [name for name in keys if name in valueindexes]
        if values:
            # If we have indexes whose values should be considered, we first
            # preserve all normal indexes and then add the keys whose values
            # matter including their value into the key
            key = [name for name in keys if name not in values]
            for name in values:
                v = query.get(name, [])
                # We need to make sure the key is immutable,
                # repr() is an easy way to do this without imposing
                # restrictions on the types of values.
                key.append((name, repr(v)))
        notkeys = [
            name for name in key
            if isinstance(query.get(name), dict) and "not" in query[name]
        ]
        if notkeys:
            key = [name for name in key if name not in notkeys]
            key.extend([(name, "not") for name in notkeys])
        # Workaround: Python only sorts on identical types, so plain names
        # and tuples are sorted separately.
        tuple_keys = {x for x in key if isinstance(x, tuple)}
        str_keys = set(key) - tuple_keys
        return tuple(sorted(str_keys)) + tuple(sorted(tuple_keys))

    def plan(self):
        """Return the index names in the order they should be queried.

        Returns None if no benchmark is stored for this query key.
        """
        benchmark = PriorityMap.get_entry(self.cid, self.key)
        if not benchmark:
            return None

        # sort indexes on (limited result index, mean search time)
        # skip internal ('#') bookkeeping records
        ranking = sorted(
            [((value.limit, value.duration), name)
             for name, value in benchmark.items() if '#' not in name])
        return [r[1] for r in ranking]

    def start(self):
        """Reset the timer and record the start time of the query."""
        self.init_timer()
        self.start_time = time.time()

    def start_split(self, name):
        """Record the start time of the step called ``name``."""
        self.interim[name] = Duration(time.time(), None)

    def stop_split(self, name, result=None, limit=False):
        """Record the end of the step ``name`` and update its benchmark.

        ``result`` is accepted but not used here. A call without a matching
        ``start_split(name)`` is ignored, as there is nothing to measure.
        """
        current = time.time()
        started = self.interim.get(name)
        if started is None or started.start is None:
            # Timing is bookkeeping only; an unmatched stop must not fail
            # the query with a TypeError on ``current - None``.
            return
        start_time = started.start
        self.interim[name] = Duration(start_time, current)
        dt = current - start_time
        self.res.append(IndexMeasurement(
            name=name, duration=dt, limit=limit))

        if name.startswith('sort_on'):
            # sort_on isn't an index. We only do time reporting on it
            return

        # remember index's hits, search time and calls
        benchmark = self.benchmark
        if name not in benchmark:
            benchmark[name] = Benchmark(duration=dt,
                                        hits=1, limit=limit)
        else:
            # The limit flag of the first measurement is kept.
            duration, hits, first_limit = benchmark[name]
            duration = ((duration * hits) + dt) / float(hits + 1)
            # reset adaption
            if hits % REFRESH_RATE == 0:
                hits = 0
            hits += 1
            benchmark[name] = Benchmark(duration, hits, first_limit)

    def stop(self):
        """Finish the measurement, store the benchmark and log the query."""
        self.stop_time = self.end_time = time.time()
        self.duration = self.end_time - self.start_time
        # Make absolutely sure we never omit query keys from the plan
        current = PriorityMap.get_entry(self.cid, self.key)
        # A plan created without a query has no keys to account for.
        query_keys = self.query.keys() if self.query else ()
        for key in query_keys:
            key = self.querykey_to_index.get(key, key)
            if key in self.benchmark:
                continue
            if current and key in current:
                self.benchmark[key] = Benchmark(*current[key])
            elif key in self.catalog.indexes:
                index = self.catalog.indexes[key]
                self.benchmark[key] = Benchmark(
                    0, 0, ILimitedResultIndex.providedBy(index)
                )
            else:
                self.benchmark[key] = Benchmark(0, 0, False)
        PriorityMap.set_entry(self.cid, self.key, self.benchmark)
        self.log()

    def log(self):
        """Add the finished query to the reports if it reached the threshold."""
        # result of stopwatch
        total = self.duration
        if total < self.threshold:
            return

        key = self.key
        recent = RecentQuery(duration=total, details=self.res)

        previous = Reports.get_entry(self.cid, key)
        if previous:
            counter, mean, last = previous
            mean = (mean * counter + total) / float(counter + 1)
            Reports.set_entry(self.cid, key, Report(counter + 1, mean, recent))
        else:
            Reports.set_entry(self.cid, key, Report(1, total, recent))

    def reset(self):
        """Discard all reports of this catalog."""
        Reports.clear_entry(self.cid)

    def report(self):
        """Returns a statistic report of catalog queries as list of dicts.
        The duration is provided in millisecond.
        """
        rval = []
        for key, report in Reports.get(self.cid).items():
            last = report.last
            info = {
                'query': key,
                'counter': report.hits,
                'duration': report.duration * 1000,
                'last': {'duration': last.duration * 1000,
                         'details': [dict(id=d.name,
                                          duration=d.duration * 1000)
                                     for d in last.details],
                         },
            }
            rval.append(info)

        return rval
366

367

368
# Make sure we provide test isolation: both module-level registries are
# emptied by the cleanup hooks registered here.
from zope.testing.cleanup import addCleanUp  # NOQA


for _registry in (PriorityMap, Reports):
    addCleanUp(_registry.clear)
del addCleanUp, _registry
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc