• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mozilla / relman-auto-nag / #4601

pending completion
#4601

push

coveralls-python

suhaibmujahid
Highlight crash address commonalities

646 of 3418 branches covered (18.9%)

40 of 40 new or added lines in 1 file covered. (100.0%)

1828 of 8509 relevant lines covered (21.48%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/bugbot/crash/analyzer.py
1
# This Source Code Form is subject to the terms of the Mozilla Public
2
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3
# You can obtain one at http://mozilla.org/MPL/2.0/.
4

5
import itertools
×
6
from datetime import timedelta
×
7
from functools import cached_property
×
8
from typing import Iterable, Iterator
×
9

10
from libmozdata import bugzilla, clouseau, socorro
×
11
from libmozdata import utils as lmdutils
×
12

13
from bugbot.components import ComponentName
×
14
from bugbot.crash import socorro_util
×
15

16
# Crash address commonalities: crashes were near null, or were near allocator
# poison values.
OFFSET_64_BIT = 0x1000
OFFSET_32_BIT = 0x100
ALLOCATOR_ADDRESSES_64_BIT = (
    0xE5E5E5E5E5E5E5E5,
    0x4B4B4B4B4B4B4B4B,
)
ALLOCATOR_ADDRESSES_32_BIT = (
    0xE5E5E5E5,
    0x4B4B4B4B,
)
# NOTE: these ranges must be materialized as tuples, not left as generator
# expressions. A generator is exhausted after its first full iteration, so
# every later check against the ranges would silently see an empty sequence.
ALLOCATOR_RANGES_64_BIT = tuple(
    (addr - OFFSET_64_BIT, addr + OFFSET_64_BIT) for addr in ALLOCATOR_ADDRESSES_64_BIT
)
ALLOCATOR_RANGES_32_BIT = tuple(
    (addr - OFFSET_32_BIT, addr + OFFSET_32_BIT) for addr in ALLOCATOR_ADDRESSES_32_BIT
)
34

35

36
def is_near_null_address(str_address) -> bool:
    """Whether the crash address is near null (within the near-null offset)."""
    value = int(str_address, 0)
    # 18 characters covers "0x" plus 16 hex digits, i.e. a 64-bit address.
    offset = OFFSET_64_BIT if len(str_address) >= 18 else OFFSET_32_BIT
    return -offset <= value <= offset
×
44

45

46
def is_near_allocator_address(str_address) -> bool:
    """Whether the crash address falls in an allocator poison-value range."""
    value = int(str_address, 0)
    # 18 characters covers "0x" plus 16 hex digits, i.e. a 64-bit address.
    ranges = (
        ALLOCATOR_RANGES_64_BIT if len(str_address) >= 18 else ALLOCATOR_RANGES_32_BIT
    )
    return any(low <= value <= high for low, high in ranges)
56

57

58
class NoCrashReportFoundError(Exception):
    """Error raised when no crash report matching the required criteria exists."""
60

61

62
class ClouseauReportsAnalyzer:
×
63
    REGRESSOR_MINIMUM_SCORE: int = 8
×
64

65
    def __init__(self, reports: Iterable[dict]):
×
66
        self._clouseau_reports = reports
×
67

68
    @cached_property
×
69
    def max_score(self):
×
70
        if not self._clouseau_reports:
×
71
            return 0
×
72
        return max(report["max_score"] for report in self._clouseau_reports)
×
73

74
    @cached_property
×
75
    def regressed_by_potential_bug_ids(self) -> set[int]:
×
76
        minimum_accepted_score = max(self.REGRESSOR_MINIMUM_SCORE, self.max_score)
×
77
        return {
×
78
            changeset["bug_id"]
79
            for report in self._clouseau_reports
80
            if report["max_score"] >= minimum_accepted_score
81
            for changeset in report["changesets"]
82
            if changeset["max_score"] >= minimum_accepted_score
83
            and not changeset["is_merge"]
84
            and not changeset["is_backedout"]
85
        }
86

87
    @cached_property
×
88
    def regressed_by_patch(self) -> str | None:
×
89
        minimum_accepted_score = max(self.REGRESSOR_MINIMUM_SCORE, self.max_score)
×
90
        potential_patches = {
×
91
            changeset["changeset"]
92
            for report in self._clouseau_reports
93
            if report["max_score"] >= minimum_accepted_score
94
            for changeset in report["changesets"]
95
            if changeset["max_score"] >= minimum_accepted_score
96
            and not changeset["is_merge"]
97
            and not changeset["is_backedout"]
98
        }
99
        if len(potential_patches) == 1:
×
100
            return next(iter(potential_patches))
×
101
        return None
×
102

103
    @cached_property
×
104
    def regressed_by(self) -> int | None:
×
105
        bug_ids = self.regressed_by_potential_bug_ids
×
106
        if len(bug_ids) == 1:
×
107
            return next(iter(bug_ids))
×
108
        return None
×
109

110
    @cached_property
×
111
    def regressed_by_potential_bugs(self) -> list[dict]:
×
112
        def handler(bug: dict, data: list):
×
113
            data.append(bug)
×
114

115
        bugs: list[dict] = []
×
116
        bugzilla.Bugzilla(
×
117
            bugids=self.regressed_by_potential_bug_ids,
118
            include_fields=[
119
                "id",
120
                "assigned_to",
121
                "product",
122
                "component",
123
            ],
124
            bughandler=handler,
125
            bugdata=bugs,
126
        ).wait()
127

128
        return bugs
×
129

130
    @cached_property
×
131
    def regressed_by_author(self) -> dict | None:
×
132
        if not self.regressed_by:
×
133
            return None
×
134

135
        bug = self.regressed_by_potential_bugs[0]
×
136
        assert bug["id"] == self.regressed_by
×
137
        return bug["assigned_to_detail"]
×
138

139
    @cached_property
×
140
    def crash_component(self) -> ComponentName:
×
141
        potential_components = {
×
142
            ComponentName(bug["product"], bug["component"])
143
            for bug in self.regressed_by_potential_bugs
144
        }
145
        if len(potential_components) == 1:
×
146
            return next(iter(potential_components))
×
147
        return ComponentName("Core", "General")
×
148

149

150
class SocorroInfoAnalyzer(socorro_util.SignatureStats):
    """Analyze the crash data fetched from Socorro for one signature."""

    # Valid Bugzilla values for the "op_sys" and "rep_platform" bug fields.
    # NOTE(review): these are fetched at class-definition time, i.e. a
    # network request happens when this module is imported.
    __bugzilla_os_values = set(bugzilla.BugFields.fetch_field_values("op_sys"))
    __bugzilla_cpu_values = {
        value.lower(): value
        for value in bugzilla.BugFields.fetch_field_values("rep_platform")
    }

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Map a Socorro operating system name to a valid Bugzilla op_sys value.

        Names already accepted by Bugzilla pass through unchanged; known
        vendor prefixes are collapsed to their family name; anything else
        becomes "Other".
        """
        if op_sys in cls.__bugzilla_os_values:
            return op_sys

        if op_sys.startswith("OS X ") or op_sys.startswith("macOS "):
            op_sys = "macOS"
        elif op_sys.startswith("Windows"):
            op_sys = "Windows"
        elif "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            op_sys = "Linux"
        else:
            op_sys = "Other"

        return op_sys

    @property
    def bugzilla_op_sys(self) -> str:
        """The Bugzilla op_sys value covering the platforms seen in the crashes.

        Returns a single OS name when the crashes agree, "Unspecified" when
        no platform facet is present, and "All" otherwise.
        """
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # TODO: explain this workaround
            # NOTE(review): presumably this collapses versioned names (e.g.
            # "Windows 10" vs "Windows 11") to their common family prefix so
            # they count as one OS — confirm.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        # Ignore the catch-all bucket when a concrete OS is also present.
        if len(all_op_sys) == 2 and "Other" in all_op_sys:
            all_op_sys.remove("Other")

        if len(all_op_sys) == 1:
            return next(iter(all_op_sys))

        if len(all_op_sys) == 0:
            return "Unspecified"

        return "All"

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Map a Socorro CPU arch name to a valid Bugzilla rep_platform value."""
        return cls.__bugzilla_cpu_values.get(cpu, "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The Bugzilla rep_platform value covering the CPU archs seen in the crashes.

        Returns a single arch when the crashes agree, "Unspecified" when no
        cpu_arch facet is present, and "All" otherwise.
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        # Ignore the catch-all bucket when a concrete arch is also present.
        if len(all_cpu_arch) == 2 and "Other" in all_cpu_arch:
            all_cpu_arch.remove("Other")

        if len(all_cpu_arch) == 1:
            return next(iter(all_cpu_arch))

        if len(all_cpu_arch) == 0:
            return "Unspecified"

        return "All"

    @property
    def num_user_comments(self) -> int:
        """The number of distinct user comments on the crash reports."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether any crash report carries a user comment."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The most frequent proto signature among the crashes."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The number of crashes with the most frequent proto signature."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    @property
    def build_ids(self) -> Iterator[int]:
        """Yield the build ids seen in the crashes, most frequent first."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The build id with the most crashes."""
        return self.signature["facets"]["build_id"][0]["term"]

    @cached_property
    def num_near_null_crashes(self) -> int:
        """The number of crashes whose address is near null."""
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_null_address(address["term"])
        )

    @property
    def is_near_null_crash(self) -> bool:
        """Whether all crashes occurred near null."""
        return self.num_near_null_crashes == self.num_crashes

    @property
    def is_potential_near_null_crash(self) -> bool:
        """Whether some, but not all, crashes occurred near null."""
        return not self.is_near_null_crash and self.num_near_null_crashes > 0

    @property
    def is_near_null_related_crash(self) -> bool:
        """Whether any crash occurred near null."""
        return self.is_near_null_crash or self.is_potential_near_null_crash

    @cached_property
    def num_near_allocator_crashes(self) -> int:
        """The number of crashes whose address is near an allocator poison value."""
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_allocator_address(address["term"])
        )

    @property
    def is_near_allocator_crash(self) -> bool:
        """Whether all crashes occurred near an allocator poison value."""
        return self.num_near_allocator_crashes == self.num_crashes

    @property
    def is_potential_near_allocator_crash(self) -> bool:
        """Whether some, but not all, crashes occurred near an allocator poison value."""
        return not self.is_near_allocator_crash and self.num_near_allocator_crashes > 0

    @property
    def is_near_allocator_related_crash(self) -> bool:
        """Whether any crash occurred near an allocator poison value."""
        return self.is_near_allocator_crash or self.is_potential_near_allocator_crash
×
282

283

284
class SignatureAnalyzer(SocorroInfoAnalyzer, ClouseauReportsAnalyzer):
    """Analyze a crash signature by combining Socorro and Crash Clouseau data."""

    platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
    ):
        """
        Args:
            signature: the Socorro facets for this signature.
            num_total_crashes: the total number of crashes in the search.
            clouseau_reports: the Crash Clouseau reports for this signature.
        """
        SocorroInfoAnalyzer.__init__(
            self, signature, num_total_crashes, platforms=self.platforms
        )
        ClouseauReportsAnalyzer.__init__(self, clouseau_reports)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Yield crash report stubs (uuid only) matching the proto signature
        and build id(s) from Socorro SuperSearch."""
        search_results: dict = {}

        def collect(res: dict, sink: dict):
            sink.update(res)

        socorro.SuperSearch(
            params={
                "proto_signature": "=" + proto_signature,
                "build_id": build_id,
                "_columns": [
                    "uuid",
                ],
                "_results_number": limit,
            },
            handler=collect,
            handlerdata=search_results,
        ).wait()

        yield from search_results["hits"]

    def fetch_representing_processed_crash(self) -> dict:
        """Fetch a processed crash report that represents this signature.

        Raises:
            NoCrashReportFoundError: when no suitable crash report is found.
        """
        restrict_to_top_proto_signature = (
            self.num_top_proto_signature_crashes / self.num_crashes > 0.6
        )

        # Candidate order matters: Clouseau reports with higher scores are
        # more likely to be useful; after those, reports from the top
        # crashing build are likely to be representative; finally fall back
        # to any build.
        candidates = itertools.chain(
            sorted(
                self._clouseau_reports,
                key=lambda report: report["max_score"],
                reverse=True,
            ),
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self.build_ids),
        )
        for candidate in candidates:
            crash_id = candidate["uuid"]
            processed_crash = socorro.ProcessedCrash.get_processed(crash_id)[crash_id]
            if (
                not restrict_to_top_proto_signature
                or processed_crash["proto_signature"] == self.top_proto_signature
            ):
                # TODO(investigate): maybe we should check if the stack is
                # corrupted (ask gsvelto or willkg about how to detect that)
                return processed_crash

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )
359

360

361
class SignaturesDataFetcher:
    """Fetch the crash data needed to analyze a set of signatures."""

    def __init__(
        self,
        signatures,
        product: str = "Firefox",
        channel: str = "nightly",
    ):
        """
        Args:
            signatures: the signature terms to fetch data for.
            product: the product to limit the search to.
            channel: the release channel to limit the search to.
        """
        self._signatures = signatures
        self._product = product
        self._channel = channel

    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
        """Fetch the Crash Clouseau reports, keyed by signature."""
        return clouseau.Reports.get_by_signatures(
            self._signatures,
            product=self._product,
            channel=self._channel,
        )

    def fetch_socorro_info(self) -> tuple[list[dict], int]:
        """Fetch aggregated crash data from Socorro.

        Returns:
            A tuple of the per-signature facets and the total number of
            crashes matching the search.
        """
        # TODO(investigate): should we increase the duration to 6 months?
        duration = timedelta(weeks=1)
        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - duration
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": self._product,
            # TODO(investigate): should we included all release channels?
            "release_channel": self._channel,
            # TODO(investigate): should we limit based on the build date as well?
            "date": date_range,
            # TODO: split signatures into chunks to avoid very long query URLs
            "signature": ["=" + signature for signature in self._signatures],
            # NOTE: "cpu_arch" was previously listed twice; the duplicate
            # aggregation has been removed.
            "_aggs.signature": [
                "build_id",
                "cpu_arch",
                "proto_signature",
                "_cardinality.user_comments",
                "platform_pretty_version",
                # The following are needed for SignatureStats:
                "platform",
                "is_garbage_collecting",
                "_cardinality.install_time",
                "startup_crash",
                "_histogram.uptime",
                "process_type",
            ],
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_results: dict, data: dict):
            data["num_total_crashes"] = search_results["total"]
            data["signatures"] = search_results["facets"]["signature"]

        data: dict = {}
        socorro.SuperSearchUnredacted(
            params=params,
            handler=handler,
            handlerdata=data,
        ).wait()

        return data["signatures"], data["num_total_crashes"]

    def analyze(self) -> list["SignatureAnalyzer"]:
        """Build a SignatureAnalyzer for each signature analyzed by Clouseau."""
        clouseau_reports = self.fetch_clouseau_crash_reports()
        signatures, num_total_crashes = self.fetch_socorro_info()

        return [
            SignatureAnalyzer(
                signature,
                num_total_crashes,
                clouseau_reports[signature["term"]],
            )
            for signature in signatures
            # TODO(investigate): For now, we are ignoring signatures that are
            # not analyzed by clouseau. We should investigate why they are not
            # analyzed and whether we should include them.
            if signature["term"] in clouseau_reports
        ]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc