• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mozilla / relman-auto-nag / #4784

25 Oct 2023 06:29AM UTC coverage: 22.023% (-0.07%) from 22.091%
#4784

push

coveralls-python

suhaibmujahid
[file_crash_bug] Generalize the value of the "Crash kind"

716 of 3576 branches covered (0.0%)

27 of 27 new or added lines in 1 file covered. (100.0%)

1925 of 8741 relevant lines covered (22.02%)

0.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/bugbot/crash/analyzer.py
1
# This Source Code Form is subject to the terms of the Mozilla Public
2
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3
# You can obtain one at http://mozilla.org/MPL/2.0/.
4

5
import itertools
×
6
import re
×
7
from collections import defaultdict
×
8
from datetime import date, timedelta
×
9
from functools import cached_property
×
10
from typing import Iterable, Iterator
×
11

12
from libmozdata import bugzilla, clouseau, connection, socorro
×
13
from libmozdata import utils as lmdutils
×
14
from libmozdata.bugzilla import Bugzilla
×
15
from libmozdata.connection import Connection
×
16

17
from bugbot import logger, utils
×
18
from bugbot.bug.analyzer import BugAnalyzer, BugsStore
×
19
from bugbot.components import ComponentName
×
20
from bugbot.crash import socorro_util
×
21

22
# The max offset from a memory address to be considered "near".
OFFSET_64_BIT = 0x1000
OFFSET_32_BIT = 0x100
# Allocator poison value addresses.
ALLOCATOR_ADDRESSES_64_BIT = (
    (0xE5E5E5E5E5E5E5E5, OFFSET_64_BIT),
    # On 64-bit windows, sometimes it could be doing something with a 32-bit
    # value gotten from freed memory, so it'll be 0X00000000E5E5E5E5 +/-, and
    # because of the address limitation, quite often it will be
    # 0X0000E5E5E5E5E5E5 +/-.
    (0x00000000E5E5E5E5, OFFSET_32_BIT),
    (0x0000E5E5E5E5E5E5, OFFSET_64_BIT),
    (0x4B4B4B4B4B4B4B4B, OFFSET_64_BIT),
)
ALLOCATOR_ADDRESSES_32_BIT = (
    (0xE5E5E5E5, OFFSET_32_BIT),
    (0x4B4B4B4B, OFFSET_32_BIT),
)
# Ranges where addresses are considered near allocator poison values.
#
# NOTE: these must be materialized tuples, not generator expressions. A
# generator would be exhausted after the first scan in
# `is_near_allocator_address`, silently making every subsequent call
# return False.
ALLOCATOR_RANGES_64_BIT = tuple(
    (addr - offset, addr + offset) for addr, offset in ALLOCATOR_ADDRESSES_64_BIT
)
ALLOCATOR_RANGES_32_BIT = tuple(
    (addr - offset, addr + offset) for addr, offset in ALLOCATOR_ADDRESSES_32_BIT
)

# NOTE: If you make changes that affect the output of the analysis, you should
# increment this number. This is needed in the experimental phase only.
EXPERIMENT_VERSION = 3
51

52

53
def is_near_null_address(str_address) -> bool:
    """Check if the address is near null.

    Args:
        str_address: The memory address to check, as a hex string.

    Returns:
        True if the address is near null, False otherwise.
    """
    address = int(str_address, 0)
    # 18+ characters means "0x" plus 16 hex digits, i.e. a 64-bit address.
    offset = OFFSET_64_BIT if len(str_address) >= 18 else OFFSET_32_BIT
    return -offset <= address <= offset
69

70

71
def is_near_allocator_address(str_address) -> bool:
    """Check if the address is near an allocator poison value.

    Args:
        str_address: The memory address to check, as a hex string.

    Returns:
        True if the address is near an allocator poison value, False otherwise.
    """
    address = int(str_address, 0)
    # 18+ characters means "0x" plus 16 hex digits, i.e. a 64-bit address.
    ranges = (
        ALLOCATOR_RANGES_64_BIT
        if len(str_address) >= 18
        else ALLOCATOR_RANGES_32_BIT
    )
    return any(low <= address <= high for low, high in ranges)
89

90

91
# TODO: Move this to libmozdata
def generate_signature_page_url(params: dict, tab: str) -> str:
    """Generate a URL to the signature page on Socorro

    Args:
        params: the parameters for the search query.
        tab: the page tab that should be selected.

    Returns:
        The URL of the signature page on Socorro
    """
    base_url = socorro.Socorro.CRASH_STATS_URL
    encoded_query = lmdutils.get_params_for_url(params)
    return "{}/signature/{}#{}".format(base_url, encoded_query, tab)
105

106

107
# NOTE: At this point, we will file bugs on bugzilla-dev. Once we are confident
108
# that the bug filing is working as expected, we can switch to filing bugs in
109
# the production instance of Bugzilla.
110
class DevBugzilla(Bugzilla):
    """Bugzilla client pointed at the bugzilla-dev staging instance.

    Overrides the endpoints and API token of the base `Bugzilla` class so
    that, during the experimental phase, bugs are filed against bugzilla-dev
    instead of the production instance (see the NOTE above this class).
    """

    URL = "https://bugzilla-dev.allizom.org"
    API_URL = URL + "/rest/bug"
    ATTACHMENT_API_URL = API_URL + "/attachment"
    # Read the dev-instance API key from the local login configuration.
    TOKEN = utils.get_login_info()["bz_api_key_dev"]
    # Note(suhaib): the dev instance of bugzilla has a smaller cluster, so we
    # need to go easy on it.
    MAX_WORKERS = 1
118

119

120
class NoCrashReportFoundError(Exception):
    """There are no crash reports that meet the required criteria.

    Raised by `SignatureAnalyzer.fetch_representative_processed_crash` when
    none of the fetched crash reports matches the required proto signature.
    """
122

123

124
class ClouseauDataAnalyzer:
    """Analyze the data returned by Crash Clouseau about a specific crash
    signature.
    """

    # Reports scoring below this threshold are never considered as regressors.
    MINIMUM_CLOUSEAU_SCORE_THRESHOLD: int = 8
    # Component used when the potential regressors do not agree on one.
    DEFAULT_CRASH_COMPONENT = ComponentName("Core", "General")

    def __init__(self, reports: Iterable[dict], bugs_store: BugsStore):
        """Args:
        reports: the Clouseau report dicts for the signature.
        bugs_store: shared store used to fetch and cache Bugzilla bugs.
        """
        # Materialize the reports: they are truthiness-tested and iterated
        # multiple times below, which would silently misbehave with a
        # one-shot iterator.
        self._clouseau_reports = list(reports)
        self.bugs_store = bugs_store

    @cached_property
    def max_clouseau_score(self):
        """The maximum Clouseau score in the crash reports."""
        if not self._clouseau_reports:
            return 0
        return max(report["max_score"] for report in self._clouseau_reports)

    def _accepted_changesets(self) -> Iterator[dict]:
        """Yield the changesets that are plausible causes of the crash.

        A changeset is accepted when both its report and the changeset itself
        meet the minimum accepted score, and the changeset is neither a merge
        nor backed out.
        """
        minimum_accepted_score = max(
            self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score
        )
        for report in self._clouseau_reports:
            if report["max_score"] < minimum_accepted_score:
                continue
            for changeset in report["changesets"]:
                if (
                    changeset["max_score"] >= minimum_accepted_score
                    and not changeset["is_merge"]
                    and not changeset["is_backedout"]
                ):
                    yield changeset

    @cached_property
    def regressed_by_potential_bug_ids(self) -> set[int]:
        """The IDs for the bugs that their patches could have caused the crash."""
        return {changeset["bug_id"] for changeset in self._accepted_changesets()}

    @cached_property
    def regressed_by_patch(self) -> str | None:
        """The hash of the patch that could have caused the crash.

        If there are multiple candidate patches, the value will be `None`.
        """
        potential_patches = {
            changeset["changeset"] for changeset in self._accepted_changesets()
        }
        if len(potential_patches) == 1:
            return next(iter(potential_patches))
        return None

    @cached_property
    def regressed_by(self) -> int | None:
        """The ID of the bug that one of its patches could have caused
        the crash.

        If there are multiple bugs, the value will be `None`.
        """
        bug_ids = self.regressed_by_potential_bug_ids
        if len(bug_ids) == 1:
            return next(iter(bug_ids))
        return None

    @cached_property
    def regressed_by_potential_bugs(self) -> list[BugAnalyzer]:
        """The bugs whose patches could have caused the crash."""
        self.bugs_store.fetch_bugs(
            self.regressed_by_potential_bug_ids,
            [
                "id",
                "groups",
                "assigned_to",
                "product",
                "component",
            ],
        )
        return [
            self.bugs_store.get_bug_by_id(bug_id)
            for bug_id in self.regressed_by_potential_bug_ids
        ]

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The author of the patch that could have caused the crash.

        If there are multiple regressors, the value will be `None`.

        The regressor bug assignee is considered as the author, even if the
        assignee is not the patch author.
        """
        if not self.regressed_by:
            return None

        # With a single regressor, the potential-bugs list has exactly one
        # entry: the regressor bug itself.
        bug = self.regressed_by_potential_bugs[0]
        assert bug.id == self.regressed_by
        return bug.get_field("assigned_to_detail")

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component that the crash belongs to.

        If there are multiple components, the value will be the default one.
        """
        potential_components = {
            bug.component for bug in self.regressed_by_potential_bugs
        }
        if len(potential_components) == 1:
            return next(iter(potential_components))
        return self.DEFAULT_CRASH_COMPONENT
237

238

239
class SocorroDataAnalyzer(socorro_util.SignatureStats):
    """Analyze the data returned by Socorro."""

    # Class-wide lazy caches of Bugzilla's legal field values; fetched on
    # first use and shared by all instances.
    _bugzilla_os_legal_values = None
    _bugzilla_cpu_legal_values_map = None
    _platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
    ):
        """Args:
        signature: the signature aggregation dict returned by Socorro.
        num_total_crashes: the total number of crashes in the query period.
        """
        super().__init__(signature, num_total_crashes, platforms=self._platforms)

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Return the corresponding OS name in Bugzilla for the provided OS name
        from Socorro.

        If the OS name is not recognized, return "Other".
        """
        if cls._bugzilla_os_legal_values is None:
            cls._bugzilla_os_legal_values = set(
                bugzilla.BugFields.fetch_field_values("op_sys")
            )

        if op_sys in cls._bugzilla_os_legal_values:
            return op_sys

        # Not an exact legal value: fall back to the OS family name.
        if op_sys.startswith("OS X ") or op_sys.startswith("macOS "):
            op_sys = "macOS"
        elif op_sys.startswith("Windows"):
            op_sys = "Windows"
        elif "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            op_sys = "Linux"
        else:
            op_sys = "Other"

        return op_sys

    @property
    def first_crash_date(self) -> str:
        """The date of the first crash within the query time range.

        The date is in YYYY-MM-DD format.
        """
        return self.signature["facets"]["histogram_date"][0]["term"][:10]

    @property
    def bugzilla_op_sys(self) -> str:
        """The name of the OS where the crash happens.

        The value is one of the legal values for Bugzilla's `op_sys` field.

        - If no OS name is found, the value will be "Unspecified".
        - If the OS name is not recognized, the value will be "Other".
        - If multiple OS names are found, the value will be "All". Unless the OS
          names can be resolved to a common name without a version. For example,
          "Windows 10" and "Windows 7" will become "Windows".
        """
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # Resolve to root OS name by removing the version number.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        if len(all_op_sys) == 2 and "Other" in all_op_sys:
            # TODO: explain this workaround.
            all_op_sys.remove("Other")

        if len(all_op_sys) == 1:
            return next(iter(all_op_sys))

        if len(all_op_sys) == 0:
            return "Unspecified"

        return "All"

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Return the corresponding CPU name in Bugzilla for the provided name
        from Socorro.

        If the CPU is not recognized, return "Other".
        """
        if cls._bugzilla_cpu_legal_values_map is None:
            cls._bugzilla_cpu_legal_values_map = {
                value.lower(): value
                for value in bugzilla.BugFields.fetch_field_values("rep_platform")
            }

        return cls._bugzilla_cpu_legal_values_map.get(cpu, "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The CPU architecture of the devices where the crash happens.

        The value is one of the legal values for Bugzilla's `rep_platform` field.

        - If no CPU architecture is found, the value will be "Unspecified".
        - If the CPU architecture is not recognized, the value will be "Other".
        - If multiple CPU architectures are found, the value will "All".
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        if len(all_cpu_arch) == 2 and "Other" in all_cpu_arch:
            all_cpu_arch.remove("Other")

        if len(all_cpu_arch) == 1:
            return next(iter(all_cpu_arch))

        if len(all_cpu_arch) == 0:
            return "Unspecified"

        return "All"

    @property
    def user_comments_page_url(self) -> str:
        """The URL to the Signature page on Socorro where the Comments tab is
        selected.
        """
        start_date = date.today() - timedelta(weeks=26)
        params = {
            "signature": self.signature_term,
            "date": socorro.SuperSearch.get_search_date(start_date),
        }
        return generate_signature_page_url(params, "comments")

    @property
    def num_user_comments(self) -> int:
        """The number of crash reports with user comments."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether the crash signature has any reports with a user comment."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The proto signature that occurs the most."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The number of crashes for the most occurring proto signature."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    def _build_ids(self) -> Iterator[int]:
        """Yields the build IDs where the crash occurred."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The build ID where most crashes occurred."""
        return self.signature["facets"]["build_id"][0]["term"]

    @cached_property
    def num_near_null_crashes(self) -> int:
        """The number of crashes that occurred on addresses near null."""
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_null_address(address["term"])
        )

    @property
    def is_near_null_crash(self) -> bool:
        """Whether all crashes occurred on addresses near null."""
        return self.num_near_null_crashes == self.num_crashes

    @property
    def is_potential_near_null_crash(self) -> bool:
        """Whether the signature is a potential near null crash.

        The value will be True if some but not all crashes occurred on addresses
        near null.
        """
        return not self.is_near_null_crash and self.num_near_null_crashes > 0

    @property
    def is_near_null_related_crash(self) -> bool:
        """Whether the signature is related to near null crashes.

        The value will be True if any of the crashes occurred on addresses near
        null.
        """
        return self.is_near_null_crash or self.is_potential_near_null_crash

    @cached_property
    def num_near_allocator_crashes(self) -> int:
        """The number of crashes that occurred on addresses near an allocator
        poison value.
        """
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_allocator_address(address["term"])
        )

    @property
    def is_near_allocator_crash(self) -> bool:
        """Whether all crashes occurred on addresses near an allocator poison
        value.
        """
        return self.num_near_allocator_crashes == self.num_crashes

    @property
    def is_potential_near_allocator_crash(self) -> bool:
        """Whether the signature is a potential near allocator poison value
        crash.

        The value will be True if some but not all crashes occurred on addresses
        near an allocator poison value.
        """
        return not self.is_near_allocator_crash and self.num_near_allocator_crashes > 0

    @property
    def is_near_allocator_related_crash(self) -> bool:
        """Whether the signature is related to near allocator poison value
        crashes.

        The value will be True if any of the crashes occurred on addresses near
        an allocator poison value.
        """
        return self.is_near_allocator_crash or self.is_potential_near_allocator_crash

    def _is_process_type_crash(self, process_type: str) -> bool:
        """Whether any crash occurred in the given process type.

        Args:
            process_type: the lowercase Socorro `process_type` term to match.
        """
        for row in self.signature["facets"]["process_type"]:
            if row["term"].lower() == process_type:
                return row["count"] > 0
        return False

    @property
    def is_content_crash(self) -> bool:
        """Whether the crash is related to content process."""
        return self._is_process_type_crash("content")

    @property
    def is_gpu_crash(self) -> bool:
        """Whether the crash is related to GPU."""
        return self._is_process_type_crash("gpu")

    @property
    def is_parent_crash(self) -> bool:
        """Whether the crash is related to the parent process."""
        return self._is_process_type_crash("parent")

    @property
    def crash_kind(self) -> str:
        """The kind of the crash based on the crashing process type."""
        # NOTE(review): `is_plugin_crash` is not defined in this file —
        # presumably inherited from `socorro_util.SignatureStats`; confirm.
        if self.is_plugin_crash:
            return "Plugin"
        if self.is_parent_crash:
            return "Parent"
        if self.is_content_crash:
            return "Content"
        return "Browser Crash"
514

515

516
class SignatureAnalyzer(SocorroDataAnalyzer, ClouseauDataAnalyzer):
    """Analyze the data related to a signature.

    This includes data from Socorro and Clouseau.
    """

    def __init__(
        self,
        socorro_signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
        bugs_store: BugsStore,
    ):
        # The two parents have different signatures, so initialize each one
        # explicitly instead of relying on cooperative super() chaining.
        SocorroDataAnalyzer.__init__(self, socorro_signature, num_total_crashes)
        ClouseauDataAnalyzer.__init__(self, clouseau_reports, bugs_store)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Fetch crash report hits matching the proto signature and build(s).

        Args:
            proto_signature: the exact proto signature to match.
            build_id: a build ID, or an iterable of build IDs, to search in.
            limit: the maximum number of results to request from Socorro.

        Yields:
            The search hits returned by Socorro (dicts containing "uuid").
        """
        params = {
            # "=" requests an exact match on the proto signature.
            "proto_signature": "=" + proto_signature,
            "build_id": build_id,
            "_columns": [
                "uuid",
            ],
            "_results_number": limit,
        }

        def handler(res: dict, data: dict):
            # Copy the search response into the shared dict.
            data.update(res)

        data: dict = {}
        socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait()

        yield from data["hits"]

    def fetch_representative_processed_crash(self) -> dict:
        """Fetch a processed crash to represent the signature.

        This could fetch multiple processed crashes and return the one that is
        most likely to be useful.

        Raises:
            NoCrashReportFoundError: if no candidate report matches the top
                proto signature when such a match is required.
        """
        # Only insist on a report with the top proto signature when that
        # proto signature clearly dominates (> 60% of the crashes).
        limit_to_top_proto_signature = (
            self.num_top_proto_signature_crashes / self.num_crashes > 0.6
        )

        # Candidate reports are tried in priority order; the Socorro fetches
        # are generators, so they only run if the earlier ones do not match.
        reports = itertools.chain(
            # Reports with a higher score from clouseau are more likely to be
            # useful.
            sorted(
                self._clouseau_reports,
                key=lambda report: report["max_score"],
                reverse=True,
            ),
            # Next we try find reports from the top crashing build because they
            # are likely to be representative.
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self._build_ids()),
        )
        for report in reports:
            uuid = report["uuid"]
            processed_crash = socorro.ProcessedCrash.get_processed(uuid)[uuid]
            if (
                not limit_to_top_proto_signature
                or processed_crash["proto_signature"] == self.top_proto_signature
            ):
                # TODO(investigate): maybe we should check if the stack is
                # corrupted (ask gsvelto or willkg about how to detect that)
                return processed_crash

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )

    @cached_property
    def is_potential_security_crash(self) -> bool:
        """Whether the crash is related to a potential security bug.

        The value will be True if:
            - the signature is related to near allocator poison value crashes, or
            - one of the potential regressors is a security bug
        """
        return self.is_near_allocator_related_crash or any(
            bug.is_security for bug in self.regressed_by_potential_bugs
        )
604

605

606
class SignaturesDataFetcher:
×
607
    """Fetch the data related to the given signatures."""
608

609
    # Crash reasons that indicate a memory access error.
    MEMORY_ACCESS_ERROR_REASONS = (
        # On Windows:
        "EXCEPTION_ACCESS_VIOLATION_READ",
        "EXCEPTION_ACCESS_VIOLATION_WRITE",
        # NOTE: the trailing comma below is essential. Without it, implicit
        # string concatenation merged this entry with the first Linux reason,
        # corrupting both values.
        "EXCEPTION_ACCESS_VIOLATION_EXEC",
        # On Linux:
        "SIGSEGV / SEGV_MAPERR",
        "SIGSEGV / SEGV_ACCERR",
    )

    # `moz_crash_reason` substrings for crashes that should not be acted on
    # (OOM conditions and shutdown hangs).
    EXCLUDED_MOZ_REASON_STRINGS = (
        "MOZ_CRASH(OOM)",
        "MOZ_CRASH(Out of memory)",
        "out of memory",
        "Shutdown hanging",
        # TODO(investigate): do we need to exclude signatures that their reason
        # contains `[unhandlable oom]`?
        # Example: arena_t::InitChunk | arena_t::AllocRun | arena_t::MallocLarge | arena_t::Malloc | BaseAllocator::malloc | Allocator::malloc | PageMalloc
        # "[unhandlable oom]",
    )

    # If any of the crash reason starts with any of the following, then it is
    # Network or I/O error.
    EXCLUDED_IO_ERROR_REASON_PREFIXES = (
        "EXCEPTION_IN_PAGE_ERROR_READ",
        "EXCEPTION_IN_PAGE_ERROR_WRITE",
        "EXCEPTION_IN_PAGE_ERROR_EXEC",
    )

    # TODO(investigate): do we need to exclude all these signatures prefixes?
    EXCLUDED_SIGNATURE_PREFIXES = (
        "OOM | ",
        "bad hardware | ",
        "shutdownhang | ",
    )

    # The time window used when fetching a signature's summary from Socorro.
    SUMMARY_DURATION = timedelta(weeks=10)
646

647
    def __init__(
        self,
        signatures: Iterable[str],
        product: str = "Firefox",
        channel: str = "nightly",
    ):
        """Args:
        signatures: the crash signatures to fetch data for.
        product: the product to query.
        channel: the release channel to query.
        """
        # Deduplicate: the same signature may be provided more than once.
        self._signatures = set(signatures)
        self._product = product
        self._channel = channel
656

657
    @classmethod
    def find_new_actionable_crashes(
        cls,
        product: str,
        channel: str,
        days_to_check: int = 7,
        days_without_crashes: int = 7,
    ) -> "SignaturesDataFetcher":
        """Find new actionable crashes.

        Args:
            product: The product to check.
            channel: The release channel to check.
            days_to_check: The number of days to check for crashes.
            days_without_crashes: The number of days without crashes before the
                `days_to_check` to consider the signature new.

        Returns:
            A `SignaturesDataFetcher` wrapping the new actionable signatures.
        """
        # The search window spans both periods; a signature is "new" only if
        # its first crash falls inside the last `days_to_check` days.
        duration = days_to_check + days_without_crashes
        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - timedelta(duration)
        earliest_allowed_date = lmdutils.get_date_str(
            end_date - timedelta(days_to_check)
        )
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": product,
            "release_channel": channel,
            "date": date_range,
            # TODO(investigate): should we do a local filter instead of the
            # following (should we exclude the signature if one of the crashes
            # is a shutdown hang?):
            # If the `ipc_shutdown_state` or `shutdown_progress` field are
            # non-empty then it's a shutdown hang.
            "ipc_shutdown_state": "__null__",
            "shutdown_progress": "__null__",
            # TODO(investigate): should we use the following instead of the
            # local filter.
            # "oom_allocation_size": "!__null__",
            "_aggs.signature": [
                "moz_crash_reason",
                "reason",
                "_histogram.date",
                "_cardinality.install_time",
                "_cardinality.oom_allocation_size",
            ],
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_resp: dict, data: list):
            # Filter the signature facets locally and append the actionable
            # signature names to `data`.
            logger.debug(
                "Total of %d signatures received from Socorro",
                len(search_resp["facets"]["signature"]),
            )

            for crash in search_resp["facets"]["signature"]:
                signature = crash["term"]
                if any(
                    signature.startswith(excluded_prefix)
                    for excluded_prefix in cls.EXCLUDED_SIGNATURE_PREFIXES
                ):
                    # Ignore signatures that start with any of the excluded prefixes.
                    continue

                facets = crash["facets"]
                installations = facets["cardinality_install_time"]["value"]
                if installations <= 1:
                    # Ignore crashes that only happen on one installation.
                    continue

                first_date = facets["histogram_date"][0]["term"]
                if first_date < earliest_allowed_date:
                    # The crash is not new, skip it.
                    continue

                if any(
                    reason["term"].startswith(io_error_prefix)
                    for reason in facets["reason"]
                    for io_error_prefix in cls.EXCLUDED_IO_ERROR_REASON_PREFIXES
                ):
                    # Ignore Network or I/O error crashes.
                    continue

                if crash["count"] < 20:
                    # For signatures with low volume, having multiple types of
                    # memory errors indicates potential bad hardware crashes.
                    num_memory_error_types = sum(
                        reason["term"] in cls.MEMORY_ACCESS_ERROR_REASONS
                        for reason in facets["reason"]
                    )
                    if num_memory_error_types > 1:
                        # Potential bad hardware crash, skip it.
                        continue

                # TODO: Add a filter using the `possible_bit_flips_max_confidence`
                # field to exclude bad hardware crashes. The filed is not available yet.
                # See: https://bugzilla.mozilla.org/show_bug.cgi?id=1816669#c3

                # TODO(investigate): is this needed since we are already
                # filtering signatures that start with "OOM | "
                if facets["cardinality_oom_allocation_size"]["value"]:
                    # If one of the crashes is an OOM crash, skip it.
                    continue

                # TODO(investigate): do we need to check for the `moz_crash_reason`
                moz_crash_reasons = facets["moz_crash_reason"]
                if moz_crash_reasons and any(
                    excluded_reason in reason["term"]
                    for reason in moz_crash_reasons
                    for excluded_reason in cls.EXCLUDED_MOZ_REASON_STRINGS
                ):
                    continue

                data.append(signature)

        signatures: list = []
        socorro.SuperSearch(
            params=params,
            handler=handler,
            handlerdata=signatures,
        ).wait()

        logger.debug(
            "Total of %d signatures left after applying the filtering criteria",
            len(signatures),
        )

        return cls(signatures, product, channel)
789

790
    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
        """Fetch the crash reports data from Crash Clouseau.

        Returns:
            A mapping from each signature to its Clouseau reports.
        """
        if not self._signatures:
            return {}

        logger.debug(
            "Fetch from Clouseau: requesting reports for %d signatures",
            len(self._signatures),
        )

        reports_by_signature = clouseau.Reports.get_by_signatures(
            self._signatures,
            product=self._product,
            channel=self._channel,
        )

        logger.debug(
            "Fetch from Clouseau: received reports for %d signatures",
            len(reports_by_signature),
        )

        return reports_by_signature
812

813
    def fetch_socorro_info(self) -> tuple[list[dict], int]:
        """Fetch the signature data from Socorro.

        Queries Socorro SuperSearch for all tracked signatures over the last
        `SUMMARY_DURATION` period, aggregating by signature.

        Returns:
            A tuple of:
                - the list of signature facets returned by Socorro
                - the total number of crashes matched by the query
        """
        if not self._signatures:
            return [], 0

        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - self.SUMMARY_DURATION
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": self._product,
            # TODO(investigate): should we included all release channels?
            "release_channel": self._channel,
            # TODO(investigate): should we limit based on the build date as well?
            "date": date_range,
            # TODO: split signatures into chunks to avoid very long query URLs
            "signature": ["=" + signature for signature in self._signatures],
            "_aggs.signature": [
                "address",
                "build_id",
                # NOTE: `cpu_arch` was previously listed twice; the duplicate
                # facet entry has been removed.
                "cpu_arch",
                "proto_signature",
                "_cardinality.user_comments",
                "platform_pretty_version",
                "_histogram.date",
                # The following are needed for SignatureStats:
                "platform",
                "is_garbage_collecting",
                "_cardinality.install_time",
                "startup_crash",
                "_histogram.uptime",
                "process_type",
            ],
            # We only need the facets, not the individual crash results.
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_results: dict, data: dict):
            # Capture both the global total and the per-signature facets.
            data["num_total_crashes"] = search_results["total"]
            data["signatures"] = search_results["facets"]["signature"]

        logger.debug(
            "Fetch from Socorro: requesting info for %d signatures",
            len(self._signatures),
        )

        data: dict = {}
        socorro.SuperSearchUnredacted(
            params=params,
            handler=handler,
            handlerdata=data,
        ).wait()

        logger.debug(
            "Fetch from Socorro: received info for %d signatures",
            len(data["signatures"]),
        )

        return data["signatures"], data["num_total_crashes"]
873

874
    def fetch_bugs(
        self, include_fields: list[str] | None = None
    ) -> dict[str, list[dict]]:
        """Fetch bugs that are filed against the given signatures.

        Args:
            include_fields: extra Bugzilla fields to request on top of
                `cf_crash_signature`, which is always fetched because it is
                needed to map each returned bug back to its signatures.

        Returns:
            A dictionary mapping each signature to the bugs filed against it.
        """
        if not self._signatures:
            return {}

        params_base: dict = {
            "include_fields": [
                "cf_crash_signature",
            ],
        }

        if include_fields:
            params_base["include_fields"].extend(include_fields)

        params_list = []
        # Chunk the signatures to keep each query URL within a reasonable size.
        for signatures_chunk in Connection.chunks(list(self._signatures), 30):
            params = params_base.copy()
            n = int(utils.get_last_field_num(params))
            # Open an OR group (Bugzilla boolean chart) that matches any of
            # the chunk's signatures.
            params[f"f{n}"] = "OP"
            params[f"j{n}"] = "OR"
            for signature in signatures_chunk:
                n += 1
                params[f"f{n}"] = "cf_crash_signature"
                params[f"o{n}"] = "regexp"
                # Match the `[@ signature]` bracket format, tolerating an
                # optional space after `@` and before `]`.
                params[f"v{n}"] = rf"\[(@ |@){re.escape(signature)}( \]|\])"
            # Close the OR group.
            params[f"f{n+1}"] = "CP"
            params_list.append(params)

        signatures_bugs: dict = defaultdict(list)

        def handler(res, data):
            # A bug may list several signatures; attach the bug to each
            # signature that belongs to the set we queried for.
            for bug in res["bugs"]:
                for signature in utils.get_signatures(bug["cf_crash_signature"]):
                    if signature in self._signatures:
                        data[signature].append(bug)

        logger.debug(
            "Fetch from Bugzilla: requesting bugs for %d signatures",
            len(self._signatures),
        )
        timeout = utils.get_config("common", "bz_query_timeout")
        Bugzilla(
            timeout=timeout,
            queries=[
                connection.Query(Bugzilla.API_URL, params, handler, signatures_bugs)
                for params in params_list
            ],
        ).wait()

        # TODO: remove the call to DevBugzilla after moving to production
        # NOTE: the queries above have already run, so appending extra fields
        # to the same params dicts here only affects the DevBugzilla pass.
        for params in params_list:
            # Excluded only filed bugs with the latest version. This will
            # re-generate the bugs after bumping the version.
            n = int(utils.get_last_field_num(params))
            params[f"f{n}"] = "status_whiteboard"
            params[f"o{n}"] = "substring"
            params[f"v{n}"] = f"[bugbot-crash-v{EXPERIMENT_VERSION}]"
        DevBugzilla(
            timeout=timeout,
            queries=[
                connection.Query(DevBugzilla.API_URL, params, handler, signatures_bugs)
                for params in params_list
            ],
        ).wait()

        logger.debug(
            "Fetch from Bugzilla: received bugs for %d signatures", len(signatures_bugs)
        )

        return signatures_bugs
946

947
    def analyze(self) -> list[SignatureAnalyzer]:
        """Analyze the data related to the signatures.

        Returns:
            One `SignatureAnalyzer` per signature that survived the
            filtering steps below.
        """
        filed_bugs = self.fetch_bugs()
        # TODO(investigate): For now, we are ignoring signatures that have bugs
        # filed even if they are closed long time ago. We should investigate
        # whether we should include the ones with closed bugs. For example, if
        # the bug was closed as Fixed years ago.
        self._signatures.difference_update(filed_bugs.keys())

        reports_by_signature = self.fetch_clouseau_crash_reports()
        # TODO(investigate): For now, we are ignoring signatures that are not
        # analyzed by clouseau. We should investigate why they are not analyzed
        # and whether we should include them.
        self._signatures.intersection_update(reports_by_signature.keys())

        signature_facets, num_total_crashes = self.fetch_socorro_info()
        # A single store shared by all analyzers.
        shared_bugs_store = BugsStore()

        analyzers = []
        for facet in signature_facets:
            analyzers.append(
                SignatureAnalyzer(
                    facet,
                    num_total_crashes,
                    reports_by_signature[facet["term"]],
                    shared_bugs_store,
                )
            )
        return analyzers
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc