• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mozilla / relman-auto-nag / #4788

25 Oct 2023 07:37PM CUT coverage: 22.063%. Remained the same
#4788

push

coveralls-python

suhaibmujahid
Raise search tries threshold

716 of 3568 branches covered (20.07%)

1 of 1 new or added line in 1 file covered. (100.0%)

1925 of 8725 relevant lines covered (22.06%)

0.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/bugbot/crash/analyzer.py
1
# This Source Code Form is subject to the terms of the Mozilla Public
2
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3
# You can obtain one at http://mozilla.org/MPL/2.0/.
4

5
import itertools
×
6
import re
×
7
from collections import defaultdict
×
8
from datetime import date, timedelta
×
9
from functools import cached_property
×
10
from typing import Iterable, Iterator
×
11

12
from libmozdata import bugzilla, clouseau, connection, socorro
×
13
from libmozdata import utils as lmdutils
×
14
from libmozdata.bugzilla import Bugzilla
×
15
from libmozdata.connection import Connection
×
16

17
from bugbot import logger, utils
×
18
from bugbot.bug.analyzer import BugAnalyzer, BugsStore
×
19
from bugbot.components import ComponentName
×
20
from bugbot.crash import socorro_util
×
21

22
# The max offset from a memory address to be considered "near".
OFFSET_64_BIT = 0x1000
OFFSET_32_BIT = 0x100
# Allocator poison value addresses.
ALLOCATOR_ADDRESSES_64_BIT = (
    (0xE5E5E5E5E5E5E5E5, OFFSET_64_BIT),
    # On 64-bit windows, sometimes it could be doing something with a 32-bit
    # value gotten from freed memory, so it'll be 0X00000000E5E5E5E5 +/-, and
    # because of the address limitation, quite often it will be
    # 0X0000E5E5E5E5E5E5 +/-.
    (0x00000000E5E5E5E5, OFFSET_32_BIT),
    (0x0000E5E5E5E5E5E5, OFFSET_64_BIT),
    (0x4B4B4B4B4B4B4B4B, OFFSET_64_BIT),
)
ALLOCATOR_ADDRESSES_32_BIT = (
    (0xE5E5E5E5, OFFSET_32_BIT),
    (0x4B4B4B4B, OFFSET_32_BIT),
)
# Ranges where addresses are considered near allocator poison values.
# NOTE: these must be materialized tuples, not generator expressions. A
# generator is exhausted after the first scan, which made every call to
# is_near_allocator_address() after the first one silently return False.
ALLOCATOR_RANGES_64_BIT = tuple(
    (addr - offset, addr + offset) for addr, offset in ALLOCATOR_ADDRESSES_64_BIT
)
ALLOCATOR_RANGES_32_BIT = tuple(
    (addr - offset, addr + offset) for addr, offset in ALLOCATOR_ADDRESSES_32_BIT
)

# NOTE: If you make changes that affect the output of the analysis, you should
# increment this number. This is needed in the experimental phase only.
EXPERIMENT_VERSION = 3


def is_near_null_address(str_address) -> bool:
    """Check if the address is near null.

    Args:
        str_address: The memory address to check, as a hex string (e.g.
            "0x0000000000000f00").

    Returns:
        True if the address is near null, False otherwise.
    """
    address = int(str_address, 0)
    # 18 characters = "0x" + 16 hex digits, i.e. a 64-bit address.
    is_64_bit = len(str_address) >= 18

    if is_64_bit:
        return -OFFSET_64_BIT <= address <= OFFSET_64_BIT

    return -OFFSET_32_BIT <= address <= OFFSET_32_BIT


def is_near_allocator_address(str_address) -> bool:
    """Check if the address is near an allocator poison value.

    Args:
        str_address: The memory address to check, as a hex string.

    Returns:
        True if the address is near an allocator poison value, False otherwise.
    """
    address = int(str_address, 0)
    # 18 characters = "0x" + 16 hex digits, i.e. a 64-bit address.
    is_64_bit = len(str_address) >= 18

    return any(
        low <= address <= high
        for low, high in (
            ALLOCATOR_RANGES_64_BIT if is_64_bit else ALLOCATOR_RANGES_32_BIT
        )
    )
89

90

91
# TODO: Move this to libmozdata
def generate_signature_page_url(params: dict, tab: str) -> str:
    """Generate a URL to the signature page on Socorro

    Args:
        params: the parameters for the search query.
        tab: the page tab that should be selected.

    Returns:
        The URL of the signature page on Socorro
    """
    base_url = socorro.Socorro.CRASH_STATS_URL
    encoded_query = lmdutils.get_params_for_url(params)

    return f"{base_url}/signature/{encoded_query}#{tab}"
105

106

107
# NOTE: At this point, we will file bugs on bugzilla-dev. Once we are confident
# that the bug filing is working as expected, we can switch to filing bugs in
# the production instance of Bugzilla.
class DevBugzilla(Bugzilla):
    """Bugzilla client that targets the bugzilla-dev staging instance."""

    URL = "https://bugzilla-dev.allizom.org"
    API_URL = URL + "/rest/bug"
    ATTACHMENT_API_URL = API_URL + "/attachment"
    # NOTE(review): the API key is read when this module is imported, so
    # importing the module requires "bz_api_key_dev" to be configured.
    TOKEN = utils.get_login_info()["bz_api_key_dev"]
    # Note(suhaib): the dev instance of bugzilla has a smaller cluster, so we
    # need to go easy on it.
    MAX_WORKERS = 1
118

119

120
class NoCrashReportFoundError(Exception):
    """There are no crash reports that meet the required criteria.

    Raised by `SignatureAnalyzer.fetch_representative_processed_crash` when no
    usable report exists for the signature.
    """
122

123

124
class ClouseauDataAnalyzer:
    """Analyze the data returned by Crash Clouseau about a specific crash
    signature.
    """

    # Reports and changesets scoring below this are never considered.
    MINIMUM_CLOUSEAU_SCORE_THRESHOLD: int = 8
    # Component used when the potential regressors do not agree on one.
    DEFAULT_CRASH_COMPONENT = ComponentName("Core", "General")

    def __init__(self, reports: Iterable[dict], bugs_store: BugsStore):
        self._clouseau_reports = reports
        self.bugs_store = bugs_store

    @cached_property
    def max_clouseau_score(self):
        """The maximum Clouseau score in the crash reports."""
        if not self._clouseau_reports:
            return 0
        return max(report["max_score"] for report in self._clouseau_reports)

    def _iter_suspect_changesets(self) -> Iterator[dict]:
        """Yield changesets whose patches could have caused the crash.

        A changeset qualifies when both its report and the changeset itself
        meet the minimum accepted score, and the changeset is neither a merge
        nor a backed-out patch.
        """
        minimum_accepted_score = max(
            self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score
        )
        for report in self._clouseau_reports:
            if report["max_score"] < minimum_accepted_score:
                continue
            for changeset in report["changesets"]:
                if (
                    changeset["max_score"] >= minimum_accepted_score
                    and not changeset["is_merge"]
                    and not changeset["is_backedout"]
                ):
                    yield changeset

    @cached_property
    def regressed_by_potential_bug_ids(self) -> set[int]:
        """The IDs for the bugs that their patches could have caused the crash."""
        return {
            changeset["bug_id"] for changeset in self._iter_suspect_changesets()
        }

    @cached_property
    def regressed_by_patch(self) -> str | None:
        """The hash of the patch that could have caused the crash.

        If there are multiple candidate patches, the value will be `None`.
        """
        potential_patches = {
            changeset["changeset"] for changeset in self._iter_suspect_changesets()
        }
        if len(potential_patches) == 1:
            return next(iter(potential_patches))
        return None

    @cached_property
    def regressed_by(self) -> int | None:
        """The ID of the bug that one of its patches could have caused
        the crash.

        If there are multiple bugs, the value will be `None`.
        """
        bug_ids = self.regressed_by_potential_bug_ids
        if len(bug_ids) == 1:
            return next(iter(bug_ids))
        return None

    @cached_property
    def regressed_by_potential_bugs(self) -> list[BugAnalyzer]:
        """The bugs whose patches could have caused the crash."""
        self.bugs_store.fetch_bugs(
            self.regressed_by_potential_bug_ids,
            [
                "id",
                "groups",
                "assigned_to",
                "product",
                "component",
            ],
        )
        return [
            self.bugs_store.get_bug_by_id(bug_id)
            for bug_id in self.regressed_by_potential_bug_ids
        ]

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The author of the patch that could have caused the crash.

        If there are multiple regressors, the value will be `None`.

        The regressor bug assignee is considered as the author, even if the
        assignee is not the patch author.
        """
        if not self.regressed_by:
            return None

        # `regressed_by` is only set when there is exactly one potential bug.
        bug = self.regressed_by_potential_bugs[0]
        assert bug.id == self.regressed_by
        return bug.get_field("assigned_to_detail")

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component that the crash belongs to.

        If there are multiple components, the value will be the default one.
        """
        potential_components = {
            bug.component for bug in self.regressed_by_potential_bugs
        }
        if len(potential_components) == 1:
            return next(iter(potential_components))
        return self.DEFAULT_CRASH_COMPONENT
×
237

238

239
class SocorroDataAnalyzer(socorro_util.SignatureStats):
    """Analyze the data returned by Socorro."""

    # Lazily-populated caches of Bugzilla's legal field values (fetched once
    # per process, on first use).
    _bugzilla_os_legal_values = None
    _bugzilla_cpu_legal_values_map = None
    _platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
    ):
        super().__init__(signature, num_total_crashes, platforms=self._platforms)

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Return the corresponding OS name in Bugzilla for the provided OS name
        from Socorro.

        If the OS name is not recognized, return "Other".
        """
        if cls._bugzilla_os_legal_values is None:
            cls._bugzilla_os_legal_values = set(
                bugzilla.BugFields.fetch_field_values("op_sys")
            )

        if op_sys in cls._bugzilla_os_legal_values:
            return op_sys

        if op_sys.startswith("OS X ") or op_sys.startswith("macOS "):
            op_sys = "macOS"
        elif op_sys.startswith("Windows"):
            op_sys = "Windows"
        elif "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            op_sys = "Linux"
        else:
            op_sys = "Other"

        return op_sys

    @staticmethod
    def _reduce_bugzilla_values(values: set) -> str:
        """Reduce a set of candidate Bugzilla field values to a single value.

        - exactly one value: that value
        - no values: "Unspecified"
        - "Other" alongside exactly one recognized value: the recognized value
        - anything else: "All"

        Note: mutates `values` when dropping "Other".
        """
        if len(values) == 2 and "Other" in values:
            # TODO: explain this workaround.
            values.remove("Other")

        if len(values) == 1:
            return next(iter(values))

        if len(values) == 0:
            return "Unspecified"

        return "All"

    @property
    def first_crash_date(self) -> str:
        """The date of the first crash within the query time range.

        The date is in YYYY-MM-DD format.
        """
        return self.signature["facets"]["histogram_date"][0]["term"][:10]

    @property
    def bugzilla_op_sys(self) -> str:
        """The name of the OS where the crash happens.

        The value is one of the legal values for Bugzilla's `op_sys` field.

        - If no OS name is found, the value will be "Unspecified".
        - If the OS name is not recognized, the value will be "Other".
        - If multiple OS names are found, the value will be "All". Unless the OS
          names can be resolved to a common name without a version. For example,
          "Windows 10" and "Windows 7" will become "Windows".
        """
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # Resolve to root OS name by removing the version number.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        return self._reduce_bugzilla_values(all_op_sys)

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Return the corresponding CPU name in Bugzilla for the provided name
        from Socorro.

        If the CPU is not recognized, return "Other".
        """
        if cls._bugzilla_cpu_legal_values_map is None:
            cls._bugzilla_cpu_legal_values_map = {
                value.lower(): value
                for value in bugzilla.BugFields.fetch_field_values("rep_platform")
            }

        return cls._bugzilla_cpu_legal_values_map.get(cpu, "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The CPU architecture of the devices where the crash happens.

        The value is one of the legal values for Bugzilla's `rep_platform` field.

        - If no CPU architecture is found, the value will be "Unspecified".
        - If the CPU architecture is not recognized, the value will be "Other".
        - If multiple CPU architectures are found, the value will "All".
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        return self._reduce_bugzilla_values(all_cpu_arch)

    @property
    def user_comments_page_url(self) -> str:
        """The URL to the Signature page on Socorro where the Comments tab is
        selected.
        """
        start_date = date.today() - timedelta(weeks=26)
        params = {
            "signature": self.signature_term,
            "date": socorro.SuperSearch.get_search_date(start_date),
        }
        return generate_signature_page_url(params, "comments")

    @property
    def num_user_comments(self) -> int:
        """The number of crash reports with user comments."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether the crash signature has any reports with a user comment."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The proto signature that occurs the most."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The number of crashes for the most occurring proto signature."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    def _build_ids(self) -> Iterator[int]:
        """Yields the build IDs where the crash occurred."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The build ID where most crashes occurred."""
        return self.signature["facets"]["build_id"][0]["term"]

    @cached_property
    def num_near_null_crashes(self) -> int:
        """The number of crashes that occurred on addresses near null."""
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_null_address(address["term"])
        )

    @property
    def is_near_null_crash(self) -> bool:
        """Whether all crashes occurred on addresses near null."""
        return self.num_near_null_crashes == self.num_crashes

    @property
    def is_potential_near_null_crash(self) -> bool:
        """Whether the signature is a potential near null crash.

        The value will be True if some but not all crashes occurred on addresses
        near null.
        """
        return not self.is_near_null_crash and self.num_near_null_crashes > 0

    @property
    def is_near_null_related_crash(self) -> bool:
        """Whether the signature is related to near null crashes.

        The value will be True if any of the crashes occurred on addresses near
        null.
        """
        return self.is_near_null_crash or self.is_potential_near_null_crash

    @cached_property
    def num_near_allocator_crashes(self) -> int:
        """The number of crashes that occurred on addresses near an allocator
        poison value.
        """
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_allocator_address(address["term"])
        )

    @property
    def is_near_allocator_crash(self) -> bool:
        """Whether all crashes occurred on addresses near an allocator poison
        value.
        """
        return self.num_near_allocator_crashes == self.num_crashes

    @property
    def is_potential_near_allocator_crash(self) -> bool:
        """Whether the signature is a potential near allocator poison value
        crash.

        The value will be True if some but not all crashes occurred on addresses
        near an allocator poison value.
        """
        return not self.is_near_allocator_crash and self.num_near_allocator_crashes > 0

    @property
    def is_near_allocator_related_crash(self) -> bool:
        """Whether the signature is related to near allocator poison value
        crashes.

        The value will be True if any of the crashes occurred on addresses near
        an allocator poison value.
        """
        return self.is_near_allocator_crash or self.is_potential_near_allocator_crash
×
479

480

481
class SignatureAnalyzer(SocorroDataAnalyzer, ClouseauDataAnalyzer):
    """Analyze the data related to a signature.

    This includes data from Socorro and Clouseau.
    """

    # The maximum number of candidate crash reports to inspect before giving
    # up and falling back to the first (possibly corrupted) representative.
    MAX_REPORTS_TO_INSPECT = 20

    def __init__(
        self,
        socorro_signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
        bugs_store: BugsStore,
    ):
        SocorroDataAnalyzer.__init__(self, socorro_signature, num_total_crashes)
        ClouseauDataAnalyzer.__init__(self, clouseau_reports, bugs_store)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Fetch crash reports from Socorro.

        Args:
            proto_signature: the proto signature to match exactly.
            build_id: one or more build IDs to restrict the search to.
            limit: the maximum number of reports to fetch.

        Yields:
            Socorro search hits; each hit contains the crash report `uuid`.
        """
        params = {
            "proto_signature": "=" + proto_signature,
            "build_id": build_id,
            "_columns": [
                "uuid",
            ],
            "_results_number": limit,
        }

        def handler(res: dict, data: dict):
            data.update(res)

        data: dict = {}
        socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait()

        yield from data["hits"]

    def _is_corrupted_crash_stack(self, processed_crash: dict) -> bool:
        """Whether the crash stack is corrupted.

        Args:
            processed_crash: The processed crash to check.

        Returns:
            True if the crash stack is corrupted, False otherwise.
        """
        # A crashing-thread frame with no module is treated as corruption.
        return any(
            not frame["module"]
            for frame in processed_crash["json_dump"]["crashing_thread"]["frames"]
        )

    def fetch_representative_processed_crash(self) -> dict:
        """Fetch a processed crash to represent the signature.

        This could fetch multiple processed crashes and return the one that is
        most likely to be useful.

        Raises:
            NoCrashReportFoundError: if no candidate report matches the most
                frequent proto signature.
        """
        limit_to_top_proto_signature = (
            self.num_top_proto_signature_crashes / self.num_crashes > 0.6
        )

        candidate_reports = itertools.chain(
            # Reports with a higher score from clouseau are more likely to be
            # useful.
            sorted(
                self._clouseau_reports,
                key=lambda report: report["max_score"],
                reverse=True,
            ),
            # Next we try find reports from the top crashing build because they
            # are likely to be representative.
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self._build_ids()),
        )

        first_representative_report = None
        for i, report in enumerate(candidate_reports):
            uuid = report["uuid"]
            processed_crash = socorro.ProcessedCrash.get_processed(uuid)[uuid]
            if (
                limit_to_top_proto_signature
                and processed_crash["proto_signature"] != self.top_proto_signature
            ):
                continue

            if first_representative_report is None:
                first_representative_report = processed_crash

            if not self._is_corrupted_crash_stack(processed_crash):
                return processed_crash

            if i >= self.MAX_REPORTS_TO_INSPECT:
                # We have tried enough reports, give up.
                break

        if first_representative_report is not None:
            # Fall back to the first representative report that we found, even
            # if it's corrupted.
            return first_representative_report

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )

    @cached_property
    def is_potential_security_crash(self) -> bool:
        """Whether the crash is related to a potential security bug.

        The value will be True if:
            - the signature is related to near allocator poison value crashes, or
            - one of the potential regressors is a security bug
        """
        return self.is_near_allocator_related_crash or any(
            bug.is_security for bug in self.regressed_by_potential_bugs
        )
599

600

601
class SignaturesDataFetcher:
×
602
    """Fetch the data related to the given signatures."""
603

604
    MEMORY_ACCESS_ERROR_REASONS = (
×
605
        # On Windows:
606
        "EXCEPTION_ACCESS_VIOLATION_READ",
607
        "EXCEPTION_ACCESS_VIOLATION_WRITE",
608
        "EXCEPTION_ACCESS_VIOLATION_EXEC"
609
        # On Linux:
610
        "SIGSEGV / SEGV_MAPERR",
611
        "SIGSEGV / SEGV_ACCERR",
612
    )
613

614
    EXCLUDED_MOZ_REASON_STRINGS = (
×
615
        "MOZ_CRASH(OOM)",
616
        "MOZ_CRASH(Out of memory)",
617
        "out of memory",
618
        "Shutdown hanging",
619
        # TODO(investigate): do we need to exclude signatures that their reason
620
        # contains `[unhandlable oom]`?
621
        # Example: arena_t::InitChunk | arena_t::AllocRun | arena_t::MallocLarge | arena_t::Malloc | BaseAllocator::malloc | Allocator::malloc | PageMalloc
622
        # "[unhandlable oom]",
623
    )
624

625
    # If any of the crash reason starts with any of the following, then it is
626
    # Network or I/O error.
627
    EXCLUDED_IO_ERROR_REASON_PREFIXES = (
×
628
        "EXCEPTION_IN_PAGE_ERROR_READ",
629
        "EXCEPTION_IN_PAGE_ERROR_WRITE",
630
        "EXCEPTION_IN_PAGE_ERROR_EXEC",
631
    )
632

633
    # TODO(investigate): do we need to exclude all these signatures prefixes?
634
    EXCLUDED_SIGNATURE_PREFIXES = (
×
635
        "OOM | ",
636
        "bad hardware | ",
637
        "shutdownhang | ",
638
    )
639

640
    SUMMARY_DURATION = timedelta(weeks=10)
×
641

642
    def __init__(
×
643
        self,
644
        signatures: Iterable[str],
645
        product: str = "Firefox",
646
        channel: str = "nightly",
647
    ):
648
        self._signatures = set(signatures)
×
649
        self._product = product
×
650
        self._channel = channel
×
651

652
    @classmethod
    def find_new_actionable_crashes(
        cls,
        product: str,
        channel: str,
        days_to_check: int = 7,
        days_without_crashes: int = 7,
    ) -> "SignaturesDataFetcher":
        """Find new actionable crashes.

        Args:
            product: The product to check.
            channel: The release channel to check.
            days_to_check: The number of days to check for crashes.
            days_without_crashes: The number of days without crashes before the
                `days_to_check` to consider the signature new.

        Returns:
            A `SignaturesDataFetcher` wrapping the new actionable signatures.
        """
        # The search window covers both the check period and the preceding
        # quiet period, so we can tell whether a signature existed before.
        duration = days_to_check + days_without_crashes
        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - timedelta(duration)
        earliest_allowed_date = lmdutils.get_date_str(
            end_date - timedelta(days_to_check)
        )
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": product,
            "release_channel": channel,
            "date": date_range,
            # TODO(investigate): should we do a local filter instead of the
            # following (should we exclude the signature if one of the crashes
            # is a shutdown hang?):
            # If the `ipc_shutdown_state` or `shutdown_progress` field are
            # non-empty then it's a shutdown hang.
            "ipc_shutdown_state": "__null__",
            "shutdown_progress": "__null__",
            # TODO(investigate): should we use the following instead of the
            # local filter.
            # "oom_allocation_size": "!__null__",
            "_aggs.signature": [
                "moz_crash_reason",
                "reason",
                "_histogram.date",
                "_cardinality.install_time",
                "_cardinality.oom_allocation_size",
            ],
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_resp: dict, data: list):
            # Append to `data` every signature that passes all the local
            # filtering criteria below.
            logger.debug(
                "Total of %d signatures received from Socorro",
                len(search_resp["facets"]["signature"]),
            )

            for crash in search_resp["facets"]["signature"]:
                signature = crash["term"]
                if any(
                    signature.startswith(excluded_prefix)
                    for excluded_prefix in cls.EXCLUDED_SIGNATURE_PREFIXES
                ):
                    # Ignore signatures that start with any of the excluded prefixes.
                    continue

                facets = crash["facets"]
                installations = facets["cardinality_install_time"]["value"]
                if installations <= 1:
                    # Ignore crashes that only happen on one installation.
                    continue

                first_date = facets["histogram_date"][0]["term"]
                if first_date < earliest_allowed_date:
                    # The crash is not new, skip it.
                    continue

                if any(
                    reason["term"].startswith(io_error_prefix)
                    for reason in facets["reason"]
                    for io_error_prefix in cls.EXCLUDED_IO_ERROR_REASON_PREFIXES
                ):
                    # Ignore Network or I/O error crashes.
                    continue

                if crash["count"] < 20:
                    # For signatures with low volume, having multiple types of
                    # memory errors indicates potential bad hardware crashes.
                    num_memory_error_types = sum(
                        reason["term"] in cls.MEMORY_ACCESS_ERROR_REASONS
                        for reason in facets["reason"]
                    )
                    if num_memory_error_types > 1:
                        # Potential bad hardware crash, skip it.
                        continue

                # TODO: Add a filter using the `possible_bit_flips_max_confidence`
                # field to exclude bad hardware crashes. The field is not available yet.
                # See: https://bugzilla.mozilla.org/show_bug.cgi?id=1816669#c3

                # TODO(investigate): is this needed since we are already
                # filtering signatures that start with "OOM | "
                if facets["cardinality_oom_allocation_size"]["value"]:
                    # If one of the crashes is an OOM crash, skip it.
                    continue

                # TODO(investigate): do we need to check for the `moz_crash_reason`
                moz_crash_reasons = facets["moz_crash_reason"]
                if moz_crash_reasons and any(
                    excluded_reason in reason["term"]
                    for reason in moz_crash_reasons
                    for excluded_reason in cls.EXCLUDED_MOZ_REASON_STRINGS
                ):
                    continue

                data.append(signature)

        signatures: list = []
        socorro.SuperSearch(
            params=params,
            handler=handler,
            handlerdata=signatures,
        ).wait()

        logger.debug(
            "Total of %d signatures left after applying the filtering criteria",
            len(signatures),
        )

        return cls(signatures, product, channel)
×
784

785
    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
        """Fetch the crash reports data from Crash Clouseau.

        Returns:
            A dict of report lists, presumably keyed by crash signature
            (see `clouseau.Reports.get_by_signatures`).
        """
        # Nothing to request; avoid a pointless round trip.
        if not self._signatures:
            return {}

        logger.debug(
            "Fetch from Clouseau: requesting reports for %d signatures",
            len(self._signatures),
        )

        signature_reports = clouseau.Reports.get_by_signatures(
            self._signatures,
            product=self._product,
            channel=self._channel,
        )

        # Clouseau may not have reports for every requested signature, so the
        # result can cover fewer signatures than were asked for.
        logger.debug(
            "Fetch from Clouseau: received reports for %d signatures",
            len(signature_reports),
        )

        return signature_reports
×
807

808
    def fetch_socorro_info(self) -> tuple[list[dict], int]:
        """Fetch the signature data from Socorro.

        Aggregates crash facets per signature over the summary period
        (``SUMMARY_DURATION`` ending today).

        Returns:
            A tuple ``(signature_facets, num_total_crashes)``; ``([], 0)``
            when there are no signatures to query.
        """
        if not self._signatures:
            return [], 0

        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - self.SUMMARY_DURATION
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": self._product,
            # TODO(investigate): should we include all release channels?
            "release_channel": self._channel,
            # TODO(investigate): should we limit based on the build date as well?
            "date": date_range,
            # TODO: split signatures into chunks to avoid very long query URLs
            "signature": ["=" + signature for signature in self._signatures],
            # NOTE: "cpu_arch" was previously listed twice; the duplicate
            # facet entry has been removed.
            "_aggs.signature": [
                "address",
                "build_id",
                "cpu_arch",
                "proto_signature",
                "_cardinality.user_comments",
                "platform_pretty_version",
                "_histogram.date",
                # The following are needed for SignatureStats:
                "platform",
                "is_garbage_collecting",
                "_cardinality.install_time",
                "startup_crash",
                "_histogram.uptime",
                "process_type",
            ],
            # We only need the facets, not individual crash records.
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_results: dict, data: dict):
            # Keep both the per-signature facets and the overall crash total;
            # the total is returned alongside the facets to the caller.
            data["num_total_crashes"] = search_results["total"]
            data["signatures"] = search_results["facets"]["signature"]

        logger.debug(
            "Fetch from Socorro: requesting info for %d signatures",
            len(self._signatures),
        )

        data: dict = {}
        socorro.SuperSearchUnredacted(
            params=params,
            handler=handler,
            handlerdata=data,
        ).wait()

        logger.debug(
            "Fetch from Socorro: received info for %d signatures",
            len(data["signatures"]),
        )

        return data["signatures"], data["num_total_crashes"]
    def fetch_bugs(
        self, include_fields: list[str] | None = None
    ) -> dict[str, list[dict]]:
        """Fetch bugs that are filed against the given signatures.

        Args:
            include_fields: extra Bugzilla fields to request in addition to
                ``cf_crash_signature`` (always requested, since the handler
                needs it to map bugs back to signatures).

        Returns:
            A mapping from signature to the list of bugs whose crash
            signature field matches it.
        """
        if not self._signatures:
            return {}

        params_base: dict = {
            "include_fields": [
                "cf_crash_signature",
            ],
        }

        if include_fields:
            params_base["include_fields"].extend(include_fields)

        # Build one query per chunk of 30 signatures to keep URLs short.
        # Each query uses Bugzilla custom-search triples (f{n}/o{n}/v{n})
        # wrapped in an OP/CP group joined with OR.
        params_list = []
        for signatures_chunk in Connection.chunks(list(self._signatures), 30):
            # Shallow copy is enough here: the loop only adds new top-level
            # keys; "include_fields" is shared but never mutated per chunk.
            params = params_base.copy()
            n = int(utils.get_last_field_num(params))
            params[f"f{n}"] = "OP"
            params[f"j{n}"] = "OR"
            for signature in signatures_chunk:
                n += 1
                params[f"f{n}"] = "cf_crash_signature"
                params[f"o{n}"] = "regexp"
                # Match the signature inside "[@ sig]" or "[@sig]" markers.
                params[f"v{n}"] = rf"\[(@ |@){re.escape(signature)}( \]|\])"
            # Close the OP group opened above.
            params[f"f{n+1}"] = "CP"
            params_list.append(params)

        signatures_bugs: dict = defaultdict(list)

        def handler(res, data):
            # A bug may list several signatures; attach it to every tracked
            # one it mentions.
            for bug in res["bugs"]:
                for signature in utils.get_signatures(bug["cf_crash_signature"]):
                    if signature in self._signatures:
                        data[signature].append(bug)

        logger.debug(
            "Fetch from Bugzilla: requesting bugs for %d signatures",
            len(self._signatures),
        )
        timeout = utils.get_config("common", "bz_query_timeout")
        Bugzilla(
            timeout=timeout,
            queries=[
                connection.Query(Bugzilla.API_URL, params, handler, signatures_bugs)
                for params in params_list
            ],
        ).wait()

        # TODO: remove the call to DevBugzilla after moving to production
        # Mutating params_list here is safe: the Bugzilla queries above have
        # already completed (.wait()).
        for params in params_list:
            # Excluded only filed bugs with the latest version. This will
            # re-generate the bugs after bumping the version.
            n = int(utils.get_last_field_num(params))
            params[f"f{n}"] = "status_whiteboard"
            params[f"o{n}"] = "substring"
            params[f"v{n}"] = f"[bugbot-crash-v{EXPERIMENT_VERSION}]"
        DevBugzilla(
            timeout=timeout,
            queries=[
                connection.Query(DevBugzilla.API_URL, params, handler, signatures_bugs)
                for params in params_list
            ],
        ).wait()

        logger.debug(
            "Fetch from Bugzilla: received bugs for %d signatures", len(signatures_bugs)
        )

        return signatures_bugs
    def analyze(self) -> list[SignatureAnalyzer]:
        """Analyze the data related to the signatures.

        Filters the tracked signatures against Bugzilla and Clouseau, then
        builds one ``SignatureAnalyzer`` per surviving Socorro facet.
        """
        filed_bugs = self.fetch_bugs()
        # TODO(investigate): For now, we are ignoring signatures that have
        # bugs filed even if they are closed long time ago. We should
        # investigate whether we should include the ones with closed bugs.
        # For example, if the bug was closed as Fixed years ago.
        self._signatures.difference_update(filed_bugs.keys())

        clouseau_reports = self.fetch_clouseau_crash_reports()
        # TODO(investigate): For now, we are ignoring signatures that are not
        # analyzed by clouseau. We should investigate why they are not
        # analyzed and whether we should include them.
        self._signatures.intersection_update(clouseau_reports.keys())

        signature_facets, num_total_crashes = self.fetch_socorro_info()
        # One shared store so all analyzers reuse the same bug data.
        shared_bugs_store = BugsStore()

        return [
            SignatureAnalyzer(
                facet,
                num_total_crashes,
                clouseau_reports[facet["term"]],
                shared_bugs_store,
            )
            for facet in signature_facets
        ]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc