
mozilla / relman-auto-nag / build #4808 (push, via coveralls-python)

01 Nov 2023 12:36PM UTC. Coverage: 22.006%. Remained the same.

suhaibmujahid: [file_crash_bug] Clean up the code

716 of 3584 branches covered (19.98%)

3 of 3 new or added lines in 1 file covered. (100.0%)

1924 of 8743 relevant lines covered (22.01%)

0.22 hits per line

Source File: /bugbot/crash/analyzer.py (0.0% covered; every executable line in this file is uncovered)

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import itertools
import re
from collections import defaultdict
from datetime import date, timedelta
from functools import cached_property
from typing import Iterable, Iterator

from libmozdata import bugzilla, clouseau, connection, socorro
from libmozdata import utils as lmdutils
from libmozdata.bugzilla import Bugzilla
from libmozdata.connection import Connection

from bugbot import logger, utils
from bugbot.bug.analyzer import BugAnalyzer, BugsStore
from bugbot.components import ComponentName
from bugbot.crash import socorro_util

# The max offset from a memory address to be considered "near".
OFFSET_64_BIT = 0x1000
OFFSET_32_BIT = 0x100
# Allocator poison value addresses.
ALLOCATOR_ADDRESSES_64_BIT = (
    (0xE5E5E5E5E5E5E5E5, OFFSET_64_BIT),
    # On 64-bit windows, sometimes it could be doing something with a 32-bit
    # value gotten from freed memory, so it'll be 0X00000000E5E5E5E5 +/-, and
    # because of the address limitation, quite often it will be
    # 0X0000E5E5E5E5E5E5 +/-.
    (0x00000000E5E5E5E5, OFFSET_32_BIT),
    (0x0000E5E5E5E5E5E5, OFFSET_64_BIT),
    (0x4B4B4B4B4B4B4B4B, OFFSET_64_BIT),
)
ALLOCATOR_ADDRESSES_32_BIT = (
    (0xE5E5E5E5, OFFSET_32_BIT),
    (0x4B4B4B4B, OFFSET_32_BIT),
)
# Ranges where addresses are considered near allocator poison values.
ALLOCATOR_RANGES_64_BIT = tuple(
    (addr - offset, addr + offset) for addr, offset in ALLOCATOR_ADDRESSES_64_BIT
)
ALLOCATOR_RANGES_32_BIT = tuple(
    (addr - offset, addr + offset) for addr, offset in ALLOCATOR_ADDRESSES_32_BIT
)


def is_near_null_address(str_address) -> bool:
    """Check if the address is near null.

    Args:
        str_address: The memory address to check.

    Returns:
        True if the address is near null, False otherwise.
    """
    address = int(str_address, 0)
    is_64_bit = len(str_address) >= 18

    if is_64_bit:
        return -OFFSET_64_BIT <= address <= OFFSET_64_BIT

    return -OFFSET_32_BIT <= address <= OFFSET_32_BIT


def is_near_allocator_address(str_address) -> bool:
    """Check if the address is near an allocator poison value.

    Args:
        str_address: The memory address to check.

    Returns:
        True if the address is near an allocator poison value, False otherwise.
    """
    address = int(str_address, 0)
    is_64_bit = len(str_address) >= 18

    return any(
        low <= address <= high
        for low, high in (
            ALLOCATOR_RANGES_64_BIT if is_64_bit else ALLOCATOR_RANGES_32_BIT
        )
    )


# TODO: Move this to libmozdata
def generate_signature_page_url(params: dict, tab: str) -> str:
    """Generate a URL to the signature page on Socorro

    Args:
        params: the parameters for the search query.
        tab: the page tab that should be selected.

    Returns:
        The URL of the signature page on Socorro
    """
    web_url = socorro.Socorro.CRASH_STATS_URL
    query = lmdutils.get_params_for_url(params)
    return f"{web_url}/signature/{query}#{tab}"


class NoCrashReportFoundError(Exception):
    """There are no crash reports that meet the required criteria."""


class ClouseauDataAnalyzer:
    """Analyze the data returned by Crash Clouseau about a specific crash
    signature.
    """

    MINIMUM_CLOUSEAU_SCORE_THRESHOLD: int = 8
    DEFAULT_CRASH_COMPONENT = ComponentName("Core", "General")

    def __init__(self, reports: Iterable[dict], bugs_store: BugsStore):
        self._clouseau_reports = reports
        self.bugs_store = bugs_store

    @cached_property
    def max_clouseau_score(self):
        """The maximum Clouseau score in the crash reports."""
        if not self._clouseau_reports:
            return 0
        return max(report["max_score"] for report in self._clouseau_reports)

    @cached_property
    def regressed_by_potential_bug_ids(self) -> set[int]:
        """The IDs of the bugs whose patches could have caused the crash."""
        minimum_accepted_score = max(
            self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score
        )
        return {
            changeset["bug_id"]
            for report in self._clouseau_reports
            if report["max_score"] >= minimum_accepted_score
            for changeset in report["changesets"]
            if changeset["max_score"] >= minimum_accepted_score
            and not changeset["is_merge"]
            and not changeset["is_backedout"]
        }

    @cached_property
    def regressed_by_patch(self) -> str | None:
        """The hash of the patch that could have caused the crash."""
        minimum_accepted_score = max(
            self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score
        )
        potential_patches = {
            changeset["changeset"]
            for report in self._clouseau_reports
            if report["max_score"] >= minimum_accepted_score
            for changeset in report["changesets"]
            if changeset["max_score"] >= minimum_accepted_score
            and not changeset["is_merge"]
            and not changeset["is_backedout"]
        }
        if len(potential_patches) == 1:
            return next(iter(potential_patches))
        return None

    @cached_property
    def regressed_by(self) -> int | None:
        """The ID of the bug, one of whose patches could have caused the crash.

        If there are multiple bugs, the value will be `None`.
        """
        bug_ids = self.regressed_by_potential_bug_ids
        if len(bug_ids) == 1:
            return next(iter(bug_ids))
        return None

    @cached_property
    def regressed_by_potential_bugs(self) -> list[BugAnalyzer]:
        """The bugs whose patches could have caused the crash."""
        self.bugs_store.fetch_bugs(
            self.regressed_by_potential_bug_ids,
            [
                "id",
                "groups",
                "assigned_to",
                "product",
                "component",
                "_custom",
            ],
        )
        return [
            self.bugs_store.get_bug_by_id(bug_id)
            for bug_id in self.regressed_by_potential_bug_ids
        ]

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The author of the patch that could have caused the crash.

        If there are multiple regressors, the value will be `None`.

        The regressor bug's assignee is considered the author, even if the
        assignee is not the patch author.
        """

        if not self.regressed_by:
            return None

        bug = self.regressed_by_potential_bugs[0]
        assert bug.id == self.regressed_by
        return bug.get_field("assigned_to_detail")

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component that the crash belongs to.

        If there are multiple components, the value will be the default one.
        """
        potential_components = {
            bug.component for bug in self.regressed_by_potential_bugs
        }
        if len(potential_components) == 1:
            return next(iter(potential_components))
        return self.DEFAULT_CRASH_COMPONENT


class SocorroDataAnalyzer(socorro_util.SignatureStats):
    """Analyze the data returned by Socorro."""

    _bugzilla_os_legal_values = None
    _bugzilla_cpu_legal_values_map = None
    _platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
    ):
        super().__init__(signature, num_total_crashes, platforms=self._platforms)

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Return the corresponding OS name in Bugzilla for the provided OS name
        from Socorro.

        If the OS name is not recognized, return "Other".
        """
        if cls._bugzilla_os_legal_values is None:
            cls._bugzilla_os_legal_values = set(
                bugzilla.BugFields.fetch_field_values("op_sys")
            )

        if op_sys in cls._bugzilla_os_legal_values:
            return op_sys

        if op_sys.startswith("OS X ") or op_sys.startswith("macOS "):
            op_sys = "macOS"
        elif op_sys.startswith("Windows"):
            op_sys = "Windows"
        elif "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            op_sys = "Linux"
        else:
            op_sys = "Other"

        return op_sys

    @property
    def first_crash_date(self) -> str:
        """The date of the first crash within the query time range.

        The date is in YYYY-MM-DD format.
        """
        return self.signature["facets"]["histogram_date"][0]["term"][:10]

    @property
    def bugzilla_op_sys(self) -> str:
        """The name of the OS where the crash happens.

        The value is one of the legal values for Bugzilla's `op_sys` field.

        - If no OS name is found, the value will be "Unspecified".
        - If the OS name is not recognized, the value will be "Other".
        - If multiple OS names are found, the value will be "All", unless the
          OS names can be resolved to a common name without a version. For
          example, "Windows 10" and "Windows 7" will become "Windows".
        """
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # Resolve to root OS name by removing the version number.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        if len(all_op_sys) == 2 and "Other" in all_op_sys:
            # TODO: explain this workaround.
            all_op_sys.remove("Other")

        if len(all_op_sys) == 1:
            return next(iter(all_op_sys))

        if len(all_op_sys) == 0:
            return "Unspecified"

        return "All"

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Return the corresponding CPU name in Bugzilla for the provided name
        from Socorro.

        If the CPU is not recognized, return "Other".
        """
        if cls._bugzilla_cpu_legal_values_map is None:
            cls._bugzilla_cpu_legal_values_map = {
                value.lower(): value
                for value in bugzilla.BugFields.fetch_field_values("rep_platform")
            }

        return cls._bugzilla_cpu_legal_values_map.get(cpu, "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The CPU architecture of the devices where the crash happens.

        The value is one of the legal values for Bugzilla's `rep_platform` field.

        - If no CPU architecture is found, the value will be "Unspecified".
        - If the CPU architecture is not recognized, the value will be "Other".
        - If multiple CPU architectures are found, the value will be "All".
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        if len(all_cpu_arch) == 2 and "Other" in all_cpu_arch:
            all_cpu_arch.remove("Other")

        if len(all_cpu_arch) == 1:
            return next(iter(all_cpu_arch))

        if len(all_cpu_arch) == 0:
            return "Unspecified"

        return "All"

    @property
    def user_comments_page_url(self) -> str:
        """The URL to the Signature page on Socorro where the Comments tab is
        selected.
        """
        start_date = date.today() - timedelta(weeks=26)
        params = {
            "signature": self.signature_term,
            "date": socorro.SuperSearch.get_search_date(start_date),
        }
        return generate_signature_page_url(params, "comments")

    @property
    def num_user_comments(self) -> int:
        """The number of crash reports with user comments."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether the crash signature has any reports with a user comment."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The proto signature that occurs the most."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The number of crashes for the most occurring proto signature."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    def _build_ids(self) -> Iterator[int]:
        """Yields the build IDs where the crash occurred."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The build ID where most crashes occurred."""
        return self.signature["facets"]["build_id"][0]["term"]

    @cached_property
    def num_near_null_crashes(self) -> int:
        """The number of crashes that occurred on addresses near null."""
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_null_address(address["term"])
        )

    @property
    def is_near_null_crash(self) -> bool:
        """Whether all crashes occurred on addresses near null."""
        return self.num_near_null_crashes == self.num_crashes

    @property
    def is_potential_near_null_crash(self) -> bool:
        """Whether the signature is a potential near null crash.

        The value will be True if some but not all crashes occurred on addresses
        near null.
        """
        return not self.is_near_null_crash and self.num_near_null_crashes > 0

    @property
    def is_near_null_related_crash(self) -> bool:
        """Whether the signature is related to near null crashes.

        The value will be True if any of the crashes occurred on addresses near
        null.
        """
        return self.is_near_null_crash or self.is_potential_near_null_crash

    @cached_property
    def num_near_allocator_crashes(self) -> int:
        """The number of crashes that occurred on addresses near an allocator
        poison value.
        """
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_allocator_address(address["term"])
        )

    @property
    def is_near_allocator_crash(self) -> bool:
        """Whether all crashes occurred on addresses near an allocator poison
        value.
        """
        return self.num_near_allocator_crashes == self.num_crashes

    @property
    def is_potential_near_allocator_crash(self) -> bool:
        """Whether the signature is a potential near allocator poison value
        crash.

        The value will be True if some but not all crashes occurred on addresses
        near an allocator poison value.
        """
        return not self.is_near_allocator_crash and self.num_near_allocator_crashes > 0

    @property
    def is_near_allocator_related_crash(self) -> bool:
        """Whether the signature is related to near allocator poison value
        crashes.

        The value will be True if any of the crashes occurred on addresses near
        an allocator poison value.
        """
        return self.is_near_allocator_crash or self.is_potential_near_allocator_crash


class SignatureAnalyzer(SocorroDataAnalyzer, ClouseauDataAnalyzer):
    """Analyze the data related to a signature.

    This includes data from Socorro and Clouseau.
    """

    def __init__(
        self,
        socorro_signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
        bugs_store: BugsStore,
    ):
        SocorroDataAnalyzer.__init__(self, socorro_signature, num_total_crashes)
        ClouseauDataAnalyzer.__init__(self, clouseau_reports, bugs_store)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        params = {
            "proto_signature": "=" + proto_signature,
            "build_id": build_id,
            "_columns": [
                "uuid",
            ],
            "_results_number": limit,
        }

        def handler(res: dict, data: dict):
            data.update(res)

        data: dict = {}
        socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait()

        yield from data["hits"]

    def _is_corrupted_crash_stack(self, processed_crash: dict) -> bool:
        """Whether the crash stack is corrupted.

        Args:
            processed_crash: The processed crash to check.

        Returns:
            True if the crash stack is corrupted, False otherwise.
        """

        return any(
            not frame["module"]
            for frame in processed_crash["json_dump"]["crashing_thread"]["frames"]
        )

    def fetch_representative_processed_crash(self) -> dict:
        """Fetch a processed crash to represent the signature.

        This could fetch multiple processed crashes and return the one that is
        most likely to be useful.
        """
        limit_to_top_proto_signature = (
            self.num_top_proto_signature_crashes / self.num_crashes > 0.6
        )

        candidate_reports = itertools.chain(
            # Reports with a higher score from clouseau are more likely to be
            # useful.
            sorted(
                self._clouseau_reports,
                key=lambda report: report["max_score"],
                reverse=True,
            ),
            # Next we try to find reports from the top crashing build because
            # they are likely to be representative.
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self._build_ids()),
        )

        first_representative_report = None
        for i, report in enumerate(candidate_reports):
            uuid = report["uuid"]
            processed_crash = socorro.ProcessedCrash.get_processed(uuid)[uuid]
            if (
                limit_to_top_proto_signature
                and processed_crash["proto_signature"] != self.top_proto_signature
            ):
                continue

            if first_representative_report is None:
                first_representative_report = processed_crash

            if not self._is_corrupted_crash_stack(processed_crash):
                return processed_crash

            if i >= 20:
                # We have tried enough reports, give up.
                break

        if first_representative_report is not None:
            # Fall back to the first representative report that we found, even
            # if it's corrupted.
            return first_representative_report

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )

    @cached_property
    def is_potential_security_crash(self) -> bool:
        """Whether the crash is related to a potential security bug.

        The value will be True if:
            - the signature is related to near allocator poison value crashes, or
            - one of the potential regressors is a security bug
        """
        return self.is_near_allocator_related_crash or any(
            bug.is_security for bug in self.regressed_by_potential_bugs
        )

    def has_moz_crash_reason(self, reason: str) -> bool:
        """Whether the crash has a specific MOZ_CRASH reason.

        Args:
            reason: The MOZ_CRASH reason to check.

        Returns:
            True if any of the MOZ_CRASH reasons has a partial match with the
            provided reason.
        """
        return any(
            reason in moz_crash_reason["term"]
            for moz_crash_reason in self.signature["facets"]["moz_crash_reason"]
        )

    @property
    def process_type_summary(self) -> str:
        """The summary of the process types for the crash signature."""
        process_types = self.signature["facets"]["process_type"]
        if len(process_types) == 0:
            return "Unknown"

        if len(process_types) == 1:
            process_type = process_types[0]["term"]
            # Short process type names are usually acronyms (e.g., gpu for GPU),
            # so we upper-case them. Otherwise, we capitalize the first letter.
            if len(process_type) <= 3:
                return process_type.upper()
            return process_type.capitalize()

        return "Multiple distinct types"


class SignaturesDataFetcher:
    """Fetch the data related to the given signatures."""

    MEMORY_ACCESS_ERROR_REASONS = (
        # On Windows:
        "EXCEPTION_ACCESS_VIOLATION_READ",
        "EXCEPTION_ACCESS_VIOLATION_WRITE",
        "EXCEPTION_ACCESS_VIOLATION_EXEC",
        # On Linux:
        "SIGSEGV / SEGV_MAPERR",
        "SIGSEGV / SEGV_ACCERR",
    )

    EXCLUDED_MOZ_REASON_STRINGS = (
        "MOZ_CRASH(OOM)",
        "MOZ_CRASH(Out of memory)",
        "out of memory",
        "Shutdown hanging",
        # TODO(investigate): do we need to exclude signatures whose reason
        # contains `[unhandlable oom]`?
        # Example: arena_t::InitChunk | arena_t::AllocRun | arena_t::MallocLarge | arena_t::Malloc | BaseAllocator::malloc | Allocator::malloc | PageMalloc
        # "[unhandlable oom]",
    )

    # If any of the crash reasons starts with any of the following prefixes,
    # then it is a network or I/O error.
    EXCLUDED_IO_ERROR_REASON_PREFIXES = (
        "EXCEPTION_IN_PAGE_ERROR_READ",
        "EXCEPTION_IN_PAGE_ERROR_WRITE",
        "EXCEPTION_IN_PAGE_ERROR_EXEC",
    )

    # TODO(investigate): do we need to exclude all these signature prefixes?
    EXCLUDED_SIGNATURE_PREFIXES = (
        "OOM | ",
        "bad hardware | ",
        "shutdownhang | ",
    )

    SUMMARY_DURATION = timedelta(weeks=10)

    def __init__(
        self,
        signatures: Iterable[str],
        product: str = "Firefox",
        channel: str = "nightly",
    ):
        self._signatures = set(signatures)
        self._product = product
        self._channel = channel

    @classmethod
    def find_new_actionable_crashes(
        cls,
        product: str,
        channel: str,
        days_to_check: int = 7,
        days_without_crashes: int = 7,
    ) -> "SignaturesDataFetcher":
        """Find new actionable crashes.

        Args:
            product: The product to check.
            channel: The release channel to check.
            days_to_check: The number of days to check for crashes.
            days_without_crashes: The number of days without crashes before the
                `days_to_check` to consider the signature new.

        Returns:
            A SignaturesDataFetcher for the new actionable signatures.
        """
        duration = days_to_check + days_without_crashes
        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - timedelta(duration)
        earliest_allowed_date = lmdutils.get_date_str(
            end_date - timedelta(days_to_check)
        )
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": product,
            "release_channel": channel,
            "date": date_range,
            # TODO(investigate): should we do a local filter instead of the
            # following (should we exclude the signature if one of the crashes
            # is a shutdown hang?):
            # If the `ipc_shutdown_state` or `shutdown_progress` fields are
            # non-empty then it's a shutdown hang.
            "ipc_shutdown_state": "__null__",
            "shutdown_progress": "__null__",
            # TODO(investigate): should we use the following instead of the
            # local filter?
            # "oom_allocation_size": "!__null__",
            "_aggs.signature": [
                "moz_crash_reason",
                "reason",
                "possible_bit_flips_max_confidence",
                "_histogram.date",
                "_cardinality.install_time",
                "_cardinality.oom_allocation_size",
            ],
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_resp: dict, data: list):
            logger.debug(
                "Total of %d signatures received from Socorro",
                len(search_resp["facets"]["signature"]),
            )

            for crash in search_resp["facets"]["signature"]:
                signature = crash["term"]
                if any(
                    signature.startswith(excluded_prefix)
                    for excluded_prefix in cls.EXCLUDED_SIGNATURE_PREFIXES
                ):
                    # Ignore signatures that start with any of the excluded prefixes.
                    continue

                facets = crash["facets"]
                installations = facets["cardinality_install_time"]["value"]
                if installations <= 1:
                    # Ignore crashes that only happen on one installation.
                    continue

                first_date = facets["histogram_date"][0]["term"]
                if first_date < earliest_allowed_date:
                    # The crash is not new, skip it.
                    continue

                if any(
                    reason["term"].startswith(io_error_prefix)
                    for reason in facets["reason"]
                    for io_error_prefix in cls.EXCLUDED_IO_ERROR_REASON_PREFIXES
                ):
                    # Ignore network or I/O error crashes.
                    continue

                if crash["count"] < 20:
                    # For signatures with low volume, having multiple types of
                    # memory errors indicates potential bad hardware crashes.
                    num_memory_error_types = sum(
                        reason["term"] in cls.MEMORY_ACCESS_ERROR_REASONS
                        for reason in facets["reason"]
                    )
                    if num_memory_error_types > 1:
                        # Potential bad hardware crash, skip it.
                        continue

                bit_flips_count = sum(
                    row["count"] for row in facets["possible_bit_flips_max_confidence"]
                )
                bit_flips_percentage = bit_flips_count / crash["count"]
                if bit_flips_percentage >= 0.2:
                    # Potential bad hardware crash, skip it.
                    continue

                # TODO(investigate): is this needed since we are already
                # filtering out signatures that start with "OOM | "?
                if facets["cardinality_oom_allocation_size"]["value"]:
                    # If one of the crashes is an OOM crash, skip it.
                    continue

                # TODO(investigate): do we need to check for the `moz_crash_reason`?
                moz_crash_reasons = facets["moz_crash_reason"]
                if moz_crash_reasons and any(
                    excluded_reason in reason["term"]
                    for reason in moz_crash_reasons
                    for excluded_reason in cls.EXCLUDED_MOZ_REASON_STRINGS
                ):
                    continue

                data.append(signature)

        signatures: list = []
        socorro.SuperSearch(
            params=params,
            handler=handler,
            handlerdata=signatures,
        ).wait()

        logger.debug(
            "Total of %d signatures left after applying the filtering criteria",
            len(signatures),
        )

        return cls(signatures, product, channel)

    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
        """Fetch the crash reports data from Crash Clouseau."""
        if not self._signatures:
            return {}

        logger.debug(
            "Fetch from Clouseau: requesting reports for %d signatures",
            len(self._signatures),
        )

        signature_reports = clouseau.Reports.get_by_signatures(
            self._signatures,
            product=self._product,
            channel=self._channel,
        )

        logger.debug(
            "Fetch from Clouseau: received reports for %d signatures",
            len(signature_reports),
        )

        return signature_reports

    def fetch_socorro_info(self) -> tuple[list[dict], int]:
        """Fetch the signature data from Socorro."""
        if not self._signatures:
            return [], 0

        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - self.SUMMARY_DURATION
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": self._product,
            # TODO(investigate): should we include all release channels?
            "release_channel": self._channel,
            # TODO(investigate): should we limit based on the build date as well?
            "date": date_range,
            # TODO: split signatures into chunks to avoid very long query URLs
            "signature": ["=" + signature for signature in self._signatures],
            "_aggs.signature": [
                "address",
                "build_id",
                "cpu_arch",
                "proto_signature",
                "_cardinality.user_comments",
                "cpu_arch",
                "platform_pretty_version",
                "_histogram.date",
                # The following are needed for SignatureStats:
                "platform",
                "is_garbage_collecting",
                "_cardinality.install_time",
                "startup_crash",
                "_histogram.uptime",
                "process_type",
                "moz_crash_reason",
            ],
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_results: dict, data: dict):
            data["num_total_crashes"] = search_results["total"]
            data["signatures"] = search_results["facets"]["signature"]

        logger.debug(
            "Fetch from Socorro: requesting info for %d signatures",
            len(self._signatures),
        )

        data: dict = {}
        socorro.SuperSearchUnredacted(
            params=params,
            handler=handler,
            handlerdata=data,
        ).wait()

        logger.debug(
            "Fetch from Socorro: received info for %d signatures",
            len(data["signatures"]),
        )

        return data["signatures"], data["num_total_crashes"]

    def fetch_bugs(
        self, include_fields: list[str] | None = None
    ) -> dict[str, list[dict]]:
        """Fetch bugs that are filed against the given signatures."""
        if not self._signatures:
            return {}

        params_base: dict = {
            "include_fields": [
                "cf_crash_signature",
            ],
        }

        if include_fields:
            params_base["include_fields"].extend(include_fields)

        params_list = []
        for signatures_chunk in Connection.chunks(list(self._signatures), 30):
            params = params_base.copy()
            n = int(utils.get_last_field_num(params))
            params[f"f{n}"] = "OP"
            params[f"j{n}"] = "OR"
            for signature in signatures_chunk:
                n += 1
                params[f"f{n}"] = "cf_crash_signature"
                params[f"o{n}"] = "regexp"
                params[f"v{n}"] = rf"\[(@ |@){re.escape(signature)}( \]|\])"
            params[f"f{n+1}"] = "CP"
            params_list.append(params)

        signatures_bugs: dict = defaultdict(list)

        def handler(res, data):
            for bug in res["bugs"]:
                for signature in utils.get_signatures(bug["cf_crash_signature"]):
                    if signature in self._signatures:
                        data[signature].append(bug)

        logger.debug(
            "Fetch from Bugzilla: requesting bugs for %d signatures",
            len(self._signatures),
        )
        timeout = utils.get_config("common", "bz_query_timeout")
        Bugzilla(
            timeout=timeout,
            queries=[
                connection.Query(Bugzilla.API_URL, params, handler, signatures_bugs)
                for params in params_list
            ],
        ).wait()

        logger.debug(
            "Fetch from Bugzilla: received bugs for %d signatures", len(signatures_bugs)
        )

        return signatures_bugs

    def analyze(self) -> list[SignatureAnalyzer]:
        """Analyze the data related to the signatures."""
        bugs = self.fetch_bugs()
        # TODO(investigate): For now, we are ignoring signatures that have bugs
        # filed even if they were closed a long time ago. We should investigate
        # whether we should include the ones with closed bugs. For example, if
        # the bug was closed as Fixed years ago.
        self._signatures.difference_update(bugs.keys())

        clouseau_reports = self.fetch_clouseau_crash_reports()
        # TODO(investigate): For now, we are ignoring signatures that are not
        # analyzed by clouseau. We should investigate why they are not analyzed
        # and whether we should include them.
        self._signatures.intersection_update(clouseau_reports.keys())

        signatures, num_total_crashes = self.fetch_socorro_info()
        bugs_store = BugsStore()

        return [
            SignatureAnalyzer(
                signature,
                num_total_crashes,
                clouseau_reports[signature["term"]],
                bugs_store,
            )
            for signature in signatures
        ]
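
For orientation, here is a minimal usage sketch of the pipeline defined above, based only on the classes and methods in this file. The import path is inferred from the file path /bugbot/crash/analyzer.py, the printed fields are illustrative, and working access to Socorro, Clouseau, and Bugzilla via libmozdata is assumed to be configured.

    # Illustrative sketch: discover new actionable crash signatures on Firefox
    # Nightly, run the full analysis, and print a few fields per signature.
    from bugbot.crash.analyzer import SignaturesDataFetcher

    fetcher = SignaturesDataFetcher.find_new_actionable_crashes(
        product="Firefox", channel="nightly"
    )

    for analysis in fetcher.analyze():
        # Each item is a SignatureAnalyzer combining Socorro and Clouseau data.
        print(analysis.signature_term, analysis.num_crashes)
        print("  component:", analysis.crash_component)
        print("  op_sys:", analysis.bugzilla_op_sys, "cpu:", analysis.bugzilla_cpu_arch)
        print("  process type:", analysis.process_type_summary)
        print("  potential regressor bug:", analysis.regressed_by)
        print("  potential security crash:", analysis.is_potential_security_crash)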