mozilla / relman-auto-nag / #4804 (push, coveralls-python)
01 Nov 2023 05:04AM CUT, coverage: 22.036% (-0.01%) from 22.048%
suhaibmujahid: [file_crash_bug] Fix detecting addresses with near allocator poison values

716 of 3578 branches covered (0.0%)
2 of 2 new or added lines in 1 file covered (100.0%)
1924 of 8731 relevant lines covered (22.04%)
0.22 hits per line

Source file: /bugbot/crash/analyzer.py

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import itertools
import re
from collections import defaultdict
from datetime import date, timedelta
from functools import cached_property
from typing import Iterable, Iterator

from libmozdata import bugzilla, clouseau, connection, socorro
from libmozdata import utils as lmdutils
from libmozdata.bugzilla import Bugzilla
from libmozdata.connection import Connection

from bugbot import logger, utils
from bugbot.bug.analyzer import BugAnalyzer, BugsStore
from bugbot.components import ComponentName
from bugbot.crash import socorro_util

# The max offset from a memory address to be considered "near".
OFFSET_64_BIT = 0x1000
OFFSET_32_BIT = 0x100
# Allocator poison value addresses.
ALLOCATOR_ADDRESSES_64_BIT = (
    (0xE5E5E5E5E5E5E5E5, OFFSET_64_BIT),
    # On 64-bit windows, sometimes it could be doing something with a 32-bit
    # value gotten from freed memory, so it'll be 0X00000000E5E5E5E5 +/-, and
    # because of the address limitation, quite often it will be
    # 0X0000E5E5E5E5E5E5 +/-.
    (0x00000000E5E5E5E5, OFFSET_32_BIT),
    (0x0000E5E5E5E5E5E5, OFFSET_64_BIT),
    (0x4B4B4B4B4B4B4B4B, OFFSET_64_BIT),
)
ALLOCATOR_ADDRESSES_32_BIT = (
    (0xE5E5E5E5, OFFSET_32_BIT),
    (0x4B4B4B4B, OFFSET_32_BIT),
)
# Ranges where addresses are considered near allocator poison values.
ALLOCATOR_RANGES_64_BIT = tuple(
    (addr - offset, addr + offset) for addr, offset in ALLOCATOR_ADDRESSES_64_BIT
)
ALLOCATOR_RANGES_32_BIT = tuple(
    (addr - offset, addr + offset) for addr, offset in ALLOCATOR_ADDRESSES_32_BIT
)
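
# Illustrative note (added for clarity, not part of the original module): the
# generated ranges are plain (low, high) pairs. For the 32-bit table above they
# evaluate to:
#     ALLOCATOR_RANGES_32_BIT == (
#         (0xE5E5E4E5, 0xE5E5E6E5),
#         (0x4B4B4A4B, 0x4B4B4C4B),
#     )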


def is_near_null_address(str_address) -> bool:
    """Check if the address is near null.

    Args:
        str_address: The memory address to check.

    Returns:
        True if the address is near null, False otherwise.
    """
    address = int(str_address, 0)
    is_64_bit = len(str_address) >= 18

    if is_64_bit:
        return -OFFSET_64_BIT <= address <= OFFSET_64_BIT

    return -OFFSET_32_BIT <= address <= OFFSET_32_BIT


def is_near_allocator_address(str_address) -> bool:
    """Check if the address is near an allocator poison value.

    Args:
        str_address: The memory address to check.

    Returns:
        True if the address is near an allocator poison value, False otherwise.
    """
    address = int(str_address, 0)
    is_64_bit = len(str_address) >= 18

    return any(
        low <= address <= high
        for low, high in (
            ALLOCATOR_RANGES_64_BIT if is_64_bit else ALLOCATOR_RANGES_32_BIT
        )
    )
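
# Illustrative usage of the helpers above (added for clarity; the example
# addresses are hypothetical and not part of the original module). Socorro
# reports addresses as hex strings, so an 18-character string ("0x" plus 16
# hex digits) is treated as a 64-bit address:
#
#     is_near_null_address("0x0000000000000010")       -> True
#     is_near_null_address("0xe5e5e5e5e5e5e5e8")       -> False
#     is_near_allocator_address("0xe5e5e5e5e5e5e5e8")  -> True
#     is_near_allocator_address("0x0000000000000010")  -> False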


# TODO: Move this to libmozdata
def generate_signature_page_url(params: dict, tab: str) -> str:
    """Generate a URL to the signature page on Socorro

    Args:
        params: the parameters for the search query.
        tab: the page tab that should be selected.

    Returns:
        The URL of the signature page on Socorro
    """
    web_url = socorro.Socorro.CRASH_STATS_URL
    query = lmdutils.get_params_for_url(params)
    return f"{web_url}/signature/{query}#{tab}"
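
# Illustrative result (added for clarity; not part of the original module).
# Assuming `lmdutils.get_params_for_url` encodes the parameters as a query
# string and `CRASH_STATS_URL` points at the public Crash Stats instance, a
# call such as:
#
#     generate_signature_page_url({"signature": "OOM | small"}, "reports")
#
# would yield a URL of roughly this shape:
#
#     https://crash-stats.mozilla.org/signature/?signature=OOM+%7C+small#reports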


class NoCrashReportFoundError(Exception):
    """There are no crash reports that meet the required criteria."""


class ClouseauDataAnalyzer:
    """Analyze the data returned by Crash Clouseau about a specific crash
    signature.
    """

    MINIMUM_CLOUSEAU_SCORE_THRESHOLD: int = 8
    DEFAULT_CRASH_COMPONENT = ComponentName("Core", "General")

    def __init__(self, reports: Iterable[dict], bugs_store: BugsStore):
        self._clouseau_reports = reports
        self.bugs_store = bugs_store

    @cached_property
    def max_clouseau_score(self):
        """The maximum Clouseau score in the crash reports."""
        if not self._clouseau_reports:
            return 0
        return max(report["max_score"] for report in self._clouseau_reports)

    @cached_property
    def regressed_by_potential_bug_ids(self) -> set[int]:
        """The IDs of the bugs whose patches could have caused the crash."""
        minimum_accepted_score = max(
            self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score
        )
        return {
            changeset["bug_id"]
            for report in self._clouseau_reports
            if report["max_score"] >= minimum_accepted_score
            for changeset in report["changesets"]
            if changeset["max_score"] >= minimum_accepted_score
            and not changeset["is_merge"]
            and not changeset["is_backedout"]
        }
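
    # Illustrative example of the filtering above (added for clarity; the
    # report below is hypothetical). With MINIMUM_CLOUSEAU_SCORE_THRESHOLD of 8,
    # a single report such as:
    #
    #     {"max_score": 9, "changesets": [
    #         {"bug_id": 111, "max_score": 9, "is_merge": False, "is_backedout": False},
    #         {"bug_id": 222, "max_score": 3, "is_merge": False, "is_backedout": False},
    #     ]}
    #
    # contributes only bug 111: the effective minimum accepted score is
    # max(8, 9) = 9, and the second changeset scores below it.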

    @cached_property
    def regressed_by_patch(self) -> str | None:
        """The hash of the patch that could have caused the crash."""
        minimum_accepted_score = max(
            self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score
        )
        potential_patches = {
            changeset["changeset"]
            for report in self._clouseau_reports
            if report["max_score"] >= minimum_accepted_score
            for changeset in report["changesets"]
            if changeset["max_score"] >= minimum_accepted_score
            and not changeset["is_merge"]
            and not changeset["is_backedout"]
        }
        if len(potential_patches) == 1:
            return next(iter(potential_patches))
        return None

    @cached_property
    def regressed_by(self) -> int | None:
        """The ID of the bug, one of whose patches could have caused
        the crash.

        If there are multiple bugs, the value will be `None`.
        """
        bug_ids = self.regressed_by_potential_bug_ids
        if len(bug_ids) == 1:
            return next(iter(bug_ids))
        return None

    @cached_property
    def regressed_by_potential_bugs(self) -> list[BugAnalyzer]:
        """The bugs whose patches could have caused the crash."""
        self.bugs_store.fetch_bugs(
            self.regressed_by_potential_bug_ids,
            [
                "id",
                "groups",
                "assigned_to",
                "product",
                "component",
            ],
        )
        return [
            self.bugs_store.get_bug_by_id(bug_id)
            for bug_id in self.regressed_by_potential_bug_ids
        ]

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The author of the patch that could have caused the crash.

        If there are multiple regressors, the value will be `None`.

        The regressor bug assignee is considered the author, even if the
        assignee is not the patch author.
        """

        if not self.regressed_by:
            return None

        bug = self.regressed_by_potential_bugs[0]
        assert bug.id == self.regressed_by
        return bug.get_field("assigned_to_detail")

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component that the crash belongs to.

        If there are multiple components, the value will be the default one.
        """
        potential_components = {
            bug.component for bug in self.regressed_by_potential_bugs
        }
        if len(potential_components) == 1:
            return next(iter(potential_components))
        return self.DEFAULT_CRASH_COMPONENT
class SocorroDataAnalyzer(socorro_util.SignatureStats):
×
223
    """Analyze the data returned by Socorro."""
224

225
    _bugzilla_os_legal_values = None
×
226
    _bugzilla_cpu_legal_values_map = None
×
227
    _platforms = [
×
228
        {"short_name": "win", "name": "Windows"},
229
        {"short_name": "mac", "name": "Mac OS X"},
230
        {"short_name": "lin", "name": "Linux"},
231
        {"short_name": "and", "name": "Android"},
232
        {"short_name": "unknown", "name": "Unknown"},
233
    ]
234

235
    def __init__(
×
236
        self,
237
        signature: dict,
238
        num_total_crashes: int,
239
    ):
240
        super().__init__(signature, num_total_crashes, platforms=self._platforms)
×
241

242
    @classmethod
×
243
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
×
244
        """Return the corresponding OS name in Bugzilla for the provided OS name
245
        from Socorro.
246

247
        If the OS name is not recognized, return "Other".
248
        """
249
        if cls._bugzilla_os_legal_values is None:
×
250
            cls._bugzilla_os_legal_values = set(
×
251
                bugzilla.BugFields.fetch_field_values("op_sys")
252
            )
253

254
        if op_sys in cls._bugzilla_os_legal_values:
×
255
            return op_sys
×
256

257
        if op_sys.startswith("OS X ") or op_sys.startswith("macOS "):
×
258
            op_sys = "macOS"
×
259
        elif op_sys.startswith("Windows"):
×
260
            op_sys = "Windows"
×
261
        elif "Linux" in op_sys or op_sys.startswith("Ubuntu"):
×
262
            op_sys = "Linux"
×
263
        else:
264
            op_sys = "Other"
×
265

266
        return op_sys
×
267

268
    @property
×
269
    def first_crash_date(self) -> str:
×
270
        """The date of the first crash within the query time range.
271

272
        The date is in YYYY-MM-DD format.
273
        """
274
        return self.signature["facets"]["histogram_date"][0]["term"][:10]
×
275

276
    @property
×
277
    def bugzilla_op_sys(self) -> str:
×
278
        """The name of the OS where the crash happens.
279

280
        The value is one of the legal values for Bugzilla's `op_sys` field.
281

282
        - If no OS name is found, the value will be "Unspecified".
283
        - If the OS name is not recognized, the value will be "Other".
284
        - If multiple OS names are found, the value will be "All". Unless the OS
285
          names can be resolved to a common name without a version. For example,
286
          "Windows 10" and "Windows 7" will become "Windows".
287
        """
288
        all_op_sys = {
×
289
            self.to_bugzilla_op_sys(op_sys["term"])
290
            for op_sys in self.signature["facets"]["platform_pretty_version"]
291
        }
292

293
        if len(all_op_sys) > 1:
×
294
            # Resolve to root OS name by removing the version number.
295
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}
×
296

297
        if len(all_op_sys) == 2 and "Other" in all_op_sys:
×
298
            # TODO: explain this workaround.
299
            all_op_sys.remove("Other")
×
300

301
        if len(all_op_sys) == 1:
×
302
            return next(iter(all_op_sys))
×
303

304
        if len(all_op_sys) == 0:
×
305
            return "Unspecified"
×
306

307
        return "All"
×
308
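
    # Illustrative mapping (added for clarity; assumes the example strings are
    # not among Bugzilla's legal `op_sys` values, which are fetched at runtime):
    #
    #     to_bugzilla_op_sys("Ubuntu 22.04")  -> "Linux"
    #     to_bugzilla_op_sys("OS X 10.15")    -> "macOS"
    #     to_bugzilla_op_sys("SunOS 5.11")    -> "Other"
    #
    # For `bugzilla_op_sys`, a signature seen on both "Windows 10" and
    # "Windows 7" resolves to "Windows" once the version numbers are dropped.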

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Return the corresponding CPU name in Bugzilla for the provided name
        from Socorro.

        If the CPU is not recognized, return "Other".
        """
        if cls._bugzilla_cpu_legal_values_map is None:
            cls._bugzilla_cpu_legal_values_map = {
                value.lower(): value
                for value in bugzilla.BugFields.fetch_field_values("rep_platform")
            }

        return cls._bugzilla_cpu_legal_values_map.get(cpu, "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The CPU architecture of the devices where the crash happens.

        The value is one of the legal values for Bugzilla's `rep_platform` field.

        - If no CPU architecture is found, the value will be "Unspecified".
        - If the CPU architecture is not recognized, the value will be "Other".
        - If multiple CPU architectures are found, the value will be "All".
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        if len(all_cpu_arch) == 2 and "Other" in all_cpu_arch:
            all_cpu_arch.remove("Other")

        if len(all_cpu_arch) == 1:
            return next(iter(all_cpu_arch))

        if len(all_cpu_arch) == 0:
            return "Unspecified"

        return "All"

    @property
    def user_comments_page_url(self) -> str:
        """The URL to the Signature page on Socorro where the Comments tab is
        selected.
        """
        start_date = date.today() - timedelta(weeks=26)
        params = {
            "signature": self.signature_term,
            "date": socorro.SuperSearch.get_search_date(start_date),
        }
        return generate_signature_page_url(params, "comments")

    @property
    def num_user_comments(self) -> int:
        """The number of crash reports with user comments."""
        # TODO: count useful/interesting user comments (e.g., exclude one-word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether the crash signature has any reports with a user comment."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The proto signature that occurs the most."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The number of crashes for the most occurring proto signature."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    def _build_ids(self) -> Iterator[int]:
        """Yields the build IDs where the crash occurred."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The build ID where most crashes occurred."""
        return self.signature["facets"]["build_id"][0]["term"]

    @cached_property
    def num_near_null_crashes(self) -> int:
        """The number of crashes that occurred on addresses near null."""
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_null_address(address["term"])
        )

    @property
    def is_near_null_crash(self) -> bool:
        """Whether all crashes occurred on addresses near null."""
        return self.num_near_null_crashes == self.num_crashes

    @property
    def is_potential_near_null_crash(self) -> bool:
        """Whether the signature is a potential near null crash.

        The value will be True if some but not all crashes occurred on addresses
        near null.
        """
        return not self.is_near_null_crash and self.num_near_null_crashes > 0

    @property
    def is_near_null_related_crash(self) -> bool:
        """Whether the signature is related to near null crashes.

        The value will be True if any of the crashes occurred on addresses near
        null.
        """
        return self.is_near_null_crash or self.is_potential_near_null_crash

    @cached_property
    def num_near_allocator_crashes(self) -> int:
        """The number of crashes that occurred on addresses near an allocator
        poison value.
        """
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_allocator_address(address["term"])
        )

    @property
    def is_near_allocator_crash(self) -> bool:
        """Whether all crashes occurred on addresses near an allocator poison
        value.
        """
        return self.num_near_allocator_crashes == self.num_crashes

    @property
    def is_potential_near_allocator_crash(self) -> bool:
        """Whether the signature is a potential near allocator poison value
        crash.

        The value will be True if some but not all crashes occurred on addresses
        near an allocator poison value.
        """
        return not self.is_near_allocator_crash and self.num_near_allocator_crashes > 0

    @property
    def is_near_allocator_related_crash(self) -> bool:
        """Whether the signature is related to near allocator poison value
        crashes.

        The value will be True if any of the crashes occurred on addresses near
        an allocator poison value.
        """
        return self.is_near_allocator_crash or self.is_potential_near_allocator_crash


class SignatureAnalyzer(SocorroDataAnalyzer, ClouseauDataAnalyzer):
    """Analyze the data related to a signature.

    This includes data from Socorro and Clouseau.
    """

    def __init__(
        self,
        socorro_signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
        bugs_store: BugsStore,
    ):
        SocorroDataAnalyzer.__init__(self, socorro_signature, num_total_crashes)
        ClouseauDataAnalyzer.__init__(self, clouseau_reports, bugs_store)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        params = {
            "proto_signature": "=" + proto_signature,
            "build_id": build_id,
            "_columns": [
                "uuid",
            ],
            "_results_number": limit,
        }

        def handler(res: dict, data: dict):
            data.update(res)

        data: dict = {}
        socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait()

        yield from data["hits"]

    def _is_corrupted_crash_stack(self, processed_crash: dict) -> bool:
        """Whether the crash stack is corrupted.

        Args:
            processed_crash: The processed crash to check.

        Returns:
            True if the crash stack is corrupted, False otherwise.
        """

        return any(
            not frame["module"]
            for frame in processed_crash["json_dump"]["crashing_thread"]["frames"]
        )

    def fetch_representative_processed_crash(self) -> dict:
        """Fetch a processed crash to represent the signature.

        This could fetch multiple processed crashes and return the one that is
        most likely to be useful.
        """
        limit_to_top_proto_signature = (
            self.num_top_proto_signature_crashes / self.num_crashes > 0.6
        )

        candidate_reports = itertools.chain(
            # Reports with a higher score from Clouseau are more likely to be
            # useful.
            sorted(
                self._clouseau_reports,
                key=lambda report: report["max_score"],
                reverse=True,
            ),
            # Next, we try to find reports from the top crashing build because
            # they are likely to be representative.
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self._build_ids()),
        )

        first_representative_report = None
        for i, report in enumerate(candidate_reports):
            uuid = report["uuid"]
            processed_crash = socorro.ProcessedCrash.get_processed(uuid)[uuid]
            if (
                limit_to_top_proto_signature
                and processed_crash["proto_signature"] != self.top_proto_signature
            ):
                continue

            if first_representative_report is None:
                first_representative_report = processed_crash

            if not self._is_corrupted_crash_stack(processed_crash):
                return processed_crash

            if i >= 20:
                # We have tried enough reports; give up.
                break

        if first_representative_report is not None:
            # Fall back to the first representative report that we found, even
            # if it's corrupted.
            return first_representative_report

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )

    @cached_property
    def is_potential_security_crash(self) -> bool:
        """Whether the crash is related to a potential security bug.

        The value will be True if:
            - the signature is related to near allocator poison value crashes, or
            - one of the potential regressors is a security bug
        """
        return self.is_near_allocator_related_crash or any(
            bug.is_security for bug in self.regressed_by_potential_bugs
        )

    def has_moz_crash_reason(self, reason: str) -> bool:
        """Whether the crash has a specific MOZ_CRASH reason.

        Args:
            reason: The MOZ_CRASH reason to check.

        Returns:
            True if any of the MOZ_CRASH reasons has a partial match with
            the provided reason.
        """
        return any(
            reason in moz_crash_reason["term"]
            for moz_crash_reason in self.signature["facets"]["moz_crash_reason"]
        )

    @property
    def process_type_summary(self) -> str:
        """The summary of the process types for the crash signature."""
        process_types = self.signature["facets"]["process_type"]
        if len(process_types) == 0:
            return "Unknown"

        if len(process_types) == 1:
            process_type = process_types[0]["term"]
            # Small process types are usually acronyms (e.g., gpu for GPU), thus
            # we use upper case for them. Otherwise, we capitalize the first letter.
            if len(process_type) <= 3:
                return process_type.upper()
            return process_type.capitalize()

        return "Multiple distinct types"
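
# Illustrative mapping for SignatureAnalyzer.process_type_summary (added for
# clarity; the facet values below are hypothetical):
#
#     []                                      -> "Unknown"
#     [{"term": "gpu"}]                       -> "GPU"
#     [{"term": "content"}]                   -> "Content"
#     [{"term": "gpu"}, {"term": "content"}]  -> "Multiple distinct types"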


class SignaturesDataFetcher:
    """Fetch the data related to the given signatures."""

    MEMORY_ACCESS_ERROR_REASONS = (
        # On Windows:
        "EXCEPTION_ACCESS_VIOLATION_READ",
        "EXCEPTION_ACCESS_VIOLATION_WRITE",
        "EXCEPTION_ACCESS_VIOLATION_EXEC",
        # On Linux:
        "SIGSEGV / SEGV_MAPERR",
        "SIGSEGV / SEGV_ACCERR",
    )

    EXCLUDED_MOZ_REASON_STRINGS = (
        "MOZ_CRASH(OOM)",
        "MOZ_CRASH(Out of memory)",
        "out of memory",
        "Shutdown hanging",
        # TODO(investigate): do we need to exclude signatures whose reason
        # contains `[unhandlable oom]`?
        # Example: arena_t::InitChunk | arena_t::AllocRun | arena_t::MallocLarge | arena_t::Malloc | BaseAllocator::malloc | Allocator::malloc | PageMalloc
        # "[unhandlable oom]",
    )

    # If any of the crash reasons starts with any of the following, then it is
    # a network or I/O error.
    EXCLUDED_IO_ERROR_REASON_PREFIXES = (
        "EXCEPTION_IN_PAGE_ERROR_READ",
        "EXCEPTION_IN_PAGE_ERROR_WRITE",
        "EXCEPTION_IN_PAGE_ERROR_EXEC",
    )

    # TODO(investigate): do we need to exclude all these signature prefixes?
    EXCLUDED_SIGNATURE_PREFIXES = (
        "OOM | ",
        "bad hardware | ",
        "shutdownhang | ",
    )

    SUMMARY_DURATION = timedelta(weeks=10)

    def __init__(
        self,
        signatures: Iterable[str],
        product: str = "Firefox",
        channel: str = "nightly",
    ):
        self._signatures = set(signatures)
        self._product = product
        self._channel = channel

    @classmethod
    def find_new_actionable_crashes(
        cls,
        product: str,
        channel: str,
        days_to_check: int = 7,
        days_without_crashes: int = 7,
    ) -> "SignaturesDataFetcher":
        """Find new actionable crashes.

        Args:
            product: The product to check.
            channel: The release channel to check.
            days_to_check: The number of days to check for crashes.
            days_without_crashes: The number of days without crashes before the
                `days_to_check` to consider the signature new.

        Returns:
            A SignaturesDataFetcher instance for the new actionable signatures.
        """
        duration = days_to_check + days_without_crashes
        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - timedelta(duration)
        earliest_allowed_date = lmdutils.get_date_str(
            end_date - timedelta(days_to_check)
        )
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": product,
            "release_channel": channel,
            "date": date_range,
            # TODO(investigate): should we do a local filter instead of the
            # following (should we exclude the signature if one of the crashes
            # is a shutdown hang?):
            # If the `ipc_shutdown_state` or `shutdown_progress` field are
            # non-empty then it's a shutdown hang.
            "ipc_shutdown_state": "__null__",
            "shutdown_progress": "__null__",
            # TODO(investigate): should we use the following instead of the
            # local filter.
            # "oom_allocation_size": "!__null__",
            "_aggs.signature": [
                "moz_crash_reason",
                "reason",
                "possible_bit_flips_max_confidence",
                "_histogram.date",
                "_cardinality.install_time",
                "_cardinality.oom_allocation_size",
            ],
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_resp: dict, data: list):
            logger.debug(
                "Total of %d signatures received from Socorro",
                len(search_resp["facets"]["signature"]),
            )

            for crash in search_resp["facets"]["signature"]:
                signature = crash["term"]
                if any(
                    signature.startswith(excluded_prefix)
                    for excluded_prefix in cls.EXCLUDED_SIGNATURE_PREFIXES
                ):
                    # Ignore signatures that start with any of the excluded prefixes.
                    continue

                facets = crash["facets"]
                installations = facets["cardinality_install_time"]["value"]
                if installations <= 1:
                    # Ignore crashes that only happen on one installation.
                    continue

                first_date = facets["histogram_date"][0]["term"]
                if first_date < earliest_allowed_date:
                    # The crash is not new, skip it.
                    continue

                if any(
                    reason["term"].startswith(io_error_prefix)
                    for reason in facets["reason"]
                    for io_error_prefix in cls.EXCLUDED_IO_ERROR_REASON_PREFIXES
                ):
                    # Ignore network or I/O error crashes.
                    continue

                if crash["count"] < 20:
                    # For signatures with low volume, having multiple types of
                    # memory errors indicates potential bad hardware crashes.
                    num_memory_error_types = sum(
                        reason["term"] in cls.MEMORY_ACCESS_ERROR_REASONS
                        for reason in facets["reason"]
                    )
                    if num_memory_error_types > 1:
                        # Potential bad hardware crash, skip it.
                        continue

                bit_flips_count = sum(
                    row["count"] for row in facets["possible_bit_flips_max_confidence"]
                )
                bit_flips_percentage = bit_flips_count / crash["count"]
                if bit_flips_percentage >= 0.2:
                    # Potential bad hardware crash, skip it.
                    continue

                # TODO(investigate): is this needed since we are already
                # filtering signatures that start with "OOM | "
                if facets["cardinality_oom_allocation_size"]["value"]:
                    # If one of the crashes is an OOM crash, skip it.
                    continue

                # TODO(investigate): do we need to check for the `moz_crash_reason`
                moz_crash_reasons = facets["moz_crash_reason"]
                if moz_crash_reasons and any(
                    excluded_reason in reason["term"]
                    for reason in moz_crash_reasons
                    for excluded_reason in cls.EXCLUDED_MOZ_REASON_STRINGS
                ):
                    continue

                data.append(signature)

        signatures: list = []
        socorro.SuperSearch(
            params=params,
            handler=handler,
            handlerdata=signatures,
        ).wait()

        logger.debug(
            "Total of %d signatures left after applying the filtering criteria",
            len(signatures),
        )

        return cls(signatures, product, channel)

    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
        """Fetch the crash reports data from Crash Clouseau."""
        if not self._signatures:
            return {}

        logger.debug(
            "Fetch from Clouseau: requesting reports for %d signatures",
            len(self._signatures),
        )

        signature_reports = clouseau.Reports.get_by_signatures(
            self._signatures,
            product=self._product,
            channel=self._channel,
        )

        logger.debug(
            "Fetch from Clouseau: received reports for %d signatures",
            len(signature_reports),
        )

        return signature_reports

    def fetch_socorro_info(self) -> tuple[list[dict], int]:
        """Fetch the signature data from Socorro."""
        if not self._signatures:
            return [], 0

        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - self.SUMMARY_DURATION
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": self._product,
            # TODO(investigate): should we include all release channels?
            "release_channel": self._channel,
            # TODO(investigate): should we limit based on the build date as well?
            "date": date_range,
            # TODO: split signatures into chunks to avoid very long query URLs
            "signature": ["=" + signature for signature in self._signatures],
            "_aggs.signature": [
                "address",
                "build_id",
                "cpu_arch",
                "proto_signature",
                "_cardinality.user_comments",
                "cpu_arch",
                "platform_pretty_version",
                "_histogram.date",
                # The following are needed for SignatureStats:
                "platform",
                "is_garbage_collecting",
                "_cardinality.install_time",
                "startup_crash",
                "_histogram.uptime",
                "process_type",
                "moz_crash_reason",
            ],
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_results: dict, data: dict):
            data["num_total_crashes"] = search_results["total"]
            data["signatures"] = search_results["facets"]["signature"]

        logger.debug(
            "Fetch from Socorro: requesting info for %d signatures",
            len(self._signatures),
        )

        data: dict = {}
        socorro.SuperSearchUnredacted(
            params=params,
            handler=handler,
            handlerdata=data,
        ).wait()

        logger.debug(
            "Fetch from Socorro: received info for %d signatures",
            len(data["signatures"]),
        )

        return data["signatures"], data["num_total_crashes"]

    def fetch_bugs(
        self, include_fields: list[str] | None = None
    ) -> dict[str, list[dict]]:
        """Fetch bugs that are filed against the given signatures."""
        if not self._signatures:
            return {}

        params_base: dict = {
            "include_fields": [
                "cf_crash_signature",
            ],
        }

        if include_fields:
            params_base["include_fields"].extend(include_fields)

        params_list = []
        for signatures_chunk in Connection.chunks(list(self._signatures), 30):
            params = params_base.copy()
            n = int(utils.get_last_field_num(params))
            params[f"f{n}"] = "OP"
            params[f"j{n}"] = "OR"
            for signature in signatures_chunk:
                n += 1
                params[f"f{n}"] = "cf_crash_signature"
                params[f"o{n}"] = "regexp"
                params[f"v{n}"] = rf"\[(@ |@){re.escape(signature)}( \]|\])"
            params[f"f{n+1}"] = "CP"
            params_list.append(params)
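
        # Illustrative note (added for clarity; the signature is hypothetical):
        # for "mozilla::dom::Foo::Bar", the generated Bugzilla regexp is
        #     \[(@ |@)mozilla::dom::Foo::Bar( \]|\])
        # which matches crash signature annotations such as
        # "[@ mozilla::dom::Foo::Bar]" or "[@mozilla::dom::Foo::Bar]".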

        signatures_bugs: dict = defaultdict(list)

        def handler(res, data):
            for bug in res["bugs"]:
                for signature in utils.get_signatures(bug["cf_crash_signature"]):
                    if signature in self._signatures:
                        data[signature].append(bug)

        logger.debug(
            "Fetch from Bugzilla: requesting bugs for %d signatures",
            len(self._signatures),
        )
        timeout = utils.get_config("common", "bz_query_timeout")
        Bugzilla(
            timeout=timeout,
            queries=[
                connection.Query(Bugzilla.API_URL, params, handler, signatures_bugs)
                for params in params_list
            ],
        ).wait()

        logger.debug(
            "Fetch from Bugzilla: received bugs for %d signatures", len(signatures_bugs)
        )

        return signatures_bugs

    def analyze(self) -> list[SignatureAnalyzer]:
        """Analyze the data related to the signatures."""
        bugs = self.fetch_bugs()
        # TODO(investigate): For now, we are ignoring signatures that have bugs
        # filed even if they were closed a long time ago. We should investigate
        # whether we should include the ones with closed bugs. For example, if
        # the bug was closed as Fixed years ago.
        self._signatures.difference_update(bugs.keys())

        clouseau_reports = self.fetch_clouseau_crash_reports()
        # TODO(investigate): For now, we are ignoring signatures that are not
        # analyzed by Clouseau. We should investigate why they are not analyzed
        # and whether we should include them.
        self._signatures.intersection_update(clouseau_reports.keys())

        signatures, num_total_crashes = self.fetch_socorro_info()
        bugs_store = BugsStore()

        return [
            SignatureAnalyzer(
                signature,
                num_total_crashes,
                clouseau_reports[signature["term"]],
                bugs_store,
            )
            for signature in signatures
        ]
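

# Illustrative end-to-end flow (a hedged sketch added for clarity; it is not
# part of the original module). It assumes network access to Socorro, Clouseau,
# and Bugzilla, and uses only attributes referenced elsewhere in this file.
if __name__ == "__main__":
    # Find new actionable Firefox nightly crash signatures and summarize them.
    fetcher = SignaturesDataFetcher.find_new_actionable_crashes("Firefox", "nightly")
    for signature_analyzer in fetcher.analyze():
        print(
            signature_analyzer.signature_term,
            signature_analyzer.process_type_summary,
        )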