• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mozilla / relman-auto-nag / #4781

25 Oct 2023 02:45AM CUT coverage: 22.083% (-0.008%) from 22.091%
#4781

push

coveralls-python

suhaibmujahid
[file_crash_bug] Exclude possible bit flip crashes

716 of 3560 branches covered (20.11%)

3 of 3 new or added lines in 1 file covered. (100.0%)

1925 of 8717 relevant lines covered (22.08%)

0.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/bugbot/crash/analyzer.py
1
# This Source Code Form is subject to the terms of the Mozilla Public
2
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3
# You can obtain one at http://mozilla.org/MPL/2.0/.
4

5
import itertools
×
6
import re
×
7
from collections import defaultdict
×
8
from datetime import date, timedelta
×
9
from functools import cached_property
×
10
from typing import Iterable, Iterator
×
11

12
from libmozdata import bugzilla, clouseau, connection, socorro
×
13
from libmozdata import utils as lmdutils
×
14
from libmozdata.bugzilla import Bugzilla
×
15
from libmozdata.connection import Connection
×
16

17
from bugbot import logger, utils
×
18
from bugbot.bug.analyzer import BugAnalyzer, BugsStore
×
19
from bugbot.components import ComponentName
×
20
from bugbot.crash import socorro_util
×
21

22
# The max offset from a memory address to be considered "near".
OFFSET_64_BIT = 0x1000
OFFSET_32_BIT = 0x100
# Allocator poison value addresses.
ALLOCATOR_ADDRESSES_64_BIT = (
    (0xE5E5E5E5E5E5E5E5, OFFSET_64_BIT),
    # On 64-bit windows, sometimes it could be doing something with a 32-bit
    # value gotten from freed memory, so it'll be 0X00000000E5E5E5E5 +/-, and
    # because of the address limitation, quite often it will be
    # 0X0000E5E5E5E5E5E5 +/-.
    (0x00000000E5E5E5E5, OFFSET_32_BIT),
    (0x0000E5E5E5E5E5E5, OFFSET_64_BIT),
    (0x4B4B4B4B4B4B4B4B, OFFSET_64_BIT),
)
ALLOCATOR_ADDRESSES_32_BIT = (
    (0xE5E5E5E5, OFFSET_32_BIT),
    (0x4B4B4B4B, OFFSET_32_BIT),
)
# Ranges where addresses are considered near allocator poison values.
#
# BUG FIX: these were previously bare generator expressions. A module-level
# generator is exhausted after its first full iteration, so every call to
# `is_near_allocator_address` after the first one silently iterated an empty
# sequence and returned False. Materializing them as tuples makes them safely
# re-iterable.
ALLOCATOR_RANGES_64_BIT = tuple(
    (addr - offset, addr + offset) for addr, offset in ALLOCATOR_ADDRESSES_64_BIT
)
ALLOCATOR_RANGES_32_BIT = tuple(
    (addr - offset, addr + offset) for addr, offset in ALLOCATOR_ADDRESSES_32_BIT
)

# NOTE: If you make changes that affect the output of the analysis, you should
# increment this number. This is needed in the experimental phase only.
EXPERIMENT_VERSION = 3
51

52

53
def is_near_null_address(str_address) -> bool:
    """Check if the address is near null.

    Args:
        str_address: The memory address to check.

    Returns:
        True if the address is near null, False otherwise.
    """
    address = int(str_address, 0)
    # A 64-bit address is "0x" plus 16 hex digits, i.e. at least 18 characters.
    offset = OFFSET_64_BIT if len(str_address) >= 18 else OFFSET_32_BIT
    return -offset <= address <= offset
69

70

71
def is_near_allocator_address(str_address) -> bool:
    """Check if the address is near an allocator poison value.

    Args:
        str_address: The memory address to check.

    Returns:
        True if the address is near an allocator poison value, False otherwise.
    """
    address = int(str_address, 0)
    # A 64-bit address is "0x" plus 16 hex digits, i.e. at least 18 characters.
    ranges = (
        ALLOCATOR_RANGES_64_BIT if len(str_address) >= 18 else ALLOCATOR_RANGES_32_BIT
    )
    return any(low <= address <= high for low, high in ranges)
89

90

91
# TODO: Move this to libmozdata
def generate_signature_page_url(params: dict, tab: str) -> str:
    """Generate a URL to the signature page on Socorro

    Args:
        params: the parameters for the search query.
        tab: the page tab that should be selected.

    Returns:
        The URL of the signature page on Socorro
    """
    query = lmdutils.get_params_for_url(params)
    return f"{socorro.Socorro.CRASH_STATS_URL}/signature/{query}#{tab}"
105

106

107
# NOTE: At this point, we will file bugs on bugzilla-dev. Once we are confident
# that the bug filing is working as expected, we can switch to filing bugs in
# the production instance of Bugzilla.
class DevBugzilla(Bugzilla):
    """Bugzilla client that targets the bugzilla-dev staging instance."""

    URL = "https://bugzilla-dev.allizom.org"
    API_URL = URL + "/rest/bug"
    ATTACHMENT_API_URL = API_URL + "/attachment"
    # NOTE(review): evaluated at import time — this requires a
    # `bz_api_key_dev` entry in the login info; confirm deployments set it.
    TOKEN = utils.get_login_info()["bz_api_key_dev"]
    # Note(suhaib): the dev instance of bugzilla has a smaller cluster, so we
    # need to go easy on it.
    MAX_WORKERS = 1
118

119

120
class NoCrashReportFoundError(Exception):
    """There are no crash reports that meet the required criteria.

    Raised by `SignatureAnalyzer.fetch_representative_processed_crash` when no
    report matching the most frequent proto signature can be found.
    """
122

123

124
class ClouseauDataAnalyzer:
    """Analyze the data returned by Crash Clouseau about a specific crash
    signature.
    """

    # Changesets/reports scoring below this are never considered regressors.
    MINIMUM_CLOUSEAU_SCORE_THRESHOLD: int = 8
    # Fallback component when the regressor bugs do not agree on one.
    DEFAULT_CRASH_COMPONENT = ComponentName("Core", "General")

    def __init__(self, reports: Iterable[dict], bugs_store: BugsStore):
        """Initialize the analyzer.

        Args:
            reports: the Clouseau reports for the signature.
            bugs_store: a shared store used to fetch and cache Bugzilla bugs.
        """
        self._clouseau_reports = reports
        self.bugs_store = bugs_store

    @cached_property
    def max_clouseau_score(self):
        """The maximum Clouseau score in the crash reports."""
        if not self._clouseau_reports:
            return 0
        return max(report["max_score"] for report in self._clouseau_reports)

    def _iter_suspect_changesets(self) -> Iterator[dict]:
        """Yield the changesets that could plausibly have caused the crash.

        A changeset qualifies when both its report and the changeset itself
        reach the minimum accepted score, and it is neither a merge commit nor
        a backed-out patch. This is the shared filter behind
        `regressed_by_potential_bug_ids` and `regressed_by_patch`.
        """
        # Never accept anything below the best score we have seen, nor below
        # the absolute threshold.
        minimum_accepted_score = max(
            self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score
        )
        for report in self._clouseau_reports:
            if report["max_score"] < minimum_accepted_score:
                continue
            for changeset in report["changesets"]:
                if (
                    changeset["max_score"] >= minimum_accepted_score
                    and not changeset["is_merge"]
                    and not changeset["is_backedout"]
                ):
                    yield changeset

    @cached_property
    def regressed_by_potential_bug_ids(self) -> set[int]:
        """The IDs for the bugs that their patches could have caused the crash."""
        return {
            changeset["bug_id"] for changeset in self._iter_suspect_changesets()
        }

    @cached_property
    def regressed_by_patch(self) -> str | None:
        """The hash of the patch that could have caused the crash.

        If there are multiple candidate patches, the value will be `None`.
        """
        potential_patches = {
            changeset["changeset"] for changeset in self._iter_suspect_changesets()
        }
        if len(potential_patches) == 1:
            return next(iter(potential_patches))
        return None

    @cached_property
    def regressed_by(self) -> int | None:
        """The ID of the bug that one of its patches could have caused
        the crash.

        If there are multiple bugs, the value will be `None`.
        """
        bug_ids = self.regressed_by_potential_bug_ids
        if len(bug_ids) == 1:
            return next(iter(bug_ids))
        return None

    @cached_property
    def regressed_by_potential_bugs(self) -> list[BugAnalyzer]:
        """The bugs whose patches could have caused the crash."""
        self.bugs_store.fetch_bugs(
            self.regressed_by_potential_bug_ids,
            [
                "id",
                "groups",
                "assigned_to",
                "product",
                "component",
            ],
        )
        return [
            self.bugs_store.get_bug_by_id(bug_id)
            for bug_id in self.regressed_by_potential_bug_ids
        ]

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The author of the patch that could have caused the crash.

        If there are multiple regressors, the value will be `None`.

        The regressor bug assignee is considered as the author, even if the
        assignee is not the patch author.
        """
        if not self.regressed_by:
            return None

        # `regressed_by` is only set when there is exactly one potential bug,
        # so the single fetched bug must be that regressor.
        bug = self.regressed_by_potential_bugs[0]
        assert bug.id == self.regressed_by
        return bug.get_field("assigned_to_detail")

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component that the crash belongs to.

        If there are multiple components, the value will be the default one.
        """
        potential_components = {
            bug.component for bug in self.regressed_by_potential_bugs
        }
        if len(potential_components) == 1:
            return next(iter(potential_components))
        return self.DEFAULT_CRASH_COMPONENT
237

238

239
class SocorroDataAnalyzer(socorro_util.SignatureStats):
    """Analyze the data returned by Socorro."""

    # Class-wide caches of Bugzilla's legal field values, fetched lazily on
    # first use (see `to_bugzilla_op_sys` and `to_bugzilla_cpu`).
    _bugzilla_os_legal_values = None
    _bugzilla_cpu_legal_values_map = None
    _platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
    ):
        super().__init__(signature, num_total_crashes, platforms=self._platforms)

    @staticmethod
    def _collapse_to_single_value(values: set) -> str:
        """Collapse a set of candidate Bugzilla field values into one value.

        This is the shared tail of `bugzilla_op_sys` and `bugzilla_cpu_arch`.

        Args:
            values: the candidate values (may be mutated by this method).

        Returns:
            - The only value, when exactly one remains.
            - "Unspecified" when the set is empty.
            - "All" when multiple distinct values remain. When there are
              exactly two values and one of them is "Other", the "Other"
              value is dropped first and the remaining value is returned.
        """
        # TODO: explain this workaround.
        if len(values) == 2 and "Other" in values:
            values.remove("Other")

        if len(values) == 1:
            return next(iter(values))

        if len(values) == 0:
            return "Unspecified"

        return "All"

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Return the corresponding OS name in Bugzilla for the provided OS name
        from Socorro.

        If the OS name is not recognized, return "Other".
        """
        if cls._bugzilla_os_legal_values is None:
            cls._bugzilla_os_legal_values = set(
                bugzilla.BugFields.fetch_field_values("op_sys")
            )

        if op_sys in cls._bugzilla_os_legal_values:
            return op_sys

        if op_sys.startswith("OS X ") or op_sys.startswith("macOS "):
            op_sys = "macOS"
        elif op_sys.startswith("Windows"):
            op_sys = "Windows"
        elif "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            op_sys = "Linux"
        else:
            op_sys = "Other"

        return op_sys

    @property
    def first_crash_date(self) -> str:
        """The date of the first crash within the query time range.

        The date is in YYYY-MM-DD format.
        """
        return self.signature["facets"]["histogram_date"][0]["term"][:10]

    @property
    def bugzilla_op_sys(self) -> str:
        """The name of the OS where the crash happens.

        The value is one of the legal values for Bugzilla's `op_sys` field.

        - If no OS name is found, the value will be "Unspecified".
        - If the OS name is not recognized, the value will be "Other".
        - If multiple OS names are found, the value will be "All". Unless the OS
          names can be resolved to a common name without a version. For example,
          "Windows 10" and "Windows 7" will become "Windows".
        """
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # Resolve to root OS name by removing the version number.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        return self._collapse_to_single_value(all_op_sys)

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Return the corresponding CPU name in Bugzilla for the provided name
        from Socorro.

        If the CPU is not recognized, return "Other".
        """
        if cls._bugzilla_cpu_legal_values_map is None:
            cls._bugzilla_cpu_legal_values_map = {
                value.lower(): value
                for value in bugzilla.BugFields.fetch_field_values("rep_platform")
            }

        return cls._bugzilla_cpu_legal_values_map.get(cpu, "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The CPU architecture of the devices where the crash happens.

        The value is one of the legal values for Bugzilla's `rep_platform` field.

        - If no CPU architecture is found, the value will be "Unspecified".
        - If the CPU architecture is not recognized, the value will be "Other".
        - If multiple CPU architectures are found, the value will "All".
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        return self._collapse_to_single_value(all_cpu_arch)

    @property
    def user_comments_page_url(self) -> str:
        """The URL to the Signature page on Socorro where the Comments tab is
        selected.
        """
        start_date = date.today() - timedelta(weeks=26)
        params = {
            "signature": self.signature_term,
            "date": socorro.SuperSearch.get_search_date(start_date),
        }
        return generate_signature_page_url(params, "comments")

    @property
    def num_user_comments(self) -> int:
        """The number of crash reports with user comments."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether the crash signature has any reports with a user comment."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The proto signature that occurs the most."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The number of crashes for the most occurring proto signature."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    def _build_ids(self) -> Iterator[int]:
        """Yields the build IDs where the crash occurred."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The build ID where most crashes occurred."""
        return self.signature["facets"]["build_id"][0]["term"]

    @cached_property
    def num_near_null_crashes(self) -> int:
        """The number of crashes that occurred on addresses near null."""
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_null_address(address["term"])
        )

    @property
    def is_near_null_crash(self) -> bool:
        """Whether all crashes occurred on addresses near null."""
        return self.num_near_null_crashes == self.num_crashes

    @property
    def is_potential_near_null_crash(self) -> bool:
        """Whether the signature is a potential near null crash.

        The value will be True if some but not all crashes occurred on addresses
        near null.
        """
        return not self.is_near_null_crash and self.num_near_null_crashes > 0

    @property
    def is_near_null_related_crash(self) -> bool:
        """Whether the signature is related to near null crashes.

        The value will be True if any of the crashes occurred on addresses near
        null.
        """
        return self.is_near_null_crash or self.is_potential_near_null_crash

    @cached_property
    def num_near_allocator_crashes(self) -> int:
        """The number of crashes that occurred on addresses near an allocator
        poison value.
        """
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_allocator_address(address["term"])
        )

    @property
    def is_near_allocator_crash(self) -> bool:
        """Whether all crashes occurred on addresses near an allocator poison
        value.
        """
        return self.num_near_allocator_crashes == self.num_crashes

    @property
    def is_potential_near_allocator_crash(self) -> bool:
        """Whether the signature is a potential near allocator poison value
        crash.

        The value will be True if some but not all crashes occurred on addresses
        near an allocator poison value.
        """
        return not self.is_near_allocator_crash and self.num_near_allocator_crashes > 0

    @property
    def is_near_allocator_related_crash(self) -> bool:
        """Whether the signature is related to near allocator poison value
        crashes.

        The value will be True if any of the crashes occurred on addresses near
        an allocator poison value.
        """
        return self.is_near_allocator_crash or self.is_potential_near_allocator_crash
479

480

481
class SignatureAnalyzer(SocorroDataAnalyzer, ClouseauDataAnalyzer):
    """Analyze the data related to a signature.

    This includes data from Socorro and Clouseau.
    """

    def __init__(
        self,
        socorro_signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
        bugs_store: BugsStore,
    ):
        # The two parents have unrelated __init__ signatures, so each is
        # initialized explicitly instead of through cooperative super().
        SocorroDataAnalyzer.__init__(self, socorro_signature, num_total_crashes)
        ClouseauDataAnalyzer.__init__(self, clouseau_reports, bugs_store)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Fetch crash report hits from Socorro SuperSearch.

        Args:
            proto_signature: the proto signature to match exactly (the "="
                prefix requests an exact-match query).
            build_id: a build ID, or an iterable of build IDs, to search in.
            limit: the maximum number of results to request.

        Yields:
            Search hits; each hit is a dict containing a "uuid" key.

        NOTE: this is a generator function, so the SuperSearch query is only
        sent when the first item is requested — `fetch_representative_processed_crash`
        relies on this to avoid unnecessary queries.
        """
        params = {
            "proto_signature": "=" + proto_signature,
            "build_id": build_id,
            "_columns": [
                "uuid",
            ],
            "_results_number": limit,
        }

        def handler(res: dict, data: dict):
            # Copy the whole search response into the shared dict.
            data.update(res)

        data: dict = {}
        socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait()

        yield from data["hits"]

    def fetch_representative_processed_crash(self) -> dict:
        """Fetch a processed crash to represent the signature.

        This could fetch multiple processed crashes and return the one that is
        most likely to be useful.

        Raises:
            NoCrashReportFoundError: when no acceptable crash report is found.
        """
        # Only restrict candidates to the top proto signature when it clearly
        # dominates the signature's crash volume (> 60%).
        limit_to_top_proto_signature = (
            self.num_top_proto_signature_crashes / self.num_crashes > 0.6
        )

        # The chained sources are tried in order; the two _fetch_crash_reports
        # calls return lazy generators, so their Socorro queries only run if
        # the earlier sources did not yield an acceptable report.
        reports = itertools.chain(
            # Reports with a higher score from clouseau are more likely to be
            # useful.
            sorted(
                self._clouseau_reports,
                key=lambda report: report["max_score"],
                reverse=True,
            ),
            # Next we try find reports from the top crashing build because they
            # are likely to be representative.
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self._build_ids()),
        )
        for report in reports:
            uuid = report["uuid"]
            processed_crash = socorro.ProcessedCrash.get_processed(uuid)[uuid]
            if (
                not limit_to_top_proto_signature
                or processed_crash["proto_signature"] == self.top_proto_signature
            ):
                # TODO(investigate): maybe we should check if the stack is
                # corrupted (ask gsvelto or willkg about how to detect that)
                return processed_crash

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )

    @cached_property
    def is_potential_security_crash(self) -> bool:
        """Whether the crash is related to a potential security bug.

        The value will be True if:
            - the signature is related to near allocator poison value crashes, or
            - one of the potential regressors is a security bug
        """
        return self.is_near_allocator_related_crash or any(
            bug.is_security for bug in self.regressed_by_potential_bugs
        )
569

570

571
class SignaturesDataFetcher:
×
572
    """Fetch the data related to the given signatures."""
573

574
    # Crash reasons that indicate a memory access error.
    MEMORY_ACCESS_ERROR_REASONS = (
        # On Windows:
        "EXCEPTION_ACCESS_VIOLATION_READ",
        "EXCEPTION_ACCESS_VIOLATION_WRITE",
        # BUG FIX: a missing trailing comma here used to implicitly
        # concatenate this string with the first Linux reason, producing
        # "EXCEPTION_ACCESS_VIOLATION_EXECSIGSEGV / SEGV_MAPERR" — so neither
        # reason ever matched.
        "EXCEPTION_ACCESS_VIOLATION_EXEC",
        # On Linux:
        "SIGSEGV / SEGV_MAPERR",
        "SIGSEGV / SEGV_ACCERR",
    )

    # Signatures whose MOZ_CRASH reason contains any of these strings are
    # excluded from the analysis.
    EXCLUDED_MOZ_REASON_STRINGS = (
        "MOZ_CRASH(OOM)",
        "MOZ_CRASH(Out of memory)",
        "out of memory",
        "Shutdown hanging",
        # TODO(investigate): do we need to exclude signatures that their reason
        # contains `[unhandlable oom]`?
        # Example: arena_t::InitChunk | arena_t::AllocRun | arena_t::MallocLarge | arena_t::Malloc | BaseAllocator::malloc | Allocator::malloc | PageMalloc
        # "[unhandlable oom]",
    )

    # If any of the crash reason starts with any of the following, then it is
    # Network or I/O error.
    EXCLUDED_IO_ERROR_REASON_PREFIXES = (
        "EXCEPTION_IN_PAGE_ERROR_READ",
        "EXCEPTION_IN_PAGE_ERROR_WRITE",
        "EXCEPTION_IN_PAGE_ERROR_EXEC",
    )

    # TODO(investigate): do we need to exclude all these signatures prefixes?
    EXCLUDED_SIGNATURE_PREFIXES = (
        "OOM | ",
        "bad hardware | ",
        "shutdownhang | ",
    )

    # The time window used when summarizing a signature from Socorro.
    SUMMARY_DURATION = timedelta(weeks=10)
611

612
    def __init__(
        self,
        signatures: Iterable[str],
        product: str = "Firefox",
        channel: str = "nightly",
    ):
        """Initialize the fetcher.

        Args:
            signatures: the crash signatures to fetch data for (de-duplicated
                into a set).
            product: the product to query.
            channel: the release channel to query.
        """
        self._signatures = set(signatures)
        self._product = product
        self._channel = channel
621

622
    @classmethod
    def find_new_actionable_crashes(
        cls,
        product: str,
        channel: str,
        days_to_check: int = 7,
        days_without_crashes: int = 7,
    ) -> "SignaturesDataFetcher":
        """Find new actionable crashes.

        Args:
            product: The product to check.
            channel: The release channel to check.
            days_to_check: The number of days to check for crashes.
            days_without_crashes: The number of days without crashes before the
                `days_to_check` to consider the signature new.

        Returns:
            A list of actionable signatures.
        """
        duration = days_to_check + days_without_crashes
        end_date = lmdutils.get_date_ymd("today")
        # timedelta's first positional argument is days.
        start_date = end_date - timedelta(duration)
        earliest_allowed_date = lmdutils.get_date_str(
            end_date - timedelta(days_to_check)
        )
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": product,
            "release_channel": channel,
            "date": date_range,
            # TODO(investigate): should we do a local filter instead of the
            # following (should we exclude the signature if one of the crashes
            # is a shutdown hang?):
            # If the `ipc_shutdown_state` or `shutdown_progress` field are
            # non-empty then it's a shutdown hang.
            "ipc_shutdown_state": "__null__",
            "shutdown_progress": "__null__",
            # TODO(investigate): should we use the following instead of the
            # local filter.
            # "oom_allocation_size": "!__null__",
            "_aggs.signature": [
                "moz_crash_reason",
                "reason",
                "_histogram.date",
                "_cardinality.install_time",
                "_cardinality.oom_allocation_size",
                "_cardinality.possible_bit_flips_max_confidence",
            ],
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_resp: dict, data: list):
            # Apply the local filtering criteria to each signature facet and
            # accumulate the surviving signature terms into `data`.
            logger.debug(
                "Total of %d signatures received from Socorro",
                len(search_resp["facets"]["signature"]),
            )

            for crash in search_resp["facets"]["signature"]:
                signature = crash["term"]
                if any(
                    signature.startswith(excluded_prefix)
                    for excluded_prefix in cls.EXCLUDED_SIGNATURE_PREFIXES
                ):
                    # Ignore signatures that start with any of the excluded prefixes.
                    continue

                facets = crash["facets"]
                installations = facets["cardinality_install_time"]["value"]
                if installations <= 1:
                    # Ignore crashes that only happen on one installation.
                    continue

                # NOTE(review): this compares date strings lexicographically;
                # assumes `get_date_str` and the histogram terms share the same
                # sortable (ISO-like) format — TODO confirm.
                first_date = facets["histogram_date"][0]["term"]
                if first_date < earliest_allowed_date:
                    # The crash is not new, skip it.
                    continue

                if any(
                    reason["term"].startswith(io_error_prefix)
                    for reason in facets["reason"]
                    for io_error_prefix in cls.EXCLUDED_IO_ERROR_REASON_PREFIXES
                ):
                    # Ignore Network or I/O error crashes.
                    continue

                if crash["count"] < 20:
                    # For signatures with low volume, having multiple types of
                    # memory errors indicates potential bad hardware crashes.
                    num_memory_error_types = sum(
                        reason["term"] in cls.MEMORY_ACCESS_ERROR_REASONS
                        for reason in facets["reason"]
                    )
                    if num_memory_error_types > 1:
                        # Potential bad hardware crash, skip it.
                        continue

                bit_flips_percentage = (
                    facets["cardinality_possible_bit_flips_max_confidence"]["value"]
                    / crash["count"]
                )
                if bit_flips_percentage >= 0.2:
                    # Potential bad hardware crash, skip it.
                    continue

                # TODO(investigate): is this needed since we are already
                # filtering signatures that start with "OOM | "
                if facets["cardinality_oom_allocation_size"]["value"]:
                    # If one of the crashes is an OOM crash, skip it.
                    continue

                # TODO(investigate): do we need to check for the `moz_crash_reason`
                moz_crash_reasons = facets["moz_crash_reason"]
                if moz_crash_reasons and any(
                    excluded_reason in reason["term"]
                    for reason in moz_crash_reasons
                    for excluded_reason in cls.EXCLUDED_MOZ_REASON_STRINGS
                ):
                    continue

                data.append(signature)

        signatures: list = []
        socorro.SuperSearch(
            params=params,
            handler=handler,
            handlerdata=signatures,
        ).wait()

        logger.debug(
            "Total of %d signatures left after applying the filtering criteria",
            len(signatures),
        )

        return cls(signatures, product, channel)
759

760
    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
        """Fetch the crash reports data from Crash Clouseau."""
        if not self._signatures:
            return {}

        logger.debug(
            "Fetch from Clouseau: requesting reports for %d signatures",
            len(self._signatures),
        )

        reports_by_signature = clouseau.Reports.get_by_signatures(
            self._signatures, product=self._product, channel=self._channel
        )

        logger.debug(
            "Fetch from Clouseau: received reports for %d signatures",
            len(reports_by_signature),
        )

        return reports_by_signature
782

783
    def fetch_socorro_info(self) -> tuple[list[dict], int]:
        """Fetch the signature data from Socorro.

        Returns:
            A tuple of:
                - the signature facets returned by Socorro, and
                - the total number of crashes in the query time range.
        """
        if not self._signatures:
            return [], 0

        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - self.SUMMARY_DURATION
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": self._product,
            # TODO(investigate): should we included all release channels?
            "release_channel": self._channel,
            # TODO(investigate): should we limit based on the build date as well?
            "date": date_range,
            # TODO: split signatures into chunks to avoid very long query URLs
            "signature": ["=" + signature for signature in self._signatures],
            "_aggs.signature": [
                "address",
                "build_id",
                # FIX: "cpu_arch" was previously listed twice in this facet
                # list; the redundant entry was removed.
                "cpu_arch",
                "proto_signature",
                "_cardinality.user_comments",
                "platform_pretty_version",
                "_histogram.date",
                # The following are needed for SignatureStats:
                "platform",
                "is_garbage_collecting",
                "_cardinality.install_time",
                "startup_crash",
                "_histogram.uptime",
                "process_type",
            ],
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_results: dict, data: dict):
            # Stash both the aggregate facets and the overall crash total.
            data["num_total_crashes"] = search_results["total"]
            data["signatures"] = search_results["facets"]["signature"]

        logger.debug(
            "Fetch from Socorro: requesting info for %d signatures",
            len(self._signatures),
        )

        data: dict = {}
        socorro.SuperSearchUnredacted(
            params=params,
            handler=handler,
            handlerdata=data,
        ).wait()

        logger.debug(
            "Fetch from Socorro: received info for %d signatures",
            len(data["signatures"]),
        )

        return data["signatures"], data["num_total_crashes"]
843

844
    def fetch_bugs(
        self, include_fields: list[str] | None = None
    ) -> dict[str, list[dict]]:
        """Fetch bugs that are filed against the given signatures.

        Args:
            include_fields: extra bug fields to request in addition to
                `cf_crash_signature` (which is always requested, since the
                handler needs it to map bugs back to signatures).

        Returns:
            A mapping from signature to the list of bugs whose crash
            signature field matches it.
        """
        if not self._signatures:
            return {}

        params_base: dict = {
            "include_fields": [
                "cf_crash_signature",
            ],
        }

        if include_fields:
            params_base["include_fields"].extend(include_fields)

        # Build one Bugzilla custom-search query per chunk of 30 signatures:
        # an OP/OR ... CP group where each f{n}/o{n}/v{n} triple matches one
        # signature via regexp against the crash-signature field.
        params_list = []
        for signatures_chunk in Connection.chunks(list(self._signatures), 30):
            params = params_base.copy()
            n = int(utils.get_last_field_num(params))
            params[f"f{n}"] = "OP"
            params[f"j{n}"] = "OR"
            for signature in signatures_chunk:
                n += 1
                params[f"f{n}"] = "cf_crash_signature"
                params[f"o{n}"] = "regexp"
                # Match the "[@ signature]" form (with or without the space
                # after "@" / before "]").
                params[f"v{n}"] = rf"\[(@ |@){re.escape(signature)}( \]|\])"
            params[f"f{n+1}"] = "CP"
            params_list.append(params)

        signatures_bugs: dict = defaultdict(list)

        def handler(res, data):
            # A bug can list multiple signatures; index it under each tracked
            # one.
            for bug in res["bugs"]:
                for signature in utils.get_signatures(bug["cf_crash_signature"]):
                    if signature in self._signatures:
                        data[signature].append(bug)

        logger.debug(
            "Fetch from Bugzilla: requesting bugs for %d signatures",
            len(self._signatures),
        )
        timeout = utils.get_config("common", "bz_query_timeout")
        Bugzilla(
            timeout=timeout,
            queries=[
                connection.Query(Bugzilla.API_URL, params, handler, signatures_bugs)
                for params in params_list
            ],
        ).wait()

        # TODO: remove the call to DevBugzilla after moving to production
        # NOTE: the queries are reused in place — each params dict gains an
        # extra whiteboard filter restricting matches to bugs filed by the
        # current experiment version.
        for params in params_list:
            # Excluded only filed bugs with the latest version. This will
            # re-generate the bugs after bumping the version.
            n = int(utils.get_last_field_num(params))
            params[f"f{n}"] = "status_whiteboard"
            params[f"o{n}"] = "substring"
            params[f"v{n}"] = f"[bugbot-crash-v{EXPERIMENT_VERSION}]"
        DevBugzilla(
            timeout=timeout,
            queries=[
                connection.Query(DevBugzilla.API_URL, params, handler, signatures_bugs)
                for params in params_list
            ],
        ).wait()

        logger.debug(
            "Fetch from Bugzilla: received bugs for %d signatures", len(signatures_bugs)
        )

        return signatures_bugs
    def analyze(self) -> list[SignatureAnalyzer]:
        """Analyze the data related to the signatures."""
        bugs = self.fetch_bugs()
        # TODO(investigate): For now, we are ignoring signatures that have bugs
        # filed even if they are closed long time ago. We should investigate
        # whether we should include the ones with closed bugs. For example, if
        # the bug was closed as Fixed years ago.
        self._signatures.difference_update(bugs.keys())

        clouseau_reports = self.fetch_clouseau_crash_reports()
        # TODO(investigate): For now, we are ignoring signatures that are not
        # analyzed by clouseau. We should investigate why they are not analyzed
        # and whether we should include them.
        self._signatures.intersection_update(clouseau_reports.keys())

        signature_facets, num_total_crashes = self.fetch_socorro_info()
        bugs_store = BugsStore()

        analyzers: list[SignatureAnalyzer] = []
        for facet in signature_facets:
            analyzers.append(
                SignatureAnalyzer(
                    facet,
                    num_total_crashes,
                    clouseau_reports[facet["term"]],
                    bugs_store,
                )
            )
        return analyzers
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc