• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mozilla / relman-auto-nag / #4622

pending completion
#4622

push

coveralls-python

web-flow
Update bugbot/rules/file_crash_bug.py

Co-authored-by: Marco Castelluccio <mcastelluccio@mozilla.com>

646 of 3434 branches covered (18.81%)

1828 of 8539 relevant lines covered (21.41%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/bugbot/crash/analyzer.py
1
# This Source Code Form is subject to the terms of the Mozilla Public
2
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3
# You can obtain one at http://mozilla.org/MPL/2.0/.
4

5
import itertools
×
6
import re
×
7
from collections import defaultdict
×
8
from datetime import date, timedelta
×
9
from functools import cached_property
×
10
from typing import Iterable, Iterator
×
11

12
from libmozdata import bugzilla, clouseau, connection, socorro
×
13
from libmozdata import utils as lmdutils
×
14
from libmozdata.bugzilla import Bugzilla
×
15
from libmozdata.connection import Connection
×
16

17
from bugbot import logger, utils
×
18
from bugbot.components import ComponentName
×
19
from bugbot.crash import socorro_util
×
20

21
# Allocator poison value addresses.
ALLOCATOR_ADDRESSES_64_BIT = (
    0xE5E5E5E5E5E5E5E5,
    0x4B4B4B4B4B4B4B4B,
)
ALLOCATOR_ADDRESSES_32_BIT = (
    0xE5E5E5E5,
    0x4B4B4B4B,
)
# The max offset from a memory address to be considered "near".
OFFSET_64_BIT = 0x1000
OFFSET_32_BIT = 0x100
# Ranges where addresses are considered near allocator poison values.
# Each entry is an inclusive `(low, high)` pair.
#
# NOTE: These are materialized as tuples on purpose. A bare generator
# expression can be iterated only once, so a module-level generator would be
# (partially) consumed by the first lookup and yield nothing afterwards.
ALLOCATOR_RANGES_64_BIT = tuple(
    (addr - OFFSET_64_BIT, addr + OFFSET_64_BIT) for addr in ALLOCATOR_ADDRESSES_64_BIT
)
ALLOCATOR_RANGES_32_BIT = tuple(
    (addr - OFFSET_32_BIT, addr + OFFSET_32_BIT) for addr in ALLOCATOR_ADDRESSES_32_BIT
)
40

41

42
def is_near_null_address(str_address) -> bool:
    """Tell whether a memory address lies within the "near null" window.

    Args:
        str_address: The memory address to check, as a string that
            `int(..., 0)` can parse (e.g., a `0x`-prefixed hex literal).

    Returns:
        True if the address is near null, False otherwise.
    """
    value = int(str_address, 0)

    # Strings of 18 or more characters are treated as 64-bit addresses
    # (presumably "0x" followed by 16 hex digits); anything shorter uses the
    # 32-bit offset.
    max_offset = OFFSET_64_BIT if len(str_address) >= 18 else OFFSET_32_BIT

    return -max_offset <= value <= max_offset
×
58

59

60
def is_near_allocator_address(str_address) -> bool:
    """Check if the address is near an allocator poison value.

    The check is computed directly from the poison addresses and the allowed
    offset rather than by iterating a shared, module-level range sequence, so
    the result never depends on how many times the function has been called.

    Args:
        str_address: The memory address to check, as a string that
            `int(..., 0)` can parse (e.g., a `0x`-prefixed hex literal).

    Returns:
        True if the address is near an allocator poison value, False otherwise.
    """
    address = int(str_address, 0)

    # Strings of 18 or more characters are treated as 64-bit addresses.
    if len(str_address) >= 18:
        poison_values, max_offset = ALLOCATOR_ADDRESSES_64_BIT, OFFSET_64_BIT
    else:
        poison_values, max_offset = ALLOCATOR_ADDRESSES_32_BIT, OFFSET_32_BIT

    # Equivalent to `poison - max_offset <= address <= poison + max_offset`.
    return any(abs(address - poison) <= max_offset for poison in poison_values)
78

79

80
# TODO: Move this to libmozdata
81
def generate_signature_page_url(params: dict, tab: str) -> str:
    """Build the URL of a Socorro signature page with a given tab selected.

    Args:
        params: the parameters for the search query.
        tab: the page tab that should be selected; it is appended as the URL
            fragment.

    Returns:
        The URL of the signature page on Socorro
    """
    base_url = socorro.Socorro.CRASH_STATS_URL
    encoded_params = lmdutils.get_params_for_url(params)
    return "{}/signature/{}#{}".format(base_url, encoded_params, tab)
×
94

95

96
# NOTE: At this point, we will file bugs on bugzilla-dev. Once we are confident
97
# that the bug filing is working as expected, we can switch to filing bugs in
98
# the production instance of Bugzilla.
99
class DevBugzilla(Bugzilla):
    """`Bugzilla` client whose endpoints point at the bugzilla-dev instance."""

    URL = "https://bugzilla-dev.allizom.org"
    API_URL = f"{URL}/rest/bug"
    ATTACHMENT_API_URL = f"{API_URL}/attachment"
    # Looked up once, when the class body is executed (i.e., at import time).
    TOKEN = utils.get_login_info()["bz_api_key_dev"]
×
104

105

106
class NoCrashReportFoundError(Exception):
    """Raised when no crash report satisfies the required criteria."""
108

109

110
class ClouseauDataAnalyzer:
    """Derive regression information from Crash Clouseau reports."""

    # Lowest score a report/changeset must reach to be considered at all.
    MINIMUM_CLOUSEAU_SCORE_THRESHOLD: int = 8
    # Fallback when the regressor bugs do not agree on a single component.
    DEFAULT_CRASH_COMPONENT = ComponentName("Core", "General")

    def __init__(self, reports: Iterable[dict]):
        """Store the Clouseau reports to analyze.

        Args:
            reports: The Clouseau reports for one signature. The properties
                below read the "max_score" and "changesets" keys of each
                report.
        """
        self._clouseau_reports = reports

    @cached_property
    def max_clouseau_score(self):
        """The maximum Clouseau score in the crash reports (0 if none)."""
        reports = self._clouseau_reports
        return max(entry["max_score"] for entry in reports) if reports else 0

    def _iter_candidate_changesets(self) -> Iterator[dict]:
        """Yield the changesets that could have caused the crash.

        A changeset qualifies when both its report and the changeset itself
        reach the accepted score, and it is neither a merge nor backed out.
        The accepted score is the larger of the class threshold and the
        highest score seen in the reports.
        """
        accepted_score = max(
            self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score
        )
        for report in self._clouseau_reports:
            if report["max_score"] >= accepted_score:
                for changeset in report["changesets"]:
                    if changeset["max_score"] >= accepted_score and not (
                        changeset["is_merge"] or changeset["is_backedout"]
                    ):
                        yield changeset

    @cached_property
    def regressed_by_potential_bug_ids(self) -> set[int]:
        """The IDs of the bugs whose patches could have caused the crash."""
        return {
            changeset["bug_id"] for changeset in self._iter_candidate_changesets()
        }

    @cached_property
    def regressed_by_patch(self) -> str | None:
        """The hash of the patch that could have caused the crash.

        If there is not exactly one candidate patch, the value is `None`.
        """
        hashes = {
            changeset["changeset"] for changeset in self._iter_candidate_changesets()
        }
        if len(hashes) != 1:
            return None
        (only_hash,) = hashes
        return only_hash

    @cached_property
    def regressed_by(self) -> int | None:
        """The ID of the bug that one of its patches could have caused
        the crash.

        If there is not exactly one candidate bug, the value is `None`.
        """
        candidate_ids = self.regressed_by_potential_bug_ids
        if len(candidate_ids) != 1:
            return None
        (only_id,) = candidate_ids
        return only_id

    @cached_property
    def regressed_by_potential_bugs(self) -> list[dict]:
        """The bugs whose patches could have caused the crash.

        The bugs are fetched from Bugzilla by ID, in a blocking request.
        """
        collected: list[dict] = []

        def collect(bug: dict, data: list):
            data.append(bug)

        request = Bugzilla(
            bugids=self.regressed_by_potential_bug_ids,
            include_fields=[
                "id",
                "assigned_to",
                "product",
                "component",
            ],
            bughandler=collect,
            bugdata=collected,
        )
        request.wait()

        return collected

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The author of the patch that could have caused the crash.

        If there is no single regressor, the value will be `None`.

        The regressor bug assignee is considered as the author, even if the
        assignee is not the patch author.
        """
        regressor_id = self.regressed_by
        if not regressor_id:
            return None

        # With a single regressor, the fetched list is expected to hold just
        # that bug.
        first_bug = self.regressed_by_potential_bugs[0]
        assert first_bug["id"] == regressor_id
        return first_bug["assigned_to_detail"]

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component that the crash belongs to.

        If the regressor bugs do not point to exactly one component, the
        value will be the default one.
        """
        components = {
            ComponentName(bug["product"], bug["component"])
            for bug in self.regressed_by_potential_bugs
        }
        if len(components) != 1:
            return self.DEFAULT_CRASH_COMPONENT
        (only_component,) = components
        return only_component
×
225

226

227
class SocorroDataAnalyzer(socorro_util.SignatureStats):
    """Analyze the data returned by Socorro."""

    # Class-level caches of Bugzilla's legal field values. They start as
    # `None` and are filled on first use by `to_bugzilla_op_sys` and
    # `to_bugzilla_cpu` respectively.
    _bugzilla_os_legal_values = None
    _bugzilla_cpu_legal_values_map = None
    _platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
    ):
        """Initialize the analyzer.

        Args:
            signature: The signature entry from Socorro, including the
                "facets" that the properties below read.
            num_total_crashes: The total number of crashes, forwarded to the
                base class.
        """
        super().__init__(signature, num_total_crashes, platforms=self._platforms)

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Return the corresponding OS name in Bugzilla for the provided OS name
        from Socorro.

        If the OS name is not recognized, return "Other".
        """
        if cls._bugzilla_os_legal_values is None:
            cls._bugzilla_os_legal_values = set(
                bugzilla.BugFields.fetch_field_values("op_sys")
            )

        # Exact matches to a legal Bugzilla value are passed through as is.
        if op_sys in cls._bugzilla_os_legal_values:
            return op_sys

        if op_sys.startswith(("OS X ", "macOS ")):
            return "macOS"
        if op_sys.startswith("Windows"):
            return "Windows"
        if "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            return "Linux"

        return "Other"

    @property
    def bugzilla_op_sys(self) -> str:
        """The name of the OS where the crash happens.

        The value is one of the legal values for Bugzilla's `op_sys` field.

        - If no OS name is found, the value will be "Unspecified".
        - If the OS name is not recognized, the value will be "Other".
        - If multiple OS names are found, the value will be "All". Unless the OS
          names can be resolved to a common name without a version. For example,
          "Windows 10" and "Windows 7" will become "Windows".
        """
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # Resolve to root OS name by removing the version number.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        if len(all_op_sys) == 2 and "Other" in all_op_sys:
            # TODO: explain this workaround.
            all_op_sys.remove("Other")

        if len(all_op_sys) == 1:
            return next(iter(all_op_sys))

        if len(all_op_sys) == 0:
            return "Unspecified"

        return "All"

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Return the corresponding CPU name in Bugzilla for the provided name
        from Socorro.

        The match is case-insensitive. If the CPU is not recognized, return
        "Other".
        """
        if cls._bugzilla_cpu_legal_values_map is None:
            cls._bugzilla_cpu_legal_values_map = {
                value.lower(): value
                for value in bugzilla.BugFields.fetch_field_values("rep_platform")
            }

        # The map is keyed by the lower-cased legal values, so the lookup key
        # must be lower-cased as well; otherwise any input containing an
        # upper-case character could never match.
        return cls._bugzilla_cpu_legal_values_map.get(cpu.lower(), "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The CPU architecture of the devices where the crash happens.

        The value is one of the legal values for Bugzilla's `rep_platform` field.

        - If no CPU architecture is found, the value will be "Unspecified".
        - If the CPU architecture is not recognized, the value will be "Other".
        - If multiple CPU architectures are found, the value will be "All".
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        # A single recognized architecture next to unrecognized ones is
        # reported as that architecture.
        if len(all_cpu_arch) == 2 and "Other" in all_cpu_arch:
            all_cpu_arch.remove("Other")

        if len(all_cpu_arch) == 1:
            return next(iter(all_cpu_arch))

        if len(all_cpu_arch) == 0:
            return "Unspecified"

        return "All"

    @property
    def user_comments_page_url(self) -> str:
        """The URL to the Signature page on Socorro where the Comments tab is
        selected.

        The search covers the period starting 26 weeks before today.
        """
        start_date = date.today() - timedelta(weeks=26)
        params = {
            "signature": self.signature_term,
            "date": socorro.SuperSearch.get_search_date(start_date),
        }
        return generate_signature_page_url(params, "comments")

    @property
    def num_user_comments(self) -> int:
        """The number of crash reports with user comments."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether the crash signature has any reports with a user comment."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The proto signature that occurs the most.

        This is the first entry of the "proto_signature" facet.
        """
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The number of crashes for the most occurring proto signature."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    def _build_ids(self) -> Iterator[int]:
        """Yields the build IDs where the crash occurred."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The build ID where most crashes occurred.

        This is the first entry of the "build_id" facet.
        """
        return self.signature["facets"]["build_id"][0]["term"]

    @cached_property
    def num_near_null_crashes(self) -> int:
        """The number of crashes that occurred on addresses near null."""
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_null_address(address["term"])
        )

    @property
    def is_near_null_crash(self) -> bool:
        """Whether all crashes occurred on addresses near null."""
        return self.num_near_null_crashes == self.num_crashes

    @property
    def is_potential_near_null_crash(self) -> bool:
        """Whether the signature is a potential near null crash.

        The value will be True if some but not all crashes occurred on addresses
        near null.
        """
        return not self.is_near_null_crash and self.num_near_null_crashes > 0

    @property
    def is_near_null_related_crash(self) -> bool:
        """Whether the signature is related to near null crashes.

        The value will be True if any of the crashes occurred on addresses near
        null.
        """
        return self.is_near_null_crash or self.is_potential_near_null_crash

    @cached_property
    def num_near_allocator_crashes(self) -> int:
        """The number of crashes that occurred on addresses near an allocator
        poison value.
        """
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_allocator_address(address["term"])
        )

    @property
    def is_near_allocator_crash(self) -> bool:
        """Whether all crashes occurred on addresses near an allocator poison
        value.
        """
        return self.num_near_allocator_crashes == self.num_crashes

    @property
    def is_potential_near_allocator_crash(self) -> bool:
        """Whether the signature is a potential near allocator poison value
        crash.

        The value will be True if some but not all crashes occurred on addresses
        near an allocator poison value.
        """
        return not self.is_near_allocator_crash and self.num_near_allocator_crashes > 0

    @property
    def is_near_allocator_related_crash(self) -> bool:
        """Whether the signature is related to near allocator poison value
        crashes.

        The value will be True if any of the crashes occurred on addresses near
        an allocator poison value.
        """
        return self.is_near_allocator_crash or self.is_potential_near_allocator_crash
×
459

460

461
class SignatureAnalyzer(SocorroDataAnalyzer, ClouseauDataAnalyzer):
    """Combine the Socorro and Clouseau analyses for a single signature."""

    def __init__(
        self,
        socorro_signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
    ):
        """Initialize both parent analyzers explicitly.

        Args:
            socorro_signature: The signature entry from Socorro.
            num_total_crashes: The total number of crashes.
            clouseau_reports: The Clouseau reports for the signature.
        """
        # Each base takes different arguments, so they are initialized one by
        # one instead of through a cooperative `super()` chain.
        SocorroDataAnalyzer.__init__(self, socorro_signature, num_total_crashes)
        ClouseauDataAnalyzer.__init__(self, clouseau_reports)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Yield crash reports from Socorro for a proto signature and build(s).

        This is a generator, so the search request is only sent once the
        first item is requested.

        Args:
            proto_signature: The proto signature to match exactly.
            build_id: The build ID, or build IDs, to search in.
            limit: The maximum number of results to request.

        Yields:
            The search hits; only the "uuid" column is requested.
        """
        search_result: dict = {}

        def store(res: dict, data: dict):
            data.update(res)

        query = {
            "proto_signature": "=" + proto_signature,
            "build_id": build_id,
            "_columns": [
                "uuid",
            ],
            "_results_number": limit,
        }
        socorro.SuperSearch(
            params=query, handler=store, handlerdata=search_result
        ).wait()

        yield from search_result["hits"]

    def fetch_representative_processed_crash(self) -> dict:
        """Fetch a processed crash to represent the signature.

        Several processed crashes may be fetched along the way; the first
        acceptable one is returned.

        Raises:
            NoCrashReportFoundError: If none of the candidate reports is
                acceptable.
        """
        # When one proto signature accounts for more than 60% of the crashes,
        # only a crash with that proto signature is accepted.
        must_match_top_proto = (
            self.num_top_proto_signature_crashes / self.num_crashes > 0.6
        )

        # Reports with a higher score from clouseau are more likely to be
        # useful, so they are tried first.
        ranked_clouseau_reports = sorted(
            self._clouseau_reports,
            key=lambda entry: entry["max_score"],
            reverse=True,
        )
        top_proto = self.top_proto_signature
        candidates = itertools.chain(
            ranked_clouseau_reports,
            # Next, reports from the top crashing build, because they are
            # likely to be representative; then reports from any build.
            self._fetch_crash_reports(top_proto, self.top_build_id),
            self._fetch_crash_reports(top_proto, self._build_ids()),
        )

        for candidate in candidates:
            crash_id = candidate["uuid"]
            processed = socorro.ProcessedCrash.get_processed(crash_id)[crash_id]
            if must_match_top_proto and processed["proto_signature"] != top_proto:
                continue

            # TODO(investigate): maybe we should check if the stack is
            # corrupted (ask gsvelto or willkg about how to detect that)
            return processed

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )
536

537

538
class SignaturesDataFetcher:
×
539
    """Fetch the data related to the given signatures."""
540

541
    MEMORY_ACCESS_ERROR_REASONS = (
×
542
        # On Windows:
543
        "EXCEPTION_ACCESS_VIOLATION_READ",
544
        "EXCEPTION_ACCESS_VIOLATION_WRITE",
545
        "EXCEPTION_ACCESS_VIOLATION_EXEC"
546
        # On Linux:
547
        "SIGSEGV / SEGV_MAPERR",
548
        "SIGSEGV / SEGV_ACCERR",
549
    )
550

551
    EXCLUDED_MOZ_REASON_STRINGS = (
×
552
        "MOZ_CRASH(OOM)",
553
        "MOZ_CRASH(Out of memory)",
554
        "out of memory",
555
        "Shutdown hanging",
556
        # TODO(investigate): do we need to exclude signatures that their reason
557
        # contains `[unhandlable oom]`?
558
        # Example: arena_t::InitChunk | arena_t::AllocRun | arena_t::MallocLarge | arena_t::Malloc | BaseAllocator::malloc | Allocator::malloc | PageMalloc
559
        # "[unhandlable oom]",
560
    )
561

562
    # If any of the crash reason starts with any of the following, then it is
563
    # Network or I/O error.
564
    EXCLUDED_IO_ERROR_REASON_PREFIXES = (
×
565
        "EXCEPTION_IN_PAGE_ERROR_READ",
566
        "EXCEPTION_IN_PAGE_ERROR_WRITE",
567
        "EXCEPTION_IN_PAGE_ERROR_EXEC",
568
    )
569

570
    # TODO(investigate): do we need to exclude all these signatures prefixes?
571
    EXCLUDED_SIGNATURE_PREFIXES = (
×
572
        "OOM | ",
573
        "bad hardware | ",
574
        "shutdownhang | ",
575
    )
576

577
    def __init__(
×
578
        self,
579
        signatures: Iterable[str],
580
        product: str = "Firefox",
581
        channel: str = "nightly",
582
    ):
583
        self._signatures = set(signatures)
×
584
        self._product = product
×
585
        self._channel = channel
×
586

587
    @classmethod
×
588
    def find_new_actionable_crashes(
×
589
        cls,
590
        product: str,
591
        channel: str,
592
        days_to_check: int = 7,
593
        days_without_crashes: int = 7,
594
    ) -> "SignaturesDataFetcher":
595
        """Find new actionable crashes.
596

597
        Args:
598
            product: The product to check.
599
            channel: The release channel to check.
600
            days_to_check: The number of days to check for crashes.
601
            days_without_crashes: The number of days without crashes before the
602
                `days_to_check` to consider the signature new.
603

604
        Returns:
605
            A list of actionable signatures.
606
        """
607
        duration = days_to_check + days_without_crashes
×
608
        end_date = lmdutils.get_date_ymd("today")
×
609
        start_date = end_date - timedelta(duration)
×
610
        earliest_allowed_date = lmdutils.get_date_str(
×
611
            end_date - timedelta(days_to_check)
612
        )
613
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)
×
614

615
        params = {
×
616
            "product": product,
617
            "release_channel": channel,
618
            "date": date_range,
619
            # TODO(investigate): should we do a local filter instead of the
620
            # following (should we exclude the signature if one of the crashes
621
            # is a shutdown hang?):
622
            # If the `ipc_shutdown_state` or `shutdown_progress` field are
623
            # non-empty then it's a shutdown hang.
624
            "ipc_shutdown_state": "__null__",
625
            "shutdown_progress": "__null__",
626
            # TODO(investigate): should we use the following instead of the
627
            # local filter.
628
            # "oom_allocation_size": "!__null__",
629
            "_aggs.signature": [
630
                "moz_crash_reason",
631
                "reason",
632
                "_histogram.date",
633
                "_cardinality.install_time",
634
                "_cardinality.oom_allocation_size",
635
            ],
636
            "_results_number": 0,
637
            "_facets_size": 10000,
638
        }
639

640
        def handler(search_resp: dict, data: list):
×
641
            logger.debug(
×
642
                "Total of %d signatures received from Socorro",
643
                len(search_resp["facets"]["signature"]),
644
            )
645

646
            for crash in search_resp["facets"]["signature"]:
×
647
                signature = crash["term"]
×
648
                if any(
×
649
                    signature.startswith(excluded_prefix)
650
                    for excluded_prefix in cls.EXCLUDED_SIGNATURE_PREFIXES
651
                ):
652
                    # Ignore signatures that start with any of the excluded prefixes.
653
                    continue
×
654

655
                facets = crash["facets"]
×
656
                installations = facets["cardinality_install_time"]["value"]
×
657
                if installations <= 1:
×
658
                    # Ignore crashes that only happen on one installation.
659
                    continue
×
660

661
                first_date = facets["histogram_date"][0]["term"]
×
662
                if first_date < earliest_allowed_date:
×
663
                    # The crash is not new, skip it.
664
                    continue
×
665

666
                if any(
×
667
                    reason["term"].startswith(io_error_prefix)
668
                    for reason in facets["reason"]
669
                    for io_error_prefix in cls.EXCLUDED_IO_ERROR_REASON_PREFIXES
670
                ):
671
                    # Ignore Network or I/O error crashes.
672
                    continue
×
673

674
                if crash["count"] < 20:
×
675
                    # For signatures with low volume, having multiple types of
676
                    # memory errors indicates potential bad hardware crashes.
677
                    num_memory_error_types = sum(
×
678
                        reason["term"] in cls.MEMORY_ACCESS_ERROR_REASONS
679
                        for reason in facets["reason"]
680
                    )
681
                    if num_memory_error_types > 1:
×
682
                        # Potential bad hardware crash, skip it.
683
                        continue
×
684

685
                # TODO: Add a filter using the `possible_bit_flips_max_confidence`
686
                # field to exclude bad hardware crashes. The filed is not available yet.
687
                # See: https://bugzilla.mozilla.org/show_bug.cgi?id=1816669#c3
688

689
                # TODO(investigate): is this needed since we are already
690
                # filtering signatures that start with "OOM | "
691
                if facets["cardinality_oom_allocation_size"]["value"]:
×
692
                    # If one of the crashes is an OOM crash, skip it.
693
                    continue
×
694

695
                # TODO(investigate): do we need to check for the `moz_crash_reason`
696
                moz_crash_reasons = facets["moz_crash_reason"]
×
697
                if moz_crash_reasons and any(
×
698
                    excluded_reason in reason["term"]
699
                    for reason in moz_crash_reasons
700
                    for excluded_reason in cls.EXCLUDED_MOZ_REASON_STRINGS
701
                ):
702
                    continue
×
703

704
                data.append(signature)
×
705

706
        signatures: list = []
×
707
        socorro.SuperSearch(
×
708
            params=params,
709
            handler=handler,
710
            handlerdata=signatures,
711
        ).wait()
712

713
        logger.debug(
×
714
            "Total of %d signatures left after applying the filtering criteria",
715
            len(signatures),
716
        )
717

718
        return cls(signatures, product, channel)
×
719

720
    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
×
721
        """Fetch the crash reports data from Crash Clouseau."""
722
        signature_reports = clouseau.Reports.get_by_signatures(
×
723
            self._signatures,
724
            product=self._product,
725
            channel=self._channel,
726
        )
727

728
        logger.debug(
×
729
            "Total of %d signatures received from Clouseau", len(signature_reports)
730
        )
731

732
        return signature_reports
×
733

734
    def fetch_socorro_info(self) -> tuple[list[dict], int]:
×
735
        """Fetch the signature data from Socorro."""
736
        # TODO(investigate): should we increase the duration to 6 months?
737
        duration = timedelta(weeks=1)
×
738
        end_date = lmdutils.get_date_ymd("today")
×
739
        start_date = end_date - duration
×
740
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)
×
741

742
        params = {
×
743
            "product": self._product,
744
            # TODO(investigate): should we included all release channels?
745
            "release_channel": self._channel,
746
            # TODO(investigate): should we limit based on the build date as well?
747
            "date": date_range,
748
            # TODO: split signatures into chunks to avoid very long query URLs
749
            "signature": ["=" + signature for signature in self._signatures],
750
            "_aggs.signature": [
751
                "address",
752
                "build_id",
753
                "cpu_arch",
754
                "proto_signature",
755
                "_cardinality.user_comments",
756
                "cpu_arch",
757
                "platform_pretty_version",
758
                # The following are needed for SignatureStats:
759
                "platform",
760
                "is_garbage_collecting",
761
                "_cardinality.install_time",
762
                "startup_crash",
763
                "_histogram.uptime",
764
                "process_type",
765
            ],
766
            "_results_number": 0,
767
            "_facets_size": 10000,
768
        }
769

770
        def handler(search_results: dict, data: dict):
×
771
            data["num_total_crashes"] = search_results["total"]
×
772
            data["signatures"] = search_results["facets"]["signature"]
×
773

774
        data: dict = {}
×
775
        socorro.SuperSearchUnredacted(
×
776
            params=params,
777
            handler=handler,
778
            handlerdata=data,
779
        ).wait()
780

781
        logger.debug(
×
782
            "Fetch info from Socorro for %d signatures", len(data["signatures"])
783
        )
784

785
        return data["signatures"], data["num_total_crashes"]
×
786

787
    def fetch_bugs(self, include_fields: list[str] = None) -> dict[str, list[dict]]:
×
788
        """Fetch bugs that are filed against the given signatures."""
789

790
        params_base: dict = {
×
791
            "include_fields": [
792
                "cf_crash_signature",
793
            ],
794
        }
795

796
        if include_fields:
×
797
            params_base["include_fields"].extend(include_fields)
×
798

799
        params_list = []
×
800
        for signatures_chunk in Connection.chunks(list(self._signatures), 30):
×
801
            params = params_base.copy()
×
802
            n = int(utils.get_last_field_num(params))
×
803
            params[f"f{n}"] = "OP"
×
804
            params[f"j{n}"] = "OR"
×
805
            for signature in signatures_chunk:
×
806
                n += 1
×
807
                params[f"f{n}"] = "cf_crash_signature"
×
808
                params[f"o{n}"] = "regexp"
×
809
                params[f"v{n}"] = rf"\[(@ |@){re.escape(signature)}( \]|\])"
×
810
            params[f"f{n+1}"] = "CP"
×
811
            params_list.append(params)
×
812

813
        signatures_bugs: dict = defaultdict(list)
×
814

815
        def handler(res, data):
×
816
            for bug in res["bugs"]:
×
817
                for signature in utils.get_signatures(bug["cf_crash_signature"]):
×
818
                    if signature in self._signatures:
×
819
                        data[signature].append(bug)
×
820

821
        Bugzilla(
×
822
            queries=[
823
                connection.Query(Bugzilla.API_URL, params, handler, signatures_bugs)
824
                for params in params_list
825
            ],
826
        ).wait()
827

828
        # TODO: remove the call to DevBugzilla after moving to production
829
        DevBugzilla(
×
830
            queries=[
831
                connection.Query(DevBugzilla.API_URL, params, handler, signatures_bugs)
832
                for params in params_list
833
            ],
834
        ).wait()
835

836
        logger.debug(
×
837
            "Total of %d signatures already have bugs filed", len(signatures_bugs)
838
        )
839

840
        return signatures_bugs
×
841

842
    def analyze(self) -> list[SignatureAnalyzer]:
×
843
        """Analyze the data related to the signatures."""
844
        bugs = self.fetch_bugs()
×
845
        # TODO(investigate): For now, we are ignoring signatures that have bugs
846
        # filed even if they are closed long time ago. We should investigate
847
        # whether we should include the ones with closed bugs. For example, if
848
        # the bug was closed as Fixed years ago.
849
        self._signatures.difference_update(bugs.keys())
×
850

851
        clouseau_reports = self.fetch_clouseau_crash_reports()
×
852
        # TODO(investigate): For now, we are ignoring signatures that are not
853
        # analyzed by clouseau. We should investigate why they are not analyzed
854
        # and whether we should include them.
855
        self._signatures.intersection_update(clouseau_reports.keys())
×
856

857
        signatures, num_total_crashes = self.fetch_socorro_info()
×
858
        logger.debug("Total of %d signatures will be analyzed", len(signatures))
×
859

860
        return [
×
861
            SignatureAnalyzer(
862
                signature,
863
                num_total_crashes,
864
                clouseau_reports[signature["term"]],
865
            )
866
            for signature in signatures
867
        ]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc