• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mozilla / relman-auto-nag / #4730

07 Sep 2023 03:00PM CUT coverage: 22.004% (-0.01%) from 22.018%
#4730

push

coveralls-python

suhaibmujahid
[crash/analyzer] Reduce the number of workers for bugzilla-dev

716 of 3576 branches covered (20.02%)

1 of 1 new or added line in 1 file covered. (100.0%)

1924 of 8744 relevant lines covered (22.0%)

0.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/bugbot/crash/analyzer.py
1
# This Source Code Form is subject to the terms of the Mozilla Public
2
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3
# You can obtain one at http://mozilla.org/MPL/2.0/.
4

5
import itertools
×
6
import re
×
7
from collections import defaultdict
×
8
from datetime import date, timedelta
×
9
from functools import cached_property
×
10
from typing import Iterable, Iterator
×
11

12
from libmozdata import bugzilla, clouseau, connection, socorro
×
13
from libmozdata import utils as lmdutils
×
14
from libmozdata.bugzilla import Bugzilla
×
15
from libmozdata.connection import Connection
×
16

17
from bugbot import logger, utils
×
18
from bugbot.bug.analyzer import BugAnalyzer, BugsStore
×
19
from bugbot.components import ComponentName
×
20
from bugbot.crash import socorro_util
×
21

22
# The max offset from a memory address to be considered "near".
OFFSET_64_BIT = 0x1000
OFFSET_32_BIT = 0x100
# Allocator poison value addresses.
ALLOCATOR_ADDRESSES_64_BIT = (
    (0xE5E5E5E5E5E5E5E5, OFFSET_64_BIT),
    # On 64-bit windows, sometimes it could be doing something with a 32-bit
    # value gotten from freed memory, so it'll be 0X00000000E5E5E5E5 +/-, and
    # because of the address limitation, quite often it will be
    # 0X0000E5E5E5E5E5E5 +/-.
    (0x00000000E5E5E5E5, OFFSET_32_BIT),
    (0x0000E5E5E5E5E5E5, OFFSET_64_BIT),
    (0x4B4B4B4B4B4B4B4B, OFFSET_64_BIT),
)
ALLOCATOR_ADDRESSES_32_BIT = (
    (0xE5E5E5E5, OFFSET_32_BIT),
    (0x4B4B4B4B, OFFSET_32_BIT),
)
# Ranges where addresses are considered near allocator poison values.
# NOTE: these must be materialized tuples, not generator expressions. A
# generator is exhausted after its first full scan, which would make every
# later call to `is_near_allocator_address()` silently return False.
ALLOCATOR_RANGES_64_BIT = tuple(
    (addr - offset, addr + offset) for addr, offset in ALLOCATOR_ADDRESSES_64_BIT
)
ALLOCATOR_RANGES_32_BIT = tuple(
    (addr - offset, addr + offset) for addr, offset in ALLOCATOR_ADDRESSES_32_BIT
)

# NOTE: If you make changes that affect the output of the analysis, you should
# increment this number. This is needed in the experimental phase only.
EXPERIMENT_VERSION = 3
51

52

53
def is_near_null_address(str_address) -> bool:
    """Determine whether a memory address falls within the near-null window.

    Args:
        str_address: The memory address to check.

    Returns:
        True if the address is near null, False otherwise.
    """
    # 18+ characters (e.g. "0x" followed by 16 hex digits) implies a 64-bit
    # address; pick the matching "near" window width.
    offset = OFFSET_64_BIT if len(str_address) >= 18 else OFFSET_32_BIT
    return -offset <= int(str_address, 0) <= offset
69

70

71
def is_near_allocator_address(str_address) -> bool:
    """Determine whether a memory address is close to an allocator poison value.

    Args:
        str_address: The memory address to check.

    Returns:
        True if the address is near an allocator poison value, False otherwise.
    """
    # 18+ characters (e.g. "0x" followed by 16 hex digits) implies a 64-bit
    # address; select the matching poison-value ranges.
    if len(str_address) >= 18:
        ranges = ALLOCATOR_RANGES_64_BIT
    else:
        ranges = ALLOCATOR_RANGES_32_BIT

    address = int(str_address, 0)
    return any(low <= address <= high for low, high in ranges)
89

90

91
# TODO: Move this to libmozdata
def generate_signature_page_url(params: dict, tab: str) -> str:
    """Generate a URL to the signature page on Socorro

    Args:
        params: the parameters for the search query.
        tab: the page tab that should be selected.

    Returns:
        The URL of the signature page on Socorro
    """
    # Encode the search parameters as a query string, then anchor on the tab.
    query_string = lmdutils.get_params_for_url(params)
    return f"{socorro.Socorro.CRASH_STATS_URL}/signature/{query_string}#{tab}"
105

106

107
# NOTE: At this point, we will file bugs on bugzilla-dev. Once we are confident
# that the bug filing is working as expected, we can switch to filing bugs in
# the production instance of Bugzilla.
class DevBugzilla(Bugzilla):
    """A `Bugzilla` client pointed at the bugzilla-dev staging instance."""

    # Staging endpoints, mirroring the production client's constant layout.
    URL = "https://bugzilla-dev.allizom.org"
    API_URL = URL + "/rest/bug"
    ATTACHMENT_API_URL = API_URL + "/attachment"
    # NOTE(review): this runs at import time; if "bz_api_key_dev" is missing
    # from the login info, importing this module fails — confirm the key is
    # always provisioned wherever this module is loaded.
    TOKEN = utils.get_login_info()["bz_api_key_dev"]
    # Note(suhaib): the dev instance of bugzilla has a smaller cluster, so we
    # need to go easy on it.
    MAX_WORKERS = 1
118

119

120
class NoCrashReportFoundError(Exception):
    """There are no crash reports that meet the required criteria.

    Raised by `SignatureAnalyzer.fetch_representative_processed_crash` when no
    processed crash matches the signature's most frequent proto signature.
    """
122

123

124
class ClouseauDataAnalyzer:
    """Analyze the data returned by Crash Clouseau about a specific crash
    signature.
    """

    # Reports/changesets scoring below this Clouseau score are ignored.
    MINIMUM_CLOUSEAU_SCORE_THRESHOLD: int = 8
    # Fallback component when the potential regressors do not agree on one.
    DEFAULT_CRASH_COMPONENT = ComponentName("Core", "General")

    def __init__(self, reports: Iterable[dict], bugs_store: BugsStore):
        """Args:
        reports: the Clouseau reports for the signature. Assumed to be a
            re-iterable collection (e.g. a list), not a one-shot iterator,
            since it is scanned multiple times — TODO confirm with callers.
        bugs_store: a shared store used to fetch and cache Bugzilla bugs.
        """
        self._clouseau_reports = reports
        self.bugs_store = bugs_store

    @cached_property
    def max_clouseau_score(self) -> int:
        """The maximum Clouseau score in the crash reports."""
        if not self._clouseau_reports:
            return 0
        return max(report["max_score"] for report in self._clouseau_reports)

    def _accepted_changesets(self) -> Iterator[dict]:
        """Yield the changesets that could plausibly have caused the crash.

        A changeset is accepted when both its report and the changeset itself
        meet the minimum accepted score, and the changeset is neither a merge
        nor a backed-out patch.
        """
        minimum_accepted_score = max(
            self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score
        )
        for report in self._clouseau_reports:
            if report["max_score"] < minimum_accepted_score:
                continue
            for changeset in report["changesets"]:
                if (
                    changeset["max_score"] >= minimum_accepted_score
                    and not changeset["is_merge"]
                    and not changeset["is_backedout"]
                ):
                    yield changeset

    @cached_property
    def regressed_by_potential_bug_ids(self) -> set[int]:
        """The IDs for the bugs that their patches could have caused the crash."""
        return {changeset["bug_id"] for changeset in self._accepted_changesets()}

    @cached_property
    def regressed_by_patch(self) -> str | None:
        """The hash of the patch that could have caused the crash.

        If there are multiple candidate patches, the value will be `None`.
        """
        potential_patches = {
            changeset["changeset"] for changeset in self._accepted_changesets()
        }
        if len(potential_patches) == 1:
            return next(iter(potential_patches))
        return None

    @cached_property
    def regressed_by(self) -> int | None:
        """The ID of the bug that one of its patches could have caused
        the crash.

        If there are multiple bugs, the value will be `None`.
        """
        bug_ids = self.regressed_by_potential_bug_ids
        if len(bug_ids) == 1:
            return next(iter(bug_ids))
        return None

    @cached_property
    def regressed_by_potential_bugs(self) -> list[BugAnalyzer]:
        """The bugs whose patches could have caused the crash."""
        self.bugs_store.fetch_bugs(
            self.regressed_by_potential_bug_ids,
            [
                "id",
                "groups",
                "assigned_to",
                "product",
                "component",
            ],
        )
        return [
            self.bugs_store.get_bug_by_id(bug_id)
            for bug_id in self.regressed_by_potential_bug_ids
        ]

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The author of the patch that could have caused the crash.

        If there are multiple regressors, the value will be `None`.

        The regressor bug assignee is considered as the author, even if the
        assignee is not the patch author.
        """
        if not self.regressed_by:
            return None

        # When `regressed_by` is set there is exactly one potential bug, so
        # the single list element must be that bug.
        bug = self.regressed_by_potential_bugs[0]
        assert bug.id == self.regressed_by
        return bug.get_field("assigned_to_detail")

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component that the crash belongs to.

        If there are multiple components, the value will be the default one.
        """
        potential_components = {
            bug.component for bug in self.regressed_by_potential_bugs
        }
        if len(potential_components) == 1:
            return next(iter(potential_components))
        return self.DEFAULT_CRASH_COMPONENT
237

238

239
class SocorroDataAnalyzer(socorro_util.SignatureStats):
    """Analyze the data returned by Socorro."""

    # Caches of Bugzilla's legal field values, fetched lazily and shared by
    # all instances (populated at most once per process).
    _bugzilla_os_legal_values = None
    _bugzilla_cpu_legal_values_map = None
    # Platforms passed to SignatureStats for per-platform breakdowns.
    _platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
    ):
        """Args:
        signature: the signature facet as returned by Socorro SuperSearch.
        num_total_crashes: the total number of crashes in the query period.
        """
        super().__init__(signature, num_total_crashes, platforms=self._platforms)

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Return the corresponding OS name in Bugzilla for the provided OS name
        from Socorro.

        If the OS name is not recognized, return "Other".
        """
        if cls._bugzilla_os_legal_values is None:
            cls._bugzilla_os_legal_values = set(
                bugzilla.BugFields.fetch_field_values("op_sys")
            )

        if op_sys in cls._bugzilla_os_legal_values:
            return op_sys

        if op_sys.startswith("OS X ") or op_sys.startswith("macOS "):
            op_sys = "macOS"
        elif op_sys.startswith("Windows"):
            op_sys = "Windows"
        elif "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            op_sys = "Linux"
        else:
            op_sys = "Other"

        return op_sys

    @property
    def first_crash_date(self) -> str:
        """The date of the first crash within the query time range.

        The date is in YYYY-MM-DD format.
        """
        return self.signature["facets"]["histogram_date"][0]["term"][:10]

    @staticmethod
    def _resolve_single_bugzilla_value(values: set) -> str:
        """Collapse a set of candidate Bugzilla field values into one value.

        - If the set is empty, return "Unspecified".
        - If exactly one value remains, return it. A pair where one member is
          "Other" counts as a single value: "Other" only means the original
          Socorro value was not recognized, so it is dropped first.
        - Otherwise, return "All".

        Note: the passed set may be mutated ("Other" may be removed).
        """
        if len(values) == 2 and "Other" in values:
            # TODO: explain this workaround.
            values.remove("Other")

        if len(values) == 1:
            return next(iter(values))

        if len(values) == 0:
            return "Unspecified"

        return "All"

    @property
    def bugzilla_op_sys(self) -> str:
        """The name of the OS where the crash happens.

        The value is one of the legal values for Bugzilla's `op_sys` field.

        - If no OS name is found, the value will be "Unspecified".
        - If the OS name is not recognized, the value will be "Other".
        - If multiple OS names are found, the value will be "All". Unless the OS
          names can be resolved to a common name without a version. For example,
          "Windows 10" and "Windows 7" will become "Windows".
        """
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # Resolve to root OS name by removing the version number.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        return self._resolve_single_bugzilla_value(all_op_sys)

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Return the corresponding CPU name in Bugzilla for the provided name
        from Socorro.

        If the CPU is not recognized, return "Other".
        """
        if cls._bugzilla_cpu_legal_values_map is None:
            cls._bugzilla_cpu_legal_values_map = {
                value.lower(): value
                for value in bugzilla.BugFields.fetch_field_values("rep_platform")
            }

        return cls._bugzilla_cpu_legal_values_map.get(cpu, "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The CPU architecture of the devices where the crash happens.

        The value is one of the legal values for Bugzilla's `rep_platform` field.

        - If no CPU architecture is found, the value will be "Unspecified".
        - If the CPU architecture is not recognized, the value will be "Other".
        - If multiple CPU architectures are found, the value will "All".
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        return self._resolve_single_bugzilla_value(all_cpu_arch)

    @property
    def user_comments_page_url(self) -> str:
        """The URL to the Signature page on Socorro where the Comments tab is
        selected.
        """
        start_date = date.today() - timedelta(weeks=26)
        params = {
            "signature": self.signature_term,
            "date": socorro.SuperSearch.get_search_date(start_date),
        }
        return generate_signature_page_url(params, "comments")

    @property
    def num_user_comments(self) -> int:
        """The number of crash reports with user comments."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether the crash signature has any reports with a user comment."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The proto signature that occurs the most."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The number of crashes for the most occurring proto signature."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    def _build_ids(self) -> Iterator[int]:
        """Yields the build IDs where the crash occurred."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The build ID where most crashes occurred."""
        return self.signature["facets"]["build_id"][0]["term"]

    @cached_property
    def num_near_null_crashes(self) -> int:
        """The number of crashes that occurred on addresses near null."""
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_null_address(address["term"])
        )

    @property
    def is_near_null_crash(self) -> bool:
        """Whether all crashes occurred on addresses near null."""
        return self.num_near_null_crashes == self.num_crashes

    @property
    def is_potential_near_null_crash(self) -> bool:
        """Whether the signature is a potential near null crash.

        The value will be True if some but not all crashes occurred on addresses
        near null.
        """
        return not self.is_near_null_crash and self.num_near_null_crashes > 0

    @property
    def is_near_null_related_crash(self) -> bool:
        """Whether the signature is related to near null crashes.

        The value will be True if any of the crashes occurred on addresses near
        null.
        """
        return self.is_near_null_crash or self.is_potential_near_null_crash

    @cached_property
    def num_near_allocator_crashes(self) -> int:
        """The number of crashes that occurred on addresses near an allocator
        poison value.
        """
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_allocator_address(address["term"])
        )

    @property
    def is_near_allocator_crash(self) -> bool:
        """Whether all crashes occurred on addresses near an allocator poison
        value.
        """
        return self.num_near_allocator_crashes == self.num_crashes

    @property
    def is_potential_near_allocator_crash(self) -> bool:
        """Whether the signature is a potential near allocator poison value
        crash.

        The value will be True if some but not all crashes occurred on addresses
        near an allocator poison value.
        """
        return not self.is_near_allocator_crash and self.num_near_allocator_crashes > 0

    @property
    def is_near_allocator_related_crash(self) -> bool:
        """Whether the signature is related to near allocator poison value
        crashes.

        The value will be True if any of the crashes occurred on addresses near
        an allocator poison value.
        """
        return self.is_near_allocator_crash or self.is_potential_near_allocator_crash
479

480

481
class SignatureAnalyzer(SocorroDataAnalyzer, ClouseauDataAnalyzer):
    """Analyze the data related to a signature.

    This includes data from Socorro and Clouseau.
    """

    def __init__(
        self,
        socorro_signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
        bugs_store: BugsStore,
    ):
        # Each base class keeps its own state; initialize both explicitly
        # rather than relying on a cooperative super() chain.
        SocorroDataAnalyzer.__init__(self, socorro_signature, num_total_crashes)
        ClouseauDataAnalyzer.__init__(self, clouseau_reports, bugs_store)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Fetch crash report hits (UUIDs) from Socorro SuperSearch.

        Args:
            proto_signature: the proto signature to match exactly.
            build_id: one build ID, or an iterable of build IDs, to search in.
            limit: the maximum number of hits to request.

        Yields:
            SuperSearch hit dicts, each containing a "uuid" key.
        """
        params = {
            "proto_signature": "=" + proto_signature,
            "build_id": build_id,
            "_columns": [
                "uuid",
            ],
            "_results_number": limit,
        }

        def handler(res: dict, data: dict):
            data.update(res)

        data: dict = {}
        socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait()

        yield from data["hits"]

    def fetch_representative_processed_crash(self) -> dict:
        """Fetch a processed crash to represent the signature.

        This could fetch multiple processed crashes and return the one that is
        most likely to be useful.

        Raises:
            NoCrashReportFoundError: no fetched crash matched the most
                frequent proto signature (when that restriction applies).
        """
        # Only insist on the top proto signature when it dominates the
        # signature's crash volume (> 60%).
        limit_to_top_proto_signature = (
            self.num_top_proto_signature_crashes / self.num_crashes > 0.6
        )

        reports = itertools.chain(
            # Reports with a higher score from clouseau are more likely to be
            # useful.
            # NOTE(review): assumes Clouseau reports carry a "uuid" key, like
            # the SuperSearch hits chained after them — TODO confirm.
            sorted(
                self._clouseau_reports,
                key=lambda report: report["max_score"],
                reverse=True,
            ),
            # Next we try find reports from the top crashing build because they
            # are likely to be representative.
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self._build_ids()),
        )
        for report in reports:
            uuid = report["uuid"]
            processed_crash = socorro.ProcessedCrash.get_processed(uuid)[uuid]
            if (
                not limit_to_top_proto_signature
                or processed_crash["proto_signature"] == self.top_proto_signature
            ):
                # TODO(investigate): maybe we should check if the stack is
                # corrupted (ask gsvelto or willkg about how to detect that)
                return processed_crash

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )

    @cached_property
    def is_potential_security_crash(self) -> bool:
        """Whether the crash is related to a potential security bug.

        The value will be True if:
            - the signature is related to near allocator poison value crashes, or
            - one of the potential regressors is a security bug
        """
        return self.is_near_allocator_related_crash or any(
            bug.is_security for bug in self.regressed_by_potential_bugs
        )
569

570

571
class SignaturesDataFetcher:
    """Fetch the data related to the given signatures."""

    # Crash reasons that indicate a memory access error.
    MEMORY_ACCESS_ERROR_REASONS = (
        # On Windows:
        "EXCEPTION_ACCESS_VIOLATION_READ",
        "EXCEPTION_ACCESS_VIOLATION_WRITE",
        # NOTE: the trailing comma here is required; without it Python
        # implicitly concatenates this string with the next one.
        "EXCEPTION_ACCESS_VIOLATION_EXEC",
        # On Linux:
        "SIGSEGV / SEGV_MAPERR",
        "SIGSEGV / SEGV_ACCERR",
    )

    EXCLUDED_MOZ_REASON_STRINGS = (
        "MOZ_CRASH(OOM)",
        "MOZ_CRASH(Out of memory)",
        "out of memory",
        "Shutdown hanging",
        # TODO(investigate): do we need to exclude signatures that their reason
        # contains `[unhandlable oom]`?
        # Example: arena_t::InitChunk | arena_t::AllocRun | arena_t::MallocLarge | arena_t::Malloc | BaseAllocator::malloc | Allocator::malloc | PageMalloc
        # "[unhandlable oom]",
    )

    # If any of the crash reason starts with any of the following, then it is
    # Network or I/O error.
    EXCLUDED_IO_ERROR_REASON_PREFIXES = (
        "EXCEPTION_IN_PAGE_ERROR_READ",
        "EXCEPTION_IN_PAGE_ERROR_WRITE",
        "EXCEPTION_IN_PAGE_ERROR_EXEC",
    )

    # TODO(investigate): do we need to exclude all these signatures prefixes?
    EXCLUDED_SIGNATURE_PREFIXES = (
        "OOM | ",
        "bad hardware | ",
        "shutdownhang | ",
    )

    # The time window used when summarizing a signature's crash data.
    SUMMARY_DURATION = timedelta(weeks=10)
611

612
    def __init__(
        self,
        signatures: Iterable[str],
        product: str = "Firefox",
        channel: str = "nightly",
    ):
        """Args:
        signatures: the crash signatures to fetch data for.
        product: the product to query.
        channel: the release channel to query.
        """
        # Deduplicate up front; all fetch methods operate on the unique set.
        self._signatures = set(signatures)
        self._product = product
        self._channel = channel
621

622
    @classmethod
    def find_new_actionable_crashes(
        cls,
        product: str,
        channel: str,
        days_to_check: int = 7,
        days_without_crashes: int = 7,
    ) -> "SignaturesDataFetcher":
        """Find new actionable crashes.

        Args:
            product: The product to check.
            channel: The release channel to check.
            days_to_check: The number of days to check for crashes.
            days_without_crashes: The number of days without crashes before the
                `days_to_check` to consider the signature new.

        Returns:
            A `SignaturesDataFetcher` wrapping the actionable signatures.
        """
        # The query window covers both the check period and the preceding
        # quiet period, so a signature seen only in the check period is "new".
        duration = days_to_check + days_without_crashes
        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - timedelta(duration)
        # Signatures whose first crash is before this date are pre-existing.
        earliest_allowed_date = lmdutils.get_date_str(
            end_date - timedelta(days_to_check)
        )
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": product,
            "release_channel": channel,
            "date": date_range,
            # TODO(investigate): should we do a local filter instead of the
            # following (should we exclude the signature if one of the crashes
            # is a shutdown hang?):
            # If the `ipc_shutdown_state` or `shutdown_progress` field are
            # non-empty then it's a shutdown hang.
            "ipc_shutdown_state": "__null__",
            "shutdown_progress": "__null__",
            # TODO(investigate): should we use the following instead of the
            # local filter.
            # "oom_allocation_size": "!__null__",
            "_aggs.signature": [
                "moz_crash_reason",
                "reason",
                "_histogram.date",
                "_cardinality.install_time",
                "_cardinality.oom_allocation_size",
            ],
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_resp: dict, data: list):
            # Applies the local filtering criteria to each signature facet and
            # appends the signatures that pass all checks to `data`.
            logger.debug(
                "Total of %d signatures received from Socorro",
                len(search_resp["facets"]["signature"]),
            )

            for crash in search_resp["facets"]["signature"]:
                signature = crash["term"]
                if any(
                    signature.startswith(excluded_prefix)
                    for excluded_prefix in cls.EXCLUDED_SIGNATURE_PREFIXES
                ):
                    # Ignore signatures that start with any of the excluded prefixes.
                    continue

                facets = crash["facets"]
                installations = facets["cardinality_install_time"]["value"]
                if installations <= 1:
                    # Ignore crashes that only happen on one installation.
                    continue

                # Dates compare correctly as strings here only if both are in
                # the same ISO-like format — presumably they are; verify.
                first_date = facets["histogram_date"][0]["term"]
                if first_date < earliest_allowed_date:
                    # The crash is not new, skip it.
                    continue

                if any(
                    reason["term"].startswith(io_error_prefix)
                    for reason in facets["reason"]
                    for io_error_prefix in cls.EXCLUDED_IO_ERROR_REASON_PREFIXES
                ):
                    # Ignore Network or I/O error crashes.
                    continue

                if crash["count"] < 20:
                    # For signatures with low volume, having multiple types of
                    # memory errors indicates potential bad hardware crashes.
                    num_memory_error_types = sum(
                        reason["term"] in cls.MEMORY_ACCESS_ERROR_REASONS
                        for reason in facets["reason"]
                    )
                    if num_memory_error_types > 1:
                        # Potential bad hardware crash, skip it.
                        continue

                # TODO: Add a filter using the `possible_bit_flips_max_confidence`
                # field to exclude bad hardware crashes. The filed is not available yet.
                # See: https://bugzilla.mozilla.org/show_bug.cgi?id=1816669#c3

                # TODO(investigate): is this needed since we are already
                # filtering signatures that start with "OOM | "
                if facets["cardinality_oom_allocation_size"]["value"]:
                    # If one of the crashes is an OOM crash, skip it.
                    continue

                # TODO(investigate): do we need to check for the `moz_crash_reason`
                moz_crash_reasons = facets["moz_crash_reason"]
                if moz_crash_reasons and any(
                    excluded_reason in reason["term"]
                    for reason in moz_crash_reasons
                    for excluded_reason in cls.EXCLUDED_MOZ_REASON_STRINGS
                ):
                    continue

                data.append(signature)

        signatures: list = []
        socorro.SuperSearch(
            params=params,
            handler=handler,
            handlerdata=signatures,
        ).wait()

        logger.debug(
            "Total of %d signatures left after applying the filtering criteria",
            len(signatures),
        )

        return cls(signatures, product, channel)
754

755
    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
        """Fetch the crash reports data from Crash Clouseau.

        Returns:
            A mapping from each signature to its list of Clouseau reports.
        """
        if not self._signatures:
            # Nothing to request; skip the round trip entirely.
            return {}

        logger.debug(
            "Fetch from Clouseau: requesting reports for %d signatures",
            len(self._signatures),
        )
        reports_by_signature = clouseau.Reports.get_by_signatures(
            self._signatures,
            product=self._product,
            channel=self._channel,
        )
        logger.debug(
            "Fetch from Clouseau: received reports for %d signatures",
            len(reports_by_signature),
        )
        return reports_by_signature
777

778
    def fetch_socorro_info(self) -> tuple[list[dict], int]:
        """Fetch the signature data from Socorro.

        Returns:
            A tuple of:
                - the signature facets returned by Socorro.
                - the total number of crashes in the query period.
        """
        if not self._signatures:
            return [], 0

        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - self.SUMMARY_DURATION
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": self._product,
            # TODO(investigate): should we included all release channels?
            "release_channel": self._channel,
            # TODO(investigate): should we limit based on the build date as well?
            "date": date_range,
            # TODO: split signatures into chunks to avoid very long query URLs
            "signature": ["=" + signature for signature in self._signatures],
            # NOTE: "cpu_arch" used to appear twice in this list; the
            # duplicate was removed.
            "_aggs.signature": [
                "address",
                "build_id",
                "cpu_arch",
                "proto_signature",
                "_cardinality.user_comments",
                "platform_pretty_version",
                "_histogram.date",
                # The following are needed for SignatureStats:
                "platform",
                "is_garbage_collecting",
                "_cardinality.install_time",
                "startup_crash",
                "_histogram.uptime",
                "process_type",
            ],
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_results: dict, data: dict):
            # Capture the totals and the per-signature facets from the response.
            data["num_total_crashes"] = search_results["total"]
            data["signatures"] = search_results["facets"]["signature"]

        logger.debug(
            "Fetch from Socorro: requesting info for %d signatures",
            len(self._signatures),
        )

        data: dict = {}
        socorro.SuperSearchUnredacted(
            params=params,
            handler=handler,
            handlerdata=data,
        ).wait()

        logger.debug(
            "Fetch from Socorro: received info for %d signatures",
            len(data["signatures"]),
        )

        return data["signatures"], data["num_total_crashes"]
838

839
    def fetch_bugs(
        self, include_fields: "list[str] | None" = None
    ) -> dict[str, list[dict]]:
        """Fetch bugs that are filed against the given signatures.

        Args:
            include_fields: extra bug fields to request from Bugzilla.
                `cf_crash_signature` is always requested since it is needed
                to map each returned bug back to its signatures.

        Returns:
            A mapping from signature to the list of bugs filed against it.
        """
        if not self._signatures:
            return {}

        params_base: dict = {
            "include_fields": [
                "cf_crash_signature",
            ],
        }

        if include_fields:
            params_base["include_fields"].extend(include_fields)

        # Chunk the signatures to keep each query URL reasonably short; each
        # chunk becomes one OR'ed group of regexp matches on the crash
        # signature field.
        params_list = []
        for signatures_chunk in Connection.chunks(list(self._signatures), 30):
            params = params_base.copy()
            n = int(utils.get_last_field_num(params))
            params[f"f{n}"] = "OP"
            params[f"j{n}"] = "OR"
            for signature in signatures_chunk:
                n += 1
                params[f"f{n}"] = "cf_crash_signature"
                params[f"o{n}"] = "regexp"
                # Match the signature as a full crash-signature entry, i.e.,
                # "[@ signature]" or "[@signature]".
                params[f"v{n}"] = rf"\[(@ |@){re.escape(signature)}( \]|\])"
            params[f"f{n + 1}"] = "CP"
            params_list.append(params)

        signatures_bugs: dict = defaultdict(list)

        def handler(res, data):
            # Map each returned bug back to the signatures it is filed
            # against, keeping only the ones we are currently analyzing.
            for bug in res["bugs"]:
                for signature in utils.get_signatures(bug["cf_crash_signature"]):
                    if signature in self._signatures:
                        data[signature].append(bug)

        logger.debug(
            "Fetch from Bugzilla: requesting bugs for %d signatures",
            len(self._signatures),
        )
        timeout = utils.get_config("common", "bz_query_timeout")
        Bugzilla(
            timeout=timeout,
            queries=[
                connection.Query(Bugzilla.API_URL, params, handler, signatures_bugs)
                for params in params_list
            ],
        ).wait()

        # TODO: remove the call to DevBugzilla after moving to production
        for params in params_list:
            # Exclude only bugs filed with the latest experiment version.
            # This will re-generate the bugs after bumping the version.
            n = int(utils.get_last_field_num(params))
            params[f"f{n}"] = "status_whiteboard"
            params[f"o{n}"] = "substring"
            params[f"v{n}"] = f"[bugbot-crash-v{EXPERIMENT_VERSION}]"
        DevBugzilla(
            timeout=timeout,
            queries=[
                connection.Query(DevBugzilla.API_URL, params, handler, signatures_bugs)
                for params in params_list
            ],
        ).wait()

        logger.debug(
            "Fetch from Bugzilla: received bugs for %d signatures", len(signatures_bugs)
        )

        return signatures_bugs
909

910
    def analyze(self) -> list[SignatureAnalyzer]:
        """Analyze the data related to the signatures.

        Returns:
            One `SignatureAnalyzer` per signature that survived the
            filtering below.
        """
        filed_bugs = self.fetch_bugs()
        # TODO(investigate): For now, we are ignoring signatures that have bugs
        # filed even if they are closed long time ago. We should investigate
        # whether we should include the ones with closed bugs. For example, if
        # the bug was closed as Fixed years ago.
        self._signatures -= filed_bugs.keys()

        reports = self.fetch_clouseau_crash_reports()
        # TODO(investigate): For now, we are ignoring signatures that are not
        # analyzed by clouseau. We should investigate why they are not analyzed
        # and whether we should include them.
        self._signatures &= reports.keys()

        signature_facets, num_total_crashes = self.fetch_socorro_info()
        shared_bugs_store = BugsStore()

        analyzers = []
        for facet in signature_facets:
            analyzers.append(
                SignatureAnalyzer(
                    facet,
                    num_total_crashes,
                    reports[facet["term"]],
                    shared_bugs_store,
                )
            )
        return analyzers
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc