• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mozilla / relman-auto-nag / #4611

pending completion
#4611

push

coveralls-python

suhaibmujahid
Link to the user comments page on Socorro

646 of 3416 branches covered (18.91%)

10 of 10 new or added lines in 1 file covered. (100.0%)

1828 of 8490 relevant lines covered (21.53%)

0.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/bugbot/crash/analyzer.py
1
# This Source Code Form is subject to the terms of the Mozilla Public
2
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3
# You can obtain one at http://mozilla.org/MPL/2.0/.
4

5
import itertools
×
6
import re
×
7
from collections import defaultdict
×
8
from datetime import date, timedelta
×
9
from functools import cached_property
×
10
from typing import Iterable, Iterator
×
11

12
from libmozdata import bugzilla, clouseau, connection, socorro
×
13
from libmozdata import utils as lmdutils
×
14
from libmozdata.bugzilla import Bugzilla
×
15
from libmozdata.connection import Connection
×
16

17
from bugbot import logger, utils
×
18
from bugbot.components import ComponentName
×
19
from bugbot.crash import socorro_util
×
20

21

22
# TODO: Move this to libmozdata
23
def generate_signature_page_url(params: dict, tab: str) -> str:
    """Build the URL of a signature page on Socorro.

    Args:
        params: the search query parameters to encode into the URL.
        tab: the name of the page tab to select; it becomes the URL fragment.

    Returns:
        The signature page URL, including the encoded query and the tab
        fragment.
    """
    base_url = socorro.Socorro.CRASH_STATS_URL
    encoded_query = lmdutils.get_params_for_url(params)
    return "{}/signature/{}#{}".format(base_url, encoded_query, tab)
×
36

37

38
# NOTE: At this point, we will file bugs on bugzilla-dev. Once we are confident
39
# that the bug filing is working as expected, we can switch to filing bugs in
40
# the production instance of Bugzilla.
41
class DevBugzilla(Bugzilla):
    """Bugzilla client that targets the bugzilla-dev instance."""

    URL = "https://bugzilla-dev.allizom.org"
    API_URL = f"{URL}/rest/bug"
    ATTACHMENT_API_URL = f"{API_URL}/attachment"
    # The API key is looked up once, when the class body is executed.
    TOKEN = utils.get_login_info()["bz_api_key_dev"]
×
46

47

48
class NoCrashReportFoundError(Exception):
    """Raised when no crash report satisfies the required criteria."""
50

51

52
class ClouseauDataAnalyzer:
    """Analyze the data returned by Crash Clouseau."""

    # Changesets scoring below this value are never considered as regressors.
    MINIMUM_CLOUSEAU_SCORE_THRESHOLD: int = 8
    # Component used when the regressor bugs do not agree on a single one.
    DEFAULT_CRASH_COMPONENT = ComponentName("Core", "General")

    def __init__(self, reports: Iterable[dict]):
        """Store the Clouseau reports to analyze.

        Args:
            reports: the crash reports as returned by Crash Clouseau. Each
                report is read through its `max_score` and `changesets` keys.
        """
        self._clouseau_reports = reports

    @cached_property
    def max_clouseau_score(self) -> int:
        """The maximum Clouseau score in the crash reports (0 if none)."""
        if not self._clouseau_reports:
            return 0
        return max(report["max_score"] for report in self._clouseau_reports)

    def _iter_accepted_changesets(self) -> Iterator[dict]:
        """Yield the changesets that are accepted as potential regressors.

        A changeset is accepted when both its report and the changeset itself
        reach the minimum accepted score (the highest of the class threshold
        and the maximum score seen), and it is neither a merge nor backed out.
        """
        minimum_accepted_score = max(
            self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score
        )
        for report in self._clouseau_reports:
            if report["max_score"] < minimum_accepted_score:
                continue
            for changeset in report["changesets"]:
                if (
                    changeset["max_score"] >= minimum_accepted_score
                    and not changeset["is_merge"]
                    and not changeset["is_backedout"]
                ):
                    yield changeset

    @cached_property
    def regressed_by_potential_bug_ids(self) -> set[int]:
        """The IDs for the bugs that their patches could have caused the crash."""
        return {
            changeset["bug_id"] for changeset in self._iter_accepted_changesets()
        }

    @cached_property
    def regressed_by_patch(self) -> str | None:
        """The hash of the patch that could have caused the crash.

        If there are multiple candidate patches (or none), the value will be
        `None`.
        """
        potential_patches = {
            changeset["changeset"] for changeset in self._iter_accepted_changesets()
        }
        if len(potential_patches) == 1:
            return next(iter(potential_patches))
        return None

    @cached_property
    def regressed_by(self) -> int | None:
        """The ID of the bug that one of its patches could have caused
        the crash.

        If there are multiple bugs, the value will be `None`.
        """
        bug_ids = self.regressed_by_potential_bug_ids
        if len(bug_ids) == 1:
            return next(iter(bug_ids))
        return None

    @cached_property
    def regressed_by_potential_bugs(self) -> list[dict]:
        """The bugs whose patches could have caused the crash."""
        bug_ids = self.regressed_by_potential_bug_ids
        if not bug_ids:
            # Nothing to look up; avoid issuing a request without bug IDs.
            return []

        def handler(bug: dict, data: list):
            data.append(bug)

        bugs: list[dict] = []
        Bugzilla(
            bugids=bug_ids,
            include_fields=[
                "id",
                "assigned_to",
                "product",
                "component",
            ],
            bughandler=handler,
            bugdata=bugs,
        ).wait()

        return bugs

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The author of the patch that could have caused the crash.

        If there are multiple regressors, the value will be `None`. The value
        is also `None` when the regressor bug was not returned by Bugzilla.

        The regressor bug assignee is considered as the author, even if the
        assignee is not the patch author.
        """
        regressor_id = self.regressed_by
        if not regressor_id:
            return None

        # Match on the ID instead of assuming the first result is the
        # regressor; the list may be empty if Bugzilla returned nothing
        # (presumably e.g. a bug that is not visible to the bot).
        for bug in self.regressed_by_potential_bugs:
            if bug["id"] == regressor_id:
                return bug["assigned_to_detail"]
        return None

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component that the crash belongs to.

        If there are multiple components, the value will be the default one.
        """
        potential_components = {
            ComponentName(bug["product"], bug["component"])
            for bug in self.regressed_by_potential_bugs
        }
        if len(potential_components) == 1:
            return next(iter(potential_components))
        return self.DEFAULT_CRASH_COMPONENT
×
167

168

169
class SocorroDataAnalyzer(socorro_util.SignatureStats):
    """Analyze the data returned by Socorro."""

    # Lazily populated caches of the legal Bugzilla field values, shared by
    # all instances.
    _bugzilla_os_legal_values = None
    _bugzilla_cpu_legal_values_map = None
    _platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
    ):
        """Initialize the signature stats with the class-level platforms.

        Args:
            signature: the signature entry from Socorro, including its facets.
            num_total_crashes: the total number of crashes in the search.
        """
        super().__init__(signature, num_total_crashes, platforms=self._platforms)

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Return the corresponding OS name in Bugzilla for the provided OS name
        from Socorro.

        If the OS name is not recognized, return "Other".
        """
        if cls._bugzilla_os_legal_values is None:
            cls._bugzilla_os_legal_values = set(
                bugzilla.BugFields.fetch_field_values("op_sys")
            )

        if op_sys in cls._bugzilla_os_legal_values:
            return op_sys

        if op_sys.startswith(("OS X ", "macOS ")):
            return "macOS"
        if op_sys.startswith("Windows"):
            return "Windows"
        if "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            return "Linux"
        return "Other"

    @property
    def bugzilla_op_sys(self) -> str:
        """The name of the OS where the crash happens.

        The value is one of the legal values for Bugzilla's `op_sys` field.

        - If no OS name is found, the value will be "Unspecified".
        - If the OS name is not recognized, the value will be "Other".
        - If multiple OS names are found, the value will be "All". Unless the OS
          names can be resolved to a common name without a version. For example,
          "Windows 10" and "Windows 7" will become "Windows".
        """
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # Resolve to root OS name by removing the version number.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        if len(all_op_sys) == 2 and "Other" in all_op_sys:
            # TODO: explain this workaround.
            all_op_sys.remove("Other")

        if len(all_op_sys) == 1:
            return next(iter(all_op_sys))

        if len(all_op_sys) == 0:
            return "Unspecified"

        return "All"

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Return the corresponding CPU name in Bugzilla for the provided name
        from Socorro.

        The match is case-insensitive. If the CPU is not recognized, return
        "Other".
        """
        if cls._bugzilla_cpu_legal_values_map is None:
            cls._bugzilla_cpu_legal_values_map = {
                value.lower(): value
                for value in bugzilla.BugFields.fetch_field_values("rep_platform")
            }

        # The map is keyed on lower-cased names, so the lookup key must be
        # lower-cased as well.
        return cls._bugzilla_cpu_legal_values_map.get(cpu.lower(), "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The CPU architecture of the devices where the crash happens.

        The value is one of the legal values for Bugzilla's `rep_platform` field.

        - If no CPU architecture is found, the value will be "Unspecified".
        - If the CPU architecture is not recognized, the value will be "Other".
        - If multiple CPU architectures are found, the value will be "All".
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        if len(all_cpu_arch) == 2 and "Other" in all_cpu_arch:
            # Ignore the unrecognized value when exactly one recognized
            # architecture remains.
            all_cpu_arch.remove("Other")

        if len(all_cpu_arch) == 1:
            return next(iter(all_cpu_arch))

        if len(all_cpu_arch) == 0:
            return "Unspecified"

        return "All"

    @property
    def user_comments_page_url(self) -> str:
        """The URL to the Signature page on Socorro where the Comments tab is
        selected.
        """
        # The search starts 26 weeks before today.
        start_date = date.today() - timedelta(weeks=26)
        params = {
            "signature": self.signature_term,
            "date": socorro.SuperSearch.get_search_date(start_date),
        }
        return generate_signature_page_url(params, "comments")

    @property
    def num_user_comments(self) -> int:
        """The number of crash reports with user comments."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether the crash signature has any reports with a user comment."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The proto signature that occurs the most."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The number of crashes for the most occurring proto signature."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    def _build_ids(self) -> Iterator[int]:
        """Yields the build IDs where the crash occurred."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The build ID where most crashes occurred."""
        return self.signature["facets"]["build_id"][0]["term"]
×
331

332

333
class SignatureAnalyzer(SocorroDataAnalyzer, ClouseauDataAnalyzer):
    """Combined analysis of one signature using Socorro and Clouseau data."""

    def __init__(
        self,
        socorro_signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
    ):
        # Each base takes different arguments, so both are initialized
        # explicitly rather than through super().
        SocorroDataAnalyzer.__init__(self, socorro_signature, num_total_crashes)
        ClouseauDataAnalyzer.__init__(self, clouseau_reports)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Yield crash report hits from Socorro for a proto signature.

        Being a generator, the search only runs once iteration starts.

        Args:
            proto_signature: the proto signature to match exactly.
            build_id: the build ID (or build IDs) to restrict the search to.
            limit: the maximum number of results to request.
        """
        search_result: dict = {}
        socorro.SuperSearch(
            params={
                "proto_signature": "=" + proto_signature,
                "build_id": build_id,
                "_columns": [
                    "uuid",
                ],
                "_results_number": limit,
            },
            handler=lambda res, data: data.update(res),
            handlerdata=search_result,
        ).wait()

        yield from search_result["hits"]

    def fetch_representing_processed_crash(self) -> dict:
        """Fetch a processed crash that represents the signature.

        Several processed crashes may be fetched; the first acceptable one
        is returned.

        Raises:
            NoCrashReportFoundError: if none of the candidates is acceptable.
        """
        # Only insist on the top proto signature when it accounts for more
        # than 60% of the crashes.
        top_proto_share = self.num_top_proto_signature_crashes / self.num_crashes
        must_match_top_proto = top_proto_share > 0.6

        candidates = itertools.chain(
            # Clouseau reports come first, highest score first, since those
            # are more likely to be useful.
            sorted(
                self._clouseau_reports,
                key=lambda clouseau_report: clouseau_report["max_score"],
                reverse=True,
            ),
            # Then reports from the top crashing build, which are likely to
            # be representative, and finally from any crashing build.
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self._build_ids()),
        )
        for candidate in candidates:
            crash_id = candidate["uuid"]
            processed = socorro.ProcessedCrash.get_processed(crash_id)[crash_id]
            if (
                must_match_top_proto
                and processed["proto_signature"] != self.top_proto_signature
            ):
                continue

            # TODO(investigate): maybe we should check if the stack is
            # corrupted (ask gsvelto or willkg about how to detect that)
            return processed

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )
408

409

410
class SignaturesDataFetcher:
×
411
    """Fetch the data related to the given signatures."""
412

413
    MEMORY_ACCESS_ERROR_REASONS = (
×
414
        # On Windows:
415
        "EXCEPTION_ACCESS_VIOLATION_READ",
416
        "EXCEPTION_ACCESS_VIOLATION_WRITE",
417
        "EXCEPTION_ACCESS_VIOLATION_EXEC"
418
        # On Linux:
419
        "SIGSEGV / SEGV_MAPERR",
420
        "SIGSEGV / SEGV_ACCERR",
421
    )
422

423
    EXCLUDED_MOZ_REASON_STRINGS = (
×
424
        "MOZ_CRASH(OOM)",
425
        "MOZ_CRASH(Out of memory)",
426
        "out of memory",
427
        "Shutdown hanging",
428
        # TODO(investigate): do we need to exclude signatures that their reason
429
        # contains `[unhandlable oom]`?
430
        # Example: arena_t::InitChunk | arena_t::AllocRun | arena_t::MallocLarge | arena_t::Malloc | BaseAllocator::malloc | Allocator::malloc | PageMalloc
431
        # "[unhandlable oom]",
432
    )
433

434
    # If any of the crash reason starts with any of the following, then it is
435
    # Network or I/O error.
436
    EXCLUDED_IO_ERROR_REASON_PREFIXES = (
×
437
        "EXCEPTION_IN_PAGE_ERROR_READ",
438
        "EXCEPTION_IN_PAGE_ERROR_WRITE",
439
        "EXCEPTION_IN_PAGE_ERROR_EXEC",
440
    )
441

442
    # TODO(investigate): do we need to exclude all these signatures prefixes?
443
    EXCLUDED_SIGNATURE_PREFIXES = (
×
444
        "OOM | ",
445
        "bad hardware | ",
446
        "shutdownhang | ",
447
    )
448

449
    def __init__(
×
450
        self,
451
        signatures: Iterable[str],
452
        product: str = "Firefox",
453
        channel: str = "nightly",
454
    ):
455
        self._signatures = set(signatures)
×
456
        self._product = product
×
457
        self._channel = channel
×
458

459
    @classmethod
×
460
    def find_new_actionable_crashes(
×
461
        cls,
462
        product: str,
463
        channel: str,
464
        days_to_check: int = 7,
465
        days_without_crashes: int = 7,
466
    ) -> "SignaturesDataFetcher":
467
        """Find new actionable crashes.
468

469
        Args:
470
            product: The product to check.
471
            channel: The release channel to check.
472
            days_to_check: The number of days to check for crashes.
473
            days_without_crashes: The number of days without crashes before the
474
                `days_to_check` to consider the signature new.
475

476
        Returns:
477
            A list of actionable signatures.
478
        """
479
        duration = days_to_check + days_without_crashes
×
480
        end_date = lmdutils.get_date_ymd("today")
×
481
        start_date = end_date - timedelta(duration)
×
482
        earliest_allowed_date = lmdutils.get_date_str(
×
483
            end_date - timedelta(days_to_check)
484
        )
485
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)
×
486

487
        params = {
×
488
            "product": product,
489
            "release_channel": channel,
490
            "date": date_range,
491
            # TODO(investigate): should we do a local filter instead of the
492
            # following (should we exclude the signature if one of the crashes
493
            # is a shutdown hang?):
494
            # If the `ipc_shutdown_state` or `shutdown_progress` field are
495
            # non-empty then it's a shutdown hang.
496
            "ipc_shutdown_state": "__null__",
497
            "shutdown_progress": "__null__",
498
            # TODO(investigate): should we use the following instead of the
499
            # local filter.
500
            # "oom_allocation_size": "!__null__",
501
            "_aggs.signature": [
502
                "moz_crash_reason",
503
                "reason",
504
                "_histogram.date",
505
                "_cardinality.install_time",
506
                "_cardinality.oom_allocation_size",
507
            ],
508
            "_results_number": 0,
509
            "_facets_size": 10000,
510
        }
511

512
        def handler(search_resp: dict, data: list):
×
513
            logger.debug(
×
514
                "Total of %d signatures received from Socorro",
515
                len(search_resp["facets"]["signature"]),
516
            )
517

518
            for crash in search_resp["facets"]["signature"]:
×
519
                signature = crash["term"]
×
520
                if any(
×
521
                    signature.startswith(excluded_prefix)
522
                    for excluded_prefix in cls.EXCLUDED_SIGNATURE_PREFIXES
523
                ):
524
                    # Ignore signatures that start with any of the excluded prefixes.
525
                    continue
×
526

527
                facets = crash["facets"]
×
528
                installations = facets["cardinality_install_time"]["value"]
×
529
                if installations <= 1:
×
530
                    # Ignore crashes that only happen on one installation.
531
                    continue
×
532

533
                first_date = facets["histogram_date"][0]["term"]
×
534
                if first_date < earliest_allowed_date:
×
535
                    # The crash is not new, skip it.
536
                    continue
×
537

538
                if any(
×
539
                    reason["term"].startswith(io_error_prefix)
540
                    for reason in facets["reason"]
541
                    for io_error_prefix in cls.EXCLUDED_IO_ERROR_REASON_PREFIXES
542
                ):
543
                    # Ignore Network or I/O error crashes.
544
                    continue
×
545

546
                if crash["count"] < 20:
×
547
                    # For signatures with low volume, having multiple types of
548
                    # memory errors indicates potential bad hardware crashes.
549
                    num_memory_error_types = sum(
×
550
                        reason["term"] in cls.MEMORY_ACCESS_ERROR_REASONS
551
                        for reason in facets["reason"]
552
                    )
553
                    if num_memory_error_types > 1:
×
554
                        # Potential bad hardware crash, skip it.
555
                        continue
×
556

557
                # TODO: Add a filter using the `possible_bit_flips_max_confidence`
558
                # field to exclude bad hardware crashes. The filed is not available yet.
559
                # See: https://bugzilla.mozilla.org/show_bug.cgi?id=1816669#c3
560

561
                # TODO(investigate): is this needed since we are already
562
                # filtering signatures that start with "OOM | "
563
                if facets["cardinality_oom_allocation_size"]["value"]:
×
564
                    # If one of the crashes is an OOM crash, skip it.
565
                    continue
×
566

567
                # TODO(investigate): do we need to check for the `moz_crash_reason`
568
                moz_crash_reasons = facets["moz_crash_reason"]
×
569
                if moz_crash_reasons and any(
×
570
                    excluded_reason in reason["term"]
571
                    for reason in moz_crash_reasons
572
                    for excluded_reason in cls.EXCLUDED_MOZ_REASON_STRINGS
573
                ):
574
                    continue
×
575

576
                data.append(signature)
×
577

578
        signatures: list = []
×
579
        socorro.SuperSearch(
×
580
            params=params,
581
            handler=handler,
582
            handlerdata=signatures,
583
        ).wait()
584

585
        logger.debug(
×
586
            "Total of %d signatures left after applying the filtering criteria",
587
            len(signatures),
588
        )
589

590
        return cls(signatures, product, channel)
×
591

592
    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
×
593
        """Fetch the crash reports data from Crash Clouseau."""
594
        signature_reports = clouseau.Reports.get_by_signatures(
×
595
            self._signatures,
596
            product=self._product,
597
            channel=self._channel,
598
        )
599

600
        logger.debug(
×
601
            "Total of %d signatures received from Clouseau", len(signature_reports)
602
        )
603

604
        return signature_reports
×
605

606
    def fetch_socorro_info(self) -> tuple[list[dict], int]:
×
607
        """Fetch the signature data from Socorro."""
608
        # TODO(investigate): should we increase the duration to 6 months?
609
        duration = timedelta(weeks=1)
×
610
        end_date = lmdutils.get_date_ymd("today")
×
611
        start_date = end_date - duration
×
612
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)
×
613

614
        params = {
×
615
            "product": self._product,
616
            # TODO(investigate): should we included all release channels?
617
            "release_channel": self._channel,
618
            # TODO(investigate): should we limit based on the build date as well?
619
            "date": date_range,
620
            # TODO: split signatures into chunks to avoid very long query URLs
621
            "signature": ["=" + signature for signature in self._signatures],
622
            "_aggs.signature": [
623
                "build_id",
624
                "cpu_arch",
625
                "proto_signature",
626
                "_cardinality.user_comments",
627
                "cpu_arch",
628
                "platform_pretty_version",
629
                # The following are needed for SignatureStats:
630
                "platform",
631
                "is_garbage_collecting",
632
                "_cardinality.install_time",
633
                "startup_crash",
634
                "_histogram.uptime",
635
                "process_type",
636
            ],
637
            "_results_number": 0,
638
            "_facets_size": 10000,
639
        }
640

641
        def handler(search_results: dict, data: dict):
×
642
            data["num_total_crashes"] = search_results["total"]
×
643
            data["signatures"] = search_results["facets"]["signature"]
×
644

645
        data: dict = {}
×
646
        socorro.SuperSearchUnredacted(
×
647
            params=params,
648
            handler=handler,
649
            handlerdata=data,
650
        ).wait()
651

652
        logger.debug(
×
653
            "Fetch info from Socorro for %d signatures", len(data["signatures"])
654
        )
655

656
        return data["signatures"], data["num_total_crashes"]
×
657

658
    def fetch_bugs(self, include_fields: list[str] = None) -> dict[str, list[dict]]:
×
659
        """Fetch bugs that are filed against the given signatures."""
660

661
        params_base: dict = {
×
662
            "include_fields": [
663
                "cf_crash_signature",
664
            ],
665
        }
666

667
        if include_fields:
×
668
            params_base["include_fields"].extend(include_fields)
×
669

670
        params_list = []
×
671
        for signatures_chunk in Connection.chunks(list(self._signatures), 30):
×
672
            params = params_base.copy()
×
673
            n = int(utils.get_last_field_num(params))
×
674
            params[f"f{n}"] = "OP"
×
675
            params[f"j{n}"] = "OR"
×
676
            for signature in signatures_chunk:
×
677
                n += 1
×
678
                params[f"f{n}"] = "cf_crash_signature"
×
679
                params[f"o{n}"] = "regexp"
×
680
                params[f"v{n}"] = rf"\[(@ |@){re.escape(signature)}( \]|\])"
×
681
            params[f"f{n+1}"] = "CP"
×
682
            params_list.append(params)
×
683

684
        signatures_bugs: dict = defaultdict(list)
×
685

686
        def handler(res, data):
×
687
            for bug in res["bugs"]:
×
688
                for signature in utils.get_signatures(bug["cf_crash_signature"]):
×
689
                    if signature in self._signatures:
×
690
                        data[signature].append(bug)
×
691

692
        Bugzilla(
×
693
            queries=[
694
                connection.Query(Bugzilla.API_URL, params, handler, signatures_bugs)
695
                for params in params_list
696
            ],
697
        ).wait()
698

699
        # TODO: remove the call to DevBugzilla after moving to production
700
        DevBugzilla(
×
701
            queries=[
702
                connection.Query(DevBugzilla.API_URL, params, handler, signatures_bugs)
703
                for params in params_list
704
            ],
705
        ).wait()
706

707
        logger.debug(
×
708
            "Total of %d signatures already have bugs filed", len(signatures_bugs)
709
        )
710

711
        return signatures_bugs
×
712

713
    def analyze(self) -> list[SignatureAnalyzer]:
×
714
        """Analyze the data related to the signatures."""
715
        bugs = self.fetch_bugs()
×
716
        # TODO(investigate): For now, we are ignoring signatures that have bugs
717
        # filed even if they are closed long time ago. We should investigate
718
        # whether we should include the ones with closed bugs. For example, if
719
        # the bug was closed as Fixed years ago.
720
        self._signatures.difference_update(bugs.keys())
×
721

722
        clouseau_reports = self.fetch_clouseau_crash_reports()
×
723
        # TODO(investigate): For now, we are ignoring signatures that are not
724
        # analyzed by clouseau. We should investigate why they are not analyzed
725
        # and whether we should include them.
726
        self._signatures.intersection_update(clouseau_reports.keys())
×
727

728
        signatures, num_total_crashes = self.fetch_socorro_info()
×
729
        logger.debug("Total of %d signatures will be analyzed", len(signatures))
×
730

731
        return [
×
732
            SignatureAnalyzer(
733
                signature,
734
                num_total_crashes,
735
                clouseau_reports[signature["term"]],
736
            )
737
            for signature in signatures
738
        ]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc