• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mozilla / relman-auto-nag / #4605

pending completion
#4605

push

coveralls-python

suhaibmujahid
Some refactoring

Move the fetching of Bugzilla data to `SignaturesDataFetcher`
Move finding actionable crashes to `SignaturesDataFetcher`

646 of 3416 branches covered (18.91%)

100 of 100 new or added lines in 2 files covered. (100.0%)

1828 of 8481 relevant lines covered (21.55%)

0.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/bugbot/crash/analyzer.py
1
# This Source Code Form is subject to the terms of the Mozilla Public
2
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3
# You can obtain one at http://mozilla.org/MPL/2.0/.
4

5
import itertools
×
6
import re
×
7
from collections import defaultdict
×
8
from datetime import timedelta
×
9
from functools import cached_property
×
10
from typing import Iterable, Iterator
×
11

12
from libmozdata import bugzilla, clouseau, connection, socorro
×
13
from libmozdata import utils as lmdutils
×
14
from libmozdata.bugzilla import Bugzilla
×
15
from libmozdata.connection import Connection
×
16

17
from bugbot import logger, utils
×
18
from bugbot.components import ComponentName
×
19
from bugbot.crash import socorro_util
×
20

21

22
# NOTE: At this point, we will file bugs on bugzilla-dev. Once we are confident
# that the bug filing is working as expected, we can switch to filing bugs in
# the production instance of Bugzilla.
class DevBugzilla(Bugzilla):
    """A Bugzilla client pointed at the bugzilla-dev staging instance.

    Overrides the endpoint URLs and API token of the parent `Bugzilla`
    class so that every request goes to bugzilla-dev instead of production.
    """

    URL = "https://bugzilla-dev.allizom.org"
    API_URL = URL + "/rest/bug"
    ATTACHMENT_API_URL = API_URL + "/attachment"
    # NOTE(review): evaluated at import time; requires the `bz_api_key_dev`
    # entry to be present in the login info, otherwise the import raises.
    TOKEN = utils.get_login_info()["bz_api_key_dev"]
30

31

32
class NoCrashReportFoundError(Exception):
    """Raised when there are no crash reports that meet the required criteria."""
34

35

36
class ClouseauDataAnalyzer:
    """Analyze the data returned by Crash Clouseau."""

    # Reports and changesets scoring below this are never considered.
    MINIMUM_CLOUSEAU_SCORE_THRESHOLD: int = 8
    # Component to fall back on when the regressors point at more than one.
    DEFAULT_CRASH_COMPONENT = ComponentName("Core", "General")

    def __init__(self, reports: Iterable[dict]):
        """
        Args:
            reports: the Clouseau reports for one crash signature.
        """
        self._clouseau_reports = reports

    @cached_property
    def max_clouseau_score(self) -> int:
        """The maximum Clouseau score in the crash reports."""
        if not self._clouseau_reports:
            return 0
        return max(report["max_score"] for report in self._clouseau_reports)

    def _iter_actionable_changesets(self) -> Iterator[dict]:
        """Yield the changesets whose patches could have caused the crash.

        A changeset is actionable when both its report and the changeset
        itself meet the minimum accepted score, and the changeset is neither
        a merge nor backed out. This is the single source of truth for the
        filter previously duplicated in `regressed_by_potential_bug_ids`
        and `regressed_by_patch`.
        """
        minimum_accepted_score = max(
            self.MINIMUM_CLOUSEAU_SCORE_THRESHOLD, self.max_clouseau_score
        )
        for report in self._clouseau_reports:
            if report["max_score"] < minimum_accepted_score:
                continue
            for changeset in report["changesets"]:
                if (
                    changeset["max_score"] >= minimum_accepted_score
                    and not changeset["is_merge"]
                    and not changeset["is_backedout"]
                ):
                    yield changeset

    @cached_property
    def regressed_by_potential_bug_ids(self) -> set[int]:
        """The IDs for the bugs that their patches could have caused the crash."""
        return {
            changeset["bug_id"] for changeset in self._iter_actionable_changesets()
        }

    @cached_property
    def regressed_by_patch(self) -> str | None:
        """The hash of the patch that could have caused the crash.

        If there are multiple candidate patches, the value will be `None`.
        """
        potential_patches = {
            changeset["changeset"]
            for changeset in self._iter_actionable_changesets()
        }
        if len(potential_patches) == 1:
            return next(iter(potential_patches))
        return None

    @cached_property
    def regressed_by(self) -> int | None:
        """The ID of the bug that one of its patches could have caused
        the crash.

        If there are multiple bugs, the value will be `None`.
        """
        bug_ids = self.regressed_by_potential_bug_ids
        if len(bug_ids) == 1:
            return next(iter(bug_ids))
        return None

    @cached_property
    def regressed_by_potential_bugs(self) -> list[dict]:
        """The bugs that their patches could have caused the crash."""

        def handler(bug: dict, data: list):
            data.append(bug)

        bugs: list[dict] = []
        Bugzilla(
            bugids=self.regressed_by_potential_bug_ids,
            include_fields=[
                "id",
                "assigned_to",
                "product",
                "component",
            ],
            bughandler=handler,
            bugdata=bugs,
        ).wait()

        return bugs

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The author of the patch that could have caused the crash.

        If there are multiple regressors, the value will be `None`.

        The regressor bug assignee is considered as the author, even if the
        assignee is not the patch author.
        """
        if not self.regressed_by:
            return None

        # With a single regressor, the Bugzilla fetch returns exactly one bug.
        bug = self.regressed_by_potential_bugs[0]
        assert bug["id"] == self.regressed_by
        return bug["assigned_to_detail"]

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component that the crash belongs to.

        If there are multiple components, the value will be the default one.
        """
        potential_components = {
            ComponentName(bug["product"], bug["component"])
            for bug in self.regressed_by_potential_bugs
        }
        if len(potential_components) == 1:
            return next(iter(potential_components))
        return self.DEFAULT_CRASH_COMPONENT
151

152

153
class SocorroDataAnalyzer(socorro_util.SignatureStats):
    """Analyze the data returned by Socorro."""

    # Lazily-populated caches of the legal values for the Bugzilla `op_sys`
    # and `rep_platform` fields (see `to_bugzilla_op_sys`/`to_bugzilla_cpu`).
    _bugzilla_os_legal_values = None
    _bugzilla_cpu_legal_values_map = None
    _platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
    ):
        """
        Args:
            signature: the signature facet data returned by Socorro.
            num_total_crashes: the total number of crashes in the search period.
        """
        super().__init__(signature, num_total_crashes, platforms=self._platforms)

    @staticmethod
    def _aggregate_bugzilla_values(values: set[str]) -> str:
        """Reduce a set of mapped Bugzilla field values to a single value.

        This is the resolution tail shared by `bugzilla_op_sys` and
        `bugzilla_cpu_arch`:

        - If the set is empty, return "Unspecified".
        - If the set has exactly one value, return it.
        - If the set is exactly {"Other", X}, return X.
        - Otherwise, return "All".
        """
        if len(values) == 2 and "Other" in values:
            # TODO: explain this workaround.
            values = values - {"Other"}

        if len(values) == 1:
            return next(iter(values))

        if len(values) == 0:
            return "Unspecified"

        return "All"

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Return the corresponding OS name in Bugzilla for the provided OS
        name from Socorro.

        If the OS name is not recognized, return "Other".
        """
        if cls._bugzilla_os_legal_values is None:
            cls._bugzilla_os_legal_values = set(
                bugzilla.BugFields.fetch_field_values("op_sys")
            )

        if op_sys in cls._bugzilla_os_legal_values:
            return op_sys

        if op_sys.startswith(("OS X ", "macOS ")):
            op_sys = "macOS"
        elif op_sys.startswith("Windows"):
            op_sys = "Windows"
        elif "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            op_sys = "Linux"
        else:
            op_sys = "Other"

        return op_sys

    @property
    def bugzilla_op_sys(self) -> str:
        """The name of the OS where the crash happens.

        The value is one of the legal values for Bugzilla's `op_sys` field.

        - If no OS name is found, the value will be "Unspecified".
        - If the OS name is not recognized, the value will be "Other".
        - If multiple OS names are found, the value will be "All". Unless the
          OS names can be resolved to a common name without a version. For
          example, "Windows 10" and "Windows 7" will become "Windows".
        """
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # Resolve to root OS name by removing the version number.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        return self._aggregate_bugzilla_values(all_op_sys)

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Return the corresponding CPU name in Bugzilla for the provided name
        from Socorro.

        If the CPU is not recognized, return "Other".
        """
        if cls._bugzilla_cpu_legal_values_map is None:
            cls._bugzilla_cpu_legal_values_map = {
                value.lower(): value
                for value in bugzilla.BugFields.fetch_field_values("rep_platform")
            }

        return cls._bugzilla_cpu_legal_values_map.get(cpu, "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The CPU architecture of the devices where the crash happens.

        The value is one of the legal values for Bugzilla's `rep_platform` field.

        - If no CPU architecture is found, the value will be "Unspecified".
        - If the CPU architecture is not recognized, the value will be "Other".
        - If multiple CPU architectures are found, the value will be "All".
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        return self._aggregate_bugzilla_values(all_cpu_arch)

    @property
    def num_user_comments(self) -> int:
        """The number of crash reports with user comments."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether the crash signature has any reports with a user comment."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The proto signature that occurs the most."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The number of crashes for the most occurring proto signature."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    def _build_ids(self) -> Iterator[int]:
        """Yields the build IDs where the crash occurred."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The build ID where most crashes occurred."""
        return self.signature["facets"]["build_id"][0]["term"]
303

304

305
class SignatureAnalyzer(SocorroDataAnalyzer, ClouseauDataAnalyzer):
    """Analyze the data related to a signature.

    This includes data from Socorro and Clouseau.
    """

    def __init__(
        self,
        socorro_signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
    ):
        # Both bases keep their own state; call each __init__ explicitly
        # instead of relying on cooperative super() chaining.
        SocorroDataAnalyzer.__init__(self, socorro_signature, num_total_crashes)
        ClouseauDataAnalyzer.__init__(self, clouseau_reports)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Yield crash report hits (only the `uuid` column) from Socorro
        SuperSearch that exactly match the given proto signature and build
        ID(s).

        Args:
            proto_signature: the proto signature to match exactly.
            build_id: a single build ID, or an iterable of build IDs.
            limit: the maximum number of results to request.
        """
        params = {
            "proto_signature": "=" + proto_signature,
            "build_id": build_id,
            "_columns": [
                "uuid",
            ],
            "_results_number": limit,
        }

        def handler(res: dict, data: dict):
            data.update(res)

        data: dict = {}
        socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait()

        yield from data["hits"]

    def fetch_representing_processed_crash(self) -> dict:
        """Fetch a processed crash to represent the signature.

        This could fetch multiple processed crashes and return the one that is
        most likely to be useful.

        Raises:
            NoCrashReportFoundError: if no suitable crash report can be found.
        """
        # If one proto signature dominates (>60% of crashes), only accept a
        # processed crash that has that proto signature.
        limit_to_top_proto_signature = (
            self.num_top_proto_signature_crashes / self.num_crashes > 0.6
        )

        # Candidate sources, in decreasing order of expected usefulness; the
        # chain is lazy, so later Socorro queries only run if needed.
        reports = itertools.chain(
            # Reports with a higher score from clouseau are more likely to be
            # useful.
            sorted(
                self._clouseau_reports,
                key=lambda report: report["max_score"],
                reverse=True,
            ),
            # Next we try find reports from the top crashing build because they
            # are likely to be representative.
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self._build_ids()),
        )
        for report in reports:
            uuid = report["uuid"]
            processed_crash = socorro.ProcessedCrash.get_processed(uuid)[uuid]
            if (
                not limit_to_top_proto_signature
                or processed_crash["proto_signature"] == self.top_proto_signature
            ):
                # TODO(investigate): maybe we should check if the stack is
                # corrupted (ask gsvelto or willkg about how to detect that)
                return processed_crash

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )
380

381

382
class SignaturesDataFetcher:
×
383
    """Fetch the data related to the given signatures."""
384

385
    MEMORY_ACCESS_ERROR_REASONS = (
×
386
        # On Windows:
387
        "EXCEPTION_ACCESS_VIOLATION_READ",
388
        "EXCEPTION_ACCESS_VIOLATION_WRITE",
389
        "EXCEPTION_ACCESS_VIOLATION_EXEC"
390
        # On Linux:
391
        "SIGSEGV / SEGV_MAPERR",
392
        "SIGSEGV / SEGV_ACCERR",
393
    )
394

395
    EXCLUDED_MOZ_REASON_STRINGS = (
×
396
        "MOZ_CRASH(OOM)",
397
        "MOZ_CRASH(Out of memory)",
398
        "out of memory",
399
        "Shutdown hanging",
400
        # TODO(investigate): do we need to exclude signatures that their reason
401
        # contains `[unhandlable oom]`?
402
        # Example: arena_t::InitChunk | arena_t::AllocRun | arena_t::MallocLarge | arena_t::Malloc | BaseAllocator::malloc | Allocator::malloc | PageMalloc
403
        # "[unhandlable oom]",
404
    )
405

406
    # If any of the crash reason starts with any of the following, then it is
407
    # Network or I/O error.
408
    EXCLUDED_IO_ERROR_REASON_PREFIXES = (
×
409
        "EXCEPTION_IN_PAGE_ERROR_READ",
410
        "EXCEPTION_IN_PAGE_ERROR_WRITE",
411
        "EXCEPTION_IN_PAGE_ERROR_EXEC",
412
    )
413

414
    # TODO(investigate): do we need to exclude all these signatures prefixes?
415
    EXCLUDED_SIGNATURE_PREFIXES = (
×
416
        "OOM | ",
417
        "bad hardware | ",
418
        "shutdownhang | ",
419
    )
420

421
    def __init__(
×
422
        self,
423
        signatures: Iterable[str],
424
        product: str = "Firefox",
425
        channel: str = "nightly",
426
    ):
427
        self._signatures = set(signatures)
×
428
        self._product = product
×
429
        self._channel = channel
×
430

431
    @classmethod
×
432
    def find_new_actionable_crashes(
×
433
        cls,
434
        product: str,
435
        channel: str,
436
        days_to_check: int = 7,
437
        days_without_crashes: int = 7,
438
    ) -> "SignaturesDataFetcher":
439
        """Find new actionable crashes.
440

441
        Args:
442
            product: The product to check.
443
            channel: The release channel to check.
444
            days_to_check: The number of days to check for crashes.
445
            days_without_crashes: The number of days without crashes before the
446
                `days_to_check` to consider the signature new.
447

448
        Returns:
449
            A list of actionable signatures.
450
        """
451
        duration = days_to_check + days_without_crashes
×
452
        end_date = lmdutils.get_date_ymd("today")
×
453
        start_date = end_date - timedelta(duration)
×
454
        earliest_allowed_date = lmdutils.get_date_str(
×
455
            end_date - timedelta(days_to_check)
456
        )
457
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)
×
458

459
        params = {
×
460
            "product": product,
461
            "release_channel": channel,
462
            "date": date_range,
463
            # TODO(investigate): should we do a local filter instead of the
464
            # following (should we exclude the signature if one of the crashes
465
            # is a shutdown hang?):
466
            # If the `ipc_shutdown_state` or `shutdown_progress` field are
467
            # non-empty then it's a shutdown hang.
468
            "ipc_shutdown_state": "__null__",
469
            "shutdown_progress": "__null__",
470
            # TODO(investigate): should we use the following instead of the
471
            # local filter.
472
            # "oom_allocation_size": "!__null__",
473
            "_aggs.signature": [
474
                "moz_crash_reason",
475
                "reason",
476
                "_histogram.date",
477
                "_cardinality.install_time",
478
                "_cardinality.oom_allocation_size",
479
            ],
480
            "_results_number": 0,
481
            "_facets_size": 10000,
482
        }
483

484
        def handler(search_resp: dict, data: list):
×
485
            logger.debug(
×
486
                "Total of %d signatures received from Socorro",
487
                len(search_resp["facets"]["signature"]),
488
            )
489

490
            for crash in search_resp["facets"]["signature"]:
×
491
                signature = crash["term"]
×
492
                if any(
×
493
                    signature.startswith(excluded_prefix)
494
                    for excluded_prefix in cls.EXCLUDED_SIGNATURE_PREFIXES
495
                ):
496
                    # Ignore signatures that start with any of the excluded prefixes.
497
                    continue
×
498

499
                facets = crash["facets"]
×
500
                installations = facets["cardinality_install_time"]["value"]
×
501
                if installations <= 1:
×
502
                    # Ignore crashes that only happen on one installation.
503
                    continue
×
504

505
                first_date = facets["histogram_date"][0]["term"]
×
506
                if first_date < earliest_allowed_date:
×
507
                    # The crash is not new, skip it.
508
                    continue
×
509

510
                if any(
×
511
                    reason["term"].startswith(io_error_prefix)
512
                    for reason in facets["reason"]
513
                    for io_error_prefix in cls.EXCLUDED_IO_ERROR_REASON_PREFIXES
514
                ):
515
                    # Ignore Network or I/O error crashes.
516
                    continue
×
517

518
                if crash["count"] < 20:
×
519
                    # For signatures with low volume, having multiple types of
520
                    # memory errors indicates potential bad hardware crashes.
521
                    num_memory_error_types = sum(
×
522
                        reason["term"] in cls.MEMORY_ACCESS_ERROR_REASONS
523
                        for reason in facets["reason"]
524
                    )
525
                    if num_memory_error_types > 1:
×
526
                        # Potential bad hardware crash, skip it.
527
                        continue
×
528

529
                # TODO: Add a filter using the `possible_bit_flips_max_confidence`
530
                # field to exclude bad hardware crashes. The filed is not available yet.
531
                # See: https://bugzilla.mozilla.org/show_bug.cgi?id=1816669#c3
532

533
                # TODO(investigate): is this needed since we are already
534
                # filtering signatures that start with "OOM | "
535
                if facets["cardinality_oom_allocation_size"]["value"]:
×
536
                    # If one of the crashes is an OOM crash, skip it.
537
                    continue
×
538

539
                # TODO(investigate): do we need to check for the `moz_crash_reason`
540
                moz_crash_reasons = facets["moz_crash_reason"]
×
541
                if moz_crash_reasons and any(
×
542
                    excluded_reason in reason["term"]
543
                    for reason in moz_crash_reasons
544
                    for excluded_reason in cls.EXCLUDED_MOZ_REASON_STRINGS
545
                ):
546
                    continue
×
547

548
                data.append(signature)
×
549

550
        signatures: list = []
×
551
        socorro.SuperSearch(
×
552
            params=params,
553
            handler=handler,
554
            handlerdata=signatures,
555
        ).wait()
556

557
        logger.debug(
×
558
            "Total of %d signatures left after applying the filtering criteria",
559
            len(signatures),
560
        )
561

562
        return cls(signatures, product, channel)
×
563

564
    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
×
565
        """Fetch the crash reports data from Crash Clouseau."""
566
        signature_reports = clouseau.Reports.get_by_signatures(
×
567
            self._signatures,
568
            product=self._product,
569
            channel=self._channel,
570
        )
571

572
        logger.debug(
×
573
            "Total of %d signatures received from Clouseau", len(signature_reports)
574
        )
575

576
        return signature_reports
×
577

578
    def fetch_socorro_info(self) -> tuple[list[dict], int]:
×
579
        """Fetch the signature data from Socorro."""
580
        # TODO(investigate): should we increase the duration to 6 months?
581
        duration = timedelta(weeks=1)
×
582
        end_date = lmdutils.get_date_ymd("today")
×
583
        start_date = end_date - duration
×
584
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)
×
585

586
        params = {
×
587
            "product": self._product,
588
            # TODO(investigate): should we included all release channels?
589
            "release_channel": self._channel,
590
            # TODO(investigate): should we limit based on the build date as well?
591
            "date": date_range,
592
            # TODO: split signatures into chunks to avoid very long query URLs
593
            "signature": ["=" + signature for signature in self._signatures],
594
            "_aggs.signature": [
595
                "build_id",
596
                "cpu_arch",
597
                "proto_signature",
598
                "_cardinality.user_comments",
599
                "cpu_arch",
600
                "platform_pretty_version",
601
                # The following are needed for SignatureStats:
602
                "platform",
603
                "is_garbage_collecting",
604
                "_cardinality.install_time",
605
                "startup_crash",
606
                "_histogram.uptime",
607
                "process_type",
608
            ],
609
            "_results_number": 0,
610
            "_facets_size": 10000,
611
        }
612

613
        def handler(search_results: dict, data: dict):
×
614
            data["num_total_crashes"] = search_results["total"]
×
615
            data["signatures"] = search_results["facets"]["signature"]
×
616

617
        data: dict = {}
×
618
        socorro.SuperSearchUnredacted(
×
619
            params=params,
620
            handler=handler,
621
            handlerdata=data,
622
        ).wait()
623

624
        logger.debug(
×
625
            "Fetch info from Socorro for %d signatures", len(data["signatures"])
626
        )
627

628
        return data["signatures"], data["num_total_crashes"]
×
629

630
    def fetch_bugs(self, include_fields: list[str] = None) -> dict[str, list[dict]]:
×
631
        """Fetch bugs that are filed against the given signatures."""
632

633
        params_base: dict = {
×
634
            "include_fields": [
635
                "cf_crash_signature",
636
            ],
637
        }
638

639
        if include_fields:
×
640
            params_base["include_fields"].extend(include_fields)
×
641

642
        params_list = []
×
643
        for signatures_chunk in Connection.chunks(list(self._signatures), 30):
×
644
            params = params_base.copy()
×
645
            n = int(utils.get_last_field_num(params))
×
646
            params[f"f{n}"] = "OP"
×
647
            params[f"j{n}"] = "OR"
×
648
            for signature in signatures_chunk:
×
649
                n += 1
×
650
                params[f"f{n}"] = "cf_crash_signature"
×
651
                params[f"o{n}"] = "regexp"
×
652
                params[f"v{n}"] = rf"\[(@ |@){re.escape(signature)}( \]|\])"
×
653
            params[f"f{n+1}"] = "CP"
×
654
            params_list.append(params)
×
655

656
        signatures_bugs: dict = defaultdict(list)
×
657

658
        def handler(res, data):
×
659
            for bug in res["bugs"]:
×
660
                for signature in utils.get_signatures(bug["cf_crash_signature"]):
×
661
                    if signature in self._signatures:
×
662
                        data[signature].append(bug)
×
663

664
        Bugzilla(
×
665
            queries=[
666
                connection.Query(Bugzilla.API_URL, params, handler, signatures_bugs)
667
                for params in params_list
668
            ],
669
        ).wait()
670

671
        # TODO: remove the call to DevBugzilla after moving to production
672
        DevBugzilla(
×
673
            queries=[
674
                connection.Query(DevBugzilla.API_URL, params, handler, signatures_bugs)
675
                for params in params_list
676
            ],
677
        ).wait()
678

679
        logger.debug(
×
680
            "Total of %d signatures already have bugs filed", len(signatures_bugs)
681
        )
682

683
        return signatures_bugs
×
684

685
    def analyze(self) -> list[SignatureAnalyzer]:
×
686
        """Analyze the data related to the signatures."""
687
        bugs = self.fetch_bugs()
×
688
        # TODO(investigate): For now, we are ignoring signatures that have bugs
689
        # filed even if they are closed long time ago. We should investigate
690
        # whether we should include the ones with closed bugs. For example, if
691
        # the bug was closed as Fixed years ago.
692
        self._signatures.difference_update(bugs.keys())
×
693

694
        clouseau_reports = self.fetch_clouseau_crash_reports()
×
695
        # TODO(investigate): For now, we are ignoring signatures that are not
696
        # analyzed by clouseau. We should investigate why they are not analyzed
697
        # and whether we should include them.
698
        self._signatures.intersection_update(clouseau_reports.keys())
×
699

700
        signatures, num_total_crashes = self.fetch_socorro_info()
×
701
        logger.debug("Total of %d signatures will be analyzed", len(signatures))
×
702

703
        return [
×
704
            SignatureAnalyzer(
705
                signature,
706
                num_total_crashes,
707
                clouseau_reports[signature["term"]],
708
            )
709
            for signature in signatures
710
        ]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc