• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mozilla / relman-auto-nag / #4591

pending completion
#4591

push

coveralls-python

suhaibmujahid
Add a new rule to automatically file bugs for new actionable crashes

646 of 3400 branches covered (19.0%)

457 of 457 new or added lines in 4 files covered. (100.0%)

1827 of 8458 relevant lines covered (21.6%)

0.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/bugbot/crash/analyzer.py
1
# This Source Code Form is subject to the terms of the Mozilla Public
2
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3
# You can obtain one at http://mozilla.org/MPL/2.0/.
4

5
import itertools
×
6
from datetime import timedelta
×
7
from functools import cached_property
×
8
from typing import Iterable, Iterator
×
9

10
from libmozdata import bugzilla, clouseau, socorro
×
11
from libmozdata import utils as lmdutils
×
12

13
from bugbot.components import ComponentName
×
14
from bugbot.crash import socorro_util
×
15

16

17
class NoCrashReportFoundError(Exception):
    """Signal that no crash report satisfying the required criteria exists."""
19

20

21
class ClouseauReportsAnalyzer:
    """Derive regression information from the clouseau reports of a signature.

    Each report is a dict carrying a "max_score" and a list of "changesets";
    each changeset carries "bug_id", "max_score", "is_merge" and
    "is_backedout".
    """

    # Lowest score a report/changeset must reach to be considered a regressor.
    REGRESSOR_MINIMUM_SCORE: int = 8

    def __init__(self, reports: Iterable[dict]):
        """Store the clouseau reports to analyze.

        Args:
            reports: The clouseau reports. They are tested for emptiness and
                iterated more than once, so pass a collection (e.g., a list)
                rather than a one-shot iterator.
        """
        self._clouseau_reports = reports

    @cached_property
    def max_score(self):
        """The highest "max_score" among the reports, or 0 without reports."""
        if not self._clouseau_reports:
            return 0
        return max(report["max_score"] for report in self._clouseau_reports)

    @cached_property
    def regressed_by_candidate_ids(self) -> set[int]:
        """The IDs of the bugs whose changesets could have caused the crash.

        Only changesets that reach both the minimum regressor score and the
        highest score seen in the reports are kept; merges and backed-out
        changesets are ignored.
        """
        minimum_accepted_score = max(self.REGRESSOR_MINIMUM_SCORE, self.max_score)
        return {
            changeset["bug_id"]
            for report in self._clouseau_reports
            if report["max_score"] >= minimum_accepted_score
            for changeset in report["changesets"]
            if changeset["max_score"] >= minimum_accepted_score
            and not changeset["is_merge"]
            and not changeset["is_backedout"]
        }

    @cached_property
    def regressed_by(self) -> int | None:
        """The regressor bug ID, or None unless there is exactly one candidate."""
        bug_ids = self.regressed_by_candidate_ids
        if len(bug_ids) == 1:
            return next(iter(bug_ids))
        return None

    @cached_property
    def regressed_by_candidate_bugs(self) -> list[dict]:
        """The candidate regressor bugs as fetched from Bugzilla.

        Returns:
            The bugs returned by Bugzilla for the candidate IDs. The list is
            empty, and no request is made, when there are no candidates.
        """
        bug_ids = self.regressed_by_candidate_ids
        if not bug_ids:
            # Nothing to look up, so avoid issuing a query without bug IDs.
            return []

        def handler(bug: dict, data: list):
            data.append(bug)

        bugs: list[dict] = []
        bugzilla.Bugzilla(
            bugids=bug_ids,
            include_fields=[
                "id",
                "assigned_to",
                "product",
                "component",
            ],
            bughandler=handler,
            bugdata=bugs,
        ).wait()

        return bugs

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The details of the assignee of the regressor bug.

        Returns:
            The "assigned_to_detail" object of the regressor bug, or None when
            there is no single regressor or when the regressor bug is not
            among the bugs returned by Bugzilla.
        """
        regressor_id = self.regressed_by
        if regressor_id is None:
            return None

        # Look the bug up by ID instead of assuming it is the first (and only)
        # result: Bugzilla may not return a bug at all.
        for bug in self.regressed_by_candidate_bugs:
            if bug["id"] == regressor_id:
                # Bugzilla names the field "assigned_to_detail" (singular).
                return bug["assigned_to_detail"]

        return None

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component to file the crash in.

        Returns:
            The component shared by all candidate regressor bugs, or
            "Core :: General" when they do not agree on exactly one component.
        """
        candidate_components = {
            ComponentName(bug["product"], bug["component"])
            for bug in self.regressed_by_candidate_bugs
        }
        if len(candidate_components) == 1:
            return next(iter(candidate_components))

        return ComponentName("Core", "General")
×
92

93

94
class SocorroInfoAnalyzer(socorro_util.SignatureStats):
    """Expose the Socorro facets of a crash signature as Bugzilla-ready values."""

    # The values accepted by the Bugzilla "op_sys" field.
    __bugzilla_os_values = set(bugzilla.BugFields.fetch_field_values("op_sys"))
    # The values accepted by the Bugzilla "rep_platform" field, keyed by their
    # lower-cased form so they can be looked up case-insensitively.
    __bugzilla_cpu_values = {
        value.lower(): value
        for value in bugzilla.BugFields.fetch_field_values("rep_platform")
    }

    @staticmethod
    def _collapse_bugzilla_values(values: set[str]) -> str:
        """Reduce a set of Bugzilla field values to a single field value.

        Args:
            values: The distinct values seen across the crash reports.

        Returns:
            The only value when there is exactly one (ignoring "Other" when it
            comes with exactly one other value), "Unspecified" when there are
            none, and "All" otherwise.
        """
        if len(values) == 2 and "Other" in values:
            values = values - {"Other"}

        if not values:
            return "Unspecified"

        if len(values) == 1:
            return next(iter(values))

        return "All"

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Map an operating system name from Socorro to a Bugzilla one.

        Args:
            op_sys: The operating system name as reported by Socorro.

        Returns:
            The name unchanged when Bugzilla accepts it as is; otherwise one of
            "macOS", "Windows", "Linux", or "Other".
        """
        if op_sys in cls.__bugzilla_os_values:
            return op_sys

        if op_sys.startswith(("OS X ", "macOS ")):
            return "macOS"
        if op_sys.startswith("Windows"):
            return "Windows"
        if "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            return "Linux"

        return "Other"

    @property
    def bugzilla_op_sys(self) -> str:
        """The Bugzilla "op_sys" value that summarizes the crashing platforms."""
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # Keep only the first word of each value so that several values
            # starting with the same word collapse into one.
            # NOTE(review): presumably meant to merge versions of the same OS
            # family (e.g., "Windows 10" and "Windows 11") - confirm.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        return self._collapse_bugzilla_values(all_op_sys)

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Map a CPU architecture name from Socorro to a Bugzilla one.

        Args:
            cpu: The CPU architecture name as reported by Socorro.

        Returns:
            The matching Bugzilla "rep_platform" value (compared
            case-insensitively), or "Other" when there is no match.
        """
        # The lookup table is keyed by lower-cased names, so normalize the key.
        return cls.__bugzilla_cpu_values.get(cpu.lower(), "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The Bugzilla "rep_platform" value that summarizes the crashing CPUs."""
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        return self._collapse_bugzilla_values(all_cpu_arch)

    @property
    def num_user_comments(self) -> int:
        """The value of the user comments cardinality facet."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether at least one user comment was counted."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The term of the first entry in the proto signature facet."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The crash count of the first entry in the proto signature facet."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    @property
    def build_ids(self) -> Iterator[int]:
        """The build IDs listed in the build ID facet, in facet order."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The term of the first entry in the build ID facet."""
        return self.signature["facets"]["build_id"][0]["term"]
×
186

187

188
class SignatureAnalyzer(SocorroInfoAnalyzer, ClouseauReportsAnalyzer):
    """Analyze one crash signature using both its Socorro and clouseau data."""

    # The platforms handed over to the Socorro signature statistics.
    platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
    ):
        """Initialize both parent analyzers.

        Args:
            signature: The signature facet entry from Socorro.
            num_total_crashes: The total number of crashes in the search.
            clouseau_reports: The clouseau reports for the signature.
        """
        # The parents take different arguments, so each one is initialized
        # explicitly instead of going through `super()`.
        SocorroInfoAnalyzer.__init__(
            self,
            signature,
            num_total_crashes,
            platforms=self.platforms,
        )
        ClouseauReportsAnalyzer.__init__(self, clouseau_reports)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Search Socorro for crash reports matching a proto signature.

        Args:
            proto_signature: The proto signature to match exactly.
            build_id: The build ID(s) to restrict the search to.
            limit: The maximum number of results to request.

        Yields:
            The search hits; only the "uuid" column is requested.
        """
        search_results: dict = {}

        def collect(res: dict, data: dict):
            data.update(res)

        socorro.SuperSearch(
            params={
                "proto_signature": "=" + proto_signature,
                "build_id": build_id,
                "_columns": [
                    "uuid",
                ],
                "_results_number": limit,
            },
            handler=collect,
            handlerdata=search_results,
        ).wait()

        for hit in search_results["hits"]:
            yield hit

    def fetch_representing_processed_crash(self) -> dict:
        """Fetch a processed crash that is representative of the signature.

        When the top proto signature accounts for more than 60% of the
        crashes, only a crash with that proto signature is accepted.

        Returns:
            The first acceptable processed crash.

        Raises:
            NoCrashReportFoundError: When no candidate report is acceptable.
        """
        top_proto_share = self.num_top_proto_signature_crashes / self.num_crashes
        must_match_top_proto = top_proto_share > 0.6

        # Reports with a higher score from clouseau are more likely to be
        # useful.
        by_clouseau_score = sorted(
            self._clouseau_reports,
            key=lambda report: report["max_score"],
            reverse=True,
        )
        candidates = itertools.chain(
            by_clouseau_score,
            # Next we try find reports from the top crashing build because they
            # are likely to be representative.
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self.build_ids),
        )

        for candidate in candidates:
            crash_id = candidate["uuid"]
            processed = socorro.ProcessedCrash.get_processed(crash_id)[crash_id]
            if (
                must_match_top_proto
                and processed["proto_signature"] != self.top_proto_signature
            ):
                continue

            # TODO(investigate): maybe we should check if the stack is
            # corrupted (ask gsvelto or willkg about how to detect that)
            return processed

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )
263

264

265
class SignaturesDataFetcher:
    """Fetch the Socorro and clouseau data needed to analyze crash signatures."""

    # The facets requested for each signature. Every facet is listed once.
    _SIGNATURE_AGGREGATIONS: tuple[str, ...] = (
        "build_id",
        "cpu_arch",
        "proto_signature",
        "_cardinality.user_comments",
        "platform_pretty_version",
        # The following are needed for SignatureStats:
        "platform",
        "is_garbage_collecting",
        "_cardinality.install_time",
        "startup_crash",
        "_histogram.uptime",
        "process_type",
    )

    def __init__(
        self,
        signatures,
        product: str = "Firefox",
        channel: str = "nightly",
    ):
        """Store the search criteria.

        Args:
            signatures: The crash signatures to fetch data for.
            product: The product to restrict the searches to.
            channel: The release channel to restrict the searches to.
        """
        self._signatures = signatures
        self._product = product
        self._channel = channel

    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
        """Fetch the clouseau reports of the signatures.

        Returns:
            Whatever clouseau returns for the signatures; `analyze()` indexes
            it by signature term.
        """
        return clouseau.Reports.get_by_signatures(
            self._signatures,
            product=self._product,
            channel=self._channel,
        )

    def fetch_socorro_info(self) -> tuple[list[dict], int]:
        """Fetch the Socorro facets of the signatures for the last week.

        Returns:
            A tuple of the signature facet entries and the total number of
            crashes reported by the search.
        """
        # TODO(investigate): should we increase the duration to 6 months?
        duration = timedelta(weeks=1)
        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - duration
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": self._product,
            # TODO(investigate): should we included all release channels?
            "release_channel": self._channel,
            # TODO(investigate): should we limit based on the build date as well?
            "date": date_range,
            # TODO: split signatures into chunks to avoid very long query URLs
            "signature": ["=" + signature for signature in self._signatures],
            "_aggs.signature": list(self._SIGNATURE_AGGREGATIONS),
            # Only the facets are needed, not the individual crash reports.
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_results: dict, data: dict):
            data["num_total_crashes"] = search_results["total"]
            data["signatures"] = search_results["facets"]["signature"]

        data: dict = {}
        socorro.SuperSearchUnredacted(
            params=params,
            handler=handler,
            handlerdata=data,
        ).wait()

        return data["signatures"], data["num_total_crashes"]

    def analyze(self) -> list[SignatureAnalyzer]:
        """Build an analyzer for each signature that clouseau has reports for.

        Returns:
            One `SignatureAnalyzer` per signature returned by Socorro that is
            also present in the clouseau reports.
        """
        clouseau_reports = self.fetch_clouseau_crash_reports()
        signatures, num_total_crashes = self.fetch_socorro_info()

        return [
            SignatureAnalyzer(
                signature,
                num_total_crashes,
                clouseau_reports[signature["term"]],
            )
            for signature in signatures
            # TODO(investigate): For now, we are ignoring signatures that are
            # not analyzed by clouseau. We should investigate why they are not
            # analyzed and whether we should include them.
            if signature["term"] in clouseau_reports
        ]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc