• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mozilla / relman-auto-nag / #4595

pending completion
#4595

push

coveralls-python

suhaibmujahid
Add a new rule to automatically file bugs for new actionable crashes

646 of 3406 branches covered (18.97%)

465 of 465 new or added lines in 4 files covered. (100.0%)

1828 of 8469 relevant lines covered (21.58%)

0.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/bugbot/crash/analyzer.py
1
# This Source Code Form is subject to the terms of the Mozilla Public
2
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3
# You can obtain one at http://mozilla.org/MPL/2.0/.
4

5
import itertools
×
6
from datetime import timedelta
×
7
from functools import cached_property
×
8
from typing import Iterable, Iterator
×
9

10
from libmozdata import bugzilla, clouseau, socorro
×
11
from libmozdata import utils as lmdutils
×
12

13
from bugbot.components import ComponentName
×
14
from bugbot.crash import socorro_util
×
15

16

17
class NoCrashReportFoundError(Exception):
    """Signal that no crash report satisfied the required criteria."""
19

20

21
class ClouseauReportsAnalyzer:
    """Derive the likely regressors of a crash from its clouseau reports.

    Each report is read as a dict with a ``max_score`` and a list of
    ``changesets``; each changeset is read for its ``bug_id``, ``changeset``,
    ``max_score``, ``is_merge`` and ``is_backedout`` entries.
    """

    # Lowest score a report or a changeset needs to be considered a regressor.
    REGRESSOR_MINIMUM_SCORE: int = 8

    def __init__(self, reports: Iterable[dict]):
        """Store the clouseau reports to analyze.

        Args:
            reports: The clouseau reports to analyze.
        """
        # The reports are walked several times by the properties below, so a
        # one-shot iterable (e.g. a generator) is materialized up front. Lists
        # and tuples are kept as-is.
        if isinstance(reports, (list, tuple)):
            self._clouseau_reports = reports
        else:
            self._clouseau_reports = list(reports)

    @cached_property
    def max_score(self):
        """The highest ``max_score`` among the reports, or 0 without reports."""
        return max(
            (report["max_score"] for report in self._clouseau_reports),
            default=0,
        )

    def _accepted_changesets(self) -> Iterator[dict]:
        """Yield the changesets that qualify as potential regressors.

        A changeset qualifies when both its own score and the score of its
        report reach the accepted minimum, and it is neither a merge nor
        backed out. The accepted minimum is the larger of
        `REGRESSOR_MINIMUM_SCORE` and the highest report score.
        """
        minimum_accepted_score = max(self.REGRESSOR_MINIMUM_SCORE, self.max_score)
        for report in self._clouseau_reports:
            if report["max_score"] >= minimum_accepted_score:
                for changeset in report["changesets"]:
                    if (
                        changeset["max_score"] >= minimum_accepted_score
                        and not changeset["is_merge"]
                        and not changeset["is_backedout"]
                    ):
                        yield changeset

    @cached_property
    def regressed_by_potential_bug_ids(self) -> set[int]:
        """The IDs of the bugs whose changesets qualify as regressors."""
        return {changeset["bug_id"] for changeset in self._accepted_changesets()}

    @cached_property
    def regressed_by_patch(self) -> str | None:
        """The single qualifying changeset, or `None` if there is not exactly one."""
        potential_patches = {
            changeset["changeset"] for changeset in self._accepted_changesets()
        }
        if len(potential_patches) == 1:
            return next(iter(potential_patches))
        return None

    @cached_property
    def regressed_by(self) -> int | None:
        """The single potential regressor bug ID, or `None` if not exactly one."""
        bug_ids = self.regressed_by_potential_bug_ids
        if len(bug_ids) == 1:
            return next(iter(bug_ids))
        return None

    @cached_property
    def regressed_by_potential_bugs(self) -> list[dict]:
        """The Bugzilla data of the potential regressor bugs.

        Returns:
            The bugs passed to the handler by the Bugzilla query, each
            requested with its id, assignee, product and component. The list
            is empty when there are no potential regressor bug IDs.
        """
        bug_ids = self.regressed_by_potential_bug_ids
        if not bug_ids:
            # Nothing to look up, so skip the Bugzilla round trip.
            return []

        def handler(bug: dict, data: list):
            data.append(bug)

        bugs: list[dict] = []
        bugzilla.Bugzilla(
            bugids=bug_ids,
            include_fields=[
                "id",
                "assigned_to",
                "product",
                "component",
            ],
            bughandler=handler,
            bugdata=bugs,
        ).wait()

        return bugs

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The ``assigned_to_detail`` of the single regressor bug.

        Returns:
            `None` when there is no single regressor (see `regressed_by`) or
            when that bug is missing from the fetched Bugzilla data.
        """
        regressor_id = self.regressed_by
        if not regressor_id:
            return None

        # Look the bug up by ID instead of assuming it is the first (and only)
        # fetched entry: the fetched list can be empty.
        for bug in self.regressed_by_potential_bugs:
            if bug["id"] == regressor_id:
                return bug["assigned_to_detail"]
        return None

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component shared by all potential regressor bugs.

        Falls back to "Core :: General" when the potential regressor bugs do
        not agree on exactly one product/component pair.
        """
        potential_components = {
            ComponentName(bug["product"], bug["component"])
            for bug in self.regressed_by_potential_bugs
        }
        if len(potential_components) == 1:
            return next(iter(potential_components))
        return ComponentName("Core", "General")
×
107

108

109
class SocorroInfoAnalyzer(socorro_util.SignatureStats):
    """Translate the Socorro facets of a signature into Bugzilla field values.

    The properties read ``self.signature["facets"]``.
    NOTE(review): ``signature`` is presumably set by ``SignatureStats`` --
    confirm against that class.
    """

    # Values of the Bugzilla "op_sys" field, fetched when the class body runs.
    __bugzilla_os_values = set(bugzilla.BugFields.fetch_field_values("op_sys"))
    # Values of the Bugzilla "rep_platform" field, keyed by their lowercase
    # form.
    __bugzilla_cpu_values = {
        value.lower(): value
        for value in bugzilla.BugFields.fetch_field_values("rep_platform")
    }

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Return the Bugzilla "op_sys" value for an OS name from Socorro.

        Names that are already Bugzilla values are returned unchanged.
        Otherwise the name is reduced to "macOS", "Windows" or "Linux", and to
        "Other" when none of those families matches.
        """
        if op_sys in cls.__bugzilla_os_values:
            return op_sys

        if op_sys.startswith(("OS X ", "macOS ")):
            return "macOS"
        if op_sys.startswith("Windows"):
            return "Windows"
        if "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            return "Linux"
        return "Other"

    @property
    def bugzilla_op_sys(self) -> str:
        """The Bugzilla "op_sys" value that covers all the crashes.

        Returns:
            The single OS when all crashes share one (ignoring "Other" when it
            is paired with exactly one real OS), "Unspecified" when there is no
            OS data, and "All" otherwise.
        """
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # TODO: explain this workaround
            # NOTE(review): keeping only the first word presumably merges
            # versioned values that `to_bugzilla_op_sys` passes through
            # unchanged (e.g. "Windows 10" and "Windows 11") -- confirm.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        if len(all_op_sys) == 2 and "Other" in all_op_sys:
            all_op_sys.remove("Other")

        if len(all_op_sys) == 1:
            return next(iter(all_op_sys))

        if len(all_op_sys) == 0:
            return "Unspecified"

        return "All"

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Return the Bugzilla "rep_platform" value for a CPU name from Socorro.

        The match is case-insensitive; unrecognized names map to "Other".
        """
        # The lookup table is keyed by lowercase names, so the query must be
        # lowercased as well.
        return cls.__bugzilla_cpu_values.get(cpu.lower(), "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The Bugzilla "rep_platform" value that covers all the crashes.

        Returns:
            The single CPU architecture when all crashes share one (ignoring
            "Other" when it is paired with exactly one real architecture),
            "Unspecified" when there is no CPU data, and "All" otherwise.
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        if len(all_cpu_arch) == 2 and "Other" in all_cpu_arch:
            all_cpu_arch.remove("Other")

        if len(all_cpu_arch) == 1:
            return next(iter(all_cpu_arch))

        if len(all_cpu_arch) == 0:
            return "Unspecified"

        return "All"

    @property
    def num_user_comments(self) -> int:
        """The value of the user comments cardinality facet."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether the signature has at least one user comment."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The term of the first entry in the proto signature facet."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The count of the first entry in the proto signature facet."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    @property
    def build_ids(self) -> Iterator[int]:
        """Yield the term of every entry in the build ID facet, in facet order."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The term of the first entry in the build ID facet."""
        return self.signature["facets"]["build_id"][0]["term"]
×
201

202

203
class SignatureAnalyzer(SocorroInfoAnalyzer, ClouseauReportsAnalyzer):
    """Analyze one crash signature using both its Socorro and clouseau data."""

    # Platform descriptors handed to `SocorroInfoAnalyzer.__init__`.
    platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
    ):
        """Initialize both analyzer bases.

        Args:
            signature: The Socorro data of the signature.
            num_total_crashes: The total number of crashes, forwarded to
                `SocorroInfoAnalyzer.__init__`.
            clouseau_reports: The clouseau reports of the signature.
        """
        # The two bases take different arguments, so each initializer is
        # called explicitly.
        SocorroInfoAnalyzer.__init__(
            self, signature, num_total_crashes, platforms=self.platforms
        )
        ClouseauReportsAnalyzer.__init__(self, clouseau_reports)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Yield crash reports from Socorro matching the given criteria.

        Args:
            proto_signature: The exact proto signature the reports must have.
            build_id: The build ID, or build IDs, to filter on.
            limit: The value sent as the number of results to request.

        Yields:
            The hits of the search; only the ``uuid`` column is requested.
        """
        # A one-shot iterable (such as the `build_ids` generator) would be
        # empty if the parameters were read more than once, so it is
        # materialized before being handed to the search.
        if not isinstance(build_id, (int, str)):
            build_id = list(build_id)

        params = {
            "proto_signature": "=" + proto_signature,
            "build_id": build_id,
            "_columns": [
                "uuid",
            ],
            "_results_number": limit,
        }

        def handler(res: dict, data: dict):
            data.update(res)

        data: dict = {}
        socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait()

        yield from data["hits"]

    def fetch_representing_processed_crash(self) -> dict:
        """Fetch a processed crash that represents the signature.

        Candidates are tried lazily in this order: the clouseau reports by
        descending score, a report from the top build ID, then a report from
        any of the build IDs. When the top proto signature accounts for more
        than 60% of the crashes, only a crash with that proto signature is
        accepted.

        Returns:
            The first processed crash that is accepted.

        Raises:
            NoCrashReportFoundError: If none of the candidates is accepted.
        """
        limit_to_top_proto_signature = (
            self.num_top_proto_signature_crashes / self.num_crashes > 0.6
        )

        reports = itertools.chain(
            # Reports with a higher score from clouseau are more likely to be
            # useful.
            sorted(
                self._clouseau_reports,
                key=lambda report: report["max_score"],
                reverse=True,
            ),
            # Next we try to find reports from the top crashing build because
            # they are likely to be representative.
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self.build_ids),
        )
        for report in reports:
            uuid = report["uuid"]
            processed_crash = socorro.ProcessedCrash.get_processed(uuid)[uuid]
            if (
                not limit_to_top_proto_signature
                or processed_crash["proto_signature"] == self.top_proto_signature
            ):
                # TODO(investigate): maybe we should check if the stack is
                # corrupted (ask gsvelto or willkg about how to detect that)
                return processed_crash

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )
278

279

280
class SignaturesDataFetcher:
    """Fetch the clouseau and Socorro data needed to analyze crash signatures."""

    def __init__(
        self,
        signatures,
        product: str = "Firefox",
        channel: str = "nightly",
    ):
        """Store the query criteria.

        Args:
            signatures: The crash signatures to fetch data for.
            product: The product to restrict the queries to.
            channel: The release channel to restrict the queries to.
        """
        self._signatures = signatures
        self._product = product
        self._channel = channel

    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
        """Fetch the clouseau reports for the signatures.

        Returns:
            The result of the clouseau query; `analyze` indexes it by
            signature term.
        """
        return clouseau.Reports.get_by_signatures(
            self._signatures,
            product=self._product,
            channel=self._channel,
        )

    def fetch_socorro_info(self) -> tuple[list[dict], int]:
        """Fetch the Socorro aggregations for the signatures over the last week.

        Returns:
            A tuple of the signature facets from the search results and the
            total reported by the search.
        """
        # TODO(investigate): should we increase the duration to 6 months?
        duration = timedelta(weeks=1)
        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - duration
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": self._product,
            # TODO(investigate): should we include all release channels?
            "release_channel": self._channel,
            # TODO(investigate): should we limit based on the build date as well?
            "date": date_range,
            # TODO: split signatures into chunks to avoid very long query URLs
            "signature": ["=" + signature for signature in self._signatures],
            "_aggs.signature": [
                "build_id",
                "cpu_arch",
                "proto_signature",
                "_cardinality.user_comments",
                "platform_pretty_version",
                # The following are needed for SignatureStats:
                "platform",
                "is_garbage_collecting",
                "_cardinality.install_time",
                "startup_crash",
                "_histogram.uptime",
                "process_type",
            ],
            # Only the aggregations are used, so no individual hits are
            # requested.
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_results: dict, data: dict):
            data["num_total_crashes"] = search_results["total"]
            data["signatures"] = search_results["facets"]["signature"]

        data: dict = {}
        socorro.SuperSearchUnredacted(
            params=params,
            handler=handler,
            handlerdata=data,
        ).wait()

        return data["signatures"], data["num_total_crashes"]

    def analyze(self) -> list[SignatureAnalyzer]:
        """Build an analyzer for every signature that has clouseau reports.

        Returns:
            One `SignatureAnalyzer` per signature facet returned by Socorro
            whose term is present in the clouseau reports.
        """
        clouseau_reports = self.fetch_clouseau_crash_reports()
        signatures, num_total_crashes = self.fetch_socorro_info()

        return [
            SignatureAnalyzer(
                signature,
                num_total_crashes,
                clouseau_reports[signature["term"]],
            )
            for signature in signatures
            # TODO(investigate): For now, we are ignoring signatures that are
            # not analyzed by clouseau. We should investigate why they are not
            # analyzed and whether we should include them.
            if signature["term"] in clouseau_reports
        ]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc