• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mozilla / relman-auto-nag / #4602

pending completion
#4602

push

coveralls-python

suhaibmujahid
Lazy load Bugzilla legal fields

646 of 3410 branches covered (18.94%)

8 of 8 new or added lines in 1 file covered. (100.0%)

1828 of 8473 relevant lines covered (21.57%)

0.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/bugbot/crash/analyzer.py
1
# This Source Code Form is subject to the terms of the Mozilla Public
2
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3
# You can obtain one at http://mozilla.org/MPL/2.0/.
4

5
import itertools
×
6
from datetime import timedelta
×
7
from functools import cached_property
×
8
from typing import Iterable, Iterator
×
9

10
from libmozdata import bugzilla, clouseau, socorro
×
11
from libmozdata import utils as lmdutils
×
12

13
from bugbot.components import ComponentName
×
14
from bugbot.crash import socorro_util
×
15

16

17
class NoCrashReportFoundError(Exception):
    """Raised when no crash report is found with the required criteria."""
19

20

21
class ClouseauReportsAnalyzer:
    """Derive regression information from a set of Clouseau crash reports.

    Each report is read as a dict with a ``max_score`` entry and a
    ``changesets`` list; each changeset is read as a dict with ``bug_id``,
    ``changeset``, ``max_score``, ``is_merge`` and ``is_backedout`` entries.
    """

    # Lowest score that a report or a changeset must reach to be considered as
    # a potential regressor.
    REGRESSOR_MINIMUM_SCORE: int = 8

    def __init__(self, reports: Iterable[dict]):
        """Store the Clouseau reports to analyze.

        Args:
            reports: The Clouseau reports. They are iterated several times, so
                a re-iterable collection (e.g., a list) is required.
        """
        self._clouseau_reports = reports

    @cached_property
    def max_score(self):
        """The highest ``max_score`` among the reports, or 0 without reports."""
        if not self._clouseau_reports:
            return 0
        return max(report["max_score"] for report in self._clouseau_reports)

    def _iter_accepted_changesets(self) -> Iterator[dict]:
        """Yield the changesets that qualify as potential regressors.

        A changeset qualifies when both it and its report reach the accepted
        score and it is neither a merge nor backed out. The accepted score is
        the highest score seen in the reports, but never lower than
        ``REGRESSOR_MINIMUM_SCORE``.
        """
        minimum_accepted_score = max(self.REGRESSOR_MINIMUM_SCORE, self.max_score)
        for report in self._clouseau_reports:
            if report["max_score"] >= minimum_accepted_score:
                for changeset in report["changesets"]:
                    if (
                        changeset["max_score"] >= minimum_accepted_score
                        and not changeset["is_merge"]
                        and not changeset["is_backedout"]
                    ):
                        yield changeset

    @cached_property
    def regressed_by_potential_bug_ids(self) -> set[int]:
        """The IDs of the bugs whose changesets could have caused the crash."""
        return {
            changeset["bug_id"] for changeset in self._iter_accepted_changesets()
        }

    @cached_property
    def regressed_by_patch(self) -> str | None:
        """The changeset that could have caused the crash.

        Returns:
            The changeset identifier when exactly one distinct changeset
            qualifies; otherwise ``None``.
        """
        potential_patches = {
            changeset["changeset"] for changeset in self._iter_accepted_changesets()
        }
        if len(potential_patches) == 1:
            return next(iter(potential_patches))
        return None

    @cached_property
    def regressed_by(self) -> int | None:
        """The ID of the bug that could have caused the crash.

        Returns:
            The bug ID when exactly one potential regressor bug exists;
            otherwise ``None``.
        """
        bug_ids = self.regressed_by_potential_bug_ids
        if len(bug_ids) == 1:
            return next(iter(bug_ids))
        return None

    @cached_property
    def regressed_by_potential_bugs(self) -> list[dict]:
        """The potential regressor bugs as fetched from Bugzilla.

        Only the ``id``, ``assigned_to``, ``product`` and ``component`` fields
        are requested. This performs a blocking Bugzilla request the first
        time it is accessed.
        """

        def handler(bug: dict, data: list):
            data.append(bug)

        bugs: list[dict] = []
        bugzilla.Bugzilla(
            bugids=self.regressed_by_potential_bug_ids,
            include_fields=[
                "id",
                "assigned_to",
                "product",
                "component",
            ],
            bughandler=handler,
            bugdata=bugs,
        ).wait()

        return bugs

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The assignee details of the single regressor bug.

        Returns:
            The ``assigned_to_detail`` entry of the regressor bug, or ``None``
            when there is no single regressor or when Bugzilla did not return
            that bug.
        """
        regressor_id = self.regressed_by
        if not regressor_id:
            return None

        # Look the bug up by ID instead of assuming it is the first result.
        # NOTE(review): Bugzilla presumably omits bugs that the account cannot
        # access, in which case nothing is returned for the regressor.
        for bug in self.regressed_by_potential_bugs:
            if bug["id"] == regressor_id:
                return bug["assigned_to_detail"]
        return None

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component to associate with the crash.

        Returns:
            The component shared by all potential regressor bugs when they
            agree on a single one; otherwise ``Core :: General``.
        """
        potential_components = {
            ComponentName(bug["product"], bug["component"])
            for bug in self.regressed_by_potential_bugs
        }
        if len(potential_components) == 1:
            return next(iter(potential_components))
        return ComponentName("Core", "General")
×
107

108

109
class SocorroInfoAnalyzer(socorro_util.SignatureStats):
    """Expose Bugzilla-oriented views over the Socorro facets of a signature.

    The values are read from ``self.signature["facets"]``.
    """

    # Class-level caches for the Bugzilla legal field values. They start as
    # ``None`` and are fetched from Bugzilla on first use.
    __bugzilla_os_legal_values = None
    __bugzilla_cpu_legal_values_map = None

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Return the Bugzilla operating system name for the provided name.

        Args:
            op_sys: The operating system name to convert.

        Returns:
            ``op_sys`` itself when it is a legal Bugzilla ``op_sys`` value;
            otherwise one of "macOS", "Windows", "Linux" or "Other".
        """
        if cls.__bugzilla_os_legal_values is None:
            cls.__bugzilla_os_legal_values = set(
                bugzilla.BugFields.fetch_field_values("op_sys")
            )

        if op_sys in cls.__bugzilla_os_legal_values:
            return op_sys

        if op_sys.startswith(("OS X ", "macOS ")):
            return "macOS"
        if op_sys.startswith("Windows"):
            return "Windows"
        if "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            return "Linux"
        return "Other"

    @property
    def bugzilla_op_sys(self) -> str:
        """The Bugzilla operating system value that best describes the crashes.

        Returns:
            The single operating system when only one remains after the
            normalization below, "Unspecified" when there is none, and "All"
            when there are several.
        """
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # Keep only the first word of each value.
            # NOTE(review): presumably this collapses version-specific values
            # of the same operating system into one entry - confirm.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        # An unrecognized value next to a single recognized one is ignored.
        if len(all_op_sys) == 2 and "Other" in all_op_sys:
            all_op_sys.remove("Other")

        if len(all_op_sys) == 1:
            return next(iter(all_op_sys))

        if len(all_op_sys) == 0:
            return "Unspecified"

        return "All"

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Return the Bugzilla CPU name for the provided name.

        The lookup is case-insensitive: the legal values are indexed by their
        lower-cased form, and ``cpu`` is lower-cased before the lookup.

        Args:
            cpu: The CPU architecture name to convert.

        Returns:
            The matching legal Bugzilla ``rep_platform`` value, or "Other"
            when there is no match.
        """
        if cls.__bugzilla_cpu_legal_values_map is None:
            cls.__bugzilla_cpu_legal_values_map = {
                value.lower(): value
                for value in bugzilla.BugFields.fetch_field_values("rep_platform")
            }

        return cls.__bugzilla_cpu_legal_values_map.get(cpu.lower(), "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The Bugzilla CPU value that best describes the crashes.

        Returns:
            The single CPU architecture when only one remains after ignoring
            "Other", "Unspecified" when there is none, and "All" when there
            are several.
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        # An unrecognized value next to a single recognized one is ignored.
        if len(all_cpu_arch) == 2 and "Other" in all_cpu_arch:
            all_cpu_arch.remove("Other")

        if len(all_cpu_arch) == 1:
            return next(iter(all_cpu_arch))

        if len(all_cpu_arch) == 0:
            return "Unspecified"

        return "All"

    @property
    def num_user_comments(self) -> int:
        """The value of the ``cardinality_user_comments`` facet."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether ``num_user_comments`` is greater than zero."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The term of the first entry in the ``proto_signature`` facet."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The count of the first entry in the ``proto_signature`` facet."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    @property
    def build_ids(self) -> Iterator[int]:
        """The terms of the ``build_id`` facet, in facet order."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The term of the first entry in the ``build_id`` facet."""
        return self.signature["facets"]["build_id"][0]["term"]
×
209

210

211
class SignatureAnalyzer(SocorroInfoAnalyzer, ClouseauReportsAnalyzer):
    """Analyze a crash signature using both its Socorro and Clouseau data."""

    # Forwarded as the ``platforms`` argument when initializing the
    # SocorroInfoAnalyzer base.
    platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
    ):
        """Initialize both analyzer bases.

        Args:
            signature: The Socorro facet data for the signature.
            num_total_crashes: The total number of crashes, forwarded to the
                SocorroInfoAnalyzer base.
            clouseau_reports: The Clouseau reports for the signature.
        """
        # The two bases take different arguments, so each initializer is
        # called explicitly.
        SocorroInfoAnalyzer.__init__(
            self, signature, num_total_crashes, platforms=self.platforms
        )
        ClouseauReportsAnalyzer.__init__(self, clouseau_reports)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Yield crash reports from Socorro matching the given criteria.

        This is a generator, so the Socorro request is only issued once the
        result starts being iterated.

        Args:
            proto_signature: The exact proto signature to match.
            build_id: The build ID (or build IDs) to match.
            limit: The maximum number of results to request.

        Yields:
            The search hits; only the ``uuid`` column is requested.
        """
        params = {
            "proto_signature": "=" + proto_signature,
            "build_id": build_id,
            "_columns": [
                "uuid",
            ],
            "_results_number": limit,
        }

        def handler(res: dict, data: dict):
            data.update(res)

        data: dict = {}
        socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait()

        yield from data["hits"]

    def fetch_representing_processed_crash(self) -> dict:
        """Fetch a processed crash that is representative of the signature.

        Candidates are tried in order: Clouseau reports (highest score first),
        then a report from the top build ID, then a report from any of the
        build IDs.

        Returns:
            The first processed crash that is accepted. When the top proto
            signature accounts for more than 60% of the crashes, only crashes
            with that proto signature are accepted.

        Raises:
            NoCrashReportFoundError: If none of the candidates is accepted.
        """
        limit_to_top_proto_signature = (
            self.num_top_proto_signature_crashes / self.num_crashes > 0.6
        )

        reports = itertools.chain(
            # Reports with a higher score from clouseau are more likely to be
            # useful.
            sorted(
                self._clouseau_reports,
                key=lambda report: report["max_score"],
                reverse=True,
            ),
            # Next we try find reports from the top crashing build because they
            # are likely to be representative.
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self.build_ids),
        )
        for report in reports:
            uuid = report["uuid"]
            processed_crash = socorro.ProcessedCrash.get_processed(uuid)[uuid]
            if (
                not limit_to_top_proto_signature
                or processed_crash["proto_signature"] == self.top_proto_signature
            ):
                # TODO(investigate): maybe we should check if the stack is
                # corrupted (ask gsvelto or willkg about how to detect that)
                return processed_crash

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )
286

287

288
class SignaturesDataFetcher:
    """Fetch the Clouseau and Socorro data needed to analyze crash signatures."""

    def __init__(
        self,
        signatures,
        product: str = "Firefox",
        channel: str = "nightly",
    ):
        """Store the query criteria.

        Args:
            signatures: The crash signatures to fetch data for. They are
                iterated when building the Socorro query.
            product: The product name to query for.
            channel: The release channel to query for.
        """
        self._signatures = signatures
        self._product = product
        self._channel = channel

    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
        """Fetch the Clouseau reports for the signatures.

        Returns:
            The result of ``clouseau.Reports.get_by_signatures``; it is used
            by ``analyze`` as a mapping from signature term to reports.
        """
        return clouseau.Reports.get_by_signatures(
            self._signatures,
            product=self._product,
            channel=self._channel,
        )

    def _socorro_search_params(self, date_range) -> dict:
        """Build the Socorro search parameters for the given date range."""
        return {
            "product": self._product,
            # TODO(investigate): should we include all release channels?
            "release_channel": self._channel,
            # TODO(investigate): should we limit based on the build date as well?
            "date": date_range,
            # TODO: split signatures into chunks to avoid very long query URLs
            "signature": ["=" + signature for signature in self._signatures],
            "_aggs.signature": [
                "build_id",
                "cpu_arch",
                "proto_signature",
                "_cardinality.user_comments",
                "platform_pretty_version",
                # The following are needed for SignatureStats:
                "platform",
                "is_garbage_collecting",
                "_cardinality.install_time",
                "startup_crash",
                "_histogram.uptime",
                "process_type",
            ],
            # Only the aggregations are used, so no individual hits are
            # requested.
            "_results_number": 0,
            "_facets_size": 10000,
        }

    def fetch_socorro_info(self) -> tuple[list[dict], int]:
        """Fetch the per-signature facets from Socorro for the last week.

        Returns:
            A tuple of the ``signature`` facet entries and the total number of
            crashes reported by the search.
        """
        # TODO(investigate): should we increase the duration to 6 months?
        duration = timedelta(weeks=1)
        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - duration
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = self._socorro_search_params(date_range)

        def handler(search_results: dict, data: dict):
            data["num_total_crashes"] = search_results["total"]
            data["signatures"] = search_results["facets"]["signature"]

        data: dict = {}
        socorro.SuperSearchUnredacted(
            params=params,
            handler=handler,
            handlerdata=data,
        ).wait()

        return data["signatures"], data["num_total_crashes"]

    def analyze(self) -> list[SignatureAnalyzer]:
        """Fetch the data and build an analyzer for each signature.

        Returns:
            One ``SignatureAnalyzer`` per Socorro signature that also has
            Clouseau reports.
        """
        clouseau_reports = self.fetch_clouseau_crash_reports()
        signatures, num_total_crashes = self.fetch_socorro_info()

        return [
            SignatureAnalyzer(
                signature,
                num_total_crashes,
                clouseau_reports[signature["term"]],
            )
            for signature in signatures
            # TODO(investigate): For now, we are ignoring signatures that are
            # not analyzed by clouseau. We should investigate why they are not
            # analyzed and whether we should include them.
            if signature["term"] in clouseau_reports
        ]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc