#4602

pending completion

Build #4602

Build Type

push

coveralls-python

Committed by

suhaibmujahid

Commit Message

Lazy load Bugzilla legal fields

Run Details

646 of 3410 branches covered (18.94%)

8 of 8 new or added lines in 1 file covered. (100.0%)

1828 of 8473 relevant lines covered (21.57%)

0.22 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/bugbot/crash/analyzer.py

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

import itertools
from datetime import timedelta
from functools import cached_property
from typing import Iterable, Iterator

from libmozdata import bugzilla, clouseau, socorro
from libmozdata import utils as lmdutils

from bugbot.components import ComponentName
from bugbot.crash import socorro_util


class NoCrashReportFoundError(Exception):
    """Signal that no crash report satisfies the required criteria."""


class ClouseauReportsAnalyzer:
    """Derive regressor information from the Clouseau reports of a signature.

    Each report is read as a dict with a "max_score" value and a "changesets"
    list; each changeset is read as a dict with the "bug_id", "changeset",
    "max_score", "is_merge", and "is_backedout" keys.
    """

    # Lowest score at which a report or a changeset is accepted as pointing
    # to a potential regressor.
    REGRESSOR_MINIMUM_SCORE: int = 8

    def __init__(self, reports: Iterable[dict]):
        """Store the reports to analyze.

        Args:
            reports: The Clouseau reports to analyze.
        """
        # The reports are iterated several times by the properties below, so
        # a one-shot iterable is materialized up front instead of being
        # silently exhausted by the first property that reads it.
        self._clouseau_reports = list(reports or ())

    @cached_property
    def max_score(self):
        """The highest "max_score" among the reports, or 0 without reports."""
        return max(
            (report["max_score"] for report in self._clouseau_reports),
            default=0,
        )

    def _iter_potential_regressor_changesets(self) -> Iterator[dict]:
        """Yield the changesets that qualify as potential regressors.

        A changeset qualifies when both it and its report reach the accepted
        score (the greater of `REGRESSOR_MINIMUM_SCORE` and `max_score`) and
        it is neither a merge nor backed out.
        """
        minimum_accepted_score = max(self.REGRESSOR_MINIMUM_SCORE, self.max_score)
        for report in self._clouseau_reports:
            if report["max_score"] >= minimum_accepted_score:
                for changeset in report["changesets"]:
                    if (
                        changeset["max_score"] >= minimum_accepted_score
                        and not changeset["is_merge"]
                        and not changeset["is_backedout"]
                    ):
                        yield changeset

    @cached_property
    def regressed_by_potential_bug_ids(self) -> set[int]:
        """The bug IDs of the changesets that qualify as potential regressors."""
        return {
            changeset["bug_id"]
            for changeset in self._iter_potential_regressor_changesets()
        }

    @cached_property
    def regressed_by_patch(self) -> str | None:
        """The single changeset that qualifies as the regressor.

        The value is `None` unless exactly one distinct changeset qualifies.
        """
        potential_patches = {
            changeset["changeset"]
            for changeset in self._iter_potential_regressor_changesets()
        }
        if len(potential_patches) == 1:
            return next(iter(potential_patches))
        return None

    @cached_property
    def regressed_by(self) -> int | None:
        """The single bug ID that qualifies as the regressor.

        The value is `None` unless exactly one distinct bug ID qualifies.
        """
        bug_ids = self.regressed_by_potential_bug_ids
        if len(bug_ids) == 1:
            return next(iter(bug_ids))
        return None

    @cached_property
    def regressed_by_potential_bugs(self) -> list[dict]:
        """The Bugzilla data of the potential regressor bugs.

        Only the "id", "assigned_to", "product", and "component" fields are
        requested from Bugzilla.
        """
        bug_ids = self.regressed_by_potential_bug_ids
        if not bug_ids:
            # Nothing to look up, so skip the Bugzilla call altogether.
            return []

        def handler(bug: dict, data: list):
            data.append(bug)

        bugs: list[dict] = []
        bugzilla.Bugzilla(
            bugids=bug_ids,
            include_fields=[
                "id",
                "assigned_to",
                "product",
                "component",
            ],
            bughandler=handler,
            bugdata=bugs,
        ).wait()

        return bugs

    @cached_property
    def regressed_by_author(self) -> dict | None:
        """The "assigned_to_detail" value of the single regressor bug.

        The value is `None` when there is no single regressor, or when the
        regressor bug is missing from the fetched bugs (presumably because
        the bug is not accessible; verify against Bugzilla).
        """
        regressor_id = self.regressed_by
        if not regressor_id:
            return None

        # Match on the ID rather than trusting the position in the list, so a
        # missing bug yields `None` instead of an IndexError.
        for bug in self.regressed_by_potential_bugs:
            if bug["id"] == regressor_id:
                return bug["assigned_to_detail"]
        return None

    @cached_property
    def crash_component(self) -> ComponentName:
        """The component shared by all the potential regressor bugs.

        Falls back to "Core :: General" unless the potential regressor bugs
        agree on exactly one product/component pair.
        """
        potential_components = {
            ComponentName(bug["product"], bug["component"])
            for bug in self.regressed_by_potential_bugs
        }
        if len(potential_components) == 1:
            return next(iter(potential_components))
        return ComponentName("Core", "General")


class SocorroInfoAnalyzer(socorro_util.SignatureStats):
    """Expose the Socorro facets of a crash signature in Bugzilla terms.

    The legal Bugzilla field values are fetched on first use and then cached
    on the class.
    """

    # Lazily populated caches of the legal Bugzilla field values.
    __bugzilla_os_legal_values = None
    __bugzilla_cpu_legal_values_map = None

    @staticmethod
    def _single_value_or_all(values: set[str]) -> str:
        """Collapse a set of Bugzilla field values into one field value.

        "Other" is ignored when it accompanies exactly one other value. The
        result is the only remaining value, "Unspecified" when there are no
        values, or "All" otherwise.
        """
        if len(values) == 2 and "Other" in values:
            values = values - {"Other"}

        if len(values) == 1:
            return next(iter(values))

        if not values:
            return "Unspecified"

        return "All"

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Return the Bugzilla operating system matching the provided name.

        Args:
            op_sys: The operating system name to convert.

        Returns:
            The name itself when it is a legal Bugzilla "op_sys" value,
            otherwise one of "macOS", "Windows", "Linux", or "Other".
        """
        if cls.__bugzilla_os_legal_values is None:
            cls.__bugzilla_os_legal_values = set(
                bugzilla.BugFields.fetch_field_values("op_sys")
            )

        if op_sys in cls.__bugzilla_os_legal_values:
            return op_sys

        if op_sys.startswith(("OS X ", "macOS ")):
            return "macOS"
        if op_sys.startswith("Windows"):
            return "Windows"
        if "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            return "Linux"
        return "Other"

    @property
    def bugzilla_op_sys(self) -> str:
        """The Bugzilla operating system value covering the crash reports."""
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # TODO: explain this workaround
            # Only the first space-separated word of each name is kept
            # (presumably so versions of the same OS collapse into one).
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        return self._single_value_or_all(all_op_sys)

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Return the Bugzilla CPU value matching the provided name.

        The match against the legal "rep_platform" values ignores case.

        Args:
            cpu: The CPU name to convert.

        Returns:
            The matching legal Bugzilla value, or "Other" without a match.
        """
        if cls.__bugzilla_cpu_legal_values_map is None:
            cls.__bugzilla_cpu_legal_values_map = {
                value.lower(): value
                for value in bugzilla.BugFields.fetch_field_values("rep_platform")
            }

        # The map is keyed by lower-cased values, so the lookup key must be
        # lower-cased as well for the match to be case-insensitive.
        return cls.__bugzilla_cpu_legal_values_map.get(cpu.lower(), "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The Bugzilla CPU value covering the crash reports."""
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        return self._single_value_or_all(all_cpu_arch)

    @property
    def num_user_comments(self) -> int:
        """The value of the user comments cardinality facet."""
        # TODO: count useful/interesting user comments (e.g., exclude one-word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether the user comments count is greater than zero."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The term of the first entry in the proto signature facet."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The count of the first entry in the proto signature facet."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    @property
    def build_ids(self) -> Iterator[int]:
        """The terms of the build ID facet, in facet order."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The term of the first entry in the build ID facet."""
        return self.signature["facets"]["build_id"][0]["term"]


class SignatureAnalyzer(SocorroInfoAnalyzer, ClouseauReportsAnalyzer):
    """Analyze one crash signature using both Socorro and Clouseau data."""

    # Platform descriptors handed to the Socorro analyzer on construction.
    platforms = [
        {"short_name": short_name, "name": name}
        for short_name, name in (
            ("win", "Windows"),
            ("mac", "Mac OS X"),
            ("lin", "Linux"),
            ("and", "Android"),
            ("unknown", "Unknown"),
        )
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
    ):
        """Initialize both parent analyzers.

        Args:
            signature: The Socorro data of the signature.
            num_total_crashes: The total number of crashes.
            clouseau_reports: The Clouseau reports of the signature.
        """
        # Each base class takes different arguments, hence the explicit calls.
        SocorroInfoAnalyzer.__init__(
            self,
            signature,
            num_total_crashes,
            platforms=self.platforms,
        )
        ClouseauReportsAnalyzer.__init__(self, clouseau_reports)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Yield the Socorro hits matching a proto signature and build ID(s).

        Only the "uuid" column is requested. Being a generator, the search is
        not performed until the first item is requested.

        Args:
            proto_signature: The proto signature to match exactly.
            build_id: The build ID(s) to filter on.
            limit: The number of results to request.
        """
        search_results: dict = {}

        def collect(response: dict, sink: dict):
            sink.update(response)

        query = {
            "proto_signature": "=" + proto_signature,
            "build_id": build_id,
            "_columns": [
                "uuid",
            ],
            "_results_number": limit,
        }
        search = socorro.SuperSearch(
            params=query,
            handler=collect,
            handlerdata=search_results,
        )
        search.wait()

        for hit in search_results["hits"]:
            yield hit

    def fetch_representing_processed_crash(self) -> dict:
        """Fetch a processed crash that represents the signature.

        Candidates are tried in order: the Clouseau reports from the highest
        score down, then reports from the top build ID, then reports from any
        of the build IDs. When the top proto signature accounts for more than
        60% of the crashes, only a crash with that proto signature is
        accepted.

        Raises:
            NoCrashReportFoundError: If none of the candidates is accepted.
        """
        top_share = self.num_top_proto_signature_crashes / self.num_crashes
        must_match_top_proto_signature = top_share > 0.6

        def score_of(report):
            return report["max_score"]

        # Reports with a higher score from clouseau are more likely to be
        # useful.
        by_descending_score = sorted(
            self._clouseau_reports,
            key=score_of,
            reverse=True,
        )
        # Next we try find reports from the top crashing build because they
        # are likely to be representative.
        from_top_build = self._fetch_crash_reports(
            self.top_proto_signature, self.top_build_id
        )
        from_all_builds = self._fetch_crash_reports(
            self.top_proto_signature, self.build_ids
        )

        candidates = itertools.chain(
            by_descending_score, from_top_build, from_all_builds
        )
        for candidate in candidates:
            crash_id = candidate["uuid"]
            processed = socorro.ProcessedCrash.get_processed(crash_id)[crash_id]
            if (
                must_match_top_proto_signature
                and processed["proto_signature"] != self.top_proto_signature
            ):
                continue

            # TODO(investigate): maybe we should check if the stack is
            # corrupted (ask gsvelto or willkg about how to detect that)
            return processed

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )


class SignaturesDataFetcher:
    """Fetch the data needed to analyze a set of crash signatures."""

    # Socorro aggregations requested for each signature. Every name appears
    # only once.
    _SIGNATURE_AGGREGATIONS = (
        "build_id",
        "cpu_arch",
        "proto_signature",
        "_cardinality.user_comments",
        "platform_pretty_version",
        # The following are needed for SignatureStats:
        "platform",
        "is_garbage_collecting",
        "_cardinality.install_time",
        "startup_crash",
        "_histogram.uptime",
        "process_type",
    )

    def __init__(
        self,
        signatures,
        product: str = "Firefox",
        channel: str = "nightly",
    ):
        """Store the query criteria.

        Args:
            signatures: The crash signatures to fetch data for.
            product: The product to restrict the queries to.
            channel: The release channel to restrict the queries to.
        """
        self._signatures = signatures
        self._product = product
        self._channel = channel

    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
        """Fetch the Clouseau reports of the signatures.

        Returns:
            The result of `clouseau.Reports.get_by_signatures` (looked up by
            signature term in `analyze`).
        """
        return clouseau.Reports.get_by_signatures(
            self._signatures,
            product=self._product,
            channel=self._channel,
        )

    def fetch_socorro_info(self) -> tuple[list[dict], int]:
        """Fetch the Socorro aggregations of the signatures for the last week.

        Returns:
            A tuple of the signature facets and the total number of crashes
            reported by the search.
        """
        # TODO(investigate): should we increase the duration to 6 months?
        duration = timedelta(weeks=1)
        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - duration
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": self._product,
            # TODO(investigate): should we include all release channels?
            "release_channel": self._channel,
            # TODO(investigate): should we limit based on the build date as well?
            "date": date_range,
            # TODO: split signatures into chunks to avoid very long query URLs
            "signature": ["=" + signature for signature in self._signatures],
            "_aggs.signature": list(self._SIGNATURE_AGGREGATIONS),
            # Only the aggregations are used, so no individual hit is requested.
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_results: dict, data: dict):
            data["num_total_crashes"] = search_results["total"]
            data["signatures"] = search_results["facets"]["signature"]

        data: dict = {}
        socorro.SuperSearchUnredacted(
            params=params,
            handler=handler,
            handlerdata=data,
        ).wait()

        return data["signatures"], data["num_total_crashes"]

    def analyze(self) -> list[SignatureAnalyzer]:
        """Build an analyzer for each signature that has Clouseau reports.

        Returns:
            One `SignatureAnalyzer` per signature returned by Socorro whose
            term is present in the Clouseau reports.
        """
        clouseau_reports = self.fetch_clouseau_crash_reports()
        signatures, num_total_crashes = self.fetch_socorro_info()

        return [
            SignatureAnalyzer(
                signature,
                num_total_crashes,
                clouseau_reports[signature["term"]],
            )
            for signature in signatures
            # TODO(investigate): For now, we are ignoring signatures that are
            # not analyzed by clouseau. We should investigate why they are not
            # analyzed and whether we should include them.
            if signature["term"] in clouseau_reports
        ]

1	# This Source Code Form is subject to the terms of the Mozilla Public
2	# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3	# You can obtain one at http://mozilla.org/MPL/2.0/.
4
5	import itertools	×
6	from datetime import timedelta	×
7	from functools import cached_property	×
8	from typing import Iterable, Iterator	×
9
10	from libmozdata import bugzilla, clouseau, socorro	×
11	from libmozdata import utils as lmdutils	×
12
13	from bugbot.components import ComponentName	×
14	from bugbot.crash import socorro_util	×
15
16
17	class NoCrashReportFoundError(Exception):	×
18	"""Raised when no crash report is found with the required criteria."""
19
20
21	class ClouseauReportsAnalyzer:	×
22	REGRESSOR_MINIMUM_SCORE: int = 8	×
23
24	def __init__(self, reports: Iterable[dict]):	×
25	self._clouseau_reports = reports	×
26
27	@cached_property	×
28	def max_score(self):	×
29	if not self._clouseau_reports:	×
30	return 0	×
31	return max(report["max_score"] for report in self._clouseau_reports)	×
32
33	@cached_property	×
34	def regressed_by_potential_bug_ids(self) -> set[int]:	×
35	minimum_accepted_score = max(self.REGRESSOR_MINIMUM_SCORE, self.max_score)	×
36	return {	×
37	changeset["bug_id"]
38	for report in self._clouseau_reports
39	if report["max_score"] >= minimum_accepted_score
40	for changeset in report["changesets"]
41	if changeset["max_score"] >= minimum_accepted_score
42	and not changeset["is_merge"]
43	and not changeset["is_backedout"]
44	}
45
46	@cached_property	×
47	def regressed_by_patch(self) -> str \| None:	×
48	minimum_accepted_score = max(self.REGRESSOR_MINIMUM_SCORE, self.max_score)	×
49	potential_patches = {	×
50	changeset["changeset"]
51	for report in self._clouseau_reports
52	if report["max_score"] >= minimum_accepted_score
53	for changeset in report["changesets"]
54	if changeset["max_score"] >= minimum_accepted_score
55	and not changeset["is_merge"]
56	and not changeset["is_backedout"]
57	}
58	if len(potential_patches) == 1:	×
59	return next(iter(potential_patches))	×
60	return None	×
61
62	@cached_property	×
63	def regressed_by(self) -> int \| None:	×
64	bug_ids = self.regressed_by_potential_bug_ids	×
65	if len(bug_ids) == 1:	×
66	return next(iter(bug_ids))	×
67	return None	×
68
69	@cached_property	×
70	def regressed_by_potential_bugs(self) -> list[dict]:	×
71	def handler(bug: dict, data: list):	×
72	data.append(bug)	×
73
74	bugs: list[dict] = []	×
75	bugzilla.Bugzilla(	×
76	bugids=self.regressed_by_potential_bug_ids,
77	include_fields=[
78	"id",
79	"assigned_to",
80	"product",
81	"component",
82	],
83	bughandler=handler,
84	bugdata=bugs,
85	).wait()
86
87	return bugs	×
88
89	@cached_property	×
90	def regressed_by_author(self) -> dict \| None:	×
91	if not self.regressed_by:	×
92	return None	×
93
94	bug = self.regressed_by_potential_bugs[0]	×
95	assert bug["id"] == self.regressed_by	×
96	return bug["assigned_to_detail"]	×
97
98	@cached_property	×
99	def crash_component(self) -> ComponentName:	×
100	potential_components = {	×
101	ComponentName(bug["product"], bug["component"])
102	for bug in self.regressed_by_potential_bugs
103	}
104	if len(potential_components) == 1:	×
105	return next(iter(potential_components))	×
106	return ComponentName("Core", "General")	×
107
108
109	class SocorroInfoAnalyzer(socorro_util.SignatureStats):	×
110	__bugzilla_os_legal_values = None	×
111	__bugzilla_cpu_legal_values_map = None	×
112
113	@classmethod	×
114	def to_bugzilla_op_sys(cls, op_sys: str) -> str:	×
115	if cls.__bugzilla_os_legal_values is None:	×
116	cls.__bugzilla_os_legal_values = set(	×
117	bugzilla.BugFields.fetch_field_values("op_sys")
118	)
119
120	if op_sys in cls.__bugzilla_os_legal_values:	×
121	return op_sys	×
122
123	if op_sys.startswith("OS X ") or op_sys.startswith("macOS "):	×
124	op_sys = "macOS"	×
125	elif op_sys.startswith("Windows"):	×
126	op_sys = "Windows"	×
127	elif "Linux" in op_sys or op_sys.startswith("Ubuntu"):	×
128	op_sys = "Linux"	×
129	else:
130	op_sys = "Other"	×
131
132	return op_sys	×
133
134	@property	×
135	def bugzilla_op_sys(self) -> str:	×
136	all_op_sys = {	×
137	self.to_bugzilla_op_sys(op_sys["term"])
138	for op_sys in self.signature["facets"]["platform_pretty_version"]
139	}
140
141	if len(all_op_sys) > 1:	×
142	# TODO: explain this workaround
143	all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}	×
144
145	if len(all_op_sys) == 2 and "Other" in all_op_sys:	×
146	all_op_sys.remove("Other")	×
147
148	if len(all_op_sys) == 1:	×
149	return next(iter(all_op_sys))	×
150
151	if len(all_op_sys) == 0:	×
152	return "Unspecified"	×
153
154	return "All"	×
155
156	@classmethod	×
157	def to_bugzilla_cpu(cls, cpu: str) -> str:	×
158	if cls.__bugzilla_cpu_legal_values_map is None:	×
159	cls.__bugzilla_cpu_legal_values_map = {	×
160	value.lower(): value
161	for value in bugzilla.BugFields.fetch_field_values("rep_platform")
162	}
163
164	return cls.__bugzilla_cpu_legal_values_map.get(cpu, "Other")	×
165
166	@property	×
167	def bugzilla_cpu_arch(self) -> str:	×
168	all_cpu_arch = {	×
169	self.to_bugzilla_cpu(cpu["term"])
170	for cpu in self.signature["facets"]["cpu_arch"]
171	}
172
173	if len(all_cpu_arch) == 2 and "Other" in all_cpu_arch:	×
174	all_cpu_arch.remove("Other")	×
175
176	if len(all_cpu_arch) == 1:	×
177	return next(iter(all_cpu_arch))	×
178
179	if len(all_cpu_arch) == 0:	×
180	return "Unspecified"	×
181
182	return "All"	×
183
184	@property	×
185	def num_user_comments(self) -> int:	×
186	# TODO: count useful/interesting user comments (e.g., exclude one-word comments)
187	return self.signature["facets"]["cardinality_user_comments"]["value"]	×
188
189	@property	×
190	def has_user_comments(self) -> bool:	×
191	return self.num_user_comments > 0	×
192
193	@property	×
194	def top_proto_signature(self) -> str:	×
195	return self.signature["facets"]["proto_signature"][0]["term"]	×
196
197	@property	×
198	def num_top_proto_signature_crashes(self) -> int:	×
199	return self.signature["facets"]["proto_signature"][0]["count"]	×
200
201	@property	×
202	def build_ids(self) -> Iterator[int]:	×
203	for build_id in self.signature["facets"]["build_id"]:	×
204	yield build_id["term"]	×
205
206	@property	×
207	def top_build_id(self) -> int:	×
208	return self.signature["facets"]["build_id"][0]["term"]	×
209
210
211	class SignatureAnalyzer(SocorroInfoAnalyzer, ClouseauReportsAnalyzer):	×
212	platforms = [	×
213	{"short_name": "win", "name": "Windows"},
214	{"short_name": "mac", "name": "Mac OS X"},
215	{"short_name": "lin", "name": "Linux"},
216	{"short_name": "and", "name": "Android"},
217	{"short_name": "unknown", "name": "Unknown"},
218	]
219
220	def __init__(	×
221	self,
222	signature: dict,
223	num_total_crashes: int,
224	clouseau_reports: list[dict],
225	):
226	SocorroInfoAnalyzer.__init__(	×
227	self, signature, num_total_crashes, platforms=self.platforms
228	)
229	ClouseauReportsAnalyzer.__init__(self, clouseau_reports)	×
230
231	def _fetch_crash_reports(	×
232	self,
233	proto_signature: str,
234	build_id: int \| Iterable[int],
235	limit: int = 1,
236	) -> Iterator[dict]:
237	params = {	×
238	"proto_signature": "=" + proto_signature,
239	"build_id": build_id,
240	"_columns": [
241	"uuid",
242	],
243	"_results_number": limit,
244	}
245
246	def handler(res: dict, data: dict):	×
247	data.update(res)	×
248
249	data: dict = {}	×
250	socorro.SuperSearch(params=params, handler=handler, handlerdata=data).wait()	×
251
252	yield from data["hits"]	×
253
254	def fetch_representing_processed_crash(self) -> dict:	×
255	limit_to_top_proto_signature = (	×
256	self.num_top_proto_signature_crashes / self.num_crashes > 0.6
257	)
258
259	reports = itertools.chain(	×
260	# Reports with a higher score from clouseau are more likely to be
261	# useful.
262	sorted(
263	self._clouseau_reports,
264	key=lambda report: report["max_score"],
265	reverse=True,
266	),
267	# Next we try find reports from the top crashing build because they
268	# are likely to be representative.
269	self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
270	self._fetch_crash_reports(self.top_proto_signature, self.build_ids),
271	)
272	for report in reports:	×
273	uuid = report["uuid"]	×
274	processed_crash = socorro.ProcessedCrash.get_processed(uuid)[uuid]	×
275	if (	×
276	not limit_to_top_proto_signature
277	or processed_crash["proto_signature"] == self.top_proto_signature
278	):
279	# TODO(investigate): maybe we should check if the stack is
280	# corrupted (ask gsvelto or willkg about how to detect that)
281	return processed_crash	×
282
283	raise NoCrashReportFoundError(	×
284	f"No crash report found with the most frequent proto signature for {self.signature_term}."
285	)
286
287
288	class SignaturesDataFetcher:	×
289	def __init__(	×
290	self,
291	signatures,
292	product: str = "Firefox",
293	channel: str = "nightly",
294	):
295	self._signatures = signatures	×
296	self._product = product	×
297	self._channel = channel	×
298
299	def fetch_clouseau_crash_reports(self) -> dict[str, list]:	×
300	return clouseau.Reports.get_by_signatures(	×
301	self._signatures,
302	product=self._product,
303	channel=self._channel,
304	)
305
306	def fetch_socorro_info(self) -> tuple[list[dict], int]:	×
307	# TODO(investigate): should we increase the duration to 6 months?
308	duration = timedelta(weeks=1)	×
309	end_date = lmdutils.get_date_ymd("today")	×
310	start_date = end_date - duration	×
311	date_range = socorro.SuperSearch.get_search_date(start_date, end_date)	×
312
313	params = {	×
314	"product": self._product,
315	# TODO(investigate): should we include all release channels?
316	"release_channel": self._channel,
317	# TODO(investigate): should we limit based on the build date as well?
318	"date": date_range,
319	# TODO: split signatures into chunks to avoid very long query URLs
320	"signature": ["=" + signature for signature in self._signatures],
321	"_aggs.signature": [
322	"build_id",
323	"cpu_arch",
324	"proto_signature",
325	"_cardinality.user_comments",
326	"cpu_arch",
327	"platform_pretty_version",
328	# The following are needed for SignatureStats:
329	"platform",
330	"is_garbage_collecting",
331	"_cardinality.install_time",
332	"startup_crash",
333	"_histogram.uptime",
334	"process_type",
335	],
336	"_results_number": 0,
337	"_facets_size": 10000,
338	}
339
340	def handler(search_results: dict, data: dict):	×
341	data["num_total_crashes"] = search_results["total"]	×
342	data["signatures"] = search_results["facets"]["signature"]	×
343
344	data: dict = {}	×
345	socorro.SuperSearchUnredacted(	×
346	params=params,
347	handler=handler,
348	handlerdata=data,
349	).wait()
350
351	return data["signatures"], data["num_total_crashes"]	×
352
353	def analyze(self) -> list[SignatureAnalyzer]:	×
354	clouseau_reports = self.fetch_clouseau_crash_reports()	×
355	signatures, num_total_crashes = self.fetch_socorro_info()	×
356
357	return [	×
358	SignatureAnalyzer(
359	signature,
360	num_total_crashes,
361	clouseau_reports[signature["term"]],
362	)
363	for signature in signatures
364	# TODO(investigate): For now, we are ignoring signatures that are
365	# not analyzed by clouseau. We should investigate why they are not
366	# analyzed and whether we should include them.
367	if signature["term"] in clouseau_reports
368	]

mozilla / relman-auto-nag / #4602

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous