• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mozilla / relman-auto-nag / #4601

pending completion
#4601

push

coveralls-python

suhaibmujahid
Highlight crash address commonalities

646 of 3418 branches covered (18.9%)

40 of 40 new or added lines in 1 file covered. (100.0%)

1828 of 8509 relevant lines covered (21.48%)

0.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/bugbot/crash/analyzer.py
1
# This Source Code Form is subject to the terms of the Mozilla Public
2
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
3
# You can obtain one at http://mozilla.org/MPL/2.0/.
4

5
import itertools
×
6
from datetime import timedelta
×
7
from functools import cached_property
×
8
from typing import Iterable, Iterator
×
9

10
from libmozdata import bugzilla, clouseau, socorro
×
11
from libmozdata import utils as lmdutils
×
12

13
from bugbot.components import ComponentName
×
14
from bugbot.crash import socorro_util
×
15

16
# Crash address commonalities: crashes were near null, or were near allocator
# poison values.
OFFSET_64_BIT = 0x1000
OFFSET_32_BIT = 0x100
ALLOCATOR_ADDRESSES_64_BIT = (
    0xE5E5E5E5E5E5E5E5,
    0x4B4B4B4B4B4B4B4B,
)
ALLOCATOR_ADDRESSES_32_BIT = (
    0xE5E5E5E5,
    0x4B4B4B4B,
)
# NOTE: these ranges must be materialized as tuples, not left as generator
# expressions. A generator is exhausted after its first full iteration, so
# every later check against the ranges would silently see an empty sequence.
ALLOCATOR_RANGES_64_BIT = tuple(
    (addr - OFFSET_64_BIT, addr + OFFSET_64_BIT) for addr in ALLOCATOR_ADDRESSES_64_BIT
)
ALLOCATOR_RANGES_32_BIT = tuple(
    (addr - OFFSET_32_BIT, addr + OFFSET_32_BIT) for addr in ALLOCATOR_ADDRESSES_32_BIT
)
34

35

36
def is_near_null_address(str_address) -> bool:
    """Whether the crash address is near null (within the near-null offset)."""
    value = int(str_address, 0)
    # 18 characters covers "0x" plus 16 hex digits, i.e. a 64-bit address.
    offset = OFFSET_64_BIT if len(str_address) >= 18 else OFFSET_32_BIT
    return -offset <= value <= offset
×
44

45

46
def is_near_allocator_address(str_address) -> bool:
    """Whether the crash address falls in an allocator poison-value range."""
    value = int(str_address, 0)
    # 18 characters covers "0x" plus 16 hex digits, i.e. a 64-bit address.
    ranges = (
        ALLOCATOR_RANGES_64_BIT if len(str_address) >= 18 else ALLOCATOR_RANGES_32_BIT
    )
    return any(low <= value <= high for low, high in ranges)
56

57

58
class NoCrashReportFoundError(Exception):
    """Error raised when no crash report matching the required criteria exists."""
60

61

62
class ClouseauReportsAnalyzer:
×
63
    REGRESSOR_MINIMUM_SCORE: int = 8
×
64

65
    def __init__(self, reports: Iterable[dict]):
×
66
        self._clouseau_reports = reports
×
67

68
    @cached_property
×
69
    def max_score(self):
×
70
        if not self._clouseau_reports:
×
71
            return 0
×
72
        return max(report["max_score"] for report in self._clouseau_reports)
×
73

74
    @cached_property
×
75
    def regressed_by_potential_bug_ids(self) -> set[int]:
×
76
        minimum_accepted_score = max(self.REGRESSOR_MINIMUM_SCORE, self.max_score)
×
77
        return {
×
78
            changeset["bug_id"]
79
            for report in self._clouseau_reports
80
            if report["max_score"] >= minimum_accepted_score
81
            for changeset in report["changesets"]
82
            if changeset["max_score"] >= minimum_accepted_score
83
            and not changeset["is_merge"]
84
            and not changeset["is_backedout"]
85
        }
86

87
    @cached_property
×
88
    def regressed_by_patch(self) -> str | None:
×
89
        minimum_accepted_score = max(self.REGRESSOR_MINIMUM_SCORE, self.max_score)
×
90
        potential_patches = {
×
91
            changeset["changeset"]
92
            for report in self._clouseau_reports
93
            if report["max_score"] >= minimum_accepted_score
94
            for changeset in report["changesets"]
95
            if changeset["max_score"] >= minimum_accepted_score
96
            and not changeset["is_merge"]
97
            and not changeset["is_backedout"]
98
        }
99
        if len(potential_patches) == 1:
×
100
            return next(iter(potential_patches))
×
101
        return None
×
102

103
    @cached_property
×
104
    def regressed_by(self) -> int | None:
×
105
        bug_ids = self.regressed_by_potential_bug_ids
×
106
        if len(bug_ids) == 1:
×
107
            return next(iter(bug_ids))
×
108
        return None
×
109

110
    @cached_property
×
111
    def regressed_by_potential_bugs(self) -> list[dict]:
×
112
        def handler(bug: dict, data: list):
×
113
            data.append(bug)
×
114

115
        bugs: list[dict] = []
×
116
        bugzilla.Bugzilla(
×
117
            bugids=self.regressed_by_potential_bug_ids,
118
            include_fields=[
119
                "id",
120
                "assigned_to",
121
                "product",
122
                "component",
123
            ],
124
            bughandler=handler,
125
            bugdata=bugs,
126
        ).wait()
127

128
        return bugs
×
129

130
    @cached_property
×
131
    def regressed_by_author(self) -> dict | None:
×
132
        if not self.regressed_by:
×
133
            return None
×
134

135
        bug = self.regressed_by_potential_bugs[0]
×
136
        assert bug["id"] == self.regressed_by
×
137
        return bug["assigned_to_detail"]
×
138

139
    @cached_property
×
140
    def crash_component(self) -> ComponentName:
×
141
        potential_components = {
×
142
            ComponentName(bug["product"], bug["component"])
143
            for bug in self.regressed_by_potential_bugs
144
        }
145
        if len(potential_components) == 1:
×
146
            return next(iter(potential_components))
×
147
        return ComponentName("Core", "General")
×
148

149

150
class SocorroInfoAnalyzer(socorro_util.SignatureStats):
    """Analyze the crash data fetched from Socorro for one signature."""

    # Valid Bugzilla values for the "op_sys" and "rep_platform" bug fields.
    # NOTE(review): these are fetched at class-definition time, i.e. a
    # network request happens when this module is imported.
    __bugzilla_os_values = set(bugzilla.BugFields.fetch_field_values("op_sys"))
    __bugzilla_cpu_values = {
        value.lower(): value
        for value in bugzilla.BugFields.fetch_field_values("rep_platform")
    }

    @classmethod
    def to_bugzilla_op_sys(cls, op_sys: str) -> str:
        """Map a Socorro operating system name to a valid Bugzilla op_sys value.

        Names already accepted by Bugzilla pass through unchanged; known
        vendor prefixes are collapsed to their family name; anything else
        becomes "Other".
        """
        if op_sys in cls.__bugzilla_os_values:
            return op_sys

        if op_sys.startswith("OS X ") or op_sys.startswith("macOS "):
            op_sys = "macOS"
        elif op_sys.startswith("Windows"):
            op_sys = "Windows"
        elif "Linux" in op_sys or op_sys.startswith("Ubuntu"):
            op_sys = "Linux"
        else:
            op_sys = "Other"

        return op_sys

    @property
    def bugzilla_op_sys(self) -> str:
        """The Bugzilla op_sys value covering the platforms seen in the crashes.

        Returns a single OS name when the crashes agree, "Unspecified" when
        no platform facet is present, and "All" otherwise.
        """
        all_op_sys = {
            self.to_bugzilla_op_sys(op_sys["term"])
            for op_sys in self.signature["facets"]["platform_pretty_version"]
        }

        if len(all_op_sys) > 1:
            # TODO: explain this workaround
            # NOTE(review): presumably this collapses versioned names (e.g.
            # "Windows 10" vs "Windows 11") to their common family prefix so
            # they count as one OS — confirm.
            all_op_sys = {op_sys.split(" ")[0] for op_sys in all_op_sys}

        # Ignore the catch-all bucket when a concrete OS is also present.
        if len(all_op_sys) == 2 and "Other" in all_op_sys:
            all_op_sys.remove("Other")

        if len(all_op_sys) == 1:
            return next(iter(all_op_sys))

        if len(all_op_sys) == 0:
            return "Unspecified"

        return "All"

    @classmethod
    def to_bugzilla_cpu(cls, cpu: str) -> str:
        """Map a Socorro CPU arch name to a valid Bugzilla rep_platform value."""
        return cls.__bugzilla_cpu_values.get(cpu, "Other")

    @property
    def bugzilla_cpu_arch(self) -> str:
        """The Bugzilla rep_platform value covering the CPU archs seen in the crashes.

        Returns a single arch when the crashes agree, "Unspecified" when no
        cpu_arch facet is present, and "All" otherwise.
        """
        all_cpu_arch = {
            self.to_bugzilla_cpu(cpu["term"])
            for cpu in self.signature["facets"]["cpu_arch"]
        }

        # Ignore the catch-all bucket when a concrete arch is also present.
        if len(all_cpu_arch) == 2 and "Other" in all_cpu_arch:
            all_cpu_arch.remove("Other")

        if len(all_cpu_arch) == 1:
            return next(iter(all_cpu_arch))

        if len(all_cpu_arch) == 0:
            return "Unspecified"

        return "All"

    @property
    def num_user_comments(self) -> int:
        """The number of distinct user comments on the crash reports."""
        # TODO: count useful/interesting user comments (e.g., exclude one word comments)
        return self.signature["facets"]["cardinality_user_comments"]["value"]

    @property
    def has_user_comments(self) -> bool:
        """Whether any crash report carries a user comment."""
        return self.num_user_comments > 0

    @property
    def top_proto_signature(self) -> str:
        """The most frequent proto signature among the crashes."""
        return self.signature["facets"]["proto_signature"][0]["term"]

    @property
    def num_top_proto_signature_crashes(self) -> int:
        """The number of crashes with the most frequent proto signature."""
        return self.signature["facets"]["proto_signature"][0]["count"]

    @property
    def build_ids(self) -> Iterator[int]:
        """Yield the build ids seen in the crashes, most frequent first."""
        for build_id in self.signature["facets"]["build_id"]:
            yield build_id["term"]

    @property
    def top_build_id(self) -> int:
        """The build id with the most crashes."""
        return self.signature["facets"]["build_id"][0]["term"]

    @cached_property
    def num_near_null_crashes(self) -> int:
        """The number of crashes whose address is near null."""
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_null_address(address["term"])
        )

    @property
    def is_near_null_crash(self) -> bool:
        """Whether all crashes occurred near null."""
        return self.num_near_null_crashes == self.num_crashes

    @property
    def is_potential_near_null_crash(self) -> bool:
        """Whether some, but not all, crashes occurred near null."""
        return not self.is_near_null_crash and self.num_near_null_crashes > 0

    @property
    def is_near_null_related_crash(self) -> bool:
        """Whether any crash occurred near null."""
        return self.is_near_null_crash or self.is_potential_near_null_crash

    @cached_property
    def num_near_allocator_crashes(self) -> int:
        """The number of crashes whose address is near an allocator poison value."""
        return sum(
            address["count"]
            for address in self.signature["facets"]["address"]
            if is_near_allocator_address(address["term"])
        )

    @property
    def is_near_allocator_crash(self) -> bool:
        """Whether all crashes occurred near an allocator poison value."""
        return self.num_near_allocator_crashes == self.num_crashes

    @property
    def is_potential_near_allocator_crash(self) -> bool:
        """Whether some, but not all, crashes occurred near an allocator poison value."""
        return not self.is_near_allocator_crash and self.num_near_allocator_crashes > 0

    @property
    def is_near_allocator_related_crash(self) -> bool:
        """Whether any crash occurred near an allocator poison value."""
        return self.is_near_allocator_crash or self.is_potential_near_allocator_crash
×
282

283

284
class SignatureAnalyzer(SocorroInfoAnalyzer, ClouseauReportsAnalyzer):
    """Analyze a crash signature by combining Socorro and Crash Clouseau data."""

    platforms = [
        {"short_name": "win", "name": "Windows"},
        {"short_name": "mac", "name": "Mac OS X"},
        {"short_name": "lin", "name": "Linux"},
        {"short_name": "and", "name": "Android"},
        {"short_name": "unknown", "name": "Unknown"},
    ]

    def __init__(
        self,
        signature: dict,
        num_total_crashes: int,
        clouseau_reports: list[dict],
    ):
        """
        Args:
            signature: the Socorro facets for this signature.
            num_total_crashes: the total number of crashes in the search.
            clouseau_reports: the Crash Clouseau reports for this signature.
        """
        SocorroInfoAnalyzer.__init__(
            self, signature, num_total_crashes, platforms=self.platforms
        )
        ClouseauReportsAnalyzer.__init__(self, clouseau_reports)

    def _fetch_crash_reports(
        self,
        proto_signature: str,
        build_id: int | Iterable[int],
        limit: int = 1,
    ) -> Iterator[dict]:
        """Yield crash report stubs (uuid only) matching the proto signature
        and build id(s) from Socorro SuperSearch."""
        search_results: dict = {}

        def collect(res: dict, sink: dict):
            sink.update(res)

        socorro.SuperSearch(
            params={
                "proto_signature": "=" + proto_signature,
                "build_id": build_id,
                "_columns": [
                    "uuid",
                ],
                "_results_number": limit,
            },
            handler=collect,
            handlerdata=search_results,
        ).wait()

        yield from search_results["hits"]

    def fetch_representing_processed_crash(self) -> dict:
        """Fetch a processed crash report that represents this signature.

        Raises:
            NoCrashReportFoundError: when no suitable crash report is found.
        """
        restrict_to_top_proto_signature = (
            self.num_top_proto_signature_crashes / self.num_crashes > 0.6
        )

        # Candidate order matters: Clouseau reports with higher scores are
        # more likely to be useful; after those, reports from the top
        # crashing build are likely to be representative; finally fall back
        # to any build.
        candidates = itertools.chain(
            sorted(
                self._clouseau_reports,
                key=lambda report: report["max_score"],
                reverse=True,
            ),
            self._fetch_crash_reports(self.top_proto_signature, self.top_build_id),
            self._fetch_crash_reports(self.top_proto_signature, self.build_ids),
        )
        for candidate in candidates:
            crash_id = candidate["uuid"]
            processed_crash = socorro.ProcessedCrash.get_processed(crash_id)[crash_id]
            if (
                not restrict_to_top_proto_signature
                or processed_crash["proto_signature"] == self.top_proto_signature
            ):
                # TODO(investigate): maybe we should check if the stack is
                # corrupted (ask gsvelto or willkg about how to detect that)
                return processed_crash

        raise NoCrashReportFoundError(
            f"No crash report found with the most frequent proto signature for {self.signature_term}."
        )
359

360

361
class SignaturesDataFetcher:
    """Fetch the crash data needed to analyze a set of signatures."""

    def __init__(
        self,
        signatures,
        product: str = "Firefox",
        channel: str = "nightly",
    ):
        """
        Args:
            signatures: the signature terms to fetch data for.
            product: the product to limit the search to.
            channel: the release channel to limit the search to.
        """
        self._signatures = signatures
        self._product = product
        self._channel = channel

    def fetch_clouseau_crash_reports(self) -> dict[str, list]:
        """Fetch the Crash Clouseau reports, keyed by signature."""
        return clouseau.Reports.get_by_signatures(
            self._signatures,
            product=self._product,
            channel=self._channel,
        )

    def fetch_socorro_info(self) -> tuple[list[dict], int]:
        """Fetch aggregated crash data from Socorro.

        Returns:
            A tuple of the per-signature facets and the total number of
            crashes matching the search.
        """
        # TODO(investigate): should we increase the duration to 6 months?
        duration = timedelta(weeks=1)
        end_date = lmdutils.get_date_ymd("today")
        start_date = end_date - duration
        date_range = socorro.SuperSearch.get_search_date(start_date, end_date)

        params = {
            "product": self._product,
            # TODO(investigate): should we included all release channels?
            "release_channel": self._channel,
            # TODO(investigate): should we limit based on the build date as well?
            "date": date_range,
            # TODO: split signatures into chunks to avoid very long query URLs
            "signature": ["=" + signature for signature in self._signatures],
            # NOTE: "cpu_arch" was previously listed twice; the duplicate
            # aggregation has been removed.
            "_aggs.signature": [
                "build_id",
                "cpu_arch",
                "proto_signature",
                "_cardinality.user_comments",
                "platform_pretty_version",
                # The following are needed for SignatureStats:
                "platform",
                "is_garbage_collecting",
                "_cardinality.install_time",
                "startup_crash",
                "_histogram.uptime",
                "process_type",
            ],
            "_results_number": 0,
            "_facets_size": 10000,
        }

        def handler(search_results: dict, data: dict):
            data["num_total_crashes"] = search_results["total"]
            data["signatures"] = search_results["facets"]["signature"]

        data: dict = {}
        socorro.SuperSearchUnredacted(
            params=params,
            handler=handler,
            handlerdata=data,
        ).wait()

        return data["signatures"], data["num_total_crashes"]

    def analyze(self) -> list["SignatureAnalyzer"]:
        """Build a SignatureAnalyzer for each signature analyzed by Clouseau."""
        clouseau_reports = self.fetch_clouseau_crash_reports()
        signatures, num_total_crashes = self.fetch_socorro_info()

        return [
            SignatureAnalyzer(
                signature,
                num_total_crashes,
                clouseau_reports[signature["term"]],
            )
            for signature in signatures
            # TODO(investigate): For now, we are ignoring signatures that are
            # not analyzed by clouseau. We should investigate why they are not
            # analyzed and whether we should include them.
            if signature["term"] in clouseau_reports
        ]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc