mozilla / fx-private-relay / e4ddffb3-f77a-4c5e-8e72-3a5ec8cfb487

27 Jun 2024 10:07PM CUT · coverage: 85.401% (+0.1%) from 85.301%
Build e4ddffb3-f77a-4c5e-8e72-3a5ec8cfb487 · push · circleci · web-flow

Merge pull request #4780 from mozilla/MPP-3825-top-200-generated-for-aggregate
Command to normalize and aggregate generated_for data
4075 of 5223 branches covered (78.02%)

Branch coverage included in aggregate %.

133 of 134 new or added lines in 2 files covered. (99.25%)

2 existing lines in 1 file now uncovered.

15891 of 18156 relevant lines covered (87.52%)

10.92 hits per line
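
For reference, the headline percentages above follow directly from the raw counts. A minimal sketch of the arithmetic (not part of the Coveralls output), in Python:

    line_rate = 15891 / 18156        # ≈ 0.8752, i.e. 87.52% of relevant lines covered
    branch_rate = 4075 / 5223        # ≈ 0.7802, i.e. 78.02% of branches covered
    new_line_rate = 133 / 134        # ≈ 0.9925, i.e. 99.25% of new or added lines covered
    # Branch coverage is included in the aggregate percentage:
    aggregate_rate = (15891 + 4075) / (18156 + 5223)   # ≈ 0.85401, i.e. 85.401%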

Source File

97.22% · /privaterelay/management/commands/aggregate_generated_for.py
from argparse import ArgumentParser
from collections.abc import Iterable
from csv import DictReader, DictWriter
from pathlib import Path
from typing import Any
from urllib.parse import urlparse

from django.core.management.base import BaseCommand, CommandError


def normalize(url: str) -> str:
    """
    The url in data may not have // which urlparse requires to recognize the netloc
    script from:
    https://stackoverflow.com/questions/53816559/python-3-netloc-value-in-urllib-parse-is-empty-if-url-doesnt-have
    """
    if not (
        url.startswith("//") or url.startswith("http://") or url.startswith("https://")
    ):
        return "//" + url
    return url


def aggregate_by_generated_for(
    file_path: str, data: Iterable[dict[str, Any]]
) -> dict[str, dict[str, int]]:
    aggregate_usage: dict[str, dict[str, int]] = {}
    columns = [
        "count",  # Number of masks with the generated_for
        "total_usage",  # Sum of emails forwarded, emails blocked,
        # trackers blocked in emails, emails replied, and spam blocked
        "total_forwarded",  # Total emails forwarded to masks
        "total_blocked",  # Total emails blocked for masks
        "total_level_one_trackers_blocked",  # Total number of trackers
        # blocked in emails forwarded to masks
        "total_replied",  # Total number of emails replied to masks
        "total_spam",  # Total number of spam
    ]

    for row in data:
        aggregate_data: dict[str, int] = {
            "count": 0,
            "row_count": 0,
            "total_usage": 0,
            "total_forwarded": 0,
            "total_blocked": 0,
            "total_level_one_trackers_blocked": 0,
            "total_replied": 0,
            "total_spam": 0,
        }
        url = row["generated_for"]

        # TODO: good candidate for a unit-tested function
        # clean the domain for multiple domains in generated_for
        # separated by space, strip www, and others like stripping path
        if url:
            normalized_url = normalize(url)
            domain = urlparse(normalized_url).netloc
        else:
            domain = url  # marked uncovered in this build

        if domain in aggregate_usage:
            aggregate_data = aggregate_usage[domain]

        aggregate_data["row_count"] = aggregate_data["row_count"] + 1
        for col in columns:
            d = int(row[col])
            aggregate_data[col] += d
        aggregate_usage[domain] = aggregate_data
    return aggregate_usage


def generate_csv_file(
    file_path: str, aggregate_usage: dict[str, dict[str, Any]]
) -> Path:
    aggregate_file_path = Path(file_path).parent.joinpath("aggregate.csv")
    with open(aggregate_file_path, "w", newline="") as csvfile:
        field_names = [
            "domain",
            "rank",
            "count",
            "row_count",
            "total_usage",
            "ratio_usage",
            "total_forwarded",
            "ratio_forwarded",
            "total_blocked",
            "ratio_blocked",
            "total_level_one_trackers_blocked",
            "ratio_level_one_trackers_blocked",
            "total_replied",
            "ratio_replied",
            "total_spam",
            "ratio_spam",
        ]
        writer = DictWriter(csvfile, fieldnames=field_names)

        writer.writeheader()
        for k, v in aggregate_usage.items():
            row = {"domain": k}
            row.update(v)
            writer.writerow(row)
    return aggregate_file_path


class Command(BaseCommand):
    help = (
        "Takes CSV file with generated_for values and "
        "normalizes URLs in domain column and aggregates the values. "
        "Creates or updates aggregate.csv for quarterly mask acceptance testing. "
        "See instructions on how to get generated_for CSV file on MPP-3825."
    )

    def add_arguments(self, parser: ArgumentParser) -> None:
        parser.add_argument(
            "--path",
            type=str,
            required=True,
            help="Path to the CSV file to normalize and aggregate",
        )

    def handle(self, *args: Any, **options: Any) -> str:
        file_path: str = options.get("path", "")

        if file_path == "":
            raise CommandError(
                "Aggregate generated_for failed: File path must be entered"
            )

        with open(file_path, newline="") as csvfile:
            datareader = DictReader(csvfile, delimiter=",", quotechar="|")
            aggregate_usage = aggregate_by_generated_for(file_path, datareader)
            aggregate_file_path = generate_csv_file(file_path, aggregate_usage)
        return f"Completed updates to {aggregate_file_path}"
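
A hedged usage sketch (not from the repository) of how the command above would typically be invoked and what normalize() does to a bare domain. The file paths are illustrative, and the input CSV is assumed to carry the columns read in aggregate_by_generated_for (generated_for plus the count and total_* columns):

    # Shell invocation via Django's manage.py (illustrative path):
    #   python manage.py aggregate_generated_for --path /tmp/generated_for.csv
    # Programmatic equivalent from Django code or a shell session:
    from django.core.management import call_command

    from privaterelay.management.commands.aggregate_generated_for import normalize

    result = call_command("aggregate_generated_for", path="/tmp/generated_for.csv")
    # call_command passes back handle()'s return value, e.g.
    # "Completed updates to /tmp/aggregate.csv"; the aggregate CSV is written
    # next to the input file.

    # normalize() prefixes "//" so urlparse() can find the netloc of bare domains:
    assert normalize("relay.firefox.com/accounts") == "//relay.firefox.com/accounts"
    assert normalize("https://example.com") == "https://example.com"  # already parseable, unchanged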