• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OCHA-DAP / hdx-scraper-wfp-foodprices / 14844429278

05 May 2025 07:21PM UTC coverage: 78.703% (+1.3%) from 77.382%
14844429278

push

github

mcarans
Iterate over files repeatedly so as not to run out of memory

144 of 158 new or added lines in 6 files covered. (91.14%)

2 existing lines in 1 file now uncovered.

728 of 925 relevant lines covered (78.7%)

0.79 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.37
/src/hdx/scraper/wfp/foodprices/world/global_prices_generator.py
1
import logging
1✔
2
from datetime import datetime, timezone
1✔
3
from glob import iglob
1✔
4
from os.path import join
1✔
5
from typing import Dict, Tuple
1✔
6

7
from hdx.api.configuration import Configuration
1✔
8
from hdx.utilities.dateparse import default_date, default_enddate, parse_date
1✔
9
from hdx.utilities.dictandlist import dict_of_sets_add, write_list_to_csv
1✔
10
from hdx.utilities.downloader import Download
1✔
11

12
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
1✔
13

14

15
class GlobalPricesGenerator:
    """Combine per-country food price CSVs into one global CSV per year.

    Country files are re-read for each year rather than all being loaded at
    once, so only one year's rows are held in memory at a time.

    ``get_years_per_country`` must be called before ``create_prices_files``:
    it discovers the country files and the years that they cover.
    """

    # Template for output file names; formatted with the year.
    filename = "wfp_food_prices_global_{}.csv"

    def __init__(self, configuration: Configuration, downloader: Download, folder: str):
        """Store collaborators and initialise the lookup tables.

        Args:
            configuration: Configuration providing the "prices_headers" and
                "hxltags" entries read by ``create_prices_files``.
            downloader: Downloader used to read the per-country CSV files.
            folder: Folder that is searched for per-country price CSV files.
        """
        self._configuration = configuration
        self._downloader = downloader
        self._folder = folder
        # Country ISO3 code -> path of that country's prices CSV.
        self._prices_paths = {}
        # Years found in the data, newest first; set by get_years_per_country.
        self._years = None
        # Year -> countries (ISO3 codes) that have at least one row in that year.
        self._year_to_countries = {}

    @staticmethod
    def _row_date(row: Dict):
        """Return the parsed date of a row, or None if the row has no date.

        Rows whose date value starts with "#" (the HXL tag row) and rows with
        a blank date are reported as None so that callers can skip them.
        """
        date = row["date"]
        # startswith() rather than date[0] so a blank cell cannot raise
        # IndexError; a row without a date cannot be assigned to a year.
        if not date or date.startswith("#"):
            return None
        return parse_date(date)

    def get_years_per_country(self) -> Tuple[datetime, datetime]:
        """Discover country price files and record which years each covers.

        Populates the country-to-path, year-to-countries and years lookups
        that ``create_prices_files`` reads.

        Returns:
            Tuple of (earliest date, latest date) found across all country
            files. If no dated rows are found, the starting sentinels
            (``default_enddate``, ``default_date``) are returned unchanged.
        """
        pattern = f"{self._folder}/wfp_food_prices*.csv"
        for filepath in sorted(iglob(pattern, recursive=False)):
            # Global and QC files match the pattern but are not country files.
            if any(x in filepath for x in ("_global", "_qc")):
                continue
            # Assumes country file names end in "<iso3>.csv": take the three
            # characters before the extension.
            countryiso3 = filepath[-7:-4].upper()
            self._prices_paths[countryiso3] = filepath

        earliest_date = default_enddate
        latest_date = default_date
        years = set()
        for countryiso3, filepath in self._prices_paths.items():
            _, iterator = self._downloader.get_tabular_rows(
                filepath, has_hxl=True, dict_form=True, encoding="utf-8"
            )
            logger.info(f"Reading year info from {countryiso3}: {filepath}")
            for row in iterator:
                date = self._row_date(row)
                if date is None:
                    continue
                if date < earliest_date:
                    earliest_date = date
                if date > latest_date:
                    latest_date = date
                years.add(date.year)
                dict_of_sets_add(self._year_to_countries, date.year, countryiso3)
        self._years = sorted(years, reverse=True)
        return earliest_date, latest_date

    def create_prices_files(self, output_dir: str = "") -> Dict:
        """Write one global prices CSV per year found in the country files.

        Requires ``get_years_per_country`` to have been called first, since it
        fills in the years and the country file paths used here.

        Args:
            output_dir: Folder to write the CSVs to. Defaults to the folder
                given to the constructor when empty.

        Returns:
            Dictionary mapping each year to the path of the CSV written for
            it. Years that yield no data rows are omitted.
        """
        year_to_path = {}
        output_dir = output_dir or self._folder

        # Build a new list instead of inserting into the configuration's own
        # list, so repeated calls (or other readers of the configuration) do
        # not see "countryiso3" added again.
        prices_headers = ["countryiso3", *self._configuration["prices_headers"]]
        hxltags = self._configuration["hxltags"]
        prices_hxltags = {header: hxltags[header] for header in prices_headers}

        for year in self._years:
            logger.info(f"Processing {year} prices")
            startdate = datetime(year, 1, 1, tzinfo=timezone.utc)
            enddate = datetime(year, 12, 31, 23, 59, 59, tzinfo=timezone.utc)
            # First output row is the HXL tag row; data rows follow.
            rows = [Download.hxl_row(prices_headers, prices_hxltags, dict_form=True)]
            for countryiso3 in sorted(self._year_to_countries[year]):
                filepath = self._prices_paths[countryiso3]
                # Re-read the country file for each year rather than caching
                # all rows, to keep memory use down.
                _, iterator = self._downloader.get_tabular_rows(
                    filepath, has_hxl=True, dict_form=True, encoding="utf-8"
                )
                for row in iterator:
                    date = self._row_date(row)
                    if date is None or date < startdate or date > enddate:
                        continue
                    row["countryiso3"] = countryiso3
                    rows.append(row)
            # Only the HXL tag row is present: nothing to write for this year.
            if len(rows) == 1:
                continue
            filepath = join(output_dir, self.filename.format(year))
            write_list_to_csv(filepath, rows, columns=prices_headers)
            year_to_path[year] = filepath
        return year_to_path
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc