• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

OCHA-DAP / hdx-ckan / #5847

09 Nov 2024 05:10PM UTC coverage: 74.501% (+0.4%) from 74.062%
#5847

Pull #6470

coveralls-python

danmihaila
HDX-10191 replace xls with xlsx label
Pull Request #6470: HDX-10191 org stats download as xlsx

75 of 115 new or added lines in 5 files covered. (65.22%)

1 existing line in 1 file now uncovered.

12403 of 16648 relevant lines covered (74.5%)

0.75 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.25
/ckanext-hdx_theme/ckanext/hdx_theme/util/jql.py
1
import requests
1✔
2
import logging
1✔
3

4
from dogpile.cache import make_region
1✔
5
from datetime import datetime, timedelta, timezone
1✔
6
from collections import OrderedDict
1✔
7
from functools import wraps
1✔
8

9
import ckan.plugins.toolkit as tk
1✔
10

11
import ckanext.hdx_theme.util.jql_queries as jql_queries
1✔
12
from ckanext.hdx_theme.util.timer import Timer
1✔
13
from ckanext.hdx_theme.helpers.caching import dogpile_standard_config, dogpile_config_filter, \
1✔
14
    HDXRedisInvalidationStrategy
15

16
# Shortcut to the CKAN configuration object used by the rest of this module.
config = tk.config

log = logging.getLogger(__name__)

# Settings for the JQL cache region. The expiration is configured in hours and
# stored in seconds; the shared standard settings are merged in afterwards, so
# they win for any key present in both.
dogpile_config = {}
dogpile_config['cache.redis.expiration_time'] = \
    int(config.get('hdx.analytics.hours_for_results_in_cache', 24)) * 3600
dogpile_config.update(dogpile_standard_config)

# Dedicated cache region for JQL results; every cache key gets a "jql-" prefix.
dogpile_jql_region = make_region(key_mangler=lambda key: 'jql-' + key)
dogpile_jql_region.configure_from_config(dogpile_config, dogpile_config_filter)
if dogpile_config_filter == 'cache.redis.':
    # Only when the redis settings prefix is in use: install the HDX invalidation strategy.
    dogpile_jql_region.region_invalidator = HDXRedisInvalidationStrategy(dogpile_jql_region)

# Sent as the basic-auth user name when calling the mixpanel JQL endpoint.
CONFIG_API_SECRET = config.get('hdx.analytics.mixpanel.secret')

# Buckets of hexadecimal digits; get_dataset_mp_group() matches the first
# character of a dataset id against them.
MIXPANEL_GROUPS = ['0123', '4567', '89ab', 'cdef']
1✔
33

34

35
class JqlQueryExecutor(object):
    """
    Runs a JQL script against the mixpanel JQL HTTP endpoint.

    The script is a ``str.format`` template. Subclasses append the positional
    values for its placeholders to ``self.args`` before the query is run.
    """

    def __init__(self, query):
        """
        :param query: JQL script template with positional ``{}`` placeholders
        :type query: str
        """
        self.query = query
        self.args = []
        self.payload = None

    def run_query(self, transformer):
        """
        Compile the script, execute it and return the transformed result.

        Any error raised while calling mixpanel or transforming the response is
        logged and swallowed (best effort) and an empty dict is returned.
        Errors raised while formatting the script are not caught here.

        :param transformer: transforms the request result
        :type transformer: MappingResultTransformer
        :return: the transformed result, or an empty dict on failure
        :rtype: dict
        """
        self._compile_query()
        try:
            return self._run_query(transformer)
        except Exception:
            # log.exception records the message together with the traceback, so
            # the cause of the failure is not reduced to str(e).
            log.exception('Ran into problems when getting data from mixpanel. Returning empty dict.')
            return {}

    def _compile_query(self):
        """Fill the script template with ``self.args`` and store the request payload."""
        self.payload = {
            'script': self.query.format(*self.args)
        }

    def _run_query(self, transformer):
        """
        :param transformer: transforms the request result
        :type transformer: MappingResultTransformer
        :return: a dict mapping the key to the values
        :rtype: dict
        """
        # On the test site id no request is sent at all.
        if config.get('ckan.site_id') == 'test.ckan.net':
            return {}

        # NOTE(review): no timeout is passed, so a stalled connection blocks the
        # caller indefinitely - confirm whether a (generous) timeout is acceptable.
        response = requests.post('https://mixpanel.com/api/2.0/jql', data=self.payload,
                                 auth=(CONFIG_API_SECRET, ''))
        response.raise_for_status()
        return transformer.transform(response)
×
69

70

71
class JqlQueryExecutorForHoursSinceNow(JqlQueryExecutor):
    """
    Executor whose script receives two arguments: the start and the end date of
    a period that ends today (UTC).
    """

    def __init__(self, query, hours_since_now):
        """
        :param query: JQL script template
        :type query: str
        :param hours_since_now: for how many hours back should the mixpanel call be made;
            a falsy value (None, 0) selects everything since 2016-08-01
        :type hours_since_now: int
        """
        super(JqlQueryExecutorForHoursSinceNow, self).__init__(query)
        self.args += self._compute_period(hours_since_now)

    @staticmethod
    def _compute_period(hours_since_now):
        """
        :param hours_since_now: for how many hours back should the mixpanel call be made
        :type hours_since_now: int
        :return: a list with 2 iso date strings representing the beginning and ending of the period
        :rtype: list[str]
        """
        # A single timezone-aware snapshot: datetime.utcnow() is deprecated and
        # calling it twice could put the two ends of the period on different instants.
        now = datetime.now(timezone.utc)
        until_date_str = now.isoformat()[:10]

        if hours_since_now:
            from_date_str = (now - timedelta(hours=hours_since_now)).isoformat()[:10]
        else:
            # No look-back window given: use the fixed earliest date.
            from_date_str = '2016-08-01'

        return [from_date_str, until_date_str]
1✔
90

91

92
class JqlQueryExecutorForWeeksSinceNow(JqlQueryExecutor):
    """
    Executor whose script receives two arguments: the start and the end date of
    a period measured in whole weeks.
    """

    def __init__(self, query, weeks_since, since_date):
        """
        :param query: JQL script template
        :type query: str
        :param weeks_since: how many weeks the period reaches back
        :type weeks_since: int
        :param since_date: the moment at which the period ends
        :type since_date: datetime
        """
        super().__init__(query)
        self.args.extend(self._compute_period(weeks_since, since_date))

    @staticmethod
    def _compute_period(weeks_since, since_date):
        """
        :param weeks_since: for how many weeks back should the mixpanel call be made ( a week starts monday )
        :type weeks_since: int
        :param since_date: the moment at which the period ends
        :type since_date: datetime
        :return: a list with 2 iso date strings representing the beginning and ending of the period
        :rtype: list[str]
        """
        # Going back weekday() days reaches the Monday of since_date's week; the
        # whole weeks are subtracted on top of that.
        lookback = timedelta(weeks=weeks_since, days=since_date.weekday())
        period = (since_date - lookback, since_date)
        return [moment.isoformat()[:10] for moment in period]
1✔
122

123
class JqlQueryExecutorForLast5Years(JqlQueryExecutor):
    """
    Executor whose script receives three arguments: the start date, the end date
    and the organization id.
    """

    def __init__(self, query, org_id):
        """
        :param query: JQL script template
        :type query: str
        :param org_id: appended as the last script argument
        """
        super().__init__(query)
        self.args.extend(self._compute_period())
        self.args.append(org_id)

    @staticmethod
    def _compute_period():
        """
        :return: a list with 2 iso date strings representing the beginning and ending of the period,
                since 5 years ago on January 1st until last day of previous month
        :rtype: list[str]
        """
        now_utc = datetime.now(timezone.utc)

        # January 1st, five calendar years before the current year.
        period_start = now_utc.replace(year=now_utc.year - 5, month=1, day=1)

        # One day before the first of the current month, i.e. the last day of the previous month.
        period_end = now_utc.replace(day=1) - timedelta(days=1)

        return [period_start.isoformat()[:10], period_end.isoformat()[:10]]
×
151

152

153
class JqlQueryExecutorForWeeksSinceNowWithGroupFiltering(JqlQueryExecutorForWeeksSinceNow):
    """
    Weekly-period executor that passes one extra script argument, the group,
    after the two period dates.
    """

    def __init__(self, query, weeks_since, since_date, group):
        """
        :param query: JQL script template
        :type query: str
        :param weeks_since: how many weeks the period reaches back
        :type weeks_since: int
        :param since_date: the moment at which the period ends
        :type since_date: datetime
        :param group: appended as the last script argument
            (presumably one of the MIXPANEL_GROUPS entries - verify against callers)
        """
        super().__init__(query, weeks_since, since_date)
        self.args += [group]
1✔
167

168

169
class MappingResultTransformer(object):
    """Turns a JQL response into a flat ``{key: value}`` dict."""

    def __init__(self, key_name):
        """
        :param key_name: name of the item field whose content becomes the dict key
        :type key_name: str
        """
        self.key_name = key_name

    def transform(self, response):
        """
        Map every item of the JSON response from its ``key_name`` field to its
        ``value`` field. When a key occurs more than once the last item wins.

        :param response: the HTTP response
        :type response: requests.Response
        :return: the key -> value mapping
        :rtype: dict
        """
        mapping = {}
        for entry in response.json():
            mapping[entry.get(self.key_name)] = entry.get('value')
        return mapping
×
182

183

184
class MappingCustomResultTransformer(object):
    """
    Groups a JQL response by date, collecting the page view and resource
    download counters of each date into one entry.
    """

    @staticmethod
    def _date_sort_key(entry):
        """Sort key for ``(date, stats)`` pairs that places a missing (None) date last."""
        date = entry[0]
        if date is None:
            return True, ''
        return False, date

    def transform(self, response):
        """
        Every response item contributes to the entry of its ``date``:
        ``page view`` items fill ``pageviews_unique`` / ``pageviews_total`` and
        ``resource download`` items fill ``downloads_unique`` / ``downloads_total``.
        Items with any other ``event_name`` only make sure the date entry exists.

        :param response: the HTTP response
        :type response: requests.Response
        :return: dict ordered by date, mapping each date to an OrderedDict of counters
        :rtype: dict
        """
        result = {}
        for item in response.json():
            stats = result.setdefault(item.get('date'), OrderedDict())
            event_name = item.get('event_name')
            if event_name == 'page view':
                stats['pageviews_unique'] = item.get('unique_count')
                stats['pageviews_total'] = item.get('total_count')
            elif event_name == 'resource download':
                stats['downloads_unique'] = item.get('unique_count')
                stats['downloads_total'] = item.get('total_count')

        # Sorting the bare keys would raise TypeError as soon as one item lacks a
        # date (None cannot be compared with str); the key keeps such an entry
        # and puts it at the end.
        return dict(sorted(result.items(), key=self._date_sort_key))
×
212

213
class MultipleValueMappingResultTransformer(MappingResultTransformer):
    """Turns a JQL response into a two-level mapping: main key -> secondary key -> entry."""

    def __init__(self, key_name, secondary_key_name):
        """
        :param key_name: item field used as the outer key
        :type key_name: str
        :param secondary_key_name: item field used as the inner key
        :type secondary_key_name: str
        """
        super().__init__(key_name)
        self.secondary_key_name = secondary_key_name

    def transform(self, response):
        """
        :param response: the HTTP response
        :type response: requests.Response
        :return: for every main key an OrderedDict (in response order) mapping the
            secondary key to a dict holding the value (0 when absent) and the secondary key
        :rtype: dict[str, OrderedDict]
        """
        grouped = {}

        for entry in response.json():
            inner_key = entry.get(self.secondary_key_name)
            bucket = grouped.setdefault(entry.get(self.key_name), OrderedDict())
            bucket[inner_key] = {'value': entry.get('value', 0), self.secondary_key_name: inner_key}

        return grouped
×
232

233

234
class MultipleValueMandatoryMappingResultTransformer(MappingResultTransformer):
    """
    Two-level mapping (main key -> mandatory key -> entry) in which every main
    key gets an entry for each mandatory value, defaulting to a value of 0.
    """

    def __init__(self, key_name, mandatory_key, mandatory_values):
        """
        :param key_name: item field used as the outer key
        :type key_name: str
        :param mandatory_key: item field used as the inner key
        :type mandatory_key: str
        :param mandatory_values: inner keys every outer entry must contain, in output order
        :type mandatory_values: list[str]
        """
        super(MultipleValueMandatoryMappingResultTransformer, self).__init__(key_name)
        self.mandatory_key = mandatory_key
        self.mandatory_values = mandatory_values

        # (inner key, zero-valued placeholder) pairs used to pre-fill each outer entry.
        self.template = [(item, {mandatory_key: item, 'value': 0}) for item in mandatory_values]

    def _new_entry(self):
        """Build a pre-filled entry whose placeholder dicts are private copies."""
        # Copying matters: reusing the template dicts would make every outer key
        # share the same placeholder objects, so changing one would change all.
        return OrderedDict((key, dict(placeholder)) for key, placeholder in self.template)

    def transform(self, response):
        """
        Items whose mandatory key is not one of the mandatory values are logged
        and skipped.

        :param response: the HTTP response
        :type response: requests.Response
        :return: for every main key an OrderedDict with one entry per mandatory value
        :rtype: dict[str, OrderedDict]
        """
        result = {}

        for item in response.json():
            main_key = item.get(self.key_name)
            secondary_key = item.get(self.mandatory_key)

            if secondary_key not in self.mandatory_values:
                log.error('{} not in mandatory values {}'.format(secondary_key, ','.join(self.mandatory_values)))
                continue

            if main_key not in result:
                result[main_key] = self._new_entry()

            result[main_key][secondary_key] = {'value': item.get('value', 0), self.mandatory_key: secondary_key}

        return result
×
260

261

262
def get_dataset_mp_group(dataset_id):
    """
    Find the mixpanel group a dataset belongs to.

    The group is the MIXPANEL_GROUPS entry that contains the first character of
    the dataset id.

    :param dataset_id: the dataset id
    :type dataset_id: str
    :return: the matching group, or None (after logging an error) when the id is
        empty / None or its first character is in no group
    :rtype: str or None
    """
    # Guard first: indexing an empty or missing id would raise instead of
    # reaching the "could not be determined" path below.
    if dataset_id:
        first_letter = dataset_id[0]
        for group in MIXPANEL_GROUPS:
            if first_letter in group:
                return group
    log.error('Dataset group could not be determined for JQL query')
    return None
×
269

270

271
def timer_wrapper(original_caching_function):
    """
    Decorator that times a call of the wrapped function with a Timer.

    The timer is named after the wrapped function and its positional
    arguments; keyword arguments are not supported.

    :param original_caching_function: the function to time
    :return: the wrapping function, carrying the metadata of the original
    """
    @wraps(original_caching_function)
    def timed_caching_function(*args):
        # str() each argument: joining raw values raises TypeError for anything
        # that is not already a string.
        args_to_name = ', '.join(str(arg) for arg in args)
        name = '{} with args ({})'.format(original_caching_function.__name__, args_to_name)
        # NOTE(review): a value coming from the config may be a string while the
        # default is an int - confirm what Timer expects.
        warning_threshold = config.get('hdx.analytics.mixpanel.warning_threshold_seconds', 90)
        timer = Timer(name,
                      init_message='creating cache',
                      in_millis=False, log_warning_step_threshold=warning_threshold)
        result = original_caching_function(*args)
        timer.next('finished')
        return result
    return timed_caching_function
1✔
284

285

286
@dogpile_jql_region.cache_on_arguments()
@timer_wrapper
def downloads_per_dataset_all_cached():
    """Cached and timed: downloads_per_dataset() called without a look-back window."""
    all_downloads = downloads_per_dataset()
    return all_downloads
1✔
290

291

292
def downloads_per_dataset(hours_since_now=None):
    """
    Run the DOWNLOADS_PER_DATASET query.

    :param hours_since_now: how many hours back the period starts; None for no limit
    :type hours_since_now: int
    :return: dict mapping dataset_id to the value returned by the query
    :rtype: dict
    """
    executor = JqlQueryExecutorForHoursSinceNow(jql_queries.DOWNLOADS_PER_DATASET, hours_since_now)
    return executor.run_query(MappingResultTransformer('dataset_id'))
1✔
297

298

299
def fetch_downloads_per_week_for_dataset(dataset_id):
    """
    Look up the weekly downloads of one dataset in the cached result of its mixpanel group.

    :param dataset_id: the dataset id
    :type dataset_id: str
    :return: the entry of the dataset, or an empty dict when the group cannot be
        determined or the dataset is not part of the result
    :rtype: dict
    """
    mixpanel_group = get_dataset_mp_group(dataset_id)
    if not mixpanel_group:
        return {}
    per_dataset = downloads_per_dataset_per_week_last_24_weeks_cached(mixpanel_group)
    return per_dataset.get(dataset_id, {})
×
304

305

306
@dogpile_jql_region.cache_on_arguments()
@timer_wrapper
def downloads_per_dataset_per_week_last_24_weeks_cached(mixpanel_group):
    """Cached and timed: downloads_per_dataset_per_week() for the given group over 24 weeks."""
    weekly_downloads = downloads_per_dataset_per_week(mixpanel_group, 24)
    return weekly_downloads
1✔
310

311

312
def downloads_per_dataset_per_week(mixpanel_group, weeks=24):
    """
    Run the DOWNLOADS_PER_DATASET_PER_WEEK query for one mixpanel group.

    :param mixpanel_group: group passed to the query as its extra argument
    :param weeks: how many weeks the period reaches back
    :type weeks: int
    :return: dict keyed by dataset_id; every entry holds one item per week date
        of the period (run_query returns an empty dict on failure)
    :rtype: dict
    """
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the date
    # strings derived from it are the same.
    since = datetime.now(timezone.utc)
    query_executor = JqlQueryExecutorForWeeksSinceNowWithGroupFiltering(jql_queries.DOWNLOADS_PER_DATASET_PER_WEEK,
                                                                        weeks, since, mixpanel_group)

    # The same snapshot drives the week dates each dataset entry must contain.
    mandatory_values = _generate_mandatory_dates(since, weeks)

    return query_executor.run_query(
        MultipleValueMandatoryMappingResultTransformer('dataset_id', 'date', mandatory_values))
1✔
323

324

325
@dogpile_jql_region.cache_on_arguments()
@timer_wrapper
def downloads_per_organization_last_30_days_cached():
    """Cached and timed: downloads_per_organization() for the last 30 days."""
    monthly_downloads = downloads_per_organization(30)
    return monthly_downloads
1✔
329

330

331
def downloads_per_organization(days_since_now=30):
    """
    Run the DOWNLOADS_PER_ORGANIZATION query.

    :param days_since_now: how many days back the period starts (converted to hours for the executor)
    :type days_since_now: int
    :return: dict mapping org_id to the value returned by the query
    :rtype: dict
    """
    hours_since_now = days_since_now * 24
    executor = JqlQueryExecutorForHoursSinceNow(jql_queries.DOWNLOADS_PER_ORGANIZATION, hours_since_now)
    return executor.run_query(MappingResultTransformer('org_id'))
1✔
336

337

338
@dogpile_jql_region.cache_on_arguments()
@timer_wrapper
def downloads_per_organization_per_week_last_24_weeks_cached():
    """Cached and timed: downloads_per_organization_per_week() over 24 weeks."""
    weekly_downloads = downloads_per_organization_per_week(24)
    return weekly_downloads
1✔
342

343

344
def downloads_per_organization_per_week(weeks=24):
    """
    Run the DOWNLOADS_PER_ORGANIZATION_PER_WEEK query.

    :param weeks: how many weeks the period reaches back
    :type weeks: int
    :return: dict keyed by org_id; every entry holds one item per week date of
        the period (run_query returns an empty dict on failure)
    :rtype: dict
    """
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the date
    # strings derived from it are the same.
    since = datetime.now(timezone.utc)
    query_executor = JqlQueryExecutorForWeeksSinceNow(jql_queries.DOWNLOADS_PER_ORGANIZATION_PER_WEEK, weeks, since)

    # The same snapshot drives the week dates each organization entry must contain.
    mandatory_values = _generate_mandatory_dates(since, weeks)

    return query_executor.run_query(
        MultipleValueMandatoryMappingResultTransformer('org_id', 'date', mandatory_values))
1✔
354

355

356
@dogpile_jql_region.cache_on_arguments()
@timer_wrapper
def downloads_per_organization_per_dataset_last_24_weeks_cached():
    """Cached and timed: downloads_per_organization_per_dataset() over 24 weeks."""
    downloads_by_dataset = downloads_per_organization_per_dataset(24)
    return downloads_by_dataset
1✔
360

361

362
def downloads_per_organization_per_dataset(weeks=24):
    """
    Run the DOWNLOADS_PER_ORGANIZATION_PER_DATASET query.

    :param weeks: how many weeks the period reaches back
    :type weeks: int
    :return: dict keyed by org_id, each entry keyed by dataset_id
        (run_query returns an empty dict on failure)
    :rtype: dict
    """
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the date
    # strings derived from it are the same.
    since = datetime.now(timezone.utc)
    query_executor = JqlQueryExecutorForWeeksSinceNow(jql_queries.DOWNLOADS_PER_ORGANIZATION_PER_DATASET, weeks, since)

    return query_executor.run_query(
        MultipleValueMappingResultTransformer('org_id', 'dataset_id'))
1✔
370

371

372
@dogpile_jql_region.cache_on_arguments()
@timer_wrapper
def pageviews_per_dataset_last_14_days_cached():
    """Cached and timed: pageviews_per_dataset() for the last 14 days, expressed in hours."""
    return pageviews_per_dataset(14 * 24)
1✔
377

378

379
def pageviews_per_dataset(hours_since_now=None):
    """
    Run the PAGEVIEWS_PER_DATASET query.

    :param hours_since_now: how many hours back the period starts; None for no limit
    :type hours_since_now: int
    :return: dict mapping dataset_id to the value returned by the query
    :rtype: dict
    """
    executor = JqlQueryExecutorForHoursSinceNow(jql_queries.PAGEVIEWS_PER_DATASET, hours_since_now)
    return executor.run_query(MappingResultTransformer('dataset_id'))
1✔
384

385

386
@dogpile_jql_region.cache_on_arguments()
@timer_wrapper
def pageviews_per_organization_last_30_days_cached():
    """Cached and timed: pageviews_per_organization() for the last 30 days."""
    monthly_pageviews = pageviews_per_organization(30)
    return monthly_pageviews
1✔
390

391

392
def pageviews_per_organization(days_since_now=30):
    """
    Run the PAGEVIEWS_PER_ORGANIZATION query.

    :param days_since_now: how many days back the period starts (converted to hours for the executor)
    :type days_since_now: int
    :return: dict mapping org_id to the value returned by the query
    :rtype: dict
    """
    hours_since_now = days_since_now * 24
    executor = JqlQueryExecutorForHoursSinceNow(jql_queries.PAGEVIEWS_PER_ORGANIZATION, hours_since_now)
    return executor.run_query(MappingResultTransformer('org_id'))
1✔
397

398

399
@dogpile_jql_region.cache_on_arguments()
@timer_wrapper
def pageviews_per_organization_per_week_last_24_weeks_cached():
    """Cached and timed: pageviews_per_organization_per_week() over 24 weeks."""
    weekly_pageviews = pageviews_per_organization_per_week(24)
    return weekly_pageviews
1✔
403

404

405
def pageviews_per_organization_per_week(weeks=24):
    """
    Run the PAGEVIEWS_PER_ORGANIZATION_PER_WEEK query.

    :param weeks: how many weeks the period reaches back
    :type weeks: int
    :return: dict keyed by org_id; every entry holds one item per week date of
        the period (run_query returns an empty dict on failure)
    :rtype: dict
    """
    # Timezone-aware replacement for the deprecated datetime.utcnow(); the date
    # strings derived from it are the same.
    since = datetime.now(timezone.utc)
    query_executor = JqlQueryExecutorForWeeksSinceNow(jql_queries.PAGEVIEWS_PER_ORGANIZATION_PER_WEEK, weeks, since)

    # The same snapshot drives the week dates each organization entry must contain.
    mandatory_values = _generate_mandatory_dates(since, weeks)

    return query_executor.run_query(
        MultipleValueMandatoryMappingResultTransformer('org_id', 'date', mandatory_values))
1✔
415

416

417
def _generate_mandatory_dates(since, weeks):
1✔
418
    '''
419
    :param since: the datetime "until" object
420
    :type since: datetime
421
    :param weeks:
422
    :type weeks: int
423
    :return: list of mandatory dates
424
    :rtype: list[str]
425
    '''
426
    mandatory_dates = []
1✔
427

428
    ''':type : list[datetime]'''
1✔
429
    for i in range(0, weeks+1):
1✔
430
        mandatory_dates.insert(0, since - timedelta(weeks=i, days=since.weekday()))
1✔
431
    mandatory_values = list(map(lambda x: x.isoformat()[:10], mandatory_dates))
1✔
432
    return mandatory_values
1✔
433

434
@dogpile_jql_region.cache_on_arguments()
@timer_wrapper
def pageviews_downloads_per_organization_last_5_years(org_id):
    """
    Cached and timed: run the PAGEVIEWS_AND_DOWNLOADS_PER_ORGANIZATION query for one organization.

    :param org_id: the organization id, passed to the query as its last argument
    :return: dict ordered by date, each entry holding the page view and download counters
    :rtype: dict
    """
    executor = JqlQueryExecutorForLast5Years(jql_queries.PAGEVIEWS_AND_DOWNLOADS_PER_ORGANIZATION, org_id=org_id)
    return executor.run_query(MappingCustomResultTransformer())
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc