• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

mendersoftware / mender / 1645624556

23 Jan 2025 06:55PM UTC coverage: 75.942% (-0.02%) from 75.959%
1645624556

push

gitlab-ci

web-flow
Merge pull request #1724 from lluiscampos/MEN-7900-client-stuck-after-sync-error

MEN-7900: Fix Mender client getting stuck after failure in sync state

24 of 40 new or added lines in 3 files covered. (60.0%)

1 existing line in 1 file now uncovered.

7380 of 9718 relevant lines covered (75.94%)

11129.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

76.99
/src/mender-update/daemon/states.cpp
1
// Copyright 2023 Northern.tech AS
2
//
3
//    Licensed under the Apache License, Version 2.0 (the "License");
4
//    you may not use this file except in compliance with the License.
5
//    You may obtain a copy of the License at
6
//
7
//        http://www.apache.org/licenses/LICENSE-2.0
8
//
9
//    Unless required by applicable law or agreed to in writing, software
10
//    distributed under the License is distributed on an "AS IS" BASIS,
11
//    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
//    See the License for the specific language governing permissions and
13
//    limitations under the License.
14

15
#include <mender-update/daemon/states.hpp>
16

17
#include <client_shared/conf.hpp>
18
#include <common/events_io.hpp>
19
#include <common/log.hpp>
20
#include <common/path.hpp>
21

22
#include <mender-update/daemon/context.hpp>
23
#include <mender-update/inventory.hpp>
24

25
namespace mender {
26
namespace update {
27
namespace daemon {
28

29
namespace conf = mender::client_shared::conf;
30
namespace error = mender::common::error;
31
namespace events = mender::common::events;
32
namespace kv_db = mender::common::key_value_database;
33
namespace path = mender::common::path;
34
namespace log = mender::common::log;
35

36
namespace main_context = mender::update::context;
37
namespace inventory = mender::update::inventory;
38

39
class DefaultStateHandler {
40
public:
41
        void operator()(const error::Error &err) {
295✔
42
                if (err != error::NoError) {
295✔
43
                        log::Error(err.String());
23✔
44
                        poster.PostEvent(StateEvent::Failure);
23✔
45
                        return;
23✔
46
                }
47
                poster.PostEvent(StateEvent::Success);
272✔
48
        }
49

50
        sm::EventPoster<StateEvent> &poster;
51
};
52

53
// Checks the error passed in (in this file: the immediate return value of an
// asynchronous call). An error is logged and reported as Failure; when there
// is no error, nothing is posted from here.
static void DefaultAsyncErrorHandler(sm::EventPoster<StateEvent> &poster, const error::Error &err) {
        const bool failed = (err != error::NoError);
        if (!failed) {
                return;
        }

        log::Error(err.String());
        poster.PostEvent(StateEvent::Failure);
}
413✔
59

60
// Entering this state performs no work and posts no event.
void EmptyState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
148✔
61
        // Keep this state truly empty.
62
}
148✔
63

64
// Initial state: does no work of its own and immediately posts Started.
void InitState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
57✔
65
        // Placeholder state whose only job is to give the state machine a place to start at.
66
        poster.PostEvent(StateEvent::Started); // Start the state machine
57✔
67
}
57✔
68

69
// Runs the state scripts for this state/action pair through AsyncRunScripts().
// Posts Success when the scripts complete without error. Posts Failure when
// they report an error, or when the execution could not be scheduled.
void StateScriptState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        string state_name {script_executor::Name(this->state_, this->action_)};
        log::Debug("Executing the  " + state_name + " State Scripts...");

        // The name is captured by copy, so the callback carries its own string.
        auto on_finished = [state_name, &poster](error::Error err) {
                if (err != error::NoError) {
                        log::Error(
                                "Received error: (" + err.String() + ") when running the State Script scripts "
                                + state_name);
                        poster.PostEvent(StateEvent::Failure);
                } else {
                        log::Debug("Successfully ran the " + state_name + " State Scripts...");
                        poster.PostEvent(StateEvent::Success);
                }
        };

        auto schedule_err =
                this->script_.AsyncRunScripts(this->state_, this->action_, on_finished, this->on_error_);
        if (schedule_err != error::NoError) {
                log::Error(
                        "Failed to schedule the state script execution for: " + state_name
                        + " got error: " + schedule_err.String());
                poster.PostEvent(StateEvent::Failure);
        }
}
96

97

98
// Forwards to the wrapped StateScriptState, which runs the scripts and posts
// the resulting event.
void SaveStateScriptState::OnEnterSaveState(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        state_script_state_.OnEnter(ctx, poster);
}
101

102
// Only logs that the state was entered; no event is posted from here.
void IdleState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
116✔
103
        log::Debug("Entering Idle state");
232✔
104
}
116✔
105

NEW
106
// Stores the timer to arm, the name of the poll action (used for logging),
// the event to post when the timer fires, and the wait interval in seconds.
ScheduleNextPollState::ScheduleNextPollState(
        events::Timer &timer, const string &poll_action, const StateEvent event, int interval) :
        timer_(timer),
        poll_action_(poll_action),
        event_(event),
        interval_(interval) {
}
×
113

114
// Arms the timer for the next poll and then immediately posts Success. The
// configured event is posted later, from the timer callback, and only if the
// timer completed without error.
void ScheduleNextPollState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        log::Debug("Scheduling the next " + poll_action_ + " in: " + to_string(interval_) + " seconds");

        auto on_timer = [this, &poster](error::Error err) {
                if (err != error::NoError) {
                        // A cancelled timer is not logged; any other error is.
                        if (err.code != make_error_condition(errc::operation_canceled)) {
                                log::Error("Timer caused error: " + err.String());
                        }
                        return;
                }
                poster.PostEvent(event_);
        };
        timer_.AsyncWait(chrono::seconds(interval_), on_timer);

        poster.PostEvent(StateEvent::Success);
}
115✔
128

129
// Initializes the backoff used for inventory retries from
// retry_interval_seconds (converted to chrono::seconds) and retry_count.
SubmitInventoryState::SubmitInventoryState(int retry_interval_seconds, int retry_count) :
94✔
130
        backoff_ {chrono::seconds(retry_interval_seconds), retry_count} {
188✔
131
}
94✔
132

133
// Called after a failed inventory submission. Asks the backoff for the next
// retry interval and, if one is available, cancels the inventory timer and
// re-arms it with that interval, so that InventoryPollingTriggered is posted
// when it fires. If the backoff has no more intervals to give, the inventory
// timer is left untouched.
void SubmitInventoryState::HandlePollingError(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        // When using short polling intervals, we should adjust the backoff to ensure
        // that the intervals do not exceed the maximum retry polling interval, which
        // converts the backoff to a fixed interval.
        chrono::milliseconds max_interval =
                chrono::seconds(ctx.mender_context.GetConfig().retry_poll_interval_seconds);
        if (max_interval < backoff_.SmallestInterval()) {
                backoff_.SetSmallestInterval(max_interval);
                backoff_.SetMaxInterval(max_interval);
        }

        auto exp_interval = backoff_.NextInterval();
        if (!exp_interval) {
                // Message wording kept parallel to the deployment polling counterpart.
                log::Debug(
                        "Not retrying with backoff, retrying with InventoryPollIntervalSeconds: "
                        + exp_interval.error().String());
                return;
        }
        log::Info(
                "Retrying inventory polling in "
                + to_string(chrono::duration_cast<chrono::seconds>(*exp_interval).count()) + " seconds");

        // Replace whatever wait is pending on the inventory timer with the backoff interval.
        ctx.inventory_timer.Cancel();
        ctx.inventory_timer.AsyncWait(*exp_interval, [&poster](error::Error err) {
                if (err != error::NoError) {
                        // A cancelled timer is not logged; any other error is.
                        if (err.code != make_error_condition(errc::operation_canceled)) {
                                log::Error("Retry poll timer caused error: " + err.String());
                        }
                } else {
                        poster.PostEvent(StateEvent::InventoryPollingTriggered);
                }
        });
}
165

166
// Pushes the inventory data to the server. On success the backoff is reset,
// the inventory is marked as submitted and Success is posted. On failure the
// inventory timer is replaced by a backoff retry and Failure is posted.
void SubmitInventoryState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        log::Debug("Submitting inventory");

        auto on_result = [this, &ctx, &poster](error::Error err) {
                if (err != error::NoError) {
                        log::Error("Failed to submit inventory: " + err.String());
                        // Replace the inventory poll timer with a backoff
                        HandlePollingError(ctx, poster);
                        poster.PostEvent(StateEvent::Failure);
                } else {
                        backoff_.Reset();
                        ctx.inventory_client->has_submitted_inventory = true;
                        poster.PostEvent(StateEvent::Success);
                }
        };

        auto push_err = ctx.inventory_client->PushData(
                ctx.mender_context.GetConfig().paths.GetInventoryScriptsDir(),
                ctx.event_loop,
                ctx.http_client,
                on_result);
        if (push_err != error::NoError) {
                // This is the only case the handler won't be called for us by
                // PushData() (see inventory::PushInventoryData()).
                on_result(push_err);
        }
}
57✔
194

195
// Initializes the backoff used for deployment polling retries from
// retry_interval_seconds (converted to chrono::seconds) and retry_count.
PollForDeploymentState::PollForDeploymentState(int retry_interval_seconds, int retry_count) :
94✔
196
        backoff_ {chrono::seconds(retry_interval_seconds), retry_count} {
188✔
197
}
94✔
198

199
// Called after a failed deployment poll. Asks the backoff for the next retry
// interval and, if one is available, cancels the deployment timer and re-arms
// it with that interval, so that DeploymentPollingTriggered is posted when it
// fires. If the backoff has no more intervals to give, the deployment timer is
// left untouched.
void PollForDeploymentState::HandlePollingError(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        // With short polling intervals the backoff must never wait longer than
        // the configured retry poll interval. Clamping both ends to that value
        // turns the backoff into a fixed interval.
        chrono::milliseconds retry_cap =
                chrono::seconds(ctx.mender_context.GetConfig().retry_poll_interval_seconds);
        if (retry_cap < backoff_.SmallestInterval()) {
                backoff_.SetSmallestInterval(retry_cap);
                backoff_.SetMaxInterval(retry_cap);
        }

        auto next_wait = backoff_.NextInterval();
        if (!next_wait) {
                log::Debug(
                        "Not retrying with backoff, retrying with UpdatePollIntervalSeconds: "
                        + next_wait.error().String());
                return;
        }

        const auto wait_seconds = chrono::duration_cast<chrono::seconds>(*next_wait).count();
        log::Info("Retrying deployment polling in " + to_string(wait_seconds) + " seconds");

        auto on_timer = [&poster](error::Error err) {
                if (err != error::NoError) {
                        // A cancelled timer is not logged; any other error is.
                        if (err.code != make_error_condition(errc::operation_canceled)) {
                                log::Error("Retry poll timer caused error: " + err.String());
                        }
                        return;
                }
                poster.PostEvent(StateEvent::DeploymentPollingTriggered);
        };

        // Replace whatever wait is pending on the deployment timer.
        ctx.deployment_timer.Cancel();
        ctx.deployment_timer.AsyncWait(*next_wait, on_timer);
}
231

232
// Asks the server whether a new deployment is available.
//
// Events posted from the response callback:
//  - Failure: the request failed (a backoff retry is scheduled first), or the
//    response could not be converted to state data.
//  - NothingToDo: no deployment is available. InventoryPollingTriggered is
//    posted in addition if inventory has never been submitted successfully.
//  - DeploymentStarted followed by Success: a deployment is available and its
//    state data has been stored in the context.
//
// Failure is also posted if the request cannot be started at all.
void PollForDeploymentState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        log::Debug("Polling for update");

        auto on_response = [this, &ctx, &poster](
                                   mender::update::deployments::CheckUpdatesAPIResponse response) {
                if (!response) {
                        log::Error("Error while polling for deployment: " + response.error().String());
                        // Replace the update poll timer with a backoff
                        HandlePollingError(ctx, poster);
                        poster.PostEvent(StateEvent::Failure);
                        return;
                }

                if (!response.value()) {
                        log::Info("No update available");
                        poster.PostEvent(StateEvent::NothingToDo);
                        if (not ctx.inventory_client->has_submitted_inventory) {
                                // Inventory has not been submitted successfully even once.
                                // Trigger it after this successful "no update" response, so
                                // that it gets submitted right after the device has been
                                // accepted. When there is an update, inventory submission
                                // is scheduled unconditionally at the end of it instead.
                                poster.PostEvent(StateEvent::InventoryPollingTriggered);
                        }

                        backoff_.Reset();
                        return;
                }
                backoff_.Reset();

                auto exp_state_data = ApiResponseJsonToStateData(response.value().value());
                if (!exp_state_data) {
                        log::Error("Error in API response: " + exp_state_data.error().String());
                        poster.PostEvent(StateEvent::Failure);
                        return;
                }

                // Make a new set of update data.
                ctx.deployment.state_data.reset(new StateData(std::move(exp_state_data.value())));

                ctx.BeginDeploymentLogging();

                log::Info("Running Mender client " + conf::kMenderVersion);
                log::Info(
                        "Deployment with ID " + ctx.deployment.state_data->update_info.id + " started.");

                poster.PostEvent(StateEvent::DeploymentStarted);
                poster.PostEvent(StateEvent::Success);
        };

        auto start_err =
                ctx.deployment_client->CheckNewDeployments(ctx.mender_context, ctx.http_client, on_response);
        if (start_err != error::NoError) {
                // NOTE(review): unlike SubmitInventoryState::OnEnter, this path does not
                // call HandlePollingError(), so no backoff retry is scheduled here.
                // Confirm that this is intended.
                log::Error("Error when trying to poll for deployment: " + start_err.String());
                poster.PostEvent(StateEvent::Failure);
        }
}
57✔
288

289
// Stores the current state in the deployment state data, saves it to the
// database, and then hands over to OnEnterSaveState().
//
// If saving fails, StateLoopDetected is posted when the error is the store
// count exceeded error. Otherwise Failure is posted, unless this is a failure
// state, in which case the state proceeds despite the database error.
void SaveState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        assert(ctx.deployment.state_data);

        ctx.deployment.state_data->state = DatabaseStateString();

        log::Trace("Storing deployment state in the DB (database-string): " + DatabaseStateString());

        auto save_err = ctx.SaveDeploymentStateData(*ctx.deployment.state_data);
        if (save_err != error::NoError) {
                log::Error(save_err.String());

                const auto loop_code =
                        main_context::MakeError(main_context::StateDataStoreCountExceededError, "").code;
                if (save_err.code == loop_code) {
                        poster.PostEvent(StateEvent::StateLoopDetected);
                        return;
                }
                if (!IsFailureState()) {
                        // Non-failure states are interrupted here. Failure states must be
                        // able to do their work even though a database error was detected.
                        poster.PostEvent(StateEvent::Failure);
                        return;
                }
        }

        OnEnterSaveState(ctx, poster);
}
313

314
// Starts fetching the artifact from the URI stored in the deployment state
// data. Once the response header has arrived with status OK, the body reader
// is stored in the context and ParseArtifact() takes over. Every error on the
// way is logged and posted as Failure, except in the body handler (see below).
void UpdateDownloadState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        log::Debug("Entering Download state");

        auto req = make_shared<http::OutgoingRequest>();
        req->SetMethod(http::Method::GET);
        auto err = req->SetAddress(ctx.deployment.state_data->update_info.artifact.source.uri);
        if (err != error::NoError) {
                log::Error(err.String());
                poster.PostEvent(StateEvent::Failure);
                return;
        }

        auto on_header = [&ctx, &poster](http::ExpectedIncomingResponsePtr exp_resp) {
                if (!exp_resp) {
                        log::Error("Unexpected error during download: " + exp_resp.error().String());
                        poster.PostEvent(StateEvent::Failure);
                        return;
                }

                auto &resp = exp_resp.value();
                if (resp->GetStatusCode() != http::StatusOK) {
                        log::Error(
                                "Unexpected status code while fetching artifact: " + resp->GetStatusMessage());
                        poster.PostEvent(StateEvent::Failure);
                        return;
                }

                auto body_reader = resp->MakeBodyAsyncReader();
                if (!body_reader) {
                        log::Error(body_reader.error().String());
                        poster.PostEvent(StateEvent::Failure);
                        return;
                }
                ctx.deployment.artifact_reader =
                        make_shared<events::io::ReaderFromAsyncReader>(ctx.event_loop, body_reader.value());
                ParseArtifact(ctx, poster);
        };

        auto on_body = [](http::ExpectedIncomingResponsePtr exp_resp) {
                if (!exp_resp) {
                        // The error can only be logged here: this handler runs at the end
                        // of the download, when this state has already been left. The
                        // error is expected to reach us through the body reader set up in
                        // the header handler instead.
                        log::Error(exp_resp.error().String());
                }
        };

        err = ctx.download_client->AsyncCall(req, on_header, on_body);
        if (err != error::NoError) {
                log::Error(err.String());
                poster.PostEvent(StateEvent::Failure);
        }
}
370

371
// Parses the artifact from the reader stored in the context and prepares the
// download of its payload:
//  1. clear the artifact scripts directory and parse the artifact,
//  2. check that the artifact's depends match this device,
//  3. fill in and save the deployment state data,
//  4. set up the update module and its file tree,
//  5. query whether file sizes should be provided, then call DoDownload().
//
// Posts NothingToDo for an artifact with an empty payload type,
// StateLoopDetected if the state data store count is exceeded, and Failure on
// every other error.
void UpdateDownloadState::ParseArtifact(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        string art_scripts_path = ctx.mender_context.GetConfig().paths.GetArtScriptsPath();

        // Clear the artifact scripts directory so we don't risk old scripts lingering.
        auto err = path::DeleteRecursively(art_scripts_path);
        if (err != error::NoError) {
                log::Error("When preparing to parse artifact: " + err.String());
                poster.PostEvent(StateEvent::Failure);
                return;
        }

        artifact::config::ParserConfig config {
                .artifact_scripts_filesystem_path = art_scripts_path,
                .artifact_scripts_version = 3,
                .artifact_verify_keys = ctx.mender_context.GetConfig().artifact_verify_keys,
        };
        auto exp_parser = artifact::Parse(*ctx.deployment.artifact_reader, config);
        if (!exp_parser) {
                log::Error(exp_parser.error().String());
                poster.PostEvent(StateEvent::Failure);
                return;
        }
        ctx.deployment.artifact_parser.reset(new artifact::Artifact(std::move(exp_parser.value())));

        auto exp_header = artifact::View(*ctx.deployment.artifact_parser, 0);
        if (!exp_header) {
                log::Error(exp_header.error().String());
                poster.PostEvent(StateEvent::Failure);
                return;
        }
        auto &header = exp_header.value();

        auto exp_matches = ctx.mender_context.MatchesArtifactDepends(header.header);
        if (!exp_matches) {
                log::Error(exp_matches.error().String());
                poster.PostEvent(StateEvent::Failure);
                return;
        }
        if (!exp_matches.value()) {
                // reasons already logged
                poster.PostEvent(StateEvent::Failure);
                return;
        }

        log::Info("Installing artifact...");

        ctx.deployment.state_data->FillUpdateDataFromArtifact(header);

        ctx.deployment.state_data->state = Context::kUpdateStateDownload;

        assert(ctx.deployment.state_data->update_info.artifact.payload_types.size() == 1);

        // Initial state data save, now that we have enough information from the artifact.
        err = ctx.SaveDeploymentStateData(*ctx.deployment.state_data);
        if (err != error::NoError) {
                log::Error(err.String());
                const bool loop_detected =
                        err.code
                        == main_context::MakeError(main_context::StateDataStoreCountExceededError, "").code;
                poster.PostEvent(loop_detected ? StateEvent::StateLoopDetected : StateEvent::Failure);
                return;
        }

        if (header.header.payload_type == "") {
                // Empty-payload-artifact, aka "bootstrap artifact".
                poster.PostEvent(StateEvent::NothingToDo);
                return;
        }

        ctx.deployment.update_module.reset(
                new update_module::UpdateModule(ctx.mender_context, header.header.payload_type));

        err = ctx.deployment.update_module->CleanAndPrepareFileTree(
                ctx.deployment.update_module->GetUpdateModuleWorkDir(), header);
        if (err != error::NoError) {
                log::Error(err.String());
                poster.PostEvent(StateEvent::Failure);
                return;
        }

        auto on_sizes_answer = [&ctx, &poster](expected::ExpectedBool download_with_sizes) {
                if (!download_with_sizes.has_value()) {
                        log::Error(download_with_sizes.error().String());
                        poster.PostEvent(StateEvent::Failure);
                        return;
                }
                ctx.deployment.download_with_sizes = download_with_sizes.value();
                DoDownload(ctx, poster);
        };
        err = ctx.deployment.update_module->AsyncProvidePayloadFileSizes(ctx.event_loop, on_sizes_answer);
        if (err != error::NoError) {
                log::Error(err.String());
                poster.PostEvent(StateEvent::Failure);
        }
}
470

471
// Takes the next payload from the artifact parser and hands it to the update
// module for download, with or without file sizes depending on
// ctx.deployment.download_with_sizes. When the download has finished, Success
// is posted only if the parser then reports end of file. A second payload, or
// any error, results in Failure.
void UpdateDownloadState::DoDownload(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        auto exp_first = ctx.deployment.artifact_parser->Next();
        if (!exp_first) {
                log::Error(exp_first.error().String());
                poster.PostEvent(StateEvent::Failure);
                return;
        }
        ctx.deployment.artifact_payload.reset(new artifact::Payload(std::move(exp_first.value())));

        auto on_downloaded = [&poster, &ctx](error::Error err) {
                if (err != error::NoError) {
                        log::Error(err.String());
                        poster.PostEvent(StateEvent::Failure);
                        return;
                }

                // Exactly one payload is expected, so the parser must now be at the end.
                auto exp_extra = ctx.deployment.artifact_parser->Next();
                if (exp_extra) {
                        log::Error("Multiple payloads are not yet supported in daemon mode.");
                        poster.PostEvent(StateEvent::Failure);
                        return;
                }

                const auto eof_code =
                        artifact::parser_error::MakeError(artifact::parser_error::EOFError, "").code;
                if (exp_extra.error().code != eof_code) {
                        log::Error(exp_extra.error().String());
                        poster.PostEvent(StateEvent::Failure);
                        return;
                }

                poster.PostEvent(StateEvent::Success);
        };

        if (ctx.deployment.download_with_sizes) {
                ctx.deployment.update_module->AsyncDownloadWithFileSizes(
                        ctx.event_loop, *ctx.deployment.artifact_payload, on_downloaded);
        } else {
                ctx.deployment.update_module->AsyncDownload(
                        ctx.event_loop, *ctx.deployment.artifact_payload, on_downloaded);
        }
}
511

512
// Cancels the download client and then posts Success unconditionally.
void UpdateDownloadCancelState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
6✔
513
        log::Debug("Entering DownloadCancel state");
12✔
514
        ctx.download_client->Cancel();
6✔
515
        poster.PostEvent(StateEvent::Success);
6✔
516
}
6✔
517

518
// Constructs the state in FailureMode::Ignore. retry_ is not initialized here.
// `status` is the status to send; when it holds no value, DoStatusUpdate()
// picks Failure or Success from ctx.deployment.failed.
SendStatusUpdateState::SendStatusUpdateState(optional<deployments::DeploymentStatus> status) :
×
519
        status_(status),
520
        mode_(FailureMode::Ignore) {
×
521
}
×
522

523
// Constructs the state in FailureMode::RetryThenFail. retry_ is set up with an
// exponential backoff built from retry_interval_seconds (converted to
// chrono::seconds) and retry_count, together with event_loop.
// `status` is the status to send; when it holds no value, DoStatusUpdate()
// picks Failure or Success from ctx.deployment.failed.
SendStatusUpdateState::SendStatusUpdateState(
188✔
524
        optional<deployments::DeploymentStatus> status,
525
        events::EventLoop &event_loop,
526
        int retry_interval_seconds,
527
        int retry_count) :
528
        status_(status),
529
        mode_(FailureMode::RetryThenFail),
530
        retry_(Retry {
188✔
531
                http::ExponentialBackoff(chrono::seconds(retry_interval_seconds), retry_count),
532
                event_loop}) {
564✔
533
}
188✔
534

535
// Sets the smallest interval of the retry backoff. Does nothing when the state
// has no retry configured.
void SendStatusUpdateState::SetSmallestWaitInterval(chrono::milliseconds interval) {
        if (!retry_) {
                return;
        }
        retry_->backoff.SetSmallestInterval(interval);
}
178✔
540

541
// Starts sending the status update. The events are posted from
// DoStatusUpdate() and its callbacks.
void SendStatusUpdateState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
242✔
542
        // Reset this every time we enter the state, which means a new round of retries.
543
        if (retry_) {
242✔
544
                retry_->backoff.Reset();
545
        }
546

547
        DoStatusUpdate(ctx, poster);
242✔
548
}
242✔
549

550
// Pushes the deployment status to the server, followed by the deployment logs
// if the status was sent without error and the deployment has failed.
//
// Outcome, as decided by the result handler:
//  - no error: Success.
//  - error in FailureMode::Ignore: logged, then Success.
//  - error in FailureMode::RetryThenFail: Failure right away if the deployment
//    was aborted, or if the backoff has no more intervals to give. Otherwise a
//    timer is armed and DoStatusUpdate() is called again when it fires.
void SendStatusUpdateState::DoStatusUpdate(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        assert(ctx.deployment_client);
        assert(ctx.deployment.state_data);

        log::Info("Sending status update to server");

        auto result_handler = [this, &ctx, &poster](const error::Error &err) {
                if (err != error::NoError) {
                        log::Error("Could not send deployment status: " + err.String());

                        if (mode_ == FailureMode::RetryThenFail) {
                                const auto aborted_code =
                                        deployments::MakeError(deployments::DeploymentAbortedError, "").code;
                                if (err.code == aborted_code) {
                                        // A deployment aborted upstream fails immediately,
                                        // even though retry is enabled.
                                        poster.PostEvent(StateEvent::Failure);
                                        return;
                                }

                                auto exp_interval = retry_->backoff.NextInterval();
                                if (!exp_interval) {
                                        log::Error(
                                                "Giving up on sending status updates to server: "
                                                + exp_interval.error().String());
                                        poster.PostEvent(StateEvent::Failure);
                                        return;
                                }

                                log::Info(
                                        "Retrying status update after "
                                        + to_string(chrono::duration_cast<chrono::seconds>(*exp_interval).count())
                                        + " seconds");
                                retry_->wait_timer.AsyncWait(
                                        *exp_interval, [this, &ctx, &poster](error::Error timer_err) {
                                                // A timer error is quite unexpected, so it is an
                                                // immediate failure despite retry being enabled.
                                                if (timer_err != error::NoError) {
                                                        log::Error(
                                                                "Unexpected error in SendStatusUpdateState wait timer: "
                                                                + timer_err.String());
                                                        poster.PostEvent(StateEvent::Failure);
                                                        return;
                                                }

                                                // Try again. Both status and logs are sent from
                                                // here, so the status may get resubmitted. That is
                                                // harmless and will not happen often.
                                                DoStatusUpdate(ctx, poster);
                                        });
                                return;
                        }
                        // FailureMode::Ignore: the error has been logged, carry on as
                        // if the update had been sent.
                }

                poster.PostEvent(StateEvent::Success);
        };

        // Without an explicit status, derive success/failure from the deployment itself.
        const deployments::DeploymentStatus status = status_
                ? status_.value()
                : (ctx.deployment.failed ? deployments::DeploymentStatus::Failure
                                         : deployments::DeploymentStatus::Success);

        // Push status.
        log::Debug("Pushing deployment status: " + DeploymentStatusString(status));
        auto on_status_pushed = [result_handler, &ctx](error::Error status_err) {
                // On error the logs are not submitted now. The handler is called
                // instead, and it may schedule a retry. Without an error, and with a
                // deployment that did not fail, there are no logs to submit, so the
                // handler is called right away as well.
                if (status_err != error::NoError || !ctx.deployment.failed) {
                        result_handler(status_err);
                        return;
                }

                // Push logs.
                status_err = ctx.deployment_client->PushLogs(
                        ctx.deployment.state_data->update_info.id,
                        ctx.deployment.logger->LogFilePath(),
                        ctx.http_client,
                        result_handler);
                if (status_err != error::NoError) {
                        result_handler(status_err);
                }
        };
        auto push_err = ctx.deployment_client->PushStatus(
                ctx.deployment.state_data->update_info.id, status, "", ctx.http_client, on_status_pushed);
        if (push_err != error::NoError) {
                result_handler(push_err);
        }

        // No action, wait for reply from status endpoint.
}
261✔
657

658
// Starts the update module's asynchronous ArtifactInstall call. The call is given a
// DefaultStateHandler built from `poster`, and whatever error the launch itself returns is
// passed on to DefaultAsyncErrorHandler.
void UpdateInstallState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        log::Debug("Entering ArtifactInstall state");

        auto launch_err = ctx.deployment.update_module->AsyncArtifactInstall(
                ctx.event_loop, DefaultStateHandler {poster});
        DefaultAsyncErrorHandler(poster, launch_err);
}
42✔
666

667
// Asks the update module whether a reboot is needed, records the answer in the deployment
// state data and posts the matching event:
//   - query failed          -> error is logged, StateEvent::Failure
//   - RebootAction::No      -> StateEvent::NothingToDo
//   - Yes / Automatic       -> StateEvent::Success
// An error returned by the launch of the query itself goes to DefaultAsyncErrorHandler.
void UpdateCheckRebootState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        auto on_answer = [&ctx, &poster](update_module::ExpectedRebootAction reboot_action) {
                if (!reboot_action.has_value()) {
                        log::Error(reboot_action.error().String());
                        poster.PostEvent(StateEvent::Failure);
                        return;
                }

                // Store the answer as the single entry of `reboot_requested`.
                auto &requested = ctx.deployment.state_data->update_info.reboot_requested;
                requested.resize(1);
                requested[0] = NeedsRebootToDbString(*reboot_action);

                const auto action = *reboot_action;
                if (action == update_module::RebootAction::No) {
                        poster.PostEvent(StateEvent::NothingToDo);
                } else if (
                        action == update_module::RebootAction::Yes
                        || action == update_module::RebootAction::Automatic) {
                        poster.PostEvent(StateEvent::Success);
                }
        };

        auto launch_err =
                ctx.deployment.update_module->AsyncNeedsReboot(ctx.event_loop, on_answer);
        DefaultAsyncErrorHandler(poster, launch_err);
}
73✔
692

693
// Enters the ArtifactReboot state. The reboot mode stored in
// `update_info.reboot_requested[0]` decides what is launched:
//   - RebootAction::Yes       -> the update module's AsyncArtifactReboot
//   - RebootAction::Automatic -> the update module's AsyncSystemReboot
//   - RebootAction::No        -> programming error; logged and StateEvent::Failure posted
// Completion of the launched call is reported through a DefaultStateHandler built from
// `poster`; an error returned by the launch itself goes to DefaultAsyncErrorHandler.
void UpdateRebootState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        log::Debug("Entering ArtifactReboot state");

        assert(ctx.deployment.state_data->update_info.reboot_requested.size() == 1);
        auto exp_reboot_mode =
                DbStringToNeedsReboot(ctx.deployment.state_data->update_info.reboot_requested[0]);
        // Should always be true because we check it at load time.
        assert(exp_reboot_mode);

        switch (exp_reboot_mode.value()) {
        case update_module::RebootAction::No:
                // Should not happen because then we don't enter this state.
                assert(false);
                // The assert above compiles away under NDEBUG, so leave a trace of why the
                // deployment is being failed.
                log::Error("Entered UpdateRebootState with RebootAction = No");
                poster.PostEvent(StateEvent::Failure);
                break;
        case update_module::RebootAction::Yes:
                DefaultAsyncErrorHandler(
                        poster,
                        ctx.deployment.update_module->AsyncArtifactReboot(
                                ctx.event_loop, DefaultStateHandler {poster}));
                break;
        case update_module::RebootAction::Automatic:
                DefaultAsyncErrorHandler(
                        poster,
                        ctx.deployment.update_module->AsyncSystemReboot(
                                ctx.event_loop, DefaultStateHandler {poster}));
                break;
        }
}
27✔
722

723
// Enters the ArtifactVerifyReboot state: first calls EnsureRootfsImageFileTree on the update
// module's work directory, then launches the asynchronous ArtifactVerifyReboot call with a
// DefaultStateHandler built from `poster`. An error returned by the launch goes to
// DefaultAsyncErrorHandler.
void UpdateVerifyRebootState::OnEnterSaveState(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        log::Debug("Entering ArtifactVerifyReboot state");

        auto &update_mod = *ctx.deployment.update_module;

        // NOTE(review): any result of EnsureRootfsImageFileTree is not inspected here --
        // confirm that this is intended.
        update_mod.EnsureRootfsImageFileTree(update_mod.GetUpdateModuleWorkDir());

        auto launch_err =
                update_mod.AsyncArtifactVerifyReboot(ctx.event_loop, DefaultStateHandler {poster});
        DefaultAsyncErrorHandler(poster, launch_err);
}
30✔
734

735
// Expires the HTTP client's current token and immediately posts StateEvent::Success.
void UpdateBeforeCommitState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        // The update that was just installed may have changed our credentials, so the token we
        // currently hold must not be reused: expire it to force a login from scratch.
        ctx.http_client.ExpireToken();

        poster.PostEvent(StateEvent::Success);
}
23✔
742

743
// Enters the ArtifactCommit state. The state scripts under the configured rootfs scripts
// path are checked for compatibility first; if that check reports an error it is logged and
// StateEvent::Failure is posted. Otherwise the update module's asynchronous ArtifactCommit
// call is launched with a DefaultStateHandler built from `poster`, and an error returned by
// the launch goes to DefaultAsyncErrorHandler.
void UpdateCommitState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        log::Debug("Entering ArtifactCommit state");

        // Explicit check that the state scripts version is supported.
        auto compat_err = script_executor::CheckScriptsCompatibility(
                ctx.mender_context.GetConfig().paths.GetRootfsScriptsPath());
        if (compat_err != error::NoError) {
                log::Error("Failed script compatibility check: " + compat_err.String());
                poster.PostEvent(StateEvent::Failure);
                return;
        }

        auto launch_err = ctx.deployment.update_module->AsyncArtifactCommit(
                ctx.event_loop, DefaultStateHandler {poster});
        DefaultAsyncErrorHandler(poster, launch_err);
}
760

761
// Runs after the commit. If the state data is flagged as carrying a DB schema update, the
// flag is cleared and the state data is saved again; a failing save is logged and reported
// as StateEvent::Failure. In every other case StateEvent::Success is posted.
void UpdateAfterCommitState::OnEnterSaveState(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        assert(ctx.deployment.state_data);
        auto &update_info = ctx.deployment.state_data->update_info;

        if (!update_info.has_db_schema_update) {
                // Nothing to re-save.
                poster.PostEvent(StateEvent::Success);
                return;
        }

        // We have committed, so re-save the state data with the new schema.
        update_info.has_db_schema_update = false;
        auto save_err = ctx.SaveDeploymentStateData(*ctx.deployment.state_data);
        if (save_err != error::NoError) {
                log::Error("Not able to commit schema update: " + save_err.String());
                poster.PostEvent(StateEvent::Failure);
                return;
        }

        poster.PostEvent(StateEvent::Success);
}
777

778
// Asks the update module whether it supports rollback, records the answer in the deployment
// state data and posts the matching event(s):
//   - query failed     -> error is logged, StateEvent::Failure
//   - rollback support -> StateEvent::RollbackStarted followed by StateEvent::Success
//   - no support       -> StateEvent::NothingToDo
// An error returned by the launch of the query itself goes to DefaultAsyncErrorHandler.
void UpdateCheckRollbackState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        auto on_answer = [&ctx, &poster](expected::ExpectedBool rollback_supported) {
                if (!rollback_supported.has_value()) {
                        log::Error(rollback_supported.error().String());
                        poster.PostEvent(StateEvent::Failure);
                        return;
                }

                const bool supported = *rollback_supported;
                ctx.deployment.state_data->update_info.supports_rollback =
                        SupportsRollbackToDbString(supported);

                if (!supported) {
                        poster.PostEvent(StateEvent::NothingToDo);
                        return;
                }

                poster.PostEvent(StateEvent::RollbackStarted);
                poster.PostEvent(StateEvent::Success);
        };

        auto launch_err =
                ctx.deployment.update_module->AsyncSupportsRollback(ctx.event_loop, on_answer);
        DefaultAsyncErrorHandler(poster, launch_err);
}
45✔
799

800
// Starts the update module's asynchronous ArtifactRollback call with a DefaultStateHandler
// built from `poster`. An error returned by the launch itself goes to
// DefaultAsyncErrorHandler.
void UpdateRollbackState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        log::Debug("Entering ArtifactRollback state");

        auto launch_err = ctx.deployment.update_module->AsyncArtifactRollback(
                ctx.event_loop, DefaultStateHandler {poster});
        DefaultAsyncErrorHandler(poster, launch_err);
}
41✔
808

809
// Enters the ArtifactRollbackReboot state. The reboot mode stored in
// `update_info.reboot_requested[0]` decides what is launched:
//   - RebootAction::Yes       -> the update module's AsyncArtifactRollbackReboot
//   - RebootAction::Automatic -> the update module's AsyncSystemReboot
//   - RebootAction::No        -> programming error (should be unreachable)
// Errors are only logged here: StateEvent::Success is posted both when the asynchronous
// call completes (with or without error) and when launching it fails.
void UpdateRollbackRebootState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        log::Debug("Entering ArtifactRollbackReboot state");

        // Same precondition as in UpdateRebootState: element 0 is read below, so exactly one
        // entry must have been recorded.
        assert(ctx.deployment.state_data->update_info.reboot_requested.size() == 1);
        auto exp_reboot_mode =
                DbStringToNeedsReboot(ctx.deployment.state_data->update_info.reboot_requested[0]);
        // Should always be true because we check it at load time.
        assert(exp_reboot_mode);

        // We ignore errors in this state as long as the ArtifactVerifyRollbackReboot state
        // succeeds.
        auto handler = [&poster](error::Error err) {
                if (err != error::NoError) {
                        log::Error(err.String());
                }
                poster.PostEvent(StateEvent::Success);
        };

        error::Error err;
        switch (exp_reboot_mode.value()) {
        case update_module::RebootAction::No:
                // Should not happen because then we don't enter this state.
                assert(false);

                err = error::MakeError(
                        error::ProgrammingError, "Entered UpdateRollbackRebootState with RebootAction = No");
                break;

        case update_module::RebootAction::Yes:
                err = ctx.deployment.update_module->AsyncArtifactRollbackReboot(ctx.event_loop, handler);
                break;

        case update_module::RebootAction::Automatic:
                err = ctx.deployment.update_module->AsyncSystemReboot(ctx.event_loop, handler);
                break;
        }

        // A failure to even launch the call is treated like a failure reported by the
        // handler: log it and carry on.
        if (err != error::NoError) {
                log::Error(err.String());
                poster.PostEvent(StateEvent::Success);
        }
}
57✔
850

851
void UpdateVerifyRollbackRebootState::OnEnterSaveState(
60✔
852
        Context &ctx, sm::EventPoster<StateEvent> &poster) {
853
        log::Debug("Entering ArtifactVerifyRollbackReboot state");
120✔
854

855
        // In this state we only retry, we don't fail. If this keeps on going forever, then the
856
        // state loop detection will eventually kick in.
857
        auto err = ctx.deployment.update_module->AsyncArtifactVerifyRollbackReboot(
858
                ctx.event_loop, [&poster](error::Error err) {
120✔
859
                        if (err != error::NoError) {
60✔
860
                                log::Error(err.String());
22✔
861
                                poster.PostEvent(StateEvent::Retry);
22✔
862
                                return;
22✔
863
                        }
864
                        poster.PostEvent(StateEvent::Success);
38✔
865
                });
60✔
866
        if (err != error::NoError) {
60✔
867
                log::Error(err.String());
×
868
                poster.PostEvent(StateEvent::Retry);
×
869
        }
870
}
60✔
871

872
// Sets `all_rollbacks_successful` in the deployment's update info and posts
// StateEvent::Success.
void UpdateRollbackSuccessfulState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        auto &update_info = ctx.deployment.state_data->update_info;
        update_info.all_rollbacks_successful = true;

        poster.PostEvent(StateEvent::Success);
}
50✔
876

877
// Starts the update module's asynchronous ArtifactFailure call with a DefaultStateHandler
// built from `poster`. An error returned by the launch itself goes to
// DefaultAsyncErrorHandler.
void UpdateFailureState::OnEnterSaveState(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        log::Debug("Entering ArtifactFailure state");

        auto launch_err = ctx.deployment.update_module->AsyncArtifactFailure(
                ctx.event_loop, DefaultStateHandler {poster});
        DefaultAsyncErrorHandler(poster, launch_err);
}
55✔
885

886
// Returns a copy of `str` that ends with MenderContext::broken_artifact_name_suffix. The
// suffix is appended only if `str` does not already end with it.
static string AddInconsistentSuffix(const string &str) {
        const auto &suffix = main_context::MenderContext::broken_artifact_name_suffix;

        string result {str};
        // `string::ends_with` would do this, but it is C++20.
        if (common::EndsWith(result, suffix)) {
                return result;
        }

        result.append(suffix);
        return result;
}
895

896
// Commits the artifact data of the current deployment together with the Cleanup state.
//
// When the deployment failed but the rollback did not, nothing is written and
// StateEvent::Success is posted right away. Otherwise the artifact name (with the
// inconsistent suffix added when the rollback failed), the artifact group and, for a
// non-failed deployment, the provides / clears-provides data are committed. A commit error
// is logged and results in StateEvent::StateLoopDetected when its code equals that of
// StateDataStoreCountExceededError, and in StateEvent::Failure otherwise.
void UpdateSaveProvidesState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        const bool rolled_back_cleanly = ctx.deployment.failed && !ctx.deployment.rollback_failed;
        if (rolled_back_cleanly) {
                // The update failed but the rollback worked: keep the old data untouched.
                poster.PostEvent(StateEvent::Success);
                return;
        }

        assert(ctx.deployment.state_data);
        // This state should never happen: rollback failed, but update not failed??
        assert(!(!ctx.deployment.failed && ctx.deployment.rollback_failed));

        // We expect Cleanup to be the next state after this.
        ctx.deployment.state_data->state = ctx.kUpdateStateCleanup;

        auto &artifact = ctx.deployment.state_data->update_info.artifact;

        string artifact_name = ctx.deployment.rollback_failed
                                                           ? AddInconsistentSuffix(artifact.artifact_name)
                                                           : artifact.artifact_name;

        const bool deploy_failed = ctx.deployment.failed;

        // To stay consistent with the old client behaviour, a failing update commits only the
        // artifact_name and the group.
        auto commit_err = ctx.mender_context.CommitArtifactData(
                artifact_name,
                artifact.artifact_group,
                deploy_failed ? nullopt : optional<context::ProvidesData>(artifact.type_info_provides),
                /* Special case: Keep existing provides */
                deploy_failed ? context::ClearsProvidesData {}
                                          : optional<context::ClearsProvidesData>(artifact.clears_artifact_provides),
                [&ctx](kv_db::Transaction &txn) {
                        // The Cleanup state is saved atomically with the artifact data.
                        return ctx.SaveDeploymentStateData(txn, *ctx.deployment.state_data);
                });
        if (commit_err == error::NoError) {
                poster.PostEvent(StateEvent::Success);
                return;
        }

        log::Error("Error saving artifact data: " + commit_err.String());
        const bool store_count_exceeded =
                commit_err.code
                == main_context::MakeError(main_context::StateDataStoreCountExceededError, "").code;
        poster.PostEvent(
                store_count_exceeded ? StateEvent::StateLoopDetected : StateEvent::Failure);
}
949

950
// Enters the ArtifactCleanup state. Without an update module, StateEvent::Success is posted
// directly; otherwise the module's asynchronous Cleanup call is launched with a
// DefaultStateHandler built from `poster`, and an error returned by the launch goes to
// DefaultAsyncErrorHandler.
void UpdateCleanupState::OnEnterSaveState(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        log::Debug("Entering ArtifactCleanup state");

        // The update_module structure may never have been initialized, namely when the
        // deployment failed before the artifact could be parsed successfully. Cleanup is then
        // a no-op.
        const bool have_update_module = static_cast<bool>(ctx.deployment.update_module);
        if (!have_update_module) {
                poster.PostEvent(StateEvent::Success);
                return;
        }

        auto launch_err =
                ctx.deployment.update_module->AsyncCleanup(ctx.event_loop, DefaultStateHandler {poster});
        DefaultAsyncErrorHandler(poster, launch_err);
}
965

966
// Removes both the `state_data_key` and the `state_data_key_uncommitted` entries from the
// Mender store inside one write transaction. A transaction error is logged and reported as
// StateEvent::Failure; otherwise StateEvent::Success is posted.
void ClearArtifactDataState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        // We are done with the deployment, so its state data can go.
        auto remove_state_data = [](kv_db::Transaction &txn) {
                auto remove_err = txn.Remove(main_context::MenderContext::state_data_key);
                if (remove_err != error::NoError) {
                        return remove_err;
                }
                return txn.Remove(main_context::MenderContext::state_data_key_uncommitted);
        };

        auto txn_err = ctx.mender_context.GetMenderStoreDB().WriteTransaction(remove_state_data);
        if (txn_err != error::NoError) {
                log::Error("Error removing artifact data: " + txn_err.String());
                poster.PostEvent(StateEvent::Failure);
                return;
        }

        poster.PostEvent(StateEvent::Success);
}
983

984
// Commits the deployment's artifact data under an artifact name carrying the inconsistent
// suffix. The extra transaction callback does nothing and returns NoError. A commit error
// is logged and reported as StateEvent::Failure; otherwise StateEvent::Success is posted.
void StateLoopState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        assert(ctx.deployment.state_data);
        auto &artifact = ctx.deployment.state_data->update_info.artifact;

        // The update is marked as inconsistent through its name.
        string inconsistent_name = AddInconsistentSuffix(artifact.artifact_name);

        auto commit_err = ctx.mender_context.CommitArtifactData(
                inconsistent_name,
                artifact.artifact_group,
                artifact.type_info_provides,
                artifact.clears_artifact_provides,
                [](kv_db::Transaction &) { return error::NoError; });
        if (commit_err != error::NoError) {
                log::Error("Error saving inconsistent artifact data: " + commit_err.String());
                poster.PostEvent(StateEvent::Failure);
                return;
        }

        poster.PostEvent(StateEvent::Success);
}
1005

1006
// Wraps up a deployment: logs its ID and final status, finishes deployment logging, resets
// `ctx.deployment` and then posts InventoryPollingTriggered, DeploymentEnded and Success,
// in that order.
void EndOfDeploymentState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
        // Build the message before `ctx.deployment` is reset below.
        const string outcome = ctx.deployment.failed ? "Failure" : "Success";
        log::Info(
                "Deployment with ID " + ctx.deployment.state_data->update_info.id
                + " finished with status: " + outcome);

        ctx.FinishDeploymentLogging();

        ctx.deployment = {};

        // Submit the inventory right after an update.
        poster.PostEvent(StateEvent::InventoryPollingTriggered);
        poster.PostEvent(StateEvent::DeploymentEnded);
        poster.PostEvent(StateEvent::Success);
}
91✔
1019

1020
// Constructs the state, initializing `event_loop_` from `event_loop`.
ExitState::ExitState(events::EventLoop &event_loop) : event_loop_(event_loop) {}
94✔
1023

1024
// Stops the event loop. In builds without NDEBUG, `iterations_left_` is decremented first
// and, as long as it is still positive, StateEvent::Success is posted instead of stopping.
void ExitState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
#ifndef NDEBUG
        const bool keep_running = !(--iterations_left_ <= 0);
        if (keep_running) {
                poster.PostEvent(StateEvent::Success);
                return;
        }
#endif
        event_loop_.Stop();
}
91✔
1035

1036
namespace deployment_tracking {
1037

1038
void NoFailuresState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
61✔
1039
        ctx.deployment.failed = false;
61✔
1040
        ctx.deployment.rollback_failed = false;
61✔
1041
}
61✔
1042

1043
void FailureState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
58✔
1044
        ctx.deployment.failed = true;
58✔
1045
        ctx.deployment.rollback_failed = true;
58✔
1046
}
58✔
1047

1048
void RollbackAttemptedState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
52✔
1049
        ctx.deployment.failed = true;
52✔
1050
        ctx.deployment.rollback_failed = false;
52✔
1051
}
52✔
1052

1053
void RollbackFailedState::OnEnter(Context &ctx, sm::EventPoster<StateEvent> &poster) {
12✔
1054
        ctx.deployment.failed = true;
12✔
1055
        ctx.deployment.rollback_failed = true;
12✔
1056
}
12✔
1057

1058
} // namespace deployment_tracking
1059

1060
} // namespace daemon
1061
} // namespace update
1062
} // namespace mender
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc