• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

lightningnetwork / lnd / 12428593038

20 Dec 2024 09:02AM UTC coverage: 58.33% (-0.2%) from 58.576%
12428593038

Pull #9382

github

guggero
.golangci.yml: speed up linter by updating start commit

With this we allow the linter to only look at recent changes, since
everything between that old commit and this most recent one has been
linted correctly anyway.
Pull Request #9382: lint: deprecate old linters, use new ref commit

133769 of 229330 relevant lines covered (58.33%)

19284.53 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.95
/routing/payment_lifecycle.go
1
package routing
2

3
import (
4
        "context"
5
        "errors"
6
        "fmt"
7
        "time"
8

9
        "github.com/btcsuite/btcd/btcec/v2"
10
        "github.com/davecgh/go-spew/spew"
11
        sphinx "github.com/lightningnetwork/lightning-onion"
12
        "github.com/lightningnetwork/lnd/channeldb"
13
        "github.com/lightningnetwork/lnd/fn/v2"
14
        "github.com/lightningnetwork/lnd/graph/db/models"
15
        "github.com/lightningnetwork/lnd/htlcswitch"
16
        "github.com/lightningnetwork/lnd/lntypes"
17
        "github.com/lightningnetwork/lnd/lnwire"
18
        "github.com/lightningnetwork/lnd/routing/route"
19
        "github.com/lightningnetwork/lnd/routing/shards"
20
        "github.com/lightningnetwork/lnd/tlv"
21
)
22

23
// ErrPaymentLifecycleExiting is used when waiting for htlc attempt result, but
24
// the payment lifecycle is exiting.
25
var ErrPaymentLifecycleExiting = errors.New("payment lifecycle exiting")
26

27
// paymentLifecycle holds all information about the current state of a payment
28
// needed to resume if from any point.
29
type paymentLifecycle struct {
30
        router                *ChannelRouter
31
        feeLimit              lnwire.MilliSatoshi
32
        identifier            lntypes.Hash
33
        paySession            PaymentSession
34
        shardTracker          shards.ShardTracker
35
        currentHeight         int32
36
        firstHopCustomRecords lnwire.CustomRecords
37

38
        // quit is closed to signal the sub goroutines of the payment lifecycle
39
        // to stop.
40
        quit chan struct{}
41

42
        // resultCollected is used to signal that the result of an attempt has
43
        // been collected. A nil error means the attempt is either successful
44
        // or failed with temporary error. Otherwise, we should exit the
45
        // lifecycle loop as a terminal error has occurred.
46
        resultCollected chan error
47

48
        // resultCollector is a function that is used to collect the result of
49
        // an HTLC attempt, which is always mounted to `p.collectResultAsync`
50
        // except in unit test, where we use a much simpler resultCollector to
51
        // decouple the test flow for the payment lifecycle.
52
        resultCollector func(attempt *channeldb.HTLCAttempt)
53
}
54

55
// newPaymentLifecycle initiates a new payment lifecycle and returns it.
56
func newPaymentLifecycle(r *ChannelRouter, feeLimit lnwire.MilliSatoshi,
57
        identifier lntypes.Hash, paySession PaymentSession,
58
        shardTracker shards.ShardTracker, currentHeight int32,
59
        firstHopCustomRecords lnwire.CustomRecords) *paymentLifecycle {
41✔
60

41✔
61
        p := &paymentLifecycle{
41✔
62
                router:                r,
41✔
63
                feeLimit:              feeLimit,
41✔
64
                identifier:            identifier,
41✔
65
                paySession:            paySession,
41✔
66
                shardTracker:          shardTracker,
41✔
67
                currentHeight:         currentHeight,
41✔
68
                quit:                  make(chan struct{}),
41✔
69
                resultCollected:       make(chan error, 1),
41✔
70
                firstHopCustomRecords: firstHopCustomRecords,
41✔
71
        }
41✔
72

41✔
73
        // Mount the result collector.
41✔
74
        p.resultCollector = p.collectResultAsync
41✔
75

41✔
76
        return p
41✔
77
}
41✔
78

79
// calcFeeBudget returns the available fee to be used for sending HTLC
80
// attempts.
81
func (p *paymentLifecycle) calcFeeBudget(
82
        feesPaid lnwire.MilliSatoshi) lnwire.MilliSatoshi {
104✔
83

104✔
84
        budget := p.feeLimit
104✔
85

104✔
86
        // We'll subtract the used fee from our fee budget. In case of
104✔
87
        // overflow, we need to check whether feesPaid exceeds our budget
104✔
88
        // already.
104✔
89
        if feesPaid <= budget {
208✔
90
                budget -= feesPaid
104✔
91
        } else {
105✔
92
                budget = 0
1✔
93
        }
1✔
94

95
        return budget
104✔
96
}
97

98
// stateStep is the action the payment lifecycle loop takes next: skip the
// rest of the current iteration, proceed with it, or exit the loop.
type stateStep uint8

const (
	// stepSkip tells the lifecycle loop to skip the remainder of the
	// current iteration and start the next one.
	stepSkip stateStep = iota

	// stepProceed tells the lifecycle loop to carry on with the current
	// iteration.
	stepProceed

	// stepExit tells the lifecycle loop to stop.
	stepExit
)
113

114
// decideNextStep is used to determine the next step in the payment lifecycle.
115
func (p *paymentLifecycle) decideNextStep(
116
        payment DBMPPayment) (stateStep, error) {
75✔
117

75✔
118
        // Check whether we could make new HTLC attempts.
75✔
119
        allow, err := payment.AllowMoreAttempts()
75✔
120
        if err != nil {
77✔
121
                return stepExit, err
2✔
122
        }
2✔
123

124
        if !allow {
115✔
125
                // Check whether we need to wait for results.
42✔
126
                wait, err := payment.NeedWaitAttempts()
42✔
127
                if err != nil {
43✔
128
                        return stepExit, err
1✔
129
                }
1✔
130

131
                // If we are not allowed to make new HTLC attempts and there's
132
                // no need to wait, the lifecycle is done and we can exit.
133
                if !wait {
59✔
134
                        return stepExit, nil
18✔
135
                }
18✔
136

137
                log.Tracef("Waiting for attempt results for payment %v",
24✔
138
                        p.identifier)
24✔
139

24✔
140
                // Otherwise we wait for one HTLC attempt then continue
24✔
141
                // the lifecycle.
24✔
142
                //
24✔
143
                // NOTE: we don't check `p.quit` since `decideNextStep` is
24✔
144
                // running in the same goroutine as `resumePayment`.
24✔
145
                select {
24✔
146
                case err := <-p.resultCollected:
23✔
147
                        // If an error is returned, exit with it.
23✔
148
                        if err != nil {
24✔
149
                                return stepExit, err
1✔
150
                        }
1✔
151

152
                        log.Tracef("Received attempt result for payment %v",
23✔
153
                                p.identifier)
23✔
154

155
                case <-p.router.quit:
1✔
156
                        return stepExit, ErrRouterShuttingDown
1✔
157
                }
158

159
                return stepSkip, nil
23✔
160
        }
161

162
        // Otherwise we need to make more attempts.
163
        return stepProceed, nil
32✔
164
}
165

166
// resumePayment resumes the paymentLifecycle from the current state.
167
func (p *paymentLifecycle) resumePayment(ctx context.Context) ([32]byte,
168
        *route.Route, error) {
23✔
169

23✔
170
        // When the payment lifecycle loop exits, we make sure to signal any
23✔
171
        // sub goroutine of the HTLC attempt to exit, then wait for them to
23✔
172
        // return.
23✔
173
        defer p.stop()
23✔
174

23✔
175
        // If we had any existing attempts outstanding, we'll start by spinning
23✔
176
        // up goroutines that'll collect their results and deliver them to the
23✔
177
        // lifecycle loop below.
23✔
178
        payment, err := p.router.cfg.Control.FetchPayment(p.identifier)
23✔
179
        if err != nil {
24✔
180
                return [32]byte{}, nil, err
1✔
181
        }
1✔
182

183
        for _, a := range payment.InFlightHTLCs() {
23✔
184
                a := a
1✔
185

1✔
186
                log.Infof("Resuming HTLC attempt %v for payment %v",
1✔
187
                        a.AttemptID, p.identifier)
1✔
188

1✔
189
                p.resultCollector(&a)
1✔
190
        }
1✔
191

192
        // Get the payment status.
193
        status := payment.GetStatus()
22✔
194

22✔
195
        // exitWithErr is a helper closure that logs and returns an error.
22✔
196
        exitWithErr := func(err error) ([32]byte, *route.Route, error) {
28✔
197
                // Log an error with the latest payment status.
6✔
198
                //
6✔
199
                // NOTE: this `status` variable is reassigned in the loop
6✔
200
                // below. We could also call `payment.GetStatus` here, but in a
6✔
201
                // rare case when the critical log is triggered when using
6✔
202
                // postgres as db backend, the `payment` could be nil, causing
6✔
203
                // the payment fetching to return an error.
6✔
204
                log.Errorf("Payment %v with status=%v failed: %v", p.identifier,
6✔
205
                        status, err)
6✔
206

6✔
207
                return [32]byte{}, nil, err
6✔
208
        }
6✔
209

210
        // We'll continue until either our payment succeeds, or we encounter a
211
        // critical error during path finding.
212
lifecycle:
22✔
213
        for {
92✔
214
                // We update the payment state on every iteration. Since the
70✔
215
                // payment state is affected by multiple goroutines (ie,
70✔
216
                // collectResultAsync), it is NOT guaranteed that we always
70✔
217
                // have the latest state here. This is fine as long as the
70✔
218
                // state is consistent as a whole.
70✔
219
                payment, err = p.router.cfg.Control.FetchPayment(p.identifier)
70✔
220
                if err != nil {
70✔
221
                        return exitWithErr(err)
×
222
                }
×
223

224
                ps := payment.GetState()
70✔
225
                remainingFees := p.calcFeeBudget(ps.FeesPaid)
70✔
226

70✔
227
                status = payment.GetStatus()
70✔
228
                log.Debugf("Payment %v: status=%v, active_shards=%v, "+
70✔
229
                        "rem_value=%v, fee_limit=%v", p.identifier, status,
70✔
230
                        ps.NumAttemptsInFlight, ps.RemainingAmt, remainingFees)
70✔
231

70✔
232
                // We now proceed our lifecycle with the following tasks in
70✔
233
                // order,
70✔
234
                //   1. check context.
70✔
235
                //   2. request route.
70✔
236
                //   3. create HTLC attempt.
70✔
237
                //   4. send HTLC attempt.
70✔
238
                //   5. collect HTLC attempt result.
70✔
239
                //
70✔
240
                // Before we attempt any new shard, we'll check to see if we've
70✔
241
                // gone past the payment attempt timeout, or if the context was
70✔
242
                // cancelled, or the router is exiting. In any of these cases,
70✔
243
                // we'll stop this payment attempt short.
70✔
244
                if err := p.checkContext(ctx); err != nil {
71✔
245
                        return exitWithErr(err)
1✔
246
                }
1✔
247

248
                // Now decide the next step of the current lifecycle.
249
                step, err := p.decideNextStep(payment)
69✔
250
                if err != nil {
71✔
251
                        return exitWithErr(err)
2✔
252
                }
2✔
253

254
                switch step {
68✔
255
                // Exit the for loop and return below.
256
                case stepExit:
17✔
257
                        break lifecycle
17✔
258

259
                // Continue the for loop and skip the rest.
260
                case stepSkip:
22✔
261
                        continue lifecycle
22✔
262

263
                // Continue the for loop and proceed the rest.
264
                case stepProceed:
31✔
265

266
                // Unknown step received, exit with an error.
267
                default:
×
268
                        err = fmt.Errorf("unknown step: %v", step)
×
269
                        return exitWithErr(err)
×
270
                }
271

272
                // Now request a route to be used to create our HTLC attempt.
273
                rt, err := p.requestRoute(ps)
31✔
274
                if err != nil {
32✔
275
                        return exitWithErr(err)
1✔
276
                }
1✔
277

278
                // We may not be able to find a route for current attempt. In
279
                // that case, we continue the loop and move straight to the
280
                // next iteration in case there are results for inflight HTLCs
281
                // that still need to be collected.
282
                if rt == nil {
33✔
283
                        log.Errorf("No route found for payment %v",
3✔
284
                                p.identifier)
3✔
285

3✔
286
                        continue lifecycle
3✔
287
                }
288

289
                log.Tracef("Found route: %s", spew.Sdump(rt.Hops))
28✔
290

28✔
291
                // Allow the traffic shaper to add custom records to the
28✔
292
                // outgoing HTLC and also adjust the amount if needed.
28✔
293
                err = p.amendFirstHopData(rt)
28✔
294
                if err != nil {
28✔
295
                        return exitWithErr(err)
×
296
                }
×
297

298
                // We found a route to try, create a new HTLC attempt to try.
299
                attempt, err := p.registerAttempt(rt, ps.RemainingAmt)
28✔
300
                if err != nil {
29✔
301
                        return exitWithErr(err)
1✔
302
                }
1✔
303

304
                // Once the attempt is created, send it to the htlcswitch.
305
                result, err := p.sendAttempt(attempt)
27✔
306
                if err != nil {
28✔
307
                        return exitWithErr(err)
1✔
308
                }
1✔
309

310
                // Now that the shard was successfully sent, launch a go
311
                // routine that will handle its result when its back.
312
                if result.err == nil {
51✔
313
                        p.resultCollector(attempt)
25✔
314
                }
25✔
315
        }
316

317
        // Once we are out the lifecycle loop, it means we've reached a
318
        // terminal condition. We either return the settled preimage or the
319
        // payment's failure reason.
320
        //
321
        // Optionally delete the failed attempts from the database.
322
        err = p.router.cfg.Control.DeleteFailedAttempts(p.identifier)
17✔
323
        if err != nil {
17✔
324
                log.Errorf("Error deleting failed htlc attempts for payment "+
×
325
                        "%v: %v", p.identifier, err)
×
326
        }
×
327

328
        htlc, failure := payment.TerminalInfo()
17✔
329
        if htlc != nil {
30✔
330
                return htlc.Settle.Preimage, &htlc.Route, nil
13✔
331
        }
13✔
332

333
        // Otherwise return the payment failure reason.
334
        return [32]byte{}, nil, *failure
5✔
335
}
336

337
// checkContext checks whether the payment context has been canceled.
338
// Cancellation occurs manually or if the context times out.
339
func (p *paymentLifecycle) checkContext(ctx context.Context) error {
73✔
340
        select {
73✔
341
        case <-ctx.Done():
5✔
342
                // If the context was canceled, we'll mark the payment as
5✔
343
                // failed. There are two cases to distinguish here: Either a
5✔
344
                // user-provided timeout was reached, or the context was
5✔
345
                // canceled, either to a manual cancellation or due to an
5✔
346
                // unknown error.
5✔
347
                var reason channeldb.FailureReason
5✔
348
                if errors.Is(ctx.Err(), context.DeadlineExceeded) {
8✔
349
                        reason = channeldb.FailureReasonTimeout
3✔
350
                        log.Warnf("Payment attempt not completed before "+
3✔
351
                                "timeout, id=%s", p.identifier.String())
3✔
352
                } else {
5✔
353
                        reason = channeldb.FailureReasonCanceled
2✔
354
                        log.Warnf("Payment attempt context canceled, id=%s",
2✔
355
                                p.identifier.String())
2✔
356
                }
2✔
357

358
                // By marking the payment failed, depending on whether it has
359
                // inflight HTLCs or not, its status will now either be
360
                // `StatusInflight` or `StatusFailed`. In either case, no more
361
                // HTLCs will be attempted.
362
                err := p.router.cfg.Control.FailPayment(p.identifier, reason)
5✔
363
                if err != nil {
6✔
364
                        return fmt.Errorf("FailPayment got %w", err)
1✔
365
                }
1✔
366

367
        case <-p.router.quit:
2✔
368
                return fmt.Errorf("check payment timeout got: %w",
2✔
369
                        ErrRouterShuttingDown)
2✔
370

371
        // Fall through if we haven't hit our time limit.
372
        default:
67✔
373
        }
374

375
        return nil
70✔
376
}
377

378
// requestRoute is responsible for finding a route to be used to create an HTLC
379
// attempt.
380
func (p *paymentLifecycle) requestRoute(
381
        ps *channeldb.MPPaymentState) (*route.Route, error) {
35✔
382

35✔
383
        remainingFees := p.calcFeeBudget(ps.FeesPaid)
35✔
384

35✔
385
        // Query our payment session to construct a route.
35✔
386
        rt, err := p.paySession.RequestRoute(
35✔
387
                ps.RemainingAmt, remainingFees,
35✔
388
                uint32(ps.NumAttemptsInFlight), uint32(p.currentHeight),
35✔
389
                p.firstHopCustomRecords,
35✔
390
        )
35✔
391

35✔
392
        // Exit early if there's no error.
35✔
393
        if err == nil {
64✔
394
                return rt, nil
29✔
395
        }
29✔
396

397
        // Otherwise we need to handle the error.
398
        log.Warnf("Failed to find route for payment %v: %v", p.identifier, err)
7✔
399

7✔
400
        // If the error belongs to `noRouteError` set, it means a non-critical
7✔
401
        // error has happened during path finding, and we will mark the payment
7✔
402
        // failed with this reason. Otherwise, we'll return the critical error
7✔
403
        // found to abort the lifecycle.
7✔
404
        var routeErr noRouteError
7✔
405
        if !errors.As(err, &routeErr) {
9✔
406
                return nil, fmt.Errorf("requestRoute got: %w", err)
2✔
407
        }
2✔
408

409
        // It's the `paymentSession`'s responsibility to find a route for us
410
        // with the best effort. When it cannot find a path, we need to treat it
411
        // as a terminal condition and fail the payment no matter it has
412
        // inflight HTLCs or not.
413
        failureCode := routeErr.FailureReason()
5✔
414
        log.Warnf("Marking payment %v permanently failed with no route: %v",
5✔
415
                p.identifier, failureCode)
5✔
416

5✔
417
        err = p.router.cfg.Control.FailPayment(p.identifier, failureCode)
5✔
418
        if err != nil {
6✔
419
                return nil, fmt.Errorf("FailPayment got: %w", err)
1✔
420
        }
1✔
421

422
        // NOTE: we decide to not return the non-critical noRouteError here to
423
        // avoid terminating the payment lifecycle as there might be other
424
        // inflight HTLCs which we must wait for their results.
425
        return nil, nil
4✔
426
}
427

428
// stop signals any active shard goroutine to exit.
429
func (p *paymentLifecycle) stop() {
24✔
430
        close(p.quit)
24✔
431
}
24✔
432

433
// attemptResult holds the HTLC attempt and a possible error returned from
434
// sending it.
435
type attemptResult struct {
436
        // err is non-nil if a non-critical error was encountered when trying
437
        // to send the attempt, and we successfully updated the control tower
438
        // to reflect this error. This can be errors like not enough local
439
        // balance for the given route etc.
440
        err error
441

442
        // attempt is the attempt structure as recorded in the database.
443
        attempt *channeldb.HTLCAttempt
444
}
445

446
// collectResultAsync launches a goroutine that will wait for the result of the
447
// given HTLC attempt to be available then handle its result. Once received, it
448
// will send a nil error to channel `resultCollected` to indicate there's a
449
// result.
450
func (p *paymentLifecycle) collectResultAsync(attempt *channeldb.HTLCAttempt) {
23✔
451
        log.Debugf("Collecting result for attempt %v in payment %v",
23✔
452
                attempt.AttemptID, p.identifier)
23✔
453

23✔
454
        go func() {
46✔
455
                // Block until the result is available.
23✔
456
                _, err := p.collectResult(attempt)
23✔
457
                if err != nil {
24✔
458
                        log.Errorf("Error collecting result for attempt %v "+
1✔
459
                                "in payment %v: %v", attempt.AttemptID,
1✔
460
                                p.identifier, err)
1✔
461
                }
1✔
462

463
                log.Debugf("Result collected for attempt %v in payment %v",
23✔
464
                        attempt.AttemptID, p.identifier)
23✔
465

23✔
466
                // Once the result is collected, we signal it by writing the
23✔
467
                // error to `resultCollected`.
23✔
468
                select {
23✔
469
                // Send the signal or quit.
470
                case p.resultCollected <- err:
23✔
471

472
                case <-p.quit:
×
473
                        log.Debugf("Lifecycle exiting while collecting "+
×
474
                                "result for payment %v", p.identifier)
×
475

476
                case <-p.router.quit:
×
477
                        return
×
478
                }
479
        }()
480
}
481

482
// collectResult waits for the result for the given attempt to be available
483
// from the Switch, then records the attempt outcome with the control tower.
484
// An attemptResult is returned, indicating the final outcome of this HTLC
485
// attempt.
486
func (p *paymentLifecycle) collectResult(attempt *channeldb.HTLCAttempt) (
487
        *attemptResult, error) {
35✔
488

35✔
489
        log.Tracef("Collecting result for attempt %v", spew.Sdump(attempt))
35✔
490

35✔
491
        // We'll retrieve the hash specific to this shard from the
35✔
492
        // shardTracker, since it will be needed to regenerate the circuit
35✔
493
        // below.
35✔
494
        hash, err := p.shardTracker.GetHash(attempt.AttemptID)
35✔
495
        if err != nil {
35✔
496
                return p.failAttempt(attempt.AttemptID, err)
×
497
        }
×
498

499
        // Regenerate the circuit for this attempt.
500
        _, circuit, err := generateSphinxPacket(
35✔
501
                &attempt.Route, hash[:], attempt.SessionKey(),
35✔
502
        )
35✔
503
        // TODO(yy): We generate this circuit to create the error decryptor,
35✔
504
        // which is then used in htlcswitch as the deobfuscator to decode the
35✔
505
        // error from `UpdateFailHTLC`. However, suppose it's an
35✔
506
        // `UpdateFulfillHTLC` message yet for some reason the sphinx packet is
35✔
507
        // failed to be generated, we'd miss settling it. This means we should
35✔
508
        // give it a second chance to try the settlement path in case
35✔
509
        // `GetAttemptResult` gives us back the preimage. And move the circuit
35✔
510
        // creation into htlcswitch so it's only constructed when there's a
35✔
511
        // failure message we need to decode.
35✔
512
        if err != nil {
35✔
513
                log.Debugf("Unable to generate circuit for attempt %v: %v",
×
514
                        attempt.AttemptID, err)
×
515

×
516
                return p.failAttempt(attempt.AttemptID, err)
×
517
        }
×
518

519
        // Using the created circuit, initialize the error decrypter, so we can
520
        // parse+decode any failures incurred by this payment within the
521
        // switch.
522
        errorDecryptor := &htlcswitch.SphinxErrorDecrypter{
35✔
523
                OnionErrorDecrypter: sphinx.NewOnionErrorDecrypter(circuit),
35✔
524
        }
35✔
525

35✔
526
        // Now ask the switch to return the result of the payment when
35✔
527
        // available.
35✔
528
        //
35✔
529
        // TODO(yy): consider using htlcswitch to create the `errorDecryptor`
35✔
530
        // since the htlc is already in db. This will also make the interface
35✔
531
        // `PaymentAttemptDispatcher` deeper and easier to use. Moreover, we'd
35✔
532
        // only create the decryptor when received a failure, further saving us
35✔
533
        // a few CPU cycles.
35✔
534
        resultChan, err := p.router.cfg.Payer.GetAttemptResult(
35✔
535
                attempt.AttemptID, p.identifier, errorDecryptor,
35✔
536
        )
35✔
537
        // Handle the switch error.
35✔
538
        if err != nil {
36✔
539
                log.Errorf("Failed getting result for attemptID %d "+
1✔
540
                        "from switch: %v", attempt.AttemptID, err)
1✔
541

1✔
542
                return p.handleSwitchErr(attempt, err)
1✔
543
        }
1✔
544

545
        // The switch knows about this payment, we'll wait for a result to be
546
        // available.
547
        var (
34✔
548
                result *htlcswitch.PaymentResult
34✔
549
                ok     bool
34✔
550
        )
34✔
551

34✔
552
        select {
34✔
553
        case result, ok = <-resultChan:
32✔
554
                if !ok {
34✔
555
                        return nil, htlcswitch.ErrSwitchExiting
2✔
556
                }
2✔
557

558
        case <-p.quit:
1✔
559
                return nil, ErrPaymentLifecycleExiting
1✔
560

561
        case <-p.router.quit:
1✔
562
                return nil, ErrRouterShuttingDown
1✔
563
        }
564

565
        // In case of a payment failure, fail the attempt with the control
566
        // tower and return.
567
        if result.Error != nil {
48✔
568
                return p.handleSwitchErr(attempt, result.Error)
17✔
569
        }
17✔
570

571
        // We successfully got a payment result back from the switch.
572
        log.Debugf("Payment %v succeeded with pid=%v",
15✔
573
                p.identifier, attempt.AttemptID)
15✔
574

15✔
575
        // Report success to mission control.
15✔
576
        err = p.router.cfg.MissionControl.ReportPaymentSuccess(
15✔
577
                attempt.AttemptID, &attempt.Route,
15✔
578
        )
15✔
579
        if err != nil {
15✔
580
                log.Errorf("Error reporting payment success to mc: %v", err)
×
581
        }
×
582

583
        // In case of success we atomically store settle result to the DB move
584
        // the shard to the settled state.
585
        htlcAttempt, err := p.router.cfg.Control.SettleAttempt(
15✔
586
                p.identifier, attempt.AttemptID,
15✔
587
                &channeldb.HTLCSettleInfo{
15✔
588
                        Preimage:   result.Preimage,
15✔
589
                        SettleTime: p.router.cfg.Clock.Now(),
15✔
590
                },
15✔
591
        )
15✔
592
        if err != nil {
16✔
593
                log.Errorf("Error settling attempt %v for payment %v with "+
1✔
594
                        "preimage %v: %v", attempt.AttemptID, p.identifier,
1✔
595
                        result.Preimage, err)
1✔
596

1✔
597
                // We won't mark the attempt as failed since we already have
1✔
598
                // the preimage.
1✔
599
                return nil, err
1✔
600
        }
1✔
601

602
        return &attemptResult{
14✔
603
                attempt: htlcAttempt,
14✔
604
        }, nil
14✔
605
}
606

607
// registerAttempt is responsible for creating and saving an HTLC attempt in db
608
// by using the route info provided. The `remainingAmt` is used to decide
609
// whether this is the last attempt.
610
func (p *paymentLifecycle) registerAttempt(rt *route.Route,
611
        remainingAmt lnwire.MilliSatoshi) (*channeldb.HTLCAttempt, error) {
37✔
612

37✔
613
        // If this route will consume the last remaining amount to send
37✔
614
        // to the receiver, this will be our last shard (for now).
37✔
615
        isLastAttempt := rt.ReceiverAmt() == remainingAmt
37✔
616

37✔
617
        // Using the route received from the payment session, create a new
37✔
618
        // shard to send.
37✔
619
        attempt, err := p.createNewPaymentAttempt(rt, isLastAttempt)
37✔
620
        if err != nil {
38✔
621
                return nil, err
1✔
622
        }
1✔
623

624
        // Before sending this HTLC to the switch, we checkpoint the fresh
625
        // paymentID and route to the DB. This lets us know on startup the ID
626
        // of the payment that we attempted to send, such that we can query the
627
        // Switch for its whereabouts. The route is needed to handle the result
628
        // when it eventually comes back.
629
        err = p.router.cfg.Control.RegisterAttempt(
36✔
630
                p.identifier, &attempt.HTLCAttemptInfo,
36✔
631
        )
36✔
632

36✔
633
        return attempt, err
36✔
634
}
635

636
// createNewPaymentAttempt creates a new payment attempt from the given route.
637
func (p *paymentLifecycle) createNewPaymentAttempt(rt *route.Route,
638
        lastShard bool) (*channeldb.HTLCAttempt, error) {
37✔
639

37✔
640
        // Generate a new key to be used for this attempt.
37✔
641
        sessionKey, err := generateNewSessionKey()
37✔
642
        if err != nil {
37✔
643
                return nil, err
×
644
        }
×
645

646
        // We generate a new, unique payment ID that we will use for
647
        // this HTLC.
648
        attemptID, err := p.router.cfg.NextPaymentID()
37✔
649
        if err != nil {
37✔
650
                return nil, err
×
651
        }
×
652

653
        // Request a new shard from the ShardTracker. If this is an AMP
654
        // payment, and this is the last shard, the outstanding shards together
655
        // with this one will be enough for the receiver to derive all HTLC
656
        // preimages. If this a non-AMP payment, the ShardTracker will return a
657
        // simple shard with the payment's static payment hash.
658
        shard, err := p.shardTracker.NewShard(attemptID, lastShard)
37✔
659
        if err != nil {
38✔
660
                return nil, err
1✔
661
        }
1✔
662

663
        // If this shard carries MPP or AMP options, add them to the last hop
664
        // on the route.
665
        hop := rt.Hops[len(rt.Hops)-1]
36✔
666
        if shard.MPP() != nil {
41✔
667
                hop.MPP = shard.MPP()
5✔
668
        }
5✔
669

670
        if shard.AMP() != nil {
37✔
671
                hop.AMP = shard.AMP()
1✔
672
        }
1✔
673

674
        hash := shard.Hash()
36✔
675

36✔
676
        // We now have all the information needed to populate the current
36✔
677
        // attempt information.
36✔
678
        attempt := channeldb.NewHtlcAttempt(
36✔
679
                attemptID, sessionKey, *rt, p.router.cfg.Clock.Now(), &hash,
36✔
680
        )
36✔
681

36✔
682
        return attempt, nil
36✔
683
}
684

685
// sendAttempt attempts to send the current attempt to the switch to complete
686
// the payment. If this attempt fails, then we'll continue on to the next
687
// available route.
688
func (p *paymentLifecycle) sendAttempt(
689
        attempt *channeldb.HTLCAttempt) (*attemptResult, error) {
36✔
690

36✔
691
        log.Debugf("Sending HTLC attempt(id=%v, total_amt=%v, first_hop_amt=%d"+
36✔
692
                ") for payment %v", attempt.AttemptID,
36✔
693
                attempt.Route.TotalAmount, attempt.Route.FirstHopAmount.Val,
36✔
694
                p.identifier)
36✔
695

36✔
696
        rt := attempt.Route
36✔
697

36✔
698
        // Construct the first hop.
36✔
699
        firstHop := lnwire.NewShortChanIDFromInt(rt.Hops[0].ChannelID)
36✔
700

36✔
701
        // Craft an HTLC packet to send to the htlcswitch. The metadata within
36✔
702
        // this packet will be used to route the payment through the network,
36✔
703
        // starting with the first-hop.
36✔
704
        htlcAdd := &lnwire.UpdateAddHTLC{
36✔
705
                Amount:        rt.FirstHopAmount.Val.Int(),
36✔
706
                Expiry:        rt.TotalTimeLock,
36✔
707
                PaymentHash:   *attempt.Hash,
36✔
708
                CustomRecords: rt.FirstHopWireCustomRecords,
36✔
709
        }
36✔
710

36✔
711
        // Generate the raw encoded sphinx packet to be included along
36✔
712
        // with the htlcAdd message that we send directly to the
36✔
713
        // switch.
36✔
714
        onionBlob, _, err := generateSphinxPacket(
36✔
715
                &rt, attempt.Hash[:], attempt.SessionKey(),
36✔
716
        )
36✔
717
        if err != nil {
37✔
718
                log.Errorf("Failed to create onion blob: attempt=%d in "+
1✔
719
                        "payment=%v, err:%v", attempt.AttemptID,
1✔
720
                        p.identifier, err)
1✔
721

1✔
722
                return p.failAttempt(attempt.AttemptID, err)
1✔
723
        }
1✔
724

725
        copy(htlcAdd.OnionBlob[:], onionBlob)
35✔
726

35✔
727
        // Send it to the Switch. When this method returns we assume
35✔
728
        // the Switch successfully has persisted the payment attempt,
35✔
729
        // such that we can resume waiting for the result after a
35✔
730
        // restart.
35✔
731
        err = p.router.cfg.Payer.SendHTLC(firstHop, attempt.AttemptID, htlcAdd)
35✔
732
        if err != nil {
41✔
733
                log.Errorf("Failed sending attempt %d for payment %v to "+
6✔
734
                        "switch: %v", attempt.AttemptID, p.identifier, err)
6✔
735

6✔
736
                return p.handleSwitchErr(attempt, err)
6✔
737
        }
6✔
738

739
        log.Debugf("Attempt %v for payment %v successfully sent to switch, "+
30✔
740
                "route: %v", attempt.AttemptID, p.identifier, &attempt.Route)
30✔
741

30✔
742
        return &attemptResult{
30✔
743
                attempt: attempt,
30✔
744
        }, nil
30✔
745
}
746

747
// amendFirstHopData is a function that calls the traffic shaper to allow it to
748
// add custom records to the outgoing HTLC and also adjust the amount if
749
// needed.
750
func (p *paymentLifecycle) amendFirstHopData(rt *route.Route) error {
37✔
751
        // The first hop amount on the route is the full route amount if not
37✔
752
        // overwritten by the traffic shaper. So we set the initial value now
37✔
753
        // and potentially overwrite it later.
37✔
754
        rt.FirstHopAmount = tlv.NewRecordT[tlv.TlvType0](
37✔
755
                tlv.NewBigSizeT(rt.TotalAmount),
37✔
756
        )
37✔
757

37✔
758
        // By default, we set the first hop custom records to the initial
37✔
759
        // value requested by the RPC. The traffic shaper may overwrite this
37✔
760
        // value.
37✔
761
        rt.FirstHopWireCustomRecords = p.firstHopCustomRecords
37✔
762

37✔
763
        // extraDataRequest is a helper struct to pass the custom records and
37✔
764
        // amount back from the traffic shaper.
37✔
765
        type extraDataRequest struct {
37✔
766
                customRecords fn.Option[lnwire.CustomRecords]
37✔
767

37✔
768
                amount fn.Option[lnwire.MilliSatoshi]
37✔
769
        }
37✔
770

37✔
771
        // If a hook exists that may affect our outgoing message, we call it now
37✔
772
        // and apply its side effects to the UpdateAddHTLC message.
37✔
773
        result, err := fn.MapOptionZ(
37✔
774
                p.router.cfg.TrafficShaper,
37✔
775
                //nolint:ll
37✔
776
                func(ts htlcswitch.AuxTrafficShaper) fn.Result[extraDataRequest] {
73✔
777
                        newAmt, newRecords, err := ts.ProduceHtlcExtraData(
36✔
778
                                rt.TotalAmount, p.firstHopCustomRecords,
36✔
779
                        )
36✔
780
                        if err != nil {
36✔
781
                                return fn.Err[extraDataRequest](err)
×
782
                        }
×
783

784
                        // Make sure we only received valid records.
785
                        if err := newRecords.Validate(); err != nil {
36✔
786
                                return fn.Err[extraDataRequest](err)
×
787
                        }
×
788

789
                        log.Debugf("Aux traffic shaper returned custom "+
36✔
790
                                "records %v and amount %d msat for HTLC",
36✔
791
                                spew.Sdump(newRecords), newAmt)
36✔
792

36✔
793
                        return fn.Ok(extraDataRequest{
36✔
794
                                customRecords: fn.Some(newRecords),
36✔
795
                                amount:        fn.Some(newAmt),
36✔
796
                        })
36✔
797
                },
798
        ).Unpack()
799
        if err != nil {
37✔
800
                return fmt.Errorf("traffic shaper failed to produce extra "+
×
801
                        "data: %w", err)
×
802
        }
×
803

804
        // Apply the side effects to the UpdateAddHTLC message.
805
        result.customRecords.WhenSome(func(records lnwire.CustomRecords) {
73✔
806
                rt.FirstHopWireCustomRecords = records
36✔
807
        })
36✔
808
        result.amount.WhenSome(func(amount lnwire.MilliSatoshi) {
73✔
809
                rt.FirstHopAmount = tlv.NewRecordT[tlv.TlvType0](
36✔
810
                        tlv.NewBigSizeT(amount),
36✔
811
                )
36✔
812
        })
36✔
813

814
        return nil
37✔
815
}
816

817
// failAttemptAndPayment fails both the payment and its attempt via the
818
// router's control tower, which marks the payment as failed in db.
819
func (p *paymentLifecycle) failPaymentAndAttempt(
820
        attemptID uint64, reason *channeldb.FailureReason,
821
        sendErr error) (*attemptResult, error) {
6✔
822

6✔
823
        log.Errorf("Payment %v failed: final_outcome=%v, raw_err=%v",
6✔
824
                p.identifier, *reason, sendErr)
6✔
825

6✔
826
        // Fail the payment via control tower.
6✔
827
        //
6✔
828
        // NOTE: we must fail the payment first before failing the attempt.
6✔
829
        // Otherwise, once the attempt is marked as failed, another goroutine
6✔
830
        // might make another attempt while we are failing the payment.
6✔
831
        err := p.router.cfg.Control.FailPayment(p.identifier, *reason)
6✔
832
        if err != nil {
6✔
833
                log.Errorf("Unable to fail payment: %v", err)
×
834
                return nil, err
×
835
        }
×
836

837
        // Fail the attempt.
838
        return p.failAttempt(attemptID, sendErr)
6✔
839
}
840

841
// handleSwitchErr inspects the given error from the Switch and determines
842
// whether we should make another payment attempt, or if it should be
843
// considered a terminal error. Terminal errors will be recorded with the
844
// control tower. It analyzes the sendErr for the payment attempt received from
845
// the switch and updates mission control and/or channel policies. Depending on
846
// the error type, the error is either the final outcome of the payment or we
847
// need to continue with an alternative route. A final outcome is indicated by
848
// a non-nil reason value.
849
func (p *paymentLifecycle) handleSwitchErr(attempt *channeldb.HTLCAttempt,
850
        sendErr error) (*attemptResult, error) {
23✔
851

23✔
852
        internalErrorReason := channeldb.FailureReasonError
23✔
853
        attemptID := attempt.AttemptID
23✔
854

23✔
855
        // reportAndFail is a helper closure that reports the failure to the
23✔
856
        // mission control, which helps us to decide whether we want to retry
23✔
857
        // the payment or not. If a non nil reason is returned from mission
23✔
858
        // control, it will further fail the payment via control tower.
23✔
859
        reportAndFail := func(srcIdx *int,
23✔
860
                msg lnwire.FailureMessage) (*attemptResult, error) {
43✔
861

20✔
862
                // Report outcome to mission control.
20✔
863
                reason, err := p.router.cfg.MissionControl.ReportPaymentFail(
20✔
864
                        attemptID, &attempt.Route, srcIdx, msg,
20✔
865
                )
20✔
866
                if err != nil {
20✔
867
                        log.Errorf("Error reporting payment result to mc: %v",
×
868
                                err)
×
869

×
870
                        reason = &internalErrorReason
×
871
                }
×
872

873
                // Fail the attempt only if there's no reason.
874
                if reason == nil {
38✔
875
                        // Fail the attempt.
18✔
876
                        return p.failAttempt(attemptID, sendErr)
18✔
877
                }
18✔
878

879
                // Otherwise fail both the payment and the attempt.
880
                return p.failPaymentAndAttempt(attemptID, reason, sendErr)
3✔
881
        }
882

883
        // If this attempt ID is unknown to the Switch, it means it was never
884
        // checkpointed and forwarded by the switch before a restart. In this
885
        // case we can safely send a new payment attempt, and wait for its
886
        // result to be available.
887
        if errors.Is(sendErr, htlcswitch.ErrPaymentIDNotFound) {
23✔
888
                log.Debugf("Attempt ID %v for payment %v not found in the "+
×
889
                        "Switch, retrying.", attempt.AttemptID, p.identifier)
×
890

×
891
                return p.failAttempt(attemptID, sendErr)
×
892
        }
×
893

894
        if errors.Is(sendErr, htlcswitch.ErrUnreadableFailureMessage) {
24✔
895
                log.Warn("Unreadable failure when sending htlc: id=%v, hash=%v",
1✔
896
                        attempt.AttemptID, attempt.Hash)
1✔
897

1✔
898
                // Since this error message cannot be decrypted, we will send a
1✔
899
                // nil error message to our mission controller and fail the
1✔
900
                // payment.
1✔
901
                return reportAndFail(nil, nil)
1✔
902
        }
1✔
903

904
        // If the error is a ClearTextError, we have received a valid wire
905
        // failure message, either from our own outgoing link or from a node
906
        // down the route. If the error is not related to the propagation of
907
        // our payment, we can stop trying because an internal error has
908
        // occurred.
909
        var rtErr htlcswitch.ClearTextError
22✔
910
        ok := errors.As(sendErr, &rtErr)
22✔
911
        if !ok {
25✔
912
                return p.failPaymentAndAttempt(
3✔
913
                        attemptID, &internalErrorReason, sendErr,
3✔
914
                )
3✔
915
        }
3✔
916

917
        // failureSourceIdx is the index of the node that the failure occurred
918
        // at. If the ClearTextError received is not a ForwardingError the
919
        // payment error occurred at our node, so we leave this value as 0
920
        // to indicate that the failure occurred locally. If the error is a
921
        // ForwardingError, it did not originate at our node, so we set
922
        // failureSourceIdx to the index of the node where the failure occurred.
923
        failureSourceIdx := 0
19✔
924
        var source *htlcswitch.ForwardingError
19✔
925
        ok = errors.As(rtErr, &source)
19✔
926
        if ok {
38✔
927
                failureSourceIdx = source.FailureSourceIdx
19✔
928
        }
19✔
929

930
        // Extract the wire failure and apply channel update if it contains one.
931
        // If we received an unknown failure message from a node along the
932
        // route, the failure message will be nil.
933
        failureMessage := rtErr.WireMessage()
19✔
934
        err := p.handleFailureMessage(
19✔
935
                &attempt.Route, failureSourceIdx, failureMessage,
19✔
936
        )
19✔
937
        if err != nil {
19✔
938
                return p.failPaymentAndAttempt(
×
939
                        attemptID, &internalErrorReason, sendErr,
×
940
                )
×
941
        }
×
942

943
        log.Tracef("Node=%v reported failure when sending htlc",
19✔
944
                failureSourceIdx)
19✔
945

19✔
946
        return reportAndFail(&failureSourceIdx, failureMessage)
19✔
947
}
948

949
// handleFailureMessage tries to apply a channel update present in the failure
950
// message if any.
951
func (p *paymentLifecycle) handleFailureMessage(rt *route.Route,
952
        errorSourceIdx int, failure lnwire.FailureMessage) error {
19✔
953

19✔
954
        if failure == nil {
20✔
955
                return nil
1✔
956
        }
1✔
957

958
        // It makes no sense to apply our own channel updates.
959
        if errorSourceIdx == 0 {
19✔
960
                log.Errorf("Channel update of ourselves received")
1✔
961

1✔
962
                return nil
1✔
963
        }
1✔
964

965
        // Extract channel update if the error contains one.
966
        update := p.router.extractChannelUpdate(failure)
18✔
967
        if update == nil {
28✔
968
                return nil
10✔
969
        }
10✔
970

971
        // Parse pubkey to allow validation of the channel update. This should
972
        // always succeed, otherwise there is something wrong in our
973
        // implementation. Therefore, return an error.
974
        errVertex := rt.Hops[errorSourceIdx-1].PubKeyBytes
9✔
975
        errSource, err := btcec.ParsePubKey(errVertex[:])
9✔
976
        if err != nil {
9✔
977
                log.Errorf("Cannot parse pubkey: idx=%v, pubkey=%v",
×
978
                        errorSourceIdx, errVertex)
×
979

×
980
                return err
×
981
        }
×
982

983
        var (
9✔
984
                isAdditionalEdge bool
9✔
985
                policy           *models.CachedEdgePolicy
9✔
986
        )
9✔
987

9✔
988
        // Before we apply the channel update, we need to decide whether the
9✔
989
        // update is for additional (ephemeral) edge or normal edge stored in
9✔
990
        // db.
9✔
991
        //
9✔
992
        // Note: the p.paySession might be nil here if it's called inside
9✔
993
        // SendToRoute where there's no payment lifecycle.
9✔
994
        if p.paySession != nil {
15✔
995
                policy = p.paySession.GetAdditionalEdgePolicy(
6✔
996
                        errSource, update.ShortChannelID.ToUint64(),
6✔
997
                )
6✔
998
                if policy != nil {
9✔
999
                        isAdditionalEdge = true
3✔
1000
                }
3✔
1001
        }
1002

1003
        // Apply channel update to additional edge policy.
1004
        if isAdditionalEdge {
12✔
1005
                if !p.paySession.UpdateAdditionalEdge(
3✔
1006
                        update, errSource, policy) {
3✔
1007

×
1008
                        log.Debugf("Invalid channel update received: node=%v",
×
1009
                                errVertex)
×
1010
                }
×
1011
                return nil
3✔
1012
        }
1013

1014
        // Apply channel update to the channel edge policy in our db.
1015
        if !p.router.cfg.ApplyChannelUpdate(update) {
10✔
1016
                log.Debugf("Invalid channel update received: node=%v",
3✔
1017
                        errVertex)
3✔
1018
        }
3✔
1019
        return nil
7✔
1020
}
1021

1022
// failAttempt calls control tower to fail the current payment attempt.
1023
func (p *paymentLifecycle) failAttempt(attemptID uint64,
1024
        sendError error) (*attemptResult, error) {
24✔
1025

24✔
1026
        log.Warnf("Attempt %v for payment %v failed: %v", attemptID,
24✔
1027
                p.identifier, sendError)
24✔
1028

24✔
1029
        failInfo := marshallError(
24✔
1030
                sendError,
24✔
1031
                p.router.cfg.Clock.Now(),
24✔
1032
        )
24✔
1033

24✔
1034
        // Now that we are failing this payment attempt, cancel the shard with
24✔
1035
        // the ShardTracker such that it can derive the correct hash for the
24✔
1036
        // next attempt.
24✔
1037
        if err := p.shardTracker.CancelShard(attemptID); err != nil {
24✔
1038
                return nil, err
×
1039
        }
×
1040

1041
        attempt, err := p.router.cfg.Control.FailAttempt(
24✔
1042
                p.identifier, attemptID, failInfo,
24✔
1043
        )
24✔
1044
        if err != nil {
27✔
1045
                return nil, err
3✔
1046
        }
3✔
1047

1048
        return &attemptResult{
21✔
1049
                attempt: attempt,
21✔
1050
                err:     sendError,
21✔
1051
        }, nil
21✔
1052
}
1053

1054
// marshallError marshall an error as received from the switch to a structure
1055
// that is suitable for database storage.
1056
func marshallError(sendError error, time time.Time) *channeldb.HTLCFailInfo {
24✔
1057
        response := &channeldb.HTLCFailInfo{
24✔
1058
                FailTime: time,
24✔
1059
        }
24✔
1060

24✔
1061
        switch {
24✔
1062
        case errors.Is(sendError, htlcswitch.ErrPaymentIDNotFound):
×
1063
                response.Reason = channeldb.HTLCFailInternal
×
1064
                return response
×
1065

1066
        case errors.Is(sendError, htlcswitch.ErrUnreadableFailureMessage):
1✔
1067
                response.Reason = channeldb.HTLCFailUnreadable
1✔
1068
                return response
1✔
1069
        }
1070

1071
        var rtErr htlcswitch.ClearTextError
23✔
1072
        ok := errors.As(sendError, &rtErr)
23✔
1073
        if !ok {
27✔
1074
                response.Reason = channeldb.HTLCFailInternal
4✔
1075
                return response
4✔
1076
        }
4✔
1077

1078
        message := rtErr.WireMessage()
19✔
1079
        if message != nil {
37✔
1080
                response.Reason = channeldb.HTLCFailMessage
18✔
1081
                response.Message = message
18✔
1082
        } else {
19✔
1083
                response.Reason = channeldb.HTLCFailUnknown
1✔
1084
        }
1✔
1085

1086
        // If the ClearTextError received is a ForwardingError, the error
1087
        // originated from a node along the route, not locally on our outgoing
1088
        // link. We set failureSourceIdx to the index of the node where the
1089
        // failure occurred. If the error is not a ForwardingError, the failure
1090
        // occurred at our node, so we leave the index as 0 to indicate that
1091
        // we failed locally.
1092
        var fErr *htlcswitch.ForwardingError
19✔
1093
        ok = errors.As(rtErr, &fErr)
19✔
1094
        if ok {
38✔
1095
                response.FailureSourceIndex = uint32(fErr.FailureSourceIdx)
19✔
1096
        }
19✔
1097

1098
        return response
19✔
1099
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc