• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

lightningnetwork / lnd / 9915780197

13 Jul 2024 12:30AM UTC coverage: 49.268% (-9.1%) from 58.413%
9915780197

push

github

web-flow
Merge pull request #8653 from ProofOfKeags/fn-prim

DynComms [0/n]: `fn` package additions

92837 of 188433 relevant lines covered (49.27%)

1.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

78.05
/routing/payment_lifecycle.go
1
package routing
2

3
import (
4
        "context"
5
        "errors"
6
        "fmt"
7
        "time"
8

9
        "github.com/btcsuite/btcd/btcec/v2"
10
        "github.com/davecgh/go-spew/spew"
11
        sphinx "github.com/lightningnetwork/lightning-onion"
12
        "github.com/lightningnetwork/lnd/channeldb"
13
        "github.com/lightningnetwork/lnd/channeldb/models"
14
        "github.com/lightningnetwork/lnd/htlcswitch"
15
        "github.com/lightningnetwork/lnd/lntypes"
16
        "github.com/lightningnetwork/lnd/lnwire"
17
        "github.com/lightningnetwork/lnd/routing/route"
18
        "github.com/lightningnetwork/lnd/routing/shards"
19
)
20

21
// ErrPaymentLifecycleExiting is used when waiting for htlc attempt result, but
22
// the payment lifecycle is exiting.
23
var ErrPaymentLifecycleExiting = errors.New("payment lifecycle exiting")
24

25
// paymentLifecycle holds all information about the current state of a payment
26
// needed to resume if from any point.
27
type paymentLifecycle struct {
28
        router        *ChannelRouter
29
        feeLimit      lnwire.MilliSatoshi
30
        identifier    lntypes.Hash
31
        paySession    PaymentSession
32
        shardTracker  shards.ShardTracker
33
        currentHeight int32
34

35
        // quit is closed to signal the sub goroutines of the payment lifecycle
36
        // to stop.
37
        quit chan struct{}
38

39
        // resultCollected is used to signal that the result of an attempt has
40
        // been collected. A nil error means the attempt is either successful
41
        // or failed with temporary error. Otherwise, we should exit the
42
        // lifecycle loop as a terminal error has occurred.
43
        resultCollected chan error
44

45
        // resultCollector is a function that is used to collect the result of
46
        // an HTLC attempt, which is always mounted to `p.collectResultAsync`
47
        // except in unit test, where we use a much simpler resultCollector to
48
        // decouple the test flow for the payment lifecycle.
49
        resultCollector func(attempt *channeldb.HTLCAttempt)
50
}
51

52
// newPaymentLifecycle initiates a new payment lifecycle and returns it.
53
func newPaymentLifecycle(r *ChannelRouter, feeLimit lnwire.MilliSatoshi,
54
        identifier lntypes.Hash, paySession PaymentSession,
55
        shardTracker shards.ShardTracker,
56
        currentHeight int32) *paymentLifecycle {
3✔
57

3✔
58
        p := &paymentLifecycle{
3✔
59
                router:          r,
3✔
60
                feeLimit:        feeLimit,
3✔
61
                identifier:      identifier,
3✔
62
                paySession:      paySession,
3✔
63
                shardTracker:    shardTracker,
3✔
64
                currentHeight:   currentHeight,
3✔
65
                quit:            make(chan struct{}),
3✔
66
                resultCollected: make(chan error, 1),
3✔
67
        }
3✔
68

3✔
69
        // Mount the result collector.
3✔
70
        p.resultCollector = p.collectResultAsync
3✔
71

3✔
72
        return p
3✔
73
}
3✔
74

75
// calcFeeBudget returns the available fee to be used for sending HTLC
76
// attempts.
77
func (p *paymentLifecycle) calcFeeBudget(
78
        feesPaid lnwire.MilliSatoshi) lnwire.MilliSatoshi {
3✔
79

3✔
80
        budget := p.feeLimit
3✔
81

3✔
82
        // We'll subtract the used fee from our fee budget. In case of
3✔
83
        // overflow, we need to check whether feesPaid exceeds our budget
3✔
84
        // already.
3✔
85
        if feesPaid <= budget {
6✔
86
                budget -= feesPaid
3✔
87
        } else {
6✔
88
                budget = 0
3✔
89
        }
3✔
90

91
        return budget
3✔
92
}
93

94
// stateStep is the action the payment lifecycle takes next in its loop: skip
// the current iteration, proceed with it, or exit the loop altogether.
type stateStep uint8

const (
	// stepSkip abandons the current iteration of the lifecycle and jumps
	// straight to the next one.
	stepSkip stateStep = iota

	// stepProceed carries on with the current iteration of the lifecycle.
	stepProceed

	// stepExit leaves the lifecycle.
	stepExit
)
109

110
// decideNextStep is used to determine the next step in the payment lifecycle.
111
func (p *paymentLifecycle) decideNextStep(
112
        payment dbMPPayment) (stateStep, error) {
3✔
113

3✔
114
        // Check whether we could make new HTLC attempts.
3✔
115
        allow, err := payment.AllowMoreAttempts()
3✔
116
        if err != nil {
3✔
117
                return stepExit, err
×
118
        }
×
119

120
        if !allow {
6✔
121
                // Check whether we need to wait for results.
3✔
122
                wait, err := payment.NeedWaitAttempts()
3✔
123
                if err != nil {
3✔
124
                        return stepExit, err
×
125
                }
×
126

127
                // If we are not allowed to make new HTLC attempts and there's
128
                // no need to wait, the lifecycle is done and we can exit.
129
                if !wait {
6✔
130
                        return stepExit, nil
3✔
131
                }
3✔
132

133
                log.Tracef("Waiting for attempt results for payment %v",
3✔
134
                        p.identifier)
3✔
135

3✔
136
                // Otherwise we wait for one HTLC attempt then continue
3✔
137
                // the lifecycle.
3✔
138
                //
3✔
139
                // NOTE: we don't check `p.quit` since `decideNextStep` is
3✔
140
                // running in the same goroutine as `resumePayment`.
3✔
141
                select {
3✔
142
                case err := <-p.resultCollected:
3✔
143
                        // If an error is returned, exit with it.
3✔
144
                        if err != nil {
6✔
145
                                return stepExit, err
3✔
146
                        }
3✔
147

148
                        log.Tracef("Received attempt result for payment %v",
3✔
149
                                p.identifier)
3✔
150

151
                case <-p.router.quit:
2✔
152
                        return stepExit, ErrRouterShuttingDown
2✔
153
                }
154

155
                return stepSkip, nil
3✔
156
        }
157

158
        // Otherwise we need to make more attempts.
159
        return stepProceed, nil
3✔
160
}
161

162
// resumePayment resumes the paymentLifecycle from the current state.
163
func (p *paymentLifecycle) resumePayment(ctx context.Context) ([32]byte,
164
        *route.Route, error) {
3✔
165

3✔
166
        // When the payment lifecycle loop exits, we make sure to signal any
3✔
167
        // sub goroutine of the HTLC attempt to exit, then wait for them to
3✔
168
        // return.
3✔
169
        defer p.stop()
3✔
170

3✔
171
        // If we had any existing attempts outstanding, we'll start by spinning
3✔
172
        // up goroutines that'll collect their results and deliver them to the
3✔
173
        // lifecycle loop below.
3✔
174
        payment, err := p.router.cfg.Control.FetchPayment(p.identifier)
3✔
175
        if err != nil {
3✔
176
                return [32]byte{}, nil, err
×
177
        }
×
178

179
        for _, a := range payment.InFlightHTLCs() {
6✔
180
                a := a
3✔
181

3✔
182
                log.Infof("Resuming payment shard %v for payment %v",
3✔
183
                        a.AttemptID, p.identifier)
3✔
184

3✔
185
                p.resultCollector(&a)
3✔
186
        }
3✔
187

188
        // exitWithErr is a helper closure that logs and returns an error.
189
        exitWithErr := func(err error) ([32]byte, *route.Route, error) {
6✔
190
                log.Errorf("Payment %v with status=%v failed: %v",
3✔
191
                        p.identifier, payment.GetStatus(), err)
3✔
192
                return [32]byte{}, nil, err
3✔
193
        }
3✔
194

195
        // We'll continue until either our payment succeeds, or we encounter a
196
        // critical error during path finding.
197
lifecycle:
3✔
198
        for {
6✔
199
                // We update the payment state on every iteration. Since the
3✔
200
                // payment state is affected by multiple goroutines (ie,
3✔
201
                // collectResultAsync), it is NOT guaranteed that we always
3✔
202
                // have the latest state here. This is fine as long as the
3✔
203
                // state is consistent as a whole.
3✔
204
                payment, err = p.router.cfg.Control.FetchPayment(p.identifier)
3✔
205
                if err != nil {
3✔
206
                        return exitWithErr(err)
×
207
                }
×
208

209
                ps := payment.GetState()
3✔
210
                remainingFees := p.calcFeeBudget(ps.FeesPaid)
3✔
211

3✔
212
                log.Debugf("Payment %v: status=%v, active_shards=%v, "+
3✔
213
                        "rem_value=%v, fee_limit=%v", p.identifier,
3✔
214
                        payment.GetStatus(), ps.NumAttemptsInFlight,
3✔
215
                        ps.RemainingAmt, remainingFees)
3✔
216

3✔
217
                // We now proceed our lifecycle with the following tasks in
3✔
218
                // order,
3✔
219
                //   1. check context.
3✔
220
                //   2. request route.
3✔
221
                //   3. create HTLC attempt.
3✔
222
                //   4. send HTLC attempt.
3✔
223
                //   5. collect HTLC attempt result.
3✔
224
                //
3✔
225
                // Before we attempt any new shard, we'll check to see if we've
3✔
226
                // gone past the payment attempt timeout, or if the context was
3✔
227
                // cancelled, or the router is exiting. In any of these cases,
3✔
228
                // we'll stop this payment attempt short.
3✔
229
                if err := p.checkContext(ctx); err != nil {
3✔
230
                        return exitWithErr(err)
×
231
                }
×
232

233
                // Now decide the next step of the current lifecycle.
234
                step, err := p.decideNextStep(payment)
3✔
235
                if err != nil {
6✔
236
                        return exitWithErr(err)
3✔
237
                }
3✔
238

239
                switch step {
3✔
240
                // Exit the for loop and return below.
241
                case stepExit:
3✔
242
                        break lifecycle
3✔
243

244
                // Continue the for loop and skip the rest.
245
                case stepSkip:
3✔
246
                        continue lifecycle
3✔
247

248
                // Continue the for loop and proceed the rest.
249
                case stepProceed:
3✔
250

251
                // Unknown step received, exit with an error.
252
                default:
×
253
                        err = fmt.Errorf("unknown step: %v", step)
×
254
                        return exitWithErr(err)
×
255
                }
256

257
                // Now request a route to be used to create our HTLC attempt.
258
                rt, err := p.requestRoute(ps)
3✔
259
                if err != nil {
3✔
260
                        return exitWithErr(err)
×
261
                }
×
262

263
                // We may not be able to find a route for current attempt. In
264
                // that case, we continue the loop and move straight to the
265
                // next iteration in case there are results for inflight HTLCs
266
                // that still need to be collected.
267
                if rt == nil {
6✔
268
                        log.Errorf("No route found for payment %v",
3✔
269
                                p.identifier)
3✔
270

3✔
271
                        continue lifecycle
3✔
272
                }
273

274
                log.Tracef("Found route: %s", spew.Sdump(rt.Hops))
3✔
275

3✔
276
                // We found a route to try, create a new HTLC attempt to try.
3✔
277
                attempt, err := p.registerAttempt(rt, ps.RemainingAmt)
3✔
278
                if err != nil {
3✔
279
                        return exitWithErr(err)
×
280
                }
×
281

282
                // Once the attempt is created, send it to the htlcswitch.
283
                result, err := p.sendAttempt(attempt)
3✔
284
                if err != nil {
3✔
285
                        return exitWithErr(err)
×
286
                }
×
287

288
                // Now that the shard was successfully sent, launch a go
289
                // routine that will handle its result when its back.
290
                if result.err == nil {
6✔
291
                        p.resultCollector(attempt)
3✔
292
                }
3✔
293
        }
294

295
        // Once we are out the lifecycle loop, it means we've reached a
296
        // terminal condition. We either return the settled preimage or the
297
        // payment's failure reason.
298
        //
299
        // Optionally delete the failed attempts from the database.
300
        err = p.router.cfg.Control.DeleteFailedAttempts(p.identifier)
3✔
301
        if err != nil {
3✔
302
                log.Errorf("Error deleting failed htlc attempts for payment "+
×
303
                        "%v: %v", p.identifier, err)
×
304
        }
×
305

306
        htlc, failure := payment.TerminalInfo()
3✔
307
        if htlc != nil {
6✔
308
                return htlc.Settle.Preimage, &htlc.Route, nil
3✔
309
        }
3✔
310

311
        // Otherwise return the payment failure reason.
312
        return [32]byte{}, nil, *failure
3✔
313
}
314

315
// checkContext checks whether the payment context has been canceled.
316
// Cancellation occurs manually or if the context times out.
317
func (p *paymentLifecycle) checkContext(ctx context.Context) error {
3✔
318
        select {
3✔
319
        case <-ctx.Done():
×
320
                // If the context was canceled, we'll mark the payment as
×
321
                // failed. There are two cases to distinguish here: Either a
×
322
                // user-provided timeout was reached, or the context was
×
323
                // canceled, either to a manual cancellation or due to an
×
324
                // unknown error.
×
325
                if errors.Is(ctx.Err(), context.DeadlineExceeded) {
×
326
                        log.Warnf("Payment attempt not completed before "+
×
327
                                "timeout, id=%s", p.identifier.String())
×
328
                } else {
×
329
                        log.Warnf("Payment attempt context canceled, id=%s",
×
330
                                p.identifier.String())
×
331
                }
×
332

333
                // By marking the payment failed, depending on whether it has
334
                // inflight HTLCs or not, its status will now either be
335
                // `StatusInflight` or `StatusFailed`. In either case, no more
336
                // HTLCs will be attempted.
337
                reason := channeldb.FailureReasonTimeout
×
338
                err := p.router.cfg.Control.FailPayment(p.identifier, reason)
×
339
                if err != nil {
×
340
                        return fmt.Errorf("FailPayment got %w", err)
×
341
                }
×
342

343
        case <-p.router.quit:
×
344
                return fmt.Errorf("check payment timeout got: %w",
×
345
                        ErrRouterShuttingDown)
×
346

347
        // Fall through if we haven't hit our time limit.
348
        default:
3✔
349
        }
350

351
        return nil
3✔
352
}
353

354
// requestRoute is responsible for finding a route to be used to create an HTLC
355
// attempt.
356
func (p *paymentLifecycle) requestRoute(
357
        ps *channeldb.MPPaymentState) (*route.Route, error) {
3✔
358

3✔
359
        remainingFees := p.calcFeeBudget(ps.FeesPaid)
3✔
360

3✔
361
        // Query our payment session to construct a route.
3✔
362
        rt, err := p.paySession.RequestRoute(
3✔
363
                ps.RemainingAmt, remainingFees,
3✔
364
                uint32(ps.NumAttemptsInFlight), uint32(p.currentHeight),
3✔
365
        )
3✔
366

3✔
367
        // Exit early if there's no error.
3✔
368
        if err == nil {
6✔
369
                return rt, nil
3✔
370
        }
3✔
371

372
        // Otherwise we need to handle the error.
373
        log.Warnf("Failed to find route for payment %v: %v", p.identifier, err)
3✔
374

3✔
375
        // If the error belongs to `noRouteError` set, it means a non-critical
3✔
376
        // error has happened during path finding, and we will mark the payment
3✔
377
        // failed with this reason. Otherwise, we'll return the critical error
3✔
378
        // found to abort the lifecycle.
3✔
379
        var routeErr noRouteError
3✔
380
        if !errors.As(err, &routeErr) {
3✔
381
                return nil, fmt.Errorf("requestRoute got: %w", err)
×
382
        }
×
383

384
        // It's the `paymentSession`'s responsibility to find a route for us
385
        // with the best effort. When it cannot find a path, we need to treat it
386
        // as a terminal condition and fail the payment no matter it has
387
        // inflight HTLCs or not.
388
        failureCode := routeErr.FailureReason()
3✔
389
        log.Warnf("Marking payment %v permanently failed with no route: %v",
3✔
390
                p.identifier, failureCode)
3✔
391

3✔
392
        err = p.router.cfg.Control.FailPayment(p.identifier, failureCode)
3✔
393
        if err != nil {
3✔
394
                return nil, fmt.Errorf("FailPayment got: %w", err)
×
395
        }
×
396

397
        // NOTE: we decide to not return the non-critical noRouteError here to
398
        // avoid terminating the payment lifecycle as there might be other
399
        // inflight HTLCs which we must wait for their results.
400
        return nil, nil
3✔
401
}
402

403
// stop signals any active shard goroutine to exit.
404
func (p *paymentLifecycle) stop() {
3✔
405
        close(p.quit)
3✔
406
}
3✔
407

408
// attemptResult holds the HTLC attempt and a possible error returned from
409
// sending it.
410
type attemptResult struct {
411
        // err is non-nil if a non-critical error was encountered when trying
412
        // to send the attempt, and we successfully updated the control tower
413
        // to reflect this error. This can be errors like not enough local
414
        // balance for the given route etc.
415
        err error
416

417
        // attempt is the attempt structure as recorded in the database.
418
        attempt *channeldb.HTLCAttempt
419
}
420

421
// collectResultAsync launches a goroutine that will wait for the result of the
422
// given HTLC attempt to be available then handle its result. Once received, it
423
// will send a nil error to channel `resultCollected` to indicate there's a
424
// result.
425
func (p *paymentLifecycle) collectResultAsync(attempt *channeldb.HTLCAttempt) {
3✔
426
        log.Debugf("Collecting result for attempt %v in payment %v",
3✔
427
                attempt.AttemptID, p.identifier)
3✔
428

3✔
429
        go func() {
6✔
430
                // Block until the result is available.
3✔
431
                _, err := p.collectResult(attempt)
3✔
432
                if err != nil {
6✔
433
                        log.Errorf("Error collecting result for attempt %v "+
3✔
434
                                "in payment %v: %v", attempt.AttemptID,
3✔
435
                                p.identifier, err)
3✔
436
                }
3✔
437

438
                log.Debugf("Result collected for attempt %v in payment %v",
3✔
439
                        attempt.AttemptID, p.identifier)
3✔
440

3✔
441
                // Once the result is collected, we signal it by writing the
3✔
442
                // error to `resultCollected`.
3✔
443
                select {
3✔
444
                // Send the signal or quit.
445
                case p.resultCollected <- err:
3✔
446

447
                case <-p.quit:
1✔
448
                        log.Debugf("Lifecycle exiting while collecting "+
1✔
449
                                "result for payment %v", p.identifier)
1✔
450

451
                case <-p.router.quit:
2✔
452
                        return
2✔
453
                }
454
        }()
455
}
456

457
// collectResult waits for the result for the given attempt to be available
458
// from the Switch, then records the attempt outcome with the control tower.
459
// An attemptResult is returned, indicating the final outcome of this HTLC
460
// attempt.
461
func (p *paymentLifecycle) collectResult(attempt *channeldb.HTLCAttempt) (
462
        *attemptResult, error) {
3✔
463

3✔
464
        // We'll retrieve the hash specific to this shard from the
3✔
465
        // shardTracker, since it will be needed to regenerate the circuit
3✔
466
        // below.
3✔
467
        hash, err := p.shardTracker.GetHash(attempt.AttemptID)
3✔
468
        if err != nil {
3✔
469
                return p.failAttempt(attempt.AttemptID, err)
×
470
        }
×
471

472
        // Regenerate the circuit for this attempt.
473
        _, circuit, err := generateSphinxPacket(
3✔
474
                &attempt.Route, hash[:], attempt.SessionKey(),
3✔
475
        )
3✔
476
        // TODO(yy): We generate this circuit to create the error decryptor,
3✔
477
        // which is then used in htlcswitch as the deobfuscator to decode the
3✔
478
        // error from `UpdateFailHTLC`. However, suppose it's an
3✔
479
        // `UpdateFulfillHTLC` message yet for some reason the sphinx packet is
3✔
480
        // failed to be generated, we'd miss settling it. This means we should
3✔
481
        // give it a second chance to try the settlement path in case
3✔
482
        // `GetAttemptResult` gives us back the preimage. And move the circuit
3✔
483
        // creation into htlcswitch so it's only constructed when there's a
3✔
484
        // failure message we need to decode.
3✔
485
        if err != nil {
3✔
486
                log.Debugf("Unable to generate circuit for attempt %v: %v",
×
487
                        attempt.AttemptID, err)
×
488

×
489
                return p.failAttempt(attempt.AttemptID, err)
×
490
        }
×
491

492
        // Using the created circuit, initialize the error decrypter, so we can
493
        // parse+decode any failures incurred by this payment within the
494
        // switch.
495
        errorDecryptor := &htlcswitch.SphinxErrorDecrypter{
3✔
496
                OnionErrorDecrypter: sphinx.NewOnionErrorDecrypter(circuit),
3✔
497
        }
3✔
498

3✔
499
        // Now ask the switch to return the result of the payment when
3✔
500
        // available.
3✔
501
        //
3✔
502
        // TODO(yy): consider using htlcswitch to create the `errorDecryptor`
3✔
503
        // since the htlc is already in db. This will also make the interface
3✔
504
        // `PaymentAttemptDispatcher` deeper and easier to use. Moreover, we'd
3✔
505
        // only create the decryptor when received a failure, further saving us
3✔
506
        // a few CPU cycles.
3✔
507
        resultChan, err := p.router.cfg.Payer.GetAttemptResult(
3✔
508
                attempt.AttemptID, p.identifier, errorDecryptor,
3✔
509
        )
3✔
510
        // Handle the switch error.
3✔
511
        if err != nil {
3✔
512
                log.Errorf("Failed getting result for attemptID %d "+
×
513
                        "from switch: %v", attempt.AttemptID, err)
×
514

×
515
                return p.handleSwitchErr(attempt, err)
×
516
        }
×
517

518
        // The switch knows about this payment, we'll wait for a result to be
519
        // available.
520
        var (
3✔
521
                result *htlcswitch.PaymentResult
3✔
522
                ok     bool
3✔
523
        )
3✔
524

3✔
525
        select {
3✔
526
        case result, ok = <-resultChan:
3✔
527
                if !ok {
6✔
528
                        return nil, htlcswitch.ErrSwitchExiting
3✔
529
                }
3✔
530

531
        case <-p.quit:
×
532
                return nil, ErrPaymentLifecycleExiting
×
533

534
        case <-p.router.quit:
×
535
                return nil, ErrRouterShuttingDown
×
536
        }
537

538
        // In case of a payment failure, fail the attempt with the control
539
        // tower and return.
540
        if result.Error != nil {
6✔
541
                return p.handleSwitchErr(attempt, result.Error)
3✔
542
        }
3✔
543

544
        // We successfully got a payment result back from the switch.
545
        log.Debugf("Payment %v succeeded with pid=%v",
3✔
546
                p.identifier, attempt.AttemptID)
3✔
547

3✔
548
        // Report success to mission control.
3✔
549
        err = p.router.cfg.MissionControl.ReportPaymentSuccess(
3✔
550
                attempt.AttemptID, &attempt.Route,
3✔
551
        )
3✔
552
        if err != nil {
3✔
553
                log.Errorf("Error reporting payment success to mc: %v", err)
×
554
        }
×
555

556
        // In case of success we atomically store settle result to the DB move
557
        // the shard to the settled state.
558
        htlcAttempt, err := p.router.cfg.Control.SettleAttempt(
3✔
559
                p.identifier, attempt.AttemptID,
3✔
560
                &channeldb.HTLCSettleInfo{
3✔
561
                        Preimage:   result.Preimage,
3✔
562
                        SettleTime: p.router.cfg.Clock.Now(),
3✔
563
                },
3✔
564
        )
3✔
565
        if err != nil {
3✔
566
                log.Errorf("Error settling attempt %v for payment %v with "+
×
567
                        "preimage %v: %v", attempt.AttemptID, p.identifier,
×
568
                        result.Preimage, err)
×
569

×
570
                // We won't mark the attempt as failed since we already have
×
571
                // the preimage.
×
572
                return nil, err
×
573
        }
×
574

575
        return &attemptResult{
3✔
576
                attempt: htlcAttempt,
3✔
577
        }, nil
3✔
578
}
579

580
// registerAttempt is responsible for creating and saving an HTLC attempt in db
581
// by using the route info provided. The `remainingAmt` is used to decide
582
// whether this is the last attempt.
583
func (p *paymentLifecycle) registerAttempt(rt *route.Route,
584
        remainingAmt lnwire.MilliSatoshi) (*channeldb.HTLCAttempt, error) {
3✔
585

3✔
586
        // If this route will consume the last remaining amount to send
3✔
587
        // to the receiver, this will be our last shard (for now).
3✔
588
        isLastAttempt := rt.ReceiverAmt() == remainingAmt
3✔
589

3✔
590
        // Using the route received from the payment session, create a new
3✔
591
        // shard to send.
3✔
592
        attempt, err := p.createNewPaymentAttempt(rt, isLastAttempt)
3✔
593
        if err != nil {
3✔
594
                return nil, err
×
595
        }
×
596

597
        // Before sending this HTLC to the switch, we checkpoint the fresh
598
        // paymentID and route to the DB. This lets us know on startup the ID
599
        // of the payment that we attempted to send, such that we can query the
600
        // Switch for its whereabouts. The route is needed to handle the result
601
        // when it eventually comes back.
602
        err = p.router.cfg.Control.RegisterAttempt(
3✔
603
                p.identifier, &attempt.HTLCAttemptInfo,
3✔
604
        )
3✔
605

3✔
606
        return attempt, err
3✔
607
}
608

609
// createNewPaymentAttempt creates a new payment attempt from the given route.
610
func (p *paymentLifecycle) createNewPaymentAttempt(rt *route.Route,
611
        lastShard bool) (*channeldb.HTLCAttempt, error) {
3✔
612

3✔
613
        // Generate a new key to be used for this attempt.
3✔
614
        sessionKey, err := generateNewSessionKey()
3✔
615
        if err != nil {
3✔
616
                return nil, err
×
617
        }
×
618

619
        // We generate a new, unique payment ID that we will use for
620
        // this HTLC.
621
        attemptID, err := p.router.cfg.NextPaymentID()
3✔
622
        if err != nil {
3✔
623
                return nil, err
×
624
        }
×
625

626
        // Request a new shard from the ShardTracker. If this is an AMP
627
        // payment, and this is the last shard, the outstanding shards together
628
        // with this one will be enough for the receiver to derive all HTLC
629
        // preimages. If this a non-AMP payment, the ShardTracker will return a
630
        // simple shard with the payment's static payment hash.
631
        shard, err := p.shardTracker.NewShard(attemptID, lastShard)
3✔
632
        if err != nil {
3✔
633
                return nil, err
×
634
        }
×
635

636
        // If this shard carries MPP or AMP options, add them to the last hop
637
        // on the route.
638
        hop := rt.Hops[len(rt.Hops)-1]
3✔
639
        if shard.MPP() != nil {
6✔
640
                hop.MPP = shard.MPP()
3✔
641
        }
3✔
642

643
        if shard.AMP() != nil {
6✔
644
                hop.AMP = shard.AMP()
3✔
645
        }
3✔
646

647
        hash := shard.Hash()
3✔
648

3✔
649
        // We now have all the information needed to populate the current
3✔
650
        // attempt information.
3✔
651
        attempt := channeldb.NewHtlcAttempt(
3✔
652
                attemptID, sessionKey, *rt, p.router.cfg.Clock.Now(), &hash,
3✔
653
        )
3✔
654

3✔
655
        return attempt, nil
3✔
656
}
657

658
// sendAttempt attempts to send the current attempt to the switch to complete
659
// the payment. If this attempt fails, then we'll continue on to the next
660
// available route.
661
func (p *paymentLifecycle) sendAttempt(
662
        attempt *channeldb.HTLCAttempt) (*attemptResult, error) {
3✔
663

3✔
664
        log.Debugf("Attempting to send payment %v (pid=%v)", p.identifier,
3✔
665
                attempt.AttemptID)
3✔
666

3✔
667
        rt := attempt.Route
3✔
668

3✔
669
        // Construct the first hop.
3✔
670
        firstHop := lnwire.NewShortChanIDFromInt(rt.Hops[0].ChannelID)
3✔
671

3✔
672
        // Craft an HTLC packet to send to the htlcswitch. The metadata within
3✔
673
        // this packet will be used to route the payment through the network,
3✔
674
        // starting with the first-hop.
3✔
675
        htlcAdd := &lnwire.UpdateAddHTLC{
3✔
676
                Amount:      rt.TotalAmount,
3✔
677
                Expiry:      rt.TotalTimeLock,
3✔
678
                PaymentHash: *attempt.Hash,
3✔
679
        }
3✔
680

3✔
681
        // Generate the raw encoded sphinx packet to be included along
3✔
682
        // with the htlcAdd message that we send directly to the
3✔
683
        // switch.
3✔
684
        onionBlob, _, err := generateSphinxPacket(
3✔
685
                &rt, attempt.Hash[:], attempt.SessionKey(),
3✔
686
        )
3✔
687
        if err != nil {
3✔
688
                log.Errorf("Failed to create onion blob: attempt=%d in "+
×
689
                        "payment=%v, err:%v", attempt.AttemptID,
×
690
                        p.identifier, err)
×
691

×
692
                return p.failAttempt(attempt.AttemptID, err)
×
693
        }
×
694

695
        copy(htlcAdd.OnionBlob[:], onionBlob)
3✔
696

3✔
697
        // Send it to the Switch. When this method returns we assume
3✔
698
        // the Switch successfully has persisted the payment attempt,
3✔
699
        // such that we can resume waiting for the result after a
3✔
700
        // restart.
3✔
701
        err = p.router.cfg.Payer.SendHTLC(firstHop, attempt.AttemptID, htlcAdd)
3✔
702
        if err != nil {
6✔
703
                log.Errorf("Failed sending attempt %d for payment %v to "+
3✔
704
                        "switch: %v", attempt.AttemptID, p.identifier, err)
3✔
705

3✔
706
                return p.handleSwitchErr(attempt, err)
3✔
707
        }
3✔
708

709
        log.Debugf("Attempt %v for payment %v successfully sent to switch, "+
3✔
710
                "route: %v", attempt.AttemptID, p.identifier, &attempt.Route)
3✔
711

3✔
712
        return &attemptResult{
3✔
713
                attempt: attempt,
3✔
714
        }, nil
3✔
715
}
716

717
// failAttemptAndPayment fails both the payment and its attempt via the
718
// router's control tower, which marks the payment as failed in db.
719
func (p *paymentLifecycle) failPaymentAndAttempt(
720
        attemptID uint64, reason *channeldb.FailureReason,
721
        sendErr error) (*attemptResult, error) {
3✔
722

3✔
723
        log.Errorf("Payment %v failed: final_outcome=%v, raw_err=%v",
3✔
724
                p.identifier, *reason, sendErr)
3✔
725

3✔
726
        // Fail the payment via control tower.
3✔
727
        //
3✔
728
        // NOTE: we must fail the payment first before failing the attempt.
3✔
729
        // Otherwise, once the attempt is marked as failed, another goroutine
3✔
730
        // might make another attempt while we are failing the payment.
3✔
731
        err := p.router.cfg.Control.FailPayment(p.identifier, *reason)
3✔
732
        if err != nil {
3✔
733
                log.Errorf("Unable to fail payment: %v", err)
×
734
                return nil, err
×
735
        }
×
736

737
        // Fail the attempt.
738
        return p.failAttempt(attemptID, sendErr)
3✔
739
}
740

741
// handleSwitchErr inspects the given error from the Switch and determines
742
// whether we should make another payment attempt, or if it should be
743
// considered a terminal error. Terminal errors will be recorded with the
744
// control tower. It analyzes the sendErr for the payment attempt received from
745
// the switch and updates mission control and/or channel policies. Depending on
746
// the error type, the error is either the final outcome of the payment or we
747
// need to continue with an alternative route. A final outcome is indicated by
748
// a non-nil reason value.
749
func (p *paymentLifecycle) handleSwitchErr(attempt *channeldb.HTLCAttempt,
750
        sendErr error) (*attemptResult, error) {
3✔
751

3✔
752
        internalErrorReason := channeldb.FailureReasonError
3✔
753
        attemptID := attempt.AttemptID
3✔
754

3✔
755
        // reportAndFail is a helper closure that reports the failure to the
3✔
756
        // mission control, which helps us to decide whether we want to retry
3✔
757
        // the payment or not. If a non nil reason is returned from mission
3✔
758
        // control, it will further fail the payment via control tower.
3✔
759
        reportAndFail := func(srcIdx *int,
3✔
760
                msg lnwire.FailureMessage) (*attemptResult, error) {
6✔
761

3✔
762
                // Report outcome to mission control.
3✔
763
                reason, err := p.router.cfg.MissionControl.ReportPaymentFail(
3✔
764
                        attemptID, &attempt.Route, srcIdx, msg,
3✔
765
                )
3✔
766
                if err != nil {
3✔
767
                        log.Errorf("Error reporting payment result to mc: %v",
×
768
                                err)
×
769

×
770
                        reason = &internalErrorReason
×
771
                }
×
772

773
                // Fail the attempt only if there's no reason.
774
                if reason == nil {
6✔
775
                        // Fail the attempt.
3✔
776
                        return p.failAttempt(attemptID, sendErr)
3✔
777
                }
3✔
778

779
                // Otherwise fail both the payment and the attempt.
780
                return p.failPaymentAndAttempt(attemptID, reason, sendErr)
3✔
781
        }
782

783
        // If this attempt ID is unknown to the Switch, it means it was never
784
        // checkpointed and forwarded by the switch before a restart. In this
785
        // case we can safely send a new payment attempt, and wait for its
786
        // result to be available.
787
        if errors.Is(sendErr, htlcswitch.ErrPaymentIDNotFound) {
3✔
788
                log.Debugf("Attempt ID %v for payment %v not found in the "+
×
789
                        "Switch, retrying.", attempt.AttemptID, p.identifier)
×
790

×
791
                return p.failAttempt(attemptID, sendErr)
×
792
        }
×
793

794
        if errors.Is(sendErr, htlcswitch.ErrUnreadableFailureMessage) {
3✔
795
                log.Warn("Unreadable failure when sending htlc: id=%v, hash=%v",
×
796
                        attempt.AttemptID, attempt.Hash)
×
797

×
798
                // Since this error message cannot be decrypted, we will send a
×
799
                // nil error message to our mission controller and fail the
×
800
                // payment.
×
801
                return reportAndFail(nil, nil)
×
802
        }
×
803

804
        // If the error is a ClearTextError, we have received a valid wire
805
        // failure message, either from our own outgoing link or from a node
806
        // down the route. If the error is not related to the propagation of
807
        // our payment, we can stop trying because an internal error has
808
        // occurred.
809
        var rtErr htlcswitch.ClearTextError
3✔
810
        ok := errors.As(sendErr, &rtErr)
3✔
811
        if !ok {
3✔
812
                return p.failPaymentAndAttempt(
×
813
                        attemptID, &internalErrorReason, sendErr,
×
814
                )
×
815
        }
×
816

817
        // failureSourceIdx is the index of the node that the failure occurred
818
        // at. If the ClearTextError received is not a ForwardingError the
819
        // payment error occurred at our node, so we leave this value as 0
820
        // to indicate that the failure occurred locally. If the error is a
821
        // ForwardingError, it did not originate at our node, so we set
822
        // failureSourceIdx to the index of the node where the failure occurred.
823
        failureSourceIdx := 0
3✔
824
        var source *htlcswitch.ForwardingError
3✔
825
        ok = errors.As(rtErr, &source)
3✔
826
        if ok {
6✔
827
                failureSourceIdx = source.FailureSourceIdx
3✔
828
        }
3✔
829

830
        // Extract the wire failure and apply channel update if it contains one.
831
        // If we received an unknown failure message from a node along the
832
        // route, the failure message will be nil.
833
        failureMessage := rtErr.WireMessage()
3✔
834
        err := p.handleFailureMessage(
3✔
835
                &attempt.Route, failureSourceIdx, failureMessage,
3✔
836
        )
3✔
837
        if err != nil {
3✔
838
                return p.failPaymentAndAttempt(
×
839
                        attemptID, &internalErrorReason, sendErr,
×
840
                )
×
841
        }
×
842

843
        log.Tracef("Node=%v reported failure when sending htlc",
3✔
844
                failureSourceIdx)
3✔
845

3✔
846
        return reportAndFail(&failureSourceIdx, failureMessage)
3✔
847
}
848

849
// handleFailureMessage tries to apply a channel update present in the failure
850
// message if any.
851
func (p *paymentLifecycle) handleFailureMessage(rt *route.Route,
852
        errorSourceIdx int, failure lnwire.FailureMessage) error {
3✔
853

3✔
854
        if failure == nil {
3✔
855
                return nil
×
856
        }
×
857

858
        // It makes no sense to apply our own channel updates.
859
        if errorSourceIdx == 0 {
6✔
860
                log.Errorf("Channel update of ourselves received")
3✔
861

3✔
862
                return nil
3✔
863
        }
3✔
864

865
        // Extract channel update if the error contains one.
866
        update := p.router.extractChannelUpdate(failure)
3✔
867
        if update == nil {
6✔
868
                return nil
3✔
869
        }
3✔
870

871
        // Parse pubkey to allow validation of the channel update. This should
872
        // always succeed, otherwise there is something wrong in our
873
        // implementation. Therefore, return an error.
874
        errVertex := rt.Hops[errorSourceIdx-1].PubKeyBytes
3✔
875
        errSource, err := btcec.ParsePubKey(errVertex[:])
3✔
876
        if err != nil {
3✔
877
                log.Errorf("Cannot parse pubkey: idx=%v, pubkey=%v",
×
878
                        errorSourceIdx, errVertex)
×
879

×
880
                return err
×
881
        }
×
882

883
        var (
3✔
884
                isAdditionalEdge bool
3✔
885
                policy           *models.CachedEdgePolicy
3✔
886
        )
3✔
887

3✔
888
        // Before we apply the channel update, we need to decide whether the
3✔
889
        // update is for additional (ephemeral) edge or normal edge stored in
3✔
890
        // db.
3✔
891
        //
3✔
892
        // Note: the p.paySession might be nil here if it's called inside
3✔
893
        // SendToRoute where there's no payment lifecycle.
3✔
894
        if p.paySession != nil {
6✔
895
                policy = p.paySession.GetAdditionalEdgePolicy(
3✔
896
                        errSource, update.ShortChannelID.ToUint64(),
3✔
897
                )
3✔
898
                if policy != nil {
6✔
899
                        isAdditionalEdge = true
3✔
900
                }
3✔
901
        }
902

903
        // Apply channel update to additional edge policy.
904
        if isAdditionalEdge {
6✔
905
                if !p.paySession.UpdateAdditionalEdge(
3✔
906
                        update, errSource, policy) {
3✔
907

×
908
                        log.Debugf("Invalid channel update received: node=%v",
×
909
                                errVertex)
×
910
                }
×
911
                return nil
3✔
912
        }
913

914
        // Apply channel update to the channel edge policy in our db.
915
        if !p.router.applyChannelUpdate(update) {
6✔
916
                log.Debugf("Invalid channel update received: node=%v",
3✔
917
                        errVertex)
3✔
918
        }
3✔
919
        return nil
3✔
920
}
921

922
// failAttempt calls control tower to fail the current payment attempt.
923
func (p *paymentLifecycle) failAttempt(attemptID uint64,
924
        sendError error) (*attemptResult, error) {
3✔
925

3✔
926
        log.Warnf("Attempt %v for payment %v failed: %v", attemptID,
3✔
927
                p.identifier, sendError)
3✔
928

3✔
929
        failInfo := marshallError(
3✔
930
                sendError,
3✔
931
                p.router.cfg.Clock.Now(),
3✔
932
        )
3✔
933

3✔
934
        // Now that we are failing this payment attempt, cancel the shard with
3✔
935
        // the ShardTracker such that it can derive the correct hash for the
3✔
936
        // next attempt.
3✔
937
        if err := p.shardTracker.CancelShard(attemptID); err != nil {
3✔
938
                return nil, err
×
939
        }
×
940

941
        attempt, err := p.router.cfg.Control.FailAttempt(
3✔
942
                p.identifier, attemptID, failInfo,
3✔
943
        )
3✔
944
        if err != nil {
3✔
945
                return nil, err
×
946
        }
×
947

948
        return &attemptResult{
3✔
949
                attempt: attempt,
3✔
950
                err:     sendError,
3✔
951
        }, nil
3✔
952
}
953

954
// marshallError marshall an error as received from the switch to a structure
955
// that is suitable for database storage.
956
func marshallError(sendError error, time time.Time) *channeldb.HTLCFailInfo {
3✔
957
        response := &channeldb.HTLCFailInfo{
3✔
958
                FailTime: time,
3✔
959
        }
3✔
960

3✔
961
        switch {
3✔
962
        case errors.Is(sendError, htlcswitch.ErrPaymentIDNotFound):
×
963
                response.Reason = channeldb.HTLCFailInternal
×
964
                return response
×
965

966
        case errors.Is(sendError, htlcswitch.ErrUnreadableFailureMessage):
×
967
                response.Reason = channeldb.HTLCFailUnreadable
×
968
                return response
×
969
        }
970

971
        var rtErr htlcswitch.ClearTextError
3✔
972
        ok := errors.As(sendError, &rtErr)
3✔
973
        if !ok {
3✔
974
                response.Reason = channeldb.HTLCFailInternal
×
975
                return response
×
976
        }
×
977

978
        message := rtErr.WireMessage()
3✔
979
        if message != nil {
6✔
980
                response.Reason = channeldb.HTLCFailMessage
3✔
981
                response.Message = message
3✔
982
        } else {
3✔
983
                response.Reason = channeldb.HTLCFailUnknown
×
984
        }
×
985

986
        // If the ClearTextError received is a ForwardingError, the error
987
        // originated from a node along the route, not locally on our outgoing
988
        // link. We set failureSourceIdx to the index of the node where the
989
        // failure occurred. If the error is not a ForwardingError, the failure
990
        // occurred at our node, so we leave the index as 0 to indicate that
991
        // we failed locally.
992
        var fErr *htlcswitch.ForwardingError
3✔
993
        ok = errors.As(rtErr, &fErr)
3✔
994
        if ok {
6✔
995
                response.FailureSourceIndex = uint32(fErr.FailureSourceIdx)
3✔
996
        }
3✔
997

998
        return response
3✔
999
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc