• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

lightningnetwork / lnd / 16877060063

11 Aug 2025 10:10AM UTC coverage: 66.918% (+0.002%) from 66.916%
16877060063

Pull #10141

github

web-flow
Merge dc7bea147 into 72e9ad8e6
Pull Request #10141: fix stuck payment

16 of 16 new or added lines in 1 file covered. (100.0%)

78 existing lines in 17 files now uncovered.

135714 of 202806 relevant lines covered (66.92%)

21573.6 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.4
/discovery/validation_barrier.go
1
package discovery
2

3
import (
4
        "errors"
5
        "fmt"
6
        "sync"
7
        "sync/atomic"
8

9
        "github.com/lightningnetwork/lnd/fn/v2"
10
        "github.com/lightningnetwork/lnd/lnwire"
11
        "github.com/lightningnetwork/lnd/routing/route"
12
)
13

14
// ErrVBarrierShuttingDown is returned when the barrier has been asked to shut
// down. A caller receiving it must not assume that the condition it was
// waiting on has been satisfied.
var ErrVBarrierShuttingDown = errors.New("ValidationBarrier shutting down")
20

21
// JobID is the identifier assigned to a job that is active in the validation
// barrier. It is 64 bits wide so that overflow is not a practical concern.
type JobID uint64
24

25
// jobInfo stores job dependency info for a set of dependent gossip messages.
26
type jobInfo struct {
27
        // activeParentJobIDs is the set of active parent job ids.
28
        activeParentJobIDs fn.Set[JobID]
29

30
        // activeDependentJobs is the set of active dependent job ids.
31
        activeDependentJobs fn.Set[JobID]
32
}
33

34
// ValidationBarrier is a barrier used to enforce a strict validation order
35
// while concurrently validating other updates for channel edges. It uses a set
36
// of maps to track validation dependencies. This is needed in practice because
37
// gossip messages for a given channel may arive in order, but then due to
38
// scheduling in different goroutines, may be validated in the wrong order.
39
// With the ValidationBarrier, the dependent update will wait until the parent
40
// update completes.
41
type ValidationBarrier struct {
42
        // validationSemaphore is a channel of structs which is used as a
43
        // semaphore. Initially we'll fill this with a buffered channel of the
44
        // size of the number of active requests. Each new job will consume
45
        // from this channel, then restore the value upon completion.
46
        validationSemaphore chan struct{}
47

48
        // jobInfoMap stores the set of job ids for each channel.
49
        // NOTE: This MUST be used with the mutex.
50
        // NOTE: This currently stores string representations of
51
        // lnwire.ShortChannelID and route.Vertex. Since these are of different
52
        // lengths, collision cannot occur in their string representations.
53
        // N.B.: Check that any new string-converted types don't collide with
54
        // existing string-converted types.
55
        jobInfoMap map[string]*jobInfo
56

57
        // jobDependencies is a mapping from a child's JobID to the set of
58
        // parent JobID that it depends on.
59
        // NOTE: This MUST be used with the mutex.
60
        jobDependencies map[JobID]fn.Set[JobID]
61

62
        // childJobChans stores the notification channel that each child job
63
        // listens on for parent job completions.
64
        // NOTE: This MUST be used with the mutex.
65
        childJobChans map[JobID]chan struct{}
66

67
        // idCtr is an atomic integer that is used to assign JobIDs.
68
        idCtr atomic.Uint64
69

70
        quit chan struct{}
71
        sync.Mutex
72
}
73

74
// NewValidationBarrier creates a new instance of a validation barrier given
75
// the total number of active requests, and a quit channel which will be used
76
// to know when to kill pending, but unfilled jobs.
77
func NewValidationBarrier(numActiveReqs int,
78
        quitChan chan struct{}) *ValidationBarrier {
36✔
79

36✔
80
        v := &ValidationBarrier{
36✔
81
                jobInfoMap:      make(map[string]*jobInfo),
36✔
82
                jobDependencies: make(map[JobID]fn.Set[JobID]),
36✔
83
                childJobChans:   make(map[JobID]chan struct{}),
36✔
84
                quit:            quitChan,
36✔
85
        }
36✔
86

36✔
87
        // We'll first initialize a set of semaphores to limit our concurrency
36✔
88
        // when validating incoming requests in parallel.
36✔
89
        v.validationSemaphore = make(chan struct{}, numActiveReqs)
36✔
90
        for i := 0; i < numActiveReqs; i++ {
30,071✔
91
                v.validationSemaphore <- struct{}{}
30,035✔
92
        }
30,035✔
93

94
        return v
36✔
95
}
96

97
// InitJobDependencies will wait for a new job slot to become open, and then
98
// sets up any dependent signals/trigger for the new job.
99
func (v *ValidationBarrier) InitJobDependencies(job interface{}) (JobID,
100
        error) {
356✔
101

356✔
102
        // We'll wait for either a new slot to become open, or for the quit
356✔
103
        // channel to be closed.
356✔
104
        select {
356✔
105
        case <-v.validationSemaphore:
356✔
106
        case <-v.quit:
×
107
        }
108

109
        v.Lock()
356✔
110
        defer v.Unlock()
356✔
111

356✔
112
        // updateOrCreateJobInfo modifies the set of activeParentJobs for this
356✔
113
        // annID and updates jobInfoMap.
356✔
114
        updateOrCreateJobInfo := func(annID string, annJobID JobID) {
1,085✔
115
                info, ok := v.jobInfoMap[annID]
729✔
116
                if ok {
735✔
117
                        // If an entry already exists for annID, then a job
6✔
118
                        // related to it is being validated. Add to the set of
6✔
119
                        // parent job ids. This addition will only affect
6✔
120
                        // _later_, _child_ jobs for the annID.
6✔
121
                        info.activeParentJobIDs.Add(annJobID)
6✔
122
                        return
6✔
123
                }
6✔
124

125
                // No entry exists for annID, meaning that we should create
126
                // one.
127
                parentJobSet := fn.NewSet(annJobID)
726✔
128

726✔
129
                info = &jobInfo{
726✔
130
                        activeParentJobIDs:  parentJobSet,
726✔
131
                        activeDependentJobs: fn.NewSet[JobID](),
726✔
132
                }
726✔
133
                v.jobInfoMap[annID] = info
726✔
134
        }
135

136
        // populateDependencies populates the job dependency mappings (i.e.
137
        // which should complete after another) for the (annID, childJobID)
138
        // tuple.
139
        populateDependencies := func(annID string, childJobID JobID) {
470✔
140
                // If there is no entry in the jobInfoMap, we don't have to
114✔
141
                // wait on any parent jobs to finish.
114✔
142
                info, ok := v.jobInfoMap[annID]
114✔
143
                if !ok {
218✔
144
                        return
104✔
145
                }
104✔
146

147
                // We want to see a snapshot of active parent jobs for this
148
                // annID that are already registered in activeParentJobIDs. The
149
                // child job identified by childJobID can only run after these
150
                // parent jobs have run. After grabbing the snapshot, we then
151
                // want to persist a slice of these jobs.
152

153
                // Create the notification chan that parent jobs will send (or
154
                // close) on when they complete.
155
                jobChan := make(chan struct{})
13✔
156

13✔
157
                // Add to set of activeDependentJobs for this annID.
13✔
158
                info.activeDependentJobs.Add(childJobID)
13✔
159

13✔
160
                // Store in childJobChans. The parent jobs will fetch this chan
13✔
161
                // to notify on. The child job will later fetch this chan to
13✔
162
                // listen on when WaitForParents is called.
13✔
163
                v.childJobChans[childJobID] = jobChan
13✔
164

13✔
165
                // Copy over the parent job IDs at this moment for this annID.
13✔
166
                // This job must be processed AFTER those parent IDs.
13✔
167
                parentJobs := info.activeParentJobIDs.Copy()
13✔
168

13✔
169
                // Populate the jobDependencies mapping.
13✔
170
                v.jobDependencies[childJobID] = parentJobs
13✔
171
        }
172

173
        // Once a slot is open, we'll examine the message of the job, to see if
174
        // there need to be any dependent barriers set up.
175
        switch msg := job.(type) {
356✔
176
        case *lnwire.ChannelAnnouncement1:
245✔
177
                id := JobID(v.idCtr.Add(1))
245✔
178

245✔
179
                updateOrCreateJobInfo(msg.ShortChannelID.String(), id)
245✔
180
                updateOrCreateJobInfo(route.Vertex(msg.NodeID1).String(), id)
245✔
181
                updateOrCreateJobInfo(route.Vertex(msg.NodeID2).String(), id)
245✔
182

245✔
183
                return id, nil
245✔
184

185
        // Populate the dependency mappings for the below child jobs.
186
        case *lnwire.ChannelUpdate1:
89✔
187
                childJobID := JobID(v.idCtr.Add(1))
89✔
188
                populateDependencies(msg.ShortChannelID.String(), childJobID)
89✔
189

89✔
190
                return childJobID, nil
89✔
191
        case *lnwire.NodeAnnouncement:
28✔
192
                childJobID := JobID(v.idCtr.Add(1))
28✔
193
                populateDependencies(
28✔
194
                        route.Vertex(msg.NodeID).String(), childJobID,
28✔
195
                )
28✔
196

28✔
197
                return childJobID, nil
28✔
198
        case *lnwire.AnnounceSignatures1:
×
199
                // TODO(roasbeef): need to wait on chan ann?
×
200
                // - We can do the above by calling populateDependencies. For
×
201
                //   now, while we evaluate potential side effects, don't do
×
202
                //   anything with childJobID and just return it.
×
203
                childJobID := JobID(v.idCtr.Add(1))
×
204
                return childJobID, nil
×
205

206
        default:
×
207
                // An invalid message was passed into InitJobDependencies.
×
208
                // Return an error.
×
209
                return JobID(0), errors.New("invalid message")
×
210
        }
211
}
212

213
// CompleteJob returns a free slot to the set of available job slots. This
214
// should be called once a job has been fully completed. Otherwise, slots may
215
// not be returned to the internal scheduling, causing a deadlock when a new
216
// overflow job is attempted.
217
func (v *ValidationBarrier) CompleteJob() {
331✔
218
        select {
331✔
219
        case v.validationSemaphore <- struct{}{}:
331✔
UNCOV
220
        case <-v.quit:
×
221
        }
222
}
223

224
// WaitForParents will block until all parent job dependencies have went
225
// through the validation pipeline. This allows us a graceful way to run jobs
226
// in goroutines and still have strict ordering guarantees. If this job doesn't
227
// have any parent job dependencies, then this function will return
228
// immediately.
229
func (v *ValidationBarrier) WaitForParents(childJobID JobID,
230
        job interface{}) error {
329✔
231

329✔
232
        var (
329✔
233
                ok      bool
329✔
234
                jobDesc string
329✔
235

329✔
236
                parentJobIDs fn.Set[JobID]
329✔
237
                annID        string
329✔
238
                jobChan      chan struct{}
329✔
239
        )
329✔
240

329✔
241
        // Acquire a lock to read ValidationBarrier.
329✔
242
        v.Lock()
329✔
243

329✔
244
        switch msg := job.(type) {
329✔
245
        // Any ChannelUpdate or NodeAnnouncement jobs will need to wait on the
246
        // completion of any active ChannelAnnouncement jobs related to them.
247
        case *lnwire.ChannelUpdate1:
73✔
248
                annID = msg.ShortChannelID.String()
73✔
249

73✔
250
                parentJobIDs, ok = v.jobDependencies[childJobID]
73✔
251
                if !ok {
137✔
252
                        // If ok is false, it means that this child job never
64✔
253
                        // had any parent jobs to wait on.
64✔
254
                        v.Unlock()
64✔
255
                        return nil
64✔
256
                }
64✔
257

258
                jobDesc = fmt.Sprintf("job=lnwire.ChannelUpdate, scid=%v",
12✔
259
                        msg.ShortChannelID.ToUint64())
12✔
260

261
        case *lnwire.NodeAnnouncement:
28✔
262
                annID = route.Vertex(msg.NodeID).String()
28✔
263

28✔
264
                parentJobIDs, ok = v.jobDependencies[childJobID]
28✔
265
                if !ok {
55✔
266
                        // If ok is false, it means that this child job never
27✔
267
                        // had any parent jobs to wait on.
27✔
268
                        v.Unlock()
27✔
269
                        return nil
27✔
270
                }
27✔
271

272
                jobDesc = fmt.Sprintf("job=lnwire.NodeAnnouncement, pub=%s",
4✔
273
                        route.Vertex(msg.NodeID))
4✔
274

275
        // Other types of jobs can be executed immediately, so we'll just
276
        // return directly.
277
        case *lnwire.AnnounceSignatures1:
×
278
                // TODO(roasbeef): need to wait on chan ann?
×
279
                v.Unlock()
×
280
                return nil
×
281

282
        case *lnwire.ChannelAnnouncement1:
234✔
283
                v.Unlock()
234✔
284
                return nil
234✔
285
        }
286

287
        // Release the lock once the above read is finished.
288
        v.Unlock()
13✔
289

13✔
290
        log.Debugf("Waiting for dependent on %s", jobDesc)
13✔
291

13✔
292
        v.Lock()
13✔
293
        jobChan, ok = v.childJobChans[childJobID]
13✔
294
        if !ok {
13✔
295
                v.Unlock()
×
296

×
297
                // The entry may not exist because this job does not depend on
×
298
                // any parent jobs.
×
299
                return nil
×
300
        }
×
301
        v.Unlock()
13✔
302

13✔
303
        for {
26✔
304
                select {
13✔
305
                case <-v.quit:
4✔
306
                        return ErrVBarrierShuttingDown
4✔
307

308
                case <-jobChan:
9✔
309
                        // Every time this is sent on or if it's closed, a
9✔
310
                        // parent job has finished. The parent jobs have to
9✔
311
                        // also potentially close the channel because if all
9✔
312
                        // the parent jobs finish and call SignalDependents
9✔
313
                        // before the goroutine running WaitForParents has a
9✔
314
                        // chance to grab the notification chan from
9✔
315
                        // childJobChans, then the running goroutine will wait
9✔
316
                        // here for a notification forever. By having the last
9✔
317
                        // parent job close the notificiation chan, we avoid
9✔
318
                        // this issue.
9✔
319

9✔
320
                        // Check and see if we have any parent jobs left. If we
9✔
321
                        // don't, we can finish up.
9✔
322
                        v.Lock()
9✔
323
                        info, found := v.jobInfoMap[annID]
9✔
324
                        if !found {
18✔
325
                                v.Unlock()
9✔
326

9✔
327
                                // No parent job info found, proceed with
9✔
328
                                // validation.
9✔
329
                                return nil
9✔
330
                        }
9✔
331

332
                        x := parentJobIDs.Intersect(info.activeParentJobIDs)
3✔
333
                        v.Unlock()
3✔
334
                        if x.IsEmpty() {
6✔
335
                                // The parent jobs have all completed. We can
3✔
336
                                // proceed with validation.
3✔
337
                                return nil
3✔
338
                        }
3✔
339

340
                        // If we've reached this point, we are still waiting on
341
                        // a parent job to complete.
342
                }
343
        }
344
}
345

346
// SignalDependents signals to any child jobs that this parent job has
347
// finished.
348
func (v *ValidationBarrier) SignalDependents(job interface{}, id JobID) error {
326✔
349
        v.Lock()
326✔
350
        defer v.Unlock()
326✔
351

326✔
352
        // removeJob either removes a child job or a parent job. If it is
326✔
353
        // removing a child job, then it removes the child's JobID from the set
326✔
354
        // of dependent jobs for the announcement ID. If this is removing a
326✔
355
        // parent job, then it removes the parentJobID from the set of active
326✔
356
        // parent jobs and notifies the child jobs that it has finished
326✔
357
        // validating.
326✔
358
        removeJob := func(annID string, id JobID, child bool) error {
1,128✔
359
                if child {
890✔
360
                        // If we're removing a child job, check jobInfoMap and
88✔
361
                        // remove this job from activeDependentJobs.
88✔
362
                        info, ok := v.jobInfoMap[annID]
88✔
363
                        if ok {
91✔
364
                                info.activeDependentJobs.Remove(id)
3✔
365
                        }
3✔
366

367
                        // Remove the notification chan from childJobChans.
368
                        delete(v.childJobChans, id)
88✔
369

88✔
370
                        // Remove this job's dependency mapping.
88✔
371
                        delete(v.jobDependencies, id)
88✔
372

88✔
373
                        return nil
88✔
374
                }
375

376
                // Otherwise, we are removing a parent job.
377
                jobInfo, found := v.jobInfoMap[annID]
717✔
378
                if !found {
717✔
379
                        // NOTE: Some sort of consistency guarantee has been
×
380
                        // broken.
×
381
                        return fmt.Errorf("no job info found for "+
×
382
                                "identifier(%v)", id)
×
383
                }
×
384

385
                jobInfo.activeParentJobIDs.Remove(id)
717✔
386

717✔
387
                lastJob := jobInfo.activeParentJobIDs.IsEmpty()
717✔
388

717✔
389
                // Notify all dependent jobs that a parent job has completed.
717✔
390
                for child := range jobInfo.activeDependentJobs {
729✔
391
                        notifyChan, ok := v.childJobChans[child]
12✔
392
                        if !ok {
12✔
393
                                // NOTE: Some sort of consistency guarantee has
×
394
                                // been broken.
×
395
                                return fmt.Errorf("no job info found for "+
×
396
                                        "identifier(%v)", id)
×
397
                        }
×
398

399
                        // We don't want to block when sending out the signal.
400
                        select {
12✔
401
                        case notifyChan <- struct{}{}:
9✔
402
                        default:
6✔
403
                        }
404

405
                        // If this is the last parent job for this annID, also
406
                        // close the channel. This is needed because it's
407
                        // possible that the parent job cleans up the job
408
                        // mappings before the goroutine handling the child job
409
                        // has a chance to call WaitForParents and catch the
410
                        // signal sent above. We are allowed to close because
411
                        // no other parent job will be able to send along the
412
                        // channel (or close) as we're removing the entry from
413
                        // the jobInfoMap below.
414
                        if lastJob {
21✔
415
                                close(notifyChan)
9✔
416
                        }
9✔
417
                }
418

419
                // Remove from jobInfoMap if last job.
420
                if lastJob {
1,431✔
421
                        delete(v.jobInfoMap, annID)
714✔
422
                }
714✔
423

424
                return nil
717✔
425
        }
426

427
        switch msg := job.(type) {
326✔
428
        case *lnwire.ChannelAnnouncement1:
241✔
429
                // Signal to the child jobs that parent validation has
241✔
430
                // finished. We have to call removeJob for each annID
241✔
431
                // that this ChannelAnnouncement can be associated with.
241✔
432
                err := removeJob(msg.ShortChannelID.String(), id, false)
241✔
433
                if err != nil {
241✔
434
                        return err
×
435
                }
×
436

437
                err = removeJob(route.Vertex(msg.NodeID1).String(), id, false)
241✔
438
                if err != nil {
241✔
439
                        return err
×
440
                }
×
441

442
                err = removeJob(route.Vertex(msg.NodeID2).String(), id, false)
241✔
443
                if err != nil {
241✔
444
                        return err
×
445
                }
×
446

447
                return nil
241✔
448

449
        case *lnwire.NodeAnnouncement:
27✔
450
                // Remove child job info.
27✔
451
                return removeJob(route.Vertex(msg.NodeID).String(), id, true)
27✔
452

453
        case *lnwire.ChannelUpdate1:
64✔
454
                // Remove child job info.
64✔
455
                return removeJob(msg.ShortChannelID.String(), id, true)
64✔
456

457
        case *lnwire.AnnounceSignatures1:
×
458
                // No dependency mappings are stored for AnnounceSignatures1,
×
459
                // so do nothing.
×
460
                return nil
×
461
        }
462

463
        return errors.New("invalid message - no job dependencies")
×
464
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc