Skip to content

Commit f0404ad

Browse files
authored
link: validate local links using validator framework/allow local reanchor. (#180)
* link: validate local links using validator framework/allow local reanchor Signed-off-by: bwplotka <bwplotka@gmail.com> * fix broken old link Signed-off-by: bwplotka <bwplotka@gmail.com> * add test Signed-off-by: bwplotka <bwplotka@gmail.com> --------- Signed-off-by: bwplotka <bwplotka@gmail.com>
1 parent 65d9272 commit f0404ad

5 files changed

Lines changed: 154 additions & 76 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Allow projects to have self-updating up-to-date documentation available in both
2121
* "Localizing" links to relative docs if specified (useful for multi-domain websites or multi-version doc). (see [#link-localization](#link-localization))
2222
* This allows smooth integration with static document websites like [Docusaurus](https://docusaurus.io/) or [hugo](https://gohugo.io) based themes!
2323
* Flexible pre-processing allowing easy to use GitHub experience as well as website. (see [#transform-usage](#transformation))
24-
* Allows profiling(using [fgprof](https://github.com/felixge/fgprof)) and exports metrics(saves to file in [OpenMetrics](https://openmetrics.io/) format) for easy debugging
24+
* Allows profiling(using [fgprof](https://github.com/felixge/fgprof)) and exports metrics(saves to file in [OpenMetrics](https://prometheus.io/docs/specs/om/open_metrics_spec/) format) for easy debugging
2525

2626
## Usage
2727

pkg/mdformatter/linktransformer/config.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ type Config struct {
2222

2323
Cache cache.Config `yaml:"cache"`
2424

25+
// ExplicitLocalValidators forces all links (remote and local) to go through validators.
26+
// If false (default), only http(s) links go to validators.
27+
// Use it for additional validation options on local links.
2528
ExplicitLocalValidators bool `yaml:"explicitLocalValidators"`
2629
Validators []ValidatorConfig `yaml:"validators"`
2730
Timeout string `yaml:"timeout"`
@@ -38,14 +41,17 @@ type Config struct {
3841
type ValidatorConfig struct {
3942
// Regex for type of validator. For `githubPullsIssues` this is: (^http[s]?:\/\/)(www\.)?(github\.com\/){ORG_NAME}\/{REPO_NAME}(\/pull\/|\/issues\/).
4043
Regex string `yaml:"regex"`
41-
// By default type is `roundtrip`. Could be `githubPullsIssues` or `ignore`.
44+
// By default type is `roundtrip`. Could be `githubPullsIssues`, `ignore`, or `local`.
4245
Type ValidatorType `yaml:"type"`
4346
// GitHub repo token to avoid getting rate limited.
4447
Token string `yaml:"token"`
48+
// Anchor is an additional path joined onto the anchor directory before the local link check.
49+
Anchor string `yaml:"anchor"`
4550

4651
ghValidator GitHubPullsIssuesValidator
4752
rtValidator RoundTripValidator
4853
igValidator IgnoreValidator
54+
lValidator LocalValidator
4955
}
5056

5157
type RoundTripValidator struct {
@@ -61,12 +67,17 @@ type IgnoreValidator struct {
6167
_regex *regexp.Regexp
6268
}
6369

70+
type LocalValidator struct {
71+
_regex *regexp.Regexp
72+
anchor string
73+
}
6474
type ValidatorType string
6575

6676
const (
6777
roundtripValidator ValidatorType = "roundtrip"
6878
githubPullsIssuesValidator ValidatorType = "githubPullsIssues"
6979
ignoreValidator ValidatorType = "ignore"
80+
localValidator ValidatorType = "local"
7081
)
7182

7283
const (
@@ -124,8 +135,12 @@ func ParseConfig(c []byte) (Config, error) {
124135
cfg.Validators[i].ghValidator._maxNum = maxNum
125136
case ignoreValidator:
126137
cfg.Validators[i].igValidator._regex = regexp.MustCompile(cfg.Validators[i].Regex)
138+
case localValidator:
139+
cfg.Validators[i].lValidator._regex = regexp.MustCompile(cfg.Validators[i].Regex)
140+
cfg.Validators[i].lValidator.anchor = cfg.Validators[i].Anchor
141+
127142
default:
128-
return Config{}, errors.New("Validator type not supported")
143+
return Config{}, fmt.Errorf("validator type %v not supported", cfg.Validators[i].Type)
129144
}
130145
}
131146
return cfg, nil

pkg/mdformatter/linktransformer/link.go

Lines changed: 4 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -398,28 +398,6 @@ func (v *validator) Close(ctx mdformatter.SourceContext) error {
398398
return merr.Err()
399399
}
400400

401-
func (v *validator) checkLocal(k futureKey) bool {
402-
v.l.localLinksChecked.Inc()
403-
// Check if link is email address.
404-
if email := strings.TrimPrefix(k.dest, "mailto:"); email != k.dest {
405-
if isValidEmail(email) {
406-
return true
407-
}
408-
v.destFutures[k].resultFn = func() error { return fmt.Errorf("provided mailto link is not a valid email, got %v", k.dest) }
409-
return false
410-
}
411-
412-
// Relative or absolute path. Check if exists.
413-
newDest := absLocalLink(v.anchorDir, k.filepath, k.dest)
414-
415-
// Local link. Check if exists.
416-
if err := v.localLinks.Lookup(newDest); err != nil {
417-
v.destFutures[k].resultFn = func() error { return fmt.Errorf("link %v, normalized to: %w", k.dest, err) }
418-
return false
419-
}
420-
return true
421-
}
422-
423401
func (v *validator) visit(filepath string, dest string, lineNumbers string) {
424402
v.futureMu.Lock()
425403
defer v.futureMu.Unlock()
@@ -432,39 +410,19 @@ func (v *validator) visit(filepath string, dest string, lineNumbers string) {
432410
if !v.validateConfig.ExplicitLocalValidators {
433411
matches := remoteLinkPrefixRe.FindAllStringIndex(dest, 1)
434412
if matches == nil {
435-
v.checkLocal(k)
413+
_, _ = LocalValidator{}.IsValid(k, v)
436414
return
437415
}
438416
v.l.remoteLinksChecked.Inc()
439417
}
440418

419+
// TODO: Capture error?
441420
validator := v.validateConfig.GetValidatorForURL(dest)
442421
if validator != nil {
443-
matched, err := validator.IsValid(k, v)
444-
if matched && err == nil {
445-
return
446-
}
422+
_, _ = validator.IsValid(k, v)
423+
return
447424
}
448-
}
449425

450-
// isValidEmail checks email structure and domain.
451-
func isValidEmail(email string) bool {
452-
// Check length.
453-
if len(email) < 3 && len(email) > 254 {
454-
return false
455-
}
456-
// Regex from https://www.w3.org/TR/2016/REC-html51-20161101/sec-forms.html#email-state-typeemail.
457-
var emailRe = regexp.MustCompile("^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$")
458-
if !emailRe.MatchString(email) {
459-
return false
460-
}
461-
// Check email domain.
462-
domain := strings.Split(email, "@")
463-
mx, err := net.LookupMX(domain[1])
464-
if err != nil || len(mx) == 0 {
465-
return false
466-
}
467-
return true
468426
}
469427

470428
type localLinksCache map[string]*[]string

pkg/mdformatter/linktransformer/link_test.go

Lines changed: 76 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -167,23 +167,62 @@ func TestValidator_TransformDestination(t *testing.T) {
167167
})
168168

169169
t.Run("check valid local links", func(t *testing.T) {
170+
for _, viaLocalValidator := range []bool{false, true} {
171+
t.Run("viaLocal="+fmt.Sprint(viaLocalValidator), func(t *testing.T) {
172+
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "valid-local-links.md")
173+
testutil.Ok(t, os.WriteFile(testFile, []byte(`# yolo
174+
175+
[1](.) [2](#yolo) [3](../test/valid-local-links.md) [4](../test/valid-local-links.md#yolo) [5](../a/doc.md)
176+
`), os.ModePerm))
177+
178+
diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile})
179+
testutil.Ok(t, err)
180+
testutil.Equals(t, 0, len(diff), diff.String())
181+
182+
lt := MustNewValidator(logger, []byte(""), anchorDir, nil)
183+
if viaLocalValidator {
184+
lt = MustNewValidator(logger, []byte(`
185+
version: 1
186+
explicitLocalValidators: true
187+
validators:
188+
- type: local
189+
regex: '^.*$'
190+
`), anchorDir, nil)
191+
}
192+
diff, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(lt))
193+
testutil.Ok(t, err)
194+
testutil.Equals(t, 0, len(diff), diff.String())
195+
})
196+
}
197+
})
198+
t.Run("check valid local links with anchor and ignore", func(t *testing.T) {
170199
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "valid-local-links.md")
171200
testutil.Ok(t, os.WriteFile(testFile, []byte(`# yolo
172201
173-
[1](.) [2](#yolo) [3](../test/valid-local-links.md) [4](../test/valid-local-links.md#yolo) [5](../a/doc.md)
202+
[1](.) [2](#yolo) [3](../test/valid-local-links.md) [4](../test/valid-local-links.md#yolo) [5](/doc.md) [6](../a/does-not-exists-on-purpose.md)
174203
`), os.ModePerm))
175204

176205
diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile})
177206
testutil.Ok(t, err)
178207
testutil.Equals(t, 0, len(diff), diff.String())
179208

180209
diff, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
181-
MustNewValidator(logger, []byte(""), anchorDir, nil),
210+
MustNewValidator(logger, []byte(`
211+
version: 1
212+
explicitLocalValidators: true
213+
validators:
214+
- type: ignore
215+
regex: '^../a/does-not-exists-on-purpose.md$'
216+
- type: local
217+
regex: '^/doc.md$'
218+
anchor: "a"
219+
- type: local
220+
regex: '^.*$'
221+
`), anchorDir, nil),
182222
))
183223
testutil.Ok(t, err)
184224
testutil.Equals(t, 0, len(diff), diff.String())
185225
})
186-
187226
t.Run("check valid local links with dash", func(t *testing.T) {
188227
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "valid-local-links-with-dash.md")
189228
testutil.Ok(t, os.WriteFile(testFile, []byte(`# Expose UI on a sub-path
@@ -229,32 +268,44 @@ func TestValidator_TransformDestination(t *testing.T) {
229268
})
230269

231270
t.Run("check invalid local links", func(t *testing.T) {
232-
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "invalid-local-links.md")
233-
filePath := "/repo/docs/test/invalid-local-links.md"
234-
wdir, err := os.Getwd()
235-
testutil.Ok(t, err)
236-
relDirPath, err := filepath.Rel(wdir, tmpDir)
237-
testutil.Ok(t, err)
238-
testutil.Ok(t, os.WriteFile(testFile, []byte(`# yolo
271+
for _, viaLocalValidator := range []bool{false, true} {
272+
t.Run("viaLocal="+fmt.Sprint(viaLocalValidator), func(t *testing.T) {
273+
testFile := filepath.Join(tmpDir, "repo", "docs", "test", "invalid-local-links.md")
274+
filePath := "/repo/docs/test/invalid-local-links.md"
275+
wdir, err := os.Getwd()
276+
testutil.Ok(t, err)
277+
relDirPath, err := filepath.Rel(wdir, tmpDir)
278+
testutil.Ok(t, err)
279+
testutil.Ok(t, os.WriteFile(testFile, []byte(`# yolo
239280
240281
[1](.) [2](#not-yolo) [3](../test2/invalid-local-links.md) [4](../test/invalid-local-links.md#not-yolo) [5](../test/doc.md)
241282
`), os.ModePerm))
242283

243-
diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile})
244-
testutil.Ok(t, err)
245-
testutil.Equals(t, 0, len(diff), diff.String())
246-
247-
_, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(
248-
MustNewValidator(logger, []byte(""), anchorDir, nil),
249-
))
250-
testutil.NotOk(t, err)
251-
252-
testutil.Equals(t, fmt.Sprintf("%v: 4 errors: "+
253-
"%v:3: link ../test2/invalid-local-links.md, normalized to: %v/repo/docs/test2/invalid-local-links.md: file not found; "+
254-
"%v:3: link ../test/invalid-local-links.md#not-yolo, normalized to: link %v/repo/docs/test/invalid-local-links.md#not-yolo, existing ids: [yolo]: file exists, but does not have such id; "+
255-
"%v:3: link ../test/doc.md, normalized to: %v/repo/docs/test/doc.md: file not found; "+
256-
"%v:3: link #not-yolo, normalized to: link %v/repo/docs/test/invalid-local-links.md#not-yolo, existing ids: [yolo]: file exists, but does not have such id",
257-
tmpDir+filePath, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir), err.Error())
284+
diff, err := mdformatter.IsFormatted(context.TODO(), logger, []string{testFile})
285+
testutil.Ok(t, err)
286+
testutil.Equals(t, 0, len(diff), diff.String())
287+
288+
lt := MustNewValidator(logger, []byte(""), anchorDir, nil)
289+
if viaLocalValidator {
290+
lt = MustNewValidator(logger, []byte(`
291+
version: 1
292+
explicitLocalValidators: true
293+
validators:
294+
- type: local
295+
regex: '^.*$'
296+
`), anchorDir, nil)
297+
}
298+
_, err = mdformatter.IsFormatted(context.TODO(), logger, []string{testFile}, mdformatter.WithLinkTransformer(lt))
299+
testutil.NotOk(t, err)
300+
301+
testutil.Equals(t, fmt.Sprintf("%v: 4 errors: "+
302+
"%v:3: link ../test2/invalid-local-links.md, normalized to: %v/repo/docs/test2/invalid-local-links.md: file not found; "+
303+
"%v:3: link ../test/invalid-local-links.md#not-yolo, normalized to: link %v/repo/docs/test/invalid-local-links.md#not-yolo, existing ids: [yolo]: file exists, but does not have such id; "+
304+
"%v:3: link ../test/doc.md, normalized to: %v/repo/docs/test/doc.md: file not found; "+
305+
"%v:3: link #not-yolo, normalized to: link %v/repo/docs/test/invalid-local-links.md#not-yolo, existing ids: [yolo]: file exists, but does not have such id",
306+
tmpDir+filePath, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir, relDirPath+filePath, tmpDir), err.Error())
307+
})
308+
}
258309
})
259310

260311
t.Run("check valid email link", func(t *testing.T) {

pkg/mdformatter/linktransformer/validator.go

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ package linktransformer
55

66
import (
77
"fmt"
8+
"net"
9+
"path/filepath"
10+
"regexp"
811
"strconv"
912
"strings"
1013
)
@@ -13,6 +16,52 @@ type Validator interface {
1316
IsValid(k futureKey, r *validator) (bool, error)
1417
}
1518

19+
func (v LocalValidator) IsValid(k futureKey, r *validator) (bool, error) {
20+
r.l.localLinksChecked.Inc()
21+
// Check if link is email address.
22+
if email := strings.TrimPrefix(k.dest, "mailto:"); email != k.dest {
23+
if isValidEmail(email) {
24+
return true, nil
25+
}
26+
r.destFutures[k].resultFn = func() error { return fmt.Errorf("provided mailto link is not a valid email, got %v", k.dest) }
27+
return false, nil
28+
}
29+
30+
anchorDir := r.anchorDir
31+
if v.anchor != "" {
32+
anchorDir = filepath.Join(anchorDir, v.anchor)
33+
}
34+
// Relative or absolute path. Check if exists.
35+
newDest := absLocalLink(anchorDir, k.filepath, k.dest)
36+
37+
// Local link. Check if exists.
38+
if err := r.localLinks.Lookup(newDest); err != nil {
39+
r.destFutures[k].resultFn = func() error { return fmt.Errorf("link %v, normalized to: %w", k.dest, err) }
40+
return false, nil
41+
}
42+
return true, nil
43+
}
44+
45+
// isValidEmail checks email structure and domain.
46+
func isValidEmail(email string) bool {
47+
// Check length.
48+
if len(email) < 3 && len(email) > 254 {
49+
return false
50+
}
51+
// Regex from https://www.w3.org/TR/2016/REC-html51-20161101/sec-forms.html#email-state-typeemail.
52+
var emailRe = regexp.MustCompile("^[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$")
53+
if !emailRe.MatchString(email) {
54+
return false
55+
}
56+
// Check email domain.
57+
domain := strings.Split(email, "@")
58+
mx, err := net.LookupMX(domain[1])
59+
if err != nil || len(mx) == 0 {
60+
return false
61+
}
62+
return true
63+
}
64+
1665
// GitHubPullsIssuesValidator.IsValid skips visiting all GitHub issue/PR links.
1766
func (v GitHubPullsIssuesValidator) IsValid(k futureKey, r *validator) (bool, error) {
1867
r.l.githubSkippedLinks.Inc()
@@ -36,7 +85,7 @@ func (v RoundTripValidator) IsValid(k futureKey, r *validator) (bool, error) {
3685
matches := remoteLinkPrefixRe.FindAllStringIndex(k.dest, 1)
3786
if matches == nil && r.validateConfig.ExplicitLocalValidators {
3887
r.l.localLinksChecked.Inc()
39-
return r.checkLocal(k), nil
88+
return LocalValidator{}.IsValid(k, r)
4089
}
4190

4291
// Result will be in future.
@@ -72,7 +121,7 @@ func (v RoundTripValidator) IsValid(k futureKey, r *validator) (bool, error) {
72121
return true, nil
73122
}
74123

75-
// IgnoreValidator.IsValid returns true if matched so that link in not checked.
124+
// IsValid returns true if matched so that link is not checked.
76125
func (v IgnoreValidator) IsValid(k futureKey, r *validator) (bool, error) {
77126
r.l.ignoreSkippedLinks.Inc()
78127

@@ -98,6 +147,11 @@ func (v Config) GetValidatorForURL(URL string) Validator {
98147
continue
99148
}
100149
return val.igValidator
150+
case localValidator:
151+
if !val.lValidator._regex.MatchString(URL) {
152+
continue
153+
}
154+
return val.lValidator
101155
default:
102156
panic("unexpected validator type")
103157
}

0 commit comments

Comments
 (0)