optimize markdown parsing

This commit is contained in:
Benjamin Schoch 2022-08-30 23:52:53 +02:00 committed by Avelino
parent 6a3844d040
commit 4b54315c48
6 changed files with 78 additions and 23 deletions

3
go.mod
View File

@ -5,8 +5,7 @@ go 1.17
require ( require (
github.com/PuerkitoBio/goquery v1.8.0 github.com/PuerkitoBio/goquery v1.8.0
github.com/avelino/slugify v0.0.0-20180501145920-855f152bd774 github.com/avelino/slugify v0.0.0-20180501145920-855f152bd774
github.com/gomarkdown/markdown v0.0.0-20211212230626-5af6ad2f47df github.com/yuin/goldmark v1.4.13
github.com/russross/blackfriday v1.6.0
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8
) )

6
go.sum
View File

@ -78,8 +78,6 @@ github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvq
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0= github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/gomarkdown/markdown v0.0.0-20211212230626-5af6ad2f47df h1:M7mdNDTRraBcrHZg2aOYiFP9yTDajb6fquRZRpXnbVA=
github.com/gomarkdown/markdown v0.0.0-20211212230626-5af6ad2f47df/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
@ -114,13 +112,13 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww=
github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=

View File

@ -10,7 +10,7 @@ import (
"text/template" "text/template"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"github.com/avelino/slugify" "github.com/avelino/awesome-go/pkg/slug"
) )
type Link struct { type Link struct {
@ -89,7 +89,7 @@ func makeObjById(selector string, s *goquery.Selection) (obj Object) {
links = append(links, link) links = append(links, link)
}) })
obj = Object{ obj = Object{
Slug: slugify.Slugify(s.Text()), Slug: slug.Generate(s.Text()),
Title: s.Text(), Title: s.Text(),
Description: desc.Text(), Description: desc.Text(),
Items: links, Items: links,

49
pkg/markdown/convert.go Normal file
View File

@ -0,0 +1,49 @@
package markdown
import (
"bytes"
"github.com/avelino/awesome-go/pkg/slug"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/util"
)
// ConvertMarkdownToHTML renders the given markdown source to HTML using
// goldmark with the GFM extension enabled.
//
// Headings receive automatically generated IDs; the IDs are produced by the
// IDGenerator registered on the parser context. The renderer emits XHTML and
// is configured with html.WithUnsafe so inline HTML in the input is allowed.
//
// It returns the rendered bytes, or a nil slice and the error reported by
// goldmark's Convert.
func ConvertMarkdownToHTML(markdown []byte) ([]byte, error) {
	// Parser side: generate heading IDs for content navigation.
	parserOpts := goldmark.WithParserOptions(
		parser.WithAutoHeadingID(),
	)
	// Renderer side: XHTML output, and allow inline HTML.
	rendererOpts := goldmark.WithRendererOptions(
		html.WithXHTML(),
		html.WithUnsafe(),
	)
	converter := goldmark.New(
		goldmark.WithExtensions(extension.GFM),
		parserOpts,
		rendererOpts,
	)

	// Register the custom ID generator on a fresh parser context.
	pctx := parser.NewContext(parser.WithIDs(&IDGenerator{}))

	out := new(bytes.Buffer)
	err := converter.Convert(markdown, out, parser.WithContext(pctx))
	if err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// IDGenerator produces heading IDs by slugifying the heading text. A pointer
// to it is handed to parser.WithIDs in ConvertMarkdownToHTML.
type IDGenerator struct {
// used holds the IDs recorded through Put, keyed by ID string.
// Generate does not consult it.
used map[string]bool
}
// Generate returns the ID for a node whose text is value by passing that text
// through slug.Generate. The node kind is ignored, and the set of IDs recorded
// by Put is not consulted, so identical input text always yields the same ID.
func (g *IDGenerator) Generate(value []byte, _ ast.NodeKind) []byte {
	id := slug.Generate(string(value))
	return []byte(id)
}
// Put records value as an ID that is in use.
//
// The zero value of IDGenerator is usable: the backing map is allocated on
// the first call.
func (g *IDGenerator) Put(value []byte) {
	// ConvertMarkdownToHTML constructs the generator as &IDGenerator{}, which
	// leaves used nil; assigning into a nil map panics, so allocate lazily.
	if g.used == nil {
		g.used = make(map[string]bool)
	}
	g.used[util.BytesToReadOnlyString(value)] = true
}

13
pkg/slug/generator.go Normal file
View File

@ -0,0 +1,13 @@
package slug
import (
"strings"
"github.com/avelino/slugify"
)
// Generate builds a slug from text. All "/" characters are dropped first
// (to create slugs similar to GitHub's slugs on markdown parsing), then
// surrounding whitespace is trimmed, and the result is passed to
// slugify.Slugify.
func Generate(text string) string {
	withoutSlashes := strings.Replace(text, "/", "", -1)
	trimmed := strings.TrimSpace(withoutSlashes)
	return slugify.Slugify(trimmed)
}

View File

@ -2,25 +2,24 @@ package main
import ( import (
"bytes" "bytes"
"fmt" "html/template"
"io/ioutil" "io/ioutil"
"os" "os"
"text/template"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"github.com/gomarkdown/markdown" "github.com/avelino/awesome-go/pkg/markdown"
"github.com/gomarkdown/markdown/parser"
"github.com/russross/blackfriday"
) )
func readme() []byte { func readme() []byte {
input, err := ioutil.ReadFile("./README.md") input, err := os.ReadFile("./README.md")
if err != nil { if err != nil {
panic(err) panic(err)
} }
html := fmt.Sprintf("<body>%s</body>", blackfriday.MarkdownCommon(input)) html, err := markdown.ConvertMarkdownToHTML(input)
htmlByteArray := []byte(html) if err != nil {
return htmlByteArray panic(err)
}
return html
} }
func startQuery() *goquery.Document { func startQuery() *goquery.Document {
@ -33,7 +32,7 @@ func startQuery() *goquery.Document {
} }
type content struct { type content struct {
Body string Body template.HTML
} }
// GenerateHTML generate site html (index.html) from markdown file // GenerateHTML generate site html (index.html) from markdown file
@ -43,11 +42,8 @@ func GenerateHTML() (err error) {
tplPath := "tmpl/tmpl.html" tplPath := "tmpl/tmpl.html"
idxPath := "tmpl/index.html" idxPath := "tmpl/index.html"
input, _ := ioutil.ReadFile(readmePath) input, _ := ioutil.ReadFile(readmePath)
extensions := parser.CommonExtensions | parser.AutoHeadingIDs | parser.LaxHTMLBlocks body, _ := markdown.ConvertMarkdownToHTML(input)
parser := parser.NewWithExtensions(extensions) c := &content{Body: template.HTML(body)}
body := string(markdown.ToHTML(input, parser, nil))
c := &content{Body: body}
t := template.Must(template.ParseFiles(tplPath)) t := template.Must(template.ParseFiles(tplPath))
f, err := os.Create(idxPath) f, err := os.Create(idxPath)
t.Execute(f, c) t.Execute(f, c)