optimize markdown parsing

This commit is contained in:
Benjamin Schoch 2022-08-30 23:52:53 +02:00 committed by Avelino
parent 6a3844d040
commit 4b54315c48
6 changed files with 78 additions and 23 deletions

3
go.mod
View File

@ -5,8 +5,7 @@ go 1.17
require (
github.com/PuerkitoBio/goquery v1.8.0
github.com/avelino/slugify v0.0.0-20180501145920-855f152bd774
github.com/gomarkdown/markdown v0.0.0-20211212230626-5af6ad2f47df
github.com/russross/blackfriday v1.6.0
github.com/yuin/goldmark v1.4.13
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8
)

6
go.sum
View File

@ -78,8 +78,6 @@ github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvq
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0=
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/gomarkdown/markdown v0.0.0-20211212230626-5af6ad2f47df h1:M7mdNDTRraBcrHZg2aOYiFP9yTDajb6fquRZRpXnbVA=
github.com/gomarkdown/markdown v0.0.0-20211212230626-5af6ad2f47df/go.mod h1:JDGcbDT52eL4fju3sZ4TeHGsQwhG9nbDV21aMyhwPoA=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
@ -114,13 +112,13 @@ github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww=
github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13 h1:fVcFKWvrslecOb/tg+Cc05dkeYx540o0FuFt3nUVDoE=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=

View File

@ -10,7 +10,7 @@ import (
"text/template"
"github.com/PuerkitoBio/goquery"
"github.com/avelino/slugify"
"github.com/avelino/awesome-go/pkg/slug"
)
type Link struct {
@ -89,7 +89,7 @@ func makeObjById(selector string, s *goquery.Selection) (obj Object) {
links = append(links, link)
})
obj = Object{
Slug: slugify.Slugify(s.Text()),
Slug: slug.Generate(s.Text()),
Title: s.Text(),
Description: desc.Text(),
Items: links,

49
pkg/markdown/convert.go Normal file
View File

@ -0,0 +1,49 @@
package markdown
import (
"bytes"
"github.com/avelino/awesome-go/pkg/slug"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/util"
)
func ConvertMarkdownToHTML(markdown []byte) ([]byte, error) {
md := goldmark.New(
goldmark.WithExtensions(extension.GFM),
goldmark.WithParserOptions(
parser.WithAutoHeadingID(), // generate heading IDs for content navigation
),
goldmark.WithRendererOptions(
html.WithXHTML(),
html.WithUnsafe(), // allow inline HTML
),
)
ctx := parser.NewContext(
parser.WithIDs(&IDGenerator{}), // register custom ID generator
)
var buf bytes.Buffer
if err := md.Convert(markdown, &buf, parser.WithContext(ctx)); err != nil {
return nil, err
}
return buf.Bytes(), nil
}
// IDGenerator derives heading IDs from heading text using slug.Generate. It is
// registered on the goldmark parser context through parser.WithIDs.
type IDGenerator struct {
	// used records the IDs that have been handed to Put.
used map[string]bool
}
// Generate returns the ID for a heading: the slug that slug.Generate produces
// for the heading text in value. The node kind argument is ignored.
func (g *IDGenerator) Generate(value []byte, _ ast.NodeKind) []byte {
	headingID := slug.Generate(string(value))
	return []byte(headingID)
}
// Put records value as an ID that is already in use.
//
// The backing map is allocated on first use so that a zero-value IDGenerator
// (for example one created with &IDGenerator{}) can be used safely; assigning
// to a nil map would otherwise panic at runtime.
func (g *IDGenerator) Put(value []byte) {
	if g.used == nil {
		g.used = make(map[string]bool)
	}
	g.used[util.BytesToReadOnlyString(value)] = true
}

13
pkg/slug/generator.go Normal file
View File

@ -0,0 +1,13 @@
package slug
import (
"strings"
"github.com/avelino/slugify"
)
// Generate builds a URL slug from text.
//
// Every "/" is dropped first so the result resembles the anchors GitHub
// creates when it renders markdown headings; surrounding whitespace is then
// trimmed and the remainder is passed to slugify.Slugify.
func Generate(text string) string {
	withoutSlashes := strings.Replace(text, "/", "", -1)
	trimmed := strings.TrimSpace(withoutSlashes)
	return slugify.Slugify(trimmed)
}

View File

@ -2,25 +2,24 @@ package main
import (
"bytes"
"fmt"
"html/template"
"io/ioutil"
"os"
"text/template"
"github.com/PuerkitoBio/goquery"
"github.com/gomarkdown/markdown"
"github.com/gomarkdown/markdown/parser"
"github.com/russross/blackfriday"
"github.com/avelino/awesome-go/pkg/markdown"
)
func readme() []byte {
input, err := ioutil.ReadFile("./README.md")
input, err := os.ReadFile("./README.md")
if err != nil {
panic(err)
}
html := fmt.Sprintf("<body>%s</body>", blackfriday.MarkdownCommon(input))
htmlByteArray := []byte(html)
return htmlByteArray
html, err := markdown.ConvertMarkdownToHTML(input)
if err != nil {
panic(err)
}
return html
}
func startQuery() *goquery.Document {
@ -33,7 +32,7 @@ func startQuery() *goquery.Document {
}
type content struct {
Body string
Body template.HTML
}
// GenerateHTML generate site html (index.html) from markdown file
@ -43,11 +42,8 @@ func GenerateHTML() (err error) {
tplPath := "tmpl/tmpl.html"
idxPath := "tmpl/index.html"
input, _ := ioutil.ReadFile(readmePath)
extensions := parser.CommonExtensions | parser.AutoHeadingIDs | parser.LaxHTMLBlocks
parser := parser.NewWithExtensions(extensions)
body := string(markdown.ToHTML(input, parser, nil))
c := &content{Body: body}
body, _ := markdown.ConvertMarkdownToHTML(input)
c := &content{Body: template.HTML(body)}
t := template.Must(template.ParseFiles(tplPath))
f, err := os.Create(idxPath)
t.Execute(f, c)