mirror of
https://github.com/avelino/awesome-go.git
synced 2024-12-12 17:13:58 +00:00
- avoid linking to non-existent pages
- improve crawling for category page content
This commit is contained in:
parent
96677e7650
commit
8b0532228a
51
make_site.go
51
make_site.go
@ -3,7 +3,6 @@ package main
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"strings"
|
"strings"
|
||||||
@ -27,8 +26,11 @@ type Object struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
GenerateHTML()
|
err := GenerateHTML()
|
||||||
input, err := ioutil.ReadFile("./tmpl/index.html")
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
input, err := os.ReadFile("./tmpl/index.html")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@ -38,22 +40,28 @@ func main() {
|
|||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
objs := []Object{}
|
objs := make(map[string]*Object)
|
||||||
query.Find("body #content ul ul").First().Each(func(_ int, s *goquery.Selection) {
|
query.Find("body #content ul ul").First().Each(func(_ int, s *goquery.Selection) {
|
||||||
|
|
||||||
s.Find("li a").Each(func(_ int, s *goquery.Selection) {
|
s.Find("li a").Each(func(_ int, s *goquery.Selection) {
|
||||||
selector, _ := s.Attr("href")
|
selector, exists := s.Attr("href")
|
||||||
|
if !exists {
|
||||||
|
return
|
||||||
|
}
|
||||||
obj := makeObjById(selector, query.Find("body"))
|
obj := makeObjById(selector, query.Find("body"))
|
||||||
objs = append(objs, obj)
|
if obj == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
objs[selector] = obj
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
makeSiteStruct(objs)
|
makeCategoryPages(objs)
|
||||||
|
linkCategoryPagesInIndex(string(input), query, objs)
|
||||||
|
|
||||||
makeSitemap(objs)
|
makeSitemap(objs)
|
||||||
changeLinksInIndex(string(input), query)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeSiteStruct(objs []Object) {
|
func makeCategoryPages(objs map[string]*Object) {
|
||||||
for _, obj := range objs {
|
for _, obj := range objs {
|
||||||
folder := fmt.Sprintf("tmpl/%s", obj.Slug)
|
folder := fmt.Sprintf("tmpl/%s", obj.Slug)
|
||||||
err := os.Mkdir(folder, 0755)
|
err := os.Mkdir(folder, 0755)
|
||||||
@ -67,16 +75,16 @@ func makeSiteStruct(objs []Object) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeSitemap(objs []Object) {
|
func makeSitemap(objs map[string]*Object) {
|
||||||
t := template.Must(template.ParseFiles("tmpl/sitemap-tmpl.xml"))
|
t := template.Must(template.ParseFiles("tmpl/sitemap-tmpl.xml"))
|
||||||
f, _ := os.Create("tmpl/sitemap.xml")
|
f, _ := os.Create("tmpl/sitemap.xml")
|
||||||
t.Execute(f, objs)
|
t.Execute(f, objs)
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeObjById(selector string, s *goquery.Selection) (obj Object) {
|
func makeObjById(selector string, s *goquery.Selection) (obj *Object) {
|
||||||
s.Find(selector).Each(func(_ int, s *goquery.Selection) {
|
s.Find(selector).Each(func(_ int, s *goquery.Selection) {
|
||||||
desc := s.NextFiltered("p")
|
desc := s.NextFiltered("p")
|
||||||
ul := desc.NextFiltered("ul")
|
ul := s.NextFilteredUntil("ul", "h2")
|
||||||
|
|
||||||
links := []Link{}
|
links := []Link{}
|
||||||
ul.Find("li").Each(func(_ int, s *goquery.Selection) {
|
ul.Find("li").Each(func(_ int, s *goquery.Selection) {
|
||||||
@ -88,7 +96,7 @@ func makeObjById(selector string, s *goquery.Selection) (obj Object) {
|
|||||||
}
|
}
|
||||||
links = append(links, link)
|
links = append(links, link)
|
||||||
})
|
})
|
||||||
obj = Object{
|
obj = &Object{
|
||||||
Slug: slug.Generate(s.Text()),
|
Slug: slug.Generate(s.Text()),
|
||||||
Title: s.Text(),
|
Title: s.Text(),
|
||||||
Description: desc.Text(),
|
Description: desc.Text(),
|
||||||
@ -98,17 +106,24 @@ func makeObjById(selector string, s *goquery.Selection) (obj Object) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func changeLinksInIndex(html string, query *goquery.Document) {
|
func linkCategoryPagesInIndex(html string, query *goquery.Document, objs map[string]*Object) {
|
||||||
query.Find("body #content ul li ul li a").Each(func(_ int, s *goquery.Selection) {
|
query.Find("body #content ul li ul li a").Each(func(_ int, s *goquery.Selection) {
|
||||||
|
href, hrefExists := s.Attr("href")
|
||||||
|
if !hrefExists {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// do not replace links if no page has been created for it
|
||||||
|
_, objExists := objs[href]
|
||||||
|
if !objExists {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
href, exists := s.Attr("href")
|
|
||||||
if exists {
|
|
||||||
uri := strings.SplitAfter(href, "#")
|
uri := strings.SplitAfter(href, "#")
|
||||||
if len(uri) >= 2 && uri[1] != "contents" {
|
if len(uri) >= 2 && uri[1] != "contents" {
|
||||||
html = strings.ReplaceAll(
|
html = strings.ReplaceAll(
|
||||||
html, fmt.Sprintf(`href="%s"`, href), fmt.Sprintf(`href="%s"`, uri[1]))
|
html, fmt.Sprintf(`href="%s"`, href), fmt.Sprintf(`href="%s"`, uri[1]))
|
||||||
}
|
}
|
||||||
}
|
|
||||||
})
|
})
|
||||||
|
|
||||||
os.WriteFile("./tmpl/index.html", []byte(html), 0644)
|
os.WriteFile("./tmpl/index.html", []byte(html), 0644)
|
||||||
|
Loading…
Reference in New Issue
Block a user