2021-12-22 13:14:34 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
2023-02-14 19:06:46 +00:00
|
|
|
"errors"
|
2021-12-22 13:14:34 +00:00
|
|
|
"fmt"
|
2023-04-03 11:16:02 +00:00
|
|
|
cp "github.com/otiai10/copy"
|
2023-02-20 13:37:55 +00:00
|
|
|
"net/url"
|
2021-12-22 13:14:34 +00:00
|
|
|
"os"
|
2023-02-04 01:39:51 +00:00
|
|
|
"path/filepath"
|
2021-12-22 13:14:34 +00:00
|
|
|
"text/template"
|
|
|
|
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
2022-08-30 21:52:53 +00:00
|
|
|
"github.com/avelino/awesome-go/pkg/slug"
|
2021-12-22 13:14:34 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Link is a single hyperlink entry that belongs to a Category.
//
// Title holds the text of the anchor, Url its target and Description the
// text that accompanies the link.
type Link struct {
	Title, Url, Description string
}
|
|
|
|
|
2023-02-14 23:48:07 +00:00
|
|
|
type Category struct {
|
2021-12-22 13:14:34 +00:00
|
|
|
Title string
|
|
|
|
Slug string
|
|
|
|
Description string
|
2023-02-14 23:49:25 +00:00
|
|
|
Links []Link
|
2021-12-22 13:14:34 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 17:58:36 +00:00
|
|
|
// Source files.
const (
	// readmePath is the input document the index page is rendered from.
	readmePath = "README.md"
)
|
2023-02-14 17:49:59 +00:00
|
|
|
|
2023-02-14 17:58:36 +00:00
|
|
|
// staticFiles lists the files and directories that are copied 'as is' into
// the outDir directory.
var staticFiles = []string{"tmpl/assets", "tmpl/_redirects", "tmpl/robots.txt"}
|
2023-02-04 01:39:51 +00:00
|
|
|
|
2023-02-14 18:12:57 +00:00
|
|
|
// Templates.
//
// All of them are parsed from disk during package initialisation;
// template.Must panics when a file is missing or does not parse.
//
// TODO: embed
var (
	tplIndex         = template.Must(template.ParseFiles("tmpl/tmpl.html"))
	tplCategoryIndex = template.Must(template.ParseFiles("tmpl/cat-tmpl.html"))
	tplSitemap       = template.Must(template.ParseFiles("tmpl/sitemap-tmpl.xml"))
)
|
2023-02-04 01:39:51 +00:00
|
|
|
|
2023-02-14 17:58:36 +00:00
|
|
|
// Output files.

// outDir is the directory every generated file is written into.
const outDir = "out/" // NOTE: trailing slash is required

var (
	// outIndexFile is the path of the generated index page.
	outIndexFile = filepath.Join(outDir, "index.html")
	// outSitemapFile is the path of the generated sitemap.
	outSitemapFile = filepath.Join(outDir, "sitemap.xml")
)
|
2023-02-04 01:39:51 +00:00
|
|
|
|
2021-12-22 13:14:34 +00:00
|
|
|
func main() {
|
2023-02-14 22:10:50 +00:00
|
|
|
if err := renderAll(); err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: choose a better name
|
|
|
|
func renderAll() error {
|
2023-02-14 23:39:11 +00:00
|
|
|
if err := dropCreateDir(outDir); err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("drop-create out dir: %w", err)
|
2023-02-04 02:01:22 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 23:39:17 +00:00
|
|
|
if err := renderIndex(readmePath, outIndexFile); err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("convert markdown to html: %w", err)
|
2022-08-30 13:20:58 +00:00
|
|
|
}
|
2023-02-04 01:39:51 +00:00
|
|
|
|
2023-04-03 11:16:02 +00:00
|
|
|
input, err := os.ReadFile(outIndexFile)
|
2021-12-22 13:14:34 +00:00
|
|
|
if err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("read converted html: %w", err)
|
2021-12-22 13:14:34 +00:00
|
|
|
}
|
2023-02-04 01:39:51 +00:00
|
|
|
|
2023-02-14 23:21:17 +00:00
|
|
|
doc, err := goquery.NewDocumentFromReader(bytes.NewReader(input))
|
2021-12-22 13:14:34 +00:00
|
|
|
if err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("create goquery instance: %w", err)
|
2021-12-22 13:14:34 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 23:48:07 +00:00
|
|
|
categories, err := extractCategories(doc)
|
2023-02-14 23:42:19 +00:00
|
|
|
if err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("extract categories: %w", err)
|
2023-02-14 23:42:19 +00:00
|
|
|
}
|
2021-12-22 13:14:34 +00:00
|
|
|
|
2023-02-14 23:48:07 +00:00
|
|
|
if err := renderCategories(categories); err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("render categories: %w", err)
|
2023-02-04 01:39:51 +00:00
|
|
|
}
|
2023-02-14 22:26:55 +00:00
|
|
|
|
2023-02-14 23:48:07 +00:00
|
|
|
if err := rewriteLinksInIndex(doc, categories); err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("rewrite links in index: %w", err)
|
2023-02-14 22:26:55 +00:00
|
|
|
}
|
2022-08-30 13:20:58 +00:00
|
|
|
|
2023-02-14 23:48:07 +00:00
|
|
|
if err := renderSitemap(categories); err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("render sitemap: %w", err)
|
2023-02-14 23:05:31 +00:00
|
|
|
}
|
2023-04-03 11:16:02 +00:00
|
|
|
|
2023-02-14 17:49:59 +00:00
|
|
|
for _, srcFilename := range staticFiles {
|
|
|
|
dstFilename := filepath.Join(outDir, filepath.Base(srcFilename))
|
|
|
|
fmt.Printf("Copy static file: %s -> %s\n", srcFilename, dstFilename)
|
|
|
|
if err := cp.Copy(srcFilename, dstFilename); err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("copy static file `%s` to `%s`: %w", srcFilename, dstFilename, err)
|
2023-02-14 17:49:59 +00:00
|
|
|
}
|
2023-04-03 11:16:02 +00:00
|
|
|
}
|
2023-02-14 22:10:50 +00:00
|
|
|
|
|
|
|
return nil
|
2021-12-22 13:14:34 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 23:39:11 +00:00
|
|
|
// dropCreateDir drop and create output directory
|
|
|
|
func dropCreateDir(dir string) error {
|
|
|
|
if err := os.RemoveAll(dir); err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("remove dir: %w", err)
|
2023-02-14 23:39:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if err := mkdirAll(dir); err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("create dir: %w", err)
|
2023-02-14 23:39:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-02-04 01:39:51 +00:00
|
|
|
// mkdirAll makes sure that path exists as a directory, creating it and any
// missing parents with mode 0755.
//
// It returns nil when the directory already exists. An error is returned
// when the directory cannot be created, which includes the case where path
// already exists but is not a directory.
func mkdirAll(path string) error {
	// os.MkdirAll already succeeds for an existing directory and fails for
	// an existing non-directory, so a preceding os.Stat is not needed (and a
	// bare "stat succeeded" check would wrongly accept a regular file).
	if err := os.MkdirAll(path, 0755); err != nil {
		return fmt.Errorf("mkdirAll: %w", err)
	}

	return nil
}
|
|
|
|
|
2023-02-14 23:48:07 +00:00
|
|
|
func renderCategories(categories map[string]Category) error {
|
|
|
|
for _, category := range categories {
|
|
|
|
categoryDir := filepath.Join(outDir, category.Slug)
|
2023-04-03 11:16:02 +00:00
|
|
|
if err := mkdirAll(categoryDir); err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("create category dir `%s`: %w", categoryDir, err)
|
2023-02-04 01:39:51 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: embed templates
|
2023-02-14 17:50:14 +00:00
|
|
|
categoryIndexFilename := filepath.Join(categoryDir, "index.html")
|
2023-02-14 22:58:11 +00:00
|
|
|
fmt.Printf("Write category Index file: %s\n", categoryIndexFilename)
|
|
|
|
|
|
|
|
buf := bytes.NewBuffer(nil)
|
2023-02-14 23:48:07 +00:00
|
|
|
if err := tplCategoryIndex.Execute(buf, category); err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("render category `%s`: %w", categoryDir, err)
|
2021-12-22 13:14:34 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 22:58:11 +00:00
|
|
|
// Sanitize HTML. This is not necessary, but allows to have content
|
|
|
|
// of all html files in same style.
|
|
|
|
{
|
2023-02-15 01:32:24 +00:00
|
|
|
doc, err := goquery.NewDocumentFromReader(buf)
|
2023-02-14 22:58:11 +00:00
|
|
|
if err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("create goquery instance for `%s`: %w", categoryDir, err)
|
2023-02-14 22:58:11 +00:00
|
|
|
}
|
2023-02-14 17:50:14 +00:00
|
|
|
|
2023-02-15 01:32:24 +00:00
|
|
|
html, err := doc.Html()
|
2023-02-14 22:58:11 +00:00
|
|
|
if err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("render goquery html for `%s`: %w", categoryDir, err)
|
2023-02-14 22:58:11 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if err := os.WriteFile(categoryIndexFilename, []byte(html), 0644); err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("write category file `%s`: %w", categoryDir, err)
|
2023-02-14 22:58:11 +00:00
|
|
|
}
|
2023-02-04 01:39:51 +00:00
|
|
|
}
|
2021-12-22 13:14:34 +00:00
|
|
|
}
|
2023-02-04 01:39:51 +00:00
|
|
|
|
|
|
|
return nil
|
2021-12-22 13:14:34 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 23:48:07 +00:00
|
|
|
func renderSitemap(categories map[string]Category) error {
|
2023-02-14 23:05:31 +00:00
|
|
|
f, err := os.Create(outSitemapFile)
|
|
|
|
if err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("create sitemap file `%s`: %w", outSitemapFile, err)
|
2023-02-14 23:05:31 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 17:50:14 +00:00
|
|
|
fmt.Printf("Render Sitemap to: %s\n", outSitemapFile)
|
|
|
|
|
2023-02-14 23:48:07 +00:00
|
|
|
if err := tplSitemap.Execute(f, categories); err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("render sitemap: %w", err)
|
2023-02-14 23:05:31 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2021-12-22 13:14:34 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 23:53:54 +00:00
|
|
|
func extractCategories(doc *goquery.Document) (map[string]Category, error) {
|
|
|
|
categories := make(map[string]Category)
|
2023-02-15 03:02:23 +00:00
|
|
|
var rootErr error
|
|
|
|
|
2023-02-14 23:53:54 +00:00
|
|
|
doc.
|
|
|
|
Find("body #contents").
|
|
|
|
NextFiltered("ul").
|
|
|
|
Find("ul").
|
2023-02-15 03:02:23 +00:00
|
|
|
EachWithBreak(func(_ int, selUl *goquery.Selection) bool {
|
|
|
|
if rootErr != nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2023-02-14 23:53:54 +00:00
|
|
|
selUl.
|
|
|
|
Find("li a").
|
2023-02-15 03:02:23 +00:00
|
|
|
EachWithBreak(func(_ int, s *goquery.Selection) bool {
|
2023-02-14 23:53:54 +00:00
|
|
|
selector, exists := s.Attr("href")
|
|
|
|
if !exists {
|
2023-02-15 03:02:23 +00:00
|
|
|
return true
|
2023-02-14 23:53:54 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 23:54:33 +00:00
|
|
|
category, err := extractCategory(doc, selector)
|
2023-02-14 23:53:54 +00:00
|
|
|
if err != nil {
|
2023-02-15 03:02:23 +00:00
|
|
|
rootErr = fmt.Errorf("extract category: %w", err)
|
|
|
|
return false
|
2023-02-14 23:53:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
categories[selector] = *category
|
2023-02-15 03:02:23 +00:00
|
|
|
|
|
|
|
return true
|
2023-02-14 23:53:54 +00:00
|
|
|
})
|
2023-02-15 03:02:23 +00:00
|
|
|
|
|
|
|
return true
|
2023-02-14 23:53:54 +00:00
|
|
|
})
|
|
|
|
|
2023-02-15 03:02:23 +00:00
|
|
|
if rootErr != nil {
|
|
|
|
return nil, fmt.Errorf("extract categories: %w", rootErr)
|
|
|
|
}
|
|
|
|
|
2023-02-14 23:53:54 +00:00
|
|
|
return categories, nil
|
|
|
|
}
|
|
|
|
|
2023-02-14 23:54:33 +00:00
|
|
|
func extractCategory(doc *goquery.Document, selector string) (*Category, error) {
|
2023-02-14 23:48:07 +00:00
|
|
|
var category Category
|
2023-02-14 19:06:46 +00:00
|
|
|
var err error
|
|
|
|
|
2023-02-14 23:48:07 +00:00
|
|
|
doc.Find(selector).EachWithBreak(func(_ int, selCatHeader *goquery.Selection) bool {
|
2023-02-14 19:24:30 +00:00
|
|
|
selDescr := selCatHeader.NextFiltered("p")
|
|
|
|
// FIXME: bug. this would select links from all neighboring
|
|
|
|
// sub-categories until the next category. To prevent this we should
|
|
|
|
// find only first ul
|
|
|
|
ul := selCatHeader.NextFilteredUntil("ul", "h2")
|
2021-12-22 13:14:34 +00:00
|
|
|
|
2023-02-14 19:06:46 +00:00
|
|
|
var links []Link
|
2023-02-14 19:24:30 +00:00
|
|
|
ul.Find("li").Each(func(_ int, selLi *goquery.Selection) {
|
|
|
|
selLink := selLi.Find("a")
|
|
|
|
url, _ := selLink.Attr("href")
|
2021-12-22 13:14:34 +00:00
|
|
|
link := Link{
|
2023-02-14 19:24:30 +00:00
|
|
|
Title: selLink.Text(),
|
2023-02-15 03:10:05 +00:00
|
|
|
// FIXME(kazhuravlev): Title contains only title but
|
|
|
|
// description contains Title + description
|
2023-02-14 19:24:30 +00:00
|
|
|
Description: selLi.Text(),
|
2021-12-22 13:14:34 +00:00
|
|
|
Url: url,
|
|
|
|
}
|
|
|
|
links = append(links, link)
|
|
|
|
})
|
2023-02-15 03:10:05 +00:00
|
|
|
|
2023-02-14 19:06:46 +00:00
|
|
|
// FIXME: In this case we would have an empty category in main index.html with link to 404 page.
|
2022-08-30 14:21:44 +00:00
|
|
|
if len(links) == 0 {
|
2023-02-14 23:48:07 +00:00
|
|
|
err = errors.New("category does not contain links")
|
|
|
|
return false
|
2022-08-30 14:21:44 +00:00
|
|
|
}
|
2023-02-14 23:48:07 +00:00
|
|
|
|
|
|
|
category = Category{
|
2023-02-14 19:24:30 +00:00
|
|
|
Slug: slug.Generate(selCatHeader.Text()),
|
|
|
|
Title: selCatHeader.Text(),
|
|
|
|
Description: selDescr.Text(),
|
2023-02-14 23:49:25 +00:00
|
|
|
Links: links,
|
2021-12-22 13:14:34 +00:00
|
|
|
}
|
2023-02-14 23:48:07 +00:00
|
|
|
|
|
|
|
return true
|
2021-12-22 13:14:34 +00:00
|
|
|
})
|
2023-02-14 19:06:46 +00:00
|
|
|
|
|
|
|
if err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return nil, fmt.Errorf("build a category: %w", err)
|
2023-02-14 19:06:46 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 23:48:07 +00:00
|
|
|
return &category, nil
|
2021-12-22 13:14:34 +00:00
|
|
|
}
|
2021-12-25 14:32:20 +00:00
|
|
|
|
2023-02-14 23:48:07 +00:00
|
|
|
func rewriteLinksInIndex(doc *goquery.Document, categories map[string]Category) error {
|
2023-02-20 13:37:55 +00:00
|
|
|
var iterErr error
|
2023-02-14 23:21:17 +00:00
|
|
|
doc.
|
|
|
|
Find("body #content ul li ul li a").
|
2023-02-20 13:37:55 +00:00
|
|
|
EachWithBreak(func(_ int, s *goquery.Selection) bool {
|
2023-02-14 23:21:17 +00:00
|
|
|
href, hrefExists := s.Attr("href")
|
|
|
|
if !hrefExists {
|
|
|
|
// FIXME: looks like is an error. Tag `a` in our case always
|
|
|
|
// should have `href` attr.
|
2023-02-20 13:37:55 +00:00
|
|
|
return true
|
2023-02-14 23:21:17 +00:00
|
|
|
}
|
2021-12-25 14:32:20 +00:00
|
|
|
|
2023-02-14 23:21:17 +00:00
|
|
|
// do not replace links if no page has been created for it
|
2023-02-14 23:48:07 +00:00
|
|
|
_, catExists := categories[href]
|
|
|
|
if !catExists {
|
2023-02-20 13:37:55 +00:00
|
|
|
return true
|
2023-02-14 23:21:17 +00:00
|
|
|
}
|
2022-08-30 13:20:58 +00:00
|
|
|
|
2023-02-20 13:37:55 +00:00
|
|
|
linkUrl, err := url.Parse(href)
|
|
|
|
if err != nil {
|
|
|
|
iterErr = err
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
if linkUrl.Fragment != "" && linkUrl.Fragment != "contents" {
|
|
|
|
s.SetAttr("href", linkUrl.Fragment)
|
2023-02-14 23:21:17 +00:00
|
|
|
}
|
2023-02-20 13:37:55 +00:00
|
|
|
|
|
|
|
return true
|
2023-02-14 23:21:17 +00:00
|
|
|
})
|
2021-12-25 14:32:20 +00:00
|
|
|
|
2023-02-20 13:37:55 +00:00
|
|
|
if iterErr != nil {
|
|
|
|
return iterErr
|
|
|
|
}
|
|
|
|
|
2023-02-14 17:50:14 +00:00
|
|
|
fmt.Printf("Rewrite links in Index file: %s\n", outIndexFile)
|
2023-02-14 23:21:17 +00:00
|
|
|
resultHtml, err := doc.Html()
|
2023-02-14 22:26:55 +00:00
|
|
|
if err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("render html: %w", err)
|
2023-02-14 22:26:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if err := os.WriteFile(outIndexFile, []byte(resultHtml), 0644); err != nil {
|
2023-02-15 02:57:37 +00:00
|
|
|
return fmt.Errorf("rewrite index file: %w", err)
|
2023-02-14 22:26:55 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2021-12-25 14:32:20 +00:00
|
|
|
}
|