From e017b669049891e3ad00e6f4dc716731c794fb98 Mon Sep 17 00:00:00 2001 From: Alex Denes Date: Tue, 10 May 2022 20:18:26 +0000 Subject: [PATCH] Initial commit --- go.mod | 3 ++ main.go | 136 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 go.mod create mode 100644 main.go diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..0883d67 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module git.redxen.eu/caskd/goom + +go 1.18 diff --git a/main.go b/main.go new file mode 100644 index 0000000..68a2a76 --- /dev/null +++ b/main.go @@ -0,0 +1,136 @@ +package main + +import ( + "bytes" + "crypto/md5" + "encoding/hex" + "errors" + "flag" + "fmt" + "html" + "io" + "io/fs" + "log" + "os" + "strings" + + "git.redxen.eu/caskd/goboru" + "git.redxen.eu/caskd/goboru/modules/gelbooru" +) + +func main() { + var ( + jobs uint + ) + flag.UintVar(&jobs, "maxjobs", 10, "Maximum concurrent page fetch jobs") + flag.Parse() + + var ( + media []goboru.Media + err error + ) + + if media, err = gelbooru.Query(flag.Args(), 10); err != nil { + log.Fatal(err) + } + + log.Print("Fetched ", len(media), " elements") + + for _, v := range media { + var i uint64 + if i, err = download(v); err != nil { + log.Fatal(err) + } + if i == 0 { + log.Print("Skipped ", v.MD5) + } else { + log.Print("Downloaded ", i, " bytes") + } + } +} + +func download(m goboru.Media) (i uint64, err error) { + var ( + f *os.File + r io.ReadCloser + skip bool + ) + + f, err = os.OpenFile(m.MD5, os.O_RDWR, 0644) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + f, err = os.Create(m.MD5) + if err != nil { + err = fmt.Errorf("Failed to create file: %s", err) + return + } + } else { + err = fmt.Errorf("Failed to open existing file to check MD5 checksum: %s", err) + return + } + } else { + var md []byte + md, err = hex.DecodeString(m.MD5) + if err != nil { + err = fmt.Errorf("Failed to decode hexadecimal MD5 hash: %s", err) + return + } + h := md5.New() + + _, err = io.Copy(h, f) + if err != nil { + err = fmt.Errorf("Failed to copy bytes to intermediate buffer for checksumming: %s", err) + return + } + f.Seek(0, io.SeekStart) + + csum := h.Sum(nil) + if bytes.Equal(csum, md) { + skip = true + } else { + log.Printf("Hash mismatch: %x != %x [%s]", csum, md, m.Source) + } + } + + if !skip { + r, err = m.Content() + if err != nil { + err = fmt.Errorf("Failed to get content reader: %s", err) + return + } + + var ir int64 + ir, err = io.Copy(f, r) + i = uint64(ir) + if err != nil { + err = fmt.Errorf("Failed to copy bytes to file: %s", err) + return + } + r.Close() + } + f.Close() + + for _, x := range m.Tags { + x = html.UnescapeString(x) // Unescape tags + x = strings.ReplaceAll(x, "/", "_") // Replace filesystem delimiters in tags + + err = os.Mkdir(x, 0750) + if err != nil { + if !os.IsExist(err) { + err = fmt.Errorf("Failed to create tag directory %s: %v", x, err) + return + } + } + + src, dst := strings.Join([]string{x, m.MD5}, "/"), strings.Join([]string{"..", m.MD5}, "/") + err = os.Symlink(dst, src) + if err != nil { + if !os.IsExist(err) { + err = fmt.Errorf("Failed to create symlink in tag directory %s: %v", src, err) + return + } + } + } + err = nil + return +}