2022-05-10 20:15:01 +00:00
|
|
|
package gelbooru
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"log"
|
|
|
|
"net/http"
|
|
|
|
"net/url"
|
|
|
|
"sort"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
. "git.redxen.eu/caskd/goboru"
|
|
|
|
)
|
|
|
|
|
|
|
|
// gelbooru_API is the decoding target for one JSON response of the Gelbooru
// post listing endpoint. Only the fields named by the json tags below are
// read; anything else in the payload is ignored by encoding/json.
type gelbooru_API struct {
	// Attributes holds the paging metadata found under the "@attributes" key.
	Attributes struct {
		Limit  float64 `json:"limit"`
		Offset float64 `json:"offset"`
		Count  float64 `json:"count"`
	} `json:"@attributes"`

	// Posts holds the entries found under the "post" key, one per post.
	Posts []struct {
		// Identity and basic metadata.
		Id         uint64 `json:"id"`
		Created_at string `json:"created_at"`
		Score      int64  `json:"score"`

		// Dimensions of the full-size file.
		Width  uint64 `json:"width"`
		Height uint64 `json:"height"`

		// File identification.
		MD5       string `json:"md5"`
		Directory string `json:"directory"`
		Image     string `json:"image"`

		Rating string `json:"rating"`
		Source string `json:"source"`
		Change uint64 `json:"change"`

		// Ownership and relations.
		Owner      string `json:"owner"`
		Creator_id uint64 `json:"creator_id"`
		Parent_id  uint64 `json:"parent_id"`

		Sample         uint64 `json:"sample"`
		Preview_height uint64 `json:"preview_height"`
		Preview_width  uint64 `json:"preview_width"`

		// Tags is a single string; parse splits it on whitespace.
		Tags  string `json:"tags"`
		Title string `json:"title"`

		Has_notes    string `json:"has_notes"`
		Has_comments string `json:"has_comments"`

		// URLs of the full file, its preview and its sample.
		File_url    string `json:"file_url"`
		Preview_url string `json:"preview_url"`
		Sample_url  string `json:"sample_url"`

		Sample_height uint64 `json:"sample_height"`
		Sample_width  uint64 `json:"sample_width"`

		Status       string `json:"status"`
		Post_locked  uint64 `json:"post_locked"`
		Has_children string `json:"has_children"`
	} `json:"post"`
}
|
|
|
|
|
|
|
|
type result struct {
|
|
|
|
media []Media
|
|
|
|
err error
|
|
|
|
pid uint
|
|
|
|
}
|
|
|
|
|
|
|
|
func Query(tags []string, j_max uint) (mr []Media, err error) {
|
|
|
|
res_chan := make(chan result)
|
|
|
|
var r_arr []result
|
|
|
|
|
|
|
|
for pid, rpid, ppid := uint(0), uint(0), uint(0); ; {
|
|
|
|
if pid <= 200 { // API only allows to fetch up to 200 pages per query
|
|
|
|
go run_job(tags, pid, res_chan)
|
|
|
|
pid++
|
|
|
|
}
|
|
|
|
|
|
|
|
if pid < j_max {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if rpid < pid {
|
|
|
|
r := <-res_chan
|
|
|
|
rpid++
|
|
|
|
r_arr = append(r_arr, r)
|
|
|
|
}
|
|
|
|
|
|
|
|
if ppid < pid {
|
|
|
|
sort.Slice(r_arr, func(i, j int) bool {
|
|
|
|
return r_arr[i].pid < r_arr[j].pid
|
|
|
|
})
|
|
|
|
|
|
|
|
if c := r_arr[0]; c.pid == ppid {
|
|
|
|
ppid++
|
|
|
|
r_arr = r_arr[1:]
|
|
|
|
if c.err != nil {
|
|
|
|
err = c.err
|
|
|
|
break
|
|
|
|
}
|
|
|
|
if len(c.media) == 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
mr = append(mr, c.media...)
|
|
|
|
log.Print("Added ", len(c.media), "/", len(mr), " elements")
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
break // Break when no more pages have been fetched
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
func run_job(tags []string, pid uint, res chan result) {
|
|
|
|
r := result{pid: pid}
|
2022-08-11 16:27:40 +00:00
|
|
|
defer func(x result, c chan result) { c <- x }(r, res)
|
2022-05-10 20:15:01 +00:00
|
|
|
|
|
|
|
var rc io.ReadCloser
|
|
|
|
if rc, r.err = fetch(tags, pid); r.err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
defer rc.Close()
|
|
|
|
|
|
|
|
if r.media, r.err = parse(rc); r.err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// fetch requests result page pid for the given tags from the Gelbooru JSON
// API and returns the response body for the caller to read and close.
//
// The tags are joined with single spaces into the "tags" query parameter.
// An error is returned when the request fails or when the server answers with
// a status other than 200 OK; in the latter case the body is closed here and
// rc is nil.
func fetch(tags []string, pid uint) (rc io.ReadCloser, err error) {
	client := http.Client{Timeout: 10 * time.Second}

	query := url.Values{}
	query.Add("page", "dapi")
	query.Add("s", "post")
	query.Add("q", "index")
	query.Add("json", "1")
	query.Add("pid", strconv.FormatUint(uint64(pid), 10))
	query.Add("tags", strings.Join(tags, " "))

	// A zero Method means GET.
	req := &http.Request{
		URL: &url.URL{
			Scheme:   "https",
			Host:     "gelbooru.com",
			Path:     "/index.php",
			RawQuery: query.Encode(),
		},
	}

	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	// Do only fails on transport-level problems; an HTTP error status still
	// comes back with a nil error, so reject it here rather than handing an
	// error page to the JSON parser.
	if resp.StatusCode != http.StatusOK {
		resp.Body.Close()
		return nil, fmt.Errorf("gelbooru: unexpected HTTP status %s for page %d", resp.Status, pid)
	}
	return resp.Body, nil
}
|
|
|
|
|
|
|
|
func parse(r io.Reader) (m []Media, err error) {
|
|
|
|
var api_resp gelbooru_API
|
|
|
|
|
|
|
|
d := json.NewDecoder(r)
|
|
|
|
if err = d.Decode(&api_resp); err != nil {
|
|
|
|
err = fmt.Errorf("JSON parse: %s", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(api_resp.Posts) == 0 {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, v := range api_resp.Posts {
|
|
|
|
cur := Media{
|
|
|
|
Source: v.File_url,
|
|
|
|
MD5: v.MD5,
|
|
|
|
Tags: strings.Split(v.Tags, " "),
|
|
|
|
}
|
|
|
|
m = append(m, cur)
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|