commit 50dcef8631a656719200e5ac746063fef6922bae
Author: Alex Denes
Date:   Tue May 10 20:15:01 2022 +0000

    Initial commit

diff --git a/generic.go b/generic.go
new file mode 100644
index 0000000..dd82c38
--- /dev/null
+++ b/generic.go
@@ -0,0 +1,47 @@
+// Package goboru defines the Module interface and the Media type that its
+// site modules return.
+package goboru
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"time"
+)
+
+// Module is the interface a site backend offers for searching.
+type Module interface {
+	// Query returns the media found for the given strings; the gelbooru
+	// package's Query treats them as tags.
+	Query([]string) ([]Media, error)
+}
+
+// Media describes one remote file together with its metadata.
+type Media struct {
+	Source string   // URL that Content downloads from
+	MD5    string   // checksum string as supplied by the site module
+	Tags   []string // tags attached to the file
+}
+
+// contentClient is shared by every Content call so that connections are
+// pooled; building a new Transport per call would strand its idle
+// connections. No overall Timeout is set, so a body may take arbitrarily
+// long to read; only the wait for the response headers is bounded.
+var contentClient = &http.Client{
+	Transport: &http.Transport{ResponseHeaderTimeout: 10 * time.Second},
+}
+
+// Content requests m.Source and returns the response body, which the caller
+// must close. A response outside the 2xx range is reported as an error
+// instead of being handed back as if it were the file.
+func (m Media) Content() (io.ReadCloser, error) {
+	resp, err := contentClient.Get(m.Source)
+	if err != nil {
+		return nil, err
+	}
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		resp.Body.Close()
+		return nil, fmt.Errorf("fetching %q: unexpected status %s", m.Source, resp.Status)
+	}
+	return resp.Body, nil
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..50e242e
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module git.redxen.eu/caskd/goboru
+
+go 1.18
diff --git a/modules/gelbooru/main.go b/modules/gelbooru/main.go
new file mode 100644
index 0000000..1340a5c
--- /dev/null
+++ b/modules/gelbooru/main.go
@@ -0,0 +1,197 @@
+// Package gelbooru queries the post index API of gelbooru.com.
+package gelbooru
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"strconv"
+	"strings"
+	"time"
+
+	. "git.redxen.eu/caskd/goboru"
+)
+
+// maxPages caps the pages requested for one query (pid 0 through 200); the
+// API does not allow fetching more pages per query.
+const maxPages = 201
+
+// apiClient is shared by all page requests so connections are reused. Its
+// Timeout bounds the whole exchange, including reading the response body.
+var apiClient = &http.Client{Timeout: 10 * time.Second}
+
+// gelbooru_API mirrors the JSON document returned by the post index API.
+type gelbooru_API struct {
+	Attributes struct {
+		Limit  float64 `json:"limit"`
+		Offset float64 `json:"offset"`
+		Count  float64 `json:"count"`
+	} `json:"@attributes"`
+	Posts []struct {
+		Id             uint64 `json:"id"`
+		Created_at     string `json:"created_at"`
+		Score          int64  `json:"score"`
+		Width          uint64 `json:"width"`
+		Height         uint64 `json:"height"`
+		MD5            string `json:"md5"`
+		Directory      string `json:"directory"`
+		Image          string `json:"image"`
+		Rating         string `json:"rating"`
+		Source         string `json:"source"`
+		Change         uint64 `json:"change"`
+		Owner          string `json:"owner"`
+		Creator_id     uint64 `json:"creator_id"`
+		Parent_id      uint64 `json:"parent_id"`
+		Sample         uint64 `json:"sample"`
+		Preview_height uint64 `json:"preview_height"`
+		Preview_width  uint64 `json:"preview_width"`
+		Tags           string `json:"tags"`
+		Title          string `json:"title"`
+		Has_notes      string `json:"has_notes"`
+		Has_comments   string `json:"has_comments"`
+		File_url       string `json:"file_url"`
+		Preview_url    string `json:"preview_url"`
+		Sample_url     string `json:"sample_url"`
+		Sample_height  uint64 `json:"sample_height"`
+		Sample_width   uint64 `json:"sample_width"`
+		Status         string `json:"status"`
+		Post_locked    uint64 `json:"post_locked"`
+		Has_children   string `json:"has_children"`
+	} `json:"post"`
+}
+
+// result is what one page job reports back to Query.
+type result struct {
+	media []Media // entries parsed from the page
+	err   error   // fetch or parse failure, if any
+	pid   uint    // page the job was asked for
+}
+
+// Query collects the posts found for tags, page by page and in page order,
+// until a page comes back empty, a page fails, or maxPages pages have been
+// consumed. Up to j_max pages are requested concurrently; a j_max of 0 is
+// treated as 1. On failure the media gathered from the earlier pages is
+// returned together with the error.
+func Query(tags []string, j_max uint) (mr []Media, err error) {
+	if j_max == 0 {
+		j_max = 1
+	}
+
+	// One slot per possible job: jobs still running when Query returns
+	// early can deliver their result and exit instead of blocking forever
+	// on the send.
+	res_chan := make(chan result, maxPages)
+	pending := make(map[uint]result) // finished pages waiting for their turn
+
+	var (
+		started uint // jobs launched so far; also the next pid to request
+		next    uint // pid whose result is consumed next
+	)
+	for {
+		// Keep the window full. Bounding by maxPages as well as j_max is
+		// what lets the loop terminate for every j_max.
+		for started < maxPages && started-next < j_max {
+			go run_job(tags, started, res_chan)
+			started++
+		}
+		if next == started {
+			return mr, nil // every available page has been consumed
+		}
+
+		// Jobs finish in any order; wait for the page that is due next.
+		c, ok := pending[next]
+		for !ok {
+			r := <-res_chan
+			pending[r.pid] = r
+			c, ok = pending[next]
+		}
+		delete(pending, next)
+		next++
+
+		if c.err != nil {
+			return mr, c.err
+		}
+		if len(c.media) == 0 {
+			return mr, nil // an empty page ends the collection
+		}
+		mr = append(mr, c.media...)
+		log.Print("Added ", len(c.media), "/", len(mr), " elements")
+	}
+}
+
+// run_job fetches and parses page pid and always sends exactly one result
+// on res, carrying either the parsed media or the error.
+func run_job(tags []string, pid uint, res chan result) {
+	r := result{pid: pid}
+	// Registered first so it runs last, after the body has been closed.
+	defer func() { res <- r }()
+
+	rc, err := fetch(tags, pid)
+	if err != nil {
+		r.err = fmt.Errorf("page %d: %w", pid, err)
+		return
+	}
+	defer rc.Close()
+
+	if r.media, err = parse(rc); err != nil {
+		r.err = fmt.Errorf("page %d: %w", pid, err)
+	}
+}
+
+// fetch requests page pid of the post index for tags and returns the
+// response body, which the caller must close.
+func fetch(tags []string, pid uint) (io.ReadCloser, error) {
+	query := url.Values{}
+	query.Set("page", "dapi")
+	query.Set("s", "post")
+	query.Set("q", "index")
+	query.Set("json", "1")
+	query.Set("pid", strconv.FormatUint(uint64(pid), 10))
+	query.Set("tags", strings.Join(tags, " "))
+
+	u := url.URL{
+		Scheme:   "https",
+		Host:     "gelbooru.com",
+		Path:     "/index.php",
+		RawQuery: query.Encode(),
+	}
+
+	resp, err := apiClient.Get(u.String())
+	if err != nil {
+		return nil, err
+	}
+	if resp.StatusCode != http.StatusOK {
+		// Close here: the caller never sees this body, so nobody else would.
+		resp.Body.Close()
+		return nil, fmt.Errorf("gelbooru API: unexpected status %s", resp.Status)
+	}
+	return resp.Body, nil
+}
+
+// parse decodes one API response from r into Media values. It returns nil
+// without error when the response contains no posts.
+func parse(r io.Reader) ([]Media, error) {
+	var api_resp gelbooru_API
+	if err := json.NewDecoder(r).Decode(&api_resp); err != nil {
+		return nil, fmt.Errorf("JSON parse: %w", err)
+	}
+	if len(api_resp.Posts) == 0 {
+		return nil, nil
+	}
+
+	m := make([]Media, 0, len(api_resp.Posts))
+	for i := range api_resp.Posts {
+		p := &api_resp.Posts[i] // avoid copying the large post struct
+		// Fields, unlike Split, yields no empty-string tag for an empty or
+		// multiply-spaced tag list.
+		m = append(m, Media{
+			Source: p.File_url,
+			MD5:    p.MD5,
+			Tags:   strings.Fields(p.Tags),
+		})
+	}
+	return m, nil
+}