diff --git a/generic.go b/generic.go
index dd82c38..249cba8 100644
--- a/generic.go
+++ b/generic.go
@@ -1,15 +1,37 @@
+/*
+ * This file is part of goboru. (https://git.redxen.eu/caskd/goboru)
+ * Copyright (c) 2022 Alex-David Denes
+ *
+ * goboru is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * goboru is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with goboru. If not, see .
+ */
+
 package goboru
 
 import (
+	"errors"
 	"io"
 	"net/http"
 	"time"
 )
 
 type Module interface {
-	Query([]string) ([]Media, error)
+	Query(Tags, Jobs) ([]Media, error)
 }
 
+type Tags []string
+type Jobs uint
+
 type Media struct {
 	Source string
 	MD5    string
@@ -17,6 +39,9 @@ type Media struct {
 }
 
 func (m Media) Content() (rc io.ReadCloser, err error) {
+	if m.Source == "" {
+		err = errors.New("This media has no content")
+	}
 	client := http.Client{Timeout: 0, Transport: &http.Transport{ResponseHeaderTimeout: 10 * time.Second}}
 
 	var resp *http.Response
diff --git a/modules/e621/main.go b/modules/e621/main.go
new file mode 100644
index 0000000..6ffd6e2
--- /dev/null
+++ b/modules/e621/main.go
@@ -0,0 +1,279 @@
+/*
+ * This file is part of goboru. (https://git.redxen.eu/caskd/goboru)
+ * Copyright (c) 2022 Alex-David Denes
+ *
+ * goboru is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * goboru is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with goboru. If not, see .
+ */
+
+package e621
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"sort"
+	"strconv"
+	"strings"
+	"time"
+
+	. "git.redxen.eu/caskd/goboru"
+)
+
+type (
+	user_id     uint64
+	post_id     uint64
+	pool_id     uint64
+	file_size   uint64
+	file_struct struct {
+		Width  uint64 `json:"width"`
+		Height uint64 `json:"height"`
+		URL    string `json:"url"`
+	}
+	// e621_API mirrors the JSON document decoded from the /posts.json response.
+	e621_API struct {
+		Posts []struct {
+			ID        post_id `json:"id"`
+			CreatedAt string  `json:"created_at"`
+			UpdatedAt string  `json:"updated_at"`
+			File      struct {
+				file_struct
+				Size file_size `json:"size"`
+				Ext  string    `json:"ext"`
+				Md5  string    `json:"md5"`
+			} `json:"file"`
+			Preview file_struct `json:"preview"`
+			Sample  struct {
+				file_struct
+				Has        bool `json:"has"`
+				Alternates map[string]struct {
+					Type   string   `json:"type"`
+					Height int      `json:"height"`
+					Width  int      `json:"width"`
+					Urls   []string `json:"urls"`
+				} `json:"alternates"`
+			} `json:"sample"`
+			Score struct {
+				Up    int64 `json:"up"`
+				Down  int64 `json:"down"`
+				Total int64 `json:"total"`
+			} `json:"score"`
+			Tags struct {
+				General   []string `json:"general"`
+				Species   []string `json:"species"`
+				Character []string `json:"character"`
+				Copyright []string `json:"copyright"`
+				Artist    []string `json:"artist"`
+				Invalid   []string `json:"invalid"`
+				Lore      []string `json:"lore"`
+				Meta      []string `json:"meta"`
+			} `json:"tags"`
+			LockedTags []string `json:"locked_tags"`
+			ChangeSeq  uint64   `json:"change_seq"`
+			Flags      struct {
+				Pending         bool `json:"pending"`
+				Flagged         bool `json:"flagged"`
+				NoteLocked      bool `json:"note_locked"`
+				StatusLocked    bool `json:"status_locked"`
+				RatingLocked    bool `json:"rating_locked"`
+				CommentDisabled bool `json:"comment_disabled"`
+				Deleted         bool `json:"deleted"`
+			} `json:"flags"`
+			Rating        string    `json:"rating"`
+			FavCount      uint64    `json:"fav_count"`
+			Sources       []string  `json:"sources"`
+			Pools         []pool_id `json:"pools"`
+			Relationships struct {
+				ParentID          post_id   `json:"parent_id"`
+				HasChildren       bool      `json:"has_children"`
+				HasActiveChildren bool      `json:"has_active_children"`
+				Children          []post_id `json:"children"`
+			} `json:"relationships"`
+			ApproverID   user_id `json:"approver_id"`
+			UploaderID   user_id `json:"uploader_id"`
+			Description  string  `json:"description"`
+			CommentCount uint64  `json:"comment_count"`
+			IsFavorited  bool    `json:"is_favorited"`
+			HasNotes     bool    `json:"has_notes"`
+			Duration     float32 `json:"duration"`
+		} `json:"posts"`
+	}
+)
+
+// result is the outcome of fetching and parsing one page: its media, or the error that occurred.
+type result struct {
+	media []Media
+	err   error
+	pid   uint
+}
+
+// Query collects the media of every post matching tags, page by page.
+//
+// At most j_max page requests (but always at least one) are in flight at a
+// time. Pages are appended to the result in page order; collection stops at
+// the first empty page, or at the first failed page, whose error is returned
+// together with the media gathered up to that point.
+func Query(tags Tags, j_max Jobs) (mr []Media, err error) {
+	res_chan := make(chan result)
+	var r_arr []result
+	pid, rpid, ppid := uint(0), uint(0), uint(0)
+
+	for {
+		/*
+			if pid <= 200 { // API only allows to fetch up to 200 pages per query
+		*/
+		go run_job(tags, pid, res_chan)
+		pid++
+		/*
+			}
+		*/
+
+		if pid < uint(j_max) {
+			continue
+		}
+
+		if rpid < pid {
+			r := <-res_chan
+			rpid++
+			r_arr = append(r_arr, r)
+		}
+
+		if ppid < pid {
+			sort.Slice(r_arr, func(i, j int) bool {
+				return r_arr[i].pid < r_arr[j].pid
+			})
+
+			if c := r_arr[0]; c.pid == ppid {
+				ppid++
+				r_arr = r_arr[1:]
+				if c.err != nil {
+					err = c.err
+					break
+				}
+				if len(c.media) == 0 {
+					break
+				}
+				mr = append(mr, c.media...)
+				log.Print("Added ", len(c.media), "/", len(mr), " elements")
+			}
+		} else {
+			break // Break when no more pages have been fetched
+		}
+	}
+
+	// Jobs still running at this point would block forever on the unbuffered
+	// channel. Receive their results in the background so they can exit.
+	go func(pending uint) {
+		for ; pending > 0; pending-- {
+			<-res_chan
+		}
+	}(pid - rpid)
+	return
+}
+
+// run_job fetches and parses page pid for tags and always sends exactly one
+// result, holding either the media or the error, on res.
+func run_job(tags []string, pid uint, res chan result) {
+	r := result{pid: pid}
+	defer func(x *result, c chan result) { c <- *x }(&r, res)
+
+	var rc io.ReadCloser
+	if rc, r.err = fetch(tags, pid); r.err != nil {
+		return
+	}
+	defer rc.Close()
+
+	r.media, r.err = parse(rc)
+}
+
+// fetch requests page pid of the post listing for tags from e621.net and
+// returns the response body, which the caller must close. A response with
+// a status other than 200 OK is turned into an error.
+func fetch(tags []string, pid uint) (rc io.ReadCloser, err error) {
+	client := http.Client{Timeout: 10 * time.Second}
+	req := &http.Request{
+		URL: &url.URL{
+			Scheme: "https",
+			Host:   "e621.net",
+			Path:   "/posts.json",
+		},
+	}
+	query := req.URL.Query()
+	query.Add("page", strconv.FormatUint(uint64(pid), 10))
+	query.Add("tags", strings.Join(tags, " "))
+	req.URL.RawQuery = query.Encode()
+	req.Header = make(http.Header)
+	req.Header.Set("user-agent", "gomon/1.0 (https://git.redxen.eu/caskd/gomon)")
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return
+	}
+	if resp.StatusCode != http.StatusOK {
+		// Without this check an error page would only surface later as an
+		// opaque JSON parse failure.
+		resp.Body.Close()
+		err = fmt.Errorf("e621: unexpected HTTP status %s", resp.Status)
+		return
+	}
+	rc = resp.Body
+	return
+}
+
+// parse decodes an e621 post listing from r and converts every post that
+// has a file URL into a Media carrying that URL, the file MD5 and the
+// post's general, species, character, copyright, artist, lore and meta tags.
+// Decoding fails on JSON fields that e621_API does not declare.
+func parse(r io.Reader) (m []Media, err error) {
+	var api_resp e621_API
+
+	d := json.NewDecoder(r)
+	d.DisallowUnknownFields()
+	if err = d.Decode(&api_resp); err != nil {
+		err = fmt.Errorf("JSON parse: %w", err)
+		return
+	}
+
+	if len(api_resp.Posts) == 0 {
+		return
+	}
+
+	for _, v := range api_resp.Posts {
+		if v.File.URL == "" {
+			// Skip posts that require higher privileges to access
+			// - "Some posts cannot be viewed without logging in based on which tags are applied to them."
+			// https://e621.net/forum_topics/25717
+			continue
+		}
+		cur := Media{
+			Source: v.File.URL,
+			MD5:    v.File.Md5,
+		}
+		for _, tc := range [][]string{
+			v.Tags.General,
+			v.Tags.Species,
+			v.Tags.Character,
+			v.Tags.Copyright,
+			v.Tags.Artist,
+			v.Tags.Lore,
+			v.Tags.Meta,
+		} {
+			cur.Tags = append(cur.Tags, tc...)
+		}
+		m = append(m, cur)
+	}
+	return
+}
diff --git a/modules/gelbooru/main.go b/modules/gelbooru/main.go
index 528821a..f498adb 100644
--- a/modules/gelbooru/main.go
+++ b/modules/gelbooru/main.go
@@ -1,3 +1,21 @@
+/*
+ * This file is part of goboru. (https://git.redxen.eu/caskd/goboru)
+ * Copyright (c) 2022 Alex-David Denes
+ *
+ * goboru is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * any later version.
+ *
+ * goboru is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with goboru. If not, see .
+ */
+
 package gelbooru
 
 import (
@@ -60,7 +78,7 @@ type result struct {
 	pid uint
 }
 
-func Query(tags []string, j_max uint) (mr []Media, err error) {
+func Query(tags Tags, j_max Jobs) (mr []Media, err error) {
 	res_chan := make(chan result)
 	var r_arr []result
 
@@ -70,7 +88,7 @@ func Query(tags []string, j_max uint) (mr []Media, err error) {
 			pid++
 		}
 
-		if pid < j_max {
+		if pid < uint(j_max) {
 			continue
 		}
 
@@ -107,7 +125,7 @@ func Query(tags []string, j_max uint) (mr []Media, err error) {
 
 func run_job(tags []string, pid uint, res chan result) {
 	r := result{pid: pid}
-	defer func(x result, c chan result) { c <- x }(r, res)
+	defer func(x *result, c chan result) { c <- *x }(&r, res)
 
 	var rc io.ReadCloser
 	if rc, r.err = fetch(tags, pid); r.err != nil {
@@ -150,6 +168,7 @@ func parse(r io.Reader) (m []Media, err error) {
 	var api_resp gelbooru_API
 
 	d := json.NewDecoder(r)
+	d.DisallowUnknownFields()
 	if err = d.Decode(&api_resp); err != nil {
 		err = fmt.Errorf("JSON parse: %s", err)
 		return