Add e621 module and misc changes

- Error on potential new fields
- Add e621 module
- Change interface to fit current function prototypes
This commit is contained in:
Alex D. 2022-08-11 19:24:05 +00:00
parent 4dfeb751c6
commit 1e26ed602e
Signed by: caskd
GPG Key ID: F92BA85F61F4C173
3 changed files with 296 additions and 4 deletions

View File

@ -1,15 +1,37 @@
/*
* This file is part of goboru. (https://git.redxen.eu/caskd/goboru)
* Copyright (c) 2022 Alex-David Denes
*
* goboru is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* goboru is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with goboru. If not, see <https://www.gnu.org/licenses/>.
*/
package goboru
import (
"errors"
"io"
"net/http"
"time"
)
// Module is the query interface of a backend module: Query takes the search
// input and returns the matching media or an error.
// NOTE(review): two Query signatures are listed below. This looks like a diff
// rendering in which the first line is the replaced declaration and the second
// its successor (taking Tags and Jobs) — confirm, since a Go interface cannot
// declare Query twice.
type Module interface {
Query([]string) ([]Media, error)
Query(Tags, Jobs) ([]Media, error)
}
// Tags is a list of tag strings; it is the first argument of Module.Query.
type Tags []string
// Jobs is an unsigned job count; it is the second argument of Module.Query.
// Presumably it bounds how many fetch jobs a module runs at once — confirm
// against the module implementations.
type Jobs uint
type Media struct {
Source string
MD5 string
@ -17,6 +39,9 @@ type Media struct {
}
func (m Media) Content() (rc io.ReadCloser, err error) {
if m.Source == "" {
err = errors.New("This media has no content")
}
client := http.Client{Timeout: 0, Transport: &http.Transport{ResponseHeaderTimeout: 10 * time.Second}}
var resp *http.Response

248
modules/e621/main.go Normal file
View File

@ -0,0 +1,248 @@
/*
* This file is part of goboru. (https://git.redxen.eu/caskd/goboru)
* Copyright (c) 2022 Alex-David Denes
*
* goboru is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* goboru is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with goboru. If not, see <https://www.gnu.org/licenses/>.
*/
package e621
import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"net/url"
"sort"
"strconv"
"strings"
"time"
. "git.redxen.eu/caskd/goboru"
)
// Types describing the JSON document that parse decodes (fetch requests it
// from /posts.json). parse enables DisallowUnknownFields on its decoder, so a
// key in the response without a matching field below makes decoding fail.
type (
// Named numeric types for identifiers and sizes; all are uint64 underneath.
user_id uint64
post_id uint64
pool_id uint64
file_size uint64
// file_struct is the width/height/url triple. It is embedded in the "file"
// and "sample" objects and used directly for "preview".
file_struct struct {
Width uint64 `json:"width"`
Height uint64 `json:"height"`
URL string `json:"url"`
}
// e621_API is the top-level response object: a single "posts" array.
// parse only reads File.URL, File.Md5 and the Tags categories (except
// Invalid); the remaining fields are declared so that strict decoding
// accepts them.
e621_API struct {
Posts []struct {
ID post_id `json:"id"`
CreatedAt string `json:"created_at"`
UpdatedAt string `json:"updated_at"`
// "file": file_struct fields plus size, extension and MD5.
File struct {
file_struct
Size file_size `json:"size"`
Ext string `json:"ext"`
Md5 string `json:"md5"`
} `json:"file"`
Preview file_struct `json:"preview"`
// "sample": file_struct fields plus a "has" flag and a map of
// alternates keyed by string.
Sample struct {
file_struct
Has bool `json:"has"`
Alternates map[string]struct {
Type string `json:"type"`
Height int `json:"height"`
Width int `json:"width"`
Urls []string `json:"urls"`
} `json:"alternates"`
} `json:"sample"`
Score struct {
Up int64 `json:"up"`
Down int64 `json:"down"`
Total int64 `json:"total"`
} `json:"score"`
// "tags": tag names grouped by category.
Tags struct {
General []string `json:"general"`
Species []string `json:"species"`
Character []string `json:"character"`
Copyright []string `json:"copyright"`
Artist []string `json:"artist"`
Invalid []string `json:"invalid"`
Lore []string `json:"lore"`
Meta []string `json:"meta"`
} `json:"tags"`
LockedTags []string `json:"locked_tags"`
ChangeSeq uint64 `json:"change_seq"`
Flags struct {
Pending bool `json:"pending"`
Flagged bool `json:"flagged"`
NoteLocked bool `json:"note_locked"`
StatusLocked bool `json:"status_locked"`
RatingLocked bool `json:"rating_locked"`
CommentDisabled bool `json:"comment_disabled"`
Deleted bool `json:"deleted"`
} `json:"flags"`
Rating string `json:"rating"`
FavCount uint64 `json:"fav_count"`
Sources []string `json:"sources"`
Pools []pool_id `json:"pools"`
Relationships struct {
ParentID post_id `json:"parent_id"`
HasChildren bool `json:"has_children"`
HasActiveChildren bool `json:"has_active_children"`
Children []post_id `json:"children"`
} `json:"relationships"`
ApproverID user_id `json:"approver_id"`
UploaderID user_id `json:"uploader_id"`
Description string `json:"description"`
CommentCount uint64 `json:"comment_count"`
IsFavorited bool `json:"is_favorited"`
HasNotes bool `json:"has_notes"`
Duration float32 `json:"duration"`
} `json:"posts"`
}
)
// result is the value a single run_job call sends on the result channel.
type result struct {
// media is what parse returned for the page.
media []Media
// err is the error from fetch or parse, if either failed.
err error
// pid is the page number the job requested; Query sorts on it to consume
// results in page order.
pid uint
}
// Query fetches pages of e621 posts matching tags and returns the collected
// media in page order.
//
// One run_job goroutine is started per page. Up to j_max jobs are kept in
// flight at once (one job is always started, even when j_max is 0). Results
// may arrive in any order; they are buffered, sorted by page number and
// consumed strictly in ascending order. Collection stops at the first page
// that carries an error or yields no media. On error, the media gathered from
// earlier pages is returned together with that error.
//
// NOTE(review): a page on which parse skipped every post yields no media and
// therefore ends the query just like a genuinely empty page.
// NOTE(review): page numbers start at 0 and have no upper bound; a previously
// commented-out guard mentioned an API limit of 200 pages per query. Confirm
// how the API treats page 0 and pages past its limit.
func Query(tags Tags, j_max Jobs) (mr []Media, err error) {
	res_chan := make(chan result)

	var (
		pending  []result // received results not yet consumed
		launched uint     // jobs started so far; also the next page to request
		received uint     // results read from res_chan so far
		next     uint     // page number whose result is consumed next
	)

	// Every started job sends exactly one result on the unbuffered channel.
	// Whatever is still outstanding when we return is drained in the
	// background so those goroutines are not left blocked on their send.
	defer func() {
		if n := launched - received; n > 0 {
			go func() {
				for ; n > 0; n-- {
					<-res_chan
				}
			}()
		}
	}()

	for {
		go run_job(tags, launched, res_chan)
		launched++
		if launched < uint(j_max) {
			continue // still filling the initial window of concurrent jobs
		}

		pending = append(pending, <-res_chan)
		received++
		sort.Slice(pending, func(i, j int) bool {
			return pending[i].pid < pending[j].pid
		})

		// Consume every buffered result that is next in page order, so a late
		// page does not cause further pages to be requested needlessly.
		for len(pending) > 0 && pending[0].pid == next {
			c := pending[0]
			pending = pending[1:]
			next++
			if c.err != nil {
				return mr, c.err
			}
			if len(c.media) == 0 {
				return mr, nil
			}
			mr = append(mr, c.media...)
			log.Print("Added ", len(c.media), "/", len(mr), " elements")
		}
	}
}
// run_job fetches and parses page pid for tags and sends exactly one result
// on res, whether the work succeeded or failed.
func run_job(tags []string, pid uint, res chan result) {
	out := result{pid: pid}
	// Registered first, so it runs last: the result is sent only after the
	// response body (if any) has been closed, and it reflects the final
	// state of out.
	defer func() { res <- out }()

	var body io.ReadCloser
	body, out.err = fetch(tags, pid)
	if out.err != nil {
		return
	}
	defer body.Close()

	out.media, out.err = parse(body)
}
// fetch requests one page of posts matching tags from e621 and returns the
// still-open response body.
//
// The request is a GET to https://e621.net/posts.json with the query
// parameters "page" (pid in decimal) and "tags" (tags joined by single
// spaces). The client timeout limits the whole exchange, including reading
// the body, to 10 seconds.
//
// On success the caller owns rc and must close it. A transport error or a
// status other than 200 OK is returned as an error with a nil rc.
func fetch(tags []string, pid uint) (rc io.ReadCloser, err error) {
	client := http.Client{Timeout: 10 * time.Second}

	query := make(url.Values)
	query.Add("page", strconv.FormatUint(uint64(pid), 10))
	query.Add("tags", strings.Join(tags, " "))

	// An empty Method on a client request means GET.
	req := &http.Request{
		URL: &url.URL{
			Scheme:   "https",
			Host:     "e621.net",
			Path:     "/posts.json",
			RawQuery: query.Encode(),
		},
		Header: make(http.Header),
	}
	req.Header.Set("user-agent", "gomon/1.0 (https://git.redxen.eu/caskd/gomon)")

	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode != http.StatusOK {
		// Report the HTTP failure directly instead of letting the caller
		// trip over an error page in the JSON decoder. The close error is
		// irrelevant here: the body is being discarded anyway.
		resp.Body.Close()
		return nil, fmt.Errorf("fetch page %d: unexpected HTTP status %s", pid, resp.Status)
	}
	return resp.Body, nil
}
// parse decodes one posts response read from r and converts its posts into
// Media values.
//
// Decoding is strict: the decoder rejects object keys that e621_API does not
// declare, so additions to the API surface as an error instead of being
// silently ignored. A decode failure is returned wrapped as "JSON parse: ...".
//
// Posts without a file URL are skipped. For the others, Source is the file
// URL, MD5 the file hash, and Tags the concatenation of the general, species,
// character, copyright, artist, lore and meta categories, in that order; the
// "invalid" category is not included. A response with no usable posts yields
// a nil slice and a nil error.
func parse(r io.Reader) (m []Media, err error) {
	var api_resp e621_API
	d := json.NewDecoder(r)
	d.DisallowUnknownFields()
	if err = d.Decode(&api_resp); err != nil {
		// %w keeps the underlying decoder error reachable via errors.Is/As.
		return nil, fmt.Errorf("JSON parse: %w", err)
	}

	for i := range api_resp.Posts {
		post := &api_resp.Posts[i] // index to avoid copying the large post struct
		if post.File.URL == "" {
			// Skip posts that require higher privileges to access
			// - "Some posts cannot be viewed without logging in based on which tags are applied to them."
			// https://e621.net/forum_topics/25717
			continue
		}
		cur := Media{
			Source: post.File.URL,
			MD5:    post.File.Md5,
		}
		for _, tc := range [][]string{
			post.Tags.General,
			post.Tags.Species,
			post.Tags.Character,
			post.Tags.Copyright,
			post.Tags.Artist,
			post.Tags.Lore,
			post.Tags.Meta,
		} {
			cur.Tags = append(cur.Tags, tc...)
		}
		m = append(m, cur)
	}
	return m, nil
}

View File

@ -1,3 +1,21 @@
/*
* This file is part of goboru. (https://git.redxen.eu/caskd/goboru)
* Copyright (c) 2022 Alex-David Denes
*
* goboru is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* any later version.
*
* goboru is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with goboru. If not, see <https://www.gnu.org/licenses/>.
*/
package gelbooru
import (
@ -60,7 +78,7 @@ type result struct {
pid uint
}
func Query(tags []string, j_max uint) (mr []Media, err error) {
func Query(tags Tags, j_max Jobs) (mr []Media, err error) {
res_chan := make(chan result)
var r_arr []result
@ -70,7 +88,7 @@ func Query(tags []string, j_max uint) (mr []Media, err error) {
pid++
}
if pid < j_max {
if pid < uint(j_max) {
continue
}
@ -107,7 +125,7 @@ func Query(tags []string, j_max uint) (mr []Media, err error) {
func run_job(tags []string, pid uint, res chan result) {
r := result{pid: pid}
defer func(x result, c chan result) { c <- x }(r, res)
defer func(x *result, c chan result) { c <- *x }(&r, res)
var rc io.ReadCloser
if rc, r.err = fetch(tags, pid); r.err != nil {
@ -150,6 +168,7 @@ func parse(r io.Reader) (m []Media, err error) {
var api_resp gelbooru_API
d := json.NewDecoder(r)
d.DisallowUnknownFields()
if err = d.Decode(&api_resp); err != nil {
err = fmt.Errorf("JSON parse: %s", err)
return