Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 104 additions & 8 deletions pkg/scraper/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,27 +24,123 @@ func (e scraperAction) IsValid() bool {
return false
}

type scraperActionImpl interface {
// urlScraperActionImpl is the contract for action-specific scrapers that can
// scrape content starting from a URL.
type urlScraperActionImpl interface {
// scrapeByURL scrapes content of type ty from url and returns the scraped
// content or an error.
scrapeByURL(ctx context.Context, url string, ty ScrapeContentType) (ScrapedContent, error)
}

// getURLScraper builds the URL scraper implementation selected by def.Action.
//
// The script, xpath and json actions each get a dedicated URL scraper that
// carries this Definition, the global configuration and the by-URL action
// definition; the xpath and json variants additionally hold the HTTP client.
// The stash action is delegated to newStashScraper.
//
// It panics when def.Action is not one of the known scraper actions.
func (c Definition) getURLScraper(def ByURLDefinition, client *http.Client, globalConfig GlobalConfig) urlScraperActionImpl {
	var impl urlScraperActionImpl

	switch def.Action {
	case scraperActionScript:
		impl = &scriptURLScraper{
			scriptScraper: scriptScraper{definition: c, globalConfig: globalConfig},
			definition:    def,
		}
	case scraperActionStash:
		impl = newStashScraper(client, c, globalConfig)
	case scraperActionXPath:
		impl = &xpathURLScraper{
			xpathScraper: xpathScraper{definition: c, globalConfig: globalConfig, client: client},
			definition:   def,
		}
	case scraperActionJson:
		impl = &jsonURLScraper{
			jsonScraper: jsonScraper{definition: c, globalConfig: globalConfig, client: client},
			definition:  def,
		}
	default:
		// An unrecognised action cannot be mapped to an implementation.
		panic("unknown scraper action: " + def.Action)
	}

	return impl
}

// nameScraperActionImpl is the contract for action-specific scrapers that can
// search for content by name.
type nameScraperActionImpl interface {
// scrapeByName looks up content of type ty using name and returns the
// resulting slice of scraped content or an error.
scrapeByName(ctx context.Context, name string, ty ScrapeContentType) ([]ScrapedContent, error)
}

// getNameScraper builds the by-name scraper implementation selected by
// def.Action.
//
// The script, xpath and json actions each get a dedicated name scraper that
// carries this Definition, the global configuration and the by-name action
// definition; the xpath and json variants additionally hold the HTTP client.
// The stash action is delegated to newStashScraper.
//
// It panics when def.Action is not one of the known scraper actions.
func (c Definition) getNameScraper(def ByNameDefinition, client *http.Client, globalConfig GlobalConfig) nameScraperActionImpl {
	var impl nameScraperActionImpl

	switch def.Action {
	case scraperActionScript:
		impl = &scriptNameScraper{
			scriptScraper: scriptScraper{definition: c, globalConfig: globalConfig},
			definition:    def,
		}
	case scraperActionStash:
		impl = newStashScraper(client, c, globalConfig)
	case scraperActionXPath:
		impl = &xpathNameScraper{
			xpathScraper: xpathScraper{definition: c, globalConfig: globalConfig, client: client},
			definition:   def,
		}
	case scraperActionJson:
		impl = &jsonNameScraper{
			jsonScraper: jsonScraper{definition: c, globalConfig: globalConfig, client: client},
			definition:  def,
		}
	default:
		// An unrecognised action cannot be mapped to an implementation.
		panic("unknown scraper action: " + def.Action)
	}

	return impl
}

// fragmentScraperActionImpl is the contract for action-specific scrapers that
// scrape from partially known data (a fragment) or from an existing scene,
// gallery or image.
type fragmentScraperActionImpl interface {
// scrapeByFragment scrapes content based on the given Input.
scrapeByFragment(ctx context.Context, input Input) (ScrapedContent, error)

// The methods below scrape using an existing stored object as the query.
scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*models.ScrapedScene, error)
scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*models.ScrapedGallery, error)
scrapeImageByImage(ctx context.Context, image *models.Image) (*models.ScrapedImage, error)
}

func (c config) getScraper(scraper scraperTypeConfig, client *http.Client, globalConfig GlobalConfig) scraperActionImpl {
switch scraper.Action {
// getFragmentScraper builds the fragment scraper implementation selected by
// actionDef.Action.
//
// The script, xpath and json actions each get a dedicated fragment scraper
// that carries this Definition, the global configuration and the by-fragment
// action definition; the xpath and json variants additionally hold the HTTP
// client. The stash action is delegated to newStashScraper.
//
// It panics when actionDef.Action is not one of the known scraper actions.
func (c Definition) getFragmentScraper(actionDef ByFragmentDefinition, client *http.Client, globalConfig GlobalConfig) fragmentScraperActionImpl {
	switch actionDef.Action {
	case scraperActionScript:
		return &scriptFragmentScraper{
			scriptScraper: scriptScraper{
				definition:   c,
				globalConfig: globalConfig,
			},
			definition: actionDef,
		}
	case scraperActionStash:
		return newStashScraper(client, c, globalConfig)
	case scraperActionXPath:
		return &xpathFragmentScraper{
			xpathScraper: xpathScraper{
				definition:   c,
				globalConfig: globalConfig,
				client:       client,
			},
			definition: actionDef,
		}
	case scraperActionJson:
		return &jsonFragmentScraper{
			jsonScraper: jsonScraper{
				definition:   c,
				globalConfig: globalConfig,
				client:       client,
			},
			definition: actionDef,
		}
	}

	// An unrecognised action cannot be mapped to an implementation.
	panic("unknown scraper action: " + actionDef.Action)
}
2 changes: 1 addition & 1 deletion pkg/scraper/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ func (c *Cache) ReloadScrapers() {
if err != nil {
logger.Errorf("Error loading scraper %s: %v", fp, err)
} else {
scraper := newGroupScraper(*conf, c.globalConfig)
scraper := scraperFromDefinition(*conf, c.globalConfig)
scrapers[scraper.spec().ID] = scraper
}
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/scraper/cookies.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
)

// jar constructs a cookie jar from a configuration
func (c config) jar() (*cookiejar.Jar, error) {
func (c Definition) jar() (*cookiejar.Jar, error) {
opts := c.DriverOptions
jar, err := cookiejar.New(&cookiejar.Options{
PublicSuffixList: publicsuffix.List,
Expand Down Expand Up @@ -77,7 +77,7 @@ func randomSequence(n int) string {
}

// printCookies prints all cookies from the given cookie jar
func printCookies(jar *cookiejar.Jar, scraperConfig config, msg string) {
func printCookies(jar *cookiejar.Jar, scraperConfig Definition, msg string) {
driverOptions := scraperConfig.DriverOptions
if driverOptions != nil && !driverOptions.UseCDP {
var foundURLs []*url.URL
Expand Down
48 changes: 25 additions & 23 deletions pkg/scraper/group.go → pkg/scraper/defined_scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,26 @@ import (
"github.com/stashapp/stash/pkg/models"
)

type group struct {
config config
// definedScraper implements the scraper interface using a Definition object.
// Its methods delegate to the action-specific scrapers built from config.
type definedScraper struct {
// config is the scraper Definition this scraper is driven by.
config Definition

// globalConf is passed through to every action scraper that gets built.
globalConf GlobalConfig
}

func newGroupScraper(c config, globalConfig GlobalConfig) scraper {
return group{
// scraperFromDefinition wraps the Definition c and the global configuration
// in a definedScraper value.
func scraperFromDefinition(c Definition, globalConfig GlobalConfig) definedScraper {
	var s definedScraper
	s.config = c
	s.globalConf = globalConfig
	return s
}

func (g group) spec() Scraper {
// spec returns the Scraper specification reported by the underlying
// Definition.
func (g definedScraper) spec() Scraper {
	s := g.config.spec()
	return s
}

// fragmentScraper finds an appropriate fragment scraper based on input.
func (g group) fragmentScraper(input Input) *scraperTypeConfig {
func (g definedScraper) fragmentScraper(input Input) *ByFragmentDefinition {
switch {
case input.Performer != nil:
return g.config.PerformerByFragment
Expand All @@ -43,7 +44,7 @@ func (g group) fragmentScraper(input Input) *scraperTypeConfig {
return nil
}

func (g group) viaFragment(ctx context.Context, client *http.Client, input Input) (ScrapedContent, error) {
func (g definedScraper) viaFragment(ctx context.Context, client *http.Client, input Input) (ScrapedContent, error) {
stc := g.fragmentScraper(input)
if stc == nil {
// If there's no performer fragment scraper in the group, we try to use
Expand All @@ -56,38 +57,38 @@ func (g group) viaFragment(ctx context.Context, client *http.Client, input Input
return nil, ErrNotSupported
}

s := g.config.getScraper(*stc, client, g.globalConf)
s := g.config.getFragmentScraper(*stc, client, g.globalConf)
return s.scrapeByFragment(ctx, input)
}

func (g group) viaScene(ctx context.Context, client *http.Client, scene *models.Scene) (*models.ScrapedScene, error) {
// viaScene scrapes metadata for an existing scene using the Definition's
// SceneByFragment action.
//
// It returns ErrNotSupported when the Definition has no SceneByFragment
// configuration; otherwise it returns whatever the fragment scraper's
// scrapeSceneByScene produces.
func (g definedScraper) viaScene(ctx context.Context, client *http.Client, scene *models.Scene) (*models.ScrapedScene, error) {
	if g.config.SceneByFragment == nil {
		return nil, ErrNotSupported
	}

	// The scraper is built exactly once, through the fragment-specific factory.
	s := g.config.getFragmentScraper(*g.config.SceneByFragment, client, g.globalConf)
	return s.scrapeSceneByScene(ctx, scene)
}

func (g group) viaGallery(ctx context.Context, client *http.Client, gallery *models.Gallery) (*models.ScrapedGallery, error) {
// viaGallery scrapes metadata for an existing gallery using the Definition's
// GalleryByFragment action.
//
// It returns ErrNotSupported when the Definition has no GalleryByFragment
// configuration; otherwise it returns whatever the fragment scraper's
// scrapeGalleryByGallery produces.
func (g definedScraper) viaGallery(ctx context.Context, client *http.Client, gallery *models.Gallery) (*models.ScrapedGallery, error) {
	if g.config.GalleryByFragment == nil {
		return nil, ErrNotSupported
	}

	// The scraper is built exactly once, through the fragment-specific factory.
	s := g.config.getFragmentScraper(*g.config.GalleryByFragment, client, g.globalConf)
	return s.scrapeGalleryByGallery(ctx, gallery)
}

func (g group) viaImage(ctx context.Context, client *http.Client, gallery *models.Image) (*models.ScrapedImage, error) {
// viaImage scrapes metadata for an existing image using the Definition's
// ImageByFragment action.
//
// It returns ErrNotSupported when the Definition has no ImageByFragment
// configuration; otherwise it returns whatever the fragment scraper's
// scrapeImageByImage produces.
//
// NOTE(review): the parameter is named gallery although it is a *models.Image;
// the name is kept unchanged here.
func (g definedScraper) viaImage(ctx context.Context, client *http.Client, gallery *models.Image) (*models.ScrapedImage, error) {
	if g.config.ImageByFragment == nil {
		return nil, ErrNotSupported
	}

	// The scraper is built exactly once, through the fragment-specific factory.
	s := g.config.getFragmentScraper(*g.config.ImageByFragment, client, g.globalConf)
	return s.scrapeImageByImage(ctx, gallery)
}

func loadUrlCandidates(c config, ty ScrapeContentType) []*scrapeByURLConfig {
func loadUrlCandidates(c Definition, ty ScrapeContentType) []*ByURLDefinition {
switch ty {
case ScrapeContentTypePerformer:
return c.PerformerByURL
Expand All @@ -104,12 +105,13 @@ func loadUrlCandidates(c config, ty ScrapeContentType) []*scrapeByURLConfig {
panic("loadUrlCandidates: unreachable")
}

func (g group) viaURL(ctx context.Context, client *http.Client, url string, ty ScrapeContentType) (ScrapedContent, error) {
func (g definedScraper) viaURL(ctx context.Context, client *http.Client, url string, ty ScrapeContentType) (ScrapedContent, error) {
candidates := loadUrlCandidates(g.config, ty)
for _, scraper := range candidates {
if scraper.matchesURL(url) {
s := g.config.getScraper(scraper.scraperTypeConfig, client, g.globalConf)
ret, err := s.scrapeByURL(ctx, url, ty)
u := replaceURL(url, *scraper) // allow a URL Replace for url-queries
s := g.config.getURLScraper(*scraper, client, g.globalConf)
ret, err := s.scrapeByURL(ctx, u, ty)
if err != nil {
return nil, err
}
Expand All @@ -123,31 +125,31 @@ func (g group) viaURL(ctx context.Context, client *http.Client, url string, ty S
return nil, nil
}

func (g group) viaName(ctx context.Context, client *http.Client, name string, ty ScrapeContentType) ([]ScrapedContent, error) {
// viaName searches for content of type ty by name.
//
// Only performers and scenes are handled, via the Definition's
// PerformerByName and SceneByName actions respectively. When ty is any other
// content type, or the matching by-name configuration is nil, the returned
// error wraps ErrNotSupported.
func (g definedScraper) viaName(ctx context.Context, client *http.Client, name string, ty ScrapeContentType) ([]ScrapedContent, error) {
	switch ty {
	case ScrapeContentTypePerformer:
		if g.config.PerformerByName == nil {
			// Fall through to the not-supported error below.
			break
		}

		s := g.config.getNameScraper(*g.config.PerformerByName, client, g.globalConf)
		return s.scrapeByName(ctx, name, ty)
	case ScrapeContentTypeScene:
		if g.config.SceneByName == nil {
			// Fall through to the not-supported error below.
			break
		}

		s := g.config.getNameScraper(*g.config.SceneByName, client, g.globalConf)
		return s.scrapeByName(ctx, name, ty)
	}

	return nil, fmt.Errorf("%w: cannot load %v by name", ErrNotSupported, ty)
}

func (g group) supports(ty ScrapeContentType) bool {
// supports reports whether the underlying Definition supports scraping
// content of type ty.
func (g definedScraper) supports(ty ScrapeContentType) bool {
	ok := g.config.supports(ty)
	return ok
}

func (g group) supportsURL(url string, ty ScrapeContentType) bool {
// supportsURL reports whether the underlying Definition matches url for
// content of type ty.
func (g definedScraper) supportsURL(url string, ty ScrapeContentType) bool {
	matched := g.config.matchesURL(url, ty)
	return matched
}
Loading