WebCollage/src/Collector/DeviantartCollector.php

73 lines
2.1 KiB
PHP
Raw Normal View History

2021-02-22 16:38:47 +01:00
<?php
namespace Shikiryu\WebGobbler\Collector;
use Goutte\Client;
use Shikiryu\WebGobbler\Collector;
class DeviantartCollector extends Collector
{
    /**
     * Listing page fetched when no search keyword is configured.
     */
    public const RANDOM_URL = 'https://www.deviantart.com/popular/deviations';

    /**
     * sprintf() template of the search page; %s receives the URL-encoded keyword.
     */
    public const SEARCH_URL = 'https://www.deviantart.com/search?q=%s';

    /**
     * Regular expression matching deviation links in a listing page.
     *
     * Group 1 captures everything after "/art/" up to the closing quote; the
     * last dash-separated segment of that capture is read as the deviation ID.
     */
    public const RE_ALLDEVIATIONID = '/href="https:\/\/www.deviantart.com\/[^\/]+\/art\/([^"]+)"/m';

    /**
     * Downloads a single image into the pool directory.
     *
     * @return int Number of images actually saved (0 or 1).
     */
    public function getRandomImage()
    {
        return $this->getRandomImages(1);
    }

    /**
     * Downloads up to $number randomly picked images into the pool directory.
     *
     * The listing page is the keyword search page when the
     * 'collector.keywords.keywords' config entry is set, and the popular
     * deviations page otherwise. Deviations whose page or image cannot be
     * retrieved are skipped instead of aborting the whole run.
     *
     * @param int $number Maximum number of images to download.
     *
     * @return int Number of images actually saved, which may be lower than
     *             $number (0 when the listing is unreachable or has no links).
     */
    public function getRandomImages(int $number = 1)
    {
        // array_rand() rejects a request for fewer than one element.
        if ($number < 1) {
            return 0;
        }

        $html = $this->fetchListingHtml();
        if (null === $html) {
            return 0;
        }

        $deviant_ids = $this->extractDeviationIds($html);
        // array_rand() also rejects an empty array.
        if ([] === $deviant_ids) {
            return 0;
        }

        // array_rand() returns a bare key when asked for one element and an
        // array of keys otherwise; the cast normalises both to an array.
        $picked_keys = (array)array_rand($deviant_ids, min($number, count($deviant_ids)));

        $client = new Client();
        $downloaded = 0;
        foreach ($picked_keys as $picked_key) {
            if ($this->downloadDeviation($client, $deviant_ids[$picked_key])) {
                $downloaded++;
            }
        }

        return $downloaded;
    }

    /**
     * @return string
     */
    public function getName()
    {
        return 'collector_deviantart';
    }

    /**
     * Fetches the HTML of the listing page (keyword search or popular page).
     *
     * @return string|null The page body, or null when it could not be read.
     */
    private function fetchListingHtml(): ?string
    {
        $word_to_search = $this->getConfig()->get('collector.keywords.keywords', false);
        $url = false !== $word_to_search
            ? sprintf(self::SEARCH_URL, urlencode($word_to_search))
            : self::RANDOM_URL;

        $html = file_get_contents($url);

        return false === $html ? null : $html;
    }

    /**
     * Extracts the distinct deviation IDs linked from a listing page.
     *
     * @param string $html Listing page body.
     *
     * @return int[] Re-indexed list of unique, strictly positive IDs.
     */
    private function extractDeviationIds(string $html): array
    {
        // preg_match_all() yields 0 for "no match" and false on error.
        if (!preg_match_all(self::RE_ALLDEVIATIONID, $html, $matches)) {
            return [];
        }

        $ids = array_map(static function ($slug) {
            $parts = explode('-', $slug);

            return (int)end($parts);
        }, $matches[1]);

        // A slug without a numeric tail casts to 0, which is not a usable ID.
        $ids = array_filter($ids, static function ($id) {
            return $id > 0;
        });

        return array_values(array_unique($ids));
    }

    /**
     * Downloads the main image of one deviation into the pool directory.
     *
     * @param Client $client     HTTP client used to load the deviation page.
     * @param int    $deviant_id Deviation ID appended to the /deviation/ URL.
     *
     * @return bool True when the image file was written, false when the page
     *              has no stage image or the image could not be read or saved.
     */
    private function downloadDeviation(Client $client, int $deviant_id): bool
    {
        $crawler = $client->request('GET', 'https://www.deviantart.com/deviation/' . $deviant_id);

        // attr() throws on an empty node list, so check the match count first.
        $images = $crawler->filter('[data-hook="art_stage"] img');
        if (0 === $images->count()) {
            return false;
        }

        $img_url = $images->eq(0)->attr('src');
        if (!is_string($img_url) || '' === $img_url) {
            return false;
        }

        // parse_url() returns null/false when the URL has no usable path.
        $path = parse_url($img_url, PHP_URL_PATH);
        $file_name = is_string($path) ? basename($path) : '';
        if ('' === $file_name) {
            return false;
        }

        $content = file_get_contents($img_url);
        if (false === $content) {
            return false;
        }

        return false !== file_put_contents($this->getPoolDirectory() . '/' . $file_name, $content);
    }
}