62 lines
1.6 KiB
PHP
62 lines
1.6 KiB
PHP
<?php
|
|
|
|
namespace Shikiryu\WebGobbler\Collector;
|
|
|
|
use Goutte\Client;
|
|
use Shikiryu\WebGobbler\Collector;
|
|
|
|
/**
 * Collector that picks a random thumbnail from a Yahoo image search result
 * page and stores it in the pool directory.
 *
 * Relies on helpers that are not defined in this class (getConfig(),
 * generateRandomWord(), getPoolDirectory(), reverse_url()); they are presumably
 * inherited from Collector -- confirm against the parent class.
 */
class YahooImageCollector extends Collector
{
    public const SEARCH_URL = 'https://images.search.yahoo.com/search/images?p=%s';

    /**
     * Fewest `noscript img` nodes a result page must contain to be used.
     */
    private const MIN_RESULTS = 20;

    /**
     * Maximum number of search/download attempts made by one getRandomImage() call.
     *
     * Bounds the retry that used to be an unbounded recursion: with a fixed
     * configured keyword every retry may hit the same poor result page.
     */
    private const MAX_ATTEMPTS = 5;

    /**
     * @return string identifier of this collector
     */
    public function getName()
    {
        return 'collector_yahooimage';
    }

    /**
     * Searches Yahoo Images and saves one randomly chosen result to the pool directory.
     *
     * The search term is read from the `collector.keywords.keywords` config entry,
     * with a generated random word passed as the fallback value. A result page is
     * only used when it has at least MIN_RESULTS thumbnails; otherwise the search
     * is retried, up to MAX_ATTEMPTS times in total.
     *
     * @return int 1 when an image was written, 0 when every attempt failed
     */
    public function getRandomImage()
    {
        for ($attempt = 0; $attempt < self::MAX_ATTEMPTS; $attempt++) {
            // Re-read on every attempt so a fresh random word is used when no keyword is configured.
            $word_to_search = $this->getConfig()->get('collector.keywords.keywords', $this->generateRandomWord());

            $client = new Client();
            // Encode the term so spaces, '&', '#', ... cannot corrupt the query string.
            $crawler = $client->request('GET', sprintf(self::SEARCH_URL, urlencode((string) $word_to_search)));
            $imgs = $crawler->filter('noscript img');
            if ($imgs->count() < self::MIN_RESULTS) {
                continue;
            }

            $img_url = $imgs->eq(random_int(0, $imgs->count() - 1))->attr('src');
            if (!is_string($img_url) || $img_url === '') {
                continue;
            }

            // parse_url() returns false on malformed URLs and omits 'query' when there is none.
            $parsed_url = parse_url($img_url);
            if (!is_array($parsed_url) || !isset($parsed_url['query'])) {
                continue;
            }

            parse_str($parsed_url['query'], $url_vars);
            if (!isset($url_vars['id']) || !is_string($url_vars['id']) || $url_vars['id'] === '') {
                continue;
            }

            // The id comes from remote HTML: keep only characters that are safe in a
            // file name so it cannot contain path separators.
            $name = preg_replace('/[^A-Za-z0-9._-]/', '_', $url_vars['id']);
            if (!is_string($name) || $name === '') {
                continue;
            }

            // Drop the width/height parameters before rebuilding the URL
            // (presumably to request the image without the thumbnail size constraint).
            unset($url_vars['w'], $url_vars['h']);
            // reverse_url() receives the query as an array, exactly as before.
            $parsed_url['query'] = $url_vars;
            $img_url = $this->reverse_url($parsed_url);

            $contents = file_get_contents($img_url);
            if ($contents === false || $contents === '') {
                // Download failed: do not leave an empty .jpg in the pool.
                continue;
            }

            if (file_put_contents($this->getPoolDirectory() . '/' . $name . '.jpg', $contents) === false) {
                continue;
            }

            return 1;
        }

        return 0;
    }

    /**
     * Calls getRandomImage() $number times and sums the results.
     *
     * @param int $number how many images to try to fetch
     *
     * @return int sum of the values returned by getRandomImage()
     */
    public function getRandomImages(int $number)
    {
        $count = 0;
        for ($i = 0; $i < $number; $i++) {
            $count += $this->getRandomImage();
        }

        return $count;
    }
}
|