⬆️ Met à jour le script suite au passage à React

This commit is contained in:
Shikiryu 2018-02-05 22:29:17 +01:00
parent ac21f74c11
commit bb5721f911
3 changed files with 249 additions and 42 deletions

View File

@ -43,48 +43,6 @@ class Deal
$this->id = $id;
}
/**
 * Fetches an ad page with the account's client and scrapes it into a Deal.
 *
 * @param Account $account account whose client performs the GET request
 * @param string  $url     address of the ad page
 *
 * @return Deal
 */
public static function fromURL(Account $account, $url)
{
    $page = $account->getClient()->request('GET', $url);

    $ad = new self($page->filter('[data-savead-id]')->attr('data-savead-id'));
    $ad->setAccount($account);

    $title = $page->filter('h1')->first()->text();
    $ad->setSubject(trim($title));

    // The third breadcrumb entry carries the category label; it is mapped back to its key.
    $category_label = $page->filter('.breadcrumbsNav >ul >li')->eq(2)->text();
    $ad->setCategory(array_search($category_label, Categories::$categories));
    $ad->setType(self::TYPE_OFFER);

    $description = $page->filter('.properties_description')->first()->filter('p')->eq(1)->text();
    $ad->setBody(trim($description));
    $ad->setPrice($page->filter('[itemprop=price]')->first()->attr('content'));

    // The day comes from the node's "content" attribute, the time from the text following "à".
    $published_node = $page->filter('[itemprop=availabilityStarts]')->first();
    $published_at = \DateTime::createFromFormat('Y-m-d', $published_node->attr('content'));
    $published_label = current($published_node->extract(['_text']));
    $clock = explode(':', substr($published_label, strpos($published_label, 'à')+2));
    $published_at->setTime((int) $clock[0], (int) $clock[1]);
    $ad->setDateCreation($published_at);

    // Map of setter name => image URI, filled by whichever gallery markup is present.
    $setters = [];
    if ($page->filter('.item_photo')->count() > 0) {
        $gallery_script = $page->filter('.item_photo')->first()->nextAll()->filter('script')->first()->html();
        preg_match_all("/\"(http.*ad-thumb.*)\"/m", $gallery_script, $matches);
        foreach ($matches[1] as $index => $thumbnail) {
            $setters[sprintf('setImage%s', $index)] = str_replace('thumb', 'large', $thumbnail);
        }
    } else {
        $page->filter('[data-popin-content]')->each(
            function (Crawler $node, $index) use (&$setters) {
                $setters[sprintf('setImage%s', $index)] = $node->attr('data-popin-content');
            }
        );
    }

    foreach ($setters as $setter => $uri) {
        $ad->$setter($uri);
    }

    return $ad;
}
/**
* @return int
*/
@ -351,6 +309,99 @@ class Deal
return true;
}
/**
 * Fetches an ad page and builds a Deal from it.
 *
 * When the page embeds a "window.FLUX_STATE = {...}" script (React rendering),
 * the deal is read from that state object; otherwise the HTML markup is scraped.
 *
 * @param Account $account account whose client performs the GET request
 * @param string  $url     address of the ad page
 *
 * @return Deal|null null when the embedded state object cannot be parsed
 */
public static function fromURL(Account $account, $url)
{
    $crawler = $account->getClient()->request('GET', $url);

    // Lazy quantifier: stop at the FIRST closing </script>. A greedy ".*" would run up to
    // the last </script> on the same line and hand trailing markup to the JS parser.
    $found = preg_match('/<script>window\.FLUX_STATE = (.*?)<\/script>/im', $crawler->html(), $js_object);
    if ($found === 1) {
        return static::parseReactDeal($js_object[1], $account);
    }

    return static::parseHtmlDeal($crawler, $account);
}
/**
 * Builds a Deal from the JS state object embedded in a React-rendered ad page.
 *
 * @param string  $js_object raw JS object literal (the value assigned to window.FLUX_STATE)
 * @param Account $account   account the deal is attached to
 *
 * @return Deal|null the populated deal, or null when the object cannot be parsed
 *                   or does not describe an ad (no "adview.list_id")
 */
protected static function parseReactDeal($js_object, Account $account)
{
    try {
        $data = [];
        JsParser::parse_jsobj($js_object, $data);
    } catch (JsParserException $e) {
        return null;
    }

    // Without an ad id there is nothing meaningful to build.
    if (!isset($data['adview']['list_id'])) {
        return null;
    }
    $ad = $data['adview'];

    $deal = new self($ad['list_id']);
    $deal->setAccount($account);
    $deal->setSubject(isset($ad['subject']) ? $ad['subject'] : null);
    $deal->setCategory(isset($ad['category_id']) ? $ad['category_id'] : null);
    $deal->setType(self::TYPE_OFFER);
    $deal->setBody(isset($ad['body']) ? $ad['body'] : null);
    // "price" is a list whose first entry is used; it may be absent.
    $deal->setPrice(isset($ad['price'][0]) ? $ad['price'][0] : null);

    // NOTE(review): createFromFormat() yields false on a missing/malformed date and that
    // value is forwarded as-is, like before — confirm setDateCreation() tolerates it.
    $published = isset($ad['first_publication_date']) ? $ad['first_publication_date'] : '';
    $deal->setDateCreation(\DateTime::createFromFormat('Y-m-d H:i:s', $published));

    // Ads without pictures carry no usable "urls_large" list: skip instead of iterating null.
    if (isset($ad['images']['urls_large']) && is_array($ad['images']['urls_large'])) {
        foreach ($ad['images']['urls_large'] as $i => $url) {
            $deal->{'setImage'.$i}($url);
        }
    }

    return $deal;
}
/**
 * Scrapes the markup of an ad page into a Deal.
 *
 * @param Crawler $crawler crawler positioned on the fetched ad page
 * @param Account $account account the deal is attached to
 *
 * @return Deal
 */
protected static function parseHtmlDeal(Crawler $crawler, Account $account)
{
    $ad = new self($crawler->filter('[data-savead-id]')->attr('data-savead-id'));
    $ad->setAccount($account);

    $title = $crawler->filter('h1')->first()->text();
    $ad->setSubject(trim($title));

    // The third breadcrumb entry carries the category label; it is mapped back to its key.
    $category_label = $crawler->filter('.breadcrumbsNav >ul >li')->eq(2)->text();
    $ad->setCategory(array_search($category_label, Categories::$categories));
    $ad->setType(self::TYPE_OFFER);

    $description = $crawler->filter('.properties_description')->first()->filter('p')->eq(1)->text();
    $ad->setBody(trim($description));
    $ad->setPrice($crawler->filter('[itemprop=price]')->first()->attr('content'));

    // The day comes from the node's "content" attribute, the time from the text following "à".
    $published_node = $crawler->filter('[itemprop=availabilityStarts]')->first();
    $published_at = \DateTime::createFromFormat('Y-m-d', $published_node->attr('content'));
    $published_label = current($published_node->extract(['_text']));
    $clock = explode(':', substr($published_label, strpos($published_label, 'à')+2));
    $published_at->setTime((int) $clock[0], (int) $clock[1]);
    $ad->setDateCreation($published_at);

    // Map of setter name => image URI, filled by whichever gallery markup is present.
    $setters = [];
    if ($crawler->filter('.item_photo')->count() > 0) {
        $gallery_script = $crawler->filter('.item_photo')->first()->nextAll()->filter('script')->first()->html();
        preg_match_all("/\"(http.*ad-thumb.*)\"/m", $gallery_script, $matches);
        foreach ($matches[1] as $index => $thumbnail) {
            $setters[sprintf('setImage%s', $index)] = str_replace('thumb', 'large', $thumbnail);
        }
    } else {
        $crawler->filter('[data-popin-content]')->each(
            function (Crawler $node, $index) use (&$setters) {
                $setters[sprintf('setImage%s', $index)] = $node->attr('data-popin-content');
            }
        );
    }

    foreach ($setters as $setter => $uri) {
        $ad->$setter($uri);
    }

    return $ad;
}
/**
* @param string $json path to json
*

147
library/JsParser.php Normal file
View File

@ -0,0 +1,147 @@
<?php
namespace Shikiryu\LBCReposter;
/**
 * Small hand-written parser turning a JavaScript object literal into PHP arrays.
 *
 * Parsing convention shared by all methods: every parsed value also consumes the single
 * delimiter that follows it (a comma or the closing character of its container). A container
 * therefore knows it is finished when, right after a value, the remaining text is empty or
 * starts with another delimiter.
 *
 * String values are returned raw: escape sequences (\" \n \uXXXX ...) are NOT decoded.
 */
class JsParser
{
    /**
     * Parses the object literal at the start of $str and stores its members into $data.
     *
     * @param string $str  text starting (after optional whitespace) with "{"
     * @param array  $data receives one entry per member; existing entries are kept
     *
     * @return string|null text left after the object and the one delimiter following it;
     *                     null when $str is empty
     *
     * @throws JsParserException when $str is not an object literal or a member is malformed
     */
    public static function parse_jsobj($str, &$data)
    {
        $str = trim($str);
        if (strlen($str) < 1) {
            return null;
        }
        if ($str[0] !== '{') {
            throw new JsParserException('The given string is not a JS object');
        }
        $str = ltrim((string) substr($str, 1));

        if ($str !== '' && $str[0] === '}') {
            /* empty object: consume our own '}' so that only the parent's delimiter is left */
            $str = (string) substr($str, 1);
        } else {
            /* The last value of this object consumes our '}', so the object is over as soon as
               the text is empty or starts with a delimiter belonging to the parent container. */
            while ($str !== '' && $str[0] !== '}' && $str[0] !== ']' && $str[0] !== ',') {
                if ($str[0] === "'" || $str[0] === '"') {
                    /* quoted key */
                    list($str, $key) = self::parse_jsdata($str, ':');
                } else {
                    /* unquoted key */
                    $match = null;
                    if (!preg_match('/^\s*[a-zA-Z_][a-zA-Z_\d]*\s*:/', $str, $match)) {
                        throw new JsParserException('Invalid key ("' . $str . '")');
                    }
                    $str = (string) substr($str, strlen($match[0]));
                    $key = trim(substr($match[0], 0, -1)); /* discard the ':' */
                }
                list($str, $data[$key]) = self::parse_jsdata($str, '}');
            }
        }

        return self::drop_delimiter($str, ',}]');
    }

    /**
     * Finds the first comma or terminator in $str, whichever comes first.
     *
     * @param string $str
     * @param string $term terminator to look for besides the comma
     *
     * @return int byte offset of the first delimiter
     *
     * @throws JsParserException when $str contains neither a comma nor $term
     */
    public static function comma_or_term_pos($str, $term)
    {
        $comma = strpos($str, ',');
        $closing = strpos($str, $term);
        if ($comma === false && $closing === false) {
            throw new JsParserException('unterminated dict or array');
        }
        if ($comma === false) {
            return $closing;
        }
        if ($closing === false) {
            return $comma;
        }
        return min($closing, $comma);
    }

    /**
     * Parses the value at the start of $str (number, string, object, array, true, false,
     * null or undefined) together with the delimiter that follows it.
     *
     * @param string $str
     * @param string $term closing character of the container the value lives in
     *
     * @return array [remaining text, parsed value]
     *
     * @throws JsParserException on unterminated or unrecognised input
     */
    public static function parse_jsdata($str, $term = "}")
    {
        $str = trim($str);
        if ($str === '') {
            throw new JsParserException('unterminated dict or array');
        }
        $first = $str[0];

        if (is_numeric($first . '0')) {
            /* a number (int or float) */
            $end = self::comma_or_term_pos($str, $term);
            $num = trim(substr($str, 0, $end));
            if (!is_numeric($num)) {
                throw new JsParserException('OOPSIE while parsing number: "' . $num . '"');
            }
            return array(trim((string) substr($str, $end + 1)), $num + 0);
        }

        if ($first === '"' || $first === "'") {
            /* string: look for the closing quote, skipping escaped ones */
            $offset = 1;
            while (true) {
                $pos = strpos($str, $first, $offset);
                if ($pos === false) {
                    throw new JsParserException('unterminated string');
                }
                /* a quote is escaped only when preceded by an odd number of backslashes */
                $backslashes = 0;
                for ($i = $pos - 1; $i > 0 && $str[$i] === '\\'; $i--) {
                    $backslashes++;
                }
                if ($backslashes % 2 === 0) {
                    break;
                }
                $offset = $pos + 1; /* resume AFTER the escaped quote, otherwise we never advance */
            }
            $data = (string) substr($str, 1, $pos - 1);
            $str = substr($str, $pos);
            $end = self::comma_or_term_pos($str, $term);
            return array(trim((string) substr($str, $end + 1)), $data);
        }

        if ($first === '{') {
            /* dict */
            $data = array();
            $str = self::parse_jsobj($str, $data);
            return array(ltrim((string) $str), $data);
        }

        if ($first === '[') {
            /* array */
            $arr = array();
            $str = ltrim((string) substr($str, 1));
            if ($str !== '' && $str[0] === ']') {
                /* empty array: consume ']' and keep the rest of the input */
                $str = (string) substr($str, 1);
            } else {
                /* the last element consumes our ']'; we are done once the parent's delimiter shows up */
                while ($str !== '' && $str[0] !== $term && $str[0] !== ',') {
                    list($str, $val) = self::parse_jsdata($str, ']');
                    $arr[] = $val;
                    $str = trim($str);
                }
            }
            return array(trim(self::drop_delimiter($str, ',' . $term)), $arr);
        }

        /* keywords; true/false/null are matched case-insensitively, undefined is not */
        $keywords = array('true' => true, 'false' => false, 'null' => null);
        foreach ($keywords as $word => $value) {
            if (stripos($str, $word) === 0) {
                $end = self::comma_or_term_pos($str, $term);
                return array(trim((string) substr($str, $end + 1)), $value); /* discard terminator */
            }
        }
        if (strpos($str, 'undefined') === 0) {
            $end = self::comma_or_term_pos($str, $term);
            return array(trim((string) substr($str, $end + 1)), null); /* discard terminator */
        }

        throw new JsParserException('Cannot figure out how to parse "' . $str . '" (term is ' . $term . ')');
    }

    /**
     * Removes the leading character of $str when it is one of $delimiters.
     *
     * @param string $str
     * @param string $delimiters characters accepted as delimiter
     *
     * @return string
     */
    private static function drop_delimiter($str, $delimiters)
    {
        $str = ltrim((string) $str);
        if ($str !== '' && strpos($delimiters, $str[0]) !== false) {
            return (string) substr($str, 1);
        }
        return $str;
    }
}

View File

@ -0,0 +1,9 @@
<?php
namespace Shikiryu\LBCReposter;
/**
 * Exception thrown by JsParser when the given text cannot be parsed as a JS object
 * (not an object literal, invalid key, unterminated container, unknown value...).
 */
class JsParserException extends \Exception
{
}