Compare commits
5 Commits
Author | SHA1 | Date |
---|---|---|
Shikiryu | 7cada3dd07 | |
Shikiryu | bb5721f911 | |
Shikiryu | ac21f74c11 | |
Shikiryu | c0c02459f8 | |
Shikiryu | c078fc1c68 |
|
@ -1,3 +1,7 @@
|
|||
[DEBUG]
|
||||
log=true
|
||||
verbose=1
|
||||
|
||||
[CREDENTIALS]
|
||||
login=xxxxx
|
||||
password=xxxxx
|
||||
|
|
|
@ -13,6 +13,14 @@ class Actions
|
|||
const DELETE_URL = 'https://compteperso.leboncoin.fr/store/main?cmd=adservices';
|
||||
const UPLOAD_URL = 'https://www.leboncoin.fr/ai/photo_upload_ajax/0';
|
||||
|
||||
const VERBOSE_INFO = 1;
|
||||
const VERBOSE_PAGE = 2;
|
||||
const VERBOSE_REQUEST = 4;
|
||||
const VERBOSE_ALL = 8;
|
||||
|
||||
protected $debug = false;
|
||||
protected $verbose = self::VERBOSE_INFO;
|
||||
|
||||
/**
|
||||
* @var Account
|
||||
*/
|
||||
|
@ -27,6 +35,39 @@ class Actions
|
|||
$this->account = $account;
|
||||
}
|
||||
|
||||
/**
 * Turns the page-dump debugging on or off.
 *
 * @param bool|string $debug false disables the dumps; any other value is
 *                           used by addPageDebug() as the directory prefix
 *                           of the dumped HTML files
 * @return Actions the current instance, for chaining
 */
public function setDebug($debug)
{
    $this->debug = $debug;

    return $this;
}
|
||||
|
||||
/**
 * Stores the verbosity level (defaults to self::VERBOSE_INFO).
 *
 * @param int $verbose presumably one of the VERBOSE_* constants - confirm with callers
 * @return Actions the current instance, for chaining
 */
public function setVerbose($verbose)
{
    $this->verbose = $verbose;

    return $this;
}
|
||||
|
||||
/**
 * Dumps the HTML of a crawled page to "<debug>/<YmdHi>-<action>.html"
 * when debugging is enabled.
 *
 * @param string  $action  label of the step, used in the file name
 * @param Crawler $crawler page whose HTML is dumped
 * @return bool|int true when debugging is off, otherwise the result of
 *                  file_put_contents() (bytes written, or false on failure)
 */
private function addPageDebug($action, Crawler $crawler)
{
    // Nothing to dump when debugging is disabled.
    if ($this->debug === false) {
        return true;
    }

    $target = sprintf('%s/%s-%s.html', $this->debug, date('YmdHi'), $action);

    return file_put_contents($target, $crawler->html());
}
|
||||
|
||||
|
||||
/**
|
||||
* Check if connected, if not, try to connect you
|
||||
|
@ -40,8 +81,10 @@ class Actions
|
|||
if (!$this->account->isConnected()) {
|
||||
// Let's connect to your account (or not)
|
||||
$crawler = $this->account->getClient()->request('GET', self::LOGIN_URL);
|
||||
$this->addPageDebug('connect', $crawler);
|
||||
$form = $crawler->selectButton('Se connecter')->form();
|
||||
$crawler = $this->account->getClient()->submit($form, ['st_username' => $this->account->getConfig()->login, 'st_passwd' => $this->account->getConfig()->password]);
|
||||
$this->addPageDebug('checkIsConnected', $crawler);
|
||||
$this->account->setConnected($crawler->filter('.account_userinfo')->count() > 0);
|
||||
}
|
||||
return $this->account->isConnected();
|
||||
|
@ -57,6 +100,7 @@ class Actions
|
|||
if ($this->connect()) {
|
||||
// Let's go to our dashboard
|
||||
$crawler = $this->account->getClient()->request('GET', self::ACCOUNT_URL);
|
||||
$this->addPageDebug('retrieve', $crawler);
|
||||
// Let's list our existing deals
|
||||
$deals = $crawler->filter('#dashboard .list .element')->each(
|
||||
function (Crawler $node) {
|
||||
|
@ -86,8 +130,20 @@ class Actions
|
|||
{
|
||||
try {
|
||||
$crawler = $this->account->getClient()->request('GET', self::ADD_URL);
|
||||
$this->addPageDebug('add-form', $crawler);
|
||||
$form = $crawler->selectButton('Valider')->form();
|
||||
$image0 = sprintf('%s/%s/image0.jpg', DEALS_DIR, $deal->getId());
|
||||
$path = $deal->getPath();
|
||||
if (empty($path)) {
|
||||
$path = sprintf('%s/%s', DEALS_DIR, $deal->getId());
|
||||
}
|
||||
$images = [];
|
||||
$i = 0;
|
||||
foreach (new \DirectoryIterator($path) as $file) {
|
||||
if ($file->isFile() && $file->getExtension() === 'jpg') {
|
||||
$images['image'.$i] = $file->getRealPath();
|
||||
$i++;
|
||||
}
|
||||
}
|
||||
$fields = $form->getPhpValues();
|
||||
$fields = array_merge(
|
||||
$fields,
|
||||
|
@ -118,12 +174,15 @@ class Actions
|
|||
);
|
||||
$uri = $form->getUri();
|
||||
// It needs to be done twice !!
|
||||
$this->account->getClient()->request('POST', $uri, $fields, ['image0' => $image0]);
|
||||
$crawler = $this->account->getClient()->request('POST', $uri, $fields, ['image0' => $image0]);
|
||||
$crawler = $this->account->getClient()->request('POST', $uri, $fields, $images);
|
||||
$this->addPageDebug('add-1', $crawler);
|
||||
$crawler = $this->account->getClient()->request('POST', $uri, $fields, $images);
|
||||
$this->addPageDebug('add-2', $crawler);
|
||||
// TODO need to check if we're in the good page
|
||||
// Let's validate
|
||||
$form = $crawler->selectButton('Valider mon annonce')->form();
|
||||
$form = $crawler->selectButton('Valider')->form();
|
||||
$crawler = $this->account->getClient()->submit($form, ['accept_rule' => 1]);
|
||||
$this->addPageDebug('add-validation', $crawler);
|
||||
// TODO return if it's the validation page or not
|
||||
return true;
|
||||
} catch (\Exception $e) {
|
||||
|
@ -148,10 +207,16 @@ class Actions
|
|||
'continue' => 'Continuer'
|
||||
];
|
||||
$crawler = $this->account->getClient()->request('POST', self::DELETE_URL, $fields);
|
||||
// confirmation
|
||||
$form = $crawler->selectButton('Valider')->form();
|
||||
$crawler = $this->account->getClient()->submit($form, ['delete_reason' => '1']);
|
||||
// TODO return if it's the validation page or not
|
||||
$this->addPageDebug('delete', $crawler);
|
||||
// check s'il y a un bouton "Valider", s'il y en a pas c'est que la demande a déjà été faite
|
||||
$node = $crawler->filterXPath('//input[@value="Valider"]');
|
||||
if ($node->count() == 1) {
|
||||
// confirmation
|
||||
$form = $crawler->selectButton('Valider')->form();
|
||||
$crawler = $this->account->getClient()->submit($form, ['delete_reason' => '1']);
|
||||
// TODO return if it's the validation page or not
|
||||
$this->addPageDebug('delete-validation', $crawler);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
166
library/Deal.php
166
library/Deal.php
|
@ -31,6 +31,8 @@ class Deal
|
|||
protected $image2;
|
||||
/** @var \DateTime */
|
||||
protected $datecreation;
|
||||
/** @var string */
|
||||
protected $path;
|
||||
|
||||
/**
|
||||
* Deal constructor.
|
||||
|
@ -41,48 +43,6 @@ class Deal
|
|||
$this->id = $id;
|
||||
}
|
||||
|
||||
public static function fromURL(Account $account, $url)
|
||||
{
|
||||
$client = $account->getClient();
|
||||
$crawler = $client->request('GET', $url);
|
||||
$deal = new self($crawler->filter('[data-savead-id]')->attr('data-savead-id'));
|
||||
$deal->setAccount($account);
|
||||
$deal->setSubject($crawler->filter('h1')->first()->text());
|
||||
$deal->setCategory(array_search($crawler->filter('.breadcrumbsNav >ul >li')->eq(2)->text(), Categories::$categories));
|
||||
$deal->setType(self::TYPE_OFFER);
|
||||
$deal->setBody($crawler->filter('.properties_description')->first()->filter('p')->eq(1)->text());
|
||||
$deal->setPrice($crawler->filter('[itemprop=price]')->first()->attr('content'));
|
||||
$date_node = $crawler->filter('[itemprop=availabilityStarts]')->first();
|
||||
$date = \DateTime::createFromFormat('Y-m-d', $date_node->attr('content'));
|
||||
$hours = current($date_node->extract(['_text']));
|
||||
$hours = substr($hours, strpos($hours, 'à')+2);
|
||||
list($hour, $min) = explode(':', $hours);
|
||||
$date->setTime((int) $hour, (int) $min);
|
||||
$deal->setDateCreation($date);
|
||||
if ($crawler->filter('.item_photo')->count() > 0) {
|
||||
$script = $crawler ->filter('.item_photo')->first()->nextAll()
|
||||
->filter('script')->first()->html();
|
||||
preg_match_all("/\"(http.*ad-thumb.*)\"/m", $script, $urls);
|
||||
$urls = $urls[1];
|
||||
$images = [];
|
||||
foreach ($urls as $i => $url) {
|
||||
$images[] = [sprintf('setImage%s', $i) => str_replace('thumb', 'large', $url)];
|
||||
}
|
||||
} else {
|
||||
$images = $crawler->filter('[data-popin-content]')->each(
|
||||
function (Crawler $node, $i) {
|
||||
return [ sprintf('setImage%s', $i) => $node->attr('data-popin-content')];
|
||||
}
|
||||
);
|
||||
}
|
||||
foreach ($images as $image) {
|
||||
foreach ($image as $method => $uri) {
|
||||
$deal->$method($uri);
|
||||
}
|
||||
}
|
||||
return $deal;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return int
|
||||
*/
|
||||
|
@ -254,15 +214,38 @@ class Deal
|
|||
}
|
||||
|
||||
/**
 * Sets the creation date of the deal.
 *
 * Accepts a ready \DateTime, a date string understood by \DateTime, or the
 * array produced when a \DateTime is JSON-encoded and decoded again
 * (keys "date", "timezone_type", "timezone").
 *
 * @param array|\DateTime|string $datecreation
 * @return Deal
 */
public function setDateCreation($datecreation)
{
    if (is_array($datecreation)) {
        // Honour the serialized timezone when there is one; otherwise the
        // date would silently be re-read in the default timezone.
        $timezone = null;
        if (!empty($datecreation['timezone']) && is_string($datecreation['timezone'])) {
            try {
                $timezone = new \DateTimeZone($datecreation['timezone']);
            } catch (\Exception $e) {
                // Unknown timezone identifier: fall back to the default one.
                $timezone = null;
            }
        }
        $datecreation = $timezone === null
            ? new \DateTime($datecreation['date'])
            : new \DateTime($datecreation['date'], $timezone);
    } elseif (is_string($datecreation)) {
        $datecreation = new \DateTime($datecreation);
    }
    $this->datecreation = $datecreation;

    return $this;
}
|
||||
|
||||
/**
 * Directory associated with this deal, as stored by setPath().
 *
 * @return string
 */
public function getPath()
{
    return $this->path;
}
|
||||
|
||||
/**
 * Stores the directory associated with this deal.
 *
 * @param string $path
 * @return Deal the current instance, for chaining
 */
public function setPath($path)
{
    $this->path = $path;

    return $this;
}
|
||||
|
||||
/**
|
||||
* @return Account
|
||||
*/
|
||||
|
@ -290,7 +273,7 @@ class Deal
|
|||
$method = sprintf('get%s', ucfirst($prop->getName()));
|
||||
$json[$prop->getName()] = $this->$method();
|
||||
}
|
||||
return \json_encode($json);
|
||||
return \json_encode($json, JSON_UNESCAPED_UNICODE);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -326,6 +309,99 @@ class Deal
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
 * Builds a Deal from its public page.
 *
 * When the page embeds a "window.FLUX_STATE = ..." script, that JS object
 * is parsed; otherwise the deal is scraped from the HTML markup.
 *
 * @param Account $account
 * @param string  $url
 *
 * @return Deal|null null when the embedded JS object cannot be parsed
 */
public static function fromURL(Account $account, $url)
{
    $crawler = $account->getClient()->request('GET', $url);
    $html = $crawler->html();

    // Plain string search instead of a regex: the state object is cut at the
    // first closing tag that follows it, whether the page is minified on a
    // single line or spread over several lines.
    $marker = '<script>window.FLUX_STATE = ';
    $start = stripos($html, $marker);
    if ($start !== false) {
        $start += strlen($marker);
        $end = stripos($html, '</script>', $start);
        if ($end !== false) {
            return static::parseReactDeal(substr($html, $start, $end - $start), $account);
        }
    }

    return static::parseHtmlDeal($crawler, $account);
}
|
||||
|
||||
/**
 * Builds a Deal from the JS state object embedded in the page.
 *
 * Only the "adview" entry of the object is read. Missing fields are passed
 * to the setters as null, an unparsable publication date is left unset and
 * images without a matching setImage<N>() setter are ignored.
 *
 * @param string  $js_object source of the JS object literal
 * @param Account $account
 *
 * @return Deal|null null when the JS object cannot be parsed
 */
protected static function parseReactDeal($js_object, Account $account)
{
    try {
        $data = [];
        JsParser::parse_jsobj($js_object, $data);
    } catch (JsParserException $e) {
        return null;
    }

    $ad = (isset($data['adview']) && is_array($data['adview'])) ? $data['adview'] : [];
    $field = function ($name) use ($ad) {
        return isset($ad[$name]) ? $ad[$name] : null;
    };

    $deal = new self($field('list_id'));
    $deal->setAccount($account);
    $deal->setSubject($field('subject'));
    $deal->setCategory($field('category_id'));
    $deal->setType(self::TYPE_OFFER);
    $deal->setBody($field('body'));
    $deal->setPrice(isset($ad['price'][0]) ? $ad['price'][0] : null);

    // createFromFormat() returns false on a missing or malformed date;
    // never store that as the creation date.
    $date = \DateTime::createFromFormat('Y-m-d H:i:s', (string) $field('first_publication_date'));
    if ($date instanceof \DateTime) {
        $deal->setDateCreation($date);
    }

    $urls = (isset($ad['images']['urls_large']) && is_array($ad['images']['urls_large']))
        ? $ad['images']['urls_large']
        : [];
    foreach ($urls as $i => $url) {
        $setter = 'setImage' . $i;
        // The ad may carry more images than the Deal has slots for.
        if (method_exists($deal, $setter)) {
            $deal->$setter($url);
        }
    }

    return $deal;
}
|
||||
|
||||
/**
 * Builds a Deal by scraping the HTML markup of its page.
 *
 * @param Crawler $crawler crawler positioned on the deal page
 * @param Account $account
 *
 * @return Deal
 */
protected static function parseHtmlDeal(Crawler $crawler, Account $account)
{
    $id = $crawler->filter('[data-savead-id]')->attr('data-savead-id');
    $deal = new self($id);
    $deal->setAccount($account);

    $subject = $crawler->filter('h1')->first()->text();
    $deal->setSubject(trim($subject));

    // Third breadcrumb entry, looked up in the known categories.
    $categoryLabel = $crawler->filter('.breadcrumbsNav >ul >li')->eq(2)->text();
    $deal->setCategory(array_search($categoryLabel, Categories::$categories));

    $deal->setType(self::TYPE_OFFER);

    $body = $crawler->filter('.properties_description')->first()->filter('p')->eq(1)->text();
    $deal->setBody(trim($body));

    $deal->setPrice($crawler->filter('[itemprop=price]')->first()->attr('content'));

    // The day comes from the "content" attribute, the time of day from the
    // node text that follows the "à" separator.
    $dateNode = $crawler->filter('[itemprop=availabilityStarts]')->first();
    $date = \DateTime::createFromFormat('Y-m-d', $dateNode->attr('content'));
    $label = current($dateNode->extract(['_text']));
    $time = explode(':', substr($label, strpos($label, 'à')+2));
    $date->setTime((int) $time[0], (int) $time[1]);
    $deal->setDateCreation($date);

    if ($crawler->filter('.item_photo')->count() > 0) {
        // Image URLs are listed in the script following the photo block;
        // thumbnail URLs are turned into their "large" variant.
        $script = $crawler->filter('.item_photo')->first()->nextAll()->filter('script')->first()->html();
        preg_match_all("/\"(http.*ad-thumb.*)\"/m", $script, $matches);
        $uris = array_map(
            function ($thumb) {
                return str_replace('thumb', 'large', $thumb);
            },
            $matches[1]
        );
    } else {
        $uris = $crawler->filter('[data-popin-content]')->each(
            function (Crawler $node, $i) {
                return $node->attr('data-popin-content');
            }
        );
    }

    // Image N is stored through the setImageN() setter.
    foreach ($uris as $index => $uri) {
        $setter = 'setImage' . $index;
        $deal->$setter($uri);
    }

    return $deal;
}
|
||||
|
||||
/**
|
||||
* @param string $json path to json
|
||||
*
|
||||
|
@ -333,12 +409,14 @@ class Deal
|
|||
*/
|
||||
public static function fromJSON($json)
{
    // Builds a Deal from a JSON file: every top-level key "foo" is handed to
    // the matching setFoo() setter, then the directory containing the file is
    // recorded as the deal path.
    $path = dirname(realpath($json));
    $properties = \json_decode(file_get_contents($json), true);
    $deal = new self();
    // json_decode() yields null for an unreadable or invalid file.
    if (is_array($properties)) {
        foreach ($properties as $property => $value) {
            $method = sprintf('set%s', ucfirst($property));
            // Keys without a setter are skipped instead of triggering a
            // fatal "undefined method" error.
            if (method_exists($deal, $method)) {
                $deal->$method($value);
            }
        }
    }
    $deal->setPath($path);

    return $deal;
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,147 @@
|
|||
<?php
|
||||
|
||||
namespace Shikiryu\LBCReposter;
|
||||
|
||||
|
||||
/**
 * Minimal parser for JavaScript object literals (quoted or bare keys, single
 * or double quoted strings, numbers, arrays, nested objects, true, false,
 * null and undefined).
 *
 * String values are returned exactly as written between their quotes:
 * escape sequences are recognised to find the closing quote but are not
 * decoded.
 */
class JsParser
{
    /**
     * Parses the object literal at the start of $str into $data.
     *
     * Keys found in the literal are written into $data (existing entries
     * with other keys are kept). $data is only modified when the whole
     * object parsed successfully.
     *
     * @param string $str
     * @param array  $data filled with the parsed key/value pairs
     *
     * @return string|null what follows the object (one trailing "," or "}"
     *                     is consumed as well), or null for an empty input
     *
     * @throws JsParserException when $str does not start with a valid object
     */
    public static function parse_jsobj($str, &$data)
    {
        $str = trim($str);
        if ($str === '') {
            return null;
        }

        if ($str[0] !== '{') {
            throw new JsParserException('The given string is not a JS object');
        }

        $pos = 0;
        foreach (self::readObject($str, $pos) as $key => $value) {
            $data[$key] = $value;
        }

        return self::remainderAfter($str, $pos, '}');
    }

    /**
     * Position of the first comma or terminator in $str.
     *
     * Kept for backward compatibility; the parser itself no longer relies
     * on it.
     *
     * @param string $str
     * @param string $term
     * @return int
     * @throws JsParserException when neither a comma nor $term is found
     */
    public static function comma_or_term_pos($str, $term)
    {
        $cpos = strpos($str, ',');
        $tpos = strpos($str, $term);
        if ($cpos === false && $tpos === false) {
            throw new JsParserException('unterminated dict or array');
        }
        if ($cpos === false) {
            return $tpos;
        }
        if ($tpos === false) {
            return $cpos;
        }

        return min($tpos, $cpos);
    }

    /**
     * Parses the single value at the start of $str.
     *
     * @param string $str
     * @param string $term terminator of the enclosing structure
     * @return array [remaining string, parsed value]; the "," or $term that
     *               directly follows the value is consumed
     * @throws JsParserException
     */
    public static function parse_jsdata($str, $term = "}")
    {
        $str = trim($str);
        if ($str === '') {
            throw new JsParserException('unterminated dict or array');
        }

        $pos = 0;
        $value = self::readValue($str, $pos);

        return array(self::remainderAfter($str, $pos, $term), $value);
    }

    /**
     * Moves $pos past any whitespace.
     */
    private static function skipWhitespace($str, &$pos)
    {
        if ($pos < strlen($str)) {
            $pos += strspn($str, " \t\n\r\0\x0B", $pos);
        }
    }

    /**
     * Text left after $pos once one directly following "," or $term has
     * been consumed.
     */
    private static function remainderAfter($str, $pos, $term)
    {
        $term = (string) $term;
        self::skipWhitespace($str, $pos);
        if (isset($str[$pos])) {
            if ($str[$pos] === ',') {
                $pos++;
            } elseif ($term !== '' && substr($str, $pos, strlen($term)) === $term) {
                $pos += strlen($term);
            }
        }

        return trim((string) substr($str, $pos));
    }

    /**
     * Short excerpt of the input at $pos, for error messages.
     */
    private static function excerpt($str, $pos)
    {
        return (string) substr($str, $pos, 40);
    }

    /**
     * Reads any value starting at $pos and leaves $pos right after it.
     */
    private static function readValue($str, &$pos)
    {
        self::skipWhitespace($str, $pos);
        if (!isset($str[$pos])) {
            throw new JsParserException('unterminated dict or array');
        }

        $char = $str[$pos];
        if ($char === '{') {
            return self::readObject($str, $pos);
        }
        if ($char === '[') {
            return self::readArray($str, $pos);
        }
        if ($char === '"' || $char === "'") {
            return self::readString($str, $pos);
        }

        return self::readLiteral($str, $pos);
    }

    /**
     * Reads the object whose "{" is at $pos.
     */
    private static function readObject($str, &$pos)
    {
        $object = array();
        $pos++; /* discard the '{' */

        while (true) {
            self::skipWhitespace($str, $pos);
            if (!isset($str[$pos])) {
                throw new JsParserException('unterminated dict or array');
            }
            if ($str[$pos] === '}') {
                /* empty object, or closing brace after a trailing comma */
                $pos++;
                return $object;
            }

            /* find the key */
            if ($str[$pos] === '"' || $str[$pos] === "'") {
                $key = self::readString($str, $pos);
            } elseif (preg_match('/\G[a-zA-Z_$][a-zA-Z_$\d]*/', $str, $match, 0, $pos)) {
                $key = $match[0];
                $pos += strlen($key);
            } else {
                throw new JsParserException('Invalid key ("' . self::excerpt($str, $pos) . '")');
            }

            self::skipWhitespace($str, $pos);
            if (!isset($str[$pos]) || $str[$pos] !== ':') {
                throw new JsParserException('Invalid key ("' . $key . '")');
            }
            $pos++; /* discard the ':' */

            $object[$key] = self::readValue($str, $pos);

            self::skipWhitespace($str, $pos);
            if (!isset($str[$pos])) {
                throw new JsParserException('unterminated dict or array');
            }
            if ($str[$pos] === ',') {
                $pos++;
                continue;
            }
            if ($str[$pos] === '}') {
                $pos++;
                return $object;
            }

            throw new JsParserException('Cannot figure out how to parse "' . self::excerpt($str, $pos) . '"');
        }
    }

    /**
     * Reads the array whose "[" is at $pos.
     */
    private static function readArray($str, &$pos)
    {
        $items = array();
        $pos++; /* discard the '[' */

        while (true) {
            self::skipWhitespace($str, $pos);
            if (!isset($str[$pos])) {
                throw new JsParserException('unterminated dict or array');
            }
            if ($str[$pos] === ']') {
                /* empty array, or closing bracket after a trailing comma */
                $pos++;
                return $items;
            }

            $items[] = self::readValue($str, $pos);

            self::skipWhitespace($str, $pos);
            if (!isset($str[$pos])) {
                throw new JsParserException('unterminated dict or array');
            }
            if ($str[$pos] === ',') {
                $pos++;
                continue;
            }
            if ($str[$pos] === ']') {
                $pos++;
                return $items;
            }

            throw new JsParserException('Cannot figure out how to parse "' . self::excerpt($str, $pos) . '"');
        }
    }

    /**
     * Reads the quoted string starting at $pos and returns its raw content.
     */
    private static function readString($str, &$pos)
    {
        $quote = $str[$pos];
        $length = strlen($str);
        $i = $pos + 1;

        while ($i < $length) {
            /* jump to the next quote or backslash */
            $i += strcspn($str, $quote . '\\', $i);
            if ($i >= $length) {
                break;
            }
            if ($str[$i] === '\\') {
                /* skip the backslash and the character it escapes */
                $i += 2;
                continue;
            }

            $value = (string) substr($str, $pos + 1, $i - $pos - 1);
            $pos = $i + 1;

            return $value;
        }

        throw new JsParserException('unterminated string');
    }

    /**
     * Reads a number, true, false, null or undefined starting at $pos.
     */
    private static function readLiteral($str, &$pos)
    {
        $length = strcspn($str, ",}]:; \t\n\r", $pos);
        $token = (string) substr($str, $pos, $length);

        if (is_numeric($token)) {
            $pos += $length;
            return $token + 0; /* int or float */
        }

        $lower = strtolower($token);
        if ($lower === 'true' || $lower === 'false' || $lower === 'null' || $token === 'undefined') {
            $pos += $length;
            if ($lower === 'true') {
                return true;
            }
            if ($lower === 'false') {
                return false;
            }
            return null; /* null and undefined */
        }

        throw new JsParserException('Cannot figure out how to parse "' . self::excerpt($str, $pos) . '"');
    }
}
|
|
@ -0,0 +1,9 @@
|
|||
<?php
|
||||
|
||||
namespace Shikiryu\LBCReposter;
|
||||
|
||||
|
||||
/**
 * Thrown by JsParser when the input is not a JS object literal it can parse.
 */
class JsParserException extends \Exception
{
}
|
Loading…
Reference in New Issue