MyHomeCollection/app/Parser/Orpi.php

123 lines
4.5 KiB
PHP

<?php
namespace App\Parser;
use App\ParsedHome;
use App\Parser;
use GuzzleHttp\Exception\InvalidArgumentException;
use Symfony\Component\DomCrawler\Crawler;
use function GuzzleHttp\json_decode;
/**
 * Parser for home ads published on Orpi.
 *
 * An ad page is read either from the JSON payload carried by a
 * `data-estate` attribute or, when that payload is absent or unusable,
 * by crawling the page markup.
 *
 * @package App\Parser
 */
class Orpi extends Parser
{
    /**
     * Downloads the ad page and extracts its details.
     *
     * @inheritDoc
     */
    public function parse(): ParsedHome
    {
        $request = $this->client->get($this->url);
        $body = $request->getBody()->getContents();
        $crawler = new Crawler($body);
        $parsedHome = new ParsedHome();

        // Orpi ads can be parsed in two ways:
        //  * sometimes a JSON document is embedded in the page, so it only has to be read;
        //  * otherwise the web page itself must be crawled.
        if ($crawler->filter('[data-estate]')->count() > 0) {
            return $this->parseJSON($parsedHome, $crawler);
        }

        return $this->parseHTML($parsedHome, $crawler);
    }

    /**
     * Fills the home from the JSON document stored in the `data-estate` attribute.
     *
     * Falls back to parseHTML() when the attribute is missing, when the JSON
     * cannot be decoded, or when it does not decode to an array. Entries that
     * are absent from the decoded document are stored as null.
     *
     * @param \App\ParsedHome $parsed_home Home object to fill.
     * @param \Symfony\Component\DomCrawler\Crawler $crawler Crawler over the ad page.
     *
     * @return \App\ParsedHome
     */
    private function parseJSON(ParsedHome $parsed_home, Crawler $crawler): ParsedHome
    {
        $data_estate = $crawler->filter('[data-estate]');
        if ($data_estate->count() === 0) {
            return $this->parseHTML($parsed_home, $crawler);
        }

        try {
            $json_data = json_decode($data_estate->attr('data-estate'), true);
        } catch (InvalidArgumentException $e) {
            // Malformed JSON: the markup is the only remaining source.
            return $this->parseHTML($parsed_home, $crawler);
        }

        // Valid JSON that is not an object/array (e.g. "null") carries no usable data.
        if (!is_array($json_data)) {
            return $this->parseHTML($parsed_home, $crawler);
        }

        $parsed_home->price = $json_data['price'] ?? null;
        $parsed_home->city = $json_data['city']['name'] ?? null;
        $parsed_home->surface = $json_data['surface'] ?? null;
        $parsed_home->garden_surface = $json_data['lotSurface'] ?? null;
        $parsed_home->rooms = $json_data['nbRooms'] ?? null;
        $parsed_home->description = $json_data['longAd'] ?? null;
        $parsed_home->title = $json_data['seo']['metaTitle'] ?? null;
        $parsed_home->map = [
            'lat' => $json_data['latitude'] ?? null,
            'lng' => $json_data['longitude'] ?? null,
        ];
        $parsed_home->pictures = $json_data['imagesFull'] ?? null;
        $parsed_home->energy = $this->calculateDPE($json_data['consumptionValue'] ?? null);
        $parsed_home->ges = $this->calculateGES($json_data['emissionValue'] ?? null);

        return $parsed_home;
    }

    /**
     * Fills the home by crawling the ad markup, then fetches the pictures
     * from the `/photos/` sub-page of the ad.
     *
     * NOTE(review): the node lookups are not guarded; a page whose layout
     * differs from the one expected here will presumably make the DomCrawler
     * calls fail (see the DomCrawler documentation for empty node lists).
     *
     * @param \App\ParsedHome $parsed_home Home object to fill.
     * @param \Symfony\Component\DomCrawler\Crawler $crawler Crawler over the ad page.
     *
     * @return \App\ParsedHome
     */
    private function parseHTML(ParsedHome $parsed_home, Crawler $crawler): ParsedHome
    {
        $sections = $crawler->filter('article')->children();
        $first_section = $sections->first();
        $second_section = $sections->eq(1);
        $third_section = $sections->eq(2);

        $parsed_home->description = $second_section->filter('.o-container')->children()->eq(1)->text();

        $second_section->filter('.c-badge__text')->each(static function (Crawler $detail) use ($parsed_home) {
            $detail_text = $detail->text();
            if (mb_strpos($detail_text, 'Terrain') === 0) {
                // Skip the first 8 characters ("Terrain" plus the character after it)
                // and drop the last 2 (presumably the surface unit).
                $parsed_home->garden_surface = mb_substr($detail_text, 8, -2);
            }
            if (mb_strpos($detail_text, 'pièces') !== false) {
                // The integer cast keeps only the leading number of the badge text.
                $parsed_home->rooms = (int)$detail_text;
            }
        });

        $h1 = $first_section->filter('h1');
        $parsed_home->title = $h1->children()->first()->text();
        $parsed_home->surface = (int)$h1->children()->eq(2)->text();
        $parsed_home->city = $h1->children()->eq(4)->text();

        // Remove every kind of whitespace, including the non-breaking spaces commonly
        // used as thousands separators, so the integer cast sees the whole number.
        $price_text = $first_section->filter('.u-h1')->text();
        $price_digits = preg_replace('/[\s\x{00A0}\x{202F}]+/u', '', $price_text);
        if ($price_digits === null) {
            // The regex failed (e.g. invalid UTF-8): fall back to stripping plain spaces.
            $price_digits = str_replace(' ', '', $price_text);
        }
        $parsed_home->price = (int)$price_digits;

        $third_section->filter('.c-dpe')->each(static function (Crawler $detail) use ($parsed_home) {
            $abbr = $detail->filter('abbr');
            if ($abbr->count() === 0) {
                return;
            }

            // `c-dpe--ges` is looked up as a CSS class (it follows the naming of the
            // `.c-dpe` selector); the attribute lookup is kept for compatibility.
            $classes = preg_split('/\s+/', (string)$detail->attr('class'), -1, PREG_SPLIT_NO_EMPTY) ?: [];
            $is_ges = $detail->attr('c-dpe--ges') !== null || in_array('c-dpe--ges', $classes, true);

            if ($is_ges) {
                $parsed_home->ges = $abbr->text();
            } else {
                $parsed_home->energy = $abbr->text();
            }
        });

        // Avoid a double slash when the ad URL already ends with one.
        $request = $this->client->get(rtrim($this->url, '/') . '/photos/');
        $body = $request->getBody()->getContents();
        $photos_crawler = new Crawler($body);

        $sources = $photos_crawler
            ->filter('.u-cover')
            ->each(static function (Crawler $node) {
                return strtolower($node->nodeName()) === 'img' ? $node->attr('src') : null;
            });

        // Keep only real picture URLs: non-<img> nodes and images without `src` yield null.
        $parsed_home->pictures = array_values(array_filter($sources, static function ($src) {
            return $src !== null;
        }));

        return $parsed_home;
    }
}