123 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			123 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
 | 
						|
 | 
						|
namespace App\Parser;
 | 
						|
 | 
						|
use App\ParsedHome;
 | 
						|
use App\Parser;
 | 
						|
use GuzzleHttp\Exception\InvalidArgumentException;
 | 
						|
use Symfony\Component\DomCrawler\Crawler;
 | 
						|
use function GuzzleHttp\json_decode;
 | 
						|
 | 
						|
/**
 * Parser for Orpi real-estate ads.
 *
 * An ad page is read in one of two ways: from the JSON document embedded in a
 * `data-estate` attribute when the page carries one, or by crawling the HTML
 * markup otherwise.
 *
 * @package App\Parser
 */
class Orpi extends Parser
{
    /**
     * @inheritDoc
     */
    public function parse(): ParsedHome
    {
        $response = $this->client->get($this->url);
        $crawler = new Crawler($response->getBody()->getContents());
        $parsedHome = new ParsedHome();

        // Prefer the embedded JSON when an element carrying `data-estate`
        // exists; otherwise fall back to crawling the markup.
        if ($crawler->filter('[data-estate]')->count() > 0) {
            return $this->parseJSON($parsedHome, $crawler);
        }

        return $this->parseHTML($parsedHome, $crawler);
    }

    /**
     * Fills the home from the JSON stored in the `data-estate` attribute.
     *
     * If the attribute content cannot be decoded, the HTML crawl is used
     * instead.
     *
     * @param \App\ParsedHome                       $parsed_home Object to populate.
     * @param \Symfony\Component\DomCrawler\Crawler $crawler     Crawler over the ad page.
     *
     * @return \App\ParsedHome The populated object (same instance as $parsed_home).
     */
    private function parseJSON(ParsedHome $parsed_home, Crawler $crawler): ParsedHome
    {
        $data_estate = $crawler->filter('[data-estate]');

        try {
            // Guzzle's json_decode throws instead of returning null on bad JSON.
            $json_data = json_decode($data_estate->attr('data-estate'), true);
        } catch (InvalidArgumentException $e) {
            return $this->parseHTML($parsed_home, $crawler);
        }

        $parsed_home->price = $json_data['price'];
        $parsed_home->city = $json_data['city']['name'];
        $parsed_home->surface = $json_data['surface'];
        $parsed_home->garden_surface = $json_data['lotSurface'];
        $parsed_home->rooms = $json_data['nbRooms'];
        $parsed_home->description = $json_data['longAd'];
        $parsed_home->title = $json_data['seo']['metaTitle'];
        $parsed_home->map = [
            'lat' => $json_data['latitude'],
            'lng' => $json_data['longitude'],
        ];
        $parsed_home->pictures = $json_data['imagesFull'];
        // calculateDPE()/calculateGES() are defined outside this class
        // (presumably on Parser); they receive the raw consumption/emission values.
        $parsed_home->energy = $this->calculateDPE($json_data['consumptionValue']);
        $parsed_home->ges = $this->calculateGES($json_data['emissionValue']);

        return $parsed_home;
    }

    /**
     * Fills the home by crawling the ad markup, then fetches the `/photos/`
     * sub-page to collect picture URLs.
     *
     * The first three children of the `article` element are read as: header
     * (title, surface, city, price), details (description, badges) and
     * energy ratings (`.c-dpe` blocks).
     *
     * @param \App\ParsedHome                       $parsed_home Object to populate.
     * @param \Symfony\Component\DomCrawler\Crawler $crawler     Crawler over the ad page.
     *
     * @return \App\ParsedHome The populated object (same instance as $parsed_home).
     */
    private function parseHTML(ParsedHome $parsed_home, Crawler $crawler): ParsedHome
    {
        $sections = $crawler->filter('article')->children();
        $first_section = $sections->first();
        $second_section = $sections->eq(1);
        $third_section = $sections->eq(2);

        $parsed_home->description = $second_section->filter('.o-container')->children()->eq(1)->text();

        // Objects are passed by handle, so the closures can mutate
        // $parsed_home without capturing it by reference.
        $second_section->filter('.c-badge__text')->each(
            static function (Crawler $detail) use ($parsed_home) {
                $detail_text = $detail->text();

                if (mb_strpos($detail_text, 'Terrain') === 0) {
                    // Skips the first 8 characters and drops the last 2
                    // (presumably the "Terrain " label and the unit suffix).
                    $parsed_home->garden_surface = mb_substr($detail_text, 8, -2);
                }

                if (mb_strpos($detail_text, 'pièces') !== false) {
                    // The int cast keeps the leading number of the badge text.
                    $parsed_home->rooms = (int)$detail_text;
                }
            }
        );

        $h1 = $first_section->filter('h1');
        $h1_parts = $h1->children();
        $parsed_home->title = $h1_parts->first()->text();
        $parsed_home->surface = (int)$h1_parts->eq(2)->text();
        $parsed_home->city = $h1_parts->eq(4)->text();
        $parsed_home->price = (int)str_replace(' ', '', $first_section->filter('.u-h1')->text());

        $third_section->filter('.c-dpe')->each(
            static function (Crawler $detail) use ($parsed_home) {
                $abbr = $detail->filter('abbr');
                if ($abbr->count() === 0) {
                    return;
                }

                // `c-dpe--ges` is matched as a CSS class token (it follows the
                // same naming as `.c-dpe`); the attribute lookup is kept so
                // markup that exposes it as an attribute still matches.
                $classes = preg_split('/\s+/', (string)$detail->attr('class'), -1, PREG_SPLIT_NO_EMPTY);
                $is_ges = $detail->attr('c-dpe--ges') !== null
                    || in_array('c-dpe--ges', $classes, true);

                if ($is_ges) {
                    $parsed_home->ges = $abbr->text();
                } else {
                    $parsed_home->energy = $abbr->text();
                }
            }
        );

        // Pictures live on a dedicated sub-page; trim a trailing slash first
        // so the request never targets "...//photos/".
        $response = $this->client->get(rtrim($this->url, '/').'/photos/');
        $photos = new Crawler($response->getBody()->getContents());
        $sources = $photos->filter('.u-cover')->each(
            static function (Crawler $node) {
                return strtolower($node->nodeName()) === 'img' ? $node->attr('src') : null;
            }
        );

        // Keep only real sources: non-<img> nodes and images without a `src`
        // yield null above and must not leave holes in the picture list.
        $parsed_home->pictures = array_values(array_filter(
            $sources,
            static function ($src) {
                return $src !== null;
            }
        ));

        return $parsed_home;
    }
}
 |