From 7a959f070b8d114ae88f0ee795829b634166d0ef Mon Sep 17 00:00:00 2001 From: Clement Date: Mon, 9 Nov 2020 17:35:50 +0100 Subject: [PATCH] :sparkles: Ajoute le parser Orpi --- app/Console/Commands/ParseLinkCommand.php | 8 +- app/Exceptions/UnknownParser.php | 8 + app/Parser.php | 7 +- app/Parser/Orpi.php | 192 ++++++++++++++++++++++ config/parser.php | 4 +- 5 files changed, 216 insertions(+), 3 deletions(-) create mode 100644 app/Exceptions/UnknownParser.php create mode 100644 app/Parser/Orpi.php diff --git a/app/Console/Commands/ParseLinkCommand.php b/app/Console/Commands/ParseLinkCommand.php index a53c31c..3fde367 100644 --- a/app/Console/Commands/ParseLinkCommand.php +++ b/app/Console/Commands/ParseLinkCommand.php @@ -3,7 +3,9 @@ namespace App\Console\Commands; use App\Parser; +use GuzzleHttp\Exception\InvalidArgumentException; use Illuminate\Console\Command; +use function GuzzleHttp\json_encode; class ParseLinkCommand extends Command { @@ -39,7 +41,11 @@ class ParseLinkCommand extends Command public function handle() { $parser = Parser::factory($this->argument('url')); - $parser->parse(); + try { + $this->info(json_encode($parser->parse(), true)); + } catch (InvalidArgumentException $e) { + $this->error($e->getMessage()); + } return 0; } diff --git a/app/Exceptions/UnknownParser.php b/app/Exceptions/UnknownParser.php new file mode 100644 index 0000000..b4fc92c --- /dev/null +++ b/app/Exceptions/UnknownParser.php @@ -0,0 +1,8 @@ +client->get($this->url); + $body = $request->getBody()->getContents(); + $crawler = new Crawler($body); + $parsedHome = new ParsedHome(); + /** + * Orpi ads can be parsed 2 ways : + * * sometimes, a JSON is included in the page so it's just a reading/feeding object + * * else, we must crawl the webpage… + */ + $data_estate = $crawler->filter('[data-estate]'); + if ($data_estate->count() > 0) { + return $this->parseJSON($parsedHome, $crawler); + } + + return $this->parseHTML($parsedHome, $crawler); + } + + /** + * @param int $score + * + 
* @return string
+     */
+    private function calculateDPE($score)
+    {
+        // Translates a numeric energy score into a letter from 'A' to 'G'.
+        // Missing, zero or non-numeric scores cannot be classified.
+        if (empty($score) || !is_numeric($score)) {
+            return 'Inconnu';
+        }
+
+        // Inclusive upper bound of every band. Testing upper bounds only means a
+        // fractional score such as 50.5 lands in the next band ('B') instead of
+        // slipping through a gap between "<= 50" and ">= 51".
+        $upper_bounds = ['A' => 50, 'B' => 90, 'C' => 150, 'D' => 230, 'E' => 330, 'F' => 450];
+        foreach ($upper_bounds as $letter => $upper_bound) {
+            if ($score <= $upper_bound) {
+                return $letter;
+            }
+        }
+
+        // Anything above the last bound.
+        return 'G';
+    }
+
+    /**
+     * Translates a numeric emission score into a letter from 'A' to 'G'.
+     *
+     * @param int|float|string|null $score Raw emission value; empty or non-numeric values are unclassifiable.
+     *
+     * @return string One of 'A'..'G', or 'Inconnu' when the score cannot be classified.
+     */
+    private function calculateGES($score)
+    {
+        if (empty($score) || !is_numeric($score)) {
+            return 'Inconnu';
+        }
+
+        // Inclusive upper bound of every band; see calculateDPE() for why only
+        // upper bounds are tested.
+        $upper_bounds = ['A' => 5, 'B' => 10, 'C' => 20, 'D' => 35, 'E' => 55, 'F' => 80];
+        foreach ($upper_bounds as $letter => $upper_bound) {
+            if ($score <= $upper_bound) {
+                return $letter;
+            }
+        }
+
+        // Anything above the last bound.
+        return 'G';
+    }
+
+    /**
+     * Fills the parsed home from the JSON document stored in the `data-estate` attribute.
+     *
+     * Falls back to parseHTML() when the attribute cannot be decoded into an array.
+     *
+     * @param \App\ParsedHome $parsed_home
+     * @param \Symfony\Component\DomCrawler\Crawler $crawler
+     *
+     * @return \App\ParsedHome
+     */
+    private function parseJSON(ParsedHome $parsed_home, Crawler $crawler)
+    {
+        $data_estate = $crawler->filter('[data-estate]');
+        try {
+            $json_data = json_decode($data_estate->attr('data-estate'), true);
+        } catch (InvalidArgumentException $e) {
+            return $this->parseHTML($parsed_home, $crawler);
+        }
+
+        // A decoder that reports failure by returning null (or a payload that is a
+        // bare scalar) would otherwise produce a home made of nulls only.
+        if (!is_array($json_data)) {
+            return $this->parseHTML($parsed_home, $crawler);
+        }
+
+        $parsed_home->price = $json_data['price'];
+        $parsed_home->city = $json_data['city']['name'];
+        $parsed_home->surface = $json_data['surface'];
+        $parsed_home->garden_surface = $json_data['lotSurface'];
+        $parsed_home->rooms = $json_data['nbRooms'];
+        $parsed_home->description = $json_data['longAd'];
+        $parsed_home->title = $json_data['seo']['metaTitle'];
+        $parsed_home->map = ['lat' => $json_data['latitude'], 'lng' => $json_data['longitude']];
+        $parsed_home->pictures = $json_data['imagesFull'];
+        // Both calculators already map a missing value to 'Inconnu', so an absent
+        // key is passed on as null rather than raising an undefined-index error.
+        $parsed_home->energy = $this->calculateDPE($json_data['consumptionValue'] ?? null);
+        $parsed_home->ges = $this->calculateGES($json_data['emissionValue'] ?? null);
+
+        return $parsed_home;
+    }
+
+    /**
+     * Fills the parsed home by crawling the markup of the ad page, then fetches
+     * the `/photos/` sub-page to collect the picture URLs.
+     *
+     * @param \App\ParsedHome $parsed_home
+     * @param \Symfony\Component\DomCrawler\Crawler $crawler
+     *
+     * @return \App\ParsedHome
+     */
+    private function parseHTML(ParsedHome $parsed_home, Crawler $crawler)
+    {
+        // The ad is read from the first three children of <article>.
+        $sections = $crawler->filter('article')->children();
+        $first_section = $sections->first();
+        $second_section = $sections->eq(1);
+        $third_section = $sections->eq(2);
+
+        $parsed_home->description = $second_section->filter('.o-container')->children()->eq(1)->text();
+        // Objects are handles in PHP, so the closure can mutate $parsed_home
+        // without capturing it by reference.
+        $second_section->filter('.c-badge__text')->each(static function (Crawler $detail) use ($parsed_home) {
+            $detail_text = $detail->text();
+            if (mb_strpos($detail_text, 'Terrain') === 0) {
+                // Skips the 8 leading characters and drops the last 2.
+                // NOTE(review): presumably "Terrain " and a trailing unit - confirm.
+                $parsed_home->garden_surface = mb_substr($detail_text, 8, -2);
+            }
+
+            if (mb_strpos($detail_text, 'pièces') !== false) {
+                $parsed_home->rooms = (int)$detail_text;
+            }
+        });
+
+        $h1 = $first_section->filter('h1');
+        $h1_parts = $h1->children();
+        $parsed_home->title = $h1_parts->first()->text();
+        $parsed_home->surface = (int)$h1_parts->eq(2)->text();
+        $parsed_home->city = $h1_parts->eq(4)->text();
+        $parsed_home->price = (int)str_replace(' ', '', $first_section->filter('.u-h1')->text());
+
+        $third_section->filter('.c-dpe')->each(static function (Crawler $detail) use ($parsed_home) {
+            $abbr = $detail->filter('abbr');
+            if ($abbr->count() === 0) {
+                return;
+            }
+
+            // NOTE(review): `c-dpe--ges` looks like a CSS modifier class rather than
+            // an attribute name, in which case the attribute test alone never
+            // matches. Both forms are accepted - confirm against the live markup.
+            $classes = preg_split('/\s+/', (string)$detail->attr('class'), -1, PREG_SPLIT_NO_EMPTY);
+            $is_ges = $detail->attr('c-dpe--ges') !== null || in_array('c-dpe--ges', $classes, true);
+
+            if ($is_ges) {
+                $parsed_home->ges = $abbr->text();
+            } else {
+                $parsed_home->energy = $abbr->text();
+            }
+        });
+
+        // rtrim() avoids requesting "…//photos/" when the URL ends with a slash.
+        $request = $this->client->get(rtrim($this->url, '/').'/photos/');
+        $body = $request->getBody()->getContents();
+        $gallery = new Crawler($body);
+        $sources = $gallery
+            ->filter('.u-cover')
+            ->each(static function (Crawler $node) {
+                if (strtolower($node->nodeName()) === 'img') {
+                    return $node->attr('src');
+                }
+
+                return null;
+            });
+        // Nodes that are not <img>, or that carry no src, yield null above; keep
+        // only real URLs and reindex so the result is a plain list.
+        $parsed_home->pictures = array_values(array_filter($sources, static function ($src) {
+            return $src !== null && $src !== '';
+        }));
+
+        return $parsed_home;
+    }
+}
diff --git a/config/parser.php b/config/parser.php
index ba8e13d..011bc2a 100644
--- a/config/parser.php
+++ b/config/parser.php
@@ -3,6 +3,7 @@
 use App\Parser\ImmobilierNotaires;
 use App\Parser\LannionImmo;
 use App\Parser\LBC;
+use App\Parser\Orpi;
 use App\Parser\OuestFrance;
 use App\Parser\Pap;
 use App\Parser\SeLoger;
@@ -14,6 +15,7 @@ return [
         'pap.fr' => Pap::class,
         'ouestfrance-immo.com' => OuestFrance::class,
         'lannion.immo' => LannionImmo::class,
-        'immobilier.notaires.fr'=> ImmobilierNotaires::class
+        'immobilier.notaires.fr'=> ImmobilierNotaires::class,
+        'orpi.com' => Orpi::class
     ],
 ];