From 25d18c71847d541d97e056961799d30e4c7461e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment?=
Date: Wed, 3 Feb 2021 16:38:01 +0100
Subject: [PATCH] :alien: Ajoute le scraping de SeLoger car l'API ne fonctionne plus

Fix #22
---
 app/Exceptions/UnknownParser.php |  2 +
 app/Parser.php                   | 76 +++++++++++++++++++++++++
 app/Parser/Orpi.php              | 70 -----------------------
 app/Parser/SeLoger.php           | 95 +++++++++++++++++++++++++++++---
 4 files changed, 164 insertions(+), 79 deletions(-)

diff --git a/app/Exceptions/UnknownParser.php b/app/Exceptions/UnknownParser.php
index b4fc92c..8ccd5a5 100644
--- a/app/Exceptions/UnknownParser.php
+++ b/app/Exceptions/UnknownParser.php
@@ -2,6 +2,8 @@
 
 namespace App\Exceptions;
 
+use Exception;
+
 class UnknownParser extends Exception
 {
 
diff --git a/app/Parser.php b/app/Parser.php
index 5ba5047..198de88 100644
--- a/app/Parser.php
+++ b/app/Parser.php
@@ -27,6 +27,7 @@ abstract class Parser
      * @param string $url
      *
      * @return \App\Parser|null
+     * @throws \App\Exceptions\UnknownParser
      */
     public static function factory(string $url): ?Parser
     {
@@ -48,4 +49,79 @@ abstract class Parser
      * @return \App\ParsedHome
      */
     abstract public function parse(): ParsedHome;
+
+
+    /**
+     * Converts a numeric energy score into its DPE letter, from 'A' to 'G'.
+     *
+     * @param int $score
+     *
+     * @return string The DPE letter, or 'Inconnu' when the score is empty.
+     */
+    protected function calculateDPE($score)
+    {
+        if (empty($score)) {
+            return 'Inconnu';
+        }
+        if ($score <= 50) {
+            return 'A';
+        }
+        if ($score >= 51 && $score <= 90) {
+            return 'B';
+        }
+        if ($score >= 91 && $score <= 150) {
+            return 'C';
+        }
+        if ($score >= 151 && $score <= 230) {
+            return 'D';
+        }
+        if ($score >= 231 && $score <= 330) {
+            return 'E';
+        }
+        if ($score >= 331 && $score <= 450) {
+            return 'F';
+        }
+        if ($score > 450) {
+            return 'G';
+        }
+
+        return 'Inconnu';
+    }
+
+    /**
+     * Converts a numeric emission score into its GES letter, from 'A' to 'G'.
+     *
+     * @param int $score
+     *
+     * @return string The GES letter, or 'Inconnu' when the score is empty.
+     */
+    protected function calculateGES($score)
+    {
+        if (empty($score)) {
+            return 'Inconnu';
+        }
+        if ($score <= 5) {
+            return 'A';
+        }
+        if ($score >= 6 && $score <= 10) {
+            return 'B';
+        }
+        if ($score >= 11 && $score <= 20) {
+            return 'C';
+        }
+        if ($score >= 21 && $score <= 35) {
+            return 'D';
+        }
+        if ($score >= 36 && $score <= 55) {
+            return 'E';
+        }
+        if ($score >= 56 && $score <= 80) {
+            return 'F';
+        }
+        if ($score > 80) {
+            return 'G';
+        }
+
+        return 'Inconnu';
+    }
 }
diff --git a/app/Parser/Orpi.php b/app/Parser/Orpi.php
index 35a5d1b..4538775 100644
--- a/app/Parser/Orpi.php
+++ b/app/Parser/Orpi.php
@@ -36,76 +36,6 @@ class Orpi extends Parser
         return $this->parseHTML($parsedHome, $crawler);
     }
 
-    /**
-     * @param int $score
-     *
-     * @return string
-     */
-    private function calculateDPE($score)
-    {
-        if (empty($score)) {
-            return 'Inconnu';
-        }
-        if ($score <= 50) {
-            return 'A';
-        }
-        if ($score >= 51 && $score <= 90) {
-            return 'B';
-        }
-        if ($score >= 91 && $score <= 150) {
-            return 'C';
-        }
-        if ($score >= 151 && $score <= 230) {
-            return 'D';
-        }
-        if ($score >= 231 && $score <= 330) {
-            return 'E';
-        }
-        if ($score >= 331 && $score <= 450) {
-            return 'F';
-        }
-        if ($score > 450) {
-            return 'G';
-        }
-
-        return 'Inconnu';
-    }
-
-    /**
-     * @param $score
-     *
-     * @return string
-     */
-    private function calculateGES($score)
-    {
-        if (empty($score)) {
-            return 'Inconnu';
-        }
-        if ($score <= 5) {
-            return 'A';
-        }
-        if ($score >= 6 && $score <= 10) {
-            return 'B';
-        }
-        if ($score >= 11 && $score <= 20) {
-            return 'C';
-        }
-        if ($score >= 21 && $score <= 35) {
-            return 'D';
-        }
-        if ($score >= 36 && $score <= 55) {
-            return 'E';
-        }
-        if ($score >= 56 && $score <= 80) {
-            return 'F';
-        }
-        if ($score > 80) {
-            return 'G';
-        }
-
-        return 'Inconnu';
-    }
-
     /**
      * @param \App\ParsedHome $parsed_home
      * @param \Symfony\Component\DomCrawler\Crawler $crawler
diff --git a/app/Parser/SeLoger.php b/app/Parser/SeLoger.php
index 9c86124..61d6e1c 100644
--- a/app/Parser/SeLoger.php
+++ b/app/Parser/SeLoger.php
@@ -4,6 +4,8 @@ namespace App\Parser;
 
 use App\ParsedHome;
 use App\Parser;
+use GuzzleHttp\Exception\GuzzleException;
+use Symfony\Component\DomCrawler\Crawler;
 
 /**
  * Thanks to https://github.com/axeleroy/untoitpourcaramel/issues/3
@@ -25,15 +27,20 @@ class SeLoger extends Parser
         $token = $this->retrieveToken();
         $idAnnonce = $this->getIdAnnonceFromUrl($this->url);
         $url = sprintf('%s%s', self::LISTING_URL, $idAnnonce);
-        $request = $this->client->request(
-            'GET',
-            $url,
-            [
-                'headers' => [
-                    'AppToken' => $token,
-                ],
-            ]
-        );
+        try {
+            $request = $this->client->request(
+                'GET',
+                $url,
+                [
+                    'headers' => [
+                        'AppToken' => $token,
+                    ],
+                ]
+            );
+        } catch (GuzzleException $e) {
+            // The API request failed: fall back to scraping the listing page.
+            return $this->parseHTML();
+        }
         $annonce = json_decode($request->getBody()->getContents(), true);
 
         $parsedHome = new ParsedHome();
@@ -83,4 +90,74 @@ class SeLoger extends Parser
 
         return trim($request->getBody()->getContents(), '"');
     }
+
+    /**
+     * Builds the listing from the public HTML page found at $this->url.
+     *
+     * Used by parse() as a fallback when the API request fails.
+     *
+     * @return \App\ParsedHome
+     */
+    private function parseHTML()
+    {
+        $request = $this->client->get(
+            $this->url,
+            [
+                'headers' => [
+                    'User-Agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:85.0) Gecko/20100101 Firefox/85.0',
+                    'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+                    'Accept-Encoding' => 'gzip, deflate, br',
+                    'Accept-Language' => 'fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3',
+                    'Cache-Control' => 'no-cache',
+                    'Connection' => 'keep-alive',
+                ]
+            ]
+        );
+        $body = $request->getBody()->getContents();
+        $crawler = new Crawler($body);
+
+        $parsed_home = new ParsedHome();
+
+        $parsed_home->title = $crawler->filter('h1')->text();
+        $parsed_home->description = $crawler->filter('#showcase-description > div')->first()->text();
+        // Strip every kind of whitespace (non-breaking spaces included) so the cast keeps all the digits.
+        $parsed_home->price = (int) preg_replace(
+            '/[\s\p{Z}]+/u',
+            '',
+            $crawler->filter('[class^=Summarystyled__PriceText]')->text()
+        );
+        $parsed_home->city = $crawler->filter('[class^=Summarystyled__Address]')->text();
+
+        // The DPE and GES values may be missing from the page: fall back to 0, which maps to 'Inconnu'.
+        $diagnostics = $crawler->filter('[class^=Preview__PreviewTooltipValue]');
+        $parsed_home->energy = $this->calculateDPE(
+            $diagnostics->count() > 0 ? (int) $diagnostics->eq(0)->text() : 0
+        );
+        $parsed_home->ges = $this->calculateGES(
+            $diagnostics->count() > 1 ? (int) $diagnostics->eq(1)->text() : 0
+        );
+
+        $crawler
+            ->filter('[class^=Summarystyled__TagsWrapper] > div')
+            ->each(static function (Crawler $tag) use ($parsed_home) {
+                $element = $tag->children()->eq(1)->text();
+                // Accept the singular form as well ("1 pièce").
+                if (1 === preg_match('/pièces?$/u', $element)) {
+                    $parsed_home->rooms = (int) $element;
+                }
+                if ('m²' === mb_substr($element, -2) && strpos($element, '/') === false) {
+                    $parsed_home->surface = (int) $element;
+                }
+            });
+
+        $parsed_home->pictures = $crawler
+            ->filter('.swiper-wrapper')
+            ->first()
+            ->filter('[data-background]')
+            ->each(static function ($img) {
+                return $img->attr('data-background');
+            });
+
+        return $parsed_home;
+    }
 }