client->get($this->url);
        $body = $request->getBody()->getContents();

        $crawler = new Crawler($body);

        $parsedHome = new ParsedHome();

        /**
         * Orpi ads can be parsed 2 ways :
         * * sometimes, a JSON is included in the page so it's just a reading/feeding object
         * * else, we must crawl the webpage…
         */
        $data_estate = $crawler->filter('[data-estate]');
        if ($data_estate->count() > 0) {
            return $this->parseJSON($parsedHome, $crawler);
        }

        return $this->parseHTML($parsedHome, $crawler);
    }

    /**
     * Maps a numeric score onto the letter that parseJSON() stores in ParsedHome::$energy.
     *
     * @param int|float|null $score value to grade
     *
     * @return string a letter from 'A' to 'G', or 'Inconnu' when the score is empty
     */
    private function calculateDPE($score)
    {
        if (empty($score)) {
            return 'Inconnu';
        }

        // Each band is delimited by its upper bound only, so a fractional score
        // such as 50.5 lands in the next band instead of matching no band at all.
        $upper_bounds = [
            50 => 'A',
            90 => 'B',
            150 => 'C',
            230 => 'D',
            330 => 'E',
            450 => 'F',
        ];

        foreach ($upper_bounds as $upper_bound => $letter) {
            if ($score <= $upper_bound) {
                return $letter;
            }
        }

        if ($score > 450) {
            return 'G';
        }

        // Only reachable for values that compare false against every bound.
        return 'Inconnu';
    }

    /**
     * Maps a numeric score onto the letter that parseJSON() stores in ParsedHome::$ges.
     *
     * @param int|float|null $score value to grade
     *
     * @return string a letter from 'A' to 'G', or 'Inconnu' when the score is empty
     */
    private function calculateGES($score)
    {
        if (empty($score)) {
            return 'Inconnu';
        }

        // Each band is delimited by its upper bound only, so a fractional score
        // such as 5.5 lands in the next band instead of matching no band at all.
        $upper_bounds = [
            5 => 'A',
            10 => 'B',
            20 => 'C',
            35 => 'D',
            55 => 'E',
            80 => 'F',
        ];

        foreach ($upper_bounds as $upper_bound => $letter) {
            if ($score <= $upper_bound) {
                return $letter;
            }
        }

        if ($score > 80) {
            return 'G';
        }

        // Only reachable for values that compare false against every bound.
        return 'Inconnu';
    }

    /**
     * Fills the given ParsedHome from the JSON document stored in the page's
     * `data-estate` attribute.
     *
     * Falls back to parseHTML() when the attribute does not decode to an array
     * or when an InvalidArgumentException is raised while reading it.
     *
     * @param \App\ParsedHome                       $parsed_home object to populate
     * @param \Symfony\Component\DomCrawler\Crawler $crawler     crawler built from the ad page
     *
     * @return \App\ParsedHome
     */
    private function parseJSON(ParsedHome $parsed_home, Crawler $crawler)
    {
        $data_estate = $crawler->filter('[data-estate]');

        try {
            $json_data = json_decode($data_estate->attr('data-estate'), true);

            // json_decode() signals an empty or malformed document by returning
            // null rather than throwing, so the catch below would never run for it.
            if (!is_array($json_data)) {
                return $this->parseHTML($parsed_home, $crawler);
            }

            $parsed_home->price = $json_data['price'];
            $parsed_home->city = $json_data['city']['name'];
            $parsed_home->surface = $json_data['surface'];
            $parsed_home->garden_surface = $json_data['lotSurface'];
            $parsed_home->rooms = $json_data['nbRooms'];
            $parsed_home->description = $json_data['longAd'];
            $parsed_home->title = $json_data['seo']['metaTitle'];
            $parsed_home->map = ['lat' => $json_data['latitude'], 'lng' => $json_data['longitude']];
            $parsed_home->pictures = $json_data['imagesFull'];
            $parsed_home->energy = $this->calculateDPE($json_data['consumptionValue']);
            $parsed_home->ges = $this->calculateGES($json_data['emissionValue']);

            return $parsed_home;
        } catch (InvalidArgumentException $e) {
            // NOTE(review): presumably thrown by the crawler when no node matches — confirm.
            return $this->parseHTML($parsed_home, $crawler);
        }
    }

    /**
     * Fills the given ParsedHome by scraping the ad markup, then fetches the
     * `/photos/` sub-page to collect the picture URLs.
     *
     * @param \App\ParsedHome                       $parsed_home object to populate
     * @param \Symfony\Component\DomCrawler\Crawler $crawler     crawler built from the ad page
     *
     * @return \App\ParsedHome
     */
    private function parseHTML(ParsedHome $parsed_home, Crawler $crawler)
    {
        $ad = $crawler->filter('article');
        $first_section = $ad->children()->first();
        $second_section = $ad->children()->eq(1);
        $third_section = $ad->children()->eq(2);

        $parsed_home->description = $second_section->filter('.o-container')->children()->eq(1)->text();

        $second_section->filter('.c-badge__text')->each(static function (Crawler $detail) use ($parsed_home) {
            $detail_text = $detail->text();

            if (mb_strpos($detail_text, 'Terrain') === 0) {
                // Skips the first 8 characters and drops the last 2.
                // NOTE(review): presumably the "Terrain " label and a trailing unit — confirm.
                $parsed_home->garden_surface = mb_substr($detail_text, 8, -2);
            }

            if (mb_strpos($detail_text, 'pièces') !== false) {
                // The integer cast keeps only the leading number of the badge text.
                $parsed_home->rooms = (int)$detail_text;
            }
        });

        $h1 = $first_section->filter('h1');
        $parsed_home->title = $h1->children()->first()->text();
        $parsed_home->surface = (int)$h1->children()->eq(2)->text();
        $parsed_home->city = $h1->children()->eq(4)->text();

        $parsed_home->price = (int)str_replace(' ', '', $first_section->filter('.u-h1')->text());

        $third_section->filter('.c-dpe')->each(static function (Crawler $detail) use ($parsed_home) {
            $abbr = $detail->filter('abbr');
            if ($abbr->count() === 0) {
                return;
            }

            // NOTE(review): the original only tested for an *attribute* named
            // "c-dpe--ges"; the name looks like a CSS class modifier, so the class
            // list is checked as well. Confirm against the live markup.
            $classes = preg_split('/\s+/', trim((string)$detail->attr('class')));
            $is_ges = $detail->attr('c-dpe--ges') !== null
                || in_array('c-dpe--ges', $classes, true);

            if ($is_ges) {
                $parsed_home->ges = $abbr->text();
            } else {
                $parsed_home->energy = $abbr->text();
            }
        });

        $photos_request = $this->client->get($this->url.'/photos/');
        $photos_body = $photos_request->getBody()->getContents();
        $photos_crawler = new Crawler($photos_body);

        $sources = $photos_crawler
            ->filter('.u-cover')
            ->each(static function (Crawler $node) {
                if (strtolower($node->nodeName()) === 'img') {
                    return $node->attr('src');
                }

                return null;
            });

        // each() yields one entry per matched node, so nodes that are not <img>
        // (or have no src) would otherwise leave null holes in the picture list.
        $parsed_home->pictures = array_values(array_filter($sources, static function ($source) {
            return $source !== null;
        }));

        return $parsed_home;
    }
}