👽 Ajoute le scraping de SeLoger car l'API ne fonctionne plus

Fix #22
This commit is contained in:
Clément 2021-02-03 16:38:01 +01:00
parent d5ffdd8699
commit 25d18c7184
4 changed files with 146 additions and 79 deletions

View File

@ -2,6 +2,8 @@
namespace App\Exceptions;
use Exception;
class UnknownParser extends Exception
{

View File

@ -27,6 +27,7 @@ abstract class Parser
* @param string $url
*
* @return \App\Parser|null
* @throws \App\Exceptions\UnknownParser
*/
public static function factory(string $url): ?Parser
{
@ -48,4 +49,75 @@ abstract class Parser
* @return \App\ParsedHome
*/
abstract public function parse(): ParsedHome;
/**
 * Converts a numeric energy score into its DPE letter, from 'A' to 'G'.
 *
 * Classes are resolved with cumulative upper bounds so that every numeric
 * value maps to a letter: fractional scores such as 50.5 no longer fall
 * between two ranges. Results for integer scores are unchanged.
 *
 * @param int|float|string|null $score Numeric score; empty values (0, null, '')
 *                                     and non-numeric values are treated as unknown.
 *
 * @return string One of 'A'..'G', or 'Inconnu' when the score cannot be graded.
 */
protected function calculateDPE($score)
{
    // Nothing to grade: missing value, or text that is not a number.
    if (empty($score) || !is_numeric($score)) {
        return 'Inconnu';
    }

    // Inclusive upper bound of each class, in ascending order.
    $upperBounds = [
        'A' => 50,
        'B' => 90,
        'C' => 150,
        'D' => 230,
        'E' => 330,
        'F' => 450,
    ];

    foreach ($upperBounds as $letter => $limit) {
        if ($score <= $limit) {
            return $letter;
        }
    }

    // Anything above the last bound is the worst class.
    return 'G';
}
/**
 * Converts a numeric emission score into its GES letter, from 'A' to 'G'.
 *
 * Classes are resolved with cumulative upper bounds so that every numeric
 * value maps to a letter: fractional scores such as 5.5 no longer fall
 * between two ranges. Results for integer scores are unchanged.
 *
 * @param int|float|string|null $score Numeric score; empty values (0, null, '')
 *                                     and non-numeric values are treated as unknown.
 *
 * @return string One of 'A'..'G', or 'Inconnu' when the score cannot be graded.
 */
protected function calculateGES($score)
{
    // Nothing to grade: missing value, or text that is not a number.
    if (empty($score) || !is_numeric($score)) {
        return 'Inconnu';
    }

    // Inclusive upper bound of each class, in ascending order.
    $upperBounds = [
        'A' => 5,
        'B' => 10,
        'C' => 20,
        'D' => 35,
        'E' => 55,
        'F' => 80,
    ];

    foreach ($upperBounds as $letter => $limit) {
        if ($score <= $limit) {
            return $letter;
        }
    }

    // Anything above the last bound is the worst class.
    return 'G';
}
}

View File

@ -36,76 +36,6 @@ class Orpi extends Parser
return $this->parseHTML($parsedHome, $crawler);
}
/**
 * Converts a numeric energy score into its DPE letter, from 'A' to 'G'.
 *
 * Classes are resolved with cumulative upper bounds so that every numeric
 * value maps to a letter: fractional scores such as 50.5 no longer fall
 * between two ranges. Results for integer scores are unchanged.
 *
 * @param int|float|string|null $score Numeric score; empty values (0, null, '')
 *                                     and non-numeric values are treated as unknown.
 *
 * @return string One of 'A'..'G', or 'Inconnu' when the score cannot be graded.
 */
private function calculateDPE($score)
{
    // Nothing to grade: missing value, or text that is not a number.
    if (empty($score) || !is_numeric($score)) {
        return 'Inconnu';
    }

    // Inclusive upper bound of each class, in ascending order.
    $upperBounds = [
        'A' => 50,
        'B' => 90,
        'C' => 150,
        'D' => 230,
        'E' => 330,
        'F' => 450,
    ];

    foreach ($upperBounds as $letter => $limit) {
        if ($score <= $limit) {
            return $letter;
        }
    }

    // Anything above the last bound is the worst class.
    return 'G';
}
/**
 * Converts a numeric emission score into its GES letter, from 'A' to 'G'.
 *
 * Classes are resolved with cumulative upper bounds so that every numeric
 * value maps to a letter: fractional scores such as 5.5 no longer fall
 * between two ranges. Results for integer scores are unchanged.
 *
 * @param int|float|string|null $score Numeric score; empty values (0, null, '')
 *                                     and non-numeric values are treated as unknown.
 *
 * @return string One of 'A'..'G', or 'Inconnu' when the score cannot be graded.
 */
private function calculateGES($score)
{
    // Nothing to grade: missing value, or text that is not a number.
    if (empty($score) || !is_numeric($score)) {
        return 'Inconnu';
    }

    // Inclusive upper bound of each class, in ascending order.
    $upperBounds = [
        'A' => 5,
        'B' => 10,
        'C' => 20,
        'D' => 35,
        'E' => 55,
        'F' => 80,
    ];

    foreach ($upperBounds as $letter => $limit) {
        if ($score <= $limit) {
            return $letter;
        }
    }

    // Anything above the last bound is the worst class.
    return 'G';
}
/**
* @param \App\ParsedHome $parsed_home
* @param \Symfony\Component\DomCrawler\Crawler $crawler

View File

@ -4,6 +4,8 @@ namespace App\Parser;
use App\ParsedHome;
use App\Parser;
use GuzzleHttp\Exception\GuzzleException;
use Symfony\Component\DomCrawler\Crawler;
/**
* Thanks to https://github.com/axeleroy/untoitpourcaramel/issues/3
@ -25,6 +27,7 @@ class SeLoger extends Parser
$token = $this->retrieveToken();
$idAnnonce = $this->getIdAnnonceFromUrl($this->url);
$url = sprintf('%s%s', self::LISTING_URL, $idAnnonce);
try {
$request = $this->client->request(
'GET',
$url,
@ -34,6 +37,9 @@ class SeLoger extends Parser
],
]
);
} catch (GuzzleException $e) {
return $this->parseHTML();
}
$annonce = json_decode($request->getBody()->getContents(), true);
$parsedHome = new ParsedHome();
@ -83,4 +89,61 @@ class SeLoger extends Parser
return trim($request->getBody()->getContents(), '"');
}
/**
 * Builds a ParsedHome by scraping the HTML page at $this->url.
 *
 * The page is requested with browser-like headers and then queried with
 * CSS selectors. Energy and GES scores are optional: when their nodes are
 * missing the grade helpers receive 0 and answer 'Inconnu'.
 *
 * NOTE(review): title, description, price and city are still read
 * unconditionally; presumably Crawler::text() throws when one of those
 * selectors matches nothing — confirm against the installed DomCrawler.
 *
 * @return \App\ParsedHome
 */
private function parseHTML()
{
    $response = $this->client->get(
        $this->url,
        [
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:85.0) Gecko/20100101 Firefox/85.0',
                'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                // NOTE(review): advertising "br" assumes the HTTP handler can decode brotli — confirm.
                'Accept-Encoding' => 'gzip, deflate, br',
                'Accept-Language' => 'fr,fr-FR;q=0.8,en-US;q=0.5,en;q=0.3',
                'Cache-Control' => 'no-cache',
                'Connection' => 'keep-alive',
            ]
        ]
    );
    $crawler = new Crawler($response->getBody()->getContents());

    $parsed_home = new ParsedHome();
    $parsed_home->title = $crawler->filter('h1')->text();
    $parsed_home->description = $crawler->filter('#showcase-description > div')->first()->text();

    // Thousands separators may be regular, non-breaking or narrow non-breaking
    // spaces; all of them must go before the integer cast, otherwise
    // "1 200 €" would be read as 1.
    $priceText = $crawler->filter('[class^=Summarystyled__PriceText]')->text();
    $compactPrice = preg_replace('/[\s\x{00A0}\x{202F}]+/u', '', $priceText);
    if (null === $compactPrice) {
        // preg_replace failed (e.g. invalid UTF-8): fall back to plain space removal.
        $compactPrice = str_replace(' ', '', $priceText);
    }
    $parsed_home->price = (int) $compactPrice;

    $parsed_home->city = $crawler->filter('[class^=Summarystyled__Address]')->text();

    // First tooltip value is the energy score, second one the GES score.
    // Either may be absent, in which case 0 yields 'Inconnu'.
    $scores = $crawler->filter('[class^=Preview__PreviewTooltipValue]');
    $parsed_home->energy = $this->calculateDPE(
        $scores->count() > 0 ? (int) $scores->eq(0)->text() : 0
    );
    $parsed_home->ges = $this->calculateGES(
        $scores->count() > 1 ? (int) $scores->eq(1)->text() : 0
    );

    // Objects are passed by handle, so capturing $parsed_home by value is
    // enough for the closure to fill it in.
    $crawler
        ->filter('[class^=Summarystyled__TagsWrapper] > div')
        ->each(static function (Crawler $tag) use ($parsed_home) {
            $parts = $tag->children();
            if ($parts->count() < 2) {
                // Tag without a label in second position: nothing to read.
                return;
            }

            $label = $parts->eq(1)->text();

            // Accept both "3 pièces" and the singular "1 pièce".
            if ('pièces' === mb_substr($label, -6) || 'pièce' === mb_substr($label, -5)) {
                $parsed_home->rooms = (int) $label;
            }

            // Only labels that end in "m²" and contain no "/" are a surface.
            if ('m²' === mb_substr($label, -2) && false === strpos($label, '/')) {
                $parsed_home->surface = (int) $label;
            }
        });

    $parsed_home->pictures = $crawler
        ->filter('.swiper-wrapper')
        ->first()
        ->filter('[data-background]')
        ->each(static function (Crawler $picture) {
            return $picture->attr('data-background');
        });

    return $parsed_home;
}
}