From 38ef1570f32411441f40cdd22c17a5d7e8cd6dba Mon Sep 17 00:00:00 2001 From: Mohamed Jinas Date: Wed, 7 Oct 2020 21:17:40 +0500 Subject: [PATCH] Update AvasScraper.php --- app/Services/Scrapers/AvasScraper.php | 99 ++++++++++++++------------- 1 file changed, 50 insertions(+), 49 deletions(-) diff --git a/app/Services/Scrapers/AvasScraper.php b/app/Services/Scrapers/AvasScraper.php index c952d66..dea5639 100644 --- a/app/Services/Scrapers/AvasScraper.php +++ b/app/Services/Scrapers/AvasScraper.php @@ -1,8 +1,10 @@ client = new Client; @@ -20,59 +22,58 @@ class AvasScraper public function extract($url, $date) { + try { + $crawler = $this->client->request('GET', $url); - $crawler = $this->client->request('GET', $url); + $title = $crawler->filter('h1')->first()->text(); - $title = $crawler->filter('h1')->first()->text(); + if ($crawler->filter('figure img')->count() > 0) { + $this->image = $crawler->filter('figure img')->first()->attr('src'); + } - if($crawler->filter('figure img')->count() > 0) - { - $this->image = $crawler->filter('figure img')->first()->attr('src'); - } - - if($crawler->filter('.post_content p')->count() == 0) - { - return; - } - - $crawler->filter('.post_content p')->each(function ($node) { - $this->content[] = preg_replace("/[a-zA-Z]/","",$node->text()); - }); - - $crawler->filter('div[class*="border-t border-grey-light border-dotted mt-7 py-3"] a')->each(function ($node) { - - //Removing the show more tags button - if($node->text() == "+") - { + if ($crawler->filter('.post_content p')->count() == 0) { return; } - $this->topics[] = [ - "name" => $node->text(), - "slug" => str_replace("/", "", $node->attr('href')) + + $crawler->filter('.post_content p')->each(function ($node) { + $this->content[] = preg_replace("/[a-zA-Z]/", "", $node->text()); + }); + + $crawler->filter('div[class*="border-t border-grey-light border-dotted mt-7 py-3"] a')->each(function ($node) { + + //Removing the show more tags button + if ($node->text() == "+") { + return; + } + $this->topics[] = [ + "name" => $node->text(), + "slug" => str_replace("/", "", $node->attr('href')) + ]; + }); + + + + if ($crawler->filter('div[class*="font-waheed text-grey ml-3 pl-3 text-lg border-l border-grey border-dotted"] a')->count() == 1) { + $this->author = $crawler->filter('div[class*="font-waheed text-grey ml-3 pl-3 text-lg border-l border-grey border-dotted"] a')->first()->text(); + } + + + //Remove all the alphabets from string + //preg_replace("/[a-zA-Z]/", "",$string); + return [ + 'source' => 'Avas', + 'title' => $title, + 'og_title' => $crawler->filter('meta[property*="og:title"]')->first()->attr('content'), + 'image' => $this->image, + 'content' => $this->content, + 'url' => $url, + 'date' => $date, + 'guid' => str_replace("https://avas.mv/", "", $url), + 'author' => $this->author, + 'topics' => $this->topics ]; - }); - - - - if($crawler->filter('div[class*="font-waheed text-grey ml-3 pl-3 text-lg border-l border-grey border-dotted"] a')->count() == 1) - { - $this->author = $crawler->filter('div[class*="font-waheed text-grey ml-3 pl-3 text-lg border-l border-grey border-dotted"] a')->first()->text(); + } catch (\Exception $e) { + } - - - //Remove all the alphabets from string - //preg_replace("/[a-zA-Z]/", "",$string); - return [ - 'source' => 'Avas', - 'title' => $title, - 'og_title' => $crawler->filter('meta[property*="og:title"]')->first()->attr('content'), - 'image' => $this->image, - 'content' => $this->content, - 'url' => $url, - 'date' => $date, - 'guid' => str_replace("https://avas.mv/","",$url), - 'author' => $this->author, - 'topics' => $this->topics - ]; } -} \ No newline at end of file +}