Avas Crawler fixes
This commit is contained in:
		| @@ -18,7 +18,7 @@ class AvasService | ||||
|         $articlesitems = []; | ||||
|         // Loop through the articles and scrape each one, creating a new scraper instance for every article. | ||||
|         foreach ($articles as $article) { | ||||
|             $articlesitems[] = (new AvasScraper)->extract($article["link"]); | ||||
|             $articlesitems[] = (new AvasScraper)->extract($article["link"], $article["date"]); | ||||
|         } | ||||
|  | ||||
|         return $articlesitems; | ||||
|   | ||||
| @@ -3,6 +3,7 @@ | ||||
| namespace App\Services\Feeds; | ||||
|  | ||||
| use Goutte\Client; | ||||
| use Illuminate\Support\Carbon; | ||||
|  | ||||
| class AvasFeed implements Feed | ||||
| { | ||||
| @@ -23,22 +24,36 @@ class AvasFeed implements Feed | ||||
|         $crawler = $this->client->request('GET', "https://avas.mv/"); | ||||
|  | ||||
|         $feeds = []; | ||||
|         $first_batch_dates = []; | ||||
|         $second_batch_dates = []; | ||||
|  | ||||
|         $crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a')->each(function ($node) use (&$feeds) { | ||||
|             $feeds[] = [ | ||||
|                 "title" => trim($node->text()), | ||||
|                 "link" => "https://avas.mv" . $node->attr('href') | ||||
|             ]; | ||||
|         $crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a timeago')->each(function ($node) use (&$first_batch_dates) { | ||||
|             $first_batch_dates[] = $node->attr('datetime'); | ||||
|         }); | ||||
|  | ||||
|         $crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a')->each(function ($node) use (&$feeds) { | ||||
|         $crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a timeago')->each(function ($node) use (&$second_batch_dates) { | ||||
|             $second_batch_dates[] = $node->attr('datetime'); | ||||
|         }); | ||||
|  | ||||
|  | ||||
|         $crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a')->each(function ($node, $i) use (&$feeds, $first_batch_dates) { | ||||
|             $feeds[] = [ | ||||
|                 "title" => trim($node->text()), | ||||
|                 "link" => "https://avas.mv" . $node->attr('href') | ||||
|                 "link" => "https://avas.mv" . $node->attr('href'), | ||||
|                 "date" => Carbon::parse($first_batch_dates[$i])->format("Y-m-d H:i:s") | ||||
|  | ||||
|             ]; | ||||
|         }); | ||||
|  | ||||
|  | ||||
|         $crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a')->each(function ($node, $i) use (&$feeds, $second_batch_dates) { | ||||
|             $feeds[] = [ | ||||
|                 "title" => trim($node->text()), | ||||
|                 "link" => "https://avas.mv" . $node->attr('href'), | ||||
|                 "date" => Carbon::parse($second_batch_dates[$i])->format("Y-m-d H:i:s") | ||||
|  | ||||
|             ]; | ||||
|         }); | ||||
|  | ||||
|         return $feeds; | ||||
|     } | ||||
|   | ||||
| @@ -18,7 +18,7 @@ class AvasScraper | ||||
|         $this->client = new Client; | ||||
|     } | ||||
|  | ||||
|     public function extract($url) | ||||
|     public function extract($url, $date) | ||||
|     { | ||||
|  | ||||
|         $crawler = $this->client->request('GET', $url); | ||||
| @@ -64,7 +64,7 @@ class AvasScraper | ||||
|             'image'      => $this->image, | ||||
|             'content'    => $this->content, | ||||
|             'url'        => $url, | ||||
|             'date'       =>  Carbon::parse($crawler->filter('timeago')->first()->attr('datetime'))->format("Y-m-d H:i:s"), | ||||
|             'date'       =>  $date, | ||||
|             'guid'       => str_replace("https://avas.mv/","",$url), | ||||
|             'author'     => $this->author, | ||||
|             'topics'       => $this->topics | ||||
|   | ||||
		Reference in New Issue
	
	Block a user