Avas Crawler fixes
This commit is contained in:
		@@ -18,7 +18,7 @@ class AvasService
 | 
			
		||||
        $articlesitems = [];
 | 
			
		||||
        // Loop through the articles, scraping each one with a fresh AvasScraper instance.
 | 
			
		||||
        foreach ($articles as $article) {
 | 
			
		||||
            $articlesitems[] = (new AvasScraper)->extract($article["link"]);
 | 
			
		||||
            $articlesitems[] = (new AvasScraper)->extract($article["link"], $article["date"]);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return $articlesitems;
 | 
			
		||||
 
 | 
			
		||||
@@ -3,6 +3,7 @@
 | 
			
		||||
namespace App\Services\Feeds;
 | 
			
		||||
 | 
			
		||||
use Goutte\Client;
 | 
			
		||||
use Illuminate\Support\Carbon;
 | 
			
		||||
 | 
			
		||||
class AvasFeed implements Feed
 | 
			
		||||
{
 | 
			
		||||
@@ -23,22 +24,36 @@ class AvasFeed implements Feed
 | 
			
		||||
        $crawler = $this->client->request('GET', "https://avas.mv/");
 | 
			
		||||
 | 
			
		||||
        $feeds = [];
 | 
			
		||||
        $first_batch_dates = [];
 | 
			
		||||
        $second_batch_dates = [];
 | 
			
		||||
 | 
			
		||||
        $crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a')->each(function ($node) use (&$feeds) {
 | 
			
		||||
            $feeds[] = [
 | 
			
		||||
                "title" => trim($node->text()),
 | 
			
		||||
                "link" => "https://avas.mv" . $node->attr('href')
 | 
			
		||||
            ];
 | 
			
		||||
        $crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a timeago')->each(function ($node) use (&$first_batch_dates) {
 | 
			
		||||
            $first_batch_dates[] = $node->attr('datetime');
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
        $crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a')->each(function ($node) use (&$feeds) {
 | 
			
		||||
        $crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a timeago')->each(function ($node) use (&$second_batch_dates) {
 | 
			
		||||
            $second_batch_dates[] = $node->attr('datetime');
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        $crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a')->each(function ($node, $i) use (&$feeds, $first_batch_dates) {
 | 
			
		||||
            $feeds[] = [
 | 
			
		||||
                "title" => trim($node->text()),
 | 
			
		||||
                "link" => "https://avas.mv" . $node->attr('href')
 | 
			
		||||
                "link" => "https://avas.mv" . $node->attr('href'),
 | 
			
		||||
                "date" => Carbon::parse($first_batch_dates[$i])->format("Y-m-d H:i:s")
 | 
			
		||||
 | 
			
		||||
            ];
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        $crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a')->each(function ($node, $i) use (&$feeds, $second_batch_dates) {
 | 
			
		||||
            $feeds[] = [
 | 
			
		||||
                "title" => trim($node->text()),
 | 
			
		||||
                "link" => "https://avas.mv" . $node->attr('href'),
 | 
			
		||||
                "date" => Carbon::parse($second_batch_dates[$i])->format("Y-m-d H:i:s")
 | 
			
		||||
 | 
			
		||||
            ];
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
        return $feeds;
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
@@ -18,7 +18,7 @@ class AvasScraper
 | 
			
		||||
        $this->client = new Client;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public function extract($url)
 | 
			
		||||
    public function extract($url, $date)
 | 
			
		||||
    {
 | 
			
		||||
 | 
			
		||||
        $crawler = $this->client->request('GET', $url);
 | 
			
		||||
@@ -64,7 +64,7 @@ class AvasScraper
 | 
			
		||||
            'image'      => $this->image,
 | 
			
		||||
            'content'    => $this->content,
 | 
			
		||||
            'url'        => $url,
 | 
			
		||||
            'date'       =>  Carbon::parse($crawler->filter('timeago')->first()->attr('datetime'))->format("Y-m-d H:i:s"),
 | 
			
		||||
            'date'       =>  $date,
 | 
			
		||||
            'guid'       => str_replace("https://avas.mv/","",$url),
 | 
			
		||||
            'author'     => $this->author,
 | 
			
		||||
            'topics'       => $this->topics
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user