Avas Crawler fixes

This commit is contained in:
2020-10-03 20:07:38 +05:00
parent d8bfdf4693
commit ee8e34e0e3
3 changed files with 25 additions and 10 deletions

View File

@@ -18,7 +18,7 @@ class AvasService
$articlesitems = []; $articlesitems = [];
// Loop through the articles and scrape each one, creating a new scraper instance per article. // Loop through the articles and scrape each one, creating a new scraper instance per article.
foreach ($articles as $article) { foreach ($articles as $article) {
$articlesitems[] = (new AvasScraper)->extract($article["link"]); $articlesitems[] = (new AvasScraper)->extract($article["link"], $article["date"]);
} }
return $articlesitems; return $articlesitems;

View File

@@ -3,6 +3,7 @@
namespace App\Services\Feeds; namespace App\Services\Feeds;
use Goutte\Client; use Goutte\Client;
use Illuminate\Support\Carbon;
class AvasFeed implements Feed class AvasFeed implements Feed
{ {
@@ -23,22 +24,36 @@ class AvasFeed implements Feed
$crawler = $this->client->request('GET', "https://avas.mv/"); $crawler = $this->client->request('GET', "https://avas.mv/");
$feeds = []; $feeds = [];
$first_batch_dates = [];
$second_batch_dates = [];
$crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a')->each(function ($node) use (&$feeds) { $crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a timeago')->each(function ($node) use (&$first_batch_dates) {
$feeds[] = [ $first_batch_dates[] = $node->attr('datetime');
"title" => trim($node->text()),
"link" => "https://avas.mv" . $node->attr('href')
];
}); });
$crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a')->each(function ($node) use (&$feeds) { $crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a timeago')->each(function ($node) use (&$second_batch_dates) {
$second_batch_dates[] = $node->attr('datetime');
});
$crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a')->each(function ($node, $i) use (&$feeds, $first_batch_dates) {
$feeds[] = [ $feeds[] = [
"title" => trim($node->text()), "title" => trim($node->text()),
"link" => "https://avas.mv" . $node->attr('href') "link" => "https://avas.mv" . $node->attr('href'),
"date" => Carbon::parse($first_batch_dates[$i])->format("Y-m-d H:i:s")
]; ];
}); });
$crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a')->each(function ($node, $i) use (&$feeds, $second_batch_dates) {
$feeds[] = [
"title" => trim($node->text()),
"link" => "https://avas.mv" . $node->attr('href'),
"date" => Carbon::parse($second_batch_dates[$i])->format("Y-m-d H:i:s")
];
});
return $feeds; return $feeds;
} }

View File

@@ -18,7 +18,7 @@ class AvasScraper
$this->client = new Client; $this->client = new Client;
} }
public function extract($url) public function extract($url, $date)
{ {
$crawler = $this->client->request('GET', $url); $crawler = $this->client->request('GET', $url);
@@ -64,7 +64,7 @@ class AvasScraper
'image' => $this->image, 'image' => $this->image,
'content' => $this->content, 'content' => $this->content,
'url' => $url, 'url' => $url,
'date' => Carbon::parse($crawler->filter('timeago')->first()->attr('datetime'))->format("Y-m-d H:i:s"), 'date' => $date,
'guid' => str_replace("https://avas.mv/","",$url), 'guid' => str_replace("https://avas.mv/","",$url),
'author' => $this->author, 'author' => $this->author,
'topics' => $this->topics 'topics' => $this->topics