Avas Crawler fixes
This commit is contained in:
@@ -18,7 +18,7 @@ class AvasService
|
||||
$articlesitems = [];
|
||||
// Loop through the articles, scraping each one with a new AvasScraper instance.
|
||||
foreach ($articles as $article) {
|
||||
$articlesitems[] = (new AvasScraper)->extract($article["link"]);
|
||||
$articlesitems[] = (new AvasScraper)->extract($article["link"], $article["date"]);
|
||||
}
|
||||
|
||||
return $articlesitems;
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
namespace App\Services\Feeds;
|
||||
|
||||
use Goutte\Client;
|
||||
use Illuminate\Support\Carbon;
|
||||
|
||||
class AvasFeed implements Feed
|
||||
{
|
||||
@@ -23,22 +24,36 @@ class AvasFeed implements Feed
|
||||
$crawler = $this->client->request('GET', "https://avas.mv/");
|
||||
|
||||
$feeds = [];
|
||||
$first_batch_dates = [];
|
||||
$second_batch_dates = [];
|
||||
|
||||
$crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a')->each(function ($node) use (&$feeds) {
|
||||
$feeds[] = [
|
||||
"title" => trim($node->text()),
|
||||
"link" => "https://avas.mv" . $node->attr('href')
|
||||
];
|
||||
$crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a timeago')->each(function ($node) use (&$first_batch_dates) {
|
||||
$first_batch_dates[] = $node->attr('datetime');
|
||||
});
|
||||
|
||||
$crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a')->each(function ($node) use (&$feeds) {
|
||||
$crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a timeago')->each(function ($node) use (&$second_batch_dates) {
|
||||
$second_batch_dates[] = $node->attr('datetime');
|
||||
});
|
||||
|
||||
|
||||
$crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a')->each(function ($node, $i) use (&$feeds, $first_batch_dates) {
|
||||
$feeds[] = [
|
||||
"title" => trim($node->text()),
|
||||
"link" => "https://avas.mv" . $node->attr('href')
|
||||
"link" => "https://avas.mv" . $node->attr('href'),
|
||||
"date" => Carbon::parse($first_batch_dates[$i])->format("Y-m-d H:i:s")
|
||||
|
||||
];
|
||||
});
|
||||
|
||||
|
||||
$crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a')->each(function ($node, $i) use (&$feeds, $second_batch_dates) {
|
||||
$feeds[] = [
|
||||
"title" => trim($node->text()),
|
||||
"link" => "https://avas.mv" . $node->attr('href'),
|
||||
"date" => Carbon::parse($second_batch_dates[$i])->format("Y-m-d H:i:s")
|
||||
|
||||
];
|
||||
});
|
||||
|
||||
return $feeds;
|
||||
}
|
||||
|
||||
@@ -18,7 +18,7 @@ class AvasScraper
|
||||
$this->client = new Client;
|
||||
}
|
||||
|
||||
public function extract($url)
|
||||
public function extract($url, $date)
|
||||
{
|
||||
|
||||
$crawler = $this->client->request('GET', $url);
|
||||
@@ -64,7 +64,7 @@ class AvasScraper
|
||||
'image' => $this->image,
|
||||
'content' => $this->content,
|
||||
'url' => $url,
|
||||
'date' => Carbon::parse($crawler->filter('timeago')->first()->attr('datetime'))->format("Y-m-d H:i:s"),
|
||||
'date' => $date,
|
||||
'guid' => str_replace("https://avas.mv/","",$url),
|
||||
'author' => $this->author,
|
||||
'topics' => $this->topics
|
||||
|
||||
Reference in New Issue
Block a user