Avas Crawler fixes

This commit is contained in:
2020-10-03 20:07:38 +05:00
parent d8bfdf4693
commit ee8e34e0e3
3 changed files with 25 additions and 10 deletions

View File

@@ -18,7 +18,7 @@ class AvasService
$articlesitems = [];
// Loop through the articles and scrape each one, creating a new scraper instance per article.
foreach ($articles as $article) {
$articlesitems[] = (new AvasScraper)->extract($article["link"]);
$articlesitems[] = (new AvasScraper)->extract($article["link"], $article["date"]);
}
return $articlesitems;

View File

@@ -3,6 +3,7 @@
namespace App\Services\Feeds;
use Goutte\Client;
use Illuminate\Support\Carbon;
class AvasFeed implements Feed
{
@@ -23,22 +24,36 @@ class AvasFeed implements Feed
$crawler = $this->client->request('GET', "https://avas.mv/");
$feeds = [];
$first_batch_dates = [];
$second_batch_dates = [];
$crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a')->each(function ($node) use (&$feeds) {
$feeds[] = [
"title" => trim($node->text()),
"link" => "https://avas.mv" . $node->attr('href')
];
$crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a timeago')->each(function ($node) use (&$first_batch_dates) {
$first_batch_dates[] = $node->attr('datetime');
});
$crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a')->each(function ($node) use (&$feeds) {
$crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a timeago')->each(function ($node) use (&$second_batch_dates) {
$second_batch_dates[] = $node->attr('datetime');
});
$crawler->filter('div[class*="flex rtl -mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/3 px-4 mb-7"] div a')->each(function ($node, $i) use (&$feeds, $first_batch_dates) {
$feeds[] = [
"title" => trim($node->text()),
"link" => "https://avas.mv" . $node->attr('href')
"link" => "https://avas.mv" . $node->attr('href'),
"date" => Carbon::parse($first_batch_dates[$i])->format("Y-m-d H:i:s")
];
});
$crawler->filter('div[class*="flex md:-mx-4 flex-wrap md:px-0"] div[class*="w-full md:w-1/5 px-4 mb-8"] div a')->each(function ($node, $i) use (&$feeds, $second_batch_dates) {
$feeds[] = [
"title" => trim($node->text()),
"link" => "https://avas.mv" . $node->attr('href'),
"date" => Carbon::parse($second_batch_dates[$i])->format("Y-m-d H:i:s")
];
});
return $feeds;
}

View File

@@ -18,7 +18,7 @@ class AvasScraper
$this->client = new Client;
}
public function extract($url)
public function extract($url, $date)
{
$crawler = $this->client->request('GET', $url);
@@ -64,7 +64,7 @@ class AvasScraper
'image' => $this->image,
'content' => $this->content,
'url' => $url,
'date' => Carbon::parse($crawler->filter('timeago')->first()->attr('datetime'))->format("Y-m-d H:i:s"),
'date' => $date,
'guid' => str_replace("https://avas.mv/","",$url),
'author' => $this->author,
'topics' => $this->topics