61 lines
2.0 KiB
PHP
61 lines
2.0 KiB
PHP
<?php
|
|
|
|
namespace App\Services\Scrapers;
|
|
|
|
use Exception;
|
|
use Goutte\Client;
|
|
use Illuminate\Support\Carbon;
|
|
|
|
/**
 * Scrapes a single Funadhoo Times article page into a plain associative array.
 *
 * The page is fetched with the Goutte client created in the constructor and
 * queried with CSS selectors. The paragraph texts and tag links found on the
 * most recently extracted page are also kept on the instance ($content and
 * $topics) and are rebuilt from scratch on every extract() call.
 */
class FunadhooTimesScraper
{
    /** @var Client Client used to request article pages. */
    protected $client;

    /** @var mixed Declared but never assigned by this class. */
    protected $title;

    /** @var string[] Paragraph texts of the most recently extracted article. */
    protected $content = [];

    /** @var array[] Tag links ("name"/"slug" pairs) found on the most recently extracted article. */
    protected $topics = [];

    public function __construct()
    {
        $this->client = new Client;
    }

    /**
     * Fetch an article page and return its fields as an array.
     *
     * Any selector below that matches no element is expected to make the
     * crawler throw when its text/attribute is read (TODO confirm against the
     * installed DomCrawler version); such errors are not caught here.
     *
     * @param string $url   Article URL to request.
     * @param string $title Title supplied by the caller; returned unchanged as 'title'.
     *
     * @return array Keys: source, title, og_title, image, content, url, date,
     *               guid, author, topics.
     */
    public function extract($url, $title)
    {
        $crawler = $this->client->request('GET', $url);

        // Crawler::each() returns the closure results as a fresh array, so the
        // stored paragraphs never carry over from a previous extract() call and
        // are an empty array (not null) when nothing matches.
        $this->content = $crawler->filter('.card-body > p')->each(function ($node) {
            return $node->text();
        });

        // NOTE(review): the "https://hama.mv/" prefix looks like it was copied
        // from another scraper, and the collected tags are not part of the
        // returned array (a fixed placeholder topic is returned instead).
        // Kept as-is; confirm the intent.
        $this->topics = $crawler->filter('.article-tags a')->each(function ($node) {
            return [
                "name" => $node->text(),
                "slug" => str_replace("https://hama.mv/", "", $node->attr('href')),
            ];
        });

        // The byline element is read once and split on '|': the first part is
        // parsed as the publication date, the second is returned as the author.
        $byline = explode(
            '|',
            $crawler->filter('div[class*="d-flex flex-row-reverse justify-content-between"] small')->first()->text()
        );

        return [
            'source' => 'Funadhoo times',
            'title' => $title,
            'og_title' => $crawler->filter('meta[property*="og:title"]')->first()->attr('content'),
            // The image src is appended to the site root, i.e. it is assumed
            // to be a site-relative path — TODO confirm.
            'image' => "https://funadhootimes.com/" . $crawler->filter(".card img")->first()->attr('src'),
            'content' => $this->content,
            'url' => $url,
            'date' => Carbon::parse($byline[0])->format("Y-m-d H:i:s"),
            // The guid is whatever remains of the URL after removing the
            // article-page prefix.
            'guid' => str_replace("https://www.funadhootimes.com/p.php?id=", "", $url),
            'author' => $byline[1],
            'topics' => [
                [
                    "name" => "ވަކި މަޢުލޫއެއް ނޭންގެ",
                    "slug" => "no-specific-topic",
                ],
            ],
        ];
    }
}
|