diff --git a/app/Services/Feeds/ThiladhunFeed.php b/app/Services/Feeds/ThiladhunFeed.php new file mode 100644 index 0000000..c6b04f9 --- /dev/null +++ b/app/Services/Feeds/ThiladhunFeed.php @@ -0,0 +1,39 @@ +client = new Client(); + } + /** + * Get all the latest news + * + * @return array + */ + public function get() : array + { + + $crawler = $this->client->request('GET', "https://www.thiladhun.com"); + + $feeds = []; + $crawler->filter('div[class*="posts-listing posts-list"] article')->each(function ($node) use (&$feeds) { + + + $feeds[] = [ + "title" => $node->filter('.post__title a')->text(), + "link" => $node->filter('.post__title a')->attr('href'), + "date" => $node->filter('time')->first()->attr('datetime') + ]; + + }); + + return $feeds; + + } +} \ No newline at end of file diff --git a/app/Services/Scrapers/ThiladhunScraper.php b/app/Services/Scrapers/ThiladhunScraper.php index dd6c988..3d842e7 100644 --- a/app/Services/Scrapers/ThiladhunScraper.php +++ b/app/Services/Scrapers/ThiladhunScraper.php @@ -10,7 +10,6 @@ class ThiladhunScraper protected $title; protected $content; - protected $guid; protected $image; protected $author; protected $topics = []; @@ -34,9 +33,8 @@ class ThiladhunScraper * * @return array */ - public function extract($url) + public function extract($url, $date) { - $this->guid = str_replace('https://thiladhun.com/', '', $url); $crawler = $this->client->request('GET', $url); @@ -75,10 +73,10 @@ class ThiladhunScraper 'og_title' => str_replace(" | Thiladhun", "", $crawler->filter('title')->first()->text('content')), 'image' => $this->image, 'content' => $this->content, - 'date' => $crawler->filter('.entry-meta time')->attr('datetime'), + 'date' => $date, 'url' => $url, 'author' => $this->author, - 'guid' => $this->guid, + 'guid' => basename($url), 'topics' => $this->topics ? : [ [ "name" => "ވަކި މަޢުލޫއެއް ނޭންގެ", diff --git a/app/Services/ThiladhunService.php b/app/Services/ThiladhunService.php index 32c798b..b7f7b54 100644 --- a/app/Services/ThiladhunService.php +++ b/app/Services/ThiladhunService.php @@ -2,6 +2,7 @@ namespace App\Services; +use App\Services\Feeds\ThiladhunFeed; use App\Services\Scrapers\ThiladhunScraper; class ThiladhunService extends Client @@ -14,13 +15,12 @@ class ThiladhunService extends Client public function scrape(): array { //Return only the rss that contains "news" keyboard in its url - $articles = $this->get("https://thiladhun.com/feed")["channel"]["item"]; + $articles = (new ThiladhunFeed)->get(); $articlesitems = []; //Looping through the articles and scraping and while scraping it creates a new instance of the scraper. foreach ($articles as $article) { - $link = $article['link']; - $articlesitems[] = (new ThiladhunScraper)->extract($link); + $articlesitems[] = (new ThiladhunScraper)->extract($article["link"], $article["date"]); } return $articlesitems;