diff --git a/app/Console/Commands/ScrapeOneOnlineCommand.php b/app/Console/Commands/ScrapeOneOnlineCommand.php new file mode 100644 index 0000000..340addd --- /dev/null +++ b/app/Console/Commands/ScrapeOneOnlineCommand.php @@ -0,0 +1,72 @@ +first(); + + $articles = (new OneOnlineService)->scrape(); + + foreach ($articles as $article) { + + // Create or update the article (matched by guid) through the source relationship and keep the current article instance + $articleModel = $source->articles()->updateOrCreate(["guid" => $article["guid"]], + [ + "title" => $article["title"], + "url" => $article["url"], + "author" => $article["author"], + "featured_image" => $article["image"], + "body" => $article["content"], + "published_date" => $article["date"], + "meta" => [ + "title" => $article["og_title"] + ] + + ]); + + collect($article["topics"])->each(function($topic) use ($articleModel) { + $topicModel = Topic::firstOrCreate(["slug" => $topic["slug"]],["name" => $topic["name"]]); + + $topicModel->articles()->syncWithoutDetaching($articleModel); + }); + + } + } +} diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php index 491efb7..8b7280f 100644 --- a/app/Console/Kernel.php +++ b/app/Console/Kernel.php @@ -26,34 +26,37 @@ class Kernel extends ConsoleKernel protected function schedule(Schedule $schedule) { $schedule->command('scrape:mihaaru')->everyFiveMinutes() - ->pingOnSuccess(env('APP_URL')."/api/ping/mihaaru"); + ->pingOnSuccess(env('APP_URL') . "/api/ping/mihaaru"); $schedule->command('scrape:sun')->everyFiveMinutes() - ->pingOnSuccess(env('APP_URL')."/api/ping/sun"); + ->pingOnSuccess(env('APP_URL') . "/api/ping/sun"); $schedule->command('scrape:avas')->everyFiveMinutes() - ->pingOnSuccess(env('APP_URL')."/api/ping/avas"); + ->pingOnSuccess(env('APP_URL') . "/api/ping/avas"); $schedule->command('scrape:dhuvas')->everyFiveMinutes() - ->pingOnSuccess(env('APP_URL')."/api/ping/dhuvas"); + ->pingOnSuccess(env('APP_URL') . 
"/api/ping/dhuvas"); $schedule->command('scrape:thiladhun')->everyFiveMinutes() - ->pingOnSuccess(env('APP_URL')."/api/ping/thiladhun"); + ->pingOnSuccess(env('APP_URL') . "/api/ping/thiladhun"); $schedule->command('scrape:thepress')->everyFiveMinutes() - ->pingOnSuccess(env('APP_URL')."/api/ping/thepress"); + ->pingOnSuccess(env('APP_URL') . "/api/ping/thepress"); $schedule->command('scrape:addulive')->everyFiveMinutes() - ->pingOnSuccess(env('APP_URL')."/api/ping/addulive"); + ->pingOnSuccess(env('APP_URL') . "/api/ping/addulive"); $schedule->command('scrape:voice')->everyFiveMinutes() - ->pingOnSuccess(env('APP_URL')."/api/ping/voice"); + ->pingOnSuccess(env('APP_URL') . "/api/ping/voice"); $schedule->command('scrape:dhen')->everyFiveMinutes() - ->pingOnSuccess(env('APP_URL')."/api/ping/dhen"); - + ->pingOnSuccess(env('APP_URL') . "/api/ping/dhen"); + $schedule->command('scrape:psm')->everyFiveMinutes() - ->pingOnSuccess(env('APP_URL')."/api/ping/psm"); + ->pingOnSuccess(env('APP_URL') . "/api/ping/psm"); + + $schedule->command('scrape:oneonline')->everyFiveMinutes() + ->pingOnSuccess(env('APP_URL') . "/api/ping/oneonline"); } /** @@ -63,7 +66,7 @@ class Kernel extends ConsoleKernel */ protected function commands() { - $this->load(__DIR__.'/Commands'); + $this->load(__DIR__ . 
'/Commands'); require base_path('routes/console.php'); } diff --git a/app/Services/Feeds/OneOnlineFeed.php b/app/Services/Feeds/OneOnlineFeed.php new file mode 100644 index 0000000..9f9fee0 --- /dev/null +++ b/app/Services/Feeds/OneOnlineFeed.php @@ -0,0 +1,37 @@ +client = new Client(); + } + /** + * Return the latest articles from oneonline.mv as a list of title/link pairs + * + * @return array + */ + public function get() : array + { + + $crawler = $this->client->request('GET', "https://oneonline.mv/"); + + $feeds = []; + + $crawler->filter('.latest-listing a')->each(function($node) use (&$feeds){ + $feeds[] = [ + "title" => $node->text(), + "link" => $node->attr('href') + ]; + }); + + + return $feeds; + + } +} \ No newline at end of file diff --git a/app/Services/OneOnlineService.php b/app/Services/OneOnlineService.php new file mode 100644 index 0000000..962520e --- /dev/null +++ b/app/Services/OneOnlineService.php @@ -0,0 +1,26 @@ +get(); + $articlesitems = []; + //Scrape every article link from the feed; a new scraper instance is created for each article. 
+ foreach ($articles as $article) { + $articlesitems[] = (new OneOnlineScraper)->extract($article["link"]); + } + + return $articlesitems; + } +} \ No newline at end of file diff --git a/app/Services/Scrapers/OneOnlineScraper.php b/app/Services/Scrapers/OneOnlineScraper.php new file mode 100644 index 0000000..cb47658 --- /dev/null +++ b/app/Services/Scrapers/OneOnlineScraper.php @@ -0,0 +1,66 @@ +client = new Client; + } + + public function extract($url) + { + + $crawler = $this->client->request('GET', $url); + + $crawler->filter('h1')->each(function ($node) { + $this->title = $node->text(); + }); + + $crawler->filter('.content p')->each(function ($node) { + $this->content[] = $node->text(); + }); + + $crawler->filter('.tags a')->each(function ($node) { + //Skip the show more tags button so it is not stored as a topic + if ($node->text() == "ގުޅޭ ޓެގު") { + return; + } + + $this->topics[] = [ + "name" => $node->text(), + "slug" => str_replace("https://oneonline.mv/", "", $node->attr('href')) + ]; + }); + + + if ($crawler->filter('div[class*="text-grey-dark font-waheed flex flex-row items-center"] span')->count() == 1) { + $this->author = $crawler->filter('div[class*="text-grey-dark font-waheed flex flex-row items-center"] span')->first()->text(); + } + //Remove all the alphabets from string + //preg_replace("/[a-zA-Z]/", "",$string); + return [ + 'source' => 'OneOnline', + 'title' => $this->title, + 'og_title' => $crawler->filter('meta[property*="og:title"]')->first()->attr('content'), + 'image' => $crawler->filter("figure img")->first()->attr('src'), + 'content' => $this->content, + 'url' => $url, + 'date' => Carbon::parse($crawler->filter('time')->first()->attr('datetime'))->format("Y-m-d H:i:s"), + 'guid' => str_replace("https://oneonline.mv/", "", $url), + 'author' => $this->author, + 'topics' => $this->topics + ]; + } +}