diff --git a/app/Console/Commands/ScrapeRaajjeMvCommand.php b/app/Console/Commands/ScrapeRaajjeMvCommand.php
new file mode 100644
index 0000000..411b6ac
--- /dev/null
+++ b/app/Console/Commands/ScrapeRaajjeMvCommand.php
@@ -0,0 +1,74 @@
+first();
+
+        $articles = (new RaajjeMvService)->scrape();
+
+        foreach ($articles as $article) {
+
+            // Create or update the article under this source (matched on guid) and keep the resulting model for the topic sync below
+            $articleModel = $source->articles()->updateOrCreate(["guid" => $article["guid"]],
+            [
+                "title" => $article["title"],
+                "url" => $article["url"],
+                "author" => $article["author"],
+                "featured_image" => $article["image"],
+                "body" => $article["content"],
+                "published_date" => $article["date"],
+                "meta" => [
+                    "title" => $article["og_title"]
+                ]
+
+            ]);
+
+            // Make sure every topic exists (matched on slug), then link it to the article without detaching existing links
+            collect($article["topics"])->each(function($topic) use ($articleModel) {
+                $topicModel = Topic::firstOrCreate(["slug" => $topic["slug"]],["name" => $topic["name"]]);
+
+                $topicModel->articles()->syncWithoutDetaching($articleModel);
+            });
+
+        }
+    }
+}
diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php
index 6f4e527..a36d7be 100644
--- a/app/Console/Kernel.php
+++ b/app/Console/Kernel.php
@@ -84,6 +84,10 @@ class Kernel extends ConsoleKernel
         $schedule->command('scrape:hama')->everyFiveMinutes()
                  ->runInBackground()
                  ->pingOnSuccess(env('APP_URL') . "/api/ping/hama");
+
+        $schedule->command('scrape:raajjemv')->everyFiveMinutes()
+                 ->runInBackground()
+                 ->pingOnSuccess(env('APP_URL') . "/api/ping/raajjemv");
     }
 
     /**
diff --git a/app/Services/Feeds/RaajjeMvFeed.php b/app/Services/Feeds/RaajjeMvFeed.php
new file mode 100644
index 0000000..5eb0cd7
--- /dev/null
+++ b/app/Services/Feeds/RaajjeMvFeed.php
@@ -0,0 +1,51 @@
+client = new Client();
+    }
+    /**
+     * Get all the latest news.
+     *
+     * Requests the raajje.mv homepage, decodes the JSON stored in the ":collection"
+     * attribute of the first matching article-grid element and maps each entry to
+     * its title, link and date.
+     *
+     * @return array List of ["title" => ..., "link" => ..., "date" => ...] entries
+     * @throws \RuntimeException When the attribute value does not decode to an array
+     */
+    public function get(): array
+    {
+
+        $crawler = $this->client->request('GET', "https://raajje.mv/");
+
+        $feeds = [];
+        // The article list is entity-encoded JSON held in the element's ":collection" attribute
+        $articles = $crawler->filter('div[v-if*="homepage.emme_fas"] article-grid')->first()->attr(':collection');
+
+        $raw_response = json_decode(html_entity_decode((string) $articles), true);
+        // json_decode() yields null on malformed input; stop here with a clear error instead of iterating a non-array
+        if (!is_array($raw_response)) {
+            throw new \RuntimeException('RaajjeMvFeed: the ":collection" attribute did not decode to a JSON array');
+        }
+
+        foreach($raw_response as $response)
+        {
+            $feeds[] = [
+                "title" => $response["heading"],
+                "link" => "https://raajje.mv/".$response["id"],
+                "date" => $response["approved_date"]
+            ];
+        }
+
+        return $feeds;
+    }
+}
diff --git a/app/Services/RaajjeMvService.php b/app/Services/RaajjeMvService.php
new file mode 100644
index 0000000..c1e8f72
--- /dev/null
+++ b/app/Services/RaajjeMvService.php
@@ -0,0 +1,27 @@
+get();
+        $articlesitems = [];
+        // A fresh RaajjeMvScraper per article keeps one article's paragraphs out of the next, because extract() appends to $this->content
+        foreach ($articles as $article) {
+            $articlesitems[] = (new RaajjeMvScraper)->extract($article["link"], $article["date"]);
+        }
+
+        return $articlesitems;
+    }
+}
diff --git a/app/Services/Scrapers/RaajjeMvScraper.php b/app/Services/Scrapers/RaajjeMvScraper.php
new file mode 100644
index 0000000..5195b57
--- /dev/null
+++ b/app/Services/Scrapers/RaajjeMvScraper.php
@@ -0,0 +1,69 @@
+client = new Client;
+    }
+
+    /**
+     * Fetch a single article page and collect the fields stored for it.
+     *
+     * Paragraph texts are appended to $this->content, and the text of the first
+     * ".twitter-user" element (when one exists) is stored in $this->author, so an
+     * instance keeps both between calls.
+     *
+     * @param string $url  Article URL; its basename becomes the guid
+     * @param mixed  $date Value handed to Carbon::parse() for the published date
+     * @return array Keys: source, title, og_title, image, content, url, date, guid, author, topics
+     */
+    public function extract($url, $date)
+    {
+        $crawler = $this->client->request('GET', $url);
+
+        $crawler->filter('div[id*="article-content"] div > p')->each(function ($node) {
+            $this->content[] = $node->text();
+        });
+
+        if ($crawler->filter('.twitter-user')->count() > 0) {
+            $this->author = $crawler->filter('.twitter-user')->first()->text();
+        }
+
+
+        // Unused: pattern for removing all Latin letters from a string
+        //preg_replace("/[a-zA-Z]/", "",$string);
+        return [
+            'source' => 'RaajjeMv',
+            'title' => $crawler->filter('h1')->first()->text(),
+            'og_title' => $crawler->filter('meta[property*="og:title"]')->first()->attr('content'),
+            'image' => $crawler->filter("figure > img")->first()->attr('src'),
+            'content' => $this->content,
+            'url' => $url,
+            'date' => Carbon::parse($date)->format("Y-m-d H:i:s"),
+            'guid' => basename($url), // last path segment of the URL
+            'author' => $this->author,
+            // Every article is returned with the same single placeholder topic
+            'topics' => [
+                [
+                    "name" => "ވަކި މަޢުލޫއެއް ނޭންގެ",
+                    "slug" => "no-specific-topic"
+                ]
+            ]
+        ];
+    }
+}