diff --git a/app/Console/Commands/ScrapeMiadhuCommand.php b/app/Console/Commands/ScrapeMiadhuCommand.php new file mode 100644 index 0000000..ebbaba5 --- /dev/null +++ b/app/Console/Commands/ScrapeMiadhuCommand.php @@ -0,0 +1,72 @@ +first(); + + $articles = (new MiadhuService)->scrape(); + + foreach ($articles as $article) { + + // Create or update the article on the source's articles relation (matched by guid) and keep the current article model instance + $articleModel = $source->articles()->updateOrCreate(["guid" => $article["guid"]], + [ + "title" => $article["title"], + "url" => $article["url"], + "author" => $article["author"], + "featured_image" => $article["image"], + "body" => $article["content"], + "published_date" => $article["date"], + "meta" => [ + "title" => $article["og_title"] + ] + + ]); + + collect($article["topics"])->each(function($topic) use ($articleModel) { + $topicModel = Topic::firstOrCreate(["slug" => $topic["slug"]],["name" => $topic["name"]]); + + $topicModel->articles()->syncWithoutDetaching($articleModel); + }); + + } + } +} diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php index 009121f..cb24bd0 100644 --- a/app/Console/Kernel.php +++ b/app/Console/Kernel.php @@ -59,7 +59,10 @@ class Kernel extends ConsoleKernel ->pingOnSuccess(env('APP_URL') . "/api/ping/oneonline"); $schedule->command('scrape:feshun')->everyFiveMinutes() - ->pingOnSuccess(env('APP_URL') . "/api/ping/feshun"); + ->pingOnSuccess(env('APP_URL') . "/api/ping/feshun"); + + $schedule->command('scrape:miadhu')->everyFiveMinutes() + ->pingOnSuccess(env('APP_URL') . 
"/api/ping/miadhu"); } /** diff --git a/app/Services/Feeds/MiadhuFeed.php b/app/Services/Feeds/MiadhuFeed.php new file mode 100644 index 0000000..f0dfead --- /dev/null +++ b/app/Services/Feeds/MiadhuFeed.php @@ -0,0 +1,44 @@ +client = new Client(); + }
+    /**
+     * Return the articles listed in the news blocks of the miadhu.mv front page.
+     *
+     * Title, link and date are read from the same news block, so a block that
+     * lacks a date element can no longer shift the dates of the entries after
+     * it or trigger an undefined-offset error.
+     *
+     * @return array List of ["title" => string, "link" => string|null, "date" => string|null];
+     *               "date" is null when the block has no <em> element.
+     */
+    public function get(): array
+    {
+        $crawler = $this->client->request('GET', "https://miadhu.mv");
+
+        $feeds = [];
+
+        $crawler->filter('.middle div[class*="col-md-3 col-6 news-block"]')->each(function ($block) use (&$feeds) {
+            // Resolve the date once per block; text() must not be called on an empty node list.
+            $dateNode = $block->filter('em');
+            $date = $dateNode->count() > 0 ? $dateNode->first()->text() : null;
+
+            $block->filter('h2 a')->each(function ($link) use (&$feeds, $date) {
+                $feeds[] = [
+                    "title" => $link->text(),
+                    "link" => $link->attr('href'),
+                    "date" => $date,
+                ];
+            });
+        });
+
+        return $feeds;
+    }
+} diff --git a/app/Services/MiadhuService.php b/app/Services/MiadhuService.php new file mode 100644 index 0000000..5626ee9 --- /dev/null +++ b/app/Services/MiadhuService.php @@ -0,0 +1,27 @@ +get(); + $articlesitems = []; + //Looping through the articles and scraping and while scraping it creates a new instance of the scraper. 
+ foreach ($articles as $article) { + $articlesitems[] = (new MiadhuScraper)->extract($article["link"], $article["date"]); + } + + return $articlesitems; + } +} diff --git a/app/Services/Scrapers/MiadhuScraper.php b/app/Services/Scrapers/MiadhuScraper.php new file mode 100644 index 0000000..77bbdc7 --- /dev/null +++ b/app/Services/Scrapers/MiadhuScraper.php @@ -0,0 +1,53 @@ +client = new Client; + }
+
+    /**
+     * Fetch a single article page and extract its fields.
+     *
+     * @param string $url  Article URL; requested as-is and also used to derive the guid.
+     * @param mixed  $date Date text taken from the listing; handed to Carbon::parse().
+     *
+     * @return array Keys: source, title, og_title, image, content, url, date,
+     *               guid, author, topics.
+     */
+    public function extract($url, $date)
+    {
+        $crawler = $this->client->request('GET', $url);
+
+        // Assign the paragraphs instead of appending to the property, so a
+        // second extract() call on the same instance does not carry over the
+        // body of the previously scraped article.
+        $this->content = $crawler->filter('.read-dv-text > p')->each(function ($node) {
+            return $node->text();
+        });
+
+        // The author is only taken when exactly one ".author-name" element
+        // exists; otherwise $this->author keeps the value it already had.
+        // NOTE(review): the property's default is declared outside this view,
+        // so it is not reset here - confirm whether it should be.
+        $authorNodes = $crawler->filter('.author-name');
+        if ($authorNodes->count() === 1) {
+            $this->author = $authorNodes->first()->text();
+        }
+
+        return [
+            'source' => 'Miadhu News',
+            'title' => $crawler->filter('h1')->first()->text(),
+            'og_title' => $crawler->filter('meta[property*="og:title"]')->first()->attr('content'),
+            'image' => $crawler->filter(".col-md-12 img")->first()->attr('src'),
+            'content' => $this->content,
+            'url' => $url,
+            'date' => Carbon::parse($date)->format("Y-m-d H:i:s"),
+            // The guid is the URL with the article-read prefix removed.
+            'guid' => str_replace("https://miadhu.mv/article/read/", "", $url),
+            'author' => $this->author,
+            // Every article is filed under a single fixed placeholder topic.
+            'topics' => [
+                [
+                    "name" => "ވަކި މަޢުލޫއެއް ނޭންގެ",
+                    "slug" => "no-specific-topic",
+                ],
+            ],
+        ];
+    }
+}