From fff50a36ace80ff8771804d5073787a7ecaa8a35 Mon Sep 17 00:00:00 2001 From: Mohamed Jinas Date: Tue, 13 Oct 2020 23:01:02 +0500 Subject: [PATCH] Hama news integration --- app/Console/Commands/ScrapeHamaCommand.php | 73 ++++++++++++++++++++++ app/Console/Kernel.php | 8 ++- app/Services/Feeds/HamaFeed.php | 39 ++++++++++++ app/Services/HamaService.php | 27 ++++++++ app/Services/Scrapers/HamaScraper.php | 59 +++++++++++++++++ resources/views/articles/show.blade.php | 1 + 6 files changed, 205 insertions(+), 2 deletions(-) create mode 100644 app/Console/Commands/ScrapeHamaCommand.php create mode 100644 app/Services/Feeds/HamaFeed.php create mode 100644 app/Services/HamaService.php create mode 100644 app/Services/Scrapers/HamaScraper.php diff --git a/app/Console/Commands/ScrapeHamaCommand.php b/app/Console/Commands/ScrapeHamaCommand.php new file mode 100644 index 0000000..eb77edd --- /dev/null +++ b/app/Console/Commands/ScrapeHamaCommand.php @@ -0,0 +1,73 @@ +first(); + + $articles = (new HamaService)->scrape(); + + foreach ($articles as $article) { + + // Attach the relationship between source and article and return the current article instance + $articleModel = $source->articles()->updateOrCreate(["guid" => $article["guid"]], + [ + "title" => $article["title"], + "url" => $article["url"], + "author" => $article["author"], + "featured_image" => $article["image"], + "body" => $article["content"], + "published_date" => $article["date"], + "meta" => [ + "title" => $article["og_title"] + ] + + ]); + + collect($article["topics"])->each(function($topic) use ($articleModel) { + $topicModel = Topic::firstOrCreate(["slug" => $topic["slug"]],["name" => $topic["name"]]); + + $topicModel->articles()->syncWithoutDetaching($articleModel); + }); + + } + } +} diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php index d8e01d6..6f4e527 100644 --- a/app/Console/Kernel.php +++ b/app/Console/Kernel.php @@ -77,9 +77,13 @@ class Kernel extends ConsoleKernel ->runInBackground() 
->pingOnSuccess(env('APP_URL') . "/api/ping/miadhu"); - $schedule->command('scrape:vnews')->everyFiveMinutes() + $schedule->command('scrape:vnews')->everyFiveMinutes() ->runInBackground() - ->pingOnSuccess(env('APP_URL') . "/api/ping/vnews"); + ->pingOnSuccess(env('APP_URL') . "/api/ping/vnews"); + + $schedule->command('scrape:hama')->everyFiveMinutes() + ->runInBackground() + ->pingOnSuccess(env('APP_URL') . "/api/ping/hama"); } /** diff --git a/app/Services/Feeds/HamaFeed.php b/app/Services/Feeds/HamaFeed.php new file mode 100644 index 0000000..79488f5 --- /dev/null +++ b/app/Services/Feeds/HamaFeed.php @@ -0,0 +1,39 @@ +client = new Client(); + } + /** + * Get all the latest news + * + * @return array + */ + public function get() : array + { + + $crawler = $this->client->request('GET', "https://hama.mv/"); + + $feeds = []; + $crawler->filter('div[id*="latest"] div[class*="col-md-3 col-6"] a')->each(function ($node) use (&$feeds) { + + + $feeds[] = [ + "title" => $node->filter('h5')->first()->text(), + "link" => $node->attr('href'), + "date" => $node->filter('.datetime')->first()->text() + ]; + + }); + + return $feeds; + + } +} \ No newline at end of file diff --git a/app/Services/HamaService.php b/app/Services/HamaService.php new file mode 100644 index 0000000..a230d03 --- /dev/null +++ b/app/Services/HamaService.php @@ -0,0 +1,27 @@ +get(); + $articlesitems = []; + // Loop through the feed articles, scraping each one with a new instance of the scraper. 
+ foreach ($articles as $article) { + $articlesitems[] = (new HamaScraper)->extract($article["link"], $article["date"]); + } + + return $articlesitems; + } +} diff --git a/app/Services/Scrapers/HamaScraper.php b/app/Services/Scrapers/HamaScraper.php new file mode 100644 index 0000000..027d058 --- /dev/null +++ b/app/Services/Scrapers/HamaScraper.php @@ -0,0 +1,59 @@ +client = new Client; + } + + public function extract($url, $date) + { + $crawler = $this->client->request('GET', $url); + + $crawler->filter('.body > p')->each(function ($node) { + $this->content[] = $node->text(); + }); + + if ($crawler->filter('.author_name')->count() > 0) { + $this->author = $crawler->filter('.author_name')->first()->text(); + } + + $crawler->filter('.article-tags a')->each(function ($node) { + $this->topics[] = [ + "name" => $node->text(), + "slug" => str_replace("https://hama.mv/", "", $node->attr('href')) + ]; + }); + + + //Remove all the alphabets from string + //preg_replace("/[a-zA-Z]/", "",$string); + return [ + 'source' => 'Hama', + 'title' => $crawler->filter('h1')->first()->text(), + 'og_title' => $crawler->filter('meta[property*="og:title"]')->first()->attr('content'), + 'image' => $crawler->filter("figure > img")->first()->attr('data-src'), + 'content' => $this->content, + 'url' => $url, + 'date' => Carbon::parse($date)->format("Y-m-d H:i:s"), + 'guid' => basename($url), + 'author' => $this->author, + 'topics' => $this->topics + ]; + + } +} diff --git a/resources/views/articles/show.blade.php b/resources/views/articles/show.blade.php index 236ea49..a02d09f 100644 --- a/resources/views/articles/show.blade.php +++ b/resources/views/articles/show.blade.php @@ -94,4 +94,5 @@ + \ No newline at end of file