diff --git a/.idea/baraveli-news.iml b/.idea/baraveli-news.iml index 8d45442..7993b0b 100644 --- a/.idea/baraveli-news.iml +++ b/.idea/baraveli-news.iml @@ -2,11 +2,10 @@ - - - + + @@ -116,6 +115,49 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/.idea/blade.xml b/.idea/blade.xml new file mode 100644 index 0000000..552879b --- /dev/null +++ b/.idea/blade.xml @@ -0,0 +1,121 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/laravel-idea.xml b/.idea/laravel-idea.xml new file mode 100644 index 0000000..29d6b2e --- /dev/null +++ b/.idea/laravel-idea.xml @@ -0,0 +1,19 @@ + + + + + + \ No newline at end of file diff --git a/.idea/php.xml b/.idea/php.xml index 8580406..594c763 100644 --- a/.idea/php.xml +++ b/.idea/php.xml @@ -1,5 +1,15 @@ + + + + + + @@ -111,12 +121,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + + + + + \ No newline at end of file diff --git a/.idea/phpunit.xml b/.idea/phpunit.xml index 0e6ddb0..4f8104c 100644 --- a/.idea/phpunit.xml +++ b/.idea/phpunit.xml @@ -3,8 +3,7 @@ diff --git a/app/Console/Commands/ScrapeAdhadhuCommand.php b/app/Console/Commands/ScrapeAdhadhuCommand.php new file mode 100644 index 0000000..6d3919d --- /dev/null +++ b/app/Console/Commands/ScrapeAdhadhuCommand.php @@ -0,0 +1,73 @@ +first(); + + $articles = (new AdhadhuService)->scrape(); + + foreach ($articles as $article) { + $articleModel = $source->articles()->updateOrCreate( + ["guid" => $article["guid"]], + [ + "title" => $article["title"], + "url" => $article["url"], + "author" => $article["author"], + "featured_image" => $article["image"], + "body" => $article["content"], + "published_date" => $article["date"], + "meta" => [ + "title" => 
$article["og_title"]
+                    ]
+                ]
+            );
+
+            collect($article["topics"])->each(function ($topic) use ($articleModel) {
+                $topicModel = Topic::firstOrCreate(["slug" => $topic["slug"]], ["name" => $topic["name"]]);
+
+                $topicModel->articles()->syncWithoutDetaching($articleModel);
+            });
+        }
+
+        return 0;
+    }
+}
diff --git a/app/Console/Kernel.php b/app/Console/Kernel.php
index 096723c..b1b87ca 100644
--- a/app/Console/Kernel.php
+++ b/app/Console/Kernel.php
@@ -101,8 +101,13 @@ class Kernel extends ConsoleKernel
         $schedule->command('scrape:dhiyares')->everyFiveMinutes()
             ->runInBackground()
             ->pingOnSuccess(config('app.url') . "/api/ping/dhiyares");
+
+        $schedule->command('scrape:adhadhu')->everyFiveMinutes()
+            ->runInBackground()
+            ->pingOnSuccess(config('app.url') . "/api/ping/adhadhu");
     }
+
     /**
      * Register the commands for the application.
      *
diff --git a/app/Services/AdhadhuService.php b/app/Services/AdhadhuService.php
new file mode 100644
index 0000000..4bb0574
--- /dev/null
+++ b/app/Services/AdhadhuService.php
@@ -0,0 +1,29 @@
+get();
+
+        $articleItems = [];
+        foreach ($articles as $article) {
+            $scrapedData = (new AdhadhuScraper)->extract($article["link"], $article["date"]);
+            if ($scrapedData !== null) {
+                $articleItems[] = $scrapedData;
+            }
+        }
+
+        return $articleItems;
+    }
+}
diff --git a/app/Services/Feeds/AdhadhuFeed.php b/app/Services/Feeds/AdhadhuFeed.php
new file mode 100644
index 0000000..114ae2f
--- /dev/null
+++ b/app/Services/Feeds/AdhadhuFeed.php
@@ -0,0 +1,82 @@
+client = new Client();
+    }
+
+    /**
+     * Return the latest articles listed on the Adhadhu news category page.
+     *
+     * Anchors without a headline (<h4>) or without an href are skipped, so a
+     * single non-article link cannot abort the whole feed.
+     *
+     * @return array List of ["title" => string, "link" => string, "date" => string] entries
+     */
+    public function get(): array
+    {
+        $crawler = $this->client->request('GET', "https://adhadhu.com/category/News");
+
+        $feeds = [];
+
+        // "div.list a" already matches every "div.list a.item", so a single selector is enough.
+        $crawler->filter('div.category-news div.row div.list a')->each(function ($node) use (&$feeds) {
+            $heading = $node->filter('h4');
+            $link = trim((string) $node->attr('href'));
+
+            // text() throws on an empty node set; anchors that are not article
+            // cards (no headline or no target) are ignored instead.
+            if ($heading->count() === 0 || $link === '') {
+                return;
+            }
+
+            // The relative timestamp is optional; extractDate() falls back to "now".
+            $timeNode = $node->filter('p.font-11');
+            $timeText = $timeNode->count() > 0 ? $timeNode->text() : '';
+
+            // Only site-relative hrefs get the base URL; absolute URLs are kept untouched.
+            if (preg_match('#^https?://#i', $link) !== 1) {
+                $link = "https://adhadhu.com/" . ltrim($link, '/');
+            }
+
+            $feeds[] = [
+                "title" => trim($heading->text()),
+                "link" => $link,
+                "date" => $this->extractDate($timeText),
+            ];
+        });
+
+        return $feeds;
+    }
+
+    /**
+     * Convert a relative timestamp such as "5 minutes ago" into an absolute date string.
+     *
+     * @param string $timeText Text containing "<number> <unit>", unit being second, minute,
+     *                         hour, day, week, month or year (singular or plural, any case)
+     * @return string Date formatted as "Y-m-d H:i:s"; the current time when no offset is recognised
+     */
+    protected function extractDate($timeText)
+    {
+        $date = Carbon::now();
+
+        // Case-insensitive so "5 Minutes ago" is treated like "5 minutes ago".
+        if (preg_match('/(\d+)\s*(second|minute|hour|day|week|month|year)s?/i', (string) $timeText, $matches) === 1) {
+            $number = (int) $matches[1];
+
+            // The result is re-assigned so this works with mutable and immutable Carbon alike.
+            switch (strtolower($matches[2])) {
+                case 'second':
+                    $date = $date->subSeconds($number);
+                    break;
+                case 'minute':
+                    $date = $date->subMinutes($number);
+                    break;
+                case 'hour':
+                    $date = $date->subHours($number);
+                    break;
+                case 'day':
+                    $date = $date->subDays($number);
+                    break;
+                case 'week':
+                    $date = $date->subWeeks($number);
+                    break;
+                case 'month':
+                    $date = $date->subMonths($number);
+                    break;
+                case 'year':
+                    $date = $date->subYears($number);
+                    break;
+            }
+        }
+
+        return $date->format('Y-m-d H:i:s');
+    }
+
+}
+
diff --git a/app/Services/Scrapers/AdhadhuScraper.php b/app/Services/Scrapers/AdhadhuScraper.php
new file mode 100644
index 0000000..52125d5
--- /dev/null
+++ b/app/Services/Scrapers/AdhadhuScraper.php
@@ -0,0 +1,65 @@
+client = new Client;
+    }
+
+    /**
+     * Scrape a single Adhadhu article page.
+     *
+     * Optional elements (hero image, author avatar, og:title, tag label) are
+     * tolerated when missing instead of raising DomCrawler's empty-node-list exception.
+     *
+     * @param string      $url  Absolute URL of the article
+     * @param string|null $date Publication date, passed through unchanged into the result
+     * @return array|null Article data, or null when the page has no headline
+     *                    (AdhadhuService::scrape() skips null results)
+     */
+    public function extract($url, $date = null)
+    {
+        $crawler = $this->client->request('GET', $url);
+
+        // Start clean so reusing this instance does not carry paragraphs or
+        // tags over from a previously extracted article.
+        $this->content = [];
+        $this->topics = [];
+
+        // Extract title; without a headline the page is not treated as an article
+        $heading = $crawler->filter('h1.font-52');
+        if ($heading->count() === 0) {
+            return null;
+        }
+        $this->title = $heading->first()->text();
+
+        // Extract image URL (optional)
+        $heroImage = $crawler->filter('img.img-fluid.hero-img');
+        $this->image = $heroImage->count() > 0 ? $heroImage->first()->attr('src') : null;
+
+        // Extract author name from the avatar's alt text (optional)
+        $avatar = $crawler->filter('.MuiAvatar-circle img');
+        $this->author = $avatar->count() > 0 ? $avatar->first()->attr('alt') : null;
+
+        // Extract content, one entry per paragraph
+        $crawler->filter('.body > p')->each(function ($node) {
+            $this->content[] = $node->text();
+        });
+
+        // Extract topics (tags)
+        $crawler->filter('a[href^="/tags/"]')->each(function ($node) {
+            $href = $node->attr('href');
+            $slug = basename($href); // Extracts the last segment of the URL
+
+            // Prefer the inner .tag label; fall back to the anchor's own text when it is absent
+            $label = $node->filter('.tag');
+            $name = $label->count() > 0 ? $label->first()->text() : $node->text();
+
+            $this->topics[] = [
+                "name" => trim($name),
+                "slug" => Str::slug($slug)
+            ];
+        });
+
+        // og:title falls back to the visible headline when the meta tag is missing
+        $ogTitle = $crawler->filter('meta[property*="og:title"]');
+
+        return [
+            'source' => 'Adhadhu',
+            'title' => $this->title,
+            'og_title' => $ogTitle->count() > 0 ? $ogTitle->first()->attr('content') : $this->title,
+            'image' => $this->image,
+            'content' => $this->content,
+            'url' => $url,
+            'date' => $date,
+            'guid' => str_replace("https://adhadhu.com/news/","",$url),
+            'author' => $this->author,
+            'topics' => $this->topics
+        ];
+    }
+}
diff --git a/public/images/source/adhadhu.jpg b/public/images/source/adhadhu.jpg
new file mode 100644
index 0000000..98e7321
Binary files /dev/null and b/public/images/source/adhadhu.jpg differ