client = new Client;
    }

    /**
     * Fetch a single Dhuvas article page and return its scraped fields.
     *
     * The body text is looked up using three page layouts, tried in order:
     *   1. `div.normal_text_dv` blocks inside `.elementor-widget-container`
     *      (nodes containing Latin letters are skipped as ads),
     *   2. `p[dir*="rtl"]` paragraphs inside `.elementor-widget-container`,
     *   3. plain `<p>` children of `.elementor-widget-container`.
     * In layouts 2 and 3 Latin letters are stripped from each paragraph.
     *
     * Side effects: overwrites `$this->content`, and sets `$this->title` /
     * `$this->author` when matching nodes are found.
     *
     * NOTE(review): `$this->title` and `$this->author` are only assigned when
     * a matching node exists; on a reused instance they may carry over from an
     * earlier call. Their declared defaults are not visible from here, so they
     * are deliberately left untouched — confirm and reset if appropriate.
     *
     * NOTE(review): the `og:title` and featured-image lookups call
     * `first()->attr()` without checking that a node matched; presumably the
     * crawler raises an exception on an empty node list — verify against the
     * crawler library in use.
     *
     * @param string $url  Article URL, requested with GET.
     * @param mixed  $date Value passed to Carbon::parse(); defaults to null.
     *
     * @return array Associative array with the keys source, title, og_title,
     *               image, content, url, date, guid, author and topics.
     */
    public function extract($url, $date = null)
    {
        $crawler = $this->client->request('GET', $url);

        // Start every call with an empty paragraph list; otherwise a second
        // call on the same instance would append to the previous article.
        $this->content = [];

        // If the page has several <h1> elements the last one wins.
        $crawler->filter('h1')->each(function ($node) {
            $this->title = $node->text();
        });

        // Shared by the two layouts that strip Latin letters from paragraphs.
        $appendWithoutLatin = function ($node) {
            $this->content[] = preg_replace("/[a-zA-Z]/", "", $node->text());
        };

        if ($crawler->filter('.elementor-widget-container > div.normal_text_dv')->count() > 0) {
            $crawler
                ->filter('.elementor-widget-container > div.normal_text_dv, div.normal_text_dv > p')
                ->each(function ($node) {
                    $text = $node->text();

                    // Any Latin letter marks the node as an ad: skip it.
                    if (preg_match('/[a-zA-Z]/', $text)) {
                        return;
                    }

                    // No Latin letters remain here, so the text is stored as-is.
                    $this->content[] = $text;
                });
        } else {
            $rtlParagraphs = $crawler->filter('.elementor-widget-container p[dir*="rtl"]');

            // Fall back to the normal pattern when no RTL paragraphs exist.
            $paragraphs = $rtlParagraphs->count() > 0
                ? $rtlParagraphs
                : $crawler->filter('.elementor-widget-container > p');

            $paragraphs->each($appendWithoutLatin);
        }

        // The author is only taken when exactly one matching element exists.
        $authorNodes = $crawler->filter(
            'span[class*="elementor-icon-list-text elementor-post-info__item elementor-post-info__item--type-author"]'
        );
        if ($authorNodes->count() === 1) {
            $this->author = $authorNodes->first()->text();
        }

        $ogTitle = $crawler->filter('meta[property*="og:title"]')->first()->attr('content');
        $image = $crawler
            ->filter('.elementor-widget-theme-post-featured-image .elementor-image a img')
            ->first()
            ->attr('src');

        // The topic name literal below contains a line break in the original
        // source; it is kept byte-for-byte, hence the unindented second line.
        return [
            'source' => 'Dhuvas',
            'title' => $this->title,
            'og_title' => $ogTitle,
            'image' => $image,
            'content' => $this->content,
            'url' => $url,
            'date' => Carbon::parse($date)->format("Y-m-d H:i:s"),
            'guid' => str_replace("https://dhuvas.mv/", "", $url),
            'author' => $this->author,
            'topics' => [
                [
                    "name" => "ވަކި މަޢުލޫއެއް 
ނޭންގެ",
                    "slug" => "no-specific-topic"
                ]
            ]
        ];
    }
}