client = new Client;
    }

    /**
     * Fetch a Mihaaru article page and return its scraped fields.
     *
     * Per-article state on the instance (title, image, author, content,
     * topics) is reset on every call, so calling this method repeatedly on
     * the same object never mixes data from different articles.
     *
     * @param string $url  Article URL; requested with GET through the client.
     * @param mixed  $date Passed through untouched as the 'date' entry.
     *
     * @return array Associative array with the keys: source, title, og_title,
     *               image, content, url, date, guid, author, topics.
     */
    public function extract($url, $date = null)
    {
        $crawler = $this->client->request('GET', $url);

        // Start from a clean slate: without this, content/topics from an
        // earlier call would accumulate and scalar fields could go stale.
        $this->title = null;
        $this->image = null;
        $this->author = null;
        $this->content = [];
        $this->topics = [];

        // each() returns the closure results in document order. When a page
        // has several <h1> elements the last one wins.
        $titles = $crawler->filter('h1')->each(static function ($node) {
            return $node->text();
        });
        if ($titles !== []) {
            $this->title = end($titles);
        }

        // The article image is taken from the fourth <img> inside .container;
        // eq(3) yields an empty list when there are fewer images.
        $images = $crawler->filter('.container img')->eq(3)->each(static function ($node) {
            return $node->attr('src');
        });
        if ($images !== []) {
            $this->image = $images[0];
        }

        $authors = $crawler->filter('.by-line address')->each(static function ($node) {
            // NOTE(review): this strips EVERY plain space from the by-line, so
            // the whitespace collapse below only ever sees tabs/newlines.
            // Kept as-is; confirm whether joining the words is intended.
            $spacetrim = str_replace(' ', '', $node->text());

            // Collapse remaining whitespace runs into one space and trim the ends.
            return trim(preg_replace('/\s\s+/', ' ', $spacetrim));
        });
        if ($authors !== []) {
            // Last matching by-line wins.
            $this->author = end($authors);
        }

        // Paragraph text with all ASCII letters removed.
        $this->content = $crawler->filter('article p')->each(static function ($node) {
            return preg_replace("/[a-zA-Z]/", "", $node->text());
        });

        $this->topics = $crawler->filter('.article-tags')->each(static function ($node) {
            return [
                "name" => $node->text(),
                // Cast guards against a missing href (null subject is
                // deprecated in str_replace on PHP 8.1+); result is still "".
                "slug" => str_replace("https://mihaaru.com/", "", (string) $node->attr('href')),
            ];
        });

        // attr() on an empty node list throws, so only read the meta tag
        // when it is actually present.
        $ogTitleMeta = $crawler->filter('meta[property*="og:title"]');
        $ogTitle = $ogTitleMeta->count() > 0
            ? $ogTitleMeta->first()->attr('content')
            : null;

        return [
            'source' => 'Mihaaru',
            'title' => $this->title,
            'og_title' => $ogTitle,
            'image' => $this->image,
            'content' => $this->content,
            'url' => $url,
            'date' => $date,
            'guid' => str_replace("https://mihaaru.com/news/", "", $url),
            'author' => $this->author,
            'topics' => $this->topics,
        ];
    }
}