client = new Client; }

    /**
     * Scrape a single Mihaaru article page and return its fields as an array.
     *
     * Every field is collected from scratch on each call, so values from a
     * previously extracted article never leak into the result when the current
     * page lacks an element, and tags do not pile up across calls. The collected
     * values are still mirrored onto the instance properties (title, image,
     * author, content, tags) once extraction is done.
     *
     * @param string     $url  Address of the article page to fetch.
     * @param mixed|null $date Passed through unchanged under the 'date' key.
     * @param mixed|null $guid Passed through unchanged under the 'guid' key.
     *
     * @return array Keys: source, title, image, content, url, date, guid, author,
     *               topics. A field whose element is missing from the page is
     *               null; topics is an empty array when no tag is found.
     */
    public function extract($url, $date = null, $guid = null)
    {
        $crawler = $this->client->request('GET', $url);

        // Per-call accumulators: nothing survives from an earlier extract().
        $title = null;
        $image = null;
        $author = null;
        $content = null;
        $topics = [];

        // When several nodes match, the last one wins.
        $crawler->filter('h1')->each(static function ($node) use (&$title) {
            $title = $node->text();
        });

        // Only the fourth image inside .container is used (index 3).
        $crawler->filter('.container img')->eq(3)->each(static function ($node) use (&$image) {
            $image = $node->attr('src');
        });

        $crawler->filter('.by-line address')->each(static function ($node) use (&$author) {
            // Drop every space character, then collapse any remaining run of
            // two or more whitespace characters into one space and trim the ends.
            // NOTE(review): this also removes the spaces between the words of
            // the byline - confirm that is intended.
            $withoutSpaces = str_replace(' ', '', $node->text());
            $author = trim(preg_replace('/\s\s+/', ' ', $withoutSpaces));
        });

        $crawler->filter('article')->each(static function ($node) use (&$content) {
            // Newlines are removed outright, not replaced by a space.
            $content = str_replace("\n", '', $node->text());
        });

        $crawler->filter('.article-tags')->each(static function ($node) use (&$topics) {
            // Each topic is wrapped in a single-element list to keep the shape
            // the method has always returned.
            // NOTE(review): the extra nesting looks accidental - confirm with
            // consumers before flattening it.
            $topics[] = [
                [
                    'name' => $node->text(),
                    'slug' => str_replace('https://mihaaru.com/', '', $node->attr('href')),
                ],
            ];
        });

        // Keep the object's properties in step with the article just extracted.
        $this->title = $title;
        $this->image = $image;
        $this->author = $author;
        $this->content = $content;
        $this->tags = $topics;

        return [
            'source' => 'Mihaaru',
            'title' => $title,
            'image' => $image,
            'content' => $content,
            'url' => $url,
            'date' => $date,
            'guid' => $guid,
            'author' => $author,
            'topics' => $topics,
        ];
    }
}