fix voice
This commit is contained in:
		@@ -9,8 +9,9 @@ class VoiceFeed implements Feed
 | 
			
		||||
 | 
			
		||||
    public function __construct()
 | 
			
		||||
    {
 | 
			
		||||
        $this->client = new Client();        
 | 
			
		||||
    }    
 | 
			
		||||
        $this->client = new Client();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Get all the latest news
 | 
			
		||||
     *
 | 
			
		||||
@@ -18,11 +19,14 @@ class VoiceFeed implements Feed
 | 
			
		||||
     */
 | 
			
		||||
    public function get() : array
 | 
			
		||||
    {
 | 
			
		||||
      
 | 
			
		||||
        $crawler = $this->client->request('GET', "https://voice.mv/");
 | 
			
		||||
 | 
			
		||||
        return $crawler->filter('div[id*="latest-news"] .content a')
 | 
			
		||||
                            ->extract(['_text', '_attr' => 'href']);             
 | 
			
		||||
 | 
			
		||||
        return $crawler->filter('div#latest-news > div > a')->each(function ($node) {
 | 
			
		||||
            return [
 | 
			
		||||
                "title" => $node->filter('.dv-bold')->text(),
 | 
			
		||||
                "link" => 'https://voice.mv' . $node->attr('href'),
 | 
			
		||||
                "date" => $node->filter('.en-font')->text(),
 | 
			
		||||
            ];
 | 
			
		||||
        });
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -8,57 +8,49 @@ use Illuminate\Support\Carbon;
 | 
			
		||||
class VoiceScraper
 | 
			
		||||
{
 | 
			
		||||
    protected $client;
 | 
			
		||||
 | 
			
		||||
    protected $author;
 | 
			
		||||
    protected $content;
 | 
			
		||||
    protected $author = 'unknown';
 | 
			
		||||
    protected $content = [];
 | 
			
		||||
    protected $topics = [];
 | 
			
		||||
 | 
			
		||||
    public function __construct()
 | 
			
		||||
    {
 | 
			
		||||
        $this->client = new Client;
 | 
			
		||||
        $this->client = new Client();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public function extract($url)
 | 
			
		||||
    {
 | 
			
		||||
 | 
			
		||||
        $crawler = $this->client->request('GET', $url);
 | 
			
		||||
 | 
			
		||||
        $title = $crawler->filter('.content h1')->first()->text();
 | 
			
		||||
        // Extract the title from the first element matching the .text-3xl class
 | 
			
		||||
        $title = $crawler->filter('.text-3xl')->first()->text();
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        $image = $crawler->filter('.image img')->first()->attr('src');
 | 
			
		||||
 | 
			
		||||
        $crawler->filter('article p')->each(function ($node) {
 | 
			
		||||
            $this->content[] = preg_replace("/[a-zA-Z]/", "", $node->text());
 | 
			
		||||
        // Extracting article content
 | 
			
		||||
        $crawler->filter('.container .dv')->each(function ($node) {
 | 
			
		||||
            $this->content[] = $node->text();
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
        // Extracting topics
 | 
			
		||||
        $this->topics = [];
 | 
			
		||||
        $crawler->filter('.related-tags-holder a')->each(function ($node) {
 | 
			
		||||
 | 
			
		||||
            $this->topics[] = [
 | 
			
		||||
                "name" => $node->text(),
 | 
			
		||||
                "slug" => str_replace("/", "", $node->attr('href'))
 | 
			
		||||
            ];
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
        if ($crawler->filter(".authorname a")->count() == 1) {
 | 
			
		||||
            $this->author = str_replace("- ", "", $crawler->filter('.authorname a')->first()->text());
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        // Remove all Latin letters (a-z, A-Z) from the string
 | 
			
		||||
        //preg_replace("/[a-zA-Z]/", "",$string);
 | 
			
		||||
        // Returning extracted data
 | 
			
		||||
        return [
 | 
			
		||||
            'source'    => 'Voice',
 | 
			
		||||
            'title'      => $title,
 | 
			
		||||
            'og_title'   => $crawler->filter('meta[property*="og:title"]')->first()->attr('content'),
 | 
			
		||||
            'image'      => $image,
 | 
			
		||||
            'content'    => $this->content,
 | 
			
		||||
            'url'        => $url,
 | 
			
		||||
            'date'       => Carbon::parse(str_replace("- ", "", $crawler->filter('.authordatecomment .date')->first()->text()))->format("Y-m-d H:i:s"),
 | 
			
		||||
            'guid'       => str_replace("https://voice.mv/", "", $url),
 | 
			
		||||
            'author'     => $this->author,
 | 
			
		||||
            'topics'       => $this->topics
 | 
			
		||||
            'title'     => $title,
 | 
			
		||||
            'og_title'  => $crawler->filter('meta[property*="og:title"]')->first()->attr('content'),
 | 
			
		||||
            'image'     => $crawler->filter('meta[property="og:image"]')->first()->attr('content'),
 | 
			
		||||
            'content'   => $this->content,
 | 
			
		||||
            'url'       => $url,
 | 
			
		||||
            'date'      => Carbon::now(),
 | 
			
		||||
            'guid'      => str_replace("https://voice.mv/", "", $url),
 | 
			
		||||
            'author'    => $this->author,
 | 
			
		||||
            'topics'    => $this->topics
 | 
			
		||||
        ];
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,4 +1,5 @@
 | 
			
		||||
<?php
 | 
			
		||||
 | 
			
		||||
namespace App\Services;
 | 
			
		||||
 | 
			
		||||
use App\Services\Feeds\VoiceFeed;
 | 
			
		||||
@@ -18,9 +19,10 @@ class VoiceService
 | 
			
		||||
        $articlesitems = [];
 | 
			
		||||
        // Scrape each article, creating a new VoiceScraper instance for every article.
 | 
			
		||||
        foreach ($articles as $article) {
 | 
			
		||||
            $articlesitems[] = (new VoiceScraper)->extract("https://voice.mv".$article[1]);
 | 
			
		||||
 | 
			
		||||
            $articlesitems[] = (new VoiceScraper)->extract($article["link"]);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        return $articlesitems;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user