Fix mihaaru scraper
This commit is contained in:
		@@ -3,6 +3,7 @@
 | 
			
		||||
namespace App\Services\Scrapers;
 | 
			
		||||
 | 
			
		||||
use Goutte\Client;
 | 
			
		||||
use Symfony\Component\HttpClient\HttpClient;
 | 
			
		||||
 | 
			
		||||
class MihaaruScraper
 | 
			
		||||
{
 | 
			
		||||
@@ -16,7 +17,11 @@ class MihaaruScraper
 | 
			
		||||
 | 
			
		||||
    public function __construct()
 | 
			
		||||
    {
 | 
			
		||||
        $this->client = new Client;
 | 
			
		||||
        $this->client = new Client(
 | 
			
		||||
            HttpClient::create([
 | 
			
		||||
                "proxy" => config('karudhaas.proxy.host')
 | 
			
		||||
            ])
 | 
			
		||||
        );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public function extract($url, $date = null)
 | 
			
		||||
@@ -28,9 +33,7 @@ class MihaaruScraper
 | 
			
		||||
            $this->title = $node->text();
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
        $crawler->filter('.container  img')->eq(3)->each(function ($node) {
 | 
			
		||||
            $this->image = $node->attr('src');
 | 
			
		||||
        });
 | 
			
		||||
        $this->image = $crawler->filter('.w-full.flex.flex-col.items-end.max-w-3xl.mb-10.relative img')->attr('src');
 | 
			
		||||
 | 
			
		||||
        $crawler->filter('.by-line address')->each(function ($node) {
 | 
			
		||||
            $author = $node->text();
 | 
			
		||||
@@ -41,21 +44,28 @@ class MihaaruScraper
 | 
			
		||||
            $this->author = $cleaneddata;
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
        $crawler->filter('article p')->each(function ($node) {
 | 
			
		||||
            $this->content[] = preg_replace("/[a-zA-Z]/","",$node->text());
 | 
			
		||||
        $crawler->filter('.text-faseyha')->each(function ($node) {
 | 
			
		||||
            $this->content[] = $node->text();
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
        $crawler->filter('.article-tags')->each(function ($node) {
 | 
			
		||||
           
 | 
			
		||||
        $crawler->filter('.items-end a')->each(function ($node) {
 | 
			
		||||
 | 
			
		||||
            try {
 | 
			
		||||
                $topicName = $node->filter('span')->text();
 | 
			
		||||
                $topicSlug =  ltrim($node->attr('href'), '/');
 | 
			
		||||
            } catch (\Throwable $th) {
 | 
			
		||||
                return;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            $this->topics[] = [
 | 
			
		||||
                "name" => $node->text(),
 | 
			
		||||
                "slug" => str_replace("https://mihaaru.com/", "", $node->attr('href'))
 | 
			
		||||
                "name" => $topicName,
 | 
			
		||||
                "slug" => $topicSlug
 | 
			
		||||
            ];
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
        //Remove all the alphabets from string
 | 
			
		||||
        //preg_replace("/[a-zA-Z]/", "",$string);
 | 
			
		||||
       return [
 | 
			
		||||
        return [
 | 
			
		||||
            'source'    => 'Mihaaru',
 | 
			
		||||
            'title'      => $this->title,
 | 
			
		||||
            'og_title'   => $crawler->filter('meta[property*="og:title"]')->first()->attr('content'),
 | 
			
		||||
@@ -63,7 +73,7 @@ class MihaaruScraper
 | 
			
		||||
            'content'    => $this->content,
 | 
			
		||||
            'url'        => $url,
 | 
			
		||||
            'date'       => $date,
 | 
			
		||||
            'guid'       => str_replace("https://mihaaru.com/news/","",$url),
 | 
			
		||||
            'guid'       => str_replace("https://mihaaru.com/news/", "", $url),
 | 
			
		||||
            'author'     => $this->author,
 | 
			
		||||
            'topics'       => $this->topics
 | 
			
		||||
        ];
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user