<?php
/**
 * Movie Scraper Class
 * Handles movie scraping with caching and error handling
 */

/**
 * Scrapes movie search results, movie detail pages and the approximate
 * catalogue size from the LK21 site.
 *
 * Results are memoised in an in-memory array that lives only as long as this
 * object; entries older than $cacheTimeout seconds are ignored and re-fetched.
 *
 * Config keys read by this class:
 *  - search_url  (string) prefix the URL-encoded query is appended to
 *  - user_agent  (string) User-Agent sent with search requests
 *  - timeout     (int)    cURL timeout in seconds for search requests
 *  - max_results (int)    maximum number of movies returned by scrapeMovies()
 *  - verify_ssl  (bool, optional) enable TLS peer verification; defaults to
 *                false to preserve the historical behaviour of this class
 *
 * The logger must expose info($message, array $context) and
 * error($message, array $context).
 */
class MovieScraper {
    private $config;
    private $logger;
    private $cache;
    private $cacheTimeout = 3600; // 1 hour

    // Base URL of the scraped site; slugs and the "latest" listing hang off it.
    private $siteBaseUrl = 'https://tv6.lk21official.cc/';
    // Prefix prepended to the poster file name returned by the search API.
    private $posterBaseUrl = 'https://poster.lk21.party/wp-content/uploads/';
    // Image proxy that receives the URL-encoded poster address from detail pages.
    private $posterProxyUrl = 'https://apivalidasi.my.id/lk21/img.php?l=';
    // User-Agent used for requests that do not take it from the config.
    private $defaultUserAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36';
    // Multiplier applied to the highest pagination page number
    // (presumably the number of films listed per page - confirm against the site).
    private $filmsPerPage = 24;

    /**
     * @param array $config Scraper settings (see the class description for the keys).
     * @param object $logger Logger exposing info() and error().
     */
    public function __construct($config, $logger) {
        $this->config = $config;
        $this->logger = $logger;
        $this->cache = [];
    }

    /**
     * Search for movies matching a query, with caching.
     *
     * Only entries whose "type" is exactly "movie" and whose title is
     * non-empty are returned, capped at config['max_results'].
     *
     * @param string $query Free-text search term.
     * @return array|false List of movie summaries (title, year, rating,
     *                     duration, quality, type, slug, poster and, when a
     *                     slug exists, url), or false on any failure.
     */
    public function scrapeMovies($query) {
        try {
            $cacheKey = 'movies_' . md5($query);

            $cached = $this->getFreshCacheEntry($cacheKey);
            if ($cached !== null) {
                $this->logger->info("Returning cached movie results", ['query' => $query]);
                return $cached['data'];
            }

            $searchUrl = $this->config['search_url'] . urlencode($query) . "&page=1";
            $body = $this->fetchUrl($searchUrl, $this->config['user_agent'], $this->config['timeout']);

            $data = json_decode($body, true);

            // The payload must be an object with a "data" list; anything else
            // (scalar, null, malformed JSON) is treated as an invalid response
            // instead of being iterated and cached as an empty result.
            if (!is_array($data) || !isset($data['data']) || !is_array($data['data'])) {
                throw new Exception("Invalid JSON response");
            }

            $maxResults = $this->config['max_results'];
            $movies = [];

            foreach ($data['data'] as $film) {
                if (count($movies) >= $maxResults) {
                    break;
                }

                // Skip malformed entries and anything that is not a movie (e.g. series).
                if (!is_array($film) || !isset($film['type']) || $film['type'] !== 'movie') {
                    continue;
                }

                $movie = $this->buildMovieSummary($film);
                if (!empty($movie['title'])) {
                    $movies[] = $movie;
                }
            }

            $this->storeInCache($cacheKey, $movies);

            $this->logger->info("Movies scraped successfully", [
                'query' => $query,
                'count' => count($movies)
            ]);

            return $movies;

        } catch (Exception $e) {
            $this->logger->error("Failed to scrape movies", [
                'query' => $query,
                'error' => $e->getMessage()
            ]);
            return false;
        }
    }

    /**
     * Scrape a single movie detail page, with caching.
     *
     * @param string $url Absolute URL of the detail page.
     * @return array|false Detail record (url, optional title/rating/synopsis/
     *                     poster, info_tags, genres, countries), or false when
     *                     the request fails or the page is detected as a series.
     */
    public function scrapeMovieDetail($url) {
        try {
            $cacheKey = 'detail_' . md5($url);

            $cached = $this->getFreshCacheEntry($cacheKey);
            if ($cached !== null) {
                $this->logger->info("Returning cached movie detail", ['url' => $url]);
                return $cached['data'];
            }

            $html = $this->fetchUrl($url, $this->defaultUserAgent, 30);
            $detail = $this->parseMovieDetail($url, $html);

            // Series are rejected and deliberately not cached.
            if ($this->isSeries($url, $html, $detail)) {
                $this->logger->info("Detected series, returning false", ['url' => $url]);
                return false;
            }

            $this->storeInCache($cacheKey, $detail);

            $this->logger->info("Movie detail scraped successfully", ['url' => $url]);
            return $detail;

        } catch (Exception $e) {
            $this->logger->error("Failed to scrape movie detail", [
                'url' => $url,
                'error' => $e->getMessage()
            ]);
            return false;
        }
    }

    /**
     * Estimate the total number of films from the "latest" listing pagination.
     *
     * The estimate is the highest page number found in the pagination links
     * multiplied by $filmsPerPage.
     *
     * @return int Estimated film count, or 0 when the listing cannot be fetched.
     */
    public function getTotalFilms() {
        try {
            $cacheKey = 'total_films';

            $cached = $this->getFreshCacheEntry($cacheKey);
            if ($cached !== null) {
                return $cached['data'];
            }

            // Redirects are not followed for this request (unchanged behaviour).
            $html = $this->fetchUrl($this->siteBaseUrl . 'latest', $this->defaultUserAgent, 10, false);

            $dom = $this->loadHtmlDocument($html);
            $xpath = new DOMXPath($dom);

            $paginationLinks = $xpath->query('//ul[@class="pagination"]//li//a[contains(@href, "/latest/page/")]');
            $maxPage = 1;

            foreach ($paginationLinks as $link) {
                $href = $link->getAttribute('href');
                if (preg_match('/\/latest\/page\/(\d+)/', $href, $matches)) {
                    $maxPage = max($maxPage, (int) $matches[1]);
                }
            }

            $totalFilms = $maxPage * $this->filmsPerPage;

            $this->storeInCache($cacheKey, $totalFilms);

            $this->logger->info("Total films calculated", ['total' => $totalFilms]);
            return $totalFilms;

        } catch (Exception $e) {
            $this->logger->error("Failed to get total films", ['error' => $e->getMessage()]);
            return 0;
        }
    }

    /**
     * Return the cache entry for a key if it exists and has not expired.
     *
     * @param string $cacheKey
     * @return array|null Entry with 'data' and 'timestamp', or null on miss/expiry.
     */
    private function getFreshCacheEntry($cacheKey) {
        if (!isset($this->cache[$cacheKey])) {
            return null;
        }

        $entry = $this->cache[$cacheKey];
        if (time() - $entry['timestamp'] < $this->cacheTimeout) {
            return $entry;
        }

        return null;
    }

    /**
     * Store a value in the in-memory cache, stamped with the current time.
     *
     * @param string $cacheKey
     * @param mixed $data
     */
    private function storeInCache($cacheKey, $data) {
        $this->cache[$cacheKey] = [
            'data' => $data,
            'timestamp' => time()
        ];
    }

    /**
     * Perform an HTTP GET and return the response body.
     *
     * @param string $url Address to fetch.
     * @param string $userAgent Value for the User-Agent header.
     * @param int $timeout Total request timeout in seconds.
     * @param bool $followRedirects Whether Location redirects are followed.
     * @return string Non-empty response body.
     * @throws Exception On transport errors, non-200 status or an empty body.
     */
    private function fetchUrl($url, $userAgent, $timeout, $followRedirects = true) {
        $ch = curl_init();
        if ($ch === false) {
            throw new Exception("cURL Error: unable to initialise cURL handle");
        }

        // NOTE(security): peer verification stays off unless config['verify_ssl']
        // is truthy, because that is how this class has always behaved.
        // Enabling it is recommended wherever the target certificates are valid.
        $verifyPeer = isset($this->config['verify_ssl']) ? (bool) $this->config['verify_ssl'] : false;

        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        if ($followRedirects) {
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        }
        // Only HTTP(S) is ever meaningful here: every caller requires a 200
        // status, so other schemes could never succeed anyway.
        curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, $verifyPeer);
        curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
        // Empty string lets cURL advertise every encoding it supports.
        curl_setopt($ch, CURLOPT_ENCODING, '');

        $body = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        curl_close($ch);

        if ($error) {
            throw new Exception("cURL Error: {$error}");
        }

        if ($httpCode !== 200) {
            throw new Exception("HTTP Error: {$httpCode}");
        }

        if (!$body) {
            throw new Exception("Empty response");
        }

        return $body;
    }

    /**
     * Parse an HTML string into a DOMDocument without emitting parser warnings.
     *
     * The previous libxml error-handling mode is restored afterwards so the
     * rest of the application is not affected.
     *
     * @param string $html Non-empty HTML markup.
     * @return DOMDocument
     */
    private function loadHtmlDocument($html) {
        $previousMode = libxml_use_internal_errors(true);

        $dom = new DOMDocument();
        $dom->loadHTML($html);

        libxml_clear_errors();
        libxml_use_internal_errors($previousMode);

        return $dom;
    }

    /**
     * Map one raw search-API entry to the summary structure returned to callers.
     *
     * @param array $film Raw entry from the API's "data" list.
     * @return array Movie summary; 'url' is present only when a slug exists.
     */
    private function buildMovieSummary($film) {
        $movie = [
            'title' => isset($film['title']) ? $film['title'] : '',
            'year' => isset($film['year']) ? $film['year'] : '',
            'rating' => isset($film['rating']) ? $film['rating'] : '',
            'duration' => isset($film['runtime']) ? $film['runtime'] : '',
            'quality' => isset($film['quality']) ? $film['quality'] : '',
            'type' => isset($film['type']) ? $film['type'] : '',
            'slug' => isset($film['slug']) ? $film['slug'] : '',
            'poster' => isset($film['poster']) ? $this->posterBaseUrl . $film['poster'] : ''
        ];

        if (!empty($movie['slug'])) {
            $movie['url'] = $this->siteBaseUrl . $movie['slug'];
        }

        return $movie;
    }

    /**
     * Extract the detail record from a movie page.
     *
     * @param string $url Page address, copied into the record.
     * @param string $html Page markup.
     * @return array Detail record; title, rating, synopsis and poster are set
     *               only when the corresponding node exists.
     */
    private function parseMovieDetail($url, $html) {
        $dom = $this->loadHtmlDocument($html);
        $xpath = new DOMXPath($dom);

        $detail = [];
        $detail['url'] = $url;

        // Title: first <h1>, stripped of the site's boilerplate words.
        $titleNode = $xpath->query('//h1')->item(0);
        if ($titleNode) {
            $detail['title'] = $this->cleanMovieTitle(trim($titleNode->textContent));
        }

        // Rating: first <strong> inside the info-tag box. Kept in a local so the
        // tag filter below works even when the page has no rating at all.
        $rating = null;
        $ratingNode = $xpath->query('//div[@class="info-tag"]//strong')->item(0);
        if ($ratingNode) {
            $rating = trim($ratingNode->textContent);
            $detail['rating'] = $rating;
        }

        // Info tags: every non-empty <span> that is not just the rating repeated.
        $tags = [];
        foreach ($xpath->query('//div[@class="info-tag"]//span') as $tag) {
            $text = trim($tag->textContent);
            if (!empty($text) && $text !== $rating) {
                $tags[] = $text;
            }
        }
        $detail['info_tags'] = $tags;

        // Genres and countries share one tag list; the link target tells them apart.
        $genres = [];
        $countries = [];
        foreach ($xpath->query('//div[@class="tag-list"]//a') as $node) {
            $href = $node->getAttribute('href');
            $text = trim($node->textContent);
            if (strpos($href, '/genre/') !== false) {
                $genres[] = $text;
            } elseif (strpos($href, '/country/') !== false) {
                $countries[] = $text;
            }
        }
        $detail['genres'] = $genres;
        $detail['countries'] = $countries;

        $synopsisNode = $xpath->query('//div[@class="synopsis collapsed"]')->item(0);
        if ($synopsisNode) {
            $detail['synopsis'] = trim($synopsisNode->textContent);
        }

        // Poster is routed through the image proxy.
        $posterNode = $xpath->query('//div[@class="detail hidden"]//img')->item(0);
        if ($posterNode) {
            $posterUrl = $posterNode->getAttribute('src');
            if ($posterUrl) {
                $detail['poster'] = $this->posterProxyUrl . urlencode($posterUrl);
            }
        }

        return $detail;
    }

    /**
     * Strip the site's boilerplate ("Nonton ...", "... Sub Indo di Lk21") from a title.
     *
     * @param string $title Raw heading text.
     * @return string Cleaned title.
     */
    private function cleanMovieTitle($title) {
        $patterns = [
            '/^Nonton\s+/i',
            '/\s+Sub\s+Indo\s+di\s+Lk21\s*$/i',
            '/\s+di\s+Lk21\s*$/i',
            '/\s+Sub\s+Indo\s*$/i'
        ];

        // Patterns are applied one after another, in this order.
        foreach ($patterns as $pattern) {
            $title = preg_replace($pattern, '', $title);
        }

        return trim($title);
    }

    /**
     * Heuristically decide whether a page describes a series rather than a movie.
     *
     * A page counts as a series when its URL contains /series/ or /drama/, or
     * when any indicator phrase occurs (case-insensitively) in the markup or
     * in the parsed title.
     *
     * @param string $url Page address.
     * @param string $html Page markup.
     * @param array $detail Parsed detail record; only 'title' is consulted.
     * @return bool
     */
    private function isSeries($url, $html, $detail) {
        if (strpos($url, '/series/') !== false || strpos($url, '/drama/') !== false) {
            return true;
        }

        $seriesIndicators = [
            'type="series"',
            'class="series"',
            'season 1',
            'episode 1',
            '- series',
            '(series)',
            'series -',
            'season: 1',
            'episode: 1'
        ];

        // Markup first, then the title (when one was extracted).
        $haystacks = [strtolower($html)];
        if (isset($detail['title'])) {
            $haystacks[] = strtolower($detail['title']);
        }

        foreach ($haystacks as $haystack) {
            foreach ($seriesIndicators as $indicator) {
                if (strpos($haystack, $indicator) !== false) {
                    return true;
                }
            }
        }

        return false;
    }
}



