• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

eliashaeussler / cache-warmup / 12446679310

21 Dec 2024 04:15PM UTC coverage: 90.326% (-0.3%) from 90.591%
12446679310

Pull #442

github

web-flow
Merge d9e5756a7 into 4fdab41b4
Pull Request #442: [!!!][TASK] Drop `client_config` crawler option in favor of `clientOptions`

3 of 3 new or added lines in 3 files covered. (100.0%)

4 existing lines in 1 file now uncovered.

1718 of 1902 relevant lines covered (90.33%)

10.44 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.96
/src/Xml/SitemapXmlParser.php
1
<?php
2

3
declare(strict_types=1);
4

5
/*
6
 * This file is part of the Composer package "eliashaeussler/cache-warmup".
7
 *
8
 * Copyright (C) 2020-2024 Elias Häußler <elias@haeussler.dev>
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation, either version 3 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
22
 */
23

24
namespace EliasHaeussler\CacheWarmup\Xml;
25

26
use EliasHaeussler\CacheWarmup\Exception;
27
use EliasHaeussler\CacheWarmup\Helper;
28
use EliasHaeussler\CacheWarmup\Http;
29
use EliasHaeussler\CacheWarmup\Result;
30
use EliasHaeussler\CacheWarmup\Sitemap;
31
use GuzzleHttp\Client;
32
use GuzzleHttp\ClientInterface;
33
use GuzzleHttp\Exception\GuzzleException;
34
use GuzzleHttp\RequestOptions;
35
use Psr\Http\Message;
36
use Symfony\Component\OptionsResolver;
37

38
use function fclose;
39
use function fopen;
40
use function fread;
41
use function is_file;
42
use function is_readable;
43
use function is_resource;
44
use function libxml_clear_errors;
45
use function libxml_get_errors;
46
use function libxml_use_internal_errors;
47
use function sha1;
48
use function simplexml_load_file;
49
use function sprintf;
50
use function sys_get_temp_dir;
51
use function unlink;
52

53
/**
54
 * SitemapXmlParser.
55
 *
56
 * @author Elias Häußler <elias@haeussler.dev>
57
 * @license GPL-3.0-or-later
58
 *
59
 * @phpstan-type ParserOptions array{
60
 *     request_headers: array<string, string>,
61
 *     request_options: array<string, mixed>,
62
 * }
63
 */
64
final class SitemapXmlParser implements ConfigurableParser
{
    /**
     * Leading bytes of a gzip stream: the two magic bytes followed by the
     * compression method byte (see RFC 1952).
     */
    private const GZIP_SIGNATURE = "\x1f\x8b\x08";

    /**
     * Resolver used to validate and normalize the parser options.
     */
    private readonly OptionsResolver\OptionsResolver $optionsResolver;

    /**
     * Converter receiving the raw `<sitemap>` and `<url>` node arrays.
     */
    private readonly Node\SitemapNodeConverter $sitemapConverter;

    /**
     * Temporary files created for downloaded sitemaps; removed on destruction.
     *
     * @var list<string>
     */
    private array $temporaryFiles = [];

    /**
     * Resolved parser options.
     *
     * @var ParserOptions
     */
    private array $options;

    /**
     * @param array<string, mixed> $options Parser options, supports `request_headers` and `request_options`
     * @param ClientInterface      $client  HTTP client used to download remote sitemaps
     */
    public function __construct(
        array $options = [],
        private readonly ClientInterface $client = new Client(),
    ) {
        $this->optionsResolver = $this->createOptionsResolver();
        $this->sitemapConverter = new Node\SitemapNodeConverter();

        $this->setOptions($options);
    }

    /**
     * Parse the given XML sitemap and collect all sitemaps and URLs it lists.
     *
     * @throws Exception\FileIsMissing
     * @throws Exception\FileIsNotReadable
     * @throws Exception\SitemapIsMalformed
     * @throws GuzzleException
     */
    public function parse(Sitemap\Sitemap $sitemap): Result\ParserResult
    {
        $filename = $this->fetchSitemapFile($sitemap);

        // Collect libxml errors internally instead of emitting PHP warnings
        $useInternalErrors = libxml_use_internal_errors(true);

        // Drop errors buffered by earlier libxml usage, otherwise they would
        // be reported as errors of this sitemap
        libxml_clear_errors();

        try {
            $xml = simplexml_load_file($filename, null, LIBXML_NOCDATA);
            $errors = libxml_get_errors();
        } finally {
            // Reset internal libxml state, even if loading failed unexpectedly
            libxml_clear_errors();
            libxml_use_internal_errors($useInternalErrors);
        }

        // Throw exception if XML parsing failed
        if ([] !== $errors || false === $xml) {
            throw new Exception\SitemapIsMalformed($sitemap, $errors);
        }

        $sitemaps = [];
        $urls = [];

        // <sitemap> nodes: entries of a sitemap index
        if (isset($xml->sitemap)) {
            foreach ($xml->sitemap as $node) {
                /** @var array{loc?: string, lastmod?: string} $nodeArray */
                $nodeArray = (array) $node;
                $sitemaps[] = $this->sitemapConverter->convertSitemap($nodeArray, $sitemap);
            }
        }

        // <url> nodes: entries of a URL set
        if (isset($xml->url)) {
            foreach ($xml->url as $node) {
                /** @var array{loc?: string, priority?: string, lastmod?: string, changefreq?: string} $nodeArray */
                $nodeArray = (array) $node;
                $urls[] = $this->sitemapConverter->convertUrl($nodeArray, $sitemap);
            }
        }

        return new Result\ParserResult($sitemaps, $urls);
    }

    /**
     * Validate the given options and replace the currently active ones.
     *
     * @param array<string, mixed> $options
     */
    public function setOptions(array $options): void
    {
        /* @phpstan-ignore assign.propertyType */
        $this->options = $this->optionsResolver->resolve($options);
    }

    /**
     * Resolve the file to be parsed for the given sitemap.
     *
     * Local sitemaps are used as is, all other sitemaps are downloaded to a
     * temporary file first. If the file starts with the gzip signature, the
     * returned filename is prefixed with the `compress.zlib://` stream
     * wrapper.
     *
     * @throws Exception\FileIsMissing     if the resolved path is not an existing file
     * @throws Exception\FileIsNotReadable if the file exists, but cannot be read or opened
     * @throws GuzzleException
     */
    private function fetchSitemapFile(Sitemap\Sitemap $sitemap): string
    {
        // Fetch XML source
        $filename = $sitemap->isLocalFile()
            ? $sitemap->getLocalFilePath()
            : $this->downloadSitemap($sitemap->getUri());

        // Check if file exists
        if (!is_file($filename)) {
            throw new Exception\FileIsMissing($filename);
        }

        // Check readability up front, fopen() would emit a warning otherwise
        if (!is_readable($filename)) {
            throw new Exception\FileIsNotReadable($filename);
        }

        $file = fopen($filename, 'rb');

        if (!is_resource($file)) {
            throw new Exception\FileIsNotReadable($filename);
        }

        try {
            // Only the signature bytes are needed to detect gzip contents
            $header = (string) fread($file, strlen(self::GZIP_SIGNATURE));
        } finally {
            fclose($file);
        }

        // Use built-in gzip decoding if necessary (binary-safe comparison)
        if (str_starts_with($header, self::GZIP_SIGNATURE)) {
            return 'compress.zlib://'.$filename;
        }

        return $filename;
    }

    /**
     * Download the sitemap at the given URI into a temporary file.
     *
     * The configured `request_headers` are passed to the request factory and
     * the configured `request_options` to the client. The `sink` request
     * option is always overridden with the temporary file path.
     *
     * @return string Path of the temporary file passed as sink
     *
     * @throws GuzzleException
     */
    private function downloadSitemap(Message\UriInterface $uri): string
    {
        $filename = $this->createTemporaryFilename((string) $uri);

        $requestFactory = new Http\Message\RequestFactory('GET', $this->options['request_headers']);
        $request = $requestFactory->build($uri);
        $requestOptions = $this->options['request_options'];
        $requestOptions[RequestOptions::SINK] = $filename;

        $this->client->send($request, $requestOptions);

        return $filename;
    }

    /**
     * Build a not yet existing file path within the system temp directory.
     *
     * The file name consists of the SHA-1 hash of the identifier and a
     * counter, which is increased until no file exists at the resulting path.
     * The path is registered for removal on destruction.
     */
    private function createTemporaryFilename(string $identifier): string
    {
        $salt = 0;

        do {
            $file = Helper\FilesystemHelper::joinPathSegments(
                sys_get_temp_dir(),
                sprintf('sitemap_%s_%d.xml', sha1($identifier), $salt++),
            );
        } while (is_file($file));

        return $this->temporaryFiles[] = $file;
    }

    /**
     * Create the resolver defining all supported parser options.
     *
     * Both `request_headers` and `request_options` must be arrays and
     * default to an empty array.
     */
    private function createOptionsResolver(): OptionsResolver\OptionsResolver
    {
        $optionsResolver = new OptionsResolver\OptionsResolver();

        $optionsResolver->define('request_headers')
            ->allowedTypes('array')
            ->default([])
        ;

        $optionsResolver->define('request_options')
            ->allowedTypes('array')
            ->default([])
        ;

        return $optionsResolver;
    }

    /**
     * Remove all temporary files created by this parser that still exist.
     */
    public function __destruct()
    {
        foreach ($this->temporaryFiles as $temporaryFile) {
            if (is_file($temporaryFile)) {
                unlink($temporaryFile);
            }
        }
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc