• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

eliashaeussler / cache-warmup / 12401324735

18 Dec 2024 09:06PM UTC coverage: 89.057% (-1.4%) from 90.444%
12401324735

Pull #421

github

web-flow
Merge 4e24c8e38 into d133b8e46
Pull Request #421: [FEATURE] Use simpler XML parsing to reduce high memory load

128 of 164 new or added lines in 9 files covered. (78.05%)

1 existing line in 1 file now uncovered.

1652 of 1855 relevant lines covered (89.06%)

9.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

98.92
/src/Xml/SitemapXmlParser.php
1
<?php
2

3
declare(strict_types=1);
4

5
/*
6
 * This file is part of the Composer package "eliashaeussler/cache-warmup".
7
 *
8
 * Copyright (C) 2020-2024 Elias Häußler <elias@haeussler.dev>
9
 *
10
 * This program is free software: you can redistribute it and/or modify
11
 * it under the terms of the GNU General Public License as published by
12
 * the Free Software Foundation, either version 3 of the License, or
13
 * (at your option) any later version.
14
 *
15
 * This program is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
 * GNU General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU General Public License
21
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
22
 */
23

24
namespace EliasHaeussler\CacheWarmup\Xml;
25

26
use EliasHaeussler\CacheWarmup\Exception;
27
use EliasHaeussler\CacheWarmup\Helper;
28
use EliasHaeussler\CacheWarmup\Http;
29
use EliasHaeussler\CacheWarmup\Result;
30
use EliasHaeussler\CacheWarmup\Sitemap;
31
use GuzzleHttp\Client;
32
use GuzzleHttp\ClientInterface;
33
use GuzzleHttp\Exception\GuzzleException;
34
use GuzzleHttp\RequestOptions;
35
use Netlogix\XmlProcessor;
36
use Psr\Http\Message;
37
use Symfony\Component\OptionsResolver;
38

39
use function array_map;
40
use function fclose;
41
use function fopen;
42
use function fread;
43
use function is_file;
44
use function is_readable;
45
use function is_resource;
46
use function restore_error_handler;
47
use function set_error_handler;
48
use function sha1;
49
use function sprintf;
50
use function sys_get_temp_dir;
51
use function unlink;
52

53
/**
54
 * SitemapXmlParser.
55
 *
56
 * @author Elias Häußler <elias@haeussler.dev>
57
 * @license GPL-3.0-or-later
58
 *
59
 * @phpstan-type ParserOptions array{
60
 *     client_config: array<string, mixed>,
61
 *     request_headers: array<string, string>,
62
 *     request_options: array<string, mixed>,
63
 * }
64
 */
65
final class SitemapXmlParser implements ConfigurableParser
{
    /**
     * Leading bytes used to detect gzip-compressed sitemap files.
     */
    private const GZIP_MAGIC_BYTES = "\x1f\x8b\x08";

    private readonly OptionsResolver\OptionsResolver $optionsResolver;
    private readonly Node\SitemapNodeProcessor $sitemapProcessor;
    private readonly Node\SitemapNodeProcessor $urlProcessor;
    private readonly XmlProcessor\XmlProcessor $xmlProcessor;
    private readonly Node\SitemapNodeConverter $sitemapConverter;

    /**
     * Paths of temporary files created for downloaded sitemaps.
     * They are removed again in {@see __destruct()}.
     *
     * @var list<string>
     */
    private array $temporaryFiles = [];

    /**
     * Resolved parser options, see {@see createOptionsResolver()}.
     *
     * @var ParserOptions
     */
    private array $options;

    /**
     * Set up the node processors for sitemap and URL nodes, register both
     * at the XML processor and resolve the given parser options.
     *
     * @param array<string, mixed> $options Raw parser options (client_config, request_headers, request_options)
     * @param ClientInterface|null $client  Optional HTTP client; if omitted, a new Guzzle client is
     *                                      created from the "client_config" option for each download
     */
    public function __construct(
        array $options = [],
        private readonly ?ClientInterface $client = null,
    ) {
        $this->optionsResolver = $this->createOptionsResolver();
        $this->sitemapProcessor = new Node\SitemapNodeProcessor(
            Node\SitemapNodePath::Sitemap,
            [
                Node\SitemapNode::LastModificationDate,
                Node\SitemapNode::Location,
            ],
        );
        $this->urlProcessor = new Node\SitemapNodeProcessor(
            Node\SitemapNodePath::Url,
            [
                Node\SitemapNode::ChangeFrequency,
                Node\SitemapNode::LastModificationDate,
                Node\SitemapNode::Location,
                Node\SitemapNode::Priority,
            ],
        );
        $this->xmlProcessor = new XmlProcessor\XmlProcessor([
            $this->sitemapProcessor,
            $this->urlProcessor,
        ]);
        $this->sitemapConverter = new Node\SitemapNodeConverter();

        $this->setOptions($options);
    }

    /**
     * Parse the given sitemap and return all contained sitemaps and URLs.
     *
     * The sitemap is resolved to a local file first (remote sitemaps are
     * downloaded), then processed by the XML processor. The nodes collected
     * by both node processors are converted into the parser result.
     *
     * @throws Exception\FileIsMissing
     * @throws Exception\FileIsNotReadable
     * @throws Exception\SitemapCannotBeRead
     * @throws Exception\SitemapIsMalformed
     * @throws GuzzleException
     */
    public function parse(Sitemap\Sitemap $sitemap): Result\ParserResult
    {
        $filename = $this->fetchSitemapFile($sitemap);

        // Convert PHP errors reported while processing into a domain exception
        set_error_handler(
            static fn () => throw new Exception\SitemapCannotBeRead($sitemap),
        );

        try {
            // Everything that runs while the custom error handler is installed
            // lives inside this block, so the handler is restored on every path
            $this->sitemapProcessor->reset();
            $this->urlProcessor->reset();

            $this->xmlProcessor->processFile($filename);
        } catch (Exception\XmlNodeIsEmpty $exception) {
            throw new Exception\SitemapIsMalformed($sitemap, $exception);
        } finally {
            restore_error_handler();
        }

        $sitemaps = $this->sitemapProcessor->getProcessedNodes();
        $urls = $this->urlProcessor->getProcessedNodes();

        return new Result\ParserResult(
            array_map(fn (array $node) => $this->sitemapConverter->convertSitemap($node, $sitemap), $sitemaps),
            array_map(fn (array $node) => $this->sitemapConverter->convertUrl($node, $sitemap), $urls),
        );
    }

    /**
     * Resolve the given options and use them for subsequent downloads.
     *
     * @param array<string, mixed> $options
     */
    public function setOptions(array $options): void
    {
        /* @phpstan-ignore assign.propertyType */
        $this->options = $this->optionsResolver->resolve($options);
    }

    /**
     * Resolve the given sitemap to a readable local file.
     *
     * Local sitemaps are used in place, all other sitemaps are downloaded to
     * a temporary file. If the file starts with the gzip magic bytes, the
     * returned path is prefixed with the "compress.zlib://" stream wrapper.
     *
     * @return string Path (optionally with stream wrapper prefix) of the sitemap file
     *
     * @throws Exception\FileIsMissing
     * @throws Exception\FileIsNotReadable
     * @throws GuzzleException
     */
    private function fetchSitemapFile(Sitemap\Sitemap $sitemap): string
    {
        $uri = $sitemap->getUri();

        // Fetch XML source
        if ($sitemap->isLocalFile()) {
            $filename = $sitemap->getLocalFilePath();
        } else {
            $filename = $this->downloadSitemap($uri);
        }

        // Check if file exists
        if (!is_file($filename) || !is_readable($filename)) {
            throw new Exception\FileIsMissing($filename);
        }

        $file = fopen($filename, 'rb');

        if (!is_resource($file)) {
            throw new Exception\FileIsNotReadable($filename);
        }

        // Read the file header and release the handle right away
        $header = (string) fread($file, 10);
        fclose($file);

        // Use built-in gzip decoding if necessary. The header is raw binary
        // data, hence it must be compared byte-wise instead of using
        // multibyte string functions.
        if (str_starts_with($header, self::GZIP_MAGIC_BYTES)) {
            return 'compress.zlib://'.$filename;
        }

        return $filename;
    }

    /**
     * Download the sitemap from the given URI into a temporary file.
     *
     * The temporary file is passed as the request's "sink" option, on top of
     * the configured "request_options".
     *
     * @return string Path to the temporary file
     *
     * @throws GuzzleException
     */
    private function downloadSitemap(Message\UriInterface $uri): string
    {
        $filename = $this->createTemporaryFilename((string) $uri);

        $requestFactory = new Http\Message\RequestFactory('GET', $this->options['request_headers']);
        $request = $requestFactory->build($uri);
        $requestOptions = $this->options['request_options'];
        $requestOptions[RequestOptions::SINK] = $filename;

        $client = $this->client ?? new Client($this->options['client_config']);
        $client->send($request, $requestOptions);

        return $filename;
    }

    /**
     * Create a not yet existing filename within the system's temporary
     * directory and remember it for later cleanup.
     *
     * The filename is built from the SHA-1 hash of the given identifier and
     * an incrementing salt, which is raised until no file with the resulting
     * name exists.
     */
    private function createTemporaryFilename(string $identifier): string
    {
        $salt = 0;

        do {
            $file = Helper\FilesystemHelper::joinPathSegments(
                sys_get_temp_dir(),
                sprintf('sitemap_%s_%d.xml', sha1($identifier), $salt++),
            );
        } while (is_file($file));

        return $this->temporaryFiles[] = $file;
    }

    /**
     * Create the resolver for all supported parser options. Each option
     * must be an array and defaults to an empty array.
     */
    private function createOptionsResolver(): OptionsResolver\OptionsResolver
    {
        $optionsResolver = new OptionsResolver\OptionsResolver();

        $optionsResolver->define('client_config')
            ->allowedTypes('array')
            ->default([])
        ;

        $optionsResolver->define('request_headers')
            ->allowedTypes('array')
            ->default([])
        ;

        $optionsResolver->define('request_options')
            ->allowedTypes('array')
            ->default([])
        ;

        return $optionsResolver;
    }

    /**
     * Remove all temporary files created by this parser which still exist.
     */
    public function __destruct()
    {
        foreach ($this->temporaryFiles as $temporaryFile) {
            if (is_file($temporaryFile)) {
                unlink($temporaryFile);
            }
        }
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc