• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

django-import-export / django-import-export / 15366210941

31 May 2025 06:04PM CUT coverage: 100.0%. Remained the same
15366210941

Pull #2064

github

web-flow
Merge a393fe140 into 71d241954
Pull Request #2064: removed FIXME

2282 of 2282 relevant lines covered (100.0%)

4.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

100.0
/import_export/formats/base_formats.py
1
# when adding imports, ensure that they are local to the
2
# correct class for the file format.
3
# e.g. add openpyxl imports to the XLSXFormat class
4
# See issue 2004
5
import logging
5✔
6
import warnings
5✔
7

8
import tablib
5✔
9
from django.conf import settings
5✔
10
from django.utils.translation import gettext_lazy as _
5✔
11
from tablib.formats import registry
5✔
12

13
# Module-level logger; XLSX.export_data logs export failures through it.
logger = logging.getLogger(__name__)
5✔
14

15

16
class Format:
    """
    Base class describing a file format used for import and/or export.

    The defaults describe a binary format that supports neither import nor
    export; subclasses override the methods they need.
    """

    def get_title(self):
        """
        Returns the title of this format (by default, the format class).
        """
        return type(self)

    def create_dataset(self, in_stream):
        """
        Create dataset from given string.

        The base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()

    def export_data(self, dataset, **kwargs):
        """
        Returns format representation for given dataset.

        The base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()

    def is_binary(self):
        """
        Returns if this format is binary.
        """
        return True

    def get_read_mode(self):
        """
        Returns mode for opening files.
        """
        return "rb"

    def get_extension(self):
        """
        Returns extension for this format files.
        """
        return ""

    def get_content_type(self):
        """
        Returns the content type (media type) for this format.
        """
        # For content types see
        # https://www.iana.org/assignments/media-types/media-types.xhtml
        return "application/octet-stream"

    @classmethod
    def is_available(cls):
        """
        Returns if this format can be used.
        """
        return True

    def can_import(self):
        """
        Returns if this format supports import.
        """
        return False

    def can_export(self):
        """
        Returns if this format supports export.
        """
        return False
5✔
64

65

66
class TablibFormat(Format):
    """
    Format backed by a tablib format looked up in the tablib registry.
    """

    # Dotted path of the tablib format module, e.g. "tablib.formats._csv".
    TABLIB_MODULE = None
    # Content type returned by get_content_type().
    CONTENT_TYPE = "application/octet-stream"

    def __init__(self, encoding=None):
        self.encoding = encoding

    def get_format(self):
        """
        Returns the tablib format registered for ``TABLIB_MODULE``.

        The registry key is the last dotted component of ``TABLIB_MODULE``
        with underscores removed ("tablib.formats._csv" -> "csv").
        Raises AttributeError if ``TABLIB_MODULE`` is not set.
        """
        if not self.TABLIB_MODULE:
            raise AttributeError("TABLIB_MODULE must be defined")
        key = self.TABLIB_MODULE.split(".")[-1].replace("_", "")
        return registry.get_format(key)

    @classmethod
    def is_available(cls):
        """
        Returns False if looking up the tablib format raises
        UnsupportedFormat or ImportError, True otherwise.
        """
        try:
            cls().get_format()
        except (tablib.core.UnsupportedFormat, ImportError):
            return False
        return True

    def get_title(self):
        """
        Returns the title of the tablib format.
        """
        return self.get_format().title

    def create_dataset(self, in_stream, **kwargs):
        """
        Create dataset from ``in_stream`` using tablib's ``import_set``.
        Extra keyword arguments are passed through to tablib.
        """
        return tablib.import_set(in_stream, format=self.get_title(), **kwargs)

    def export_data(self, dataset, **kwargs):
        """
        Returns format representation for given dataset.

        When the setting IMPORT_EXPORT_ESCAPE_FORMULAE_ON_EXPORT is exactly
        True, the cells of ``dataset`` are escaped in place before export.
        """
        if getattr(settings, "IMPORT_EXPORT_ESCAPE_FORMULAE_ON_EXPORT", False) is True:
            self._escape_formulae(dataset)
        return dataset.export(self.get_title(), **kwargs)

    def get_extension(self):
        """
        Returns the first extension listed by the tablib format.
        """
        return self.get_format().extensions[0]

    def get_content_type(self):
        """
        Returns ``CONTENT_TYPE``.
        """
        return self.CONTENT_TYPE

    def can_import(self):
        """
        Returns if the tablib format defines ``import_set``.
        """
        return hasattr(self.get_format(), "import_set")

    def can_export(self):
        """
        Returns if the tablib format defines ``export_set``.
        """
        return hasattr(self.get_format(), "export_set")

    def _escape_formulae(self, dataset):
        """
        Rewrites every row of ``dataset`` in place with leading "="
        characters stripped from each cell.
        """

        def _do_escape(s):
            # Strip every leading "=": removing only the first one would turn
            # "==SUM(1,1)" into "=SUM(1,1)", which is still a formula.
            return s.lstrip("=")

        # Each pass pops the first row and appends its escaped copy, so after
        # len(dataset) passes all rows are rewritten in their original order.
        # Iterating a fixed count avoids looping over a dataset that is being
        # mutated.
        for _ in range(len(dataset)):
            row = dataset.lpop()
            # NOTE: every cell is passed through str() before escaping, so
            # non-string values end up as their string form.
            dataset.append([_do_escape(str(cell)) for cell in row])
5✔
121

122

123
class TextFormat(TablibFormat):
    """
    Tablib format whose files are read as text rather than binary.
    """

    def create_dataset(self, in_stream, **kwargs):
        """
        Create dataset from ``in_stream``.

        Bytes input is decoded with ``self.encoding`` when an encoding is
        set; otherwise the input is handed to tablib unchanged.
        """
        if self.encoding and isinstance(in_stream, bytes):
            in_stream = in_stream.decode(self.encoding)
        return super().create_dataset(in_stream, **kwargs)

    def get_read_mode(self):
        """
        Returns mode for opening files (text mode).
        """
        return "r"

    def is_binary(self):
        """
        Returns if this format is binary (it is not).
        """
        return False
5✔
134

135

136
class CSV(TextFormat):
    """
    Text format using tablib's ``_csv`` module.
    """

    TABLIB_MODULE = "tablib.formats._csv"
    CONTENT_TYPE = "text/csv"
5✔
139

140

141
class JSON(TextFormat):
    """
    Text format using tablib's ``_json`` module.
    """

    TABLIB_MODULE = "tablib.formats._json"
    CONTENT_TYPE = "application/json"
5✔
144

145

146
class YAML(TextFormat):
    """
    Text format using tablib's ``_yaml`` module.
    """

    TABLIB_MODULE = "tablib.formats._yaml"
    # See https://stackoverflow.com/questions/332129/yaml-mime-type
    CONTENT_TYPE = "text/yaml"
5✔
150

151

152
class TSV(TextFormat):
    """
    Text format using tablib's ``_tsv`` module.
    """

    TABLIB_MODULE = "tablib.formats._tsv"
    CONTENT_TYPE = "text/tab-separated-values"
5✔
155

156

157
class ODS(TextFormat):
    """
    Format using tablib's ``_ods`` module.
    """

    # NOTE(review): this class derives from TextFormat, so is_binary() is
    # False and the read mode is "r", yet ODS is also listed in
    # BINARY_FORMATS - confirm that the text base class is intended.
    TABLIB_MODULE = "tablib.formats._ods"
    CONTENT_TYPE = "application/vnd.oasis.opendocument.spreadsheet"
5✔
160

161

162
class HTML(TextFormat):
    """
    Text format using tablib's ``_html`` module.
    """

    TABLIB_MODULE = "tablib.formats._html"
    CONTENT_TYPE = "text/html"
5✔
165

166

167
class XLS(TablibFormat):
    """
    Binary format using tablib's ``_xls`` module; datasets are read with xlrd.
    """

    TABLIB_MODULE = "tablib.formats._xls"
    CONTENT_TYPE = "application/vnd.ms-excel"

    def create_dataset(self, in_stream):
        """
        Create dataset from first sheet.

        The first row supplies the headers and every following row becomes a
        data row. A sheet without any rows yields an empty dataset.
        """
        # imported locally so that xlrd is only needed when XLS is used
        import xlrd

        xls_book = xlrd.open_workbook(file_contents=in_stream)
        dataset = tablib.Dataset()
        sheet = xls_book.sheets()[0]

        # An empty sheet has no header row; reading row 0 would raise
        # IndexError, so return the empty dataset instead.
        if sheet.nrows == 0:
            return dataset

        dataset.headers = sheet.row_values(0)
        for i in range(1, sheet.nrows):
            dataset.append(sheet.row_values(i))
        return dataset
5✔
185

186

187
class XLSX(TablibFormat):
    """
    Binary format using tablib's ``_xlsx`` module; datasets are read with
    openpyxl.
    """

    TABLIB_MODULE = "tablib.formats._xlsx"
    CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    def create_dataset(self, in_stream):
        """
        Create dataset from first sheet.

        The first row supplies the headers and every following row becomes a
        data row. When the setting IMPORT_EXPORT_IMPORT_IGNORE_BLANK_LINES is
        truthy, rows whose cells are all None are skipped. A sheet without
        any rows yields an empty dataset.
        """
        from io import BytesIO

        import openpyxl

        # 'data_only' means values are read from formula cells, not the formula itself
        xlsx_book = openpyxl.load_workbook(
            BytesIO(in_stream), read_only=True, data_only=True
        )

        dataset = tablib.Dataset()
        # A read-only workbook loads lazily and must be closed explicitly,
        # so all reading happens inside the try block.
        try:
            sheet = xlsx_book.active

            # obtain generator
            rows = sheet.rows
            header_row = next(rows, None)
            if header_row is None:
                # no rows at all: avoid leaking StopIteration to the caller
                return dataset
            dataset.headers = [cell.value for cell in header_row]

            ignore_blanks = getattr(
                settings, "IMPORT_EXPORT_IMPORT_IGNORE_BLANK_LINES", False
            )
            for row in rows:
                row_values = [cell.value for cell in row]

                # do not add empty rows to dataset
                if ignore_blanks and all(value is None for value in row_values):
                    continue
                dataset.append(row_values)
        finally:
            xlsx_book.close()
        return dataset

    def export_data(self, dataset, **kwargs):
        """
        Returns format representation for given dataset.

        If the export raises IllegalCharacterError and the setting
        IMPORT_EXPORT_ESCAPE_ILLEGAL_CHARS_ON_EXPORT is exactly True, the
        illegal characters are replaced in place and the export is retried.
        Otherwise the error is logged and a ValueError with a fixed message
        is raised.
        """
        from openpyxl.utils.exceptions import IllegalCharacterError

        # #1698 temporary catch for deprecation warning in openpyxl
        # this catch block must be removed when openpyxl updated
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            try:
                return super().export_data(dataset, **kwargs)
            except IllegalCharacterError as e:
                if (
                    getattr(
                        settings, "IMPORT_EXPORT_ESCAPE_ILLEGAL_CHARS_ON_EXPORT", False
                    )
                    is True
                ):
                    self._escape_illegal_chars(dataset)
                    return super().export_data(dataset, **kwargs)
                logger.exception(e)
                # not raising original error due to reflected xss risk;
                # 'from None' also keeps it out of the chained traceback
                raise ValueError(
                    _("export failed due to IllegalCharacterError")
                ) from None

    def _escape_illegal_chars(self, dataset):
        """
        Rewrites every row of ``dataset`` in place, replacing characters
        matched by openpyxl's ILLEGAL_CHARACTERS_RE in string cells with the
        Unicode replacement character. Non-string cells are left untouched.
        """
        from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE

        def _do_escape(cell):
            # isinstance also covers str subclasses, which an exact type
            # comparison would skip
            if isinstance(cell, str):
                cell = ILLEGAL_CHARACTERS_RE.sub("\N{REPLACEMENT CHARACTER}", cell)
            return cell

        # Each pass pops the first row and appends its escaped copy, so after
        # len(dataset) passes all rows are rewritten in their original order.
        for _ in range(len(dataset)):
            row = dataset.lpop()
            dataset.append([_do_escape(cell) for cell in row])
5✔
259

260

261
#: These are the default formats for import and export. Whether each one can
#: be used depends on its implementation in the tablib library.
DEFAULT_FORMATS = [
    fmt
    for fmt in (
        CSV,
        XLS,
        XLSX,
        TSV,
        ODS,
        JSON,
        YAML,
        HTML,
    )
    if fmt.is_available()
]
277

278
#: These are the formats which support different data types (such as datetime
#: and numbers), for which `coerce_to_string` is to be set to false dynamically.
BINARY_FORMATS = [
    fmt
    for fmt in (
        XLS,
        XLSX,
        ODS,
    )
    if fmt.is_available()
]
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc