15366210941

Committed 31 May 2025 06:04PM CUT coverage: 100.0%. Remained the same

Build # 15366210941

Build Type

Pull #2064

github

Committed by

web-flow

Commit Message

Merge a393fe140 into 71d241954

Pull Request Pull Request #2064: removed FIXME

Run Details

2282 of 2282 relevant lines covered (100.0%)

4.98 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

100.0

/import_export/formats/base_formats.py

# when adding imports, ensure that they are local to the
# correct class for the file format.
# e.g. add openpyxl imports to the XLSX class
# See issue 2004
import logging
import warnings

import tablib
from django.conf import settings
from django.utils.translation import gettext_lazy as _
from tablib.formats import registry

logger = logging.getLogger(__name__)


class Format:
    """
    Base class describing an import/export file format.

    Every method returns a conservative default; concrete formats override
    the ones that apply to them.
    """

    def get_title(self):
        """
        Returns the title of this format (the class itself by default).
        """
        return type(self)

    def create_dataset(self, in_stream):
        """
        Builds a dataset from ``in_stream``. Must be overridden.
        """
        raise NotImplementedError()

    def export_data(self, dataset, **kwargs):
        """
        Serializes ``dataset`` into this format. Must be overridden.
        """
        raise NotImplementedError()

    def is_binary(self):
        """
        Tells whether the format is binary (default: yes).
        """
        return True

    def get_read_mode(self):
        """
        Gives the mode string used to open files of this format.
        """
        return "rb"

    def get_extension(self):
        """
        Gives the file extension for this format (default: none).
        """
        return ""

    def get_content_type(self):
        """
        Gives the media type used when serving files of this format.
        """
        # Registered media types are listed at
        # https://www.iana.org/assignments/media-types/media-types.xhtml
        return "application/octet-stream"

    @classmethod
    def is_available(cls):
        """
        Tells whether this format can be used (default: always).
        """
        return True

    def can_import(self):
        """
        Tells whether data can be imported from this format (default: no).
        """
        return False

    def can_export(self):
        """
        Tells whether data can be exported to this format (default: no).
        """
        return False


class TablibFormat(Format):
    """
    Format backed by a format registered with tablib.

    Subclasses set ``TABLIB_MODULE`` to the dotted path of the tablib format
    module (e.g. ``"tablib.formats._csv"``) and ``CONTENT_TYPE`` to the media
    type reported by :meth:`get_content_type`.
    """

    TABLIB_MODULE = None
    CONTENT_TYPE = "application/octet-stream"

    def __init__(self, encoding=None):
        # Stored for subclasses; this class itself does not read it.
        self.encoding = encoding

    def get_format(self):
        """
        Look up and return the tablib format object for ``TABLIB_MODULE``.

        The registry key is the last dotted component of ``TABLIB_MODULE``
        with underscores removed (``"tablib.formats._csv"`` -> ``"csv"``).

        Raises ``AttributeError`` when ``TABLIB_MODULE`` is not set.
        """
        if not self.TABLIB_MODULE:
            raise AttributeError("TABLIB_MODULE must be defined")
        key = self.TABLIB_MODULE.split(".")[-1].replace("_", "")
        return registry.get_format(key)

    @classmethod
    def is_available(cls):
        """
        Return True when the tablib format can be resolved, False when the
        lookup raises ``UnsupportedFormat`` or ``ImportError``.
        """
        try:
            cls().get_format()
        except (tablib.core.UnsupportedFormat, ImportError):
            return False
        return True

    def get_title(self):
        """
        Return the ``title`` of the tablib format.
        """
        return self.get_format().title

    def create_dataset(self, in_stream, **kwargs):
        """
        Create a dataset from ``in_stream`` via ``tablib.import_set``.

        Extra keyword arguments are forwarded to ``tablib.import_set``.
        """
        return tablib.import_set(in_stream, format=self.get_title(), **kwargs)

    def export_data(self, dataset, **kwargs):
        """
        Return ``dataset`` exported in this format.

        When the ``IMPORT_EXPORT_ESCAPE_FORMULAE_ON_EXPORT`` setting is
        exactly ``True``, the dataset is modified in place by
        :meth:`_escape_formulae` before it is exported.
        """
        if getattr(settings, "IMPORT_EXPORT_ESCAPE_FORMULAE_ON_EXPORT", False) is True:
            self._escape_formulae(dataset)
        return dataset.export(self.get_title(), **kwargs)

    def get_extension(self):
        """
        Return the first extension declared by the tablib format.
        """
        return self.get_format().extensions[0]

    def get_content_type(self):
        """
        Return the class-level ``CONTENT_TYPE``.
        """
        return self.CONTENT_TYPE

    def can_import(self):
        """
        Return True when the tablib format defines ``import_set``.
        """
        return hasattr(self.get_format(), "import_set")

    def can_export(self):
        """
        Return True when the tablib format defines ``export_set``.
        """
        return hasattr(self.get_format(), "export_set")

    def _escape_formulae(self, dataset):
        """
        Strip one leading ``=`` from every cell of ``dataset``, in place.

        Every cell is converted with ``str()`` first, so all values in the
        dataset are strings afterwards.
        """

        def _do_escape(s):
            return s.replace("=", "", 1) if s.startswith("=") else s

        # Rotate through the rows exactly once: pop from the front, append the
        # escaped row to the back. The row count is taken up front instead of
        # iterating the dataset that is being mutated inside the loop.
        for _ in range(len(dataset)):
            row = dataset.lpop()
            dataset.append([_do_escape(str(cell)) for cell in row])


class TextFormat(TablibFormat):
    """
    Base class for tablib formats that are read as text rather than bytes.
    """

    def is_binary(self):
        """
        Text formats are not binary.
        """
        return False

    def get_read_mode(self):
        """
        Text formats are opened in text mode.
        """
        return "r"

    def create_dataset(self, in_stream, **kwargs):
        """
        Create a dataset from ``in_stream``.

        Byte input is decoded with ``self.encoding`` first, provided an
        encoding was configured; anything else is passed on unchanged.
        """
        text = in_stream
        if isinstance(text, bytes) and self.encoding:
            text = text.decode(self.encoding)
        return super().create_dataset(text, **kwargs)


class CSV(TextFormat):
    """Text format using tablib's ``csv`` format, served as ``text/csv``."""

    TABLIB_MODULE = "tablib.formats._csv"
    CONTENT_TYPE = "text/csv"


class JSON(TextFormat):
    """Text format using tablib's ``json`` format, served as ``application/json``."""

    TABLIB_MODULE = "tablib.formats._json"
    CONTENT_TYPE = "application/json"


class YAML(TextFormat):
    """Text format using tablib's ``yaml`` format, served as ``text/yaml``."""

    TABLIB_MODULE = "tablib.formats._yaml"
    # See https://stackoverflow.com/questions/332129/yaml-mime-type
    CONTENT_TYPE = "text/yaml"


class TSV(TextFormat):
    """Text format using tablib's ``tsv`` format, served as ``text/tab-separated-values``."""

    TABLIB_MODULE = "tablib.formats._tsv"
    CONTENT_TYPE = "text/tab-separated-values"


class ODS(TextFormat):
    """Format using tablib's ``ods`` format (OpenDocument spreadsheet media type)."""

    # NOTE(review): ODS is listed in BINARY_FORMATS further down, yet it
    # derives from TextFormat, so is_binary() is False and the read mode is
    # "r" -- confirm this combination is intended.
    TABLIB_MODULE = "tablib.formats._ods"
    CONTENT_TYPE = "application/vnd.oasis.opendocument.spreadsheet"


class HTML(TextFormat):
    """Text format using tablib's ``html`` format, served as ``text/html``."""

    TABLIB_MODULE = "tablib.formats._html"
    CONTENT_TYPE = "text/html"


class XLS(TablibFormat):
    """
    Legacy Excel (``.xls``) format; import is done directly with ``xlrd``.
    """

    TABLIB_MODULE = "tablib.formats._xls"
    CONTENT_TYPE = "application/vnd.ms-excel"

    def create_dataset(self, in_stream):
        """
        Build a dataset from the first sheet of the workbook.

        Row 0 provides the headers; every following row becomes a data row.
        """
        # xlrd is imported here so it is only needed when XLS is actually used
        import xlrd

        workbook = xlrd.open_workbook(file_contents=in_stream)
        result = tablib.Dataset()
        first_sheet = workbook.sheets()[0]

        result.headers = first_sheet.row_values(0)
        for row_index in range(1, first_sheet.nrows):
            result.append(first_sheet.row_values(row_index))
        return result


class XLSX(TablibFormat):
    """
    Excel (``.xlsx``) format; import is done directly with ``openpyxl``.
    """

    TABLIB_MODULE = "tablib.formats._xlsx"
    CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    def create_dataset(self, in_stream):
        """
        Create dataset from the active sheet of the workbook.

        The first row provides the headers. When the
        ``IMPORT_EXPORT_IMPORT_IGNORE_BLANK_LINES`` setting is truthy, rows
        whose cells are all ``None`` are skipped.

        ``in_stream`` is the workbook content as bytes.
        """
        from io import BytesIO

        import openpyxl

        # 'data_only' means values are read from formula cells, not the formula itself
        xlsx_book = openpyxl.load_workbook(
            BytesIO(in_stream), read_only=True, data_only=True
        )
        try:
            dataset = tablib.Dataset()
            sheet = xlsx_book.active

            # obtain generator
            rows = sheet.rows
            dataset.headers = [cell.value for cell in next(rows)]

            ignore_blanks = getattr(
                settings, "IMPORT_EXPORT_IMPORT_IGNORE_BLANK_LINES", False
            )
            for row in rows:
                row_values = [cell.value for cell in row]
                # do not add empty rows to dataset when blanks are ignored
                if ignore_blanks and all(value is None for value in row_values):
                    continue
                dataset.append(row_values)
        finally:
            # Read-only workbooks load lazily and keep the underlying archive
            # open until they are closed explicitly.
            xlsx_book.close()
        return dataset

    def export_data(self, dataset, **kwargs):
        """
        Return ``dataset`` exported as XLSX.

        If openpyxl raises ``IllegalCharacterError`` and the
        ``IMPORT_EXPORT_ESCAPE_ILLEGAL_CHARS_ON_EXPORT`` setting is exactly
        ``True``, illegal characters are replaced in place and the export is
        retried once. Otherwise the error is logged and a ``ValueError`` with
        a generic message is raised instead.
        """
        from openpyxl.utils.exceptions import IllegalCharacterError

        # #1698 temporary catch for deprecation warning in openpyxl
        # this catch block must be removed when openpyxl updated
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            try:
                return super().export_data(dataset, **kwargs)
            except IllegalCharacterError as e:
                if (
                    getattr(
                        settings, "IMPORT_EXPORT_ESCAPE_ILLEGAL_CHARS_ON_EXPORT", False
                    )
                    is True
                ):
                    self._escape_illegal_chars(dataset)
                    return super().export_data(dataset, **kwargs)
                logger.exception(e)
                # not raising original error due to reflected xss risk
                raise ValueError(_("export failed due to IllegalCharacterError"))

    def _escape_illegal_chars(self, dataset):
        """
        Replace characters openpyxl rejects with U+FFFD in every string cell
        of ``dataset``, in place. Non-string cells are left untouched.
        """
        from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE

        def _do_escape(cell):
            # isinstance (not an exact type check) so that str subclasses are
            # sanitised too; otherwise the retried export could fail again.
            if isinstance(cell, str):
                cell = ILLEGAL_CHARACTERS_RE.sub("\N{REPLACEMENT CHARACTER}", cell)
            return cell

        # Rotate through the rows exactly once: pop from the front, append the
        # escaped row to the back. The row count is taken up front instead of
        # iterating the dataset that is being mutated inside the loop.
        for _ in range(len(dataset)):
            row = dataset.lpop()
            dataset.append([_do_escape(cell) for cell in row])


#: Default formats offered for import and export. A format is only listed
#: when its ``is_available()`` check passes, which depends on what the
#: installed tablib library supports.
DEFAULT_FORMATS = [
    candidate
    for candidate in (CSV, XLS, XLSX, TSV, ODS, JSON, YAML, HTML)
    if candidate.is_available()
]

#: Formats that support different data types (such as datetime and numbers),
#: for which `coerce_to_string` is to be set false dynamically. Only formats
#: whose ``is_available()`` check passes are listed.
BINARY_FORMATS = [
    candidate
    for candidate in (XLS, XLSX, ODS)
    if candidate.is_available()
]

1	# when adding imports, ensure that they are local to the
2	# correct class for the file format.
3	# e.g. add openpyxl imports to the XLSXFormat class
4	# See issue 2004
5	import logging	5✔
6	import warnings	5✔
7
8	import tablib	5✔
9	from django.conf import settings	5✔
10	from django.utils.translation import gettext_lazy as _	5✔
11	from tablib.formats import registry	5✔
12
13	logger = logging.getLogger(__name__)	5✔
14
15
16	class Format:	5✔
17	def get_title(self):	5✔
18	return type(self)	5✔
19
20	def create_dataset(self, in_stream):	5✔
21	"""
22	Create dataset from given string.
23	"""
24	raise NotImplementedError()	5✔
25
26	def export_data(self, dataset, **kwargs):	5✔
27	"""
28	Returns format representation for given dataset.
29	"""
30	raise NotImplementedError()	5✔
31
32	def is_binary(self):	5✔
33	"""
34	Returns if this format is binary.
35	"""
36	return True	5✔
37
38	def get_read_mode(self):	5✔
39	"""
40	Returns mode for opening files.
41	"""
42	return "rb"	5✔
43
44	def get_extension(self):	5✔
45	"""
46	Returns extension for this format files.
47	"""
48	return ""	5✔
49
50	def get_content_type(self):	5✔
51	# For content types see
52	# https://www.iana.org/assignments/media-types/media-types.xhtml
53	return "application/octet-stream"	5✔
54
55	@classmethod	5✔
56	def is_available(cls):	5✔
57	return True	5✔
58
59	def can_import(self):	5✔
60	return False	5✔
61
62	def can_export(self):	5✔
63	return False	5✔
64
65
66	class TablibFormat(Format):	5✔
67	TABLIB_MODULE = None	5✔
68	CONTENT_TYPE = "application/octet-stream"	5✔
69
70	def __init__(self, encoding=None):	5✔
71	self.encoding = encoding	5✔
72
73	def get_format(self):	5✔
74	"""
75	Import and returns tablib module.
76	"""
77	if not self.TABLIB_MODULE:	5✔
78	raise AttributeError("TABLIB_MODULE must be defined")	5✔
79	key = self.TABLIB_MODULE.split(".")[-1].replace("_", "")	5✔
80	return registry.get_format(key)	5✔
81
82	@classmethod	5✔
83	def is_available(cls):	5✔
84	try:	5✔
85	cls().get_format()	5✔
86	except (tablib.core.UnsupportedFormat, ImportError):	5✔
87	return False	5✔
88	return True	5✔
89
90	def get_title(self):	5✔
91	return self.get_format().title	5✔
92
93	def create_dataset(self, in_stream, **kwargs):	5✔
94	return tablib.import_set(in_stream, format=self.get_title(), **kwargs)	5✔
95
96	def export_data(self, dataset, **kwargs):	5✔
97	if getattr(settings, "IMPORT_EXPORT_ESCAPE_FORMULAE_ON_EXPORT", False) is True:	5✔
98	self._escape_formulae(dataset)	5✔
99	return dataset.export(self.get_title(), **kwargs)	5✔
100
101	def get_extension(self):	5✔
102	return self.get_format().extensions[0]	5✔
103
104	def get_content_type(self):	5✔
105	return self.CONTENT_TYPE	5✔
106
107	def can_import(self):	5✔
108	return hasattr(self.get_format(), "import_set")	5✔
109
110	def can_export(self):	5✔
111	return hasattr(self.get_format(), "export_set")	5✔
112
113	def _escape_formulae(self, dataset):	5✔
114	def _do_escape(s):	5✔
115	return s.replace("=", "", 1) if s.startswith("=") else s	5✔
116
117	for r in dataset:	5✔
118	row = dataset.lpop()	5✔
119	row = [_do_escape(str(cell)) for cell in row]	5✔
120	dataset.append(row)	5✔
121
122
123	class TextFormat(TablibFormat):	5✔
124	def create_dataset(self, in_stream, **kwargs):	5✔
125	if isinstance(in_stream, bytes) and self.encoding:	5✔
126	in_stream = in_stream.decode(self.encoding)	5✔
127	return super().create_dataset(in_stream, **kwargs)	5✔
128
129	def get_read_mode(self):	5✔
130	return "r"	5✔
131
132	def is_binary(self):	5✔
133	return False	5✔
134
135
136	class CSV(TextFormat):	5✔
137	TABLIB_MODULE = "tablib.formats._csv"	5✔
138	CONTENT_TYPE = "text/csv"	5✔
139
140
141	class JSON(TextFormat):	5✔
142	TABLIB_MODULE = "tablib.formats._json"	5✔
143	CONTENT_TYPE = "application/json"	5✔
144
145
146	class YAML(TextFormat):	5✔
147	TABLIB_MODULE = "tablib.formats._yaml"	5✔
148	# See https://stackoverflow.com/questions/332129/yaml-mime-type
149	CONTENT_TYPE = "text/yaml"	5✔
150
151
152	class TSV(TextFormat):	5✔
153	TABLIB_MODULE = "tablib.formats._tsv"	5✔
154	CONTENT_TYPE = "text/tab-separated-values"	5✔
155
156
157	class ODS(TextFormat):	5✔
158	TABLIB_MODULE = "tablib.formats._ods"	5✔
159	CONTENT_TYPE = "application/vnd.oasis.opendocument.spreadsheet"	5✔
160
161
162	class HTML(TextFormat):	5✔
163	TABLIB_MODULE = "tablib.formats._html"	5✔
164	CONTENT_TYPE = "text/html"	5✔
165
166
167	class XLS(TablibFormat):	5✔
168	TABLIB_MODULE = "tablib.formats._xls"	5✔
169	CONTENT_TYPE = "application/vnd.ms-excel"	5✔
170
171	def create_dataset(self, in_stream):	5✔
172	"""
173	Create dataset from first sheet.
174	"""
175	import xlrd	5✔
176
177	xls_book = xlrd.open_workbook(file_contents=in_stream)	5✔
178	dataset = tablib.Dataset()	5✔
179	sheet = xls_book.sheets()[0]	5✔
180
181	dataset.headers = sheet.row_values(0)	5✔
182	for i in range(1, sheet.nrows):	5✔
183	dataset.append(sheet.row_values(i))	5✔
184	return dataset	5✔
185
186
187	class XLSX(TablibFormat):	5✔
188	TABLIB_MODULE = "tablib.formats._xlsx"	5✔
189	CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"	5✔
190
191	def create_dataset(self, in_stream):	5✔
192	"""
193	Create dataset from first sheet.
194	"""
195	from io import BytesIO	5✔
196
197	import openpyxl	5✔
198
199	# 'data_only' means values are read from formula cells, not the formula itself
200	xlsx_book = openpyxl.load_workbook(	5✔
201	BytesIO(in_stream), read_only=True, data_only=True
202	)
203
204	dataset = tablib.Dataset()	5✔
205	sheet = xlsx_book.active	5✔
206
207	# obtain generator
208	rows = sheet.rows	5✔
209	dataset.headers = [cell.value for cell in next(rows)]	5✔
210
211	ignore_blanks = getattr(	5✔
212	settings, "IMPORT_EXPORT_IMPORT_IGNORE_BLANK_LINES", False
213	)
214	for row in rows:	5✔
215	row_values = [cell.value for cell in row]	5✔
216
217	if ignore_blanks:	5✔
218	# do not add empty rows to dataset
219	if not all(value is None for value in row_values):	5✔
220	dataset.append(row_values)	5✔
221	else:
222	dataset.append(row_values)	5✔
223	return dataset	5✔
224
225	def export_data(self, dataset, **kwargs):	5✔
226	from openpyxl.utils.exceptions import IllegalCharacterError	5✔
227
228	# #1698 temporary catch for deprecation warning in openpyxl
229	# this catch block must be removed when openpyxl updated
230	with warnings.catch_warnings():	5✔
231	warnings.filterwarnings("ignore", category=DeprecationWarning)	5✔
232	try:	5✔
233	return super().export_data(dataset, **kwargs)	5✔
234	except IllegalCharacterError as e:	5✔
235	if (	5✔
236	getattr(
237	settings, "IMPORT_EXPORT_ESCAPE_ILLEGAL_CHARS_ON_EXPORT", False
238	)
239	is True
240	):
241	self._escape_illegal_chars(dataset)	5✔
242	return super().export_data(dataset, **kwargs)	5✔
243	logger.exception(e)	5✔
244	# not raising original error due to reflected xss risk
245	raise ValueError(_("export failed due to IllegalCharacterError"))	5✔
246
247	def _escape_illegal_chars(self, dataset):	5✔
248	from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE	5✔
249
250	def _do_escape(cell):	5✔
251	if type(cell) is str:	5✔
252	cell = ILLEGAL_CHARACTERS_RE.sub("\N{REPLACEMENT CHARACTER}", cell)	5✔
253	return cell	5✔
254
255	for r in dataset:	5✔
256	row = dataset.lpop()	5✔
257	row = [_do_escape(cell) for cell in row]	5✔
258	dataset.append(row)	5✔
259
260
261	#: These are the default formats for import and export. Whether they can be
262	#: used or not is depending on their implementation in the tablib library.
263	DEFAULT_FORMATS = [	5✔
264	fmt
265	for fmt in (
266	CSV,
267	XLS,
268	XLSX,
269	TSV,
270	ODS,
271	JSON,
272	YAML,
273	HTML,
274	)
275	if fmt.is_available()
276	]
277
278	#: These are the formats which support different data types (such as datetime
279	#: and numbers) for which `coerce_to_string` is to be set false dynamically.
280	BINARY_FORMATS = [	5✔
281	fmt
282	for fmt in (
283	XLS,
284	XLSX,
285	ODS,
286	)
287	if fmt.is_available()
288	]

django-import-export / django-import-export / 15366210941

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous