# Source code for pypsi.utils
#
# Copyright (c) 2015, Adam Meily <meily.adam@gmail.com>
# Pypsi - https://github.com/ameily/pypsi
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
'''
Utility functions and classes.
'''
import chardet
import codecs
import io
def safe_open(file, mode='r', chunk_size=4096, ascii_is_utf8=True, **kwargs):
    '''
    Detect a file's encoding, skip any Byte Order Mark (BOM) located at the
    beginning of the file, and return the opened file stream. The `file`
    argument can be either a string containing a path to the file or an
    already open binary file-like object.

    If the first chunk starts with a Unicode BOM, the encoding is taken
    directly from the BOM (using the endian-specific codec, so that the BOM
    can safely be skipped). Otherwise the encoding is guessed by
    :mod:`chardet` from the first `chunk_size` bytes.

    :param str file: path to the file or a binary file-like object
    :param str mode: the mode to open the file (see :func:`open`)
    :param int chunk_size: number of bytes to read to determine encoding
    :param bool ascii_is_utf8:
        whether to force UTF-8 encoding if the file is detected as ASCII
    :param kwargs:
        extra keyword arguments (for example ``errors``) forwarded to
        :func:`codecs.open` when `file` is a path, or to
        :class:`io.TextIOWrapper` when `file` is a file-like object. They
        are not used in binary mode or when the input is empty.
    :returns file: the opened file stream
    '''
    # The 4-byte UTF-32 marks must be tested before the UTF-16 marks because
    # BOM_UTF32_LE begins with the same two bytes as BOM_UTF16_LE.
    known_boms = (
        (codecs.BOM_UTF32_BE, 'utf-32-be'),
        (codecs.BOM_UTF32_LE, 'utf-32-le'),
        (codecs.BOM_UTF8, 'utf-8'),
        (codecs.BOM_UTF16_BE, 'utf-16-be'),
        (codecs.BOM_UTF16_LE, 'utf-16-le'),
    )

    is_path = isinstance(file, str)

    if 'b' in mode:
        # binary mode: no decoding is needed, hand back the raw stream
        return open(file, mode) if is_path else file

    if is_path:
        # open the file on disk and read the first chunk
        with open(file, 'rb') as fp:
            header = fp.read(chunk_size)
    else:
        # read the header and move back to the beginning of the file
        header = file.read(chunk_size)
        file.seek(0)

    if not header:
        # nothing to sniff; fall back to a plain open / the stream as given
        return open(file, mode) if is_path else file

    enc = None
    bom_len = 0
    for bom, bom_encoding in known_boms:
        if header.startswith(bom):
            # Use the explicit endian-specific codec: the generic "utf-16" /
            # "utf-32" codecs require the BOM to still be in the stream and
            # would fail once it has been skipped.
            enc = bom_encoding
            bom_len = len(bom)
            break
    else:
        # no BOM present: let chardet guess from the header bytes
        enc = chardet.detect(header)['encoding']
        if ascii_is_utf8 and enc == 'ascii':
            # the encoding has been detected as ASCII, which is a strict
            # subset of UTF-8, so open the file as UTF-8 instead
            enc = 'utf-8'

    if is_path:
        fp = codecs.open(file, mode, encoding=enc, **kwargs)
        if bom_len:
            fp.seek(bom_len)
    else:
        if bom_len:
            # position the binary stream past the BOM before wrapping it
            file.seek(bom_len)
        fp = io.TextIOWrapper(file, encoding=enc, **kwargs)

    return fp
def escape_string(s, escape_char, chars=' \n\t\xa0', escape_escape_char=True):
    '''
    Return a copy of `s` in which every character that needs escaping is
    prefixed with `escape_char`.

    :param str s: the string to escape
    :param str escape_char: the string inserted before each escaped character
    :param str chars: the characters that must be escaped
    :param bool escape_escape_char:
        whether occurrences of `escape_char` itself are escaped as well
    :returns str: the escaped string
    '''
    needs_escape = chars + escape_char if escape_escape_char else chars
    return ''.join(
        escape_char + ch if ch in needs_escape else ch
        for ch in s
    )