Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F120768838
chardetect.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Jul 6, 22:24
Size
2 KB
Mime Type
text/x-python
Expires
Tue, Jul 8, 22:24 (2 d)
Engine
blob
Format
Raw Data
Handle
27226798
Attached To
R3852 EMS for Smart-Building
chardetect.py
View Options
#!/usr/bin/env python
"""
Script which takes one or more file paths and reports on their detected
encodings
Example::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
If no paths are provided, it takes its input from stdin.
"""
from
__future__
import
absolute_import
,
print_function
,
unicode_literals
import
argparse
import
sys
from
io
import
open
from
chardet
import
__version__
from
chardet.universaldetector
import
UniversalDetector
def description_of(lines, name='stdin'):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :returns: ``'<name>: <encoding> with confidence <confidence>'`` when the
              detector reports an encoding, otherwise ``'<name>: no result'``.
    :rtype: str
    """
    u = UniversalDetector()
    for line in lines:
        u.feed(line)
        # The detector flags ``done`` once it has reached a verdict (see the
        # chardet usage documentation); stop reading at that point instead
        # of draining the rest of the input.
        if u.done:
            break
    # close() finalises detection and populates ``u.result``.
    u.close()
    result = u.result
    if result['encoding']:
        return '{0}: {1} with confidence {2}'.format(name, result['encoding'],
                                                     result['confidence'])
    return '{0}: no result'.format(name)
def main(argv=None):
    """
    Handles command line arguments and gets things started.

    Each input is passed to ``description_of`` and the resulting line is
    printed to stdout.  When no paths are given, standard input is read.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # The detector consumes bytes.  On Python 3 ``sys.stdin`` is a text
    # stream, so use its underlying binary buffer when it has one; otherwise
    # (Python 2, or a replaced stdin without ``buffer``) use it directly.
    stdin_bytes = getattr(sys.stdin, 'buffer', sys.stdin)

    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected "
                    "encodings",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[stdin_bytes])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        try:
            if f.isatty():
                # Reading from a terminal: tell the user how to end input.
                print("You are running chardetect interactively. Press " +
                      "CTRL-D twice at the start of a blank line to signal the " +
                      "end of your input. If you want help, run chardetect " +
                      "--help\n", file=sys.stderr)
            print(description_of(f, f.name))
        finally:
            # Files opened by argparse belong to us and must be released;
            # the process-wide standard input streams must stay open.
            if f is not sys.stdin and f is not stdin_bytes:
                f.close()
# Run the command-line interface only when this file is executed as a
# script; importing it as a module has no side effects.
if __name__ == '__main__':
    main()
Event Timeline
Log In to Comment