# Baseline case: load a CSV straight from an HTTP URL; no compression argument is passed.
import pandas as pd
filename = r'http://samplecsvs.s3.amazonaws.com/SalesJan2009.csv'
df = pd.read_csv(filename)
# Print the shape of the loaded DataFrame to confirm the read succeeded.
print(df.shape)
Output: (998, 12)
# Failing case: the URL points at a .csv.gz file and gzip decompression is requested.
import pandas as pd
filename = r'http://nodestreams.com/input/people.csv.gz'
# In the reported run (pandas 0.14.1 on Python 3.3) this call raises
# CParserError from the C parser: "Calling read(nbytes) on source failed".
df = pd.read_csv(filename, compression='gzip')
print(df.shape)
Output:
Traceback (most recent call last):
  File "/nfs/site/home/nschultz/mydisk4/web/test.py", line 33, in <module>
    df = pd.read_csv(filename, compression='gzip')
  File "/nfs/fm/disks/fm_cse_05026/nschultz/python/lib/python3.3/site-packages/pandas-0.14.1-py3.3-linux-x86_64.egg/pandas/io/parsers.py", line 452, in parser_f
    return _read(filepath_or_buffer, kwds)
  File "/nfs/fm/disks/fm_cse_05026/nschultz/python/lib/python3.3/site-packages/pandas-0.14.1-py3.3-linux-x86_64.egg/pandas/io/parsers.py", line 234, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/nfs/fm/disks/fm_cse_05026/nschultz/python/lib/python3.3/site-packages/pandas-0.14.1-py3.3-linux-x86_64.egg/pandas/io/parsers.py", line 542, in __init__
    self._make_engine(self.engine)
  File "/nfs/fm/disks/fm_cse_05026/nschultz/python/lib/python3.3/site-packages/pandas-0.14.1-py3.3-linux-x86_64.egg/pandas/io/parsers.py", line 679, in _make_engine
    self._engine = CParserWrapper(self.f, **self.options)
  File "/nfs/fm/disks/fm_cse_05026/nschultz/python/lib/python3.3/site-packages/pandas-0.14.1-py3.3-linux-x86_64.egg/pandas/io/parsers.py", line 1041, in __init__
    self._reader = _parser.TextReader(src, **kwds)
  File "parser.pyx", line 485, in pandas.parser.TextReader.__cinit__ (pandas/parser.c:4413)
  File "parser.pyx", line 600, in pandas.parser.TextReader._get_header (pandas/parser.c:5649)
  File "parser.pyx", line 791, in pandas.parser.TextReader._tokenize_rows (pandas/parser.c:7599)
  File "parser.pyx", line 1699, in pandas.parser.raise_parser_error (pandas/parser.c:19062)
pandas.parser.CParserError: Error tokenizing data. C error: Calling read(nbytes) on source failed. Try engine='python'.
# Retry of the gzip-over-URL read, following the earlier error's hint to use engine='python'.
import pandas as pd
filename = r'http://nodestreams.com/input/people.csv.gz'
# In the reported run (pandas 0.14.1 on Python 3.3) this still fails: the
# traceback ends inside the standard-library gzip module with
# "TypeError: can't concat bytes to str" while reading the gzip header.
df = pd.read_csv(filename, compression='gzip', engine= 'python')
print(df.shape)
Output:
Traceback (most recent call last):
  File "/nfs/site/home/nschultz/mydisk4/web/test.py", line 33, in <module>
    df = pd.read_csv(filename, compression='gzip', engine= 'python')
  File "/nfs/fm/disks/fm_cse_05026/nschultz/python/lib/python3.3/site-packages/pandas-0.14.1-py3.3-linux-x86_64.egg/pandas/io/parsers.py", line 452, in parser_f
    return _read(filepath_or_buffer, kwds)
  File "/nfs/fm/disks/fm_cse_05026/nschultz/python/lib/python3.3/site-packages/pandas-0.14.1-py3.3-linux-x86_64.egg/pandas/io/parsers.py", line 234, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/nfs/fm/disks/fm_cse_05026/nschultz/python/lib/python3.3/site-packages/pandas-0.14.1-py3.3-linux-x86_64.egg/pandas/io/parsers.py", line 542, in __init__
    self._make_engine(self.engine)
  File "/nfs/fm/disks/fm_cse_05026/nschultz/python/lib/python3.3/site-packages/pandas-0.14.1-py3.3-linux-x86_64.egg/pandas/io/parsers.py", line 685, in _make_engine
    self._engine = klass(self.f, **self.options)
  File "/nfs/fm/disks/fm_cse_05026/nschultz/python/lib/python3.3/site-packages/pandas-0.14.1-py3.3-linux-x86_64.egg/pandas/io/parsers.py", line 1373, in __init__
    self.columns, self.num_original_columns = self._infer_columns()
  File "/nfs/fm/disks/fm_cse_05026/nschultz/python/lib/python3.3/site-packages/pandas-0.14.1-py3.3-linux-x86_64.egg/pandas/io/parsers.py", line 1587, in _infer_columns
    line = self._buffered_line()
  File "/nfs/fm/disks/fm_cse_05026/nschultz/python/lib/python3.3/site-packages/pandas-0.14.1-py3.3-linux-x86_64.egg/pandas/io/parsers.py", line 1713, in _buffered_line
    return self._next_line()
  File "/nfs/fm/disks/fm_cse_05026/nschultz/python/lib/python3.3/site-packages/pandas-0.14.1-py3.3-linux-x86_64.egg/pandas/io/parsers.py", line 1738, in _next_line
    orig_line = next(self.data)
  File "/usr/intel/pkgs/python/3.3.2/lib/python3.3/gzip.py", line 393, in read1
    self._read()
  File "/usr/intel/pkgs/python/3.3.2/lib/python3.3/gzip.py", line 441, in _read
    self._read_gzip_header()
  File "/usr/intel/pkgs/python/3.3.2/lib/python3.3/gzip.py", line 285, in _read_gzip_header
    magic = self.fileobj.read(2)
  File "/usr/intel/pkgs/python/3.3.2/lib/python3.3/gzip.py", line 93, in read
    self.file.read(size-self._length+read)
TypeError: can't concat bytes to str