Hello everybody,
I'm using IPython for some data analysis, and I've encountered a strange error message.
For one specific dataset, when I run
df_bad.groupby(["L","Ts","rundesc"]).sum()
It raises an exception:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-41-440bdd98e977> in <module>()
1
----> 2 df_bad.groupby(["L","Ts","rundesc"]).sum()
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/core/groupby.pyc in f(self)
62 raise SpecificationError(str(e))
63 except Exception:
---> 64 result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
65 if _convert:
66 result = result.convert_objects()
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/core/groupby.pyc in aggregate(self, arg, *args, **kwargs)
1695
1696 if self.grouper.nkeys > 1:
-> 1697 return self._python_agg_general(arg, *args, **kwargs)
1698 else:
1699 result = self._aggregate_generic(arg, *args, **kwargs)
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/core/groupby.pyc in _python_agg_general(self, func, *args, **kwargs)
498 output[name] = self._try_cast(values[mask],result)
499
--> 500 return self._wrap_aggregated_output(output)
501
502 def _wrap_applied_output(self, *args, **kwargs):
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/core/groupby.pyc in _wrap_aggregated_output(self, output, names)
2036 def _wrap_aggregated_output(self, output, names=None):
2037 agg_axis = 0 if self.axis == 1 else 1
-> 2038 agg_labels = self._obj_with_exclusions._get_axis(agg_axis)
2039
2040 output_keys = self._decide_output_index(output, agg_labels)
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/lib.so in pandas.lib.cache_readonly.__get__ (pandas/lib.c:28123)()
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/core/groupby.pyc in _obj_with_exclusions(self)
1639
1640 if len(self.exclusions) > 0:
-> 1641 return self.obj.drop(self.exclusions, axis=1)
1642 else:
1643 return self.obj
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in drop(self, labels, axis, level)
370 else:
371 new_axis = axis.drop(labels)
--> 372 dropped = self.reindex(**{axis_name: new_axis})
373 try:
374 dropped.axes[axis_].names = axis.names
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in reindex(self, index, columns, method, level, fill_value, limit, copy)
2523 if columns is not None:
2524 frame = frame._reindex_columns(columns, copy, level,
-> 2525 fill_value, limit)
2526
2527 if index is not None:
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _reindex_columns(self, new_columns, copy, level, fill_value, limit)
2615 limit=limit)
2616 return self._reindex_with_indexers(None, None, new_columns, indexer,
-> 2617 copy, fill_value)
2618
2619 def _reindex_with_indexers(self, index, row_indexer, columns, col_indexer,
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _reindex_with_indexers(self, index, row_indexer, columns, col_indexer, copy, fill_value)
2632 col_indexer = com._ensure_int64(col_indexer)
2633 new_data = new_data.reindex_indexer(columns, col_indexer, axis=0,
-> 2634 fill_value=fill_value)
2635 elif columns is not None and columns is not new_data.axes[0]:
2636 new_data = new_data.reindex_items(columns, copy=copy,
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in reindex_indexer(self, new_axis, indexer, axis, fill_value)
1564 """
1565 if axis == 0:
-> 1566 return self._reindex_indexer_items(new_axis, indexer, fill_value)
1567
1568 new_blocks = []
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in _reindex_indexer_items(self, new_items, indexer, fill_value)
1595 new_block_items = new_items.take(selector.nonzero()[0])
1596 new_values = com.take_nd(blk.values, blk_indexer[selector], axis=0,
-> 1597 allow_fill=False)
1598 new_blocks.append(make_block(new_values, new_block_items,
1599 new_items, fastpath=True))
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/core/common.pyc in take_nd(arr, indexer, axis, out, fill_value, mask_info, allow_fill)
531 func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype,
532 axis=axis, mask_info=mask_info)
--> 533 func(arr, indexer, out, fill_value)
534 return out
535
/home/ronen/anaconda/lib/python2.7/site-packages/pandas/algos.so in pandas.algos.take_2d_axis0_float64_float64 (pandas/algos.c:72776)()
ValueError: Big-endian buffer not supported on little-endian compiler
I have no idea why. The DataFrame looks fine, and I couldn't find anything wrong with it. The same exception is raised when I apply an aggregation function.
I would attach the DataFrame, but when I store it to HDF and then reopen it, the error no longer occurs.
This exception is raised for only a few of the many datasets, all of which were prepared in the same way.
Any ideas on how to debug or fix it?
Thanks,
Ronen