# Concatenate the three frames along the default axis; ignore_index=True
# relabels the concatenation axis 0..n-1 instead of keeping the input labels.
# NOTE(review): with ignore_index=True the `keys` presumably never appear in
# the result index — confirm which of the two options is actually intended.
dataframe_lists = [df1, df2, df3]
result = pd.concat(dataframe_lists, keys = ['one', 'two','three'], ignore_index=True)
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-198-a30c57d465d0> in <module>()
----> 1 result = pd.concat(dataframe_lists, keys = ['one', 'two','three'], ignore_index=True)
2 check(dataframe_lists)
C:\WinPython-64bit-3.4.3.5\python-3.4.3.amd64\lib\site-packages\pandas\tools\merge.py in concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy)
753 verify_integrity=verify_integrity,
754 copy=copy)
--> 755 return op.get_result()
756
757
C:\WinPython-64bit-3.4.3.5\python-3.4.3.amd64\lib\site-packages\pandas\tools\merge.py in get_result(self)
924
925 new_data = concatenate_block_managers(
--> 926 mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=self.copy)
927 if not self.copy:
928 new_data._consolidate_inplace()
C:\WinPython-64bit-3.4.3.5\python-3.4.3.amd64\lib\site-packages\pandas\core\internals.py in concatenate_block_managers(mgrs_indexers, axes, concat_axis, copy)
4061 copy=copy),
4062 placement=placement)
-> 4063 for placement, join_units in concat_plan]
4064
4065 return BlockManager(blocks, axes)
C:\WinPython-64bit-3.4.3.5\python-3.4.3.amd64\lib\site-packages\pandas\core\internals.py in <listcomp>(.0)
4061 copy=copy),
4062 placement=placement)
-> 4063 for placement, join_units in concat_plan]
4064
4065 return BlockManager(blocks, axes)
C:\WinPython-64bit-3.4.3.5\python-3.4.3.amd64\lib\site-packages\pandas\core\internals.py in concatenate_join_units(join_units, concat_axis, copy)
4150 raise AssertionError("Concatenating join units along axis0")
4151
-> 4152 empty_dtype, upcasted_na = get_empty_dtype_and_na(join_units)
4153
4154 to_concat = [ju.get_reindexed_values(empty_dtype=empty_dtype,
C:\WinPython-64bit-3.4.3.5\python-3.4.3.amd64\lib\site-packages\pandas\core\internals.py in get_empty_dtype_and_na(join_units)
4139 return np.dtype('m8[ns]'), tslib.iNaT
4140 else: # pragma
-> 4141 raise AssertionError("invalid dtype determination in get_concat_dtype")
4142
4143
# AssertionError: invalid dtype determination in get_concat_dtype
def check(list_of_df):
    """Return the column headers of every empty DataFrame in *list_of_df*.

    Args:
        list_of_df: Iterable of pandas DataFrames to inspect.

    Returns:
        A list holding the ``columns`` index of each empty frame, in input
        order. Frames that contain data are skipped.
    """
    # Iterate the argument itself: the earlier version looped over the global
    # ``dataframe_lists`` and ignored whatever the caller passed in.
    return [df.columns for df in list_of_df if df.empty]
# 'AT','AccountNum', 'AcctType', 'Amount', 'City', 'Comment', 'Country','DuplicateAddressFlag', 'FromAccount', 'FromAccountNum', 'FromAccountT','PN', 'PriorCity', 'PriorCountry', 'PriorState', 'PriorStreetAddress','PriorStreetAddress2', 'PriorZip', 'RTID', 'State', 'Street1','Street2', 'Timestamp', 'ToAccount', 'ToAccountNum', 'ToAccountT', 'TransferAmount', 'TransferMade', 'TransferTimestamp', 'Ttype', 'WA','WC', 'Zip'
# 'A', 'AT','AccountNum', 'AcctType', 'Amount', 'B', 'C', 'City', 'Comment', 'Country', 'D', 'DuplicateAddressFlag', 'E', 'F', 'FromAccount', 'FromAccountNum', 'FromAccountT', 'G', 'PN', 'PriorCity', 'PriorCountry', 'PriorState', 'PriorStreetAddress','PriorStreetAddress2', 'PriorZip', 'RTID', 'State', 'Street1','Street2', 'Timestamp', 'ToAccount', 'ToAccountNum', 'ToAccountT', 'TransferAmount', 'TransferMade', 'TransferTimestamp', 'Ttype', 'WA','WC', 'Zip'
# --
You received this message because you are subscribed to the Google Groups "PyData" group.
To unsubscribe from this group and stop receiving emails from it, send an email to pydata+un...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.
What version of pandas are you using? With a small example, concatenating an empty frame works for me with pandas 0.16.2:
In [1]: df1 = pd.DataFrame({'a':[1,2], 'b':[3,4]})
In [3]: df2 = pd.DataFrame(columns=['a', 'b'])
In [4]: df2
Out[4]:
Empty DataFrame
Columns: [a, b]
Index: []
In [5]: pd.concat([df1, df2])
Out[5]:
a b
0 1 3
1 2 4
On Wednesday, September 9, 2015 at 5:38:59 PM UTC-4, Joris Van den Bossche wrote:
What version of pandas are you using? With a small example, concatenating an empty frame works for me with pandas 0.16.2:
In [1]: df1 = pd.DataFrame({'a':[1,2], 'b':[3,4]})
In [3]: df2 = pd.DataFrame(columns=['a', 'b'])
In [4]: df2
Out[4]:
Empty DataFrame
Columns: [a, b]
Index: []
In [5]: pd.concat([df1, df2])
Out[5]:
a b
0 1 3
1 2 4
I too have replicated something similar. However, for some reason the 410000-row CSV file that I am using has up to 1000 field names and hence mixed dtypes. Perhaps this is the cause?
Regardless I am curious to know how to write out just the headers of any empty dataframe (avoiding duplicates and appending any unique column headers - if any). Thank you for your feedback.
2015-09-10 2:30 GMT+02:00 kyoto89 <ahlusar....@gmail.com>:
On Wednesday, September 9, 2015 at 5:38:59 PM UTC-4, Joris Van den Bossche wrote:
What version of pandas are you using? With a small example, concatenating an empty frame works for me with pandas 0.16.2:
In [1]: df1 = pd.DataFrame({'a':[1,2], 'b':[3,4]})
In [3]: df2 = pd.DataFrame(columns=['a', 'b'])
In [4]: df2
Out[4]:
Empty DataFrame
Columns: [a, b]
Index: []
In [5]: pd.concat([df1, df2])
Out[5]:
a b
0 1 3
1 2 4
I too have replicated something similar. However, for some reason the 410000-row CSV file that I am using has up to 1000 field names and hence mixed dtypes. Perhaps this is the cause?
Using a similar small test as above, but with mixed dtypes and with different column names, also works for me.
Can you try to provide a reproducible (copy-pastable) example that reproduces the error?
A= data[data['RRT'] == 'A'] # Select just the rows of the dataframe "data" where RRT == 'A'
B= data[data['RRT'] == 'B']
C= data[data['RRT'] == 'C']
D= data[data['RRT'] == 'D']
for column_name, column in A.transpose().iterrows():
AColumns= A[['ANum','RTID', 'Description','Type','Status', 'AD', 'CD', 'OD', 'RCD']] #get select columns indexed with dataframe, "A"
A.count
This is the output:
<bound method DataFrame.count of Empty DataFrame
Columns: [ANum,RTID, Description,Type,Status, AD, CD, OD, RCD]
Index: []>
data=pd.read_csv('Merged_Success2.csv', dtype=str, error_bad_lines = False, iterator=True, chunksize=1000)
data=pd.concat([chunk for chunk in data], ignore_index=True)
Regardless I am curious to know how to write out just the headers of any empty dataframe (avoiding duplicates and appending any unique column headers - if any). Thank you for your feedback.
You can always manually take the union of the column names, and reindex with that.
Roughly something like this:
all_cols = df1.columns.union(df2.columns)
df1.reindex(columns=all_cols)