Ken Mankoff
unread,Jul 14, 2025, 7:53:29 AMJul 14Sign in to reply to author
Sign in to forward
You do not have permission to delete messages in this group
Either email addresses are anonymous for this group or you need the view member email addresses permission to view the original message
to Daniele Nicolodi, bean...@googlegroups.com
Hi Daniele,
Thanks for the 'modern' suggestion.
But based on the strongly worded suggestion from Martin, I'll also share a script that loads all transactions into a pandas dataframe (thanks to ChatGPT). Further processing on this dataframe lets me do anything I want...
-k.
#!/usr/bin/env python
"""
Extract (date, from, to, amount, currency, description) rows from Beancount ledger with date as a pandas‑friendly datetime index.
"""
import sys
from pathlib import Path
from typing import List, Tuple
import pandas as pd
from beancount.loader import load_file
from beancount.core.data import Transaction, Posting
Row = Tuple[str, str, str, float, str, str] # (date, from, to, amount, currency, description)
def pair_postings(date: str, description: str, postings: List[Posting]) -> List[Row]:
"""
For all postings of the same currency in a transaction, pair debits (negative)
with credits (positive) and emit (date, from, to, amount, currency, description) rows.
"""
pos = [p for p in postings if p.units and p.units.number > 0]
neg = [p for p in postings if p.units and p.units.number < 0]
rows: List[Row] = []
while pos and neg:
p_from = neg[0]
p_to = pos[0]
amt = min(abs(p_from.units.number), p_to.units.number)
currency = p_from.units.currency
rows.append((date, p_from.account, p_to.account, amt, currency, description))
# Decrement amounts
p_from = p_from._replace(units=p_from.units._replace(number=p_from.units.number + amt))
p_to = p_to._replace(units=p_to.units._replace(number=p_to.units.number - amt))
if p_from.units.number == 0:
neg.pop(0)
else:
neg[0] = p_from
if p_to.units.number == 0:
pos.pop(0)
else:
pos[0] = p_to
return rows
def ledger_to_dataframe(path: Path | str) -> pd.DataFrame:
entries, errors, _ = load_file(str(path))
if errors:
for e in errors:
print(e, file=sys.stderr)
rows: List[Row] = []
for entry in entries:
if not isinstance(entry, Transaction):
continue
description = entry.payee or ""
if entry.narration:
if description:
description += " "
description += entry.narration
by_currency: dict[str, List[Posting]] = {}
for p in entry.postings:
if p.units:
by_currency.setdefault(p.units.currency, []).append(p)
for plist in by_currency.values():
rows.extend(pair_postings(entry.date, description, plist))
df = pd.DataFrame(rows, columns=["date", "from", "to", "amount", "currency", "description"])
df["date"] = pd.to_datetime(df["date"]).dt.normalize()
df["amount"] = df["amount"].astype(float)
df = df.set_index('date')
return df
if __name__ == "__main__":
if len(sys.argv) != 2:
sys.exit("Usage: python txns_to_dataframe.py /path/to/ledger.beancount")
df = ledger_to_dataframe(Path(sys.argv[1]).expanduser())
print(df.head())