Thanks for explaining this! Before I saw your explanation, I had gone ahead and written a quick script to create a single .mrc file from my individual MARCXML files by traversing the XML DOM of each…
import os
import pymarc
from pymarc import Record, Field
from xml.dom.minidom import parseString
# Combine a directory of MARCXML files into one binary MARC (.mrc) file by
# walking each file's DOM and rebuilding every <record> as a pymarc Record.
#
# NOTE(review): pymarc also ships its own MARCXML reader
# (pymarc.parse_xml_to_array / pymarc.map_xml), which could presumably
# replace the hand-rolled traversal below -- confirm against the installed
# pymarc version before switching.


def _node_text(node):
    """Return the concatenated text of *node*'s direct text children.

    An empty element (``<subfield code="a"/>``) has no children, so
    ``node.firstChild.nodeValue`` would raise AttributeError; this returns
    an empty string instead.
    """
    return ''.join(
        child.nodeValue
        for child in node.childNodes
        if child.nodeType in (child.TEXT_NODE, child.CDATA_SECTION_NODE)
    )


def _record_from_dom(xml_rec):
    """Build a pymarc Record from one MARCXML <record> DOM element."""
    mrc_rec = Record()

    # The leader is a property of the record, not a '000' field: adding it
    # as a Field puts a bogus 000 entry in the directory and leaves the real
    # leader at its default value.
    for ldr in xml_rec.getElementsByTagName('leader'):
        ldr_data = _node_text(ldr)
        if ldr_data:
            mrc_rec.leader = ldr_data

    for cntrl in xml_rec.getElementsByTagName('controlfield'):
        mrc_rec.add_field(
            Field(tag=cntrl.getAttribute('tag'), data=_node_text(cntrl)))

    for datafield in xml_rec.getElementsByTagName('datafield'):
        # getAttribute returns '' for a missing attribute; an indicator must
        # occupy one character, so fall back to a blank.
        ind1 = datafield.getAttribute('ind1') or ' '
        ind2 = datafield.getAttribute('ind2') or ' '
        mrc_field = Field(tag=datafield.getAttribute('tag'),
                          indicators=[ind1, ind2], subfields=[])
        for subfield in datafield.getElementsByTagName('subfield'):
            subfield_data = _node_text(subfield)
            # NOTE(review): this silently drops every non-ASCII character
            # (diacritics, non-Latin scripts). Kept as in the original
            # script; confirm whether the data loss is acceptable.
            subfield_data = subfield_data.encode('ascii', 'ignore')
            mrc_field.add_subfield(subfield.getAttribute('code'),
                                   subfield_data)
        mrc_rec.add_field(mrc_field)

    return mrc_rec


inst_code = raw_input('Enter the 3-letter institutional code: ')
batch_date = raw_input('Enter the batch date (YYYYMMDD): ')
base_dir = 'work/'+inst_code+'/'+inst_code+'_'+batch_date
marcxml_dir = base_dir+'/marcxml'

# MARC transmission format is binary, so the output must be opened in 'wb';
# text mode rewrites newline bytes on Windows and corrupts record lengths.
marcRecsOut = pymarc.MARCWriter(
    open(base_dir+'/'+inst_code+'_'+batch_date+'_1_orig_recs.mrc', 'wb'))
try:
    # Sorted so the record order in the output is reproducible between runs
    # (os.listdir returns entries in arbitrary order).
    for filename in sorted(os.listdir(marcxml_dir)):
        file_path = os.path.join(marcxml_dir, filename)
        if not os.path.isfile(file_path) or not file_path.endswith('xml'):
            continue

        with open(file_path, 'r') as marcxml_file:
            marcxml_str = marcxml_file.read()

        xmlDOM = parseString(marcxml_str)
        # One output record per <record> element, so a file holding several
        # records no longer has all of their fields merged into one Record.
        for xml_rec in xmlDOM.getElementsByTagName('record'):
            marcRecsOut.write(_record_from_dom(xml_rec))
finally:
    # Close the writer even if a file fails to parse, so the records
    # written so far are flushed to disk.
    marcRecsOut.close()
But I definitely plan to look into the pymarc functions you mentioned, since I'd really like to understand that method.