Update many with limit

17 views
Skip to first unread message

Rogerio Carrasqueira

unread,
Jan 8, 2021, 11:13:17 AM1/8/21
to mongoeng...@googlegroups.com
Hello Guys!

I’m having a similar issue. I have a collection with 17 million records,
and I would like to select a range of 5k records from this collection
that match a single query. However, I cannot even retrieve the first
record, because the database terminates abnormally. I’m running on AWS,
on a t3.xlarge instance with replication.

This is the query that I’m running in Python:

# Select contacts of customer 4275 that are tagged '33854', whose status is
# none of the listed failure states, that have not opted out, that carry no
# custom entry {k: 'rule_id', v: 301}, and that are not yet part of campaign
# 112129.
# Top-level keys of a filter document are ANDed implicitly, so the explicit
# '$and' wrapper and the duplicated customer_id clause are not needed.  The
# Python-2-only `L` suffixes are dropped; the numeric values are unchanged.
query = {
    u'customer_id': 4275,
    u'tag_list': '33854',
    'status': {'$nin': [u'invalid_domain', u'inexistent_address',
                        u'mailbox_full', u'smsfail', u'whatsappfail']},
    'opt_out': False,
    u'customs': {u'$not': {u'$elemMatch': {u'k': u'rule_id', u'v': 301}}},
    u'campaigns': {u'$not': {u'$elemMatch': {u'id': 112129}}},
}

# Slicing a PyMongo cursor overwrites any skip/limit set earlier, so the
# original `.limit(5000)` never took effect: the slice [1100001:1149999]
# means skip=1100001 and limit=48998.  Those effective values are spelled out
# explicitly here, leaving the selected window unchanged.
# NOTE(review): confirm the intended window size (5,000 vs 48,998 documents).
# NOTE(review): `customs` and `campaigns` are not part of the hinted index,
# and a skip this deep presumably forces the server to walk every skipped
# match first -- consider paging on a `_id` range instead of skip().
cursor = (
    Contact._get_collection()
    .find(query, {'_id': 1})  # the stored key is `_id`; only it is needed
    .hint([('customer_id', 1), ('tag_list', 1), ('status', 1),
           ('opt_out', 1)])
    .batch_size(5000)
    .skip(1100001)
    .limit(1149999 - 1100001)
)

# Update applied to every selected contact: append a not-yet-sent entry for
# this campaign to the contact's `campaigns` array.
update_exec = {
    '$push': {
        'campaigns': {
            'id': campaign_id,
            'was_sent': False,
            'type': campaign_type,
        },
    },
}

# Only the `_id` is needed to address each document, so it is read straight
# from the raw result instead of building a full Contact object per row.
# The per-document filter repeats the "not already in this campaign" test so
# that a contact which gained the campaign in the meantime is not pushed twice.
# NOTE(review): `bulk_operations` is assumed to be a list created earlier
# (not visible in this snippet) -- confirm.
for c in cursor:
    bulk_operations.append(
        UpdateOne(
            {
                '_id': c['_id'],
                'campaigns': {'$not': {'$elemMatch': {'id': campaign_id}}},
            },
            update_exec,
        )
    )

# bulk_write() raises InvalidOperation when given an empty list of
# operations, so the call is skipped when there is nothing to write.
results = None
if bulk_operations:
    results = Contact._get_collection().bulk_write(bulk_operations,
                                                   ordered=False)

I’m using MongoEngine to define my model class; its attributes are
shown below:

# Create your models here.
class Contact(mongo.DynamicDocument):
    """A customer's contact, with its delivery status, tags and campaigns.

    Declared as a dynamic document, so stored documents may carry attributes
    beyond the fields listed here.
    """

    # Allowed values for `status`, as (stored value, translated label) pairs.
    STATUS_CHOICES = (
        ('ok', _('Ativo')),
        ('mx', _('Falha na entrega')),
        ('invalid_domain', _('Dominio inválido')),
        ('inexistent_address', _('E-mail não existe')),
        ('mailbox_full', _('Caixa cheia')),
        ('size_limit', _('Limite da mensagem excedido')),
        ('mail_loop', _('E-mail em loop')),
        ('spam', _('Spam')),
        ('unknown', _('Erro desconhecido')),
        ('complaint', _('Reclamação')),
        ('abuse', _('Denúncia de Abuse')),
        ('smsfail', _('Falha na Entrega de SMS')),
        ('whatsappfail', _('Falha na Entrega de WhatsApp')),
    )

    # Owning customer; unique together with `email`.
    customer_id = mongo.IntField(verbose_name=_(u'Cliente'),
                                 unique_with='email')
    name = mongo.StringField(max_length=255, verbose_name=_(u'Nome'))
    email = mongo.EmailField(verbose_name=_(u'E-mail'))
    # Campaigns the contact took part in, one dict per campaign.
    campaigns = mongo.ListField(
        mongo.DictField(),
        verbose_name=_(u'Campanhas que o usuário participou'))
    # Customer-defined custom fields, one dict per entry.
    customs = mongo.ListField(
        mongo.DictField(),
        verbose_name=_(u'Campos customizados do cliente'))
    # max_length must be an integer: the original string '10' cannot be
    # compared meaningfully with len(value), and a real limit of 10 would
    # reject declared choices such as 'inexistent_address' (18 characters).
    # 32 leaves room for every key in STATUS_CHOICES.
    status = mongo.StringField(choices=STATUS_CHOICES, default='ok',
                               max_length=32,
                               verbose_name=_(u'Status do E-mail'))
    date_created = mongo.DateTimeField(verbose_name=_(u'Criado em'))
    last_updated = mongo.DateTimeField(
        verbose_name=_(u'Última atualização em'))
    # Lists (tags) the contact belongs to.
    tag_list = mongo.ListField(
        mongo.StringField(),
        verbose_name=_(u'Listas que o contato faz parte'))

    # NOTE(review): 'opt_out' is indexed below but not declared as a field;
    # presumably it is stored as a dynamic attribute -- confirm.
    # NOTE(review): the (customer_id, tag_list, status) index is a prefix of
    # the four-key index listed before it and may be redundant -- confirm
    # before dropping it.
    meta = {
        'index_background': True,
        'index_drop_dups': True,
        'indexes': [
            ('customer_id', 'tag_list', 'status', 'opt_out'),
            ('customer_id', 'tag_list', 'status'),
            ('customer_id', 'tag_list', 'opt_out'),
            ('customer_id', 'customs.k', 'customs.v', 'status', 'opt_out'),
        ],
    }

    class Meta:
        using = 'mongodb'
        verbose_name = _(u'Contato')
        verbose_name_plural = _(u'Contatos')

I must confess that I’ve tried everything to solve this performance
issue; I’m almost ready to offer a bounty to anyone who can help solve
this problem.

Any help would be greatly appreciated.

Thanks so much!

Rogério Carrasqueira
Reply all
Reply to author
Forward
0 new messages