Django insert performance with multiple nested models
I have the following model definition:
class Workflow(models.Model):
    # Root of the hierarchy: Workflow -> Step -> Section -> Question -> Option -> Action.
    name = models.CharField(max_length=255)
class Step(models.Model):
    """Second level of the hierarchy: a Step belongs to a Workflow."""
    # BUG FIX: the FK previously pointed at Section. A Step's parent is a
    # Workflow — see the hierarchy comment below, related_name='steps'
    # (traversed as workflow.steps), and the serializer, which constructs
    # Step(workflow=workflow, ...).
    workflow = models.ForeignKey(Workflow, on_delete=models.CASCADE, related_name='steps')
    title = models.CharField(max_length=255)
class Section(models.Model):
    """Third level of the hierarchy: a Section belongs to a Step."""
    body = models.CharField(max_length=255)
    # BUG FIX: the FK was a self-reference to Section. A Section's parent is
    # a Step — the serializer constructs Section(step=step, ...) and the
    # related_name 'sections' is traversed from a Step instance.
    step = models.ForeignKey(Step, on_delete=models.CASCADE, related_name='sections')
class Question(models.Model):
    # Fourth level: a Question belongs to a Section.
    description = models.CharField(max_length=255)
    section = models.ForeignKey(Section, on_delete=models.CASCADE, related_name='questions')
class Option(models.Model):
    # Fifth level: an Option belongs to a Question.
    set_fail = models.BooleanField()
    question = models.ForeignKey(Question, on_delete=models.CASCADE, related_name='options')
class Action(models.Model):
    # Sixth (leaf) level: an Action belongs to an Option.
    yes_no = models.BooleanField()
    option = models.ForeignKey(Option, on_delete=models.CASCADE, related_name='actions')
# Workflow -> Step -> Section -> Question -> Option -> Action
And I am sending the following body for inserting data from the client side (the request body is huge and I can't paste it here):
https://jsoneditoronline.org/?id=e970abc01b2a489c9933464867d11eaf
As you can see, the data is huge, with multiple records at each level, so the insert takes a long time.
I am currently using this insert method:
class WorkflowUpdateSerializer(serializers.Serializer):
    """Persists a nested Workflow payload one parent instance at a time.

    Each update_* method bulk-creates one model's rows for a single parent,
    then recurses into the children of every created instance — so the
    number of INSERT batches grows with the number of parent rows, not just
    with the nesting depth. That is the performance problem being asked about.
    """

    def update(self, workflow, data):
        # data is the raw request JSON body: a list of step dicts.
        self.update_steps(workflow, data)  # data is request JSON body

    def update_steps(self, workflow, steps):
        # Strip the nested 'sections' key so the remaining keys map directly
        # onto Step model fields.
        step_clones = [{key: value for key, value in step.items() if key != 'sections'} for step in steps]
        # One bulk INSERT for all steps of this workflow.
        step_instances = Step.objects.bulk_create(
            [Step(workflow=workflow, **step) for step in step_clones])
        # NOTE(review): relies on bulk_create returning instances in input
        # order, so step_instances[index] pairs with steps[index].
        for index, step in enumerate(steps):
            self.update_sections(step_instances[index], step.pop('sections'))

    def update_sections(self, step, sections):
        # Same pattern one level down: strip 'questions', bulk-insert the
        # sections of THIS step only, then recurse per created section.
        section_clones = [{key: value for key, value in section.items() if
                           key != 'questions'} for section in sections]
        section_instances = Section.objects.bulk_create(
            [Section(step=step, **section) for section in section_clones])
        for index, section in enumerate(sections):
            self.update_questions(section=section_instances[index], questions=section.pop('questions'))

    def update_questions(self, section, questions):
        # code

    def update_options(self, question, options):
        # code

    def update_actions(self, option, actions):
        # code
Do you have any ideas on how to improve it?
Thanks.
Here is my solution. It bulk creates all instances of each model in a single database call, resulting in only 5 bulk inserts.
class WorkflowUpdateSerializer(serializers.Serializer):
    # The whole nested payload arrives under a single validated JSON field.
    steps = serializers.JSONField()

    def update(self, workflow, validated_data):
        """Insert the full Workflow tree using exactly 5 bulk INSERTs.

        Strategy: process one model level at a time. While building the
        instances of level N, collect the child dicts of level N+1 in the
        SAME iteration order; after bulk_create returns the saved parents
        (in input order), zip them back with their source dicts to attach
        the children. The correctness of every zip() below depends on that
        order alignment.

        NOTE(review): bulk_create only gives back usable primary keys here
        because the models use a client-generated UUID PK (see BaseModel),
        not an auto-increment AutoField — confirm for your DB backend.
        """
        steps_dicts = [s['step'] for s in validated_data['steps']]
        # Child-dict accumulators, each filled in the same order as the
        # corresponding parent list is built.
        sections_dicts = []
        questions_dicts = []
        options_dicts = []
        actions_dicts = []

        def _kws(d, exclude):
            # Model kwargs = the dict minus its nested-children key.
            return {k: v for k, v in d.items() if k != exclude}

        # Level 1: Steps. Collect each step's section dicts as we go.
        steps = []
        for step_dict in steps_dicts:
            sections_dicts.extend(step_dict['section'])
            steps.append(Step(workflow=workflow, **_kws(step_dict, 'section')))
        steps = Step.objects.bulk_create(steps)

        # Level 2: Sections. Walk steps in the same order they were created
        # so each Section gets its saved parent; collect question dicts.
        sections = []
        for step, step_dict in zip(steps, steps_dicts):
            for section_dict in step_dict['section']:
                questions_dicts.extend(section_dict['questions'])
                sections.append(Section(step=step, **_kws(section_dict, 'questions')))
        sections = Section.objects.bulk_create(sections)

        # Level 3: Questions (sections_dicts was filled in section order).
        questions = []
        for section, section_dict in zip(sections, sections_dicts):
            for question_dict in section_dict['questions']:
                options_dicts.extend(question_dict['options'])
                questions.append(Question(section=section, **_kws(question_dict, 'options')))
        questions = Question.objects.bulk_create(questions)

        # Level 4: Options.
        options = []
        for question, question_dict in zip(questions, questions_dicts):
            for option_dict in question_dict['options']:
                actions_dicts.extend(option_dict['actions'])
                options.append(Option(question=question, **_kws(option_dict, 'actions')))
        options = Option.objects.bulk_create(options)

        # Level 5: Actions — the leaf level, nothing left to collect.
        actions = []
        for option, option_dict in zip(options, options_dicts):
            for action_dict in option_dict['actions']:
                actions.append(Action(option=option, **action_dict))
        actions = Action.objects.bulk_create(actions)
        return workflow
Note that for bulk_create to return the instance IDs, the model must have something other than AutoField as its primary key, so I had to create a common BaseModel
like this:
# models.py
class BaseModel(models.Model):
    # Client-generated UUID primary key: the PK is assigned in Python before
    # the INSERT, so instances returned by bulk_create carry usable PKs —
    # which an auto-increment AutoField would not guarantee.
    id = models.UUIDField(default=uuid.uuid4, primary_key=True)

    class Meta:
        abstract = True


class Step(BaseModel):
    # Same fields as before; only the base class (and thus the PK) changed.
    workflow = models.ForeignKey(Workflow, on_delete=models.CASCADE, related_name='steps')
    title = models.CharField(max_length=255)
...
This is how I test the serializer
# tests.py
# Fixture: the large nested payload referenced in the question, loaded once
# at module import.
with open('./data.json') as f:
    data = json.load(f)


class TestSerializer(TestCase):
    def test_serializer(self):
        """End-to-end check: one save() persists the entire nested tree."""
        workflow = Workflow.objects.create(name='test')
        serializer = WorkflowUpdateSerializer(instance=workflow, data={'steps': data})
        serializer.is_valid(raise_exception=True)
        serializer.save()
        # Expected row counts for the fixture; the ratios (9/3, 18/9, 54/18,
        # 162/54) reflect how many children each parent has in data.json.
        self.assertEqual(Step.objects.count(), 3)
        self.assertEqual(Section.objects.count(), 9)
        self.assertEqual(Question.objects.count(), 18)
        self.assertEqual(Option.objects.count(), 54)
        self.assertEqual(Action.objects.count(), 162)