diff --git a/Makefile b/Makefile index acf9b0cf67..734c628ade 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ dummyusers: cd contentcuration/ && python manage.py loaddata contentcuration/fixtures/admin_user_token.json prodceleryworkers: - cd contentcuration/ && celery -A contentcuration worker -l info --concurrency=3 --without-mingle --without-gossip + cd contentcuration/ && celery -A contentcuration worker -l info --concurrency=3 --task-events --without-mingle --without-gossip prodcelerydashboard: # connect to the celery dashboard by visiting http://localhost:5555 diff --git a/babel.config.js b/babel.config.js index 15b35136ee..d8e92cc17a 100644 --- a/babel.config.js +++ b/babel.config.js @@ -3,7 +3,7 @@ module.exports = { [ '@babel/preset-env', { - useBuiltIns: 'entry', + useBuiltIns: 'usage', corejs: '3', }, ], diff --git a/contentcuration/contentcuration/apps.py b/contentcuration/contentcuration/apps.py index 026ec8d269..e448580629 100644 --- a/contentcuration/contentcuration/apps.py +++ b/contentcuration/contentcuration/apps.py @@ -9,6 +9,6 @@ class ContentConfig(AppConfig): def ready(self): # see note in the celery_signals.py file for why we import here. - import contentcuration.utils.celery_signals + import contentcuration.utils.celery.signals # noqa if settings.AWS_AUTO_CREATE_BUCKET: ensure_storage_bucket_public() diff --git a/contentcuration/contentcuration/celery.py b/contentcuration/contentcuration/celery.py index 142e4b968b..1ae0072279 100644 --- a/contentcuration/contentcuration/celery.py +++ b/contentcuration/contentcuration/celery.py @@ -2,20 +2,19 @@ import os -from celery import Celery -from django.conf import settings +import django + +from contentcuration.utils.celery.app import CeleryApp # set the default Django settings module for the 'celery' program. os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'contentcuration.settings') -app = Celery('contentcuration') +app = CeleryApp('contentcuration') # Using a string here means the worker will not have to # pickle the object when using Windows. app.config_from_object('django.conf:settings', namespace='CELERY') -import django django.setup() -app.autodiscover_tasks(lambda: settings.INSTALLED_APPS, force=True) @app.task(bind=True) diff --git a/contentcuration/contentcuration/db/models/expressions.py b/contentcuration/contentcuration/db/models/expressions.py index 01a14f4a02..3daec06977 100644 --- a/contentcuration/contentcuration/db/models/expressions.py +++ b/contentcuration/contentcuration/db/models/expressions.py @@ -1,5 +1,7 @@ from django.db.models import BooleanField +from django.db.models import F from django.db.models import Q +from django.db.models import Value from django.db.models.expressions import CombinedExpression from django.db.models.expressions import Func from django.db.models.sql.where import WhereNode @@ -28,6 +30,20 @@ class BooleanComparison(CombinedExpression): output_field = BooleanField() +class IsNull(BooleanComparison): + """ + An expression that results in a Boolean value, useful for annotating + if a column IS or IS NOT NULL + + Example: + IsNull('my_field_name') -> my_field_name IS NULL + IsNull('my_field_name', negate=True) -> my_field_name IS NOT NULL + """ + def __init__(self, field_name, negate=False): + operator = 'IS NOT' if negate else 'IS' + super(IsNull, self).__init__(F(field_name), operator, Value(None)) + + class Array(Func): """ Create an array datatype within Postgres. diff --git a/contentcuration/contentcuration/db/models/manager.py b/contentcuration/contentcuration/db/models/manager.py index dc52cbb34d..c647ac5a0a 100644 --- a/contentcuration/contentcuration/db/models/manager.py +++ b/contentcuration/contentcuration/db/models/manager.py @@ -14,8 +14,7 @@ from contentcuration.db.advisory_lock import advisory_lock from contentcuration.db.models.query import CustomTreeQuerySet -from contentcuration.utils.tasks import increment_progress -from contentcuration.utils.tasks import set_total +from contentcuration.utils.cache import ResourceSizeCache logging = logger.getLogger(__name__) @@ -198,6 +197,7 @@ def move_node(self, node, target, position="last-child"): ``MPTTMeta.order_insertion_by``. In most cases you should just move the node yourself by setting node.parent. """ + old_parent = node.parent with self.lock_mptt(node.tree_id, target.tree_id): # Call _mptt_refresh to ensure that the mptt fields on # these nodes are up to date once we have acquired a lock @@ -212,6 +212,14 @@ def move_node(self, node, target, position="last-child"): node_moved.send( sender=node.__class__, instance=node, target=target, position=position, ) + # when moving to a new tree, like trash, we'll blanket reset the modified for the + # new root and the old root nodes + if old_parent.tree_id != target.tree_id: + for size_cache in [ + ResourceSizeCache(target.get_root()), + ResourceSizeCache(old_parent.get_root()) + ]: + size_cache.reset_modified(None) def get_source_attributes(self, source): """ @@ -330,14 +338,18 @@ def copy_node( excluded_descendants=None, can_edit_source_channel=None, batch_size=None, + progress_tracker=None ): + """ + :type progress_tracker: contentcuration.utils.celery.ProgressTracker|None + """ if batch_size is None: batch_size = BATCH_SIZE source_channel_id = node.get_channel_id() total_nodes = self._all_nodes_to_copy(node, excluded_descendants).count() - - set_total(total_nodes) + if progress_tracker: + progress_tracker.set_total(total_nodes) return self._copy( node, @@ -349,6 +361,7 @@ def copy_node( excluded_descendants, can_edit_source_channel, batch_size, + progress_tracker=progress_tracker, ) def _copy( @@ -362,9 +375,13 @@ def _copy( excluded_descendants, can_edit_source_channel, batch_size, + progress_tracker=None, ): + """ + :type progress_tracker: contentcuration.utils.celery.ProgressTracker|None + """ if node.rght - node.lft < batch_size: - return self._deep_copy( + copied_nodes = self._deep_copy( node, target, position, @@ -374,6 +391,9 @@ def _copy( excluded_descendants, can_edit_source_channel, ) + if progress_tracker: + progress_tracker.increment(len(copied_nodes)) + return copied_nodes else: node_copy = self._shallow_copy( node, @@ -384,6 +404,8 @@ def _copy( mods, can_edit_source_channel, ) + if progress_tracker: + progress_tracker.increment() children = node.get_children().order_by("lft") if excluded_descendants: children = children.exclude(node_id__in=excluded_descendants.keys()) @@ -398,6 +420,7 @@ def _copy( excluded_descendants, can_edit_source_channel, batch_size, + progress_tracker=progress_tracker, ) return [node_copy] @@ -532,7 +555,6 @@ def _shallow_copy( node_copy.save(force_insert=True) self._copy_associated_objects({node.id: node_copy.id}) - increment_progress(1) return node_copy def _deep_copy( @@ -590,8 +612,6 @@ def _deep_copy( self._copy_associated_objects(source_copy_id_map) - increment_progress(len(nodes_to_copy)) - return new_nodes def build_tree_nodes(self, data, target=None, position="last-child"): diff --git a/contentcuration/contentcuration/db/models/query.py b/contentcuration/contentcuration/db/models/query.py index 6be651004c..3cd57093dc 100644 --- a/contentcuration/contentcuration/db/models/query.py +++ b/contentcuration/contentcuration/db/models/query.py @@ -1,6 +1,110 @@ +from django.db import connections +from django.db.models.expressions import Col +from django.db.models.sql.compiler import SQLCompiler +from django.db.models.sql.constants import INNER +from django.db.models.sql.query import Query from django_cte import CTEQuerySet +from django_cte import With as CTEWith from mptt.querysets import TreeQuerySet +RIGHT_JOIN = 'RIGHT JOIN' + + class CustomTreeQuerySet(TreeQuerySet, CTEQuerySet): pass + + +class With(CTEWith): + """ + Custom CTE class which allows more join types than just INNER and LOUTER (LEFT) + """ + def join(self, model_or_queryset, *filter_q, **filter_kw): + """ + Slight hack to allow more join types + """ + join_type = filter_kw.get('_join_type', INNER) + queryset = super(With, self).join(model_or_queryset, *filter_q, **filter_kw) + + # the underlying Django code forces the join type into INNER or a LEFT OUTER join + alias, _ = queryset.query.table_alias(self.name) + join = queryset.query.alias_map[alias] + if join.join_type != join_type: + join.join_type = join_type + return queryset + + +class WithValues(With): + """ + Allows the creation of a CTE that holds a VALUES list + + @see https://www.postgresql.org/docs/9.6/queries-values.html + """ + def __init__(self, fields, values_list, name="cte"): + super(WithValues, self).__init__(None, name=name) + self.query = WithValuesQuery(self) + self.fields = fields + self.values_list = values_list + + def _resolve_ref(self, name): + """ + Gets called when a column reference is accessed via the CTE instance `.col.name` + """ + if name not in self.fields: + raise RuntimeError("No field with name `{}`".format(name)) + + field = self.fields.get(name) + field.set_attributes_from_name(name) + return Col(self.name, field, output_field=field) + + +class WithValuesSQLCompiler(SQLCompiler): + TEMPLATE = "SELECT * FROM (VALUES {values_statement}) AS {cte_name}({fields_statement})" + + def as_sql(self, with_limits=True, with_col_aliases=False): + """ + Ideally this would return something like: + WITH t_cte(fieldA, fieldB) AS (VALUES (), ...) + But django_cte doesn't give us a way to do that, so we do this instead: + WITH t_cte AS (SELECT * FROM (VALUES (), ...) AS _t_cte(fieldA, fieldB))) + + :return: A tuple of SQL and parameters + """ + value_parameters = ", ".join(["%s"] * len(self.cte.fields)) + values_statement = ", ".join(["({})".format(value_parameters)] * len(self.cte.values_list)) + fields_statement = ", ".join([self.connection.ops.quote_name(field) for field in list(self.cte.fields)]) + sql = self.TEMPLATE.format( + values_statement=values_statement, + cte_name="_{}".format(self.cte.name), + fields_statement=fields_statement + ) + return sql, list(sum(self.cte.values_list, ())) + + @property + def cte(self): + """ + :rtype: WithValues + """ + return self.query.cte + + +class WithValuesQuery(Query): + """ + Dedicated query class for creating a CTE + + Note: this does inherit from Query, which we're not passing a Model instance so not all Query + functionality is intended to work + """ + def __init__(self, cte): + super(WithValuesQuery, self).__init__(None) + self.cte = cte + + def get_compiler(self, using=None, connection=None): + """ + This code is modeled after Query.get_compiler() + """ + if using is None and connection is None: + raise ValueError("Need either using or connection") + if using: + connection = connections[using] + return WithValuesSQLCompiler(self, connection, using) diff --git a/contentcuration/contentcuration/dev_urls.py b/contentcuration/contentcuration/dev_urls.py index b854445dd5..0dbc590662 100644 --- a/contentcuration/contentcuration/dev_urls.py +++ b/contentcuration/contentcuration/dev_urls.py @@ -1,6 +1,7 @@ from django.conf import settings from django.conf.urls import include from django.conf.urls import url +from django.contrib import admin from django.http.response import HttpResponseRedirect from drf_yasg import openapi from drf_yasg.views import get_schema_view @@ -31,6 +32,7 @@ def webpack_redirect_view(request): urlpatterns = urlpatterns + [ url(r"^__open-in-editor/", webpack_redirect_view), + url(r'^admin/', include(admin.site.urls)), url( r"^swagger(?P\.json|\.yaml)$", schema_view.without_ui(cache_timeout=0), diff --git a/contentcuration/contentcuration/frontend/channelEdit/components/ContentNodeListItem/index.vue b/contentcuration/contentcuration/frontend/channelEdit/components/ContentNodeListItem/index.vue index eb5364e226..a32834b929 100644 --- a/contentcuration/contentcuration/frontend/channelEdit/components/ContentNodeListItem/index.vue +++ b/contentcuration/contentcuration/frontend/channelEdit/components/ContentNodeListItem/index.vue @@ -263,7 +263,7 @@ ...mapActions('task', ['deleteTask']), handleTileClick(e) { // Ensures that clicking an icon button is not treated the same as clicking the card - if (e.target.tagName !== 'svg' && !this.copying) { + if (e.target && e.target.tagName !== 'svg' && !this.copying) { this.isTopic ? this.$emit('topicChevronClick') : this.$emit('infoClick'); } }, diff --git a/contentcuration/contentcuration/frontend/channelEdit/components/publish/PublishModal.vue b/contentcuration/contentcuration/frontend/channelEdit/components/publish/PublishModal.vue index 09015fc045..4101e141a7 100644 --- a/contentcuration/contentcuration/frontend/channelEdit/components/publish/PublishModal.vue +++ b/contentcuration/contentcuration/frontend/channelEdit/components/publish/PublishModal.vue @@ -5,7 +5,7 @@

{{ currentChannel.name }}

-

+

@@ -49,6 +49,7 @@ {{ $tr('nextButton') }} @@ -120,13 +121,15 @@ step: 0, publishDescription: '', size: 0, - loadingMetadata: false, + loading: false, + loadingTaskId: null, }; }, computed: { ...mapGetters(['areAllChangesSaved']), ...mapGetters('currentChannel', ['currentChannel', 'rootId']), ...mapGetters('contentNode', ['getContentNode']), + ...mapGetters('task', ['getAsyncTask']), dialog: { get() { return this.value; @@ -144,6 +147,17 @@ descriptionRules() { return [v => !!v.trim() || this.$tr('descriptionRequiredMessage')]; }, + sizeCalculationTask() { + return this.loadingTaskId ? this.getAsyncTask(this.loadingTaskId) : null; + }, + }, + watch: { + sizeCalculationTask(task) { + if (task && task.status === 'SUCCESS') { + this.loading = false; + this.size = task.metadata.result; + } + }, }, beforeMount() { // Proceed to description if no incomplete nodes found @@ -152,10 +166,11 @@ } }, mounted() { - this.loadingMetadata = true; - this.loadChannelSize(this.rootId).then(size => { - this.size = size; - this.loadingMetadata = false; + this.loading = true; + this.loadChannelSize(this.rootId).then(response => { + this.size = response.size; + this.loading = response.stale; + this.loadingTaskId = response.changes.length ? response.changes[0].key : null; }); }, methods: { diff --git a/contentcuration/contentcuration/frontend/channelEdit/components/publish/__tests__/publishModal.spec.js b/contentcuration/contentcuration/frontend/channelEdit/components/publish/__tests__/publishModal.spec.js index 403711ff96..d2525228c5 100644 --- a/contentcuration/contentcuration/frontend/channelEdit/components/publish/__tests__/publishModal.spec.js +++ b/contentcuration/contentcuration/frontend/channelEdit/components/publish/__tests__/publishModal.spec.js @@ -67,7 +67,7 @@ describe('publishModal', () => { }); describe('on validation step', () => { beforeEach(() => { - wrapper.setData({ step: steps.VALIDATION }); + wrapper.setData({ step: steps.VALIDATION, loading: false }); }); it('next button should go to the next step if enabled', () => { wrapper.find('[data-test="next"]').trigger('click'); diff --git a/contentcuration/contentcuration/frontend/channelEdit/views/progress/ProgressModal.vue b/contentcuration/contentcuration/frontend/channelEdit/views/progress/ProgressModal.vue index 269bf7ed5c..2bf42e04a1 100644 --- a/contentcuration/contentcuration/frontend/channelEdit/views/progress/ProgressModal.vue +++ b/contentcuration/contentcuration/frontend/channelEdit/views/progress/ProgressModal.vue @@ -141,7 +141,7 @@ // for not syncing channels with no imported resources // this property is added her as a way to manager feedback to the user nothingToSync() { - return this.noSyncNeeded; + return this.isSyncing && this.noSyncNeeded; }, isPublishing() { return this.currentChannel && this.currentChannel.publishing; diff --git a/contentcuration/contentcuration/frontend/channelEdit/vuex/currentChannel/__tests__/module.spec.js b/contentcuration/contentcuration/frontend/channelEdit/vuex/currentChannel/__tests__/module.spec.js index d8073f8316..5e2373ecfd 100644 --- a/contentcuration/contentcuration/frontend/channelEdit/vuex/currentChannel/__tests__/module.spec.js +++ b/contentcuration/contentcuration/frontend/channelEdit/vuex/currentChannel/__tests__/module.spec.js @@ -1,7 +1,6 @@ import currentChannel from '../index'; import storeFactory from 'shared/vuex/baseStore'; -import client from 'shared/client'; -import { Channel } from 'shared/data/resources'; +import { Channel, ContentNode } from 'shared/data/resources'; jest.mock('shared/client'); jest.mock('shared/vuex/connectionPlugin'); @@ -15,9 +14,12 @@ describe('currentChannel store', () => { }); describe('actions', () => { it('loadChannelSize action should get from contentnode_size endpoint', () => { + const spy = jest + .spyOn(ContentNode, 'getResourceSize') + .mockImplementation(() => Promise.resolve({ size: 123, stale: false, changes: [] })); return store.dispatch('currentChannel/loadChannelSize', 'root-id').then(() => { - expect(client.get.mock.calls[0][0]).toBe('contentnode_size'); - client.get.mockRestore(); + expect(spy.mock.calls[0][0]).toBe('root-id'); + spy.mockRestore(); }); }); it('publishChannel action should post to publish_channel endpoint', () => { diff --git a/contentcuration/contentcuration/frontend/channelEdit/vuex/currentChannel/actions.js b/contentcuration/contentcuration/frontend/channelEdit/vuex/currentChannel/actions.js index 8e3442de6a..f5f22c0ce5 100644 --- a/contentcuration/contentcuration/frontend/channelEdit/vuex/currentChannel/actions.js +++ b/contentcuration/contentcuration/frontend/channelEdit/vuex/currentChannel/actions.js @@ -1,7 +1,7 @@ import Vue from 'vue'; +import { ContentNode, Channel } from '../../../shared/data/resources'; import client from 'shared/client'; import applyChanges from 'shared/data/applyRemoteChanges'; -import { Channel } from 'shared/data/resources'; export function loadChannel(context, { staging = false } = {}) { return context @@ -13,9 +13,7 @@ export function loadChannel(context, { staging = false } = {}) { } export function loadChannelSize(context, rootId) { - return client.get(window.Urls.contentnode_size(rootId)).then(response => { - return response.data; - }); + return ContentNode.getResourceSize(rootId); } export function loadCurrentChannelStagingDiff(context) { diff --git a/contentcuration/contentcuration/frontend/channelEdit/vuex/task/index.js b/contentcuration/contentcuration/frontend/channelEdit/vuex/task/index.js index 6a74d71b21..2518a6abd9 100644 --- a/contentcuration/contentcuration/frontend/channelEdit/vuex/task/index.js +++ b/contentcuration/contentcuration/frontend/channelEdit/vuex/task/index.js @@ -1,4 +1,5 @@ import Vue from 'vue'; +import sortBy from 'lodash/sortBy'; import { Task } from 'shared/data/resources'; import { TABLE_NAMES, CHANGE_TYPES } from 'shared/data'; @@ -17,7 +18,8 @@ export default { return getters.asyncTasks.filter(t => t.status !== 'SUCCESS' || t.status !== 'FAILED'); }, asyncTasks(state) { - return Object.values(state.asyncTasksMap); + // ensure most recent tasks are first + return sortBy(Object.values(state.asyncTasksMap), 'created').reverse(); }, getAsyncTask(state) { return function(taskId) { diff --git a/contentcuration/contentcuration/frontend/shared/app.js b/contentcuration/contentcuration/frontend/shared/app.js index 9a49b76f5a..178b1fe1c4 100644 --- a/contentcuration/contentcuration/frontend/shared/app.js +++ b/contentcuration/contentcuration/frontend/shared/app.js @@ -1,4 +1,3 @@ -import 'core-js'; import 'regenerator-runtime/runtime'; import Vue from 'vue'; import VueRouter from 'vue-router'; diff --git a/contentcuration/contentcuration/frontend/shared/data/applyRemoteChanges.js b/contentcuration/contentcuration/frontend/shared/data/applyRemoteChanges.js index c5320ed138..088bf4a6b1 100644 --- a/contentcuration/contentcuration/frontend/shared/data/applyRemoteChanges.js +++ b/contentcuration/contentcuration/frontend/shared/data/applyRemoteChanges.js @@ -92,6 +92,10 @@ function applyMoveChanges(changes) { * Modified from https://github.com/dfahlander/Dexie.js/blob/master/addons/Dexie.Syncable/src/apply-changes.js */ export default function applyChanges(changes) { + if (!changes.length) { + return Promise.resolve(); + } + const collectedChanges = collectChanges(changes); let table_names = Object.keys(collectedChanges); let tables = table_names.map(table => db.table(table)); diff --git a/contentcuration/contentcuration/frontend/shared/data/resources.js b/contentcuration/contentcuration/frontend/shared/data/resources.js index e5586d0bfe..72a88153e1 100644 --- a/contentcuration/contentcuration/frontend/shared/data/resources.js +++ b/contentcuration/contentcuration/frontend/shared/data/resources.js @@ -879,6 +879,10 @@ export const Channel = new Resource({ .post(this.getUrlFunction('publish')(id), { version_notes, }) + .then(response => { + // The endpoint may return a Task create event in `changes` + return applyChanges(response.data.changes || []); + }) .catch(() => this.clearPublish(id)); }); }, @@ -890,12 +894,17 @@ export const Channel = new Resource({ }, sync(id, { attributes = false, tags = false, files = false, assessment_items = false } = {}) { - return client.post(this.getUrlFunction('sync')(id), { - attributes, - tags, - files, - assessment_items, - }); + return client + .post(this.getUrlFunction('sync')(id), { + attributes, + tags, + files, + assessment_items, + }) + .then(response => { + // The endpoint may return a Task create event in `changes` + return applyChanges(response.data.changes || []); + }); }, softDelete(id) { @@ -1008,6 +1017,17 @@ export const ContentNode = new TreeResource({ }); }, + /** + * @param {string} id -- The node PK + * @returns {{ size: Number, stale: Boolean, changes: [{key: string}]}} + */ + getResourceSize(id) { + return client.get(this.getUrlFunction('size')(id)).then(response => { + // The endpoint may return a Task create event in `changes` + return applyChanges(response.data.changes || []).then(() => response.data); + }); + }, + /** * Resolves target ID string into parent object, based off position, for tree inserts * diff --git a/contentcuration/contentcuration/frontend/shared/views/MarkdownEditor/plugins/image-upload/index.js b/contentcuration/contentcuration/frontend/shared/views/MarkdownEditor/plugins/image-upload/index.js index 1e41cbd04c..b4b13a7201 100644 --- a/contentcuration/contentcuration/frontend/shared/views/MarkdownEditor/plugins/image-upload/index.js +++ b/contentcuration/contentcuration/frontend/shared/views/MarkdownEditor/plugins/image-upload/index.js @@ -22,7 +22,7 @@ export const imageMdToParams = imageMd => { }; export const paramsToImageMd = ({ src, alt, width, height }) => { - src = src.split('/').lastItem; + src = src.split('/').slice(-1)[0]; if (width && width !== 'auto' && height && height !== 'auto') { return `![${alt}](${IMAGE_PLACEHOLDER}/${src} =${width}x${height})`; } else { diff --git a/contentcuration/contentcuration/migrations/0121_auto_20210305_2028.py b/contentcuration/contentcuration/migrations/0121_auto_20210305_2028.py new file mode 100644 index 0000000000..7c18bc3d88 --- /dev/null +++ b/contentcuration/contentcuration/migrations/0121_auto_20210305_2028.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.29 on 2021-03-05 20:28 +from __future__ import unicode_literals + +from django.db import migrations +from django.db import models + + +class Migration(migrations.Migration): + + dependencies = [ + ('contentcuration', '0120_auto_20210128_1646'), + ] + + operations = [ + migrations.AddField( + model_name='file', + name='modified', + field=models.DateTimeField(auto_now=True, verbose_name='modified', null=True), + ), + ] diff --git a/contentcuration/contentcuration/migrations/0122_file_modified_index.py b/contentcuration/contentcuration/migrations/0122_file_modified_index.py new file mode 100644 index 0000000000..b83b432a4d --- /dev/null +++ b/contentcuration/contentcuration/migrations/0122_file_modified_index.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.29 on 2021-03-05 20:28 +from __future__ import unicode_literals + +from django.db import migrations +from django.db import models + +from contentcuration.models import FILE_MODIFIED_DESC_INDEX_NAME + + +class Migration(migrations.Migration): + atomic = False + + dependencies = [ + ('contentcuration', '0121_auto_20210305_2028'), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.AddIndex( + model_name='file', + index=models.Index(fields=['-modified'], name=FILE_MODIFIED_DESC_INDEX_NAME), + ), + ], + database_operations=[ + # operation to run custom SQL command (check the output of `sqlmigrate` + # to see the auto-generated SQL, edit as needed) + migrations.RunSQL( + sql='CREATE INDEX CONCURRENTLY "{index_name}" ON "contentcuration_file"("modified" DESC NULLS LAST)'.format( + index_name=FILE_MODIFIED_DESC_INDEX_NAME + ), + reverse_sql='DROP INDEX "{index_name}"'.format( + index_name=FILE_MODIFIED_DESC_INDEX_NAME + ), + ), + ], + ), + ] diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 853619246f..775fe4414e 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -1089,6 +1089,7 @@ class ContentTag(models.Model): id = UUIDField(primary_key=True, default=uuid.uuid4) tag_name = models.CharField(max_length=50) channel = models.ForeignKey('Channel', related_name='tags', blank=True, null=True, db_index=True, on_delete=models.SET_NULL) + objects = CustomManager() def __str__(self): return self.tag_name @@ -1791,9 +1792,10 @@ def copy_to( mods=None, excluded_descendants=None, can_edit_source_channel=None, - batch_size=None + batch_size=None, + progress_tracker=None ): - return self._tree_manager.copy_node(self, target, position, pk, mods, excluded_descendants, can_edit_source_channel, batch_size)[0] + return self._tree_manager.copy_node(self, target, position, pk, mods, excluded_descendants, can_edit_source_channel, batch_size, progress_tracker)[0] def copy(self): return self.copy_to() @@ -1974,6 +1976,7 @@ class StagedFile(models.Model): FILE_DISTINCT_INDEX_NAME = "file_checksum_file_size_idx" +FILE_MODIFIED_DESC_INDEX_NAME = "file_modified_desc_idx" class File(models.Model): @@ -1996,6 +1999,8 @@ class File(models.Model): source_url = models.CharField(max_length=400, blank=True, null=True) uploaded_by = models.ForeignKey(User, related_name='files', blank=True, null=True, on_delete=models.SET_NULL) + modified = models.DateTimeField(auto_now=True, verbose_name="modified", null=True) + objects = CustomManager() _permission_filter = Q(tree_id=OuterRef("contentnode__tree_id")) | Q(tree_id=OuterRef("assessment_item__contentnode__tree_id")) @@ -2059,6 +2064,11 @@ def filename(self): return os.path.basename(self.file_on_disk.name) + def on_update(self): + # since modified was added later as a nullable field to File, we don't use a default but + # instead we'll just make sure it's always updated through our serializers + self.modified = timezone.now() + def save(self, set_by_file_on_disk=True, *args, **kwargs): """ Overrider the default save method. @@ -2091,6 +2101,7 @@ def save(self, set_by_file_on_disk=True, *args, **kwargs): class Meta: indexes = [ models.Index(fields=['checksum', 'file_size'], name=FILE_DISTINCT_INDEX_NAME), + models.Index(fields=["-modified"], name=FILE_MODIFIED_DESC_INDEX_NAME), ] diff --git a/contentcuration/contentcuration/settings.py b/contentcuration/contentcuration/settings.py index 107ffe02b0..7aafc423d0 100644 --- a/contentcuration/contentcuration/settings.py +++ b/contentcuration/contentcuration/settings.py @@ -68,9 +68,9 @@ INSTALLED_APPS = ( 'contentcuration.apps.ContentConfig', - 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', + 'django.contrib.admin', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.sites', @@ -354,14 +354,9 @@ def ugettext(s): CELERY_ACCEPT_CONTENT = ['application/json'] CELERY_TASK_SERIALIZER = 'json' CELERY_RESULT_SERIALIZER = 'json' -# This is needed for worker update_state calls to work so they can send progress info. -CELERYD_STATE_DB = '/tmp/celery_state' # If this is True, Celery tasks are run synchronously. This is set to True in the unit tests, # as it is not possible to correctly test Celery tasks asynchronously currently. CELERY_TASK_ALWAYS_EAGER = False -# This tells Celery to mark a task as started. Otherwise, we would have no way of tracking -# if the task is running. -CELERY_TASK_TRACK_STARTED = True # We hook into task events to update the Task DB records with the updated state. # See celerysignals.py for more info. CELERY_WORKER_SEND_TASK_EVENTS = True diff --git a/contentcuration/contentcuration/tasks.py b/contentcuration/contentcuration/tasks.py index 490cd354d8..f477c99d5c 100644 --- a/contentcuration/contentcuration/tasks.py +++ b/contentcuration/contentcuration/tasks.py @@ -5,7 +5,7 @@ import time from builtins import str -from celery.decorators import task +from celery import states from celery.utils.log import get_task_logger from django.conf import settings from django.core.cache import cache @@ -16,15 +16,16 @@ from django.utils import translation from django.utils.translation import ugettext as _ +from contentcuration.celery import app from contentcuration.models import Channel from contentcuration.models import ContentNode from contentcuration.models import Task from contentcuration.models import User from contentcuration.utils.csv_writer import write_channel_csv_file from contentcuration.utils.csv_writer import write_user_csv +from contentcuration.utils.nodes import calculate_resource_size from contentcuration.utils.nodes import generate_diff from contentcuration.utils.publish import publish_channel -from contentcuration.utils.sentry import report_exception from contentcuration.utils.sync import sync_channel from contentcuration.utils.user import CACHE_USER_STORAGE_KEY from contentcuration.viewsets.sync.constants import CHANNEL @@ -41,26 +42,11 @@ if settings.RUNNING_TESTS: from .tasks_test import error_test_task, progress_test_task, test_task -# TODO: Try to get debugger working for celery workers -# Attach Python Cloud Debugger -# try: -# import googleclouddebugger -# if os.getenv("RUN_CLOUD_DEBUGGER"): -# googleclouddebugger.AttachDebugger( -# version=os.getenv("GCLOUD_DEBUGGER_APP_IDENTIFIER"), -# project_id=os.getenv('GOOGLE_CLOUD_PROJECT'), -# project_number=os.getenv('GOOGLE_CLOUD_PROJECT_NUMBER'), -# enable_service_account_auth=True, -# service_account_json_file=os.getenv("GOOGLE_APPLICATION_CREDENTIALS"), -# ) -# except ImportError, RuntimeError: -# pass +STATE_QUEUED = "QUEUED" -# runs the management command 'exportchannel' async through celery - -@task(bind=True, name="delete_node_task") +@app.task(bind=True, name="delete_node_task") def delete_node_task( self, user_id, @@ -82,10 +68,14 @@ def delete_node_task( else: raise except Exception as e: - report_exception(e) + self.report_exception(e) + + # TODO: Generate create event if failed? + # if not deleted: + # return {"changes": [generate_create_event(node.pk, CONTENTNODE, node)]} -@task(bind=True, name="move_nodes_task") +@app.task(bind=True, name="move_nodes_task") def move_nodes_task( self, user_id, @@ -113,12 +103,13 @@ def move_nodes_task( else: raise except Exception as e: - report_exception(e) + self.report_exception(e) - return {"changes": [generate_update_event(node.pk, CONTENTNODE, {"parent": node.parent_id})]} + if not moved: + return {"changes": [generate_update_event(node.pk, CONTENTNODE, {"parent": node.parent_id})]} -@task(bind=True, name="duplicate_nodes_task") +@app.task(bind=True, name="duplicate_nodes_task", track_progress=True) def duplicate_nodes_task( self, user_id, @@ -130,9 +121,6 @@ def duplicate_nodes_task( mods=None, excluded_descendants=None, ): - self.progress = 0 - self.update_state(state="STARTED", meta={"progress": self.progress}) - source = ContentNode.objects.get(id=source_id) target = ContentNode.objects.get(id=target_id) @@ -150,6 +138,7 @@ def duplicate_nodes_task( mods, excluded_descendants, can_edit_source_channel=can_edit_source_channel, + progress_tracker=self.progress, ) except IntegrityError: # This will happen if the node has already been created @@ -157,13 +146,15 @@ def duplicate_nodes_task( # Possible we might want to raise an error here, but not clear # whether this could then be a way to sniff for ids pass + + changes = [] if new_node is not None: - return {"changes": [ - generate_update_event(pk, CONTENTNODE, {COPYING_FLAG: False, "node_id": new_node.node_id}) - ]} + changes.append(generate_update_event(pk, CONTENTNODE, {COPYING_FLAG: False, "node_id": new_node.node_id})) + + return {"changes": changes} -@task(bind=True, name="export_channel_task") +@app.task(bind=True, name="export_channel_task", track_progress=True) def export_channel_task(self, user_id, channel_id, version_notes="", language=settings.LANGUAGE_CODE): with translation.override(language): channel = publish_channel( @@ -171,7 +162,7 @@ def export_channel_task(self, user_id, channel_id, version_notes="", language=se channel_id, version_notes=version_notes, send_email=True, - task_object=self, + progress_tracker=self.progress, ) return {"changes": [ generate_update_event(channel_id, CHANNEL, {"published": True, "primary_token": channel.get_human_token().token}), @@ -179,7 +170,7 @@ def export_channel_task(self, user_id, channel_id, version_notes="", language=se ]} -@task(bind=True, name="sync_channel_task") +@app.task(bind=True, name="sync_channel_task", track_progress=True) def sync_channel_task( self, user_id, @@ -196,11 +187,11 @@ def sync_channel_task( sync_tags, sync_files, sync_tags, - task_object=self, + progress_tracker=self.progress, ) -@task(name="generatechannelcsv_task") +@app.task(name="generatechannelcsv_task") def generatechannelcsv_task(channel_id, domain, user_id): channel = Channel.objects.get(pk=channel_id) user = User.objects.get(pk=user_id) @@ -231,7 +222,7 @@ def attach(self, filename=None, content=None, mimetype=None): self.attachments.append((filename, content, mimetype)) -@task(name="generateusercsv_task") +@app.task(name="generateusercsv_task") def generateusercsv_task(user_id, language=settings.LANGUAGE_CODE): with translation.override(language): user = User.objects.get(pk=user_id) @@ -254,30 +245,40 @@ def generateusercsv_task(user_id, language=settings.LANGUAGE_CODE): email.send() -@task(name="deletetree_task") +@app.task(name="deletetree_task") def deletetree_task(tree_id): ContentNode.objects.filter(tree_id=tree_id).delete() -@task(name="getnodedetails_task") +@app.task(name="getnodedetails_task") def getnodedetails_task(node_id): node = ContentNode.objects.get(pk=node_id) return node.get_details() -@task(name="generatenodediff_task") +@app.task(name="generatenodediff_task") def generatenodediff_task(updated_id, original_id): return generate_diff(updated_id, original_id) -@task(name="calculate_user_storage_task") +@app.task(name="calculate_user_storage_task") def calculate_user_storage_task(user_id): - user = User.objects.get(pk=user_id) - user.set_space_used() - cache.delete(CACHE_USER_STORAGE_KEY.format(user_id)) + try: + user = User.objects.get(pk=user_id) + user.set_space_used() + cache.delete(CACHE_USER_STORAGE_KEY.format(user_id)) + except User.DoesNotExist: + logging.error("Tried to calculate user storage for user with id {} but they do not exist".format(user_id)) -@task(name="sendcustomemails_task") +@app.task(name="calculate_resource_size_task") +def calculate_resource_size_task(node_id, channel_id): + node = ContentNode.objects.get(pk=node_id) + size, _ = calculate_resource_size(node=node, force=True) + return size + + +@app.task(name="sendcustomemails_task") def sendcustomemails_task(subject, message, query): subject = render_to_string('registration/custom_email_subject.txt', {'subject': subject}) recipients = AdminUserFilter(data=query).qs.distinct() @@ -289,24 +290,53 @@ def sendcustomemails_task(subject, message, query): type_mapping = { - "duplicate-nodes": {"task": duplicate_nodes_task, "progress_tracking": True}, - "move-nodes": {"task": move_nodes_task, "progress_tracking": False}, - "delete-node": {"task": delete_node_task, "progress_tracking": False}, - "export-channel": {"task": export_channel_task, "progress_tracking": True}, - "sync-channel": {"task": sync_channel_task, "progress_tracking": True}, - "get-node-diff": {"task": generatenodediff_task, "progress_tracking": False}, + "duplicate-nodes": duplicate_nodes_task, + "move-nodes": move_nodes_task, + "delete-node": delete_node_task, + "export-channel": export_channel_task, + "sync-channel": sync_channel_task, + "get-node-diff": generatenodediff_task, + "calculate-resource-size": calculate_resource_size_task, } if settings.RUNNING_TESTS: type_mapping.update( { - "test": {"task": test_task, "progress_tracking": False}, - "error-test": {"task": error_test_task, "progress_tracking": False}, - "progress-test": {"task": progress_test_task, "progress_tracking": True}, + "test": test_task, + "error-test": error_test_task, + "progress-test": progress_test_task, } ) +def get_or_create_async_task(task_name, user, **task_args): + """ + :param task_name: Name of the task function (omitting the word 'task', and with dashes in place of underscores) + :param user: User object of the user performing the operation + :param task_args: A dictionary of keyword arguments to be passed down to the task, must be JSON serializable. + :return: Returns the Task model object + """ + if task_name not in type_mapping: + raise KeyError("Need to define task in type_mapping first.") + + if user is None or not isinstance(user, User): + raise TypeError("All tasks must be assigned to a user.") + + qs = Task.objects.filter( + task_type=task_name, + status__in=[STATE_QUEUED, states.PENDING, states.RECEIVED, states.STARTED], + channel_id=task_args.get("channel_id", None), + metadata={"args": task_args}, + ) + + if qs.exists(): + task_info = qs[0] + else: + _, task_info = create_async_task(task_name, user, **task_args) + + return task_info + + def create_async_task(task_name, user, apply_async=True, **task_args): """ Starts a long-running task that runs asynchronously using Celery. Also creates a Task object that can be used by @@ -320,56 +350,43 @@ def create_async_task(task_name, user, apply_async=True, **task_args): the task. :param task_name: Name of the task function (omitting the word 'task', and with dashes in place of underscores) - :param task_options: A dictionary of task properties. Acceptable values are as follows: - - Required - - 'user' or 'user_id': User object, or string id, of the user performing the operation - - Optional - - 'metadata': A dictionary of properties to be used during status and progress tracking. Examples include - a list of channels and content nodes targeted by the task, task progress ('progress' key), sub-task - progress, when applicable. + :param user: User object of the user performing the operation + :param apply_async: Boolean whether to call the Task asynchronously (the default) :param task_args: A dictionary of keyword arguments to be passed down to the task, must be JSON serializable. :return: a tuple of the Task object and a dictionary containing information about the created task. """ if task_name not in type_mapping: raise KeyError("Need to define task in type_mapping first.") - metadata = {"affects": {}} - channel_id = None - if "channel_id" in task_args: - channel_id = task_args["channel_id"] - metadata["affects"]["channel"] = channel_id - - if "node_ids" in task_args: - metadata["affects"]["nodes"] = task_args["node_ids"] - - metadata['args'] = task_args if user is None or not isinstance(user, User): raise TypeError("All tasks must be assigned to a user.") - task_type = type_mapping[task_name] - async_task = task_type["task"] - is_progress_tracking = task_type["progress_tracking"] - + async_task = type_mapping[task_name] task_info = Task.objects.create( task_type=task_name, - status="QUEUED", - is_progress_tracking=is_progress_tracking, + status=STATE_QUEUED, user=user, - metadata=metadata, - channel_id=channel_id, + channel_id=task_args.get("channel_id", None), + metadata={"args": task_args}, + ) + task_sig = async_task.signature( + task_id=str(task_info.task_id), + kwargs=task_args, ) + if apply_async: - task = async_task.apply_async(kwargs=task_args, task_id=str(task_info.task_id)) + task = task_sig.apply_async() else: - task = async_task.apply(kwargs=task_args, task_id=str(task_info.task_id)) + task = task_sig.apply() + # If there was a failure to create the task, the apply_async call will return failed, but # checking the status will still show PENDING. So make sure we write the failure to the # db directly so the frontend can know of the failure. - if task.status == "FAILURE": + if task.status == states.FAILURE: # Error information may have gotten added to the Task object during the call. task_info.refresh_from_db() logging.error("Task failed to start, please check Celery status.") - task_info.status = "FAILURE" + task_info.status = states.FAILURE error_data = { "message": _("Unknown error starting task. Please contact support.") } diff --git a/contentcuration/contentcuration/tasks_test.py b/contentcuration/contentcuration/tasks_test.py index f5df6dbcfc..2c50861fba 100644 --- a/contentcuration/contentcuration/tasks_test.py +++ b/contentcuration/contentcuration/tasks_test.py @@ -32,16 +32,18 @@ def error_test_task(**kwargs): raise Exception("I'm sorry Dave, I'm afraid I can't do that.") -@app.task(bind=True, name="progress_test_task") +@app.task(bind=True, name="progress_test_task", track_progress=True) def progress_test_task(self, **kwargs): """ This is a mock task to be used to test that we can update progress when tracking is enabled. :return: """ logger.info("Request ID = {}".format(self.request.id)) + assert getattr(self, 'progress', None) is not None + assert self.request.id is not None assert Task.objects.filter(task_id=self.request.id).count() == 1 - self.update_state(state="PROGRESS", meta={"progress": 100}) + self.progress.track(75) return 42 diff --git a/contentcuration/contentcuration/tests/base.py b/contentcuration/contentcuration/tests/base.py index 6dbb812492..721cb9a686 100644 --- a/contentcuration/contentcuration/tests/base.py +++ b/contentcuration/contentcuration/tests/base.py @@ -1,7 +1,9 @@ from __future__ import absolute_import from builtins import str +from importlib import import_module +import mock from django.conf import settings from django.core.files.uploadedfile import SimpleUploadedFile from django.core.management import call_command @@ -21,6 +23,29 @@ from contentcuration.utils import minio_utils +def mock_class_instance(target): + """ + Helper that returns a Mocked instance of the `target` class + + :param target: A class or string module path to the class + :return: A mocked class instance of `target` + """ + if isinstance(target, str): + target_split = target.split(".") + target_mod = ".".join(target_split[:-1]) + target_name = target_split[-1] + + module = import_module(target_mod) + target_cls = getattr(module, target_name) + else: + target_cls = target + + class MockClass(target_cls): + def __new__(cls, *args, **kwargs): + return mock.Mock(spec_set=cls) + return MockClass() + + class BucketTestClassMixin(object): @classmethod def create_bucket(cls): diff --git a/contentcuration/contentcuration/tests/test_asynctask.py b/contentcuration/contentcuration/tests/test_asynctask.py index be1619efd7..7acf7d7d26 100644 --- a/contentcuration/contentcuration/tests/test_asynctask.py +++ b/contentcuration/contentcuration/tests/test_asynctask.py @@ -5,6 +5,7 @@ from builtins import str import pytest +from celery import states from django.core.urlresolvers import reverse from django.db import connection from django.db.utils import OperationalError @@ -29,7 +30,6 @@ class AsyncTaskTestCase(BaseAPITestCase): These tests check that creating and updating Celery tasks using the create_async_task function result in an up-to-date Task object with the latest status and information about the task. """ - def test_asynctask_reports_success(self): """ Tests that when an async task is created and completed, the Task object has a status of 'SUCCESS' and @@ -38,83 +38,26 @@ def test_asynctask_reports_success(self): task, task_info = create_async_task("test", self.user, apply_async=False) self.assertEqual(task_info.user, self.user) self.assertEqual(task_info.task_type, "test") - self.assertEqual(task_info.is_progress_tracking, False) - result = task.get() - self.assertEqual(result, 42) - self.assertEqual(Task.objects.get(task_id=task.id).metadata["result"], 42) - self.assertEqual(Task.objects.get(task_id=task.id).status, "SUCCESS") - def test_asynctask_reports_progress(self): - """ - Test that we can retrieve task progress via the Task API. - """ - task, task_info = create_async_task( - "progress-test", self.user, apply_async=False - ) result = task.get() self.assertEqual(result, 42) - self.assertEqual(Task.objects.get(task_id=task.id).status, "SUCCESS") - - # progress is retrieved dynamically upon calls to get the task info, so - # use an API call rather than checking the db directly for progress. - url = reverse("task-detail", kwargs={"task_id": task_info.task_id}) - response = self.get(url) - self.assertEqual(response.data["status"], "SUCCESS") - self.assertEqual(response.data["task_type"], "progress-test") - self.assertEqual(response.data["metadata"]["progress"], 100) - self.assertEqual(response.data["metadata"]["result"], 42) - - def test_asynctask_filters_by_channel(self): - """ - Test that we can filter tasks by channel ID. - """ - - self.channel.editors.add(self.user) - self.channel.save() - task, task_info = create_async_task( - "progress-test", self.user, apply_async=False, channel_id=self.channel.id - ) - self.assertTrue( - Task.objects.filter(metadata__affects__channel=self.channel.id).count() == 1 - ) - result = task.get() - self.assertEqual(result, 42) - self.assertEqual(Task.objects.get(task_id=task.id).status, "SUCCESS") - - # since tasks run sync in tests, we can't test it in an actual running state - # so simulate the running state in the task object. - db_task = Task.objects.get(task_id=task.id) - db_task.status = "STARTED" - db_task.save() - url = "{}?channel={}".format(reverse("task-list"), self.channel.id) - response = self.get(url) - self.assertEqual(len(response.data), 1) - self.assertEqual(response.data[0]["status"], "STARTED") - self.assertEqual(response.data[0]["task_type"], "progress-test") - self.assertEqual(response.data[0]["metadata"]["progress"], 100) - self.assertEqual(response.data[0]["metadata"]["result"], 42) - - url = "{}?channel={}".format(reverse("task-list"), task_info.id) - response = self.get(url) - self.assertEqual(response.status_code, 412) + self.assertEqual(task.status, states.SUCCESS) + self.assertEqual(Task.objects.get(task_id=task.id).metadata["result"], 42) + self.assertEqual(Task.objects.get(task_id=task.id).status, states.SUCCESS) def test_asynctask_reports_error(self): """ Tests that if a task fails with an error, that the error information is stored in the Task object for later retrieval and analysis. """ - task, task_info = create_async_task("error-test", self.user, apply_async=False) + celery_task, task_info = create_async_task("error-test", self.user, apply_async=False) - task = Task.objects.get(task_id=task.id) - self.assertEqual(task.status, "FAILURE") - self.assertTrue("error" in task.metadata) + task_info.refresh_from_db() + self.assertEqual(task_info.status, states.FAILURE) + self.assertTrue("error" in task_info.metadata) - error = task.metadata["error"] - self.assertCountEqual( - list(error.keys()), ["message", "task_args", "task_kwargs", "traceback"] - ) - self.assertEqual(len(error["task_args"]), 0) - self.assertEqual(len(error["task_kwargs"]), 0) + error = task_info.metadata["error"] + self.assertEqual(list(error.keys()), ["message", "traceback"]) traceback_string = "\n".join(error["traceback"]) self.assertTrue("Exception" in traceback_string) self.assertTrue( @@ -167,13 +110,12 @@ def test_duplicate_nodes_task(self): url = reverse("task-detail", kwargs={"task_id": task_info.task_id}) response = self.get(url) assert ( - response.data["status"] == "SUCCESS" + response.data["status"] == states.SUCCESS ), "Task failed, exception: {}".format( response.data["metadata"]["error"]["traceback"] ) - self.assertEqual(response.data["status"], "SUCCESS") + self.assertEqual(response.data["status"], states.SUCCESS) self.assertEqual(response.data["task_type"], "duplicate-nodes") - self.assertEqual(response.data["metadata"]["progress"], 100) result = response.data["metadata"]["result"] node_id = ContentNode.objects.get(pk=task_args["pk"]).node_id self.assertEqual( diff --git a/contentcuration/contentcuration/tests/test_task_api.py b/contentcuration/contentcuration/tests/test_task_api.py index 8939355211..3ff65a3350 100644 --- a/contentcuration/contentcuration/tests/test_task_api.py +++ b/contentcuration/contentcuration/tests/test_task_api.py @@ -1,9 +1,12 @@ from __future__ import absolute_import +import mock +from celery import states from django.core.urlresolvers import reverse from .base import BaseAPITestCase from contentcuration.models import Task +from contentcuration.utils.celery.tasks import AsyncResult class TaskAPITestCase(BaseAPITestCase): @@ -16,23 +19,33 @@ def setUp(self): super(TaskAPITestCase, self).setUp() self.task_url = "/api/task" self.task_data = { - "status": "STARTED", + "status": states.STARTED, "task_type": "YOUTUBE_IMPORT", "task_id": "just_a_test", "user": self.user.pk, "metadata": {}, } - - def create_new_task(self, type, metadata, channel_id=None): + app_patcher = mock.patch("contentcuration.viewsets.task.app") + self.addCleanup(app_patcher.stop) + self.celery_app = app_patcher.start() + self.celery_app.conf.task_always_eager = False + self.celery_app.AsyncResult = mock.MagicMock(spec_set=AsyncResult) + self.async_result = self.celery_app.AsyncResult() + self.async_result.ready.return_value = False + self.async_result.status = states.STARTED + self.async_result.progress = 0 + self.async_result.result = None + + def create_new_task(self, type, channel_id=None): """ Create a new Task object in the DB to simulate the creation of a Celery task and test the Task API. :param type: A string with a task name constant. - :param metadata: A dictionary containing information about the task. See create_async_task docs for more details. + :param channel_id: The ID of the affected channel, if any :return: The created Task object """ return Task.objects.create( - task_type=type, metadata=metadata, status="STARTED", user=self.user, channel_id=channel_id + task_type=type, status="STARTED", user=self.user, channel_id=channel_id, metadata={} ) def test_get_task(self): @@ -40,18 +53,56 @@ def test_get_task(self): Ensure that GET operations using a Task ID return information about the specified task. """ task = self.create_new_task( - type="YOUTUBE_IMPORT", metadata={"channel": self.channel.id} + type="YOUTUBE_IMPORT", channel_id=self.channel.id, + ) + + url = reverse("task-detail", kwargs={"task_id": task.task_id}) + response = self.get(url) + self.assertEqual(response.data["status"], states.STARTED) + self.assertEqual(response.data["task_type"], "YOUTUBE_IMPORT") + self.assertEqual(response.data["channel"], self.channel.id) + self.assertEqual(response.data["metadata"]["progress"], 0) + self.assertIsNone(response.data["metadata"]["result"]) + + def test_get_task__finished(self): + task = self.create_new_task( + type="YOUTUBE_IMPORT", channel_id=self.channel.id, ) + self.async_result.ready.return_value = True + self.async_result.status = states.SUCCESS + self.async_result.progress = 100 + self.async_result.result = 123 url = reverse("task-detail", kwargs={"task_id": task.task_id}) response = self.get(url) - self.assertEqual(response.data["status"], "STARTED") + self.assertEqual(response.data["status"], states.SUCCESS) self.assertEqual(response.data["task_type"], "YOUTUBE_IMPORT") - self.assertEqual(response.data["metadata"], {"channel": self.channel.id}) + self.assertEqual(response.data["channel"], self.channel.id) + self.assertEqual(response.data["metadata"]["progress"], 100) + self.assertEqual(response.data["metadata"]["result"], 123) + + def test_get_task__errored(self): + task = self.create_new_task( + type="YOUTUBE_IMPORT", channel_id=self.channel.id, + ) + self.async_result.ready.return_value = True + self.async_result.status = states.FAILURE + self.async_result.progress = 75 + self.async_result.traceback = "traceback" + self.async_result.result = Exception() + + url = reverse("task-detail", kwargs={"task_id": task.task_id}) + response = self.get(url) + self.assertEqual(response.data["status"], states.FAILURE) + self.assertEqual(response.data["task_type"], "YOUTUBE_IMPORT") + self.assertEqual(response.data["channel"], self.channel.id) + self.assertEqual(response.data["metadata"]["progress"], 100) + self.assertEqual(response.data["metadata"]["result"], "traceback") + self.assertEqual(response.data["metadata"]["error"]["traceback"], "traceback") def test_get_task_list(self): self.create_new_task( - type="YOUTUBE_IMPORT", metadata={"affects": {"channel": self.channel.id}}, channel_id=self.channel.id + type="YOUTUBE_IMPORT", channel_id=self.channel.id ) url = reverse("task-list") + "?channel={}".format(self.channel.id) @@ -60,11 +111,12 @@ def test_get_task_list(self): response = self.get(url) self.assertEqual(len(response.data), 1) - self.assertEqual(response.data[0]["status"], "STARTED") - self.assertEqual(response.data[0]["task_type"], "YOUTUBE_IMPORT") - self.assertEqual( - response.data[0]["metadata"], {"affects": {"channel": self.channel.id}} - ) + data = response.data[0] + self.assertEqual(data["status"], states.STARTED) + self.assertEqual(data["task_type"], "YOUTUBE_IMPORT") + self.assertEqual(data["channel"], self.channel.id) + self.assertEqual(data["metadata"]["progress"], 0) + self.assertIsNone(data["metadata"]["result"]) def test_get_empty_task_list(self): url = reverse("task-list") @@ -87,7 +139,7 @@ def test_cannot_update_task(self): via API. """ - task = self.create_new_task(type="NONE", metadata={}) + task = self.create_new_task(type="NONE") url = reverse("task-detail", kwargs={"task_id": task.task_id}) response = self.put(url, data=self.task_data) self.assertEqual(response.status_code, 405) @@ -97,7 +149,7 @@ def test_delete_task(self): Ensure that a call to DELETE the specified task results in its deletion. """ task = self.create_new_task( - type="YOUTUBE_IMPORT", metadata={"channel": self.channel.id} + type="YOUTUBE_IMPORT", channel_id=self.channel.id ) url = reverse("task-detail", kwargs={"task_id": task.task_id}) diff --git a/contentcuration/contentcuration/tests/utils/celery/__init__.py b/contentcuration/contentcuration/tests/utils/celery/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contentcuration/contentcuration/tests/utils/celery/test_tasks.py b/contentcuration/contentcuration/tests/utils/celery/test_tasks.py new file mode 100644 index 0000000000..d7599f944e --- /dev/null +++ b/contentcuration/contentcuration/tests/utils/celery/test_tasks.py @@ -0,0 +1,45 @@ +import mock +from django.test import SimpleTestCase + +from contentcuration.utils.celery.tasks import ProgressTracker + + +class ProgressTrackerTestCase(SimpleTestCase): + def setUp(self): + super(ProgressTrackerTestCase, self).setUp() + self.update_state = mock.Mock() + self.tracker = ProgressTracker("abc123", self.update_state) + + def test_set_total(self): + self.assertEqual(100, self.tracker.total) + self.tracker.set_total(200) + self.assertEqual(200, self.tracker.total) + + def test_increment(self): + with mock.patch.object(self.tracker, 'track') as track: + self.tracker.increment() + track.assert_called_with(1.0) + self.tracker.progress = 1 + self.tracker.increment(increment=2.0) + track.assert_called_with(3.0) + + def test_task_progress(self): + self.assertEqual(0, self.tracker.task_progress) + self.tracker.progress = 50 + self.assertEqual(50, self.tracker.task_progress) + self.tracker.total = 200 + self.assertEqual(25, self.tracker.task_progress) + + def test_track(self): + self.tracker.track(2) + self.assertEqual(2, self.tracker.progress) + self.update_state.assert_called_with(meta={'progress': 2}) + + self.update_state.assert_called_with(meta={'progress': 2}) + + def test_track__small_increment(self): + self.tracker.track(0.5) + self.assertEqual(0.5, self.tracker.progress) + self.update_state.assert_not_called() + self.tracker.track(1.0) + self.update_state.assert_called_with(meta={'progress': 1}) diff --git a/contentcuration/contentcuration/tests/utils/test_cache.py b/contentcuration/contentcuration/tests/utils/test_cache.py new file mode 100644 index 0000000000..ca79192668 --- /dev/null +++ b/contentcuration/contentcuration/tests/utils/test_cache.py @@ -0,0 +1,83 @@ +import mock +from django.test import SimpleTestCase + +from ..base import mock_class_instance +from contentcuration.models import ContentNode +from contentcuration.utils.cache import ResourceSizeCache + + +class ResourceSizeCacheTestCase(SimpleTestCase): + def setUp(self): + super(ResourceSizeCacheTestCase, self).setUp() + self.node = mock.Mock(spec_set=ContentNode()) + self.node.pk = "abcdefghijklmnopqrstuvwxyz" + self.redis_client = mock_class_instance("redis.client.StrictRedis") + self.cache_client = mock_class_instance("django_redis.client.DefaultClient") + self.cache_client.get_client.return_value = self.redis_client + self.cache = mock.Mock(client=self.cache_client) + self.helper = ResourceSizeCache(self.node, self.cache) + + def test_redis_client(self): + self.assertEqual(self.helper.redis_client, self.redis_client) + self.cache_client.get_client.assert_called_once_with(write=True) + + def test_redis_client__not_redis(self): + self.cache.client = mock.Mock() + self.assertIsNone(self.helper.redis_client) + + def test_hash_key(self): + self.assertEqual("resource_size:abcd", self.helper.hash_key) + + def test_size_key(self): + self.assertEqual("abcdefghijklmnopqrstuvwxyz:value", self.helper.size_key) + + def test_modified_key(self): + self.assertEqual("abcdefghijklmnopqrstuvwxyz:modified", self.helper.modified_key) + + def test_cache_get(self): + self.redis_client.hget.return_value = 123 + self.assertEqual(123, self.helper.cache_get("test_key")) + self.redis_client.hget.assert_called_once_with(self.helper.hash_key, "test_key") + + def test_cache_get__not_redis(self): + self.cache.client = mock.Mock() + self.cache.get.return_value = 123 + self.assertEqual(123, self.helper.cache_get("test_key")) + self.cache.get.assert_called_once_with("{}:{}".format(self.helper.hash_key, "test_key")) + + def test_cache_set(self): + self.helper.cache_set("test_key", 123) + self.redis_client.hset.assert_called_once_with(self.helper.hash_key, "test_key", 123) + + def test_cache_set__delete(self): + self.helper.cache_set("test_key", None) + self.redis_client.hdel.assert_called_once_with(self.helper.hash_key, "test_key") + + def test_cache_set__not_redis(self): + self.cache.client = mock.Mock() + self.helper.cache_set("test_key", 123) + self.cache.set.assert_called_once_with("{}:{}".format(self.helper.hash_key, "test_key"), 123) + + def test_get_size(self): + with mock.patch.object(self.helper, 'cache_get') as cache_get: + cache_get.return_value = 123 + self.assertEqual(123, self.helper.get_size()) + cache_get.assert_called_once_with(self.helper.size_key) + + def test_set_size(self): + with mock.patch.object(self.helper, 'cache_set') as cache_set: + self.helper.set_size(123) + cache_set.assert_called_once_with(self.helper.size_key, 123) + + def test_get_modified(self): + with mock.patch.object(self.helper, 'cache_get') as cache_get: + cache_get.return_value = '2021-01-01 00:00:00' + modified = self.helper.get_modified() + self.assertIsNotNone(modified) + self.assertEqual('2021-01-01T00:00:00', modified.isoformat()) + cache_get.assert_called_once_with(self.helper.modified_key) + + def test_set_modified(self): + with mock.patch.object(self.helper, 'cache_set') as cache_set: + self.helper.set_modified('2021-01-01 00:00:00') + cache_set.assert_called_once_with(self.helper.modified_key, '2021-01-01 00:00:00') diff --git a/contentcuration/contentcuration/tests/utils/test_nodes.py b/contentcuration/contentcuration/tests/utils/test_nodes.py new file mode 100644 index 0000000000..eef2d7f6af --- /dev/null +++ b/contentcuration/contentcuration/tests/utils/test_nodes.py @@ -0,0 +1,132 @@ +import datetime +from time import sleep + +import mock +from dateutil.parser import isoparse +from django.db.models import F +from django.db.models import Max +from django.test import SimpleTestCase + +from ..base import BaseTestCase +from contentcuration.models import ContentNode +from contentcuration.utils.nodes import calculate_resource_size +from contentcuration.utils.nodes import ResourceSizeHelper +from contentcuration.utils.nodes import SlowCalculationError +from contentcuration.utils.nodes import STALE_MAX_CALCULATION_SIZE + + +class ResourceSizeHelperTestCase(BaseTestCase): + def setUp(self): + super(ResourceSizeHelperTestCase, self).setUp() + self.root = self.channel.main_tree + self.helper = ResourceSizeHelper(self.root) + + def test_get_size(self): + self.assertEqual(10, self.helper.get_size()) + + def test_get_size__root_node_simplification(self): + self.assertEqual(10, self.helper.get_size()) + with mock.patch.object(self.root, 'is_root_node') as is_root_node: + is_root_node.return_value = False + self.assertEqual(10, self.helper.get_size()) + + def test_modified_since(self): + max_modified = self.helper.queryset.aggregate(max_modified=Max(F('modified')))['max_modified'] + before_max = max_modified - datetime.timedelta(seconds=1) + after_max = max_modified + datetime.timedelta(seconds=1) + self.assertTrue(self.helper.modified_since(before_max.isoformat())) + self.assertFalse(self.helper.modified_since(after_max.isoformat())) + + +@mock.patch("contentcuration.utils.nodes.ResourceSizeHelper") +@mock.patch("contentcuration.utils.nodes.ResourceSizeCache") +class CalculateResourceSizeTestCase(SimpleTestCase): + def setUp(self): + super(CalculateResourceSizeTestCase, self).setUp() + self.node = mock.Mock(spec_set=ContentNode()) + + def assertCalculation(self, cache, helper, force=False): + helper().get_size.return_value = 456 + now_val = isoparse('2021-01-01T00:00:00') + with mock.patch("contentcuration.utils.nodes.timezone.now") as now: + now.return_value = now_val + size, stale = calculate_resource_size(self.node, force=force) + self.assertEqual(456, size) + self.assertFalse(stale) + cache().set_size.assert_called_once_with(456) + cache().set_modified.assert_called_once_with(now_val) + + def test_cached(self, cache, helper): + cache().get_size.return_value = 123 + cache().get_modified.return_value = '2021-01-01 00:00:00' + helper().modified_since.return_value = False + size, stale = calculate_resource_size(self.node) + self.assertEqual(123, size) + self.assertFalse(stale) + + def test_stale__too_big__no_force(self, cache, helper): + self.node.get_descendant_count.return_value = STALE_MAX_CALCULATION_SIZE + 1 + cache().get_size.return_value = 123 + cache().get_modified.return_value = '2021-01-01 00:00:00' + helper().modified_since.return_value = True + size, stale = calculate_resource_size(self.node) + self.assertEqual(123, size) + self.assertTrue(stale) + + def test_stale__too_big__forced(self, cache, helper): + self.node.get_descendant_count.return_value = STALE_MAX_CALCULATION_SIZE + 1 + helper().modified_since.return_value = True + self.assertCalculation(cache, helper, force=True) + + def test_missing__too_big__no_force(self, cache, helper): + self.node.get_descendant_count.return_value = STALE_MAX_CALCULATION_SIZE + 1 + cache().get_size.return_value = None + cache().get_modified.return_value = None + size, stale = calculate_resource_size(self.node) + self.assertIsNone(size) + self.assertTrue(stale) + + def test_missing__too_big__forced(self, cache, helper): + self.node.get_descendant_count.return_value = STALE_MAX_CALCULATION_SIZE + 1 + self.assertCalculation(cache, helper, force=True) + + def test_missing__small(self, cache, helper): + self.node.get_descendant_count.return_value = 1 + cache().get_size.return_value = None + cache().get_modified.return_value = None + self.assertCalculation(cache, helper) + + def test_unforced__took_too_long(self, cache, helper): + self.node.get_descendant_count.return_value = 1 + cache().get_size.return_value = None + cache().get_modified.return_value = None + + def db_get_size(): + sleep(1.2) + return 456 + + helper().get_size.side_effect = db_get_size + + with mock.patch("contentcuration.utils.nodes.report_exception") as report_exception, \ + mock.patch("contentcuration.utils.nodes.SLOW_UNFORCED_CALC_THRESHOLD", 1): + self.assertCalculation(cache, helper) + self.assertIsInstance(report_exception.mock_calls[0][1][0], SlowCalculationError) + + +class CalculateResourceSizeIntegrationTestCase(BaseTestCase): + """ + Integration test case + """ + def setUp(self): + super(CalculateResourceSizeIntegrationTestCase, self).setUp() + self.root = self.channel.main_tree + + def test_small(self): + size, stale = calculate_resource_size(self.root) + self.assertEqual(10, size) + self.assertFalse(stale) + + # again, should be cached + size, stale = calculate_resource_size(self.root) + self.assertEqual(10, size) + self.assertFalse(stale) diff --git a/contentcuration/contentcuration/tests/viewsets/test_contentnode.py b/contentcuration/contentcuration/tests/viewsets/test_contentnode.py index 5f07765eba..8fb4cbca84 100644 --- a/contentcuration/contentcuration/tests/viewsets/test_contentnode.py +++ b/contentcuration/contentcuration/tests/viewsets/test_contentnode.py @@ -1552,7 +1552,7 @@ def test_resource_size(self): total_size = sum(files_map.values()) - self.assertEqual(response.data, total_size) + self.assertEqual(response.data.get('size', 0), total_size) class AnnotationsTest(StudioAPITestCase): diff --git a/contentcuration/contentcuration/urls.py b/contentcuration/contentcuration/urls.py index 1baa1cbc38..518ab01011 100644 --- a/contentcuration/contentcuration/urls.py +++ b/contentcuration/contentcuration/urls.py @@ -179,6 +179,5 @@ def get_redirect_url(self, *args, **kwargs): url(r'^api/deferred_user_data/$', registration_views.deferred_user_data, name="deferred_user_data"), url(r'^settings/$', settings_views.settings, name='settings'), url(r'^administration/', admin_views.administration, name='administration'), - url(r'^admin/', include(admin.site.urls)), url(r'^manifest.webmanifest$', pwa.ManifestView.as_view(), name="manifest"), ) diff --git a/contentcuration/contentcuration/utils/cache.py b/contentcuration/contentcuration/utils/cache.py index ec31dc081c..f83722b1e8 100644 --- a/contentcuration/contentcuration/utils/cache.py +++ b/contentcuration/contentcuration/utils/cache.py @@ -2,8 +2,12 @@ import math import random import time +from datetime import datetime -from django.core.cache import cache +from dateutil.parser import isoparse +from django.core.cache import cache as django_cache +from django_redis.client import DefaultClient +from django_redis.client.default import _main_exceptions DEFERRED_FLAG = "__DEFERRED" @@ -50,7 +54,7 @@ def timer(*args, **kwargs): @functools.wraps(func) def wrapper(*args, **kwargs): key = args[0] - cached = cache.get(key) + cached = django_cache.get(key) if cached is not None: metadata = cached["METADATA"] if cached["CALCULATING"]: @@ -68,7 +72,7 @@ def wrapper(*args, **kwargs): "DELTA": delta, "EXPIRE": time.time() + expire, } - cache.set(key, cached_info, timeout=None) + django_cache.set(key, cached_info, timeout=None) return wrapper @@ -87,10 +91,10 @@ def delete_cache_keys(key_pattern): :param for_view: :return: Number of keys deleted """ - if hasattr(cache, "delete_pattern"): - return cache.delete_pattern(key_pattern) - elif cache.has_key(key_pattern): - cache.delete(key_pattern) + if hasattr(django_cache, "delete_pattern"): + return django_cache.delete_pattern(key_pattern) + elif django_cache.has_key(key_pattern): # noqa: W601 + django_cache.delete(key_pattern) return 1 return 0 @@ -101,3 +105,117 @@ def delete_public_channel_cache_keys(): """ delete_cache_keys("*get_public_channel_list*") delete_cache_keys("*get_user_public_channels*") + + +def redis_retry(func): + """ + This decorator wraps a function using the lower level Redis client to mimic functionality + that occurs in the DefaultClient. It attempts a retry for certain exceptions, which this + catches and retries once + + @see django_redis.client.default.DefaultClient + """ + def redis_retry_func(*args, **kwargs): + try: + return func(*args, **kwargs) + except _main_exceptions: + # try one more time + return func(*args, **kwargs) + return redis_retry_func + + +FILE_MODIFIED = -1 + + +class ResourceSizeCache: + """ + Helper class for managing Resource size cache. + + If the django_cache is Redis, then we use the lower level Redis client to use + its hash commands, HSET and HGET, to ensure we can store lots of data in performant way + """ + def __init__(self, node, cache=None): + self.node = node + self.cache = cache or django_cache + + @classmethod + def reset_modified_for_file(cls, file, modified=FILE_MODIFIED): + """ + :type file: contentcuration.models.File + :type modified: datetime|None|FILE_MODIFIED + """ + if not file.contentnode_id: + return + cache = ResourceSizeCache(file.contentnode.get_root()) + cache.reset_modified(file.modified if modified == FILE_MODIFIED else modified) + + @property + def redis_client(self): + """ + Gets the lower level Redis client, if the cache is a Redis cache + + :rtype: redis.client.StrictRedis + """ + redis_client = None + cache_client = getattr(self.cache, 'client', None) + if isinstance(cache_client, DefaultClient): + redis_client = cache_client.get_client(write=True) + return redis_client + + @property + def hash_key(self): + # only first four characters + return "resource_size:{}".format(self.node.pk[:4]) + + @property + def size_key(self): + return "{}:value".format(self.node.pk) + + @property + def modified_key(self): + return "{}:modified".format(self.node.pk) + + @redis_retry + def cache_get(self, key): + if self.redis_client is not None: + # notice use of special `HGET` + # See: https://redis.io/commands/hget + return self.redis_client.hget(self.hash_key, key) + return self.cache.get("{}:{}".format(self.hash_key, key)) + + @redis_retry + def cache_set(self, key, val): + if self.redis_client is not None: + # notice use of special `HSET` and `HDEL` + # See: https://redis.io/commands/hset + # See: https://redis.io/commands/hdel + if val is None: + return self.redis_client.hdel(self.hash_key, key) + return self.redis_client.hset(self.hash_key, key, val) + return self.cache.set("{}:{}".format(self.hash_key, key), val) + + def get_size(self): + size = self.cache_get(self.size_key) + return int(size) if size else size + + def get_modified(self): + modified = self.cache_get(self.modified_key) + return isoparse(modified) if modified is not None else modified + + def set_size(self, size): + return self.cache_set(self.size_key, size) + + def set_modified(self, modified): + return self.cache_set(self.modified_key, modified.isoformat() if isinstance(modified, datetime) else modified) + + def reset_modified(self, modified): + """ + Sets modified if it's less than the existing, otherwise sets None if not a datetime + :param modified: A datetime or None + """ + if not isinstance(modified, datetime): + return self.set_modified(None) + + current_modified = self.get_modified() + if current_modified and current_modified > modified: + return self.set_modified(modified) diff --git a/contentcuration/contentcuration/utils/celery/__init__.py b/contentcuration/contentcuration/utils/celery/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contentcuration/contentcuration/utils/celery/app.py b/contentcuration/contentcuration/utils/celery/app.py new file mode 100644 index 0000000000..5653ee4f7a --- /dev/null +++ b/contentcuration/contentcuration/utils/celery/app.py @@ -0,0 +1,20 @@ +from celery import Celery + +from contentcuration.utils.celery.tasks import CeleryTask + + +class CeleryApp(Celery): + task_cls = CeleryTask + result_cls = 'contentcuration.utils.celery.tasks:CeleryAsyncResult' + _result_cls = None + + def on_init(self): + """ + Use init call back to set our own result class. Celery doesn't yet have an easier way + to customize this class specifically + """ + self._result_cls = self.subclass_with_self(self.result_cls) + + @property + def AsyncResult(self): + return self._result_cls diff --git a/contentcuration/contentcuration/utils/celery_signals.py b/contentcuration/contentcuration/utils/celery/signals.py similarity index 69% rename from contentcuration/contentcuration/utils/celery_signals.py rename to contentcuration/contentcuration/utils/celery/signals.py index 3934ea8dcb..84752a3418 100644 --- a/contentcuration/contentcuration/utils/celery_signals.py +++ b/contentcuration/contentcuration/utils/celery/signals.py @@ -1,7 +1,7 @@ import os -import traceback -from builtins import str +import traceback as _traceback +from celery import states from celery.signals import after_task_publish from celery.signals import task_failure from celery.signals import task_postrun @@ -52,7 +52,7 @@ def postrun(sender, **kwargs): @after_task_publish.connect -def before_start(sender, headers, body, **kwargs): +def before_start(sender, headers, **kwargs): """ Create a Task object before the task actually started, set the task object status to be PENDING, with the signal @@ -67,42 +67,31 @@ def before_start(sender, headers, body, **kwargs): try: task = Task.objects.get(task_id=task_id) - task.status = "PENDING" + task.status = states.PENDING task.save() logger.info("Task object {} updated with status PENDING.".format(task_id)) except ObjectDoesNotExist: - # If the object doesn't exist, that likely means the task was created outside of create_async_task + # If the object doesn't exist, that likely means the task was created outside of + # create_async_task pass @task_failure.connect -def on_failure(sender, **kwargs): +def on_failure(sender, task_id, traceback, **kwargs): # Ensure that the connection still works before we attempt # to access the database here. See function comment for more details. check_connection() try: - task = Task.objects.get(task_id=sender.request.id) - task.status = "FAILURE" - task_args = [] - task_kwargs = [] - - # arg values may be objects, so we need to ensure they are string representation for JSON serialization. - for arg in kwargs['args']: - task_args.append(str(arg)) - for kwarg in kwargs['kwargs']: - task_kwargs.append(str(kwarg)) - - exception_data = { - 'task_args': task_args, - 'task_kwargs': task_kwargs, - 'traceback': traceback.format_tb(kwargs['traceback']) - } + task = Task.objects.get(task_id=task_id) + task.status = states.FAILURE if 'error' not in task.metadata: task.metadata['error'] = {} - task.metadata['error'].update(exception_data) + task.metadata['error'].update(traceback=_traceback.format_tb(traceback)) task.save() except ObjectDoesNotExist: - pass # If the object doesn't exist, that likely means the task was created outside of create_async_task + # If the object doesn't exist, that likely means the task was created outside of + # create_async_task + pass @task_success.connect @@ -114,13 +103,11 @@ def on_success(sender, result, **kwargs): logger.info("on_success called, process is {}".format(os.getpid())) task_id = sender.request.id task = Task.objects.get(task_id=task_id) - task.status = "SUCCESS" + task.status = states.SUCCESS task.metadata['result'] = result - # We're finished, so go ahead and record 100% progress so that getters expecting it get a value - # even though there is no longer a Celery task to query. - if task.is_progress_tracking: - task.metadata['progress'] = 100 task.save() logger.info("Task with ID {} succeeded".format(task_id)) except ObjectDoesNotExist: - pass # If the object doesn't exist, that likely means the task was created outside of create_async_task + # If the object doesn't exist, that likely means the task was created outside of + # create_async_task + pass diff --git a/contentcuration/contentcuration/utils/celery/tasks.py b/contentcuration/contentcuration/utils/celery/tasks.py new file mode 100644 index 0000000000..846d279b81 --- /dev/null +++ b/contentcuration/contentcuration/utils/celery/tasks.py @@ -0,0 +1,141 @@ +import math +from functools import partial + +from celery import states +from celery.app.task import Task +from celery.result import AsyncResult + +from contentcuration.utils.sentry import report_exception + + +class ProgressTracker: + """ + Helper to track task progress + """ + key = 'progress' + + def __init__(self, task_id, update_state): + """ + :param task_id: The ID of the calling task + :param update_state: Callback to update the task's state + :type update_state: Callable + """ + self.task_id = task_id + self.update_state = update_state + self.total = 100.0 + self.progress = 0.0 + self.last_reported_progress = 0.0 + + def set_total(self, total): + """ + :param total: The amount for which progress is deemed 100%, defaults to 100 + """ + self.total = total + + def increment(self, increment=1.0): + """ + Increments and triggers tracking of the progress to the task meta + """ + self.track(self.progress + increment) + + def track(self, progress): + """ + :param progress: The current progress amount + """ + self.progress = progress + + # only update the task progress in >=1% increments + if math.floor(self.last_reported_progress) < math.floor(self.task_progress): + self.last_reported_progress = self.task_progress + metadata = {self.key: self.task_progress} + self.update_state(meta=metadata) + + @property + def task_progress(self): + return int(min((100.0 * self.progress / self.total), 100.0)) + + +class CeleryTask(Task): + """ + Custom task class so we can add default `after_return` handling which marks the task as + succeeded or failed. + + This is set as the Task class on our Celery app, so to track progress on a task, mark it + when decorating the task: + ``` + @app.task(bind=True, track_progress=True) + def my_task(self): + self.progress.increment() # progress tracker + ``` + """ + # by default, celery does not track task starting itself + track_started = True + send_events = True + + # custom task option + track_progress = False + + _progress_tracker = None + + def after_return(self, status, retval, task_id, args, kwargs, einfo): + """ + Ensures status is updated after task completion, otherwise our signals may not fire + """ + # we assume that if the state is past starting, that this has been handled + if states.state(status) > states.state(states.STARTED): + return + + # mark the completion + state = states.SUCCESS + if einfo or isinstance(retval, Exception): + state = states.FAILURE + self.update_state(state=state, meta=retval) + + def report_exception(self, e): + """ + Marks the task as failed and reports the exception to Sentry + :type e: Exception + """ + # @see AsyncResult.traceback + self.update_state(state=states.FAILURE, traceback=e.__traceback__) + report_exception(e) + + def update_state(self, task_id=None, state=None, meta=None, traceback=None): + """ + The super.update_state doesn't expose or pass along the traceback kwarg to the backend... :( + """ + if task_id is None: + task_id = self.request.id + self.backend.store_result(task_id, meta, state, traceback=traceback) + + @property + def progress(self): + """ + The task instance is instantiated once, so we make sure that we check request.id on the + tracker to check if it's for the current task + """ + if not self.track_progress: + return None + + if self._progress_tracker is None or self._progress_tracker.task_id != self.request.id: + self._progress_tracker = ProgressTracker( + self.request.id, + partial(self.update_state, task_id=self.request.id, state=states.STARTED), + ) + + return self._progress_tracker + + +class CeleryAsyncResult(AsyncResult): + @property + def progress(self): + """ + A somewhat confusing aspect is that even though we put `progress` into the metadata, + the Redis backend puts that within the `result` when it stores the metadata + """ + if self.ready(): + return 100 + try: + return self.result["progress"] if self.result else 0 + except KeyError: + return None diff --git a/contentcuration/contentcuration/utils/nodes.py b/contentcuration/contentcuration/utils/nodes.py index 4b7b9aaf12..ebc762523c 100644 --- a/contentcuration/contentcuration/utils/nodes.py +++ b/contentcuration/contentcuration/utils/nodes.py @@ -3,16 +3,23 @@ import json import logging import os +import time from builtins import next from builtins import str +from datetime import datetime from io import BytesIO from django.conf import settings +from django.contrib.postgres.aggregates.general import BoolOr from django.core.exceptions import ObjectDoesNotExist from django.core.exceptions import ValidationError from django.core.files.storage import default_storage from django.db.models import Count +from django.db.models import F from django.db.models import Sum +from django.db.models import Value +from django.db.models.expressions import CombinedExpression +from django.utils import timezone from le_utils.constants import content_kinds from le_utils.constants import format_presets @@ -23,7 +30,9 @@ from contentcuration.models import generate_object_storage_name from contentcuration.models import Language from contentcuration.models import User +from contentcuration.utils.cache import ResourceSizeCache from contentcuration.utils.files import get_thumbnail_encoding +from contentcuration.utils.sentry import report_exception def map_files_to_node(user, node, data): @@ -275,3 +284,140 @@ def generate_diff(updated_id, original_id): default_storage.save(jsonpath, BytesIO(json.dumps(jsondata).encode('utf-8'))) return jsondata + + +class ResourceSizeHelper: + """ + Helper class for calculating resource size + """ + def __init__(self, node): + """ + :param node: The contentnode with which to determine resource size + :type node: ContentNode + """ + self.node = node + + @property + def queryset(self): + """ + :rtype: QuerySet + """ + qs = self.node.get_descendants(include_self=True) + + if self.node.is_root_node(): + # since root node, remove unneeded filtering + qs = ContentNode.objects.filter(tree_id=self.node.tree_id) + # else if it's a leaf node, simplification handled by `get_descendants` + + return File.objects.filter(contentnode__in=qs.filter(complete=True)) + + def get_size(self): + """ + Calculates the size of the resource and it's descendants + + SQL: + SELECT SUM(file_size) + FROM ( + SELECT DISTINCT + checksum, + file_size + FROM contentcuration_file + WHERE contentnode_id IN ( SELECT id FROM contentcuration_contentnode WHERE ... ) + ) subquery + ; + + :return: An integer representing the resource size + """ + sizes = self.queryset.values("checksum").distinct().aggregate(resource_size=Sum("file_size")) + return sizes['resource_size'] + + def modified_since(self, compare_datetime): + """ + Determines if resources have been modified since ${compare_datetime} + + SQL: + SELECT BOOL_OR(modified > ${compare_datetime}) + FROM ( + SELECT + modified_at + FROM contentcuration_file + WHERE contentnode_id IN ( SELECT id FROM contentcuration_contentnode WHERE ... ) + ) subquery + ; + + : + :param compare_datetime: The datetime with which to compare. + :return: A boolean indicating whether or not resources have been modified since the datetime + """ + compare_datetime = compare_datetime.isoformat() if isinstance(compare_datetime, datetime) else compare_datetime + result = self.queryset.aggregate( + modified_since=BoolOr(CombinedExpression(F('modified'), '>', Value(compare_datetime))) + ) + return result['modified_since'] + + +STALE_MAX_CALCULATION_SIZE = 5000 +SLOW_UNFORCED_CALC_THRESHOLD = 5 + + +class SlowCalculationError(Exception): + """ Error used for tracking slow calculation times in Sentry """ + def __init__(self, node_id, time): + self.node_id = node_id + self.time = time + + message = ( + "Resource size recalculation for {} took {} seconds to complete, " + "exceeding {} second threshold." + ) + self.message = message.format( + self.node_id, self.time, SLOW_UNFORCED_CALC_THRESHOLD + ) + + super(SlowCalculationError, self).__init__(self.message) + + +def calculate_resource_size(node, force=False): + """ + Function that calculates the total file size of all files of the specified node and it's + descendants, if they're marked complete + + :param node: The ContentNode for which to calculate resource size. + :param force: A boolean to force calculation if node is too big and would otherwise do so async + :return: A tuple of (size, stale) + :rtype: (int, bool) + """ + cache = ResourceSizeCache(node) + db = ResourceSizeHelper(node) + + size = None if force else cache.get_size() + modified = None if force else cache.get_modified() + + # since we added file.modified as nullable, if the result is None/Null, then we know that it + # hasn't been modified since our last cached value, so we only need to check is False + if size is not None and modified is not None and db.modified_since(modified) is False: + # use cache if not modified since cache modified timestamp + return size, False + + # if the node is too big to calculate its size right away, we return "stale" + if not force and node.get_descendant_count() > STALE_MAX_CALCULATION_SIZE: + return size, True + + start = time.time() + + # do recalculation, marking modified time before starting + now = timezone.now() + size = db.get_size() + cache.set_size(size) + cache.set_modified(now) + elapsed = time.time() - start + + if not force and elapsed > SLOW_UNFORCED_CALC_THRESHOLD: + # warn us in Sentry if an unforced recalculation took too long + try: + # we need to raise it to get Python to fill out the stack trace. + raise SlowCalculationError(node.pk, elapsed) + except SlowCalculationError as e: + report_exception(e) + + return size, False diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py index 9dd2a8d1e3..3b6ecc0793 100644 --- a/contentcuration/contentcuration/utils/publish.py +++ b/contentcuration/contentcuration/utils/publish.py @@ -4,7 +4,6 @@ import itertools import json import logging as logmodule -import math import os import re import tempfile @@ -71,7 +70,10 @@ def send_emails(channel, user_id, version_notes=''): user.email_user(subject, message, settings.DEFAULT_FROM_EMAIL, ) -def create_content_database(channel, force, user_id, force_exercises, task_object=None): +def create_content_database(channel, force, user_id, force_exercises, progress_tracker=None): + """ + :type progress_tracker: contentcuration.utils.celery.ProgressTracker|None + """ # increment the channel version if not force: raise_if_nodes_are_all_unchanged(channel) @@ -82,14 +84,21 @@ def create_content_database(channel, force, user_id, force_exercises, task_objec channel.main_tree.save() prepare_export_database(tempdb) - if task_object: - task_object.update_state(state='STARTED', meta={'progress': 10.0}) + if progress_tracker: + progress_tracker.track(10) map_channel_to_kolibri_channel(channel) - map_content_nodes(channel.main_tree, channel.language, channel.id, channel.name, user_id=user_id, - force_exercises=force_exercises, task_object=task_object, starting_percent=10.0) + map_content_nodes( + channel.main_tree, + channel.language, + channel.id, + channel.name, + user_id=user_id, + force_exercises=force_exercises, + progress_tracker=progress_tracker, + ) # It should be at this percent already, but just in case. - if task_object: - task_object.update_state(state='STARTED', meta={'progress': 90.0}) + if progress_tracker: + progress_tracker.track(90) map_prerequisites(channel.main_tree) save_export_database(channel.pk) @@ -113,9 +122,18 @@ def assign_license_to_contentcuration_nodes(channel, license): channel.main_tree.get_family().update(license_id=license.pk) -def map_content_nodes(root_node, default_language, channel_id, channel_name, user_id=None, - force_exercises=False, task_object=None, starting_percent=10.0): - +def map_content_nodes( + root_node, + default_language, + channel_id, + channel_name, + user_id=None, + force_exercises=False, + progress_tracker=None, +): + """ + :type progress_tracker: contentcuration.utils.celery.ProgressTracker|None + """ # make sure we process nodes higher up in the tree first, or else when we # make mappings the parent nodes might not be there @@ -126,8 +144,6 @@ def map_content_nodes(root_node, default_language, channel_id, channel_name, use total_nodes = root_node.get_descendant_count() + 1 # make sure we include root_node percent_per_node = old_div(task_percent_total, total_nodes) - current_node_percent = 0.0 - def queue_get_return_none_when_empty(): try: return node_queue.popleft() @@ -156,13 +172,8 @@ def queue_get_return_none_when_empty(): create_associated_file_objects(kolibrinode, node) map_tags_to_node(kolibrinode, node) - # if we have a large amount of nodes, like, say, 44000, we don't want to update the percent - # of the task every node due to the latency involved, so only update in 1 percent increments. - new_node_percent = current_node_percent + percent_per_node - if task_object and new_node_percent > math.ceil(current_node_percent): - progress_percent = min(task_percent_total + starting_percent, starting_percent + new_node_percent) - task_object.update_state(state='STARTED', meta={'progress': progress_percent}) - current_node_percent = new_node_percent + if progress_tracker: + progress_tracker.increment(increment=percent_per_node) def create_slideshow_manifest(ccnode, kolibrinode, user_id=None): @@ -708,14 +719,25 @@ def wait_for_async_tasks(channel, attempts=360): logging.warning('Ran out of attempts: Tasks still detected for {} during publish'.format(channel.pk)) -def publish_channel(user_id, channel_id, version_notes='', force=False, force_exercises=False, send_email=False, task_object=None): +def publish_channel( + user_id, + channel_id, + version_notes='', + force=False, + force_exercises=False, + send_email=False, + progress_tracker=None, +): + """ + :type progress_tracker: contentcuration.utils.celery.ProgressTracker|None + """ channel = ccmodels.Channel.objects.get(pk=channel_id) kolibri_temp_db = None try: set_channel_icon_encoding(channel) wait_for_async_tasks(channel) - kolibri_temp_db = create_content_database(channel, force, user_id, force_exercises, task_object) + kolibri_temp_db = create_content_database(channel, force, user_id, force_exercises, progress_tracker=progress_tracker) increment_channel_version(channel) mark_all_nodes_as_published(channel) add_tokens_to_channel(channel) @@ -735,8 +757,8 @@ def publish_channel(user_id, channel_id, version_notes='', force=False, force_ex record_publish_stats(channel) - if task_object: - task_object.update_state(state='STARTED', meta={'progress': 100.0}) + if progress_tracker: + progress_tracker.track(100) # No matter what, make sure publishing is set to False once the run is done finally: diff --git a/contentcuration/contentcuration/utils/sync.py b/contentcuration/contentcuration/utils/sync.py index 059a0fa471..cba8220da6 100644 --- a/contentcuration/contentcuration/utils/sync.py +++ b/contentcuration/contentcuration/utils/sync.py @@ -7,7 +7,6 @@ from django_bulk_update.helper import bulk_update from le_utils.constants import content_kinds from le_utils.constants import format_presets -from past.utils import old_div from contentcuration.models import AssessmentItem from contentcuration.models import ContentTag @@ -20,8 +19,11 @@ def sync_channel( sync_tags=False, sync_files=False, sync_assessment_items=False, - task_object=None, + progress_tracker=None, ): + """ + :type progress_tracker: contentcuration.utils.celery.ProgressTracker|None + """ nodes_to_sync = channel.main_tree.get_descendants().filter( Q(original_node__isnull=False) | Q(original_channel_id__isnull=False, original_source_node_id__isnull=False) @@ -29,9 +31,9 @@ def sync_channel( sync_node_count = nodes_to_sync.count() if not sync_node_count: raise ValueError("Tried to sync a channel that has no imported content") - total_percent = 100.0 - percent_per_node = old_div(total_percent, sync_node_count) - percent_done = 0.0 + if progress_tracker: + progress_tracker.set_total(sync_node_count) + for node in nodes_to_sync: node = sync_node( node, @@ -40,13 +42,10 @@ def sync_channel( sync_files=sync_files, sync_assessment_items=sync_assessment_items, ) - if task_object: - percent_done = min(percent_done + percent_per_node, total_percent) - task_object.update_state(state="STARTED", meta={"progress": percent_done}) + if progress_tracker: + progress_tracker.increment() if node.changed: node.save() - if task_object: - task_object.update_state(state="STARTED", meta={"progress": 100.0}) def sync_node( diff --git a/contentcuration/contentcuration/utils/tasks.py b/contentcuration/contentcuration/utils/tasks.py deleted file mode 100644 index ea3b917075..0000000000 --- a/contentcuration/contentcuration/utils/tasks.py +++ /dev/null @@ -1,15 +0,0 @@ -import celery - - -def increment_progress(increment=1): - if celery.current_task: - total = celery.current_task.total - current_progress = celery.current_task.progress - new_progress = min(current_progress + (100 * increment / total), 100) - celery.current_task.update_state(meta={"progress": new_progress}) - celery.current_task.progress = new_progress - - -def set_total(total): - if celery.current_task: - celery.current_task.total = total diff --git a/contentcuration/contentcuration/viewsets/channel.py b/contentcuration/contentcuration/viewsets/channel.py index d95213f63a..10ab46c7d9 100644 --- a/contentcuration/contentcuration/viewsets/channel.py +++ b/contentcuration/contentcuration/viewsets/channel.py @@ -42,6 +42,7 @@ from contentcuration.viewsets.base import RequiredFilterSet from contentcuration.viewsets.base import ValuesViewset from contentcuration.viewsets.common import CatalogPaginator +from contentcuration.viewsets.common import ChangeEventMixin from contentcuration.viewsets.common import ContentDefaultsSerializer from contentcuration.viewsets.common import JSONFieldDictSerializer from contentcuration.viewsets.common import SQCount @@ -378,7 +379,7 @@ def format_demo_server_url(item): } -class ChannelViewSet(ValuesViewset): +class ChannelViewSet(ChangeEventMixin, ValuesViewset): queryset = Channel.objects.all() permission_classes = [IsAuthenticated] serializer_class = ChannelSerializer @@ -432,6 +433,7 @@ def publish(self, request, pk=None): channel = self.get_edit_object() if ( + channel.main_tree.publishing or not channel.main_tree.get_descendants(include_self=True) .filter(changed=True) .exists() @@ -450,8 +452,10 @@ def publish(self, request, pk=None): "language": get_language(), } - create_async_task("export-channel", request.user, **task_args) - return Response("") + _, task_info = create_async_task("export-channel", request.user, **task_args) + return Response({ + 'changes': [self.create_task_event(task_info)] + }) @detail_route(methods=["post"]) def sync(self, request, pk=None): @@ -485,8 +489,10 @@ def sync(self, request, pk=None): "sync_assessment_items": data.get("assessment_items"), } - task, task_info = create_async_task("sync-channel", request.user, **task_args) - return Response("") + _, task_info = create_async_task("sync-channel", request.user, **task_args) + return Response({ + 'changes': [self.create_task_event(task_info)] + }) @method_decorator( diff --git a/contentcuration/contentcuration/viewsets/common.py b/contentcuration/contentcuration/viewsets/common.py index 13775bd1ce..3e76d6f435 100644 --- a/contentcuration/contentcuration/viewsets/common.py +++ b/contentcuration/contentcuration/viewsets/common.py @@ -25,6 +25,8 @@ from contentcuration.models import DEFAULT_CONTENT_DEFAULTS from contentcuration.models import License +from contentcuration.viewsets.sync.constants import TASK +from contentcuration.viewsets.sync.utils import generate_create_event class MissingRequiredParamsException(APIException): @@ -251,3 +253,14 @@ def get_queryset(self): "UserFilteredPrimaryKeyRelatedField used on queryset for model that does not have filter_edit_queryset method" ) return queryset + + +class ChangeEventMixin(object): + def create_task_event(self, task): + """ + :type task: contentcuration.models.Task + """ + from contentcuration.viewsets.task import TaskViewSet + task_viewset = TaskViewSet(request=self.request) + task_viewset.initial(self.request) + return generate_create_event(task.task_id, TASK, task_viewset.serialize_object(pk=task.pk)) diff --git a/contentcuration/contentcuration/viewsets/contentnode.py b/contentcuration/contentcuration/viewsets/contentnode.py index a2bf5fc243..416c583d10 100644 --- a/contentcuration/contentcuration/viewsets/contentnode.py +++ b/contentcuration/contentcuration/viewsets/contentnode.py @@ -3,22 +3,23 @@ from django.conf import settings from django.db import IntegrityError +from django.db import models from django.db.models import Exists from django.db.models import F from django.db.models import OuterRef from django.db.models import Q from django.db.models import Subquery -from django.db.models import Sum from django.db.models.functions import Coalesce from django.http import Http404 from django.utils.timezone import now +from django_cte import CTEQuerySet from django_filters.rest_framework import CharFilter from django_filters.rest_framework import DjangoFilterBackend from django_filters.rest_framework import UUIDFilter from le_utils.constants import content_kinds from le_utils.constants import exercises from le_utils.constants import roles -from rest_framework.decorators import detail_route +from rest_framework.decorators import action from rest_framework.permissions import IsAuthenticated from rest_framework.response import Response from rest_framework.serializers import BooleanField @@ -28,6 +29,10 @@ from rest_framework.serializers import ValidationError from rest_framework.viewsets import ViewSet +from contentcuration.db.models.expressions import IsNull +from contentcuration.db.models.query import RIGHT_JOIN +from contentcuration.db.models.query import With +from contentcuration.db.models.query import WithValues from contentcuration.models import AssessmentItem from contentcuration.models import Channel from contentcuration.models import ContentNode @@ -35,12 +40,16 @@ from contentcuration.models import File from contentcuration.models import generate_storage_url from contentcuration.models import PrerequisiteContentRelationship +from contentcuration.models import UUIDField from contentcuration.tasks import create_async_task +from contentcuration.tasks import get_or_create_async_task +from contentcuration.utils.nodes import calculate_resource_size from contentcuration.viewsets.base import BulkListSerializer from contentcuration.viewsets.base import BulkModelSerializer from contentcuration.viewsets.base import BulkUpdateMixin from contentcuration.viewsets.base import RequiredFilterSet from contentcuration.viewsets.base import ValuesViewset +from contentcuration.viewsets.common import ChangeEventMixin from contentcuration.viewsets.common import DotPathValueMixin from contentcuration.viewsets.common import JSONFieldDictSerializer from contentcuration.viewsets.common import NotNullMapArrayAgg @@ -115,60 +124,92 @@ def filter__node_id_channel_id(self, queryset, name, value): return queryset.filter(query) -def bulk_create_tag_relations(tags_relations_to_create): - if tags_relations_to_create: - # In Django 2.2 add ignore_conflicts to make this fool proof - try: - ContentNode.tags.through.objects.bulk_create(tags_relations_to_create) - except IntegrityError: - # One of the relations already exists, so just save them one by one. - # Django's default upsert behaviour should mean we get no errors this way - for to_create in tags_relations_to_create: - to_create.save() +tags_values_cte_fields = { + 'tag': models.CharField(), + 'node_id': UUIDField() +} def set_tags(tags_by_id): - all_tag_names = set() - tags_relations_to_create = [] + tag_tuples = [] tags_relations_to_delete = [] + + # put all tags into a tuple (tag_name, node_id) to send into SQL for target_node_id, tag_names in tags_by_id.items(): for tag_name, value in tag_names.items(): - if value: - all_tag_names.add(tag_name) - - # channel is no longer used on the tag object, so don't bother using it - available_tags = set( - ContentTag.objects.filter( - tag_name__in=all_tag_names, channel__isnull=True - ).values_list("tag_name", flat=True) + tag_tuples.append((tag_name, target_node_id)) + + # create CTE that holds the tag_tuples data + values_cte = WithValues(tags_values_cte_fields, tag_tuples, name='values_cte') + + # create another CTE which will RIGHT join against the tag table, so we get all of our + # tag_tuple data back, plus the tag_id if it exists. Ideally we wouldn't normally use a RIGHT + # join, we would simply swap the tables and do a LEFT, but with the VALUES CTE + # that isn't possible + tags_qs = ( + values_cte.join(ContentTag, tag_name=values_cte.col.tag, _join_type=RIGHT_JOIN) + .annotate( + tag=values_cte.col.tag, + node_id=values_cte.col.node_id, + tag_id=F('id'), + ) + .values('tag', 'node_id', 'tag_id') + ) + tags_cte = With(tags_qs, name='tags_cte') + + # the final query, we RIGHT join against the tag relation table so we get the tag_tuple back + # again, plus the tag_id from the previous CTE, plus annotate a boolean of whether + # the relation exists + qs = ( + tags_cte.join( + CTEQuerySet(model=ContentNode.tags.through), + contenttag_id=tags_cte.col.tag_id, + contentnode_id=tags_cte.col.node_id, + _join_type=RIGHT_JOIN + ) + .with_cte(values_cte) + .with_cte(tags_cte) + .annotate( + tag_name=tags_cte.col.tag, + node_id=tags_cte.col.node_id, + tag_id=tags_cte.col.tag_id, + has_relation=IsNull('contentnode_id', negate=True) + ) + .values('tag_name', 'node_id', 'tag_id', 'has_relation') ) - tags_to_create = all_tag_names.difference(available_tags) - - new_tags = [ContentTag(tag_name=tag_name) for tag_name in tags_to_create] - ContentTag.objects.bulk_create(new_tags) - - tag_id_by_tag_name = { - t["tag_name"]: t["id"] - for t in ContentTag.objects.filter( - tag_name__in=all_tag_names, channel__isnull=True - ).values("tag_name", "id") - } - - for target_node_id, tag_names in tags_by_id.items(): - for tag_name, value in tag_names.items(): - if value: - tag_id = tag_id_by_tag_name[tag_name] - tags_relations_to_create.append( - ContentNode.tags.through( - contentnode_id=target_node_id, contenttag_id=tag_id - ) - ) + created_tags = {} + for result in qs: + tag_name = result["tag_name"] + node_id = result["node_id"] + tag_id = result["tag_id"] + has_relation = result["has_relation"] + + tags = tags_by_id[node_id] + value = tags[tag_name] + + # tag wasn't found in the DB, but we're adding it to the node, so create it + if not tag_id and value: + # keep a cache of created tags during the session + if tag_name in created_tags: + tag_id = created_tags[tag_name] else: - tags_relations_to_delete.append( - Q(contentnode_id=target_node_id, contenttag__tag_name=tag_name) - ) - bulk_create_tag_relations(tags_relations_to_create) + tag, _ = ContentTag.objects.get_or_create(tag_name=tag_name, channel_id=None) + tag_id = tag.pk + created_tags.update({tag_name: tag_id}) + + # if we're adding the tag but the relation didn't exist, create it now, otherwise + # track the tag as one relation we should delete + if value and not has_relation: + ContentNode.tags.through.objects.get_or_create( + contentnode_id=node_id, contenttag_id=tag_id + ) + elif not value and has_relation: + tags_relations_to_delete.append( + Q(contentnode_id=node_id, contenttag_id=tag_id) + ) + + # delete tags if tags_relations_to_delete: ContentNode.tags.through.objects.filter( reduce(lambda x, y: x | y, tags_relations_to_delete) @@ -465,7 +506,7 @@ def delete_from_changes(self, changes): # Apply mixin first to override ValuesViewset -class ContentNodeViewSet(BulkUpdateMixin, ValuesViewset): +class ContentNodeViewSet(BulkUpdateMixin, ChangeEventMixin, ValuesViewset): queryset = ContentNode.objects.all() serializer_class = ContentNodeSerializer permission_classes = [IsAuthenticated] @@ -537,7 +578,7 @@ def get_edit_queryset(self): queryset = super(ContentNodeViewSet, self).get_edit_queryset() return self._annotate_channel_id(queryset) - @detail_route(methods=["get"]) + @action(detail=True, methods=["get"]) def requisites(self, request, pk=None): if not pk: raise Http404 @@ -568,22 +609,39 @@ def requisites(self, request, pk=None): ), ) - @detail_route(methods=["get"]) + @action(detail=True, methods=["get"]) def size(self, request, pk=None): if not pk: raise Http404 + + task_info = None node = self.get_object() - files = ( - File.objects.filter( - contentnode__in=node.get_descendants(include_self=True), - contentnode__complete=True, + + # currently we restrict triggering calculations through the API to the channel root node + if not node.is_root_node(): + raise Http404 + + # we don't force the calculation, so if the channel is large, it returns the cached size + size, stale = calculate_resource_size(node=node, force=False) + if stale: + # When stale, that means the value is not up-to-date with modified files in the DB, + # and the channel is significantly large, so we'll queue an async task for calculation. + # We don't really need more than one queued async calculation task, so we use + # get_or_create_async_task to ensure a task is queued, as well as return info about it + task_args = dict(node_id=node.pk, channel_id=node.channel_id) + task_info = get_or_create_async_task( + "calculate-resource-size", self.request.user, **task_args ) - .values("checksum") - .distinct() - ) - sizes = files.aggregate(resource_size=Sum("file_size")) - return Response(sizes["resource_size"] or 0) + changes = [] + if task_info is not None: + changes.append(self.create_task_event(task_info)) + + return Response({ + "size": size, + "stale": stale, + "changes": changes + }) def annotate_queryset(self, queryset): queryset = queryset.annotate(total_count=(F("rght") - F("lft") - 1) / 2) diff --git a/contentcuration/contentcuration/viewsets/file.py b/contentcuration/contentcuration/viewsets/file.py index 8b9b6e3233..c9467b9c65 100644 --- a/contentcuration/contentcuration/viewsets/file.py +++ b/contentcuration/contentcuration/viewsets/file.py @@ -12,6 +12,8 @@ from contentcuration.models import File from contentcuration.models import generate_object_storage_name from contentcuration.models import generate_storage_url +from contentcuration.utils.cache import ResourceSizeCache +from contentcuration.utils.sentry import report_exception from contentcuration.utils.storage_common import get_presigned_upload_url from contentcuration.utils.user import calculate_user_storage from contentcuration.viewsets.base import BulkDeleteMixin @@ -50,6 +52,13 @@ class FileSerializer(BulkModelSerializer): ) def update(self, instance, validated_data): + if "contentnode" in validated_data: + # if we're updating the file's related node, we'll trigger a reset for the + # old channel's cache modified date + update_node = validated_data.get("contentnode", None) + if not update_node or update_node.id != instance.contentnode_id: + ResourceSizeCache.reset_modified_for_file(instance) + results = super(FileSerializer, self).update(instance, validated_data) if instance.uploaded_by_id: calculate_user_storage(instance.uploaded_by_id) @@ -101,6 +110,25 @@ class FileViewSet(BulkDeleteMixin, BulkUpdateMixin, ReadOnlyValuesViewset): "assessment_item": "assessment_item_id", } + def delete_from_changes(self, changes): + try: + # reset channel resource size cache + keys = [change["key"] for change in changes] + queryset = self.filter_queryset_from_keys( + self.get_edit_queryset(), keys + ).order_by() + # find all root nodes for files, and reset the cache modified date + root_nodes = ContentNode.objects.filter( + parent__isnull=True, + tree_id__in=queryset.values_list('contentnode__tree_id', flat=True).distinct(), + ) + for root_node in root_nodes: + ResourceSizeCache(root_node).reset_modified(None) + except Exception as e: + report_exception(e) + + return super(FileViewSet, self).delete_from_changes(changes) + @list_route(methods=["post"]) def upload_url(self, request): try: diff --git a/contentcuration/contentcuration/viewsets/task.py b/contentcuration/contentcuration/viewsets/task.py index 04eca308eb..c41994baf9 100644 --- a/contentcuration/contentcuration/viewsets/task.py +++ b/contentcuration/contentcuration/viewsets/task.py @@ -1,4 +1,6 @@ -from django.conf import settings +import uuid + +from celery import states from django_filters.rest_framework import DjangoFilterBackend from django_filters.rest_framework import UUIDFilter from rest_framework.permissions import IsAuthenticated @@ -26,7 +28,8 @@ class Meta: class TaskViewSet(ReadOnlyValuesViewset, DestroyModelMixin): - queryset = Task.objects.all() + order_by = 'created' + queryset = Task.objects.order_by(order_by) permission_classes = [IsAuthenticated] filter_backends = (DjangoFilterBackend,) filter_class = TaskFilter @@ -40,9 +43,10 @@ class TaskViewSet(ReadOnlyValuesViewset, DestroyModelMixin): "is_progress_tracking", "user_id", "metadata", + "channel_id" ) - field_map = {"user": "user_id"} + field_map = {"user": "user_id", "channel": "channel_id"} @classmethod def id_attr(cls): @@ -55,29 +59,35 @@ def perform_destroy(self, instance): instance.delete() def get_edit_queryset(self): - return Task.objects.filter(user=self.request.user) + return Task.objects.filter(user=self.request.user).order_by(self.order_by) def consolidate(self, items, queryset): - if not settings.CELERY_TASK_ALWAYS_EAGER: - for item in items: - result = app.AsyncResult(item["task_id"]) - if result and result.status: - item["status"] = result.status - if "progress" not in item["metadata"]: - # Just flagging this, but this appears to be the correct way to get task metadata, - # even though the API is marked as private. - meta = result._get_task_meta() - if ( - meta - and "result" in meta - and meta["result"] - and not isinstance(meta["result"], Exception) - and "progress" in meta["result"] - ): - item["metadata"]["progress"] = meta["result"]["progress"] - else: - item["metadata"]["progress"] = None - item["channel"] = ( - item.get("metadata", {}).get("affects", {}).get("channel") - ) + if app.conf.task_always_eager: + return items + + for item in items: + if isinstance(item["channel"], uuid.UUID): + item["channel"] = item["channel"].hex + + # @see contentcuration.utils.celery.tasks:CeleryAsyncResult + task_result = app.AsyncResult(item["task_id"]) + if not task_result: + return item + + progress = task_result.progress + result = task_result.result + metadata = {} + + if task_result.status in states.EXCEPTION_STATES: + metadata.update(error={'traceback': task_result.traceback}) + if isinstance(result, Exception): + result = task_result.traceback + progress = 100 + elif not task_result.ready(): + # overwrite result if not complete, since it would have progress data + result = None + + item["status"] = task_result.status + item["metadata"].update(progress=progress, result=result, **metadata) + return items diff --git a/integration_testing/features/publish-channel.feature b/integration_testing/features/publish-channel.feature index 731d34692b..6b24e45c5a 100755 --- a/integration_testing/features/publish-channel.feature +++ b/integration_testing/features/publish-channel.feature @@ -16,4 +16,15 @@ Feature: Publish a channel And I click *Refresh* Then the browser refreshes And the published version is updated - And I receive an email for channel published successfully \ No newline at end of file + And I receive an email for channel published successfully + + Scenario: Publish a large channel + When the channel as at least 5000 nodes + And I click the *Publish* button in the top right corner + Then the *Publish modal* appears + And I see a loading spinner while it loads the channel's size + And I see the *Continue* button is disabled + When the channel's size has loaded + Then the loading spinner disappears + And I see the *Continue* button isn't disabled + And I can continue publishing diff --git a/jest_config/setup.js b/jest_config/setup.js index 353f418cf7..014ffef4d4 100644 --- a/jest_config/setup.js +++ b/jest_config/setup.js @@ -1,4 +1,3 @@ -import 'core-js'; import 'regenerator-runtime/runtime'; import * as Aphrodite from 'aphrodite'; import * as AphroditeNoImportant from 'aphrodite/no-important'; diff --git a/requirements-dev.txt b/requirements-dev.txt index 2e5347e245..9ff1b64632 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,126 +4,331 @@ # # pip-compile requirements-dev.in # -appdirs==1.4.3 # via virtualenv -aspy.yaml==1.3.0 # via pre-commit -astroid==2.3.3 # via pylint -attrs==19.3.0 # via pytest -autoflake==1.3.1 # via -r requirements-dev.in -autopep8==1.4 # via -r requirements-dev.in -backcall==0.1.0 # via ipython -certifi==2019.11.28 # via -c requirements.txt, geventhttpclient, requests -cfgv==3.0.0 # via pre-commit -chardet==3.0.4 # via -c requirements.txt, requests -click==7.0 # via flask, pip-tools -codecov==2.0.15 # via -r requirements-dev.in -colorama==0.4.3 # via pytest-watch -configargparse==1.2.3 # via locust -coreapi==2.3.3 # via drf-yasg -coreschema==0.0.4 # via coreapi, drf-yasg -coverage==5.0.3 # via -r requirements-dev.in, codecov, pytest-cov -git+https://github.com/someshchaturvedi/customizable-django-profiler.git#customizable-django-profiler # via -r requirements-dev.in -decorator==4.4.1 # via ipython, traitlets -distlib==0.3.0 # via virtualenv -django-concurrent-test-helper==0.7.0 # via -r requirements-dev.in -django-debug-panel==0.8.3 # via -r requirements-dev.in -django-debug-toolbar==1.9.1 # via -r requirements-dev.in, django-debug-panel -django==1.11.29 # via -c requirements.txt, django-debug-toolbar, drf-yasg -djangorestframework==3.9.1 # via -c requirements.txt, drf-yasg -docopt==0.6.2 # via pytest-watch -drf-yasg==1.17.1 # via -r requirements-dev.in -faker==0.7.3 # via -r requirements-dev.in, mixer -filelock==3.0.12 # via virtualenv -flake8==3.4.1 # via -r requirements-dev.in -flask-basicauth==0.2.0 # via locust -flask==1.1.2 # via flask-basicauth, locust -fonttools==4.17.1 # via -r requirements-dev.in -gevent==20.6.2 # via geventhttpclient, locust -geventhttpclient==1.4.4 # via locust -greenlet==0.4.16 # via gevent -identify==1.4.11 # via pre-commit -idna==2.8 # via -c requirements.txt, requests -importlib-metadata==1.5.0 # via importlib-resources, pluggy, pre-commit, pytest, virtualenv -importlib-resources==1.5.0 # via pre-commit, virtualenv -importmagic==0.1.7 # via -r requirements-dev.in -inflection==0.5.0 # via drf-yasg -ipdb==0.12.3 # via -r requirements-dev.in -ipython-genutils==0.2.0 # via traitlets -ipython==7.12.0 # via ipdb -isort==4.3.21 # via -r requirements-dev.in, pylint, pyls-isort -itsdangerous==1.1.0 # via flask -itypes==1.2.0 # via coreapi -jedi==0.15.2 # via -r requirements-dev.in, ipython, python-language-server -jinja2==2.11.2 # via coreschema, flask -json-rpc==1.13.0 # via -r requirements-dev.in -lazy-object-proxy==1.4.3 # via astroid -locust==1.1.1 # via -r requirements-dev.in -markupsafe==1.1.1 # via jinja2 -mccabe==0.6.1 # via flake8, pylint -mixer==5.6.6 # via -r requirements-dev.in -mock==4.0.1 # via -r requirements-dev.in, django-concurrent-test-helper -more-itertools==8.2.0 # via pytest -msgpack==1.0.0 # via locust -nodeenv==1.3.5 # via -r requirements-dev.in, pre-commit -packaging==20.1 # via drf-yasg, pytest -parso==0.6.1 # via jedi -pathtools==0.1.2 # via watchdog -pexpect==4.8.0 # via ipython -pickleshare==0.7.5 # via ipython -pip-tools==5.3.1 # via -r requirements-dev.in -pluggy==0.13.1 # via pytest, python-language-server -pre-commit==1.15.1 # via -r requirements-dev.in -prompt-toolkit==3.0.3 # via ipython -psutil==5.7.2 # via locust -ptyprocess==0.6.0 # via pexpect -py==1.8.1 # via pytest -pycodestyle==2.3.1 # via autopep8, flake8 -pyflakes==1.5.0 # via autoflake, flake8 -pygments==2.5.2 # via ipython -pyinstrument-cext==0.2.2 # via pyinstrument -pyinstrument==3.1.4 # via -r requirements-dev.in -pylint==2.4.4 # via -r requirements-dev.in -pyls-isort==0.1.1 # via -r requirements-dev.in -pympler==0.8 # via -r requirements-dev.in -pypandoc==1.5 # via -r requirements-dev.in -pyparsing==2.4.6 # via packaging -pytest-cov==2.8.1 # via -r requirements-dev.in -pytest-django==3.8.0 # via -r requirements-dev.in -pytest-logging==2015.11.4 # via -r requirements-dev.in -pytest-pythonpath==0.7.3 # via -r requirements-dev.in -pytest-timeout==1.4.2 # via -r requirements-dev.in -pytest-watch==4.2.0 # via -r requirements-dev.in -pytest==5.3.5 # via -r requirements-dev.in, pytest-cov, pytest-django, pytest-logging, pytest-pythonpath, pytest-timeout, pytest-watch -python-dateutil==2.8.1 # via -c requirements.txt, faker -python-jsonrpc-server==0.3.4 # via python-language-server -python-language-server==0.31.8 # via -r requirements-dev.in, pyls-isort -pytz==2019.3 # via -c requirements.txt, django -pyyaml==5.3 # via aspy.yaml, pre-commit -pyzmq==19.0.2 # via locust -requests==2.22.0 # via -c requirements.txt, codecov, coreapi, locust -rope==0.16.0 # via -r requirements-dev.in -ruamel.yaml.clib==0.2.0 # via ruamel.yaml -ruamel.yaml==0.16.10 # via drf-yasg -service-factory==0.1.6 # via -r requirements-dev.in -six==1.14.0 # via -c requirements.txt, astroid, django-concurrent-test-helper, drf-yasg, faker, geventhttpclient, packaging, pip-tools, pre-commit, python-dateutil, traitlets, virtualenv -sqlparse==0.3.0 # via django-debug-toolbar -tabulate==0.8.2 # via -r requirements-dev.in -tblib==1.7.0 # via django-concurrent-test-helper -toml==0.10.0 # via pre-commit -traitlets==4.3.3 # via ipython -typed-ast==1.4.1 # via astroid -ujson==1.35 # via python-jsonrpc-server, python-language-server -uritemplate==3.0.1 # via -c requirements.txt, coreapi, drf-yasg -urllib3==1.25.8 # via -c requirements.txt, requests -virtualenv==20.0.3 # via pre-commit -watchdog==0.10.2 # via pytest-watch -wcwidth==0.1.8 # via prompt-toolkit, pytest -werkzeug==1.0.1 # via flask -wheel==0.35.1 # via pypandoc -wrapt==1.11.2 # via astroid -yapf==0.29.0 # via -r requirements-dev.in -zipp==2.2.0 # via importlib-metadata, importlib-resources -zope.event==4.4 # via gevent -zope.interface==5.1.0 # via gevent +appdirs==1.4.3 + # via virtualenv +aspy.yaml==1.3.0 + # via pre-commit +astroid==2.3.3 + # via pylint +attrs==19.3.0 + # via pytest +autoflake==1.3.1 + # via -r requirements-dev.in +autopep8==1.4 + # via -r requirements-dev.in +backcall==0.1.0 + # via ipython +certifi==2019.11.28 + # via + # -c requirements.txt + # geventhttpclient + # requests +cfgv==3.0.0 + # via pre-commit +chardet==3.0.4 + # via + # -c requirements.txt + # requests +click==7.0 + # via + # flask + # pip-tools +codecov==2.0.15 + # via -r requirements-dev.in +colorama==0.4.3 + # via pytest-watch +configargparse==1.2.3 + # via locust +coreapi==2.3.3 + # via drf-yasg +coreschema==0.0.4 + # via + # coreapi + # drf-yasg +coverage==5.0.3 + # via + # -r requirements-dev.in + # codecov + # pytest-cov +git+https://github.com/someshchaturvedi/customizable-django-profiler.git#customizable-django-profiler + # via -r requirements-dev.in +decorator==4.4.1 + # via + # ipython + # traitlets +distlib==0.3.0 + # via virtualenv +django-concurrent-test-helper==0.7.0 + # via -r requirements-dev.in +django-debug-panel==0.8.3 + # via -r requirements-dev.in +django-debug-toolbar==1.9.1 + # via + # -r requirements-dev.in + # django-debug-panel +django==1.11.29 + # via + # -c requirements.txt + # django-debug-toolbar + # drf-yasg +djangorestframework==3.9.1 + # via + # -c requirements.txt + # drf-yasg +docopt==0.6.2 + # via pytest-watch +drf-yasg==1.17.1 + # via -r requirements-dev.in +faker==0.7.3 + # via + # -r requirements-dev.in + # mixer +filelock==3.0.12 + # via virtualenv +flake8==3.4.1 + # via -r requirements-dev.in +flask-basicauth==0.2.0 + # via locust +flask==1.1.2 + # via + # flask-basicauth + # locust +fonttools==4.17.1 + # via -r requirements-dev.in +gevent==20.6.2 + # via + # geventhttpclient + # locust +geventhttpclient==1.4.4 + # via locust +greenlet==0.4.16 + # via gevent +identify==1.4.11 + # via pre-commit +idna==2.8 + # via + # -c requirements.txt + # requests +importlib-metadata==1.5.0 + # via pre-commit +importmagic==0.1.7 + # via -r requirements-dev.in +inflection==0.5.0 + # via drf-yasg +ipdb==0.12.3 + # via -r requirements-dev.in +ipython-genutils==0.2.0 + # via traitlets +ipython==7.12.0 + # via ipdb +isort==4.3.21 + # via + # -r requirements-dev.in + # pylint + # pyls-isort +itsdangerous==1.1.0 + # via flask +itypes==1.2.0 + # via coreapi +jedi==0.15.2 + # via + # -r requirements-dev.in + # ipython + # python-language-server +jinja2==2.11.3 + # via + # coreschema + # flask +json-rpc==1.13.0 + # via -r requirements-dev.in +lazy-object-proxy==1.4.3 + # via astroid +locust==1.1.1 + # via -r requirements-dev.in +markupsafe==1.1.1 + # via jinja2 +mccabe==0.6.1 + # via + # flake8 + # pylint +mixer==5.6.6 + # via -r requirements-dev.in +mock==4.0.1 + # via + # -r requirements-dev.in + # django-concurrent-test-helper +more-itertools==8.2.0 + # via pytest +msgpack==1.0.0 + # via locust +nodeenv==1.3.5 + # via + # -r requirements-dev.in + # pre-commit +packaging==20.1 + # via + # drf-yasg + # pytest +parso==0.6.1 + # via jedi +pathtools==0.1.2 + # via watchdog +pexpect==4.8.0 + # via ipython +pickleshare==0.7.5 + # via ipython +pip-tools==5.3.1 + # via -r requirements-dev.in +pluggy==0.13.1 + # via + # pytest + # python-language-server +pre-commit==1.15.1 + # via -r requirements-dev.in +prompt-toolkit==3.0.3 + # via ipython +psutil==5.7.2 + # via locust +ptyprocess==0.6.0 + # via pexpect +py==1.8.1 + # via pytest +pycodestyle==2.3.1 + # via + # autopep8 + # flake8 +pyflakes==1.5.0 + # via + # autoflake + # flake8 +pygments==2.7.4 + # via ipython +pyinstrument-cext==0.2.2 + # via pyinstrument +pyinstrument==3.1.4 + # via -r requirements-dev.in +pylint==2.4.4 + # via -r requirements-dev.in +pyls-isort==0.1.1 + # via -r requirements-dev.in +pympler==0.8 + # via -r requirements-dev.in +pypandoc==1.5 + # via -r requirements-dev.in +pyparsing==2.4.6 + # via packaging +pytest-cov==2.8.1 + # via -r requirements-dev.in +pytest-django==3.8.0 + # via -r requirements-dev.in +pytest-logging==2015.11.4 + # via -r requirements-dev.in +pytest-pythonpath==0.7.3 + # via -r requirements-dev.in +pytest-timeout==1.4.2 + # via -r requirements-dev.in +pytest-watch==4.2.0 + # via -r requirements-dev.in +pytest==5.3.5 + # via + # -r requirements-dev.in + # pytest-cov + # pytest-django + # pytest-logging + # pytest-pythonpath + # pytest-timeout + # pytest-watch +python-dateutil==2.8.1 + # via + # -c requirements.txt + # faker +python-jsonrpc-server==0.3.4 + # via python-language-server +python-language-server==0.31.8 + # via + # -r requirements-dev.in + # pyls-isort +pytz==2019.3 + # via + # -c requirements.txt + # django +pyyaml==5.4 + # via + # aspy.yaml + # pre-commit +pyzmq==19.0.2 + # via locust +requests==2.22.0 + # via + # -c requirements.txt + # codecov + # coreapi + # locust +rope==0.16.0 + # via -r requirements-dev.in +ruamel.yaml==0.16.10 + # via drf-yasg +service-factory==0.1.6 + # via -r requirements-dev.in +six==1.14.0 + # via + # -c requirements.txt + # astroid + # django-concurrent-test-helper + # drf-yasg + # faker + # geventhttpclient + # packaging + # pip-tools + # pre-commit + # python-dateutil + # traitlets + # virtualenv +sqlparse==0.3.0 + # via django-debug-toolbar +tabulate==0.8.2 + # via -r requirements-dev.in +tblib==1.7.0 + # via django-concurrent-test-helper +toml==0.10.0 + # via pre-commit +traitlets==4.3.3 + # via ipython +ujson==1.35 + # via + # python-jsonrpc-server + # python-language-server +uritemplate==3.0.1 + # via + # -c requirements.txt + # coreapi + # drf-yasg +urllib3==1.25.8 + # via + # -c requirements.txt + # requests +virtualenv==20.0.3 + # via pre-commit +watchdog==0.10.2 + # via pytest-watch +wcwidth==0.1.8 + # via + # prompt-toolkit + # pytest +werkzeug==1.0.1 + # via flask +wheel==0.35.1 + # via pypandoc +wrapt==1.11.2 + # via astroid +yapf==0.29.0 + # via -r requirements-dev.in +zipp==2.2.0 + # via importlib-metadata +zope.event==4.4 + # via gevent +zope.interface==5.1.0 + # via gevent # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/requirements-docs.txt b/requirements-docs.txt index 42b5f12cec..f1957a4cbc 100644 --- a/requirements-docs.txt +++ b/requirements-docs.txt @@ -4,35 +4,92 @@ # # pip-compile requirements-docs.in # -alabaster==0.7.12 # via sphinx -argh==0.26.2 # via sphinx-autobuild -babel==2.8.0 # via sphinx, sphinx-intl -certifi==2019.11.28 # via -c requirements.txt, requests -chardet==3.0.4 # via -c requirements.txt, requests -click==7.1.2 # via sphinx-intl -docutils==0.15.2 # via -c requirements.txt, sphinx -idna==2.8 # via -c requirements.txt, requests -imagesize==1.2.0 # via sphinx -jinja2==2.11.2 # via sphinx -livereload==2.6.3 # via sphinx-autobuild -markupsafe==1.1.1 # via jinja2 -pathtools==0.1.2 # via sphinx-autobuild, watchdog -port_for==0.3.1 # via sphinx-autobuild -pygments==2.7.2 # via sphinx -pytz==2019.3 # via -c requirements.txt, babel -pyyaml==5.3.1 # via sphinx-autobuild -requests==2.22.0 # via -c requirements.txt, sphinx -six==1.14.0 # via -c requirements.txt, livereload, sphinx -snowballstemmer==2.0.0 # via sphinx -sphinx-autobuild==0.7.1 # via -r requirements-docs.in -sphinx-intl==2.0.1 # via -r requirements-docs.in -sphinx-rtd-theme==0.5.0 # via -r requirements-docs.in -sphinx==1.6.4 # via -r requirements-docs.in, sphinx-intl, sphinx-rtd-theme -sphinxcontrib-serializinghtml==1.1.4 # via sphinxcontrib-websupport -sphinxcontrib-websupport==1.2.4 # via -r requirements-docs.in, sphinx -tornado==6.0.4 # via livereload, sphinx-autobuild -urllib3==1.25.8 # via -c requirements.txt, requests -watchdog==0.10.3 # via sphinx-autobuild +alabaster==0.7.12 + # via sphinx +argh==0.26.2 + # via sphinx-autobuild +babel==2.8.0 + # via + # sphinx + # sphinx-intl +certifi==2019.11.28 + # via + # -c requirements.txt + # requests +chardet==3.0.4 + # via + # -c requirements.txt + # requests +click==7.1.2 + # via sphinx-intl +docutils==0.15.2 + # via + # -c requirements.txt + # sphinx +idna==2.8 + # via + # -c requirements.txt + # requests +imagesize==1.2.0 + # via sphinx +jinja2==2.11.3 + # via sphinx +livereload==2.6.3 + # via sphinx-autobuild +markupsafe==1.1.1 + # via jinja2 +pathtools==0.1.2 + # via + # sphinx-autobuild + # watchdog +port_for==0.3.1 + # via sphinx-autobuild +pygments==2.7.4 + # via sphinx +pytz==2019.3 + # via + # -c requirements.txt + # babel +pyyaml==5.4 + # via sphinx-autobuild +requests==2.22.0 + # via + # -c requirements.txt + # sphinx +six==1.14.0 + # via + # -c requirements.txt + # livereload + # sphinx +snowballstemmer==2.0.0 + # via sphinx +sphinx-autobuild==0.7.1 + # via -r requirements-docs.in +sphinx-intl==2.0.1 + # via -r requirements-docs.in +sphinx-rtd-theme==0.5.0 + # via -r requirements-docs.in +sphinx==1.6.4 + # via + # -r requirements-docs.in + # sphinx-intl + # sphinx-rtd-theme +sphinxcontrib-serializinghtml==1.1.4 + # via sphinxcontrib-websupport +sphinxcontrib-websupport==1.2.4 + # via + # -r requirements-docs.in + # sphinx +tornado==6.0.4 + # via + # livereload + # sphinx-autobuild +urllib3==1.25.8 + # via + # -c requirements.txt + # requests +watchdog==0.10.3 + # via sphinx-autobuild # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/requirements.in b/requirements.in index cb1aad9104..5b349ee71b 100644 --- a/requirements.in +++ b/requirements.in @@ -42,4 +42,5 @@ sentry-sdk raven django-bulk-update html5lib==1.1 -pillow==8.0.1 +pillow==8.1.1 +python-dateutil>=2.8.1 diff --git a/requirements.txt b/requirements.txt index d633fa9005..a0fd1d3a9e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -61,7 +61,7 @@ newrelic==5.6.0.135 # via -r requirements.in oauth2client==4.1.3 # via -r requirements.in oauthlib==3.1.0 # via requests-oauthlib pathlib==1.0.1 # via -r requirements.in -pillow==8.0.1 # via -r requirements.in +pillow==8.1.1 # via -r requirements.in progressbar2==3.38.0 # via -r requirements.in prometheus-client==0.7.1 # via django-prometheus protobuf==3.11.3 # via google-api-core, googleapis-common-protos @@ -69,7 +69,7 @@ psycopg2-binary==2.7.4 # via -r requirements.in pyasn1-modules==0.2.8 # via google-auth, oauth2client pyasn1==0.4.8 # via oauth2client, pyasn1-modules, rsa pycountry==17.5.14 # via -r requirements.in, le-utils -python-dateutil==2.8.1 # via botocore +python-dateutil==2.8.1 # via -r requirements.in, botocore python-postmark==0.5.0 # via -r requirements.in python-utils==2.4.0 # via progressbar2 pytz==2019.3 # via celery, django, django-postmark, google-api-core, minio @@ -77,7 +77,7 @@ raven==6.10.0 # via -r requirements.in redis==2.10.5 # via -r requirements.in, django-redis requests-oauthlib==1.3.0 # via google-auth-oauthlib requests==2.22.0 # via -r requirements.in, google-api-core, gspread, requests-oauthlib -rsa==4.0 # via google-auth, oauth2client +rsa==4.1 # via google-auth, oauth2client s3transfer==0.3.3 # via boto3 sentry-sdk==0.14.1 # via -r requirements.in six==1.14.0 # via google-api-core, google-api-python-client, google-auth, google-resumable-media, grpcio, html5lib, oauth2client, progressbar2, protobuf, python-dateutil, python-utils