1

Here's sample records of my_task collection, when the task status is changed, there will be a record added into the status_changed_utc field with the status code and time, status 6 stands for the task has been completed; there's also requested_completion_utc field on the top level indicates what is the expected completion time of the task, so how do I find out the records with actual completion time that is later than the requested_completion_utc, for the tasks created in this month? pls note that requested_completion_utc and the status_changed_utc with status 6 both may not exist in the record.

sample records:

   { 
        "_id" : ObjectId("5ce726ae92e2247db561a2f2"), 
       "created_utc" : ISODate("2019-05-23T23:03:10.000+0000"), 
        "requested_completion_utc" : ISODate("2019-05-23T00:00:00.000+0000"), 
        "status_changed_utc" : [
            {
                "status" : NumberInt(1), 
                "time" : ISODate("2019-05-23T23:03:10.000+0000")
            }, 
            {
                "status" : NumberInt(2), 
                "time" : ISODate("2019-05-23T23:05:03.000+0000")
            }, 
            {
                "status" : NumberInt(4), 
                "time" : ISODate("2019-05-23T23:05:06.000+0000")
            }, 
            {
                "status" : NumberInt(5), 
                "time" : ISODate("2019-05-23T23:05:07.000+0000")
            }, 
            {
                "status" : NumberInt(6), 
                "time" : ISODate("2019-05-23T23:05:09.000+0000")
            }
        ]
    },

    { 
        "_id" : ObjectId("5ce726ae92e2247db561a232"), 
      "created_utc" : ISODate("2019-05-23T23:03:10.000+0000"), 
        "status_changed_utc" : [
            {
                "status" : NumberInt(1), 
                "time" : ISODate("2019-05-23T23:03:10.000+0000")
            }
        ]
    },

    { 
        "_id" : ObjectId("5ce726ae92e2247db561a231"), 
      "created_utc" : ISODate("2019-09-23T21:03:10.000+0000"), 
       "requested_completion_utc" : ISODate("2019-09-21T00:00:00.000+0000"), 
        "status_changed_utc" : [
            {
                "status" : NumberInt(1), 
                "time" : ISODate("2019-09-23T23:03:10.000+0000")
            }
        ]
    }

3 Answers 3

1

I think there is probably a more efficient way to write this but I was able to come up with this using the aggregation framework.

db.tasks.aggregate([
  // Only show results with the requested_completion_utc value
  { $match: { requested_completion_utc: { $exists: 1 } } },

  // Get only the most recent status change by slicing the last element of the array
  { $project: { 
    id: 1, 
    requested_completion_utc: 1, 
    status_changed_utc: { $slice: [ "$status_changed_utc", -1 ] } 
  }},

  // Perform the comparison to determine if the operation was late
  { $project: { 
    id: 1,
    requested_completion_utc: 1,
    status_changed_utc: 1,
    isLate: { $gt: [ "$requested_completion_utc", "$status_changed_utc.time" ] },
  }},

  // Only display the late results
  { $match: { isLate: true } },
]);

With your documents from above this returns the following:

{ "_id" : ObjectId("5ce726ae92e2247db561a2f2"), "requested_completion_utc" : ISODate("2019-05-23T00:00:00Z"), "status_changed_utc" : [ { "status" : 6, "time" : ISODate("2019-05-23T23:05:09Z") } ], "isLate" : true }
{ "_id" : ObjectId("5ce726ae92e2247db561a231"), "requested_completion_utc" : ISODate("2019-09-21T00:00:00Z"), "status_changed_utc" : [ { "status" : 1, "time" : ISODate("2019-09-23T23:03:10Z") } ], "isLate" : true }

Note that this only returns the most recent 'late' offender, if you wanted all operations that occurred after that time you could use the $unwind aggregation operator to create a separate document for every element in the status_changed_utc array rather than the $project stage slicing the most recent element of each array.

Sign up to request clarification or add additional context in comments.

Comments

0

the following aggregate query will return the IDs of tasks created in the month of may which has a completed status (6) that was completed after the requested completion date. hope i understood your requirement correctly.

db.my_task.aggregate([
    {
        "$match": {
            "created_utc": {
                "$gt": ISODate("2019-04-30T18:30:00.000Z"),
                "$lt": ISODate("2019-05-31T18:30:00.000Z")
            },
            "status_changed_utc": {
                "$elemMatch": {
                    "status": 6
                }
            }
        }
    },
    {
        "$unwind": "$status_changed_utc"
    },
    {
        "$project": {
            "status": "$status_changed_utc.status",
            "isLate": {
                "$gt": [
                    "$status_changed_utc.time",
                    "$requested_completion_utc"
                ]
            },
            "_id": 1
        }
    },
    {
        "$match": {
            "isLate": true,
            "status": 6
        }
    },
    {
        "$project": {
            "_id": 1
        }
    }
])

here's the c# code that generated the above command in case anybody's interested:

using MongoDB.Entities;
using System;
using System.Linq;

namespace StackOverflow
{
    [Name("my_task")]
    public class MyTask : Entity
    {
        public DateTime created_utc { get; set; }
        public DateTime requested_completion_utc { get; set; }
        public Status[] status_changed_utc { get; set; }
    }

    public class Status
    {
        public int status { get; set; }
        public DateTime time { get; set; }
    }

    public static class Program
    {
        public static void Main()
        {
            new DB("my_database", "localhost");

            var start = new DateTime(2019, 05, 1, 0, 0, 0).ToUniversalTime();
            var end = new DateTime(2019, 06, 1, 0, 0, 0).ToUniversalTime();

            var result = DB.Queryable<MyTask>()
                           .Where(t =>
                                  t.created_utc > start &&
                                  t.created_utc < end &&
                                  t.status_changed_utc.Any(s => s.status == 6))
                           .SelectMany(t => t.status_changed_utc,
                                            (t, s) => new
                                            {
                                                t.ID,
                                                s.status,
                                                isLate = s.time > t.requested_completion_utc
                                            })
                           .Where(x => x.isLate == true && x.status == 6)
                           .Select(x => x.ID)
                           .ToList();
        }
    }
}

Comments

0

I have similar question.

Sorting first, and then project the latest one is not working my case because I need to output more than one completed time.

Here is my question, except for using $unwind function are there any other ways to handle this array in documents?

The reason I don't want to use $unwind is deconstruct an array field to output a document for each element, and I have multiple filters applies to the query, and it will increase the total amount of records to match with other filters.

This is my sample records

"_id" : ObjectId("5d9b69fae4757402b4b4ca0d"), 
"status_changed_utc" : [
    {
        "status" : NumberInt(1), 
        "time" : ISODate("2019-05-20T23:03:10.000+0000")
    }, 
    {
        "status" : NumberInt(2), 
        "time" : ISODate("2019-05-23T23:04:03.000+0000")
    }, 
    {
        "status" : NumberInt(4), 
        "time" : ISODate("2019-05-23T23:05:06.000+0000")
    }, 
    {
        "status" : NumberInt(5), 
        "time" : ISODate("2019-05-23T23:05:07.000+0000")
    }, 
    {
        "status" : NumberInt(6), 
        "time" : ISODate("2019-05-23T23:05:09.000+0000")
    }
], 
"requested_completion_utc" : ISODate("2019-05-22T23:05:09.000+0000")

When I try to use this query

db.getCollection("test").aggregate(
[
    { 
        "$match" : {
            "requested_completion_utc" : {
                "$exists" : 1.0
            }
        }
    }, 
    { 
        "$project" : {
            "_id" : 1.0, 
            "requested_completion_utc" : 1.0, 
            "status_changed_utc" : {
                "$slice" : [
                    "$status_changed_utc", 
                    -1.0
                ]
            }
        }
    }
]

It will return this result to me.

{ 
"_id" : ObjectId("5d9b69fae4757402b4b4ca0d"), 
"requested_completion_utc" : ISODate("2019-05-22T23:05:09.000+0000"), 
"status_changed_utc" : [
    {
        "status" : NumberInt(6), 
        "time" : ISODate("2019-05-23T23:05:09.000+0000")
    }
]
}

Is that possible to return status NumberInt(2) instead of NumberInt(6) which is the latest completed status to me?

This is what I expected result

{ 
    "_id" : ObjectId("5d9b69fae4757402b4b4ca0d"), 
    "requested_completion_utc" : ISODate("2019-05-22T23:05:09.000+0000"), 
    "status_changed_utc" : [
        {
            "status" : NumberInt(2), 
            "time" : ISODate("2019-05-23T23:04:03.000+0000")
        }
    ]
}

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.