From c874ef7ca5cc38dcd3367679765e847b05ef6ee2 Mon Sep 17 00:00:00 2001 From: Supriya Garg Date: Wed, 24 Jan 2018 17:25:29 -0500 Subject: [PATCH 1/5] Add support for the Query class, and creating Pandas DataFrame. Most of the code was ported over from the master branch, and updated to use the autogenerated code. Also, add utf-8 encoding to proto/common_pb2.py. --- .../google/cloud/monitoring/__init__.py | 2 + .../google/cloud/monitoring/_dataframe.py | 143 +++++ monitoring/google/cloud/monitoring/query.py | 604 ++++++++++++++++++ .../cloud/monitoring_v3/proto/common_pb2.py | 1 + 4 files changed, 750 insertions(+) create mode 100644 monitoring/google/cloud/monitoring/_dataframe.py create mode 100644 monitoring/google/cloud/monitoring/query.py diff --git a/monitoring/google/cloud/monitoring/__init__.py b/monitoring/google/cloud/monitoring/__init__.py index a0f3fbfc22ab..80bd1f78383b 100644 --- a/monitoring/google/cloud/monitoring/__init__.py +++ b/monitoring/google/cloud/monitoring/__init__.py @@ -14,6 +14,7 @@ from __future__ import absolute_import +from google.cloud.monitoring.query import Query from google.cloud.monitoring_v3 import GroupServiceClient from google.cloud.monitoring_v3 import MetricServiceClient from google.cloud.monitoring_v3 import enums @@ -23,4 +24,5 @@ 'enums', 'types', 'GroupServiceClient', + 'Query', 'MetricServiceClient', ) diff --git a/monitoring/google/cloud/monitoring/_dataframe.py b/monitoring/google/cloud/monitoring/_dataframe.py new file mode 100644 index 000000000000..3ccc7e1eaa4c --- /dev/null +++ b/monitoring/google/cloud/monitoring/_dataframe.py @@ -0,0 +1,143 @@ +# Copyright 2016 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Time series as :mod:`pandas` dataframes."""
+
+import itertools
+
+from google.cloud.monitoring_v3.types import TimeSeries
+
+TOP_RESOURCE_LABELS = (
+    'project_id',
+    'aws_account',
+    'location',
+    'region',
+    'zone',
+)
+
+
+def _extract_header(time_series):
+    """Return a copy of time_series with the points removed."""
+    return TimeSeries(
+        metric=time_series.metric,
+        resource=time_series.resource,
+        metric_kind=time_series.metric_kind,
+        value_type=time_series.value_type
+    )
+
+
+def _extract_labels(time_series):
+    """Build the combined resource and metric labels, with resource_type."""
+    labels = {'resource_type': time_series.resource.type}
+    labels.update(time_series.resource.labels)
+    labels.update(time_series.metric.labels)
+    return labels
+
+
+def _extract_value(typed_value):
+    """Return whichever field of the ``value`` oneof is set on a TypedValue."""
+    value_type = typed_value.WhichOneof('value')
+    return getattr(typed_value, value_type)
+
+
+def _build_dataframe(time_series_iterable,
+                     label=None, labels=None):  # pragma: NO COVER
+    """Build a :mod:`pandas` dataframe out of time series.
+
+    :type time_series_iterable:
+        iterable over :class:`~google.cloud.monitoring_v3.types.TimeSeries`
+    :param time_series_iterable:
+        An iterable (e.g., a query object) yielding time series.
+
+    :type label: str
+    :param label:
+        (Optional) The label name to use for the dataframe header. This can be
+        the name of a resource label or metric label (e.g.,
+        ``"instance_name"``), or the string ``"resource_type"``.
+
+    :type labels: list of strings, or None
+    :param labels:
+        A list or tuple of label names to use for the dataframe header.
+        If more than one label name is provided, the resulting dataframe
+        will have a multi-level column header.
+
+        Specifying neither ``label`` nor ``labels`` results in a dataframe
+        with a multi-level column header including the resource type and
+        all available resource and metric labels.
+
+        Specifying both ``label`` and ``labels`` is an error.
+
+    :rtype: :class:`pandas.DataFrame`
+    :returns: A dataframe where each column represents one time series.
+    """
+    import pandas  # pylint: disable=import-error
+
+    if labels is not None:
+        if label is not None:
+            raise ValueError('Cannot specify both "label" and "labels".')
+        elif not labels:
+            raise ValueError('"labels" must be non-empty or None.')
+
+    columns = []
+    headers = []
+    for time_series in time_series_iterable:
+        pandas_series = pandas.Series(
+            data=[_extract_value(point.value) for point in time_series.points],
+            index=[point.interval.end_time.ToNanoseconds()
+                   for point in time_series.points],
+        )
+        columns.append(pandas_series)
+        headers.append(_extract_header(time_series))
+
+    # Implement a smart default of using all available labels.
+    if label is None and labels is None:
+        resource_labels = set(itertools.chain.from_iterable(
+            header.resource.labels for header in headers))
+        metric_labels = set(itertools.chain.from_iterable(
+            header.metric.labels for header in headers))
+        labels = (['resource_type'] +
+                  _sorted_resource_labels(resource_labels) +
+                  sorted(metric_labels))
+
+    # Assemble the columns into a DataFrame.
+    dataframe = pandas.DataFrame.from_records(columns).T
+
+    # Convert the integer nanosecond timestamps into a DatetimeIndex.
+    dataframe.index = pandas.to_datetime(dataframe.index)
+
+    # Build a multi-level stack of column headers. Some labels may
+    # be undefined for some time series.
+    levels = []
+    for key in labels or [label]:
+        level = [_extract_labels(header).get(key, '') for header in headers]
+        levels.append(level)
+
+    # Build a column Index or MultiIndex. Do not include level names
+    # in the column header if the user requested a single-level header
+    # by specifying "label".
+    dataframe.columns = pandas.MultiIndex.from_arrays(
+        levels,
+        names=labels or None)
+
+    # Sort the rows just in case (since the API doesn't guarantee the
+    # ordering), and sort the columns lexicographically.
+    return dataframe.sort_index(axis=0).sort_index(axis=1)
+
+
+def _sorted_resource_labels(labels):
+    """Sort label names, putting well-known resource labels first."""
+    head = [label for label in TOP_RESOURCE_LABELS if label in labels]
+    tail = sorted(label for label in labels
+                  if label not in TOP_RESOURCE_LABELS)
+    return head + tail
diff --git a/monitoring/google/cloud/monitoring/query.py b/monitoring/google/cloud/monitoring/query.py
new file mode 100644
index 000000000000..4675fb9b2a82
--- /dev/null
+++ b/monitoring/google/cloud/monitoring/query.py
@@ -0,0 +1,604 @@
+# Copyright 2016 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Time series query for the `Google Stackdriver Monitoring API (V3)`_.
+
+.. 
_Google Stackdriver Monitoring API (V3): + https://cloud.google.com/monitoring/api/ref_v3/rest/v3/\ + projects.timeSeries/list +""" + +import copy +import datetime + +import six + +from google.cloud.monitoring._dataframe import _build_dataframe +from google.cloud.monitoring_v3 import types +from google.cloud.monitoring_v3.gapic import enums + +_UTCNOW = datetime.datetime.utcnow # To be replaced by tests. + + +class Query(object): + """Query object for retrieving metric data. + + :type client: :class:`google.cloud.monitoring_v3.gapic.metric_service_client.MetricServiceClient` + :param client: The client to use. + + :type project: str + :param project: The project ID or number. + + :type metric_type: str + :param metric_type: The metric type name. The default value is + :data:`Query.DEFAULT_METRIC_TYPE + <google.cloud.monitoring.query.Query.DEFAULT_METRIC_TYPE>`, + but please note that this default value is provided only for + demonstration purposes and is subject to change. See the + `supported metrics`_. + + :type end_time: :class:`datetime.datetime` + :param end_time: (Optional) The end time (inclusive) of the time interval + for which results should be returned, as a datetime object. + The default is the start of the current minute. + + The start time (exclusive) is determined by combining the + values of ``days``, ``hours``, and ``minutes``, and + subtracting the resulting duration from the end time. + + It is also allowed to omit the end time and duration here, + in which case + :meth:`~google.cloud.monitoring.query.Query.select_interval` + must be called before the query is executed. + + :type days: int + :param days: The number of days in the time interval. + + :type hours: int + :param hours: The number of hours in the time interval. + + :type minutes: int + :param minutes: The number of minutes in the time interval. + + :raises: :exc:`ValueError` if ``end_time`` is specified but + ``days``, ``hours``, and ``minutes`` are all zero. 
+ If you really want to specify a point in time, use + :meth:`~google.cloud.monitoring.query.Query.select_interval`. + + .. _supported metrics: https://cloud.google.com/monitoring/api/metrics + """ + + DEFAULT_METRIC_TYPE = 'compute.googleapis.com/instance/cpu/utilization' + + def __init__(self, client, project, + metric_type=DEFAULT_METRIC_TYPE, + end_time=None, days=0, hours=0, minutes=0): + start_time = None + if days or hours or minutes: + if end_time is None: + end_time = _UTCNOW().replace(second=0, microsecond=0) + start_time = end_time - datetime.timedelta(days=days, + hours=hours, + minutes=minutes) + elif end_time is not None: + raise ValueError('Non-zero duration required for time interval.') + + self._client = client + self._project_path = self._client.project_path(project) + self._end_time = end_time + self._start_time = start_time + self._filter = _Filter(metric_type) + + self._per_series_aligner = 0 + self._alignment_period_seconds = 0 + self._cross_series_reducer = 0 + self._group_by_fields = () + + def __iter__(self): + return self.iter() + + @property + def metric_type(self): + """The metric type name.""" + return self._filter.metric_type + + @property + def filter(self): + """The filter string. + + This is constructed from the metric type, the resource type, and + selectors for the group ID, monitored projects, resource labels, + and metric labels. + """ + return str(self._filter) + + def select_interval(self, end_time, start_time=None): + """Copy the query and set the query time interval. + + Example:: + + import datetime + + now = datetime.datetime.utcnow() + query = query.select_interval( + end_time=now, + start_time=now - datetime.timedelta(minutes=5)) + + As a convenience, you can alternatively specify the end time and + an interval duration when you create the query initially. 
+ + :type end_time: :class:`datetime.datetime` + :param end_time: The end time (inclusive) of the time interval + for which results should be returned, as a datetime object. + + :type start_time: :class:`datetime.datetime` + :param start_time: + (Optional) The start time (exclusive) of the time interval + for which results should be returned, as a datetime object. + If not specified, the interval is a point in time. + + :rtype: :class:`Query` + :returns: The new query object. + """ + new_query = self.copy() + new_query._end_time = end_time + new_query._start_time = start_time + return new_query + + def select_group(self, group_id): + """Copy the query and add filtering by group. + + Example:: + + query = query.select_group('1234567') + + :type group_id: str + :param group_id: The ID of a group to filter by. + + :rtype: :class:`Query` + :returns: The new query object. + """ + new_query = self.copy() + new_query._filter.group_id = group_id + return new_query + + def select_projects(self, *args): + """Copy the query and add filtering by monitored projects. + + This is only useful if the target project represents a Stackdriver + account containing the specified monitored projects. + + Examples:: + + query = query.select_projects('project-1') + query = query.select_projects('project-1', 'project-2') + + :type args: tuple + :param args: Project IDs limiting the resources to be included + in the query. + + :rtype: :class:`Query` + :returns: The new query object. + """ + new_query = self.copy() + new_query._filter.projects = args + return new_query + + def select_resources(self, *args, **kwargs): + """Copy the query and add filtering by resource labels. + + Examples:: + + query = query.select_resources(zone='us-central1-a') + query = query.select_resources(zone_prefix='europe-') + query = query.select_resources(resource_type='gce_instance') + + A keyword argument ``