[appengine-afterburner] r30 committed - ...

0 views

Skip to first unread message

appengine-...@googlecode.com

unread,

Jul 18, 2011, 8:28:36 PM7/18/11

to appengine-after...@googlegroups.com

Revision: 30
Author: appengine...@gmail.com
Date: Mon Jul 18 17:27:33 2011
Log:
Adds 'samplez' module to App Engine Afterburner.

Revision created by MOE tool push_codebase.
MOE_MIGRATION=2618

http://code.google.com/p/appengine-afterburner/source/detail?r=30

Added:
/trunk/python/demo/materialized_map
/trunk/python/demo/materialized_map/app.yaml
/trunk/python/demo/materialized_map/appengine_config.py
/trunk/python/demo/materialized_map/cron.yaml
/trunk/python/demo/materialized_map/main.py
/trunk/python/demo/samplez
/trunk/python/demo/samplez/app.yaml
/trunk/python/demo/samplez/form.html
/trunk/python/demo/samplez/main.py
/trunk/python/src/afterburner/experimental/samplez
/trunk/python/src/afterburner/experimental/samplez/__init__.py
/trunk/python/src/afterburner/experimental/samplez/samplez.html
/trunk/python/src/afterburner/experimental/samplez/samplez.py
/trunk/python/src/afterburner/experimental/samplez/samplez.rst
/trunk/python/src/afterburner/experimental/samplez/samplez.yaml
/trunk/python/src/afterburner/experimental/samplez/samplez_test.py
Deleted:
/trunk/python/demo/app.yaml
/trunk/python/demo/appengine_config.py
/trunk/python/demo/cron.yaml
/trunk/python/demo/main.py
Modified:
/trunk/python/build.sh
/trunk/python/src/afterburner/testing/test_base.py
/trunk/python/src/index.rst

=======================================
--- /dev/null
+++ /trunk/python/demo/materialized_map/app.yaml Mon Jul 18 17:27:33 2011
@@ -0,0 +1,11 @@
+application: abdemo-materialized-map
+version: 1
+runtime: python
+api_version: 1
+
+includes:
+- afterburner/experimental/db/triggers.yaml
+
+handlers:
+- url: /
+ script: main.py
=======================================
--- /dev/null
+++ /trunk/python/demo/materialized_map/appengine_config.py Mon Jul 18
17:27:33 2011
@@ -0,0 +1,2 @@
+#!/usr/bin/env python
+import main
=======================================
--- /dev/null
+++ /trunk/python/demo/materialized_map/cron.yaml Mon Jul 18 17:27:33 2011
@@ -0,0 +1,4 @@
+cron:
+- description: triggers processing
+ url: /_ab/triggers/process
+ schedule: every 1 minutes
=======================================
--- /dev/null
+++ /trunk/python/demo/materialized_map/main.py Mon Jul 18 17:27:33 2011
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+#
+# Copyright 2010 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Demo app for the materialized_map and triggers modules."""
+
+import logging
+
+from google.appengine.ext import db
+from google.appengine.ext import webapp
+from google.appengine.ext.webapp.util import run_wsgi_app
+
+from afterburner.experimental.db import triggers
+from afterburner.experimental.db import materialized_map as mmap
+
+
+class TestEntity(db.Model):
+ pass
+
+class FooEntity(db.Model):
+ pass
+
+...@mmap.materialized_map(TestEntity.kind())
+def test_map(entity):
+ yield FooEntity()
+
+
+class MainPage(webapp.RequestHandler):
+ def get(self):
+ TestEntity().put()
+ self.response.out.write('Hello, webapp World!')
+
+application = webapp.WSGIApplication([('/', MainPage)],
+ debug=True)
+
+
+def main():
+ run_wsgi_app(application)
+
+if __name__ == "__main__":
+ main()
+
=======================================
--- /dev/null
+++ /trunk/python/demo/samplez/app.yaml Mon Jul 18 17:27:33 2011
@@ -0,0 +1,12 @@
+application: abdemo-samplez
+version: 1
+runtime: python
+api_version: 1
+
+
+includes:
+- afterburner/experimental/samplez/samplez.yaml
+
+handlers:
+- url: /
+ script: main.py
=======================================
--- /dev/null
+++ /trunk/python/demo/samplez/form.html Mon Jul 18 17:27:33 2011
@@ -0,0 +1,19 @@
+<!doctype html>
+<html>
+<head>
+ <title>Samplez example app</title>
+</head>
+<body>
+
+<h1>Samplez example app</h1>
+
+<a href="/_ab/samplez">View samplez</a>
+
+<form action="" method="post" accept-charset="utf-8">
+ <p><label for="key">Key:</label> <input type="text" name="key">
+ <p><label for="value">Value:</label> <input type="text" name="value">
+ <p><input type="submit" value="Sample">
+</form>
+
+</body>
+</html>
=======================================
--- /dev/null
+++ /trunk/python/demo/samplez/main.py Mon Jul 18 17:27:33 2011
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+#
+# Copyright 2010 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Demo app for the samplez module."""
+
+import logging
+
+from google.appengine.ext import db
+from google.appengine.ext import webapp
+from google.appengine.ext.webapp import template
+from google.appengine.ext.webapp import util
+
+from afterburner.experimental import samplez
+
+
+SAMPLE_VALUE = samplez.Section(
+ 'Popular value',
+ samplez.Config(
+ 'popular_1m',
+ period=60,
+ by_value=True,
+ samples=10000,
+ value_units='value'),
+ samplez.Config(
+ 'popular_5m',
+ period=300,
+ by_value=True,
+ samples=10000,
+ value_units='value'))
+
+
+class MainHandler(webapp.RequestHandler):
+ def get(self):
+ self.response.out.write(template.render('form.html', {}))
+
+ def post(self):
+ key = self.request.get('key')
+ value = int(self.request.get('value'))
+ samplez.set(SAMPLE_VALUE, key, value)
+ self.redirect(self.request.url)
+
+
+application = samplez.Middleware(
+ webapp.WSGIApplication([('/', MainHandler)], debug=True))
+
+
+def main():
+ util.run_wsgi_app(application)
+
+
+if __name__ == "__main__":
+ main()
=======================================
--- /dev/null
+++ /trunk/python/src/afterburner/experimental/samplez/__init__.py Mon Jul
18 17:27:33 2011
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+#
+# Copyright 2011 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from samplez import *
=======================================
--- /dev/null
+++ /trunk/python/src/afterburner/experimental/samplez/samplez.html Mon Jul
18 17:27:33 2011
@@ -0,0 +1,85 @@
+<html>
+<head>
+ <title>Samplez</title>
+ <style type="text/css" media="screen">
+ .stats-table {
+ width: 100%;
+ border-color: #000;
+ border-width: 0 0 1px 1px;
+ border-style: solid;
+ border-spacing: 0;
+ border-collapse: collapse;
+ }
+ .stats-table td,
+ .stats-table th {
+ border-color: #000;
+ border-width: 1px 1px 0 0;
+ border-style: solid;
+ margin: 0;
+ padding: 3px;
+ background-color: #E0ECF8;
+ }
+ .summary-stat {
+ margin-right: 0.5em;
+ }
+ </style>
+</head>
+<body>
+
+<h1>Samplez sections</h1>
+<ul>
+{% for section in all_sections %}
+ <li>
+ <a href="#{{section.title}}">{{section.title}}</a>
+ <ul>
+ {% for config in all_configs %}
+ <li>
+ <a href="#{{config.title}}">{{config.title}}</a>
+ </li>
+ {% endfor %}
+ </ul>
+ </li>
+{% endfor %}
+</ul>
+
+{% for section in all_sections %}
+ <h2 id="{{section.title}}">{{section.title}}</h1>
+ <div>
+ {% for result in section.results %}
+ <div class="stats-table-section" id="{{result.title}}">
+ <h3>{{result.title}}</h3>
+ <div class="summary">
+ <span class="summary-stat">Sample window: {{result.time_elapsed|
floatformat:"0"}} seconds</span>
+ <span class="summary-stat">Total samples:
{{result.total_samples}}</span>
+ <span class="summary-stat">Unique keys:
{{result.unique_samples}}</span>
+ <span class="summary-stat">Overall rate: {{result.overall_rate|
floatformat:"-4"}}/sec</span>
+ </div>
+ {% if result.unique_samples %}
+ <table class="stats-table">
+ <tr align="center">
+ <th align="left">{{result.key_name}}</th>
+ <th>Samples</th>
+ <th>Frequency</th>
+ <th>Min</th>
+ <th>Max</th>
+ <th>Average</th>
+ </tr>
+ {% for sample in result.sample_objects|dictsortreversed:"frequency"|
slice:":40" %}
+ <tr align="right">
+ <td align="left">{{sample.key|escape}}</td>
+ <td>{{sample.count}}</td>
+ <td>{{sample.frequency|floatformat:"-2"}}/sec</td>
+ <td>{{sample.min|floatformat:"-2"}}</td>
+ <td>{{sample.max|floatformat:"-2"}}</td>
+ <td>{{sample.average|floatformat:"-2"}} {{result.value_units}}</td>
+ </tr>
+ {% endfor %}
+ </table>
+ {% endif %}
+ </div>
+ {% endfor %}
+ </div>
+{% endfor %}
+
+</body>
+</html>
=======================================
--- /dev/null
+++ /trunk/python/src/afterburner/experimental/samplez/samplez.py Mon Jul
18 17:27:33 2011
@@ -0,0 +1,893 @@
+#!/usr/bin/env python
+#
+# Copyright 2011 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""
+Utility for statistically sampling strings with associated values.
+
+Overview
+========
+
+Install 'Middleware' into your WSGI app to use. Call the 'set' function at
any
+time during your request to sample a key/value probabilistically. Uses a
+simple algorithm (http://en.wikipedia.org/wiki/Reservoir_sampling)
+to approximate incidence rates and sums. Cost per request is ~3 memcache
calls.
+
+Gathered stats are available via built-in UI that's separated into related
+sections. Users are encouraged to use multiple overlapping time-periods of
+samplers to provide different levels of resolution.
+
+
+Usage
+=====
+
+Example code
+
+::
+
+ SAMPLEZ_POPULAR_CONTENT = samplez.Section(
+ 'Popular content',
+ samplez.Config(
+ 'content_10m',
+ period=600,
+ by_value=True,
+ samples=10000,
+ value_units='qps'),
+ samplez.Config(
+ 'content_1h',
+ period=3600,
+ by_value=True,
+ samples=10000,
+ value_units='qps'))
+
+ SAMPLEZ_LATENCY = samplez.Section(
+ 'Latency',
+ samplez.Config(
+ 'latency_10m',
+ period=600,
+ by_value=True,
+ samples=10000,
+ value_units='ms'),
+ samplez.Config(
+ 'latency_1h',
+ period=3600,
+ by_value=True,
+ samples=10000,
+ value_units='ms'))
+
+ samplez.set(SAMPLEZ_LATENCY, 'http://example.com/some/content', 124)
+ samplez.set(SAMPLEZ_POPULAR_CONTENT, 'http://example.com/some/content')
+
+
+Attribution
+===========
+
+Code originally from the PubSubHubbub project:
+
+http://pubsubhubbub.googlecode.com
+"""
+
+__author__ = 'Brett Slatkin (bsla...@google.com)'
+
+import gc
+import logging
+import os
+import random
+import re
+import struct
+import threading
+import time
+
+from google.appengine.api import memcache
+from google.appengine.ext import webapp
+from google.appengine.ext.webapp import util as webapp_util
+
+
+# TODO: Support configs that *always* sample particular URLs or domains.
+# Could have a console insert a list of values into memcache which is
fetched
+# and cached in each runtime, and then those domains would always match.
+
+# TODO: Allow for synchronized reservoirs that reset at synchronized
intervals.
+# This will let us view overlapping windows of samples across a time period
+# instead of having to reset every N seconds.
+
+# TODO: Enable MultiSampler to make memcache calls that are asynchronous
+# and/or fire-and-forget to minimize the latency overhead of using the
sampler.
+
+
+class ConfigError(Exception):
+ """Something is wrong with a configured DoS limit, sampler, or scorer."""
+
+
+# Matches four groups:
+# 1) an IP, 2) a domain prefix, 3) a domain suffix, 4) other (eg,
localhost)
+_URL_DOMAIN_RE = re.compile(
+ r'https?://(?:'
+ r'([0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)|' # IP address
+ r'(?:((?:[a-zA-Z0-9-]+\.)*)([a-zA-Z0-9-]+\.[a-zA-Z0-9-]+))|' # Domain
+ r'([^/]+)' # Anything else
+ r')(?:/.*)?') # The rest of the URL
+
+# Domains where only the suffix is important.
+_DOMAIN_SUFFIX_EXCEPTIONS = frozenset([
+ # Examples: blogspot.com, appspot.com
+])
+
+# Maximum size of the cache of URLs to domains.
+_DOMAIN_CACHE_SIZE = 100
+
+# Simple local cache used for per-request URL to domain mappings.
+_DOMAIN_CACHE = {}
+
+
+def _get_url_domain(url):
+ """Returns the domain for a URL or 'bad_url' if it's not a valid URL."""
+ result = _DOMAIN_CACHE.get(url)
+ if result is not None:
+ return result
+ if len(_DOMAIN_CACHE) >= _DOMAIN_CACHE_SIZE:
+ _DOMAIN_CACHE.clear()
+
+ match = _URL_DOMAIN_RE.match(url)
+ if match:
+ groups = list(match.groups())
+ if groups[1] and groups[2] and groups[2] not in
_DOMAIN_SUFFIX_EXCEPTIONS:
+ groups[2] = groups[1] + groups[2]
+ groups[1] = None
+ groups = filter(bool, groups)
+ else:
+ groups = []
+ result = (groups + ['bad_url'])[0]
+
+ _DOMAIN_CACHE[url] = result
+ return result
+
+
+class Config(object):
+ """Configuration for a reservoir sampler."""
+
+ def __init__(self,
+ name,
+ period=None,
+ samples=None,
+ by_domain=False,
+ by_value=False,
+ rate=1,
+ max_value_length=75,
+ tolerance=10,
+ title=None,
+ key_name='Key',
+ value_units=''):
+ """Initializer.
+
+ Args:
+ name: Programmatic name to use for this reservoir in memcache.
+ period: Time period for this reservoir in seconds.
+ samples: Total number of samples to use in the reservoir.
+ by_domain: True if keys are URLs and the URL domains should be used
as
+ the sampling key.
+ by_value: True if the whole value should be used as the sampling key.
+ rate: Sampling rate (between 0 and 1) to reduce the latency overhead
of
+ applying this sampler.
+ max_value_length: Length to truncate a sampling key to before storing
+ it in memcache.
+ tolerance: Number of seconds to allow for samples to stay valid for
+ after a reservoir reset has happened.
+ title: Nice-looking title of this config; will use the name if
+ not supplied.
+ key_name: The noun of the key (e.g., 'domain', 'url').
+ value_units: The noun of the value units (e.g., 'milliseconds').
+ """
+ if not name:
+ raise ConfigError('Must specify a name')
+
+ try:
+ period = int(period)
+ except ValueError, e:
+ raise ConfigError('Invalid period: %s' % e)
+ if period <= 0:
+ raise ConfigError('period must be positive')
+
+ try:
+ samples = int(samples)
+ except ValueError, e:
+ raise ConfigError('Invalid samples: %s' % e)
+ if samples <= 0:
+ raise ConfigError('samples must be positive')
+
+ if not (by_domain ^ by_value):
+ raise ConfigError('Must specify by_domain or by_value')
+
+ try:
+ rate = float(rate)
+ except ValueError, e:
+ raise ConfigError('Invalid rate: %s' % e)
+ if not (0 <= rate <= 1):
+ raise ConfigError('rate must be between 0 and 1')
+
+ try:
+ tolerance = int(tolerance)
+ except ValueError, e:
+ raise ConfigError('Invalid tolerance: %s' % e)
+ if tolerance < 0:
+ raise ConfigError('tolerance must be non-negative')
+
+ self.name = name
+ self.title = title or name
+ self.key_name = key_name
+ self.value_units = value_units
+ self.samples = samples
+ self.rate = rate
+ self.period = period
+ self.inverse_rate = 1.0 / self.rate
+ self.by_value = by_value
+ self.by_domain = by_domain
+ self.max_value_length = max_value_length
+ self.tolerance = tolerance
+
+ if by_value:
+ self.kind = 'by_value'
+ else:
+ self.kind = 'by_domain'
+ self.counter_key = '%s:%s:counter' % (self.name, self.kind)
+ self.start_key = '%s:%s:start_time' % (self.name, self.kind)
+ self._position_key_template = '%s:%s:%%d' % (self.name, self.kind)
+
+ def position_key(self, index):
+ """Generates the position key for the sample slot with the given index.
+
+ Args:
+ index: Numerical index of the sample position whose key to retrieve.
+
+ Returns:
+ Memcache key to use.
+ """
+ return self._position_key_template % index
+
+ def is_expired(self, last_time, current_time):
+ """Checks if this config is expired.
+
+ Args:
+ last_time: UNIX timestamp when this config's period started.
+ current_time: UNIX timestamp of the current time.
+
+ Returns:
+ True if the config period has expired.
+ """
+ return (current_time - last_time) > self.period
+
+ def adjust_value(self, key):
+ """Adjust the value for a sampling key.
+
+ Args:
+ key: The sampling key to adjust.
+
+ Returns:
+ The adjusted key.
+ """
+ if self.by_value:
+ adjusted_key = key
+ else:
+ adjusted_key = _get_url_domain(key)
+
+ # Truncate and encode the key.
+ if len(adjusted_key) > self.max_value_length:
+ adjusted_key = adjusted_key[:self.max_value_length]
+ if isinstance(adjusted_key, unicode):
+ adjusted_key = unicode(adjusted_key).encode('utf-8')
+
+ return adjusted_key
+
+ def should_sample(self, key, coin_flip):
+ """Checks if the key should be sampled.
+
+ Args:
+ key: The sampling key to check.
+ coin_flip: Random value between 0 and 1.
+
+ Returns:
+ True if the sample should be taken, False otherwise.
+ """
+ return coin_flip < self.rate
+
+ def compute_frequency(self, count, found, total, elapsed):
+ """Computes the frequency of a sample.
+
+ Args:
+ count: Total number of samples of this key.
+ found: Total number of samples present for all keys.
+ total: The total number of sampling events so far, regardless of
+ whether or not the sample was saved.
+ elapsed: Seconds elapsed during the current sampling period.
+
+ Returns:
+ The frequency, in events per second, of this key in the time period,
+ or None if no samples have been taken yet.
+ """
+ if not total or not found:
+ return None
+ return self.inverse_rate * (1.0 * count / found) * (1.0 * total /
elapsed)
+
+
+class Section(object):
+ """A set of related Configs with a pretty name.
+
+ Configs that are added to a Section will be auto-registered with the
module
+ so they can be displayed on built-in status pages.
+ """
+
+ _ALL_SECTIONS = []
+
+ def __init__(self, title, *configs):
+ """Initializer.
+
+ Args:
+ title: Pretty title for the section.
+ *configs: Set of Config instances for this section.
+ """
+ self.title = title
+ self.configs = list(configs)
+ Section._ALL_SECTIONS.append(self)
+
+ def results(self, **kwargs):
+ """Gets statistics for a Section, optionally for a single key.
+
+ Use when retrieving data from multiple configs; ensures that the memory
+ usage of the previous result is garbage collected before the next one
+ is returned.
+
+ Args:
+ sampler: Used for testing; the MultiSampler instance to use for
fetching
+ results.
+ kwargs: Keyword arguments to pass to the 'get' method of the sampler.
+
+ Returns:
+ Generator that yields each SampleResult object for each config
belonging
+ to the given Section.
+ """
+ sampler = kwargs.pop('sampler', None)
+ if not sampler:
+ sampler = MultiSampler()
+ for config in self.configs:
+ result = sampler.get(config, **kwargs)
+ yield result
+ del result
+ # NOTE: This kinda sucks, but the result sets are really large so
+ # we need to make sure the garbage collector is doing its job so we
+ # don't bloat memory over the course of a single stats request.
+ gc.collect()
+
+
+class Reporter(object):
+ """Contains a batch of keys and values for potential sampling."""
+
+ def __init__(self):
+ """Initializer."""
+ # Keep a list of input keys in order so we can iterate through the
+ # dictionary in order during testing. This costs little and vastly
+ # simplifies testing.
+ self.keys = []
+ # Maps key -> {config -> value}
+ self.param_dict = {}
+ # Maps config -> [key, ...]
+ self.config_dict = {}
+
+ def set(self, config_or_section, key, value=1):
+ """Sets a key/value for one or more configs.
+
+ Each config/key combination may only have a single value. Subsequent
+ calls to this method with the same key/config will overwrite the
+ previous value.
+
+ Args:
+ config_or_section: The Config object to set the value for or a
Section
+ instance comprised of multiple Config objects that should all be
+ updated by this call.
+ key: The sampling key to add.
+ value: The value to set for this config. Coerced to an integer.
+ """
+ value = int(value)
+
+ config_list = []
+ if isinstance(config_or_section, Section):
+ config_list.extend(config_or_section.configs)
+ else:
+ config_list.append(config_or_section)
+
+ value_dict = self.param_dict.get(key)
+ if value_dict is None:
+ self.param_dict[key] = value_dict = {}
+
+ for config in config_list:
+ # Add it once per config to simulate the behavior as if we had called
+ # the set() method multiple times on separate config instances.
+ self.keys.append(key)
+ value_dict[config] = value
+ present_list = self.config_dict.get(config)
+ if present_list is None:
+ self.config_dict[config] = present_list = []
+ present_list.append(key)
+
+ def get(self, key, config):
+ """Gets the value for a key/config.
+
+ Args:
+ key: The sampling key to retrieve the value for.
+ config: The Config object to get the value for.
+
+ Returns:
+ The value for the key/config or None if it's not present.
+ """
+ return self.param_dict.get(key, {}).get(config)
+
+ def remove(self, key, config):
+ """Removes a key/value for a specific config.
+
+ If the key is not present for the config, this method does nothing.
+
+ Args:
+ key: The sampling key to remove.
+ config: The Config object to remove the key for.
+ """
+ try:
+ del self.param_dict[key][config]
+ self.config_dict[config].remove(key)
+ except KeyError:
+ pass
+
+ def all_keys(self):
+ """Returns all the sampling keys present across all configs.
+
+ Each key will be present at least once, but some keys may be present
+ more than once if they were inserted repeatedly. The keys are in
+ insertion order. This simplifies testing of this class.
+ """
+ return self.keys
+
+ def get_keys(self, config):
+ """Retrieves the keys present for a specific Config.
+
+ Args:
+ config: The Config object to get the keys for.
+
+ Returns:
+ The list of keys present for this config, or an empty list if none.
+ """
+ return self.config_dict.get(config, [])
+
+
+class SampleResult(object):
+ """Contains the current results of a sampler for a given config."""
+
+ def __init__(self, config, total_samples, time_elapsed):
+ """Initializer.
+
+ Args:
+ config: The Config these results are for.
+ total_samples: The total number of sampling events that have
occurred.
+ This is *not* the number of unique samples present in the table.
+ time_elapsed: Time in seconds that have elapsed in the current
period.
+ """
+ self.config = config
+ self.total_samples = total_samples
+ self.time_elapsed = time_elapsed
+ self.unique_samples = 0
+ self.title = config.title
+ self.key_name = config.key_name
+ self.value_units = config.value_units
+
+ # Maps key -> [(when, value), ...]
+ self.sample_dict = {}
+
+ def add(self, key, when, value):
+ """Adds a new sample to these results.
+
+ Args:
+ key: The sampling key.
+ when: When the sample was made, as a UNIX timestamp.
+ value: The value that was sampled.
+ """
+ samples = self.sample_dict.get(key)
+ if samples is None:
+ self.sample_dict[key] = samples = []
+ samples.append((when, value))
+ self.unique_samples += 1
+
+ def overall_rate(self):
+ """Gets the overall rate of events.
+
+ Returns:
+ Total events per second.
+ """
+ return 1.0 * self.total_samples / self.time_elapsed
+
+ def get_min(self, key):
+ """Gets the min value seen for a key.
+
+ Args:
+ key: The sampling key.
+
+ Returns:
+ The minimum value or None if this key does not exist.
+ """
+ samples = self.sample_dict.get(key)
+ if samples is None:
+ return None
+ return min(samples, key=lambda x: x[1])[1]
+
+ def get_max(self, key):
+ """Gets the max value seen for a key.
+
+ Args:
+ key: The sampling key.
+
+ Returns:
+ The maximum value or None if this key does not exist.
+ """
+ samples = self.sample_dict.get(key)
+ if samples is None:
+ return None
+ return max(samples, key=lambda x: x[1])[1]
+
+ def get_frequency(self, key):
+ """Gets the frequency of events for this key during the sampling
period.
+
+ Args:
+ key: The sampling key.
+
+ Returns:
+ The frequency as events per second or None if this key does not
exist.
+ """
+ samples = self.sample_dict.get(key)
+ if samples is None:
+ return None
+ return self.config.compute_frequency(
+ len(samples),
+ self.unique_samples,
+ self.total_samples,
+ self.time_elapsed)
+
+ def get_average(self, key):
+ """Gets the weighted average of this key's sampled values.
+
+ Args:
+ key: The sampling key.
+
+ Returns:
+ The weighted average or None if this key does not exist.
+ """
+ samples = self.sample_dict.get(key)
+ if not samples:
+ return None
+ total = 0.0
+ for sample in samples:
+ total += sample[1]
+ return total / len(samples)
+
+ def get_count(self, key):
+ """Gets the count of unique samples for a key.
+
+ Args:
+ key: The sampling key.
+
+ Returns:
+ The number of items. Will be zero if the key does not exist.
+ """
+ return len(self.sample_dict.get(key, []))
+
+ def get_samples(self, key):
+ """Gets the unique sample data for a key.
+
+ Args:
+ key: The sampling key.
+
+ Returns:
+ List of tuple (when, value) where:
+ when: The UNIX timestamp for the sample.
+ value: The sample value.
+ """
+ return self.sample_dict.get(key, [])
+
+ def set_single_sample(self, key):
+ """Sets that this result is for a single key.
+
+ Args:
+ key: The sampling key.
+ """
+ self.total_samples = self.get_count(key)
+
+ def sample_objects(self):
+ """Gets the contents of this result object for use in template
rendering.
+
+ Returns:
+ Generator of model objects.
+ """
+ for key in self.sample_dict:
+ yield {
+ 'key': key,
+ 'count': self.get_count(key),
+ 'frequency': self.get_frequency(key),
+ 'min': self.get_min(key),
+ 'max': self.get_max(key),
+ 'average': self.get_average(key),
+ }
+
+
+class MultiSampler(object):
+ """Sampler that saves key/value pairs for multiple reservoirs in
parallel.
+
+ The basic algorithm is:
+
+ 1. Get the reservoir start timestamp.
+ 2. If more than period seconds have elapsed, set the timestamp to now,
set
+ the reservoir's event counter to zero (average case this is
skipped).
+ 3. Increment the event counter by the number of new samples.
+ 4. Set memcache values to incoming samples following the reservoir
+ algorithm, potentially only sampling a subset.
+
+ The benefit of this approach is it can be applied to many reservoirs in
+ parallel without incurring additional API calls. The only limit is the
32MB
+ limit on App Engine batch API calls, which puts a cap on the amount of
+ samples that can be made simultaneously.
+
+ Samples are stored in keys like: 'name:by_value:0', 'name:by_value:1'
+
+ Values stored for samples look like: 'key_sample:WWWW:NNNN' where
the 'N's
+ represent the sample value as a big-endian-encoded 4-byte string, and the
+ 'W's are a UNIX timestamp as a big-endian-encoded 4-byte string. The
+ timestamp is used to ignore samples that are not from the current period.
+
+ There can be a race for resetting the timestamp for a sampler right after
+ the period starts, but it always favors the caller who inserted last (all
+ earlier data will be overwritten). This results in some missing data for
+ short-period samplers, but it's okay.
+ """
+
+ def __init__(self, gettime=time.time):
+ """Initializer.
+
+ Args:
+ gettime: Used for testing.
+ """
+ self.gettime = gettime
+
+ def sample(self,
+ reporter,
+ getrandom=random.random,
+ randrange=random.randrange):
+ """Samples a set of reported key/values synchronously.
+
+ Args:
+ reporter: Reporter instance containing key/values to sample.
+ getrandom: Used for testing.
+ randrange: Used for testing.
+ """
+ config_list = reporter.config_dict.keys()
+
+ # Update period start times if they're expired or non-existent.
+ now = int(self.gettime())
+ start_times = memcache.get_multi([c.start_key for c in config_list])
+ config_sets = {}
+ for config in config_list:
+ start = start_times.get(config.start_key)
+ if start is None or config.is_expired(start, now):
+ config_sets[config.start_key] = now
+ config_sets[config.counter_key] = 0
+ if config_sets:
+ memcache.set_multi(config_sets)
+
+ # Flip coin for sample rate of all Keys on all configs.
+ for key in reporter.all_keys():
+ coin_flip = getrandom()
+ for config in config_list:
+ if not config.should_sample(key, coin_flip):
+ reporter.remove(key, config)
+
+ # Increment counters for affected configs.
+ counter_offsets = {}
+ for config in config_list:
+ matching = reporter.get_keys(config)
+ if matching:
+ counter_offsets[config.counter_key] = len(matching)
+ if not counter_offsets:
+ return
+ counter_results = memcache.offset_multi(counter_offsets,
initial_value=0)
+
+ # Apply the reservoir algorithm.
+ value_sets = {}
+ now_encoded = struct.pack('!l', now)
+ for config in config_list:
+ matching = list(reporter.get_keys(config))
+ counter = counter_results.get(config.counter_key)
+ if counter is None:
+ # Incrementing the config failed, so give up on these Key samples.
+ continue
+ counter = int(counter) # Deal with wonky serialization types.
+ for (value_index, sample_number) in zip(
+ xrange(len(matching)), xrange(counter - len(matching), counter)):
+ insert_index = None
+ if sample_number < config.samples:
+ insert_index = sample_number
+ else:
+ random_index = randrange(sample_number)
+ if random_index < config.samples:
+ insert_index = random_index
+ if insert_index is not None:
+ key = matching[value_index]
+ value_key = config.position_key(insert_index)
+ value = reporter.get(key, config)
+ if value is not None:
+ # Value may be None if this key was removed from the samples
+ # list due to not passing the coin flip.
+ value_encoded = struct.pack('!l', value)
+ sample = '%s:%s:%s' % (
+ config.adjust_value(key), now_encoded, value_encoded)
+ value_sets[value_key] = sample
+ memcache.set_multi(value_sets)
+
+ def get(self, config, single_key=None):
+ """Gets statistics for a particular config and/or key.
+
+ This will only retrieve samples for the current time period. Samples
+ from previous time periods will be ignored.
+
+ Args:
+ config: The Config to retrieve stats for.
+ single_key: If None, then global stats for the config will be
retrieved.
+ When given a key value (a string), only stats for that particular key
+ will be returned to the caller.
+
+ Returns:
+ SampleResult object containing the result data.
+ """
+ # Make sure the key is converted into the format expected by the
config.
+ if single_key is not None:
+ single_key = config.adjust_value(single_key)
+
+ keys = [config.start_key, config.counter_key]
+ for i in xrange(config.samples):
+ keys.append(config.position_key(i))
+ sample_data = memcache.get_multi(keys)
+
+ # Deal with wonky serialization types.
+ counter = int(sample_data.get(config.counter_key, 0))
+ start_time = sample_data.get(config.start_key)
+ now = self.gettime()
+ if start_time is None:
+ # If the start time isn't there, then just assume it started exactly
+ # the period ago. This should only happen if the start time gets
+ # evicted for some weird reason.
+ start_time = now - config.period
+ elapsed = now - start_time
+
+ # Find all samples that fall within the reset time validity window.
+ results = SampleResult(config, counter, elapsed)
+ for i in xrange(config.samples):
+ combined_value = sample_data.get(config.position_key(i))
+ if combined_value is None:
+ continue
+ key, when_encoded, value_encoded = (
+ combined_value.rsplit(':', 2) + ['', '', ''])[:3]
+ if single_key is not None and single_key != key:
+ continue
+
+ if len(when_encoded) != 4:
+ continue
+ when = struct.unpack('!l', when_encoded)[0]
+ if len(value_encoded) != 4:
+ continue
+ value = struct.unpack('!l', value_encoded)[0]
+
+ if ((start_time - config.tolerance)
+ < when <
+ (start_time + config.period + config.tolerance)):
+ results.add(key, when, value)
+
+ # For a single sample we need to set the counter to the number of
unique
+ # samples so we don't leak the overall QPS being pushed for this event.
+ if single_key is not None:
+ results.set_single_sample(single_key)
+
+ return results
+
+
+class SamplezHandler(webapp.RequestHandler):
+ """Handler that serves samplez data."""
+
+ def get(self):
+ # TODO: Do this import at the top of the file once the OSS tests can
+ # properly resolve the Django dependency.
+ from google.appengine.ext.webapp import template
+ context = {
+ 'all_sections': Section._ALL_SECTIONS,
+ }
+ self.response.out.write(template.render(
+ os.path.join(os.path.dirname(__file__), 'samplez.html'),
+ context))
+
+
+# Thread-local contains list of reporter instances that need to
+# be run at the end of the current request to update samplez tables.
+_REPORTERS = threading.local()
+
+
+def set(*args, **kwargs):
+ """WSGI convenience method; sets a key/value for one or more configs.
+
+ Each config/key combination may only have a single value. Subsequent
+ calls to this method with the same key/config will overwrite the
+ previous value.
+
+ Args:
+ config_or_section: The Config object to set the value for or a Section
+ instance comprised of multiple Config objects that should all be
+ updated by this call.
+ key: The sampling key to add.
+ value: The value to set for this config.
+ """
+ if getattr(_REPORTERS, 'pending', None) is not None:
+ _REPORTERS.pending.set(*args, **kwargs)
+ else:
+ logging.critical(
+ 'App not wrapped in samplez WSGI middleware; ignoring set()')
+
+
+class Middleware(object):
+ """WSGI middleware that updates samplez tables at the end of each request."""
+
+ def __init__(self, app):
+ self.app = app
+
+ def __call__(self, environ, start_response):
+ reporter = Reporter()
+ _REPORTERS.pending = reporter
+ try:
+ self.app(environ, start_response)
+ finally:
+ sampler = MultiSampler()
+ try:
+ sampler.sample(reporter)
+ except Exception, e:
+ logging.warning('Could not run samplez on collected data. %s: %s',
+ e.__class__.__name__, e)
+
+ _REPORTERS.pending = None
+
+
+def setup_for_testing():
+ """Set up this module for use in tests without WSGI middleware."""
+ reporter = Reporter()
+ _REPORTERS.pending = reporter
+
+
+def apply_for_testing():
+ """Applies all pending samples without the need for WSGI middleware."""
+ sampler = MultiSampler()
+ sampler.sample(_REPORTERS.pending)
+ _REPORTERS.pending = None
+
+
+application = webapp.WSGIApplication(
+ [('.*', SamplezHandler)],
+ debug=False)
+
+
+def main():
+ webapp_util.run_wsgi_app(application)
+
+
+if __name__ == '__main__':
+ main()
=======================================
--- /dev/null
+++ /trunk/python/src/afterburner/experimental/samplez/samplez.rst Mon Jul
18 17:27:33 2011
@@ -0,0 +1,7 @@
+Samplez Module
+==============
+
+.. automodule:: afterburner.experimental.samplez.samplez
+ :members:
+
+
=======================================
--- /dev/null
+++ /trunk/python/src/afterburner/experimental/samplez/samplez.yaml Mon Jul 18 17:27:33 2011
@@ -0,0 +1,4 @@
+handlers:
+- url: /_ab/samplez(/.*)?
+ script: afterburner/experimental/samplez/samplez.py
+ login: admin
=======================================
--- /dev/null
+++ /trunk/python/src/afterburner/experimental/samplez/samplez_test.py Mon Jul 18 17:27:33 2011
@@ -0,0 +1,1149 @@
+#!/usr/bin/env python
+#
+# Copyright 2011 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Tests for the samplez module."""
+
+__author__ = 'Brett Slatkin (bsla...@google.com)'
+
+import cProfile
+import gc
+import logging
+logging.basicConfig(format='%(levelname)-8s %(filename)s] %(message)s')
+import os
+import random
+import sys
+import unittest
+
+from google.appengine.api import memcache
+from google.appengine.ext import webapp
+
+from afterburner.experimental.samplez import samplez
+from afterburner.testing import test_base
+
+
+class GetUrlDomainTest(unittest.TestCase):
+ """Tests for the get_url_domain function."""
+
+ def testDomain(self):
+ """Tests good domain names."""
+ # No subdomain
+ self.assertEquals(
+ 'example.com',
+
samplez._get_url_domain('http://example.com/foo/bar?meep=stuff#asdf'))
+ # One subdomain
+ self.assertEquals(
+ 'www.example.com',
+ samplez._get_url_domain(
+ 'http://www.example.com/foo/bar?meep=stuff#asdf'))
+ # Many subdomains
+ self.assertEquals(
+ '1.2.3.many.sub.example.com',
+ samplez._get_url_domain('http://1.2.3.many.sub.example.com/'))
+ # Domain with no trailing path
+ self.assertEquals(
+ 'www.example.com',
+ samplez._get_url_domain('http://www.example.com'))
+
+ def testDomainExceptions(self):
+ """Tests that some URLs may use more than the domain suffix."""
+ exceptions_before = samplez._DOMAIN_SUFFIX_EXCEPTIONS
+ samplez._DOMAIN_SUFFIX_EXCEPTIONS = set(['blogspot.com'])
+ try:
+ self.assertEquals(
+ 'blogspot.com',
+ samplez._get_url_domain(
+ 'http://example.blogspot.com/this-is?some=test'))
+ finally:
+ samplez._DOMAIN_SUFFIX_EXCEPTIONS = exceptions_before
+
+ def testIP(self):
+ """Tests IP addresses."""
+ self.assertEquals(
+ '192.168.1.1',
+
samplez._get_url_domain('http://192.168.1.1/foo/bar?meep=stuff#asdf'))
+ # No trailing path
+ self.assertEquals(
+ '192.168.1.1',
+ samplez._get_url_domain('http://192.168.1.1'))
+
+ def testOther(self):
+ """Tests anything that's not IP- or domain-like."""
+ self.assertEquals(
+ 'localhost',
+
samplez._get_url_domain('http://localhost/foo/bar?meep=stuff#asdf'))
+ # No trailing path
+ self.assertEquals(
+ 'localhost',
+ samplez._get_url_domain('http://localhost'))
+
+ def testBadUrls(self):
+ """Tests URLs that are bad."""
+ self.assertEquals('bad_url',
+ samplez._get_url_domain('this is bad'))
+ self.assertEquals('bad_url',
+ samplez._get_url_domain('example.com/foo/bar?meep=stuff#asdf'))
+ self.assertEquals('bad_url',
+ samplez._get_url_domain('example.com'))
+ self.assertEquals('bad_url',
+ samplez._get_url_domain('//example.com'))
+ self.assertEquals('bad_url',
+ samplez._get_url_domain('/myfeed.atom'))
+ self.assertEquals('bad_url',
+ samplez._get_url_domain('192.168.0.1/foobar'))
+ self.assertEquals('bad_url',
+ samplez._get_url_domain('192.168.0.1'))
+
+ def testCaching(self):
+ """Tests that cache eviction works properly."""
+ samplez._DOMAIN_CACHE.clear()
+ old_size = samplez._DOMAIN_CACHE_SIZE
+ try:
+ samplez._DOMAIN_CACHE_SIZE = 2
+ samplez._DOMAIN_CACHE['http://a.example.com/stuff'] = 'a.example.com'
+ samplez._DOMAIN_CACHE['http://b.example.com/stuff'] = 'b.example.com'
+ samplez._DOMAIN_CACHE['http://c.example.com/stuff'] = 'c.example.com'
+ self.assertEquals(3, len(samplez._DOMAIN_CACHE))
+
+ # Old cache entries are hit:
+ self.assertEquals('c.example.com',
+
samplez._get_url_domain('http://c.example.com/stuff'))
+ self.assertEquals(3, len(samplez._DOMAIN_CACHE))
+
+ # New cache entries clear the contents.
+ self.assertEquals('d.example.com',
+
samplez._get_url_domain('http://d.example.com/stuff'))
+ self.assertEquals(1, len(samplez._DOMAIN_CACHE))
+ finally:
+ samplez._DOMAIN_CACHE_SIZE = old_size
+
+
+class SamplezTest(test_base.TestBase):
+ """Tests for the MultiSampler and associated classes."""
+
+ def setUp(self):
+ """Sets up the test harness."""
+ test_base.TestBase.setUp(self)
+ self.domainA = 'mydomain.com'
+ self.domainB = 'example.com'
+ self.domainC = 'other.com'
+ self.domainD = 'meep.com'
+ self.url1 = 'http://mydomain.com/stuff/meep'
+ self.url2 = 'http://example.com/some-path?a=b'
+ self.url3 = 'http://example.com'
+ self.url4 = 'http://other.com/relative'
+ self.url5 = 'http://meep.com/another-one'
+ self.all_urls = [self.url1, self.url2, self.url3, self.url4, self.url5]
+
+ self.randrange_results = []
+ self.fake_randrange = lambda value: self.randrange_results.pop(0)
+
+ self.random_results = []
+ self.fake_random = lambda: self.random_results.pop(0)
+
+ self.gettime_results = []
+ self.fake_gettime = lambda: self.gettime_results.pop(0)
+
+ def verify_sample(self,
+ results,
+ key,
+ expected_count,
+ expected_frequency,
+ expected_average=1,
+ expected_min=1,
+ expected_max=1):
+ """Verifies a sample key is present in the results.
+
+ Args:
+ results: SampleResult object.
+ key: String key of the sample to test.
+ expected_count: How many samples should be present in the results.
+ expected_frequency: The frequency of this single key.
+ expected_average: Expected average value across samples of this key.
+ expected_min: Expected minimum value across samples of this key.
+ expected_max: Expected maximum value across samples of this key.
+
+ Raises:
+ AssertionError if any of the expectations are not met.
+ """
+ self.assertEquals(expected_count, results.get_count(key))
+ self.assertTrue(
+ -0.001 < (expected_frequency - results.get_frequency(key)) < 0.001,
+ '%r: Difference %f - %f = %f' % (
+ key, expected_frequency, results.get_frequency(key),
+ expected_frequency - results.get_frequency(key)))
+ self.assertTrue(
+ -0.001 < (expected_average - results.get_average(key)) < 0.001,
+ '%r: Difference %f - %f = %f' % (
+ key, expected_average, results.get_average(key),
+ expected_average - results.get_average(key)))
+ self.assertEquals(expected_min, results.get_min(key))
+ self.assertEquals(expected_max, results.get_max(key))
+
+ def verify_no_sample(self, results, key):
+ """Verifies a sample key is not present in the results.
+
+ Args:
+ results: SampleResult object.
+ key: String key of the sample to test.
+
+ Raises:
+ AssertionError if the key is present.
+ """
+ self.assertEquals(0, len(results.get_samples(key)))
+
+ def testSingleAlways(self):
+ """Tests single-config sampling when the sampling rate is 100%."""
+ config = samplez.Config(
+ 'always',
+ period=300,
+ rate=1,
+ samples=10000,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1)
+ reporter.set(config, self.url2)
+ reporter.set(config, self.url3)
+ reporter.set(config, self.url4)
+ reporter.set(config, self.url5)
+ self.gettime_results.extend([0, 10])
+ sampler.sample(reporter)
+ results = sampler.get(config)
+ self.assertEquals(5, results.total_samples)
+ self.assertEquals(5, results.unique_samples)
+ self.verify_sample(results, self.domainA, 1, 0.1)
+ self.verify_sample(results, self.domainB, 2, 0.2)
+ self.verify_sample(results, self.domainC, 1, 0.1)
+ self.verify_sample(results, self.domainD, 1, 0.1)
+
+ self.gettime_results.extend([0, 10])
+ sampler.sample(reporter)
+ results = sampler.get(config)
+ self.assertEquals(10, results.total_samples)
+ self.assertEquals(10, results.unique_samples)
+ self.verify_sample(results, self.domainA, 2, 0.2)
+ self.verify_sample(results, self.domainB, 4, 0.4)
+ self.verify_sample(results, self.domainC, 2, 0.2)
+ self.verify_sample(results, self.domainD, 2, 0.2)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1)
+ self.gettime_results.extend([0, 10])
+ sampler.sample(reporter)
+ results = sampler.get(config)
+ self.assertEquals(11, results.total_samples)
+ self.assertEquals(11, results.unique_samples)
+ self.verify_sample(results, self.domainA, 3, 0.3)
+ self.verify_sample(results, self.domainB, 4, 0.4)
+ self.verify_sample(results, self.domainC, 2, 0.2)
+ self.verify_sample(results, self.domainD, 2, 0.2)
+
+ def testSingleOverwrite(self):
+ """Tests when the number of slots is lower than the sample count."""
+ config = samplez.Config(
+ 'always',
+ period=300,
+ rate=1,
+ samples=2,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ # Writes samples index 0 and 1, then overwrites index 1 again with
+ # a URL in the same domain.
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1)
+ reporter.set(config, self.url2)
+ reporter.set(config, self.url3)
+ self.gettime_results.extend([0, 1])
+ self.randrange_results.extend([1])
+ sampler.sample(reporter, randrange=self.fake_randrange)
+ results = sampler.get(config)
+ self.assertEquals(3, results.total_samples)
+ self.assertEquals(2, results.unique_samples)
+ self.verify_sample(results, self.domainA, 1, 1.5)
+ self.verify_sample(results, self.domainB, 1, 1.5)
+
+ # Overwrites the sample at index 0, skewing all results towards the
+ # domain from index 1.
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url3)
+ self.gettime_results.extend([0, 1])
+ self.randrange_results.extend([0])
+ sampler.sample(reporter, randrange=self.fake_randrange)
+ results = sampler.get(config)
+ self.assertEquals(4, results.total_samples)
+ self.assertEquals(2, results.unique_samples)
+ self.verify_sample(results, self.domainB, 2, 4.0)
+ self.verify_no_sample(results, self.domainA)
+
+ # Now a sample outside the range won't replace anything.
+ self.gettime_results.extend([0, 1])
+ self.randrange_results.extend([3])
+ sampler.sample(reporter, randrange=self.fake_randrange)
+ results = sampler.get(config)
+ self.assertEquals(5, results.total_samples)
+ self.assertEquals(2, results.unique_samples)
+ self.verify_sample(results, self.domainB, 2, 5.0)
+ self.verify_no_sample(results, self.domainA)
+
+ def testSingleSampleRate(self):
+ """Tests when the sampling rate is less than 1."""
+ config = samplez.Config(
+ 'always',
+ period=300,
+ rate=0.2,
+ samples=10000,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1)
+ reporter.set(config, self.url2)
+ reporter.set(config, self.url3)
+ reporter.set(config, self.url4)
+ reporter.set(config, self.url5)
+ self.gettime_results.extend([0, 10])
+ self.random_results.extend([0.25, 0.199, 0.1, 0, 0.201])
+ sampler.sample(reporter, getrandom=self.fake_random)
+ results = sampler.get(config)
+ self.assertEquals(3, results.total_samples)
+ self.assertEquals(3, results.unique_samples)
+ self.verify_no_sample(results, self.domainA)
+ self.verify_no_sample(results, self.domainD)
+ self.verify_sample(results, self.domainB, 2,
+ (1.0/0.2) * (2.0/3.0) * (3.0/10.0))
+ self.verify_sample(results, self.domainC, 1,
+ (1.0/0.2) * (1.0/3.0) * (3.0/10.0))
+
+ def testSingleDoubleSampleRemoved(self):
+ """Tests when the same sample key is set twice and one is skipped.
+
+ Setting the value twice should just overwite the previous value for a
key,
+ but we store the keys in full order (with dupes) for simpler tests.
This
+ ensures that incorrectly using the sampler with multiple sets won't
barf.
+ """
+ config = samplez.Config(
+ 'always',
+ period=300,
+ rate=0.2,
+ samples=4,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1)
+ reporter.set(config, self.url1)
+ reporter.set(config, self.url2)
+ reporter.set(config, self.url3)
+ reporter.set(config, self.url4)
+ reporter.set(config, self.url5)
+ self.gettime_results.extend([0, 10])
+ self.randrange_results.extend([0])
+ self.random_results.extend([0.25, 0.199, 0.1, 0, 0.3, 0.3])
+ sampler.sample(reporter, getrandom=self.fake_random)
+ results = sampler.get(config)
+ self.assertEquals(3, results.total_samples)
+ self.assertEquals(2, results.unique_samples)
+ self.verify_no_sample(results, self.domainA)
+ self.verify_no_sample(results, self.domainC)
+ self.verify_no_sample(results, self.domainD)
+ self.verify_sample(results, self.domainB, 2,
+ (1.0/0.2) * (2.0/2.0) * (3.0/10.0))
+
+ def testSingleSampleRateReplacement(self):
+ """Tests when the sample rate is < 1 and slots are overwritten."""
+ config = samplez.Config(
+ 'always',
+ period=300,
+ rate=0.2,
+ samples=2,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1)
+ reporter.set(config, self.url2)
+ reporter.set(config, self.url3)
+ reporter.set(config, self.url4)
+ self.gettime_results.extend([0, 10])
+ self.randrange_results.extend([1])
+ self.random_results.extend([0.25, 0.199, 0.1, 0])
+ sampler.sample(reporter, getrandom=self.fake_random)
+ results = sampler.get(config)
+ self.assertEquals(3, results.total_samples)
+ self.assertEquals(2, results.unique_samples)
+ self.verify_no_sample(results, self.domainA)
+ self.verify_no_sample(results, self.domainD)
+ self.verify_sample(results, self.domainB, 1,
+ (1.0/0.2) * (1.0/2.0) * (3.0/10.0))
+ self.verify_sample(results, self.domainC, 1,
+ (1.0/0.2) * (1.0/2.0) * (3.0/10.0))
+
+ def testSingleSampleValues(self):
+ """Tests various samples with expected values."""
+ config = samplez.Config(
+ 'always',
+ period=300,
+ rate=0.2,
+ samples=4,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1, 5)
+ reporter.set(config, self.url1, 20) # in
+ reporter.set(config, self.url2, 10) # in
+ reporter.set(config, self.url2 + '&more=true', 25) # in
+ reporter.set(config, self.url3, 20) # in
+ reporter.set(config, self.url4, 40) # in
+ reporter.set(config, self.url5, 60)
+ self.gettime_results.extend([0, 10])
+ self.randrange_results.extend([0])
+ self.random_results.extend([0.25, 0.199, 0.1, 0, 0, 0.1, 0.3])
+ sampler.sample(reporter,
+ randrange=self.fake_randrange,
+ getrandom=self.fake_random)
+ results = sampler.get(config)
+ self.assertEquals(5, results.total_samples)
+ self.assertEquals(4, results.unique_samples)
+ self.verify_no_sample(results, self.domainA)
+ self.verify_no_sample(results, self.domainD)
+ self.verify_sample(results, self.domainB, 3,
+ (1.0/0.2) * (3.0/4.0) * (5.0/10.0),
+ expected_average=18.333,
+ expected_min=10,
+ expected_max=25)
+ self.verify_sample(results, self.domainC, 1,
+ (1.0/0.2) * (1.0/4.0) * (5.0/10.0),
+ expected_average=40,
+ expected_min=40,
+ expected_max=40)
+
+ def testResetTimestamp(self):
+ """Tests resetting the timestamp after the period elapses."""
+ config = samplez.Config(
+ 'always',
+ period=10,
+ samples=10000,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1)
+ self.gettime_results.extend([0, 5])
+ sampler.sample(reporter)
+ results = sampler.get(config)
+ self.assertEquals(1, results.total_samples)
+ self.assertEquals(1, results.unique_samples)
+ self.verify_sample(results, self.domainA, 1, 1.0 / 5)
+ self.verify_no_sample(results, self.domainB)
+ self.verify_no_sample(results, self.domainC)
+ self.verify_no_sample(results, self.domainD)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url2)
+ self.gettime_results.extend([15, 16])
+ sampler.sample(reporter)
+ results = sampler.get(config)
+ self.assertEquals(1, results.total_samples)
+ self.assertEquals(1, results.unique_samples)
+ self.verify_sample(results, self.domainB, 1, 1.0)
+ self.verify_no_sample(results, self.domainA)
+ self.verify_no_sample(results, self.domainC)
+ self.verify_no_sample(results, self.domainD)
+
+ def testSingleUnicodeKey(self):
+ """Tests when a sampling key is unicode.
+
+ Keys must be UTF-8 encoded because the memcache API will do this for us
+ (and break) if we don't.
+ """
+ config = samplez.Config(
+ 'always',
+ period=300,
+ samples=10000,
+ by_value=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ key = u'this-breaks-stuff\u30d6\u30ed\u30b0\u8846'
+ key_utf8 = key.encode('utf-8')
+ reporter.set(config, key)
+ self.gettime_results.extend([0, 10])
+ sampler.sample(reporter)
+ results = sampler.get(config)
+ self.assertEquals(1, results.total_samples)
+ self.assertEquals(1, results.unique_samples)
+ self.verify_sample(results, key_utf8, 1, 0.1)
+
+ def testMultiple(self):
+ """Tests multiple configs being applied together."""
+ config1 = samplez.Config(
+ 'first',
+ period=300,
+ samples=10000,
+ by_domain=True)
+ config2 = samplez.Config(
+ 'second',
+ period=300,
+ samples=10000,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config1, self.url1)
+ reporter.set(config1, self.url2)
+ reporter.set(config1, self.url3)
+ reporter.set(config1, self.url4)
+ reporter.set(config1, self.url5)
+ reporter.set(config2, self.url1, 5)
+ reporter.set(config2, self.url2, 5)
+ reporter.set(config2, self.url3, 5)
+ reporter.set(config2, self.url4, 5)
+ reporter.set(config2, self.url5, 5)
+ self.gettime_results.extend([0, 10, 10])
+ sampler.sample(reporter)
+
+ results1 = sampler.get(config1)
+ self.assertEquals(5, results1.total_samples)
+ self.assertEquals(5, results1.unique_samples)
+ self.verify_sample(results1, self.domainA, 1, 0.1)
+ self.verify_sample(results1, self.domainB, 2, 0.2)
+ self.verify_sample(results1, self.domainC, 1, 0.1)
+ self.verify_sample(results1, self.domainD, 1, 0.1)
+
+ results2 = sampler.get(config2)
+ self.assertEquals(5, results2.total_samples)
+ self.assertEquals(5, results2.unique_samples)
+ self.verify_sample(results2, self.domainA, 1, 0.1,
+ expected_max=5,
+ expected_min=5,
+ expected_average=5)
+ self.verify_sample(results2, self.domainB, 2, 0.2,
+ expected_max=5,
+ expected_min=5,
+ expected_average=5)
+ self.verify_sample(results2, self.domainC, 1, 0.1,
+ expected_max=5,
+ expected_min=5,
+ expected_average=5)
+ self.verify_sample(results2, self.domainD, 1, 0.1,
+ expected_max=5,
+ expected_min=5,
+ expected_average=5)
+
+ def testGetSingleKey(self):
+ """Tests getting the stats for a single key."""
+ config = samplez.Config(
+ 'single-sample',
+ period=300,
+ rate=1,
+ samples=10000,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1)
+ reporter.set(config, self.url2)
+ reporter.set(config, self.url3)
+ reporter.set(config, self.url3 + '&okay=1')
+ reporter.set(config, self.url3 + '&okay=2')
+ reporter.set(config, self.url3 + '&okay=3')
+ reporter.set(config, self.url3 + '&okay=4')
+ reporter.set(config, self.url4)
+ reporter.set(config, self.url5)
+ self.gettime_results.extend([0, 10, 10])
+ sampler.sample(reporter)
+ results = sampler.get(config)
+ self.assertEquals(9, results.total_samples)
+ self.assertEquals(9, results.unique_samples)
+ self.verify_sample(results, self.domainA, 1, 0.1)
+ self.verify_sample(results, self.domainB, 6, 0.6)
+ self.verify_sample(results, self.domainC, 1, 0.1)
+ self.verify_sample(results, self.domainD, 1, 0.1)
+
+ results = sampler.get(config, self.url2)
+ self.assertEquals(6, results.total_samples)
+ self.assertEquals(6, results.unique_samples)
+ self.verify_sample(results, self.domainB, 6, 0.6)
+ self.verify_no_sample(results, self.domainA)
+ self.verify_no_sample(results, self.domainC)
+ self.verify_no_sample(results, self.domainD)
+
+ def testCountLost(self):
+ """Tests when the count variable disappears between samples."""
+ config = samplez.Config(
+ 'lost_count',
+ period=300,
+ rate=1,
+ samples=10000,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1)
+ reporter.set(config, self.url2)
+ self.gettime_results.extend([0, 10])
+ sampler.sample(reporter)
+ results = sampler.get(config)
+ self.assertEquals(2, results.total_samples)
+ self.assertEquals(2, results.unique_samples)
+ self.verify_no_sample(results, self.domainC)
+ self.verify_no_sample(results, self.domainD)
+ self.verify_sample(results, self.domainA, 1, 0.1)
+ self.verify_sample(results, self.domainB, 1, 0.1)
+
+ memcache.delete('lost_count:by_domain:counter')
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url4)
+ self.gettime_results.extend([0, 10])
+ sampler.sample(reporter)
+ results = sampler.get(config)
+ self.assertEquals(1, results.total_samples)
+
+ # Two samples found because we're still in the same period tolerance.
+ # Sample at index 0 will be overwritten with the new entry, meaning
+ # domain A is gone.
+ self.assertEquals(2, results.unique_samples)
+ self.verify_no_sample(results, self.domainA)
+ self.verify_no_sample(results, self.domainD)
+ self.verify_sample(results, self.domainB, 1, 0.05)
+ self.verify_sample(results, self.domainC, 1, 0.05)
+
+ def testStampLost(self):
+ """Tests when the start timestamp is lost between samples."""
+ config = samplez.Config(
+ 'lost_stamp',
+ period=300,
+ rate=1,
+ samples=10000,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1)
+ reporter.set(config, self.url2)
+ self.gettime_results.extend([0, 10])
+ sampler.sample(reporter)
+ results = sampler.get(config)
+ self.assertEquals(2, results.total_samples)
+ self.assertEquals(2, results.unique_samples)
+ self.verify_no_sample(results, self.domainC)
+ self.verify_no_sample(results, self.domainD)
+ self.verify_sample(results, self.domainA, 1, 0.1)
+ self.verify_sample(results, self.domainB, 1, 0.1)
+
+ memcache.delete('lost_stamp:by_domain:start_time')
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url4)
+ self.gettime_results.extend([0, 10])
+ sampler.sample(reporter)
+ results = sampler.get(config)
+ self.assertEquals(1, results.total_samples)
+
+ # Just like losing the count, old samples found because we're still in
the
+ # same period tolerance. Sample at index 0 will be overwritten with
the new
+ # entry, meaning domain A is gone.
+ self.assertEquals(2, results.unique_samples)
+ self.verify_no_sample(results, self.domainA)
+ self.verify_no_sample(results, self.domainD)
+ self.verify_sample(results, self.domainB, 1, 0.05)
+ self.verify_sample(results, self.domainC, 1, 0.05)
+
+ def testSamplesLost(self):
+ """Tests when some unique samples were evicted."""
+ config = samplez.Config(
+ 'lost_sample',
+ period=300,
+ rate=1,
+ samples=10000,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1)
+ reporter.set(config, self.url2)
+ reporter.set(config, self.url3)
+ reporter.set(config, self.url4)
+ reporter.set(config, self.url5)
+ self.gettime_results.extend([0, 10])
+ sampler.sample(reporter)
+
+ memcache.delete_multi([
+ 'lost_sample:by_domain:0',
+ 'lost_sample:by_domain:1',
+ 'lost_sample:by_domain:2',
+ ])
+
+ results = sampler.get(config)
+ self.assertEquals(5, results.total_samples)
+ self.assertEquals(2, results.unique_samples)
+ self.verify_no_sample(results, self.domainA)
+ self.verify_no_sample(results, self.domainB)
+ self.verify_sample(results, self.domainC, 1, 0.25)
+ self.verify_sample(results, self.domainD, 1, 0.25)
+
+ def testBeforePeriod(self):
+ """Tests when the samples retrieved are too old."""
+ config = samplez.Config(
+ 'old_samples',
+ period=10,
+ rate=1,
+ samples=10000,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1)
+ reporter.set(config, self.url2)
+ reporter.set(config, self.url3)
+ reporter.set(config, self.url4)
+ reporter.set(config, self.url5)
+ self.gettime_results.extend([20, 40])
+ sampler.sample(reporter)
+
+ memcache.set('old_samples:by_domain:start_time', 0)
+ results = sampler.get(config)
+ self.assertEquals(5, results.total_samples)
+ self.assertEquals(0, results.unique_samples)
+ self.verify_no_sample(results, self.domainA)
+ self.verify_no_sample(results, self.domainB)
+ self.verify_no_sample(results, self.domainC)
+ self.verify_no_sample(results, self.domainD)
+
+ def testBadSamples(self):
+ """Tests when getting samples with memcache values that are bad."""
+ config = samplez.Config(
+ 'bad_samples',
+ period=10,
+ rate=1,
+ samples=10000,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1)
+ reporter.set(config, self.url2)
+ reporter.set(config, self.url3)
+ reporter.set(config, self.url4)
+ reporter.set(config, self.url5)
+ self.gettime_results.extend([0, 10])
+ sampler.sample(reporter)
+
+ # Totaly bad
+ memcache.set('bad_samples:by_domain:0', 'garbage')
+ # Bad value.
+ memcache.set('bad_samples:by_domain:1',
+ '%s:\0\0\0\1:' % self.domainB)
+ # Bad when.
+ memcache.set('bad_samples:by_domain:2',
+ '%s::\0\0\0\1' % self.domainB)
+
+ results = sampler.get(config)
+ self.assertEquals(5, results.total_samples)
+ self.assertEquals(2, results.unique_samples)
+ self.verify_no_sample(results, self.domainA)
+ self.verify_no_sample(results, self.domainB)
+ self.verify_sample(results, self.domainC, 1, 0.25)
+ self.verify_sample(results, self.domainD, 1, 0.25)
+
+ def testNonIntegerValue(self):
+ """Tests when reporter set() calls are not integers."""
+ config1 = samplez.Config(
+ 'first',
+ period=300,
+ rate=1,
+ samples=10000,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(config1, self.url1, 1.5)
+ reporter.set(config1, self.url2, 2.5)
+ reporter.set(config1, self.url3, 3.4567)
+ self.gettime_results.extend([0, 10, 10])
+ sampler.sample(reporter)
+ results = sampler.get(config1)
+
+ self.assertEquals(3, results.total_samples)
+ self.assertEquals(3, results.unique_samples)
+ self.verify_sample(results, self.domainA, 1, 0.1)
+ self.verify_sample(results, self.domainB, 2, 0.2, 2.5, 2, 3)
+
+ results = sampler.get(config1, single_key=self.url2)
+ self.assertEquals(2, results.total_samples)
+ self.assertEquals(2, results.unique_samples)
+ self.verify_sample(results, self.domainB, 2, 0.2, 2.5, 2, 3)
+
+ def testSection(self):
+ """Tests setting and getting values for Section instances."""
+ config1 = samplez.Config(
+ 'first',
+ period=300,
+ rate=1,
+ samples=10000,
+ by_domain=True)
+ config2 = samplez.Config(
+ 'second',
+ period=300,
+ rate=1,
+ samples=10000,
+ by_value=True)
+ section = samplez.Section('My section', config1, config2)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+
+ reporter = samplez.Reporter()
+ reporter.set(section, self.url1)
+ reporter.set(section, self.url2)
+ reporter.set(section, self.url3)
+ reporter.set(section, self.url4)
+ reporter.set(section, self.url5)
+ self.gettime_results.extend([0, 10, 10, 10, 10, 10, 10])
+ sampler.sample(reporter)
+ result_iter = section.results(sampler=sampler)
+
+ # Results for config1
+ results = result_iter.next()
+ self.assertEquals(5, results.total_samples)
+ self.assertEquals(5, results.unique_samples)
+ self.verify_sample(results, self.domainA, 1, 0.1)
+ self.verify_sample(results, self.domainB, 2, 0.2)
+ self.verify_sample(results, self.domainC, 1, 0.1)
+ self.verify_sample(results, self.domainD, 1, 0.1)
+
+ # Results for config2
+ results = result_iter.next()
+ self.assertEquals(5, results.total_samples)
+ self.assertEquals(5, results.unique_samples)
+ self.verify_sample(results, self.url1, 1, 0.1)
+ self.verify_sample(results, self.url2, 1, 0.1)
+ self.verify_sample(results, self.url3, 1, 0.1)
+ self.verify_sample(results, self.url4, 1, 0.1)
+ self.verify_sample(results, self.url5, 1, 0.1)
+
+ # Single key test
+ result_iter = section.results(sampler=sampler, single_key=self.url2)
+
+ # Results for config1
+ results = result_iter.next()
+ self.assertEquals(2, results.total_samples)
+ self.assertEquals(2, results.unique_samples)
+ self.verify_sample(results, self.domainB, 2, 0.2)
+
+ # Results for config2
+ results = result_iter.next()
+ self.assertEquals(1, results.total_samples)
+ self.assertEquals(1, results.unique_samples)
+ self.verify_sample(results, self.url2, 1, 0.1)
+
+ def testResultObjects(self):
+ """Tests that result objects are formatted correctly for templates."""
+ config = samplez.Config(
+ 'always',
+ period=300,
+ rate=1,
+ samples=10000,
+ by_domain=True)
+ sampler = samplez.MultiSampler(gettime=self.fake_gettime)
+ self.gettime_results.extend([0, 10, 10])
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1, 123)
+ reporter.set(config, self.url2, 100)
+ sampler.sample(reporter)
+
+ reporter = samplez.Reporter()
+ reporter.set(config, self.url1, 456)
+ reporter.set(config, self.url2, 200)
+ sampler.sample(reporter)
+
+ self.gettime_results.extend([0, 10])
+
+ results = sampler.get(config)
+ self.assertEquals(4, results.total_samples)
+ self.assertEquals(4, results.unique_samples)
+
+ # Verify the sample_objects method
+ self.assertEquals(
+ [{'count': 2, 'min': 123, 'max': 456,
+ 'average': 289.5, 'frequency': 0.2, 'key': 'mydomain.com'},
+ {'count': 2, 'min': 100, 'max': 200, 'average': 150.0,
+ 'frequency': 0.2, 'key': 'example.com'}],
+ list(results.sample_objects()))
+
+ # Verify the overall_rate method
+ self.assertEquals(0.4, results.overall_rate())
+
+ def testMiddleware(self):
+ """Tests the global middleware and set() function."""
+ config1 = samplez.Config(
+ 'first',
+ period=300,
+ rate=1,
+ samples=10000,
+ by_domain=True)
+ config2 = samplez.Config(
+ 'second',
+ period=300,
+ rate=1,
+ samples=10000,
+ by_value=True)
+ section = samplez.Section('My section', config1, config2)
+
+ def my_app(*args):
+ samplez.set(section, self.url1)
+ samplez.set(section, self.url2)
+ samplez.set(section, self.url3)
+ samplez.set(section, self.url4)
+ samplez.set(section, self.url5)
+
+ wrapper = samplez.Middleware(my_app)
+ wrapper(None, None)
+
+ result_iter = section.results()
+
+ # Results for config1
+ results = result_iter.next()
+ self.assertEquals(5, results.total_samples)
+ self.assertEquals(5, results.unique_samples)
+ self.assertEquals(1, results.get_count(self.domainA))
+ self.assertEquals(2, results.get_count(self.domainB))
+ self.assertEquals(1, results.get_count(self.domainC))
+ self.assertEquals(1, results.get_count(self.domainD))
+
+ # Results for config2
+ results = result_iter.next()
+ self.assertEquals(5, results.total_samples)
+ self.assertEquals(5, results.unique_samples)
+ self.assertEquals(1, results.get_count(self.url1))
+ self.assertEquals(1, results.get_count(self.url2))
+ self.assertEquals(1, results.get_count(self.url3))
+ self.assertEquals(1, results.get_count(self.url4))
+ self.assertEquals(1, results.get_count(self.url5))
+
+ ## TODO: Turn this back on once we can get mock_webapp
+ ## into the external source distribution.
+ ## def testTemplate(self):
+ ## """Tests the stats table rendering template."""
+ ## handler = samplez.SamplezHandler()
+ ## request = mock_webapp.MockRequest()
+ ## response = mock_webapp.MockResponse()
+ ## handler.initialize(request, response)
+ ## handler.get()
+ ## self.assertTrue('Samplez sections' in response.out.getvalue())
+
+ def testConfig(self):
+ """Tests config validation."""
+ # Bad name.
+ self.assertRaises(
+ samplez.ConfigError,
+ samplez.Config,
+ '',
+ period=10,
+ samples=10,
+ by_domain=True)
+
+ # Bad period.
+ self.assertRaises(
+ samplez.ConfigError,
+ samplez.Config,
+ 'my name',
+ period=0,
+ samples=10,
+ by_domain=True)
+ self.assertRaises(
+ samplez.ConfigError,
+ samplez.Config,
+ 'my name',
+ period=-1,
+ samples=10,
+ by_domain=True)
+ self.assertRaises(
+ samplez.ConfigError,
+ samplez.Config,
+ 'my name',
+ period='bad',
+ samples=10,
+ by_domain=True)
+
+ # Bad samples.
+ self.assertRaises(
+ samplez.ConfigError,
+ samplez.Config,
+ 'my name',
+ period=10,
+ samples=0,
+ by_domain=True)
+ self.assertRaises(
+ samplez.ConfigError,
+ samplez.Config,
+ 'my name',
+ period=10,
+ samples=-1,
+ by_domain=True)
***The diff for this file has been truncated for email.***
=======================================
--- /trunk/python/demo/app.yaml Fri Nov 12 03:18:40 2010
+++ /dev/null
@@ -1,12 +0,0 @@
-application: abdemo
-version: 1
-runtime: python
-api_version: 1
-
-includes:
-- afterburner/experimental/db/triggers.yaml
-
-handlers:
-- url: /
- script: main.py
-
=======================================
--- /trunk/python/demo/appengine_config.py Mon Mar 14 17:50:48 2011
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/usr/bin/env python
-import main
=======================================
--- /trunk/python/demo/cron.yaml Fri Nov 12 03:18:40 2010
+++ /dev/null
@@ -1,4 +0,0 @@
-cron:
-- description: triggers processing
- url: /_ab/triggers/process
- schedule: every 1 minutes
=======================================
--- /trunk/python/demo/main.py Mon Mar 14 17:50:48 2011
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env python
-import logging
-
-from google.appengine.ext import db
-from google.appengine.ext import webapp
-from google.appengine.ext.webapp.util import run_wsgi_app
-
-from afterburner.experimental.db import triggers
-from afterburner.experimental.db import materialized_map as mmap
-
-
-class TestEntity(db.Model):
- pass
-
-class FooEntity(db.Model):
- pass
-
-@mmap.materialized_map(TestEntity.kind())
-def test_map(entity):
- yield FooEntity()
-
-
-class MainPage(webapp.RequestHandler):
- def get(self):
- TestEntity().put()
- self.response.out.write('Hello, webapp World!')
-
-application = webapp.WSGIApplication([('/', MainPage)],
- debug=True)
-
-
-def main():
- run_wsgi_app(application)
-
-if __name__ == "__main__":
- main()
-
=======================================
--- /trunk/python/build.sh Thu Mar 17 12:28:01 2011
+++ /trunk/python/build.sh Mon Jul 18 17:27:33 2011
@@ -35,7 +35,8 @@
test () {
exit_status=0
for t in $(find "$dir/src" -name "*test.py"); do
- if python2.5 $t
+ echo "Run $t"
+ if python $t
then
echo "PASSED"
else
=======================================
--- /trunk/python/src/afterburner/testing/test_base.py Mon Mar 14 17:50:48 2011
+++ /trunk/python/src/afterburner/testing/test_base.py Mon Jul 18 17:27:33 2011
@@ -1,13 +1,30 @@
#!/usr/bin/env python
+#
+# Copyright 2010 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
import os
import unittest

from google.appengine.api import apiproxy_stub_map
from google.appengine.api import datastore_file_stub
+from google.appengine.api.memcache import memcache_stub
from google.appengine.api.taskqueue import taskqueue_stub

+
class TestBase(unittest.TestCase):
- """Base class for appengine tests."""
+ """Base class for Afterburner tests."""

def setUp(self):
unittest.TestCase.setUp(self)
@@ -17,9 +34,9 @@
self.taskqueue = taskqueue_stub.TaskQueueServiceStub()
self.datastore = datastore_file_stub.DatastoreFileStub(
self.appid, "/dev/null", "/dev/null")
+ self.memcache = memcache_stub.MemcacheServiceStub()

apiproxy_stub_map.apiproxy = apiproxy_stub_map.APIProxyStubMap()
apiproxy_stub_map.apiproxy.RegisterStub("taskqueue", self.taskqueue)
apiproxy_stub_map.apiproxy.RegisterStub("datastore_v3", self.datastore)
-
-
+ apiproxy_stub_map.apiproxy.RegisterStub("memcache", self.memcache)
=======================================
--- /trunk/python/src/index.rst Fri Nov 5 21:29:00 2010
+++ /trunk/python/src/index.rst Mon Jul 18 17:27:33 2011
@@ -26,6 +26,7 @@
* :py:mod:`afterburner.experimental.memoize` - function decorator to cache
function value in memcache.
* :py:mod:`afterburner.experimental.db.triggers` - datastore triggers
system.
+* :py:mod:`afterburner.experimental.samplez.samplez` - statistical
sampling.