class StartHandler(webapp2.RequestHandler):
    """Backend start hook: writes a ~1 MB text object to Cloud Storage.

    Triggered via GET /_ah/start when a backend instance boots.
    """

    GCS_BUCKET = "/"  # bucket prefix used when building object paths

    def debugMessage(self, msg):
        """Log *msg* at debug level and flush immediately so the line
        reaches the log viewer even if the request later hangs."""
        logging.debug(msg)
        logservice.flush()

    def get(self):
        # One object per backend instance so concurrent instances don't collide.
        suffix = str(backends.get_instance())
        filename = "%s/testwrite%s.txt" % (self.GCS_BUCKET, suffix)
        gcs_file = cloudstorage.open(filename, 'w', content_type='text/plain')
        self.debugMessage("opened file")
        # 1 MiB payload plus a trailing newline.
        gcs_file.write("f" * (1024 * 1024) + '\n')
        self.debugMessage("data written")
        gcs_file.close()
        self.debugMessage("file closed")
0.1.0.3 - - [10/Sep/2013:02:30:17 -0700] "GET /_ah/start HTTP/1.1" 200 119 - - "0.v1.storagetest.benstoragetest.appspot.com" ms=3156 cpu_ms=606 cpm_usd=0.000013 loading_request=1 instance=0 app_engine_release=1.8.4
sleeping
opened file
data written
file closed
This request caused a new process to be started for your application, and thus caused your application code to be loaded for the first time. This request may thus take longer and use more CPU than a typical request for your application.
0.1.0.3 - - [10/Sep/2013:02:46:33 -0700] "GET /_ah/start HTTP/1.1" 500 0 - - "7.v1.storagetest.benstoragetest.appspot.com" ms=978207 cpu_ms=866 loading_request=1 exit_code=114 instance=7 app_engine_release=1.8.4
sleeping
opened file
This request caused a new process to be started for your application, and thus caused your application code to be loaded for the first time. This request may thus take longer and use more CPU than a typical request for your application.
Process terminated because the backend took too long to shutdown.
137.222.108.37 - - [10/Sep/2013:02:51:41 -0700] "GET /gcs?suffix=15 HTTP/1.1" 500 0 - "Wget/1.13.4 (linux-gnu)" "benstoragetest.appspot.com" ms=60705 cpu_ms=1280 loading_request=1 app_engine_release=1.8.4 instance=00c61b117c1bed3e63c87fa8b71e8c142c32b7
opened file
Traceback (most recent call last): File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/runtime/wsgi.py", line 266, in Handle result = handler(dict(self._environ), self._StartResponse) File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.3/webapp2.py", line 1505, in __call__ rv = self.router.dispatch(request, response) File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.3/webapp2.py", line 1253, in default_dispatcher return route.handler_adapter(request, response) File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.3/webapp2.py", line 1077, in __call__ return handler.dispatch() File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.3/webapp2.py", line 545, in dispatch return method(*args, **kwargs) File "/base/data/home/apps/s~benstoragetest/v1.370120547795114149/storagetest.py", line 26, in get gcs_file.write("f" * 1024 * 1024 * 1 + '\n') File "/base/data/home/apps/s~benstoragetest/v1.370120547795114149/cloudstorage/storage_api.py", line 578, in write self._flush() File "/base/data/home/apps/s~benstoragetest/v1.370120547795114149/cloudstorage/storage_api.py", line 659, in _flush self._send_data(''.join(buffer), last) File "/base/data/home/apps/s~benstoragetest/v1.370120547795114149/cloudstorage/storage_api.py", line 676, in _send_data self._path_with_token, payload=data, headers=headers) File "/base/data/home/apps/s~benstoragetest/v1.370120547795114149/cloudstorage/rest_api.py", line 41, in sync_wrapper return future.get_result() File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/ext/ndb/tasklets.py", line 325, in get_result self.check_success() File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/ext/ndb/tasklets.py", line 320, in check_success self.wait() File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/ext/ndb/tasklets.py", 
line 304, in wait if not ev.run1(): File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/ext/ndb/eventloop.py", line 219, in run1 delay = self.run0() File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/ext/ndb/eventloop.py", line 181, in run0 callback(*args, **kwds) File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/ext/ndb/tasklets.py", line 375, in _help_tasklet_along ds_conn = datastore._GetConnection() File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/api/datastore.py", line 391, in _GetConnection return _thread_local.connection_stack[-1] DeadlineExceededError
This request caused a new process to be started for your application, and thus caused your application code to be loaded for the first time. This request may thus take longer and use more CPU than a typical request for your application.
Has anyone else observed this behaviour? Does anyone have any suggestions for what I am doing wrong?
# Phase 1: write a single character — per the discussion below, this
# always succeeds on backend instances.
gcs_file = cloudstorage.open(filename, 'w', content_type='text/plain' )
self.debugMessage("opened file")
gcs_file.write("f")
self.debugMessage("One character written")
gcs_file.close()
self.debugMessage("file closed. Sleeping")
# Stagger the instances: suffix is the backend instance number, so each
# instance starts its large write at a different time.
time.sleep(int(suffix) * 10)
# Phase 2: reopen the same object and write ~1 MiB — reported to hang on
# some instances (see the surrounding discussion).
gcs_file = cloudstorage.open(filename, 'w', content_type='text/plain')
self.debugMessage("opened file")
gcs_file.write("f" * 1024 * 1024 * 1 + '\n')
self.debugMessage("data written")
gcs_file.close()
self.debugMessage("file closed")
I ran this three times (10 instances each time) - in all cases the file is written with the single character successfully, but a number of the larger writes then hang.
You were able to write a single character without a problem, but 1 MB of characters was too much for it. So if I were you, I'd try to find that inflection point and then save to Cloud Storage in chunks smaller than that. For example, try writing to Cloud Storage in chunks of 250 KB; if that works, try chunks of 400 KB, and so on. Of course this is only a bandaid solution - something is clearly wrong with the underlying urlfetch implementation.
But if I had to make a recommendation, I'd say skip the whole backend writing thing altogether. It's a waste of money to load up backends for the purposes of writing (unless you're Bill Gates or similar!). You said frontend instances seem to be much better in terms of writing - try using task queues to spawn off frontend requests which are solely dedicated to writing to Cloud Storage. If you need a temporary place to store data, write it to the datastore or Cloud SQL until the tasks can get to the GCS writing.
import webapp2
from google.appengine.ext import db
from google.appengine.api.logservice import logservice
import logging
class TestEntity(db.Model):
    # Scratch model used only to measure datastore write behaviour from a
    # backend instance; holds the large test payload.
    data = db.TextProperty()  # test payload (~750 KB of "f" in the handler below)
class StartHandler(webapp2.RequestHandler):
    """Backend start hook: stores a ~750 KB text entity in the datastore."""

    def debugMessage(self, msg):
        """Log *msg* at debug level and flush the log service right away."""
        logging.debug(msg)
        logservice.flush()

    def get(self):
        # Build the 750 KiB payload and persist it as a single entity.
        payload = "f" * (1024 * 750)
        entity = TestEntity(data=payload)
        self.debugMessage("Entity created")
        entity.put()
        self.debugMessage("Entity saved")
# WSGI entry point: route the backend start request to StartHandler.
_routes = [
    ('/_ah/start', StartHandler),
]
app = webapp2.WSGIApplication(_routes, debug=True)
def write_in_chunks(self, data):
    """Write *data* to a new blobstore file in fixed-size chunks.

    Args:
        data: str payload to store.

    Side effects:
        Creates a new blobstore file, appends the payload in ~30 KB
        chunks, and finalizes the file.
    """
    chunk_size = 1024 * 30
    file_name = files.blobstore.create(mime_type="text/plain")
    with files.open(file_name, 'a') as f:
        # Step through the payload directly. The previous
        # int(math.ceil(len(data)/chunk_size)) computed the chunk count
        # with Python 2 integer (floor) division, making math.ceil a
        # no-op, so any final partial chunk was silently dropped.
        for start in range(0, len(data), chunk_size):
            f.write(data[start:start + chunk_size])
    files.finalize(file_name)
def write(self, data):
    """Buffer *data* for upload, flushing once a full block is queued."""
    self._check_open()
    assert isinstance(data, str)
    if data:
        size = len(data)
        self._buffer.append(data)
        self._buffered += size
        self._offset += size
        # Ship the buffered data once it reaches the configured block size.
        if self._buffered >= self._blocksize:
            self._flush()
It looks like Vinny was correct - writes of a smaller size do not seem to cause a problem. Specifically, writes of 32 KB from backend instances to the datastore, blobstore and Cloud Storage all display the hanging behaviour.
In light of Doug's comments, I have also confirmed the same behaviour (success on writing 30KB; some hangs on writing >=32KBs) occurs with the latest release (r105) of the client library from SVN. I was expecting this, as it doesn't look like the GCS library is the problem.
Yeah, from the evidence so far I am also hoping the hangs don't originate from the GCS lib.