An error occured while connecting to the server

155 views
Skip to first unread message

정래진

unread,
Jun 30, 2016, 7:58:53 AM6/30/16
to Google App Engine

I have a question about an error in my source code for scraping web pages. The code is written in Python and runs on Google App Engine. The URL itself can be reached without problems, and the scraping code also works fine. If you know what might be causing the error, please let me know.

Python + Google App Engine source

import sys

# Python 2 only: site.py removes sys.setdefaultencoding at startup, so the
# module has to be reloaded before the default str<->unicode codec can be
# switched to UTF-8.
reload(sys)
sys.setdefaultencoding("utf-8")
# Put the local 'libs' directory first on the import path; this must happen
# before the third-party imports below (presumably bs4 is vendored there --
# confirm against the project layout).
sys.path.insert(0, 'libs')

import webapp2
from bs4 import BeautifulSoup
import math
import urllib2

class MainPage(webapp2.RequestHandler):
    """Scrape one Emart category listing page and return it as tab-separated text.

    Every complete product becomes one output line holding the title, price,
    image URL and product code, separated by tabs.
    """

    def get(self, args1, args2):
        """Fetch the listing page and write the scraped rows to the response.

        Args:
            args1: Category id, sent as the ``dispCtgId`` query parameter.
            args2: Page number, sent as the ``page`` query parameter.

        Errors raised by ``urllib2.urlopen`` are not caught here and
        propagate to webapp2.
        """
        url = ("http://emart.ssg.com/category/list.ssg?dispCtgId="
               + str(args1) + "&page=" + str(args2))
        print("log_print" + url)

        source_code = urllib2.urlopen(url).read()
        soup = BeautifulSoup(source_code, "html.parser")

        # find() returns None when the page has no <tbody>; treat that as an
        # empty listing instead of failing with AttributeError.
        tbody = soup.find("tbody")
        items = tbody.find_all(class_="item w202") if tbody is not None else []

        rows = []
        for info_list in items:
            row = self._parse_item(info_list)
            if row is not None:
                rows.append(row)

        self.response.write("".join(rows))

    @staticmethod
    def _parse_item(info_list):
        """Return one newline-terminated TSV row for a product, or None.

        None is returned when any field is missing, so an incomplete product
        never leaves a partial, unterminated row in the output.
        """
        # Walk each tag chain step by step: a missing element or attribute
        # yields None here rather than raising, so the check below is
        # actually reachable.
        title_tag = info_list.find(class_="title")
        title_link = title_tag.a if title_tag is not None else None
        title = title_link.get("title") if title_link is not None else None

        price_tag = info_list.find(class_="price")
        price_strong = price_tag.strong if price_tag is not None else None
        price = price_strong.string if price_strong is not None else None

        thumb_tag = info_list.find(class_="thm")
        thumb_link = thumb_tag.a if thumb_tag is not None else None
        img_tag = thumb_link.img if thumb_link is not None else None
        img_url = img_tag.get("src") if img_tag is not None else None

        if title is None or price is None or img_url is None:
            return None

        # The product code is the part before the first "_" in the eighth
        # "/"-separated component of the image URL.
        parts = img_url.split("/")
        if len(parts) <= 7:
            return None
        code = parts[7].split("_")[0]

        return "\t".join([title, price, "http:" + img_url, code]) + "\n"


# Raw string keeps the regex escapes (\d) out of Python's own escape handling.
app = webapp2.WSGIApplication(
    [(r'/emart_product_v2/(\d+)_(\d+)', MainPage)],
    debug=True,
)

error code
Traceback (most recent call last):
   File "/base/data/home/runtimes/python27/python27_lib/versions/1/google/appengine/runtime/wsgi.py", line 267, in Handle
result = handler(dict(self._environ), self._StartResponse)
   File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.3/webapp2.py", line 1519, in __call__
response = self._internal_error(e)
   File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.3/webapp2.py", line 1511, in __call__
rv = self.handle_exception(request, response, e)
   File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.3/webapp2.py", line 1505, in __call__
rv = self.router.dispatch(request, response)
   File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.3/webapp2.py", line 1253, in default_dispatcher
return route.handler_adapter(request, response)
   File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.3/webapp2.py", line 1077, in __call__
return handler.dispatch()
   File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.3/webapp2.py", line 547, in dispatch
return self.handle_exception(e, self.app.debug)
   File "/base/data/home/runtimes/python27/python27_lib/versions/third_party/webapp2-2.3/webapp2.py", line 545, in dispatch
return method(*args, **kwargs)
   File "/base/data/home/apps/s~inte-core1/v1.393867712548898881/emart_product.py", line 28, in get
source_code = urllib2.urlopen(url).read()
   File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
   File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 404, in open
response = self._open(req, data)
   File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 422, in _open
'_open', req)
   File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 382, in _call_chain
result = func(*args)
   File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 1214, in http_open
return self.do_open(httplib.HTTPConnection, req)
   File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/urllib2.py", line 1187, in do_open
r = h.getresponse(buffering=True)
   File "/base/data/home/runtimes/python27/python27_dist/lib/python2.7/gae_override/httplib.py", line 536, in getresponse
'An error occured while connecting to the server: %s' % e)
 error: An error occured while connecting to the server: Connection closed unexpectedly by server at URL: http://emart.ssg.com/category/list.ssg?dispCtgId=0006511712&page=1




Adam (Cloud Platform Support)

unread,
Jul 1, 2016, 6:05:08 PM7/1/16
to Google App Engine
The stack trace indicates a runtime error 'Connection closed unexpectedly by server'. The stack trace also shows you that the code does not get past 'source_code = urllib2.urlopen(url).read()' on line 28 of 'emart_product.py'.

You should probably troubleshoot why you cannot read from the URL first before troubleshooting the rest of the scraping code.
Reply all
Reply to author
Forward
0 new messages