[gpapers] 4 new revisions pushed by marcelCo...@gmail.com on 2012-07-31 22:07 GMT

4 views
Skip to first unread message

codesite...@google.com

unread,
Jul 31, 2012, 6:08:42 PM (7/31/12)
to gpapers...@googlegroups.com
4 new revisions:

Revision: ef2cda3c2e06
Author: Marcel Stimberg <marcel...@gmail.com>
Date: Sun Jul 29 07:16:35 2012
Log: move basic package info into a dedicated module
http://code.google.com/p/gpapers/source/detail?r=ef2cda3c2e06

Revision: c89f6a6e1dfa
Author: Marcel Stimberg <marcel...@gmail.com>
Date: Sun Jul 29 15:16:55 2012
Log: use a standard date format
http://code.google.com/p/gpapers/source/detail?r=c89f6a6e1dfa

Revision: ed11b79da7d8
Author: Marcel Stimberg <marcel...@gmail.com>
Date: Tue Jul 31 14:29:21 2012
Log: Fix hang in DOI dialog (Fixes issue 12). Allow more PDF mime
types (Fixes issue 13). Thanks to 98310brem for reports and fixes
http://code.google.com/p/gpapers/source/detail?r=ed11b79da7d8

Revision: 6e8746c1f2a0
Author: Marcel Stimberg <marcel...@gmail.com>
Date: Tue Jul 31 15:06:51 2012
Log: do not throw away DOI info after downloading a paper via DOI,
instead use DOI for downloading metadata
http://code.google.com/p/gpapers/source/detail?r=6e8746c1f2a0

==============================================================================
Revision: ef2cda3c2e06
Author: Marcel Stimberg <marcel...@gmail.com>
Date: Sun Jul 29 07:16:35 2012
Log: move basic package info into a dedicated module
http://code.google.com/p/gpapers/source/detail?r=ef2cda3c2e06

Added:
/gpapers_info.py
Modified:
/gpapers/__init__.py
/gpapers/importer/__init__.py
/setup.py

=======================================
--- /dev/null
+++ /gpapers_info.py Sun Jul 29 07:16:35 2012
@@ -0,0 +1,10 @@
+'''
+This file contains some general information that should be accessible
without
+having to import big modules.
+'''
+
+__version__ = '0.5dev'
+__copyright__ = 'Copyright (c) 2008, 2009 Derek Anderson; 2012 Derek
Anderson & Marcel Stimberg'
+__authors__ = ['Derek Anderson <pub...@kered.org>', 'Marcel Stimberg
<marcel...@googlemail.com']
+__url__ = 'http://gpapers.org/'
+__description__ = 'The Gnome-based Scientific Paper Organizer'
=======================================
--- /gpapers/__init__.py Mon Jul 2 14:18:52 2012
+++ /gpapers/__init__.py Sun Jul 29 07:16:35 2012
@@ -20,8 +20,6 @@
# You should have received a copy of the GNU General Public License
# along with gPapers. If not, see <http://www.gnu.org/licenses/>.

-__version__ = '0.5dev'
-
from datetime import datetime, timedelta, date
import math
import mimetypes
@@ -55,6 +53,8 @@
import gpapers.importer as importer
from gpapers.importer import pango_escape
from gpapers.importer import pubmed, google_scholar, jstor, arxiv
+from gpapers_info import (__version__, __authors__, __url__,
+ __description__, __copyright__)

log_level_debug()

@@ -654,12 +654,11 @@
about = Gtk.AboutDialog()
about.set_program_name('gPapers')
about.set_version(__version__)
- about.set_copyright('Copyright (c) 2008, 2009 Derek Anderson; 2012
Derek Anderson & Marcel Stimberg')
- about.set_comments('''The Gnome-based Scientific Paper
Organizer''')
+ about.set_copyright(__copyright__)
+ about.set_comments(__description__)
about.set_license_type(Gtk.License.GPL_3_0)
- about.set_website('http://gpapers.org/')
- about.set_authors(['Derek Anderson <pub...@kered.org>',
- 'Marcel Stimberg'])
+ about.set_website(__url__)
+ about.set_authors(__authors__)
about.connect('response', lambda x, y: about.destroy())
about.show()

=======================================
--- /gpapers/importer/__init__.py Tue Jun 26 08:36:55 2012
+++ /gpapers/importer/__init__.py Sun Jul 29 07:16:35 2012
@@ -34,6 +34,7 @@
import BeautifulSoup

import gpapers
+from gpapers_info import __version__
from gpapers.logger import *
from gpapers.gPapers.models import Paper

@@ -44,7 +45,7 @@

soup_session = Soup.SessionAsync()
#arXiv disallows requests if no user-agent is set
-soup_session.set_property("user-agent", "gPapers/%s" % gpapers.__version__)
+soup_session.set_property("user-agent", "gPapers/%s" % __version__)


def _decode_htmlentities(string):
=======================================
--- /setup.py Fri Jun 29 15:25:36 2012
+++ /setup.py Sun Jul 29 07:16:35 2012
@@ -23,7 +23,7 @@
from distutils.core import setup
from distutils.command.build_scripts import build_scripts as
build_scripts_class

-from gpapers import __version__
+from gpapers_info import (__version__, __authors__, __description__,
__url__)

class gpapers_build_scripts_class(build_scripts_class):
# Adjust bin/gpapers.py --> gpapers
@@ -53,11 +53,12 @@
setup(name='gPapers',
cmdclass={'build_scripts': gpapers_build_scripts_class},
version=__version__,
- description='The Gnome-based Scientific Paper Organizer',
- author='Derek Anderson <pub...@kered.org>, Marcel Stimberg
<marcel...@googlemail.com>',
+ description=__description__,
+ author=', '.join(__authors__),
author_email='gpapers...@googlegroups.com',
- url='http://gpapers.org/',
+ url=__url__,
packages=['gpapers', 'gpapers.gPapers', 'gpapers.importer'],
+ py_modules=['gpapers_info'],
package_data={'gpapers': ['data/*', 'icons/*']},
data_files=[('share/applications', ['xdg/gpapers.desktop'])],
scripts=['gpapers.py'],

==============================================================================
Revision: c89f6a6e1dfa
Author: Marcel Stimberg <marcel...@gmail.com>
Date: Sun Jul 29 15:16:55 2012
Log: use a standard date format
http://code.google.com/p/gpapers/source/detail?r=c89f6a6e1dfa

Modified:
/gpapers/__init__.py

=======================================
--- /gpapers/__init__.py Sun Jul 29 07:16:35 2012
+++ /gpapers/__init__.py Sun Jul 29 15:16:55 2012
@@ -270,6 +270,8 @@
else:
pub_year = ''
return pub_year
+ elif attribute == 'Created':
+ return paper.created.strftime('%x')
else:
# Get the value of the respective attribute
return unicode(getattr(paper, attribute.lower()))

==============================================================================
Revision: ed11b79da7d8
Author: Marcel Stimberg <marcel...@gmail.com>
Date: Tue Jul 31 14:29:21 2012
Log: Fix hang in DOI dialog (Fixes issue 12). Allow more PDF mime
types (Fixes issue 13). Thanks to 98310brem for reports and fixes
http://code.google.com/p/gpapers/source/detail?r=ed11b79da7d8

Modified:
/gpapers/__init__.py
/gpapers/importer/__init__.py
/gpapers/importer/provider_base.py

=======================================
--- /gpapers/__init__.py Sun Jul 29 15:16:55 2012
+++ /gpapers/__init__.py Tue Jul 31 14:29:21 2012
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
# -*- coding: utf-8 -*-

# gPapers
=======================================
--- /gpapers/importer/__init__.py Sun Jul 29 07:16:35 2012
+++ /gpapers/importer/__init__.py Tue Jul 31 14:29:21 2012
@@ -142,16 +142,17 @@
callback is called with the `paper_info` (a dictionary) and
`paper_data`
(binary data) as an argument.
'''
-
- importer.active_threads[url] = 'Importing %s' % url
-
- def data_received(session, message, user_data):
+
+ active_threads[url] = 'Importing %s' % url
+
+ def data_received(session, message, user_data):
+
if not message.status_code == Soup.KnownStatusCode.OK:
# FIXME: Use error handler here
log_warn('URL %s responded with error code %d' % (user_data,

message.status_code))
- if url in importer.active_threads:
- del importer.active_threads[url]
+ if url in active_threads:
+ del active_threads[url]
callback(user_data=user_data)
return

@@ -173,12 +174,12 @@

message.get_uri()))

if content_type == 'application/pdf':
- if url in importer.active_threads:
- del importer.active_threads[url]
+ if url in active_threads:
+ del active_threads[url]
callback(paper_info=paper_info, paper_data=data,
user_data=user_data)
elif (content_type == 'text/x-bibtex' or first_letter == '@') and
not paper_info:
- if url in importer.active_threads:
- del importer.active_threads[url]
+ if url in active_threads:
+ del active_threads[url]
callback(paper_info=bibtex.paper_info_from_bibtex(data),
paper_data=paper_data, user_data=user_data)
elif content_type == 'text/html':
@@ -221,19 +222,19 @@
#Combine the base URL with the PDF link (necessary for
relative URLs)
urls = [urlparse.urljoin(orig_url, url) for url in urls]
log_debug('Calling import_from_urls with %s' % str(urls))
- if url in importer.active_threads:
- del importer.active_threads[url]
+ if url in active_threads:
+ del active_threads[url]
import_from_urls(urls, callback, user_data)
else:
log_warn('Nothing found...')
- if url in importer.active_threads:
- del importer.active_threads[url]
+ if url in active_threads:
+ del active_threads[url]
callback(paper_info=paper_info, paper_data=paper_data,
user_data=user_data)
else:
log_warn('Do not know what to do with content type %s of
URL %s' % (content_type, orig_url))
- if url in importer.active_threads:
- del importer.active_threads[url]
+ if url in active_threads:
+ del active_threads[url]
callback(paper_info=paper_info, paper_data=paper_data,
user_data=user_data)

try:
@@ -247,8 +248,8 @@
soup_session.queue_message(message, data_received, url)
log_debug('Message queued')
else:
- if url in importer.active_threads:
- del importer.active_threads[url]
+ if url in active_threads:
+ del active_threads[url]
callback(paper_info, paper_data, url)


@@ -280,7 +281,7 @@
log_debug('Received content type %s for URI %s' % (content_type,

message.get_uri()))

- if content_type == 'application/pdf' and not paper_data:
+ if content_type.startswith('application/pdf') and not paper_data:
_import_from_urls(urls, callback, user_data,
paper_info=paper_info,
paper_data=data)
elif (content_type == 'text/x-bibtex' or first_letter == '@') and
not paper_info:
=======================================
--- /gpapers/importer/provider_base.py Tue Jun 26 08:36:55 2012
+++ /gpapers/importer/provider_base.py Tue Jul 31 14:29:21 2012
@@ -24,7 +24,7 @@

from gpapers.logger import log_info, log_debug
from gpapers.importer import soup_session
-
+from gpapers.importer import active_threads

class WebSearchProvider(object):
'''

==============================================================================
Revision: 6e8746c1f2a0
Author: Marcel Stimberg <marcel...@gmail.com>
Date: Tue Jul 31 15:06:51 2012
Log: do not throw away DOI info after downloading a paper via DOI,
instead use DOI for downloading metadata
http://code.google.com/p/gpapers/source/detail?r=6e8746c1f2a0

Modified:
/gpapers/__init__.py
/gpapers/importer/__init__.py

=======================================
--- /gpapers/__init__.py Tue Jul 31 14:29:21 2012
+++ /gpapers/__init__.py Tue Jul 31 15:06:51 2012
@@ -347,6 +347,9 @@
if paper_data is None and paper_info is None and paper_obj is None:
# FIXME: This should be handled via an error callback
return
+
+ if paper_info is None:
+ paper_info = {}

if paper_obj is not None:
# This is a paper imported after a search, merge its info with
@@ -354,34 +357,26 @@
# paper object info -- e.g. google search gives very imprecise
# results for a search but the BibTeX contains more accurate
# info)
- if paper_info is None:
- paper_info = {}
-
for key in paper_obj.paper_info:
if not key in paper_info:
paper_info[key] = paper_obj.paper_info[key]

- if paper_data is not None:
-
+ if paper_data is not None:
# Get some info from the PDF:
self.active_threads[str(user_data)] = 'Extracting data from
PDF'
+ # FIXME: This extraction should be done asynchronously
paper_info_pdf = pdf_file.get_paper_info_from_pdf(paper_data)
del self.active_threads[str(user_data)]

- log_debug('in document_imported: paper_info is %s' %
paper_info)
- # Add everything that is not already known
- if paper_info is None:
- paper_info = paper_info_pdf
- # If we get a DOI, download the metadata
- need_paper_info = True
- else:
- need_paper_info = False
- for key in paper_info_pdf.keys():
- if not key in paper_info:
- paper_info[key] = paper_info_pdf[key]
-
- paper = paper_from_dictionary(paper_info)
-
+ # The info directly taken from the PDF is generally not very
good,
+ # overwrite conflicting info with any additionally given info
+ paper_info_pdf.update(paper_info)
+ paper_info = paper_info_pdf
+
+ paper = paper_from_dictionary(paper_info)
+
+ # If we have a PDF file, save the file
+ if paper_data is not None:
#TODO: What is a good filename? Make this configurable?
if paper.doi:
filename = 'doi_' + paper.doi
@@ -394,12 +389,11 @@
log_debug('Saving paper to "%s"' % filename)
paper.save_file(filename, paper_data)
log_debug('Paper saved')
- if need_paper_info and paper.doi:
- log_debug('Downloading metadata')
- importer.get_bibtex_for_doi(paper.doi,
self.bibtex_received)
- else:
- log_debug('Calling paper_from_dictionary for %s' %
str(paper_info))
- paper = paper_from_dictionary(paper_info)
+
+ # If we have a DOI, try to get bibtex metadata for the paper
+ if paper.doi:
+ log_debug('Downloading metadata for doi %s' % str(paper.doi))
+ importer.get_bibtex_for_doi(paper.doi, self.bibtex_received)

paper.save()

@@ -447,8 +441,12 @@
dialog.show_all()
response = dialog.run()
if response == Gtk.ResponseType.OK:
- url = 'http://dx.doi.org/' + entry.get_text().strip()
- importer.import_from_url(url, self.document_imported)
+ doi = entry.get_text().strip()
+ url = 'http://dx.doi.org/' + doi
+ # Already set the DOI, useful for downloading full metadata
+ paper_info = dict(doi=doi)
+ importer.import_from_url(url, self.document_imported,
+ paper_info=paper_info)

dialog.destroy()

=======================================
--- /gpapers/importer/__init__.py Tue Jul 31 14:29:21 2012
+++ /gpapers/importer/__init__.py Tue Jul 31 15:06:51 2012
@@ -142,11 +142,10 @@
callback is called with the `paper_info` (a dictionary) and
`paper_data`
(binary data) as an argument.
'''
-
+
active_threads[url] = 'Importing %s' % url

- def data_received(session, message, user_data):
-
+ def data_received(session, message, user_data):
if not message.status_code == Soup.KnownStatusCode.OK:
# FIXME: Use error handler here
log_warn('URL %s responded with error code %d' % (user_data,
@@ -164,7 +163,7 @@

if message.response_body.data:
data = message.response_body.flatten().get_data()
- # Heuristic: BibTeX data starts with a @
+ # Heuristic: BibTeX data starts with a @
first_letter = data.strip()[0]
else:
first_letter = None
@@ -224,7 +223,8 @@
log_debug('Calling import_from_urls with %s' % str(urls))
if url in active_threads:
del active_threads[url]
- import_from_urls(urls, callback, user_data)
+ import_from_urls(urls, callback, user_data,
+ paper_info=paper_info,
paper_data=paper_data)
else:
log_warn('Nothing found...')
if url in active_threads:
@@ -261,7 +261,7 @@
called with the `paper_info` (a dictionary) and `paper_data` (binary
data)
as an argument.
'''
- log_debug('_import_from_urls')
+ log_debug('_import_from_urls, paper_info: %s' % str(paper_info))
if not urls or (paper_info and paper_data):
callback(paper_info, paper_data, user_data)
return
@@ -273,7 +273,6 @@
if message.response_body.data:
# Heuristic: BibTeX data starts with a @
first_letter = message.response_body.data.strip()[0]
- log_debug('First letter of Body is: %s' % first_letter)
data = message.response_body.flatten().get_data()
else:
first_letter = None
@@ -297,7 +296,7 @@
soup_session.queue_message(message, data_received, user_data)


-def import_from_urls(urls, callback, user_data):
+def import_from_urls(urls, callback, user_data, paper_info=None,
paper_data=None):
'''
Searches the given urls (asynchronously) for a PDF and/or metadata
(currently it will only look for BibTeX data). When either all URLs
have
@@ -306,7 +305,8 @@
as an argument.
'''
if urls is None:
- callback(user_data=user_data)
+ callback(paper_info=paper_info, paper_data=paper_data,
+ user_data=user_data)

log_info(('Starting to look for PDF and/or metadata '
'from %d possible URLs' % len(urls)))
@@ -316,4 +316,5 @@
callback(paper_info=paper_info, paper_data=paper_data,
user_data=user_data)

- _import_from_urls(urls, _import_from_urls_finished, user_data)
+ _import_from_urls(urls, _import_from_urls_finished, user_data,
+ paper_info=paper_info, paper_data=paper_data)
Reply all
Reply to author
Forward
0 new messages