Exporting Plone content as JSON

Below is a simple Python script for exporting data from Plone CMS as JSON.

Use cases

  • Migrating to other CMSs
  • Downloading data dumps to play with
  • Making content items available to play around with in JavaScript

The script handles

  • Nested folders
  • Binary data
  • Can be executed from the command-line or from a web browser

Pros

  • Much simpler and more lightweight than anything involving the collective.transmogrifier tool. Time to write this script was around 1.5 hours.

The script is also on developer.plone.org. See the original blog post for syntax highlighting. Use with love and care or with booze and whiskey.

"""

    Export folder contents as JSON.

    Can be run as a browser view or command line script.

"""

import os
import base64

try:
    import json
except ImportError:
    # Python 2.54 / Plone 3.3 use simplejson
    # version > 2.3 < 3.0
    import simplejson as json

from Products.Five.browser import BrowserView
from Products.CMFCore.interfaces import IFolderish
from DateTime import DateTime

#: Attribute names read directly off each object and added to its export entry
EXPORT_ATTRIBUTES = ["portal_type", "id"]

#: Whether binary payloads are dumped out. Defaults to True; the EXPORT_BINARY
#: environment variable overrides it: unset, empty or exactly "true" keeps
#: binary export on, any other value turns it off.
EXPORT_BINARY = os.getenv("EXPORT_BINARY", None) in (None, "", "true")

class ExportFolderAsJSON(BrowserView):
    """
    Exports the current context folder Archetypes as JSON.

    Returns downloadable JSON from the data.
    """

    def convert(self, value):
        """
        Convert value to more JSON friendly format.

        :param value: raw field or attribute value
        :return: ISO 8601 string for Zope DateTime values, base64 encoded
                 payload (or None) for binary values, otherwise the value
                 itself, unchanged
        """
        if isinstance(value, DateTime):
            # Zope DateTime
            # http://pypi.python.org/pypi/DateTime/3.0.2
            return value.ISO8601()
        elif hasattr(value, "isBinary") and value.isBinary():

            # Binary export can be switched off with the EXPORT_BINARY env var
            if not EXPORT_BINARY:
                return None

            # Archetypes FileField and ImageField payloads
            # are binary as OFS.Image.File object
            # NOTE(review): the payload is read from value.data.data; a value
            # whose .data has no .data attribute is exported as None - confirm
            # this covers every storage form of the file fields in use.
            data = getattr(value.data, "data", None)
            if not data:
                return None
            return base64.b64encode(data)
        else:
            # Passthrough
            return value

    def grabArchetypesData(self, obj):
        """
        Export Archetypes schema data as dictionary object.

        Binary fields are encoded as BASE64.

        :param obj: content item providing ``Schema()``
        :return: dictionary mapping field name to converted raw value
        """
        data = {}
        for field in obj.Schema().fields():
            # No debug output here: in command line mode the JSON goes to
            # stdout, so any stray print would corrupt the exported file.
            data[field.getName()] = self.convert(field.getRaw(obj))
        return data

    def grabAttributes(self, obj):
        """
        Export the attributes listed in EXPORT_ATTRIBUTES.

        :param obj: content item
        :return: dictionary mapping attribute name to converted value;
                 attributes missing on the object are exported as None
        """
        data = {}
        for key in EXPORT_ATTRIBUTES:
            data[key] = self.convert(getattr(obj, key, None))
        return data

    def export(self, folder, recursive=False):
        """
        Export content items.

        Possible to do recursively nesting into the children.

        :param folder: folder whose ``listFolderContents()`` is exported
        :param recursive: when true, folderish items get a ``children`` key
                          holding the export of their own contents
        :return: list of dictionaries
        """

        array = []
        for obj in folder.listFolderContents():
            data = self.grabArchetypesData(obj)
            # Plain attributes are applied last, so they win over a schema
            # field of the same name
            data.update(self.grabAttributes(obj))

            if recursive:
                if IFolderish.providedBy(obj):
                    data["children"] = self.export(obj, True)

            array.append(data)

        return array

    def __call__(self):
        """
        Render the context folder contents as a pretty printed JSON response.

        Only the direct contents are exported (no recursion).

        :return: JSON string
        """
        folder = self.context.aq_inner
        data = self.export(folder)
        # Integer indent: the Python 2 json / simplejson modules raise
        # TypeError when given a string here
        pretty = json.dumps(data, sort_keys=True, indent=4)
        self.request.response.setHeader("Content-type", "application/json")
        return pretty

def spoof_request(app):
    """
    Install a permissive security policy and an omnipotent user for ``app``.

    See http://developer.plone.org/misc/commandline.html

    :param app: application root providing ``acl_users``
    :return: the same ``app`` object that was passed in
    """
    from AccessControl.SecurityManagement import newSecurityManager
    from AccessControl.SecurityManager import setSecurityPolicy
    from Products.CMFCore.tests.base.security import PermissiveSecurityPolicy, OmnipotentUser

    setSecurityPolicy(PermissiveSecurityPolicy())
    # The user has to be wrapped in the user folder before it is activated
    superuser = OmnipotentUser().__of__(app.acl_users)
    newSecurityManager(None, superuser)
    return app

def run_export_as_script(path):
    """ Command line helper function.

    Using from the command line::

        bin/instance run export.py yoursiteid/path/to/folder

    If you have a lot of binary data (images) you probably want::

        bin/instance run export.py yoursiteid/path/to/folder > yourdata.json

    ... to prevent your terminal being flooded with base64.

    Or just pure data, no binary::

        EXPORT_BINARY=false bin/instance run export.py yoursiteid/path/to/folder

    The folder is exported recursively and the resulting JSON is printed
    to standard output.

    :param path: Full ZODB path to the folder
    """
    # ``app`` is not defined in this file; it is expected to already exist
    # in the module globals when the file is executed as a script.
    global app

    secure_aware_app = spoof_request(app)
    folder = secure_aware_app.unrestrictedTraverse(path)
    view = ExportFolderAsJSON(folder, None)
    data = view.export(folder, recursive=True)
    # Integer indent: the Python 2 json / simplejson modules raise TypeError
    # when given a string here
    pretty = json.dumps(data, sort_keys=True, indent=4)
    # Single-argument call form behaves the same on Python 2 and Python 3
    print(pretty)

# Detect if run as a bin/instance run script: in that case ``app`` is
# already present in the module globals.
if "app" in globals():
    # Imported here because the module never imports sys at the top
    import sys

    # First command line argument is the ZODB path of the folder to export
    run_export_as_script(sys.argv[1])

 

 Subscribe to RSS feed Follow me on Twitter Follow me on Facebook Follow me on Google+

4 thoughts on “Exporting Plone content as JSON”

  1. Me gustaría estudiar este contenido y aprender más … Paso a paso …

  2. How do I use this script. I’m a PHP dev and have no Python experience at all!

  3. Thanks for this Mikko, very useful!

    Two things in passing:
    1. I needed to change “indent='    '” to “indent=4” in the call to “json.dumps(..)” to get it working for me on Plone 4.2.4/Python 2.7.1 (otherwise I get “TypeError: can’t multiply sequence by non-int of type ‘str’”)
    2. If one iterates over “Schema().viewableFields(obj)” rather than “Schema().fields()” the view permission on the field is checked for the user invoking this. That might be useful for people who want to expose a JSON view to regular users based on this.

    Cheers,

    Raphael

Leave a Reply

Your email address will not be published. Required fields are marked *

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>