This commit is contained in:
2023-07-03 20:39:45 +02:00
parent 25445b2a98
commit 5152f44924
149 changed files with 17896 additions and 5 deletions

View File

@ -0,0 +1,64 @@
# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc. All rights reserved.
# http://code.google.com/p/protobuf/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This module is the central entity that determines which implementation of the
API is used.
"""
__author__ = 'petar@google.com (Petar Petrov)'
import os
# This environment variable can be used to switch to a certain implementation
# of the Python API. Right now only 'python' and 'cpp' are valid values. Any
# other value will be ignored.
_implementation_type = os.getenv('PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION',
'python')
if _implementation_type != 'python':
# For now, by default use the pure-Python implementation.
# The code below checks if the C extension is available and
# uses it if it is available.
_implementation_type = 'cpp'
## Determine automatically which implementation to use.
#try:
# from google.protobuf.internal import cpp_message
# _implementation_type = 'cpp'
#except ImportError, e:
# _implementation_type = 'python'
# Usage of this function is discouraged. Clients shouldn't care which
# implementation of the API is in use. Note that there is no guarantee
# that differences between APIs will be maintained.
# Please don't use this function if possible.
def Type():
return _implementation_type

View File

@ -0,0 +1,259 @@
# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc. All rights reserved.
# http://code.google.com/p/protobuf/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Contains container classes to represent different protocol buffer types.
This file defines container classes which represent categories of protocol
buffer field types which need extra maintenance. Currently these categories
are:
- Repeated scalar fields - These are all repeated fields which aren't
composite (e.g. they are of simple types like int32, string, etc).
- Repeated composite fields - Repeated fields which are composite. This
includes groups and nested messages.
"""
__author__ = 'petar@google.com (Petar Petrov)'
class BaseContainer(object):
"""Base container class."""
# Minimizes memory usage and disallows assignment to other attributes.
__slots__ = ['_message_listener', '_values']
def __init__(self, message_listener):
"""
Args:
message_listener: A MessageListener implementation.
The RepeatedScalarFieldContainer will call this object's
Modified() method when it is modified.
"""
self._message_listener = message_listener
self._values = []
def __getitem__(self, key):
"""Retrieves item by the specified key."""
return self._values[key]
def __len__(self):
"""Returns the number of elements in the container."""
return len(self._values)
def __ne__(self, other):
"""Checks if another instance isn't equal to this one."""
# The concrete classes should define __eq__.
return not self == other
def __hash__(self):
raise TypeError('unhashable object')
def __repr__(self):
return repr(self._values)
def sort(self, sort_function=cmp):
self._values.sort(sort_function)
class RepeatedScalarFieldContainer(BaseContainer):
"""Simple, type-checked, list-like container for holding repeated scalars."""
# Disallows assignment to other attributes.
__slots__ = ['_type_checker']
def __init__(self, message_listener, type_checker):
"""
Args:
message_listener: A MessageListener implementation.
The RepeatedScalarFieldContainer will call this object's
Modified() method when it is modified.
type_checker: A type_checkers.ValueChecker instance to run on elements
inserted into this container.
"""
super(RepeatedScalarFieldContainer, self).__init__(message_listener)
self._type_checker = type_checker
def append(self, value):
"""Appends an item to the list. Similar to list.append()."""
self._type_checker.CheckValue(value)
self._values.append(value)
if not self._message_listener.dirty:
self._message_listener.Modified()
def insert(self, key, value):
"""Inserts the item at the specified position. Similar to list.insert()."""
self._type_checker.CheckValue(value)
self._values.insert(key, value)
if not self._message_listener.dirty:
self._message_listener.Modified()
def extend(self, elem_seq):
"""Extends by appending the given sequence. Similar to list.extend()."""
if not elem_seq:
return
new_values = []
for elem in elem_seq:
self._type_checker.CheckValue(elem)
new_values.append(elem)
self._values.extend(new_values)
self._message_listener.Modified()
def MergeFrom(self, other):
"""Appends the contents of another repeated field of the same type to this
one. We do not check the types of the individual fields.
"""
self._values.extend(other._values)
self._message_listener.Modified()
def remove(self, elem):
"""Removes an item from the list. Similar to list.remove()."""
self._values.remove(elem)
self._message_listener.Modified()
def __setitem__(self, key, value):
"""Sets the item on the specified position."""
self._type_checker.CheckValue(value)
self._values[key] = value
self._message_listener.Modified()
def __getslice__(self, start, stop):
"""Retrieves the subset of items from between the specified indices."""
return self._values[start:stop]
def __setslice__(self, start, stop, values):
"""Sets the subset of items from between the specified indices."""
new_values = []
for value in values:
self._type_checker.CheckValue(value)
new_values.append(value)
self._values[start:stop] = new_values
self._message_listener.Modified()
def __delitem__(self, key):
"""Deletes the item at the specified position."""
del self._values[key]
self._message_listener.Modified()
def __delslice__(self, start, stop):
"""Deletes the subset of items from between the specified indices."""
del self._values[start:stop]
self._message_listener.Modified()
def __eq__(self, other):
"""Compares the current instance with another one."""
if self is other:
return True
# Special case for the same type which should be common and fast.
if isinstance(other, self.__class__):
return other._values == self._values
# We are presumably comparing against some other sequence type.
return other == self._values
class RepeatedCompositeFieldContainer(BaseContainer):
"""Simple, list-like container for holding repeated composite fields."""
# Disallows assignment to other attributes.
__slots__ = ['_message_descriptor']
def __init__(self, message_listener, message_descriptor):
"""
Note that we pass in a descriptor instead of the generated directly,
since at the time we construct a _RepeatedCompositeFieldContainer we
haven't yet necessarily initialized the type that will be contained in the
container.
Args:
message_listener: A MessageListener implementation.
The RepeatedCompositeFieldContainer will call this object's
Modified() method when it is modified.
message_descriptor: A Descriptor instance describing the protocol type
that should be present in this container. We'll use the
_concrete_class field of this descriptor when the client calls add().
"""
super(RepeatedCompositeFieldContainer, self).__init__(message_listener)
self._message_descriptor = message_descriptor
def add(self, **kwargs):
"""Adds a new element at the end of the list and returns it. Keyword
arguments may be used to initialize the element.
"""
new_element = self._message_descriptor._concrete_class(**kwargs)
new_element._SetListener(self._message_listener)
self._values.append(new_element)
if not self._message_listener.dirty:
self._message_listener.Modified()
return new_element
def extend(self, elem_seq):
"""Extends by appending the given sequence of elements of the same type
as this one, copying each individual message.
"""
message_class = self._message_descriptor._concrete_class
listener = self._message_listener
values = self._values
for message in elem_seq:
new_element = message_class()
new_element._SetListener(listener)
new_element.MergeFrom(message)
values.append(new_element)
listener.Modified()
def MergeFrom(self, other):
"""Appends the contents of another repeated field of the same type to this
one, copying each individual message.
"""
self.extend(other._values)
def __getslice__(self, start, stop):
"""Retrieves the subset of items from between the specified indices."""
return self._values[start:stop]
def __delitem__(self, key):
"""Deletes the item at the specified position."""
del self._values[key]
self._message_listener.Modified()
def __delslice__(self, start, stop):
"""Deletes the subset of items from between the specified indices."""
del self._values[start:stop]
self._message_listener.Modified()
def __eq__(self, other):
"""Compares the current instance with another one."""
if self is other:
return True
if not isinstance(other, self.__class__):
raise TypeError('Can only compare repeated composite fields against '
'other repeated composite fields.')
return self._values == other._values

View File

@ -0,0 +1,616 @@
# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc. All rights reserved.
# http://code.google.com/p/protobuf/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Contains helper functions used to create protocol message classes from
Descriptor objects at runtime backed by the protocol buffer C++ API.
"""
__author__ = 'petar@google.com (Petar Petrov)'
import operator
from google.protobuf.internal import _net_proto2___python
from google.protobuf import message
_LABEL_REPEATED = _net_proto2___python.LABEL_REPEATED
_LABEL_OPTIONAL = _net_proto2___python.LABEL_OPTIONAL
_CPPTYPE_MESSAGE = _net_proto2___python.CPPTYPE_MESSAGE
_TYPE_MESSAGE = _net_proto2___python.TYPE_MESSAGE
def GetDescriptorPool():
"""Creates a new DescriptorPool C++ object."""
return _net_proto2___python.NewCDescriptorPool()
_pool = GetDescriptorPool()
def GetFieldDescriptor(full_field_name):
"""Searches for a field descriptor given a full field name."""
return _pool.FindFieldByName(full_field_name)
def BuildFile(content):
"""Registers a new proto file in the underlying C++ descriptor pool."""
_net_proto2___python.BuildFile(content)
def GetExtensionDescriptor(full_extension_name):
"""Searches for extension descriptor given a full field name."""
return _pool.FindExtensionByName(full_extension_name)
def NewCMessage(full_message_name):
"""Creates a new C++ protocol message by its name."""
return _net_proto2___python.NewCMessage(full_message_name)
def ScalarProperty(cdescriptor):
"""Returns a scalar property for the given descriptor."""
def Getter(self):
return self._cmsg.GetScalar(cdescriptor)
def Setter(self, value):
self._cmsg.SetScalar(cdescriptor, value)
return property(Getter, Setter)
def CompositeProperty(cdescriptor, message_type):
"""Returns a Python property the given composite field."""
def Getter(self):
sub_message = self._composite_fields.get(cdescriptor.name, None)
if sub_message is None:
cmessage = self._cmsg.NewSubMessage(cdescriptor)
sub_message = message_type._concrete_class(__cmessage=cmessage)
self._composite_fields[cdescriptor.name] = sub_message
return sub_message
return property(Getter)
class RepeatedScalarContainer(object):
"""Container for repeated scalar fields."""
__slots__ = ['_message', '_cfield_descriptor', '_cmsg']
def __init__(self, msg, cfield_descriptor):
self._message = msg
self._cmsg = msg._cmsg
self._cfield_descriptor = cfield_descriptor
def append(self, value):
self._cmsg.AddRepeatedScalar(
self._cfield_descriptor, value)
def extend(self, sequence):
for element in sequence:
self.append(element)
def insert(self, key, value):
values = self[slice(None, None, None)]
values.insert(key, value)
self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values)
def remove(self, value):
values = self[slice(None, None, None)]
values.remove(value)
self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values)
def __setitem__(self, key, value):
values = self[slice(None, None, None)]
values[key] = value
self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values)
def __getitem__(self, key):
return self._cmsg.GetRepeatedScalar(self._cfield_descriptor, key)
def __delitem__(self, key):
self._cmsg.DeleteRepeatedField(self._cfield_descriptor, key)
def __len__(self):
return len(self[slice(None, None, None)])
def __eq__(self, other):
if self is other:
return True
if not operator.isSequenceType(other):
raise TypeError(
'Can only compare repeated scalar fields against sequences.')
# We are presumably comparing against some other sequence type.
return other == self[slice(None, None, None)]
def __ne__(self, other):
return not self == other
def __hash__(self):
raise TypeError('unhashable object')
def sort(self, sort_function=cmp):
values = self[slice(None, None, None)]
values.sort(sort_function)
self._cmsg.AssignRepeatedScalar(self._cfield_descriptor, values)
def RepeatedScalarProperty(cdescriptor):
"""Returns a Python property the given repeated scalar field."""
def Getter(self):
container = self._composite_fields.get(cdescriptor.name, None)
if container is None:
container = RepeatedScalarContainer(self, cdescriptor)
self._composite_fields[cdescriptor.name] = container
return container
def Setter(self, new_value):
raise AttributeError('Assignment not allowed to repeated field '
'"%s" in protocol message object.' % cdescriptor.name)
doc = 'Magic attribute generated for "%s" proto field.' % cdescriptor.name
return property(Getter, Setter, doc=doc)
class RepeatedCompositeContainer(object):
"""Container for repeated composite fields."""
__slots__ = ['_message', '_subclass', '_cfield_descriptor', '_cmsg']
def __init__(self, msg, cfield_descriptor, subclass):
self._message = msg
self._cmsg = msg._cmsg
self._subclass = subclass
self._cfield_descriptor = cfield_descriptor
def add(self, **kwargs):
cmessage = self._cmsg.AddMessage(self._cfield_descriptor)
return self._subclass(__cmessage=cmessage, __owner=self._message, **kwargs)
def extend(self, elem_seq):
"""Extends by appending the given sequence of elements of the same type
as this one, copying each individual message.
"""
for message in elem_seq:
self.add().MergeFrom(message)
def MergeFrom(self, other):
for message in other[:]:
self.add().MergeFrom(message)
def __getitem__(self, key):
cmessages = self._cmsg.GetRepeatedMessage(
self._cfield_descriptor, key)
subclass = self._subclass
if not isinstance(cmessages, list):
return subclass(__cmessage=cmessages, __owner=self._message)
return [subclass(__cmessage=m, __owner=self._message) for m in cmessages]
def __delitem__(self, key):
self._cmsg.DeleteRepeatedField(
self._cfield_descriptor, key)
def __len__(self):
return self._cmsg.FieldLength(self._cfield_descriptor)
def __eq__(self, other):
"""Compares the current instance with another one."""
if self is other:
return True
if not isinstance(other, self.__class__):
raise TypeError('Can only compare repeated composite fields against '
'other repeated composite fields.')
messages = self[slice(None, None, None)]
other_messages = other[slice(None, None, None)]
return messages == other_messages
def __hash__(self):
raise TypeError('unhashable object')
def sort(self, sort_function=cmp):
messages = []
for index in range(len(self)):
# messages[i][0] is where the i-th element of the new array has to come
# from.
# messages[i][1] is where the i-th element of the old array has to go.
messages.append([index, 0, self[index]])
messages.sort(lambda x,y: sort_function(x[2], y[2]))
# Remember which position each elements has to move to.
for i in range(len(messages)):
messages[messages[i][0]][1] = i
# Apply the transposition.
for i in range(len(messages)):
from_position = messages[i][0]
if i == from_position:
continue
self._cmsg.SwapRepeatedFieldElements(
self._cfield_descriptor, i, from_position)
messages[messages[i][1]][0] = from_position
def RepeatedCompositeProperty(cdescriptor, message_type):
"""Returns a Python property for the given repeated composite field."""
def Getter(self):
container = self._composite_fields.get(cdescriptor.name, None)
if container is None:
container = RepeatedCompositeContainer(
self, cdescriptor, message_type._concrete_class)
self._composite_fields[cdescriptor.name] = container
return container
def Setter(self, new_value):
raise AttributeError('Assignment not allowed to repeated field '
'"%s" in protocol message object.' % cdescriptor.name)
doc = 'Magic attribute generated for "%s" proto field.' % cdescriptor.name
return property(Getter, Setter, doc=doc)
class ExtensionDict(object):
"""Extension dictionary added to each protocol message."""
def __init__(self, msg):
self._message = msg
self._cmsg = msg._cmsg
self._values = {}
def __setitem__(self, extension, value):
from google.protobuf import descriptor
if not isinstance(extension, descriptor.FieldDescriptor):
raise KeyError('Bad extension %r.' % (extension,))
cdescriptor = extension._cdescriptor
if (cdescriptor.label != _LABEL_OPTIONAL or
cdescriptor.cpp_type == _CPPTYPE_MESSAGE):
raise TypeError('Extension %r is repeated and/or a composite type.' % (
extension.full_name,))
self._cmsg.SetScalar(cdescriptor, value)
self._values[extension] = value
def __getitem__(self, extension):
from google.protobuf import descriptor
if not isinstance(extension, descriptor.FieldDescriptor):
raise KeyError('Bad extension %r.' % (extension,))
cdescriptor = extension._cdescriptor
if (cdescriptor.label != _LABEL_REPEATED and
cdescriptor.cpp_type != _CPPTYPE_MESSAGE):
return self._cmsg.GetScalar(cdescriptor)
ext = self._values.get(extension, None)
if ext is not None:
return ext
ext = self._CreateNewHandle(extension)
self._values[extension] = ext
return ext
def ClearExtension(self, extension):
from google.protobuf import descriptor
if not isinstance(extension, descriptor.FieldDescriptor):
raise KeyError('Bad extension %r.' % (extension,))
self._cmsg.ClearFieldByDescriptor(extension._cdescriptor)
if extension in self._values:
del self._values[extension]
def HasExtension(self, extension):
from google.protobuf import descriptor
if not isinstance(extension, descriptor.FieldDescriptor):
raise KeyError('Bad extension %r.' % (extension,))
return self._cmsg.HasFieldByDescriptor(extension._cdescriptor)
def _FindExtensionByName(self, name):
"""Tries to find a known extension with the specified name.
Args:
name: Extension full name.
Returns:
Extension field descriptor.
"""
return self._message._extensions_by_name.get(name, None)
def _CreateNewHandle(self, extension):
cdescriptor = extension._cdescriptor
if (cdescriptor.label != _LABEL_REPEATED and
cdescriptor.cpp_type == _CPPTYPE_MESSAGE):
cmessage = self._cmsg.NewSubMessage(cdescriptor)
return extension.message_type._concrete_class(__cmessage=cmessage)
if cdescriptor.label == _LABEL_REPEATED:
if cdescriptor.cpp_type == _CPPTYPE_MESSAGE:
return RepeatedCompositeContainer(
self._message, cdescriptor, extension.message_type._concrete_class)
else:
return RepeatedScalarContainer(self._message, cdescriptor)
# This shouldn't happen!
assert False
return None
def NewMessage(message_descriptor, dictionary):
"""Creates a new protocol message *class*."""
_AddClassAttributesForNestedExtensions(message_descriptor, dictionary)
_AddEnumValues(message_descriptor, dictionary)
_AddDescriptors(message_descriptor, dictionary)
def InitMessage(message_descriptor, cls):
"""Constructs a new message instance (called before instance's __init__)."""
cls._extensions_by_name = {}
_AddInitMethod(message_descriptor, cls)
_AddMessageMethods(message_descriptor, cls)
_AddPropertiesForExtensions(message_descriptor, cls)
def _AddDescriptors(message_descriptor, dictionary):
"""Sets up a new protocol message class dictionary.
Args:
message_descriptor: A Descriptor instance describing this message type.
dictionary: Class dictionary to which we'll add a '__slots__' entry.
"""
dictionary['__descriptors'] = {}
for field in message_descriptor.fields:
dictionary['__descriptors'][field.name] = GetFieldDescriptor(
field.full_name)
dictionary['__slots__'] = list(dictionary['__descriptors'].iterkeys()) + [
'_cmsg', '_owner', '_composite_fields', 'Extensions']
def _AddEnumValues(message_descriptor, dictionary):
"""Sets class-level attributes for all enum fields defined in this message.
Args:
message_descriptor: Descriptor object for this message type.
dictionary: Class dictionary that should be populated.
"""
for enum_type in message_descriptor.enum_types:
for enum_value in enum_type.values:
dictionary[enum_value.name] = enum_value.number
def _AddClassAttributesForNestedExtensions(message_descriptor, dictionary):
"""Adds class attributes for the nested extensions."""
extension_dict = message_descriptor.extensions_by_name
for extension_name, extension_field in extension_dict.iteritems():
assert extension_name not in dictionary
dictionary[extension_name] = extension_field
def _AddInitMethod(message_descriptor, cls):
"""Adds an __init__ method to cls."""
# Create and attach message field properties to the message class.
# This can be done just once per message class, since property setters and
# getters are passed the message instance.
# This makes message instantiation extremely fast, and at the same time it
# doesn't require the creation of property objects for each message instance,
# which saves a lot of memory.
for field in message_descriptor.fields:
field_cdescriptor = cls.__descriptors[field.name]
if field.label == _LABEL_REPEATED:
if field.cpp_type == _CPPTYPE_MESSAGE:
value = RepeatedCompositeProperty(field_cdescriptor, field.message_type)
else:
value = RepeatedScalarProperty(field_cdescriptor)
elif field.cpp_type == _CPPTYPE_MESSAGE:
value = CompositeProperty(field_cdescriptor, field.message_type)
else:
value = ScalarProperty(field_cdescriptor)
setattr(cls, field.name, value)
# Attach a constant with the field number.
constant_name = field.name.upper() + '_FIELD_NUMBER'
setattr(cls, constant_name, field.number)
def Init(self, **kwargs):
"""Message constructor."""
cmessage = kwargs.pop('__cmessage', None)
if cmessage is None:
self._cmsg = NewCMessage(message_descriptor.full_name)
else:
self._cmsg = cmessage
# Keep a reference to the owner, as the owner keeps a reference to the
# underlying protocol buffer message.
owner = kwargs.pop('__owner', None)
if owner is not None:
self._owner = owner
self.Extensions = ExtensionDict(self)
self._composite_fields = {}
for field_name, field_value in kwargs.iteritems():
field_cdescriptor = self.__descriptors.get(field_name, None)
if field_cdescriptor is None:
raise ValueError('Protocol message has no "%s" field.' % field_name)
if field_cdescriptor.label == _LABEL_REPEATED:
if field_cdescriptor.cpp_type == _CPPTYPE_MESSAGE:
for val in field_value:
getattr(self, field_name).add().MergeFrom(val)
else:
getattr(self, field_name).extend(field_value)
elif field_cdescriptor.cpp_type == _CPPTYPE_MESSAGE:
getattr(self, field_name).MergeFrom(field_value)
else:
setattr(self, field_name, field_value)
Init.__module__ = None
Init.__doc__ = None
cls.__init__ = Init
def _IsMessageSetExtension(field):
"""Checks if a field is a message set extension."""
return (field.is_extension and
field.containing_type.has_options and
field.containing_type.GetOptions().message_set_wire_format and
field.type == _TYPE_MESSAGE and
field.message_type == field.extension_scope and
field.label == _LABEL_OPTIONAL)
def _AddMessageMethods(message_descriptor, cls):
"""Adds the methods to a protocol message class."""
if message_descriptor.is_extendable:
def ClearExtension(self, extension):
self.Extensions.ClearExtension(extension)
def HasExtension(self, extension):
return self.Extensions.HasExtension(extension)
def HasField(self, field_name):
return self._cmsg.HasField(field_name)
def ClearField(self, field_name):
if field_name in self._composite_fields:
del self._composite_fields[field_name]
self._cmsg.ClearField(field_name)
def Clear(self):
return self._cmsg.Clear()
def IsInitialized(self, errors=None):
if self._cmsg.IsInitialized():
return True
if errors is not None:
errors.extend(self.FindInitializationErrors());
return False
def SerializeToString(self):
if not self.IsInitialized():
raise message.EncodeError(
'Message is missing required fields: ' +
','.join(self.FindInitializationErrors()))
return self._cmsg.SerializeToString()
def SerializePartialToString(self):
return self._cmsg.SerializePartialToString()
def ParseFromString(self, serialized):
self.Clear()
self.MergeFromString(serialized)
def MergeFromString(self, serialized):
byte_size = self._cmsg.MergeFromString(serialized)
if byte_size < 0:
raise message.DecodeError('Unable to merge from string.')
return byte_size
def MergeFrom(self, msg):
if not isinstance(msg, cls):
raise TypeError(
"Parameter to MergeFrom() must be instance of same class.")
self._cmsg.MergeFrom(msg._cmsg)
def CopyFrom(self, msg):
self._cmsg.CopyFrom(msg._cmsg)
def ByteSize(self):
return self._cmsg.ByteSize()
def SetInParent(self):
return self._cmsg.SetInParent()
def ListFields(self):
all_fields = []
field_list = self._cmsg.ListFields()
fields_by_name = cls.DESCRIPTOR.fields_by_name
for is_extension, field_name in field_list:
if is_extension:
extension = cls._extensions_by_name[field_name]
all_fields.append((extension, self.Extensions[extension]))
else:
field_descriptor = fields_by_name[field_name]
all_fields.append(
(field_descriptor, getattr(self, field_name)))
all_fields.sort(key=lambda item: item[0].number)
return all_fields
def FindInitializationErrors(self):
return self._cmsg.FindInitializationErrors()
def __str__(self):
return self._cmsg.DebugString()
def __eq__(self, other):
if self is other:
return True
if not isinstance(other, self.__class__):
return False
return self.ListFields() == other.ListFields()
def __ne__(self, other):
return not self == other
def __hash__(self):
raise TypeError('unhashable object')
def __unicode__(self):
return text_format.MessageToString(self, as_utf8=True).decode('utf-8')
# Attach the local methods to the message class.
for key, value in locals().copy().iteritems():
if key not in ('key', 'value', '__builtins__', '__name__', '__doc__'):
setattr(cls, key, value)
# Static methods:
def RegisterExtension(extension_handle):
extension_handle.containing_type = cls.DESCRIPTOR
cls._extensions_by_name[extension_handle.full_name] = extension_handle
if _IsMessageSetExtension(extension_handle):
# MessageSet extension. Also register under type name.
cls._extensions_by_name[
extension_handle.message_type.full_name] = extension_handle
cls.RegisterExtension = staticmethod(RegisterExtension)
def FromString(string):
msg = cls()
msg.MergeFromString(string)
return msg
cls.FromString = staticmethod(FromString)
def _AddPropertiesForExtensions(message_descriptor, cls):
"""Adds properties for all fields in this protocol message type."""
extension_dict = message_descriptor.extensions_by_name
for extension_name, extension_field in extension_dict.iteritems():
constant_name = extension_name.upper() + '_FIELD_NUMBER'
setattr(cls, constant_name, extension_field.number)

View File

@ -0,0 +1,714 @@
# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc. All rights reserved.
# http://code.google.com/p/protobuf/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Code for decoding protocol buffer primitives.
This code is very similar to encoder.py -- read the docs for that module first.
A "decoder" is a function with the signature:
Decode(buffer, pos, end, message, field_dict)
The arguments are:
buffer: The string containing the encoded message.
pos: The current position in the string.
end: The position in the string where the current message ends. May be
less than len(buffer) if we're reading a sub-message.
message: The message object into which we're parsing.
field_dict: message._fields (avoids a hashtable lookup).
The decoder reads the field and stores it into field_dict, returning the new
buffer position. A decoder for a repeated field may proactively decode all of
the elements of that field, if they appear consecutively.
Note that decoders may throw any of the following:
IndexError: Indicates a truncated message.
struct.error: Unpacking of a fixed-width field failed.
message.DecodeError: Other errors.
Decoders are expected to raise an exception if they are called with pos > end.
This allows callers to be lax about bounds checking: it's fineto read past
"end" as long as you are sure that someone else will notice and throw an
exception later on.
Something up the call stack is expected to catch IndexError and struct.error
and convert them to message.DecodeError.
Decoders are constructed using decoder constructors with the signature:
MakeDecoder(field_number, is_repeated, is_packed, key, new_default)
The arguments are:
field_number: The field number of the field we want to decode.
is_repeated: Is the field a repeated field? (bool)
is_packed: Is the field a packed field? (bool)
key: The key to use when looking up the field within field_dict.
(This is actually the FieldDescriptor but nothing in this
file should depend on that.)
new_default: A function which takes a message object as a parameter and
returns a new instance of the default value for this field.
(This is called for repeated fields and sub-messages, when an
instance does not already exist.)
As with encoders, we define a decoder constructor for every type of field.
Then, for every field of every message class we construct an actual decoder.
That decoder goes into a dict indexed by tag, so when we decode a message
we repeatedly read a tag, look up the corresponding decoder, and invoke it.
"""
__author__ = 'kenton@google.com (Kenton Varda)'
import struct
from google.protobuf.internal import encoder
from google.protobuf.internal import wire_format
from google.protobuf import message
# This will overflow and thus become IEEE-754 "infinity". We would use
# "float('inf')" but it doesn't work on Windows pre-Python-2.6.
_POS_INF = 1e10000
_NEG_INF = -_POS_INF
_NAN = _POS_INF * 0
# This is not for optimization, but rather to avoid conflicts with local
# variables named "message".
_DecodeError = message.DecodeError
def _VarintDecoder(mask):
"""Return an encoder for a basic varint value (does not include tag).
Decoded values will be bitwise-anded with the given mask before being
returned, e.g. to limit them to 32 bits. The returned decoder does not
take the usual "end" parameter -- the caller is expected to do bounds checking
after the fact (often the caller can defer such checking until later). The
decoder returns a (value, new_pos) pair.
"""
local_ord = ord
def DecodeVarint(buffer, pos):
result = 0
shift = 0
while 1:
b = local_ord(buffer[pos])
result |= ((b & 0x7f) << shift)
pos += 1
if not (b & 0x80):
result &= mask
return (result, pos)
shift += 7
if shift >= 64:
raise _DecodeError('Too many bytes when decoding varint.')
return DecodeVarint
def _SignedVarintDecoder(mask):
"""Like _VarintDecoder() but decodes signed values."""
local_ord = ord
def DecodeVarint(buffer, pos):
result = 0
shift = 0
while 1:
b = local_ord(buffer[pos])
result |= ((b & 0x7f) << shift)
pos += 1
if not (b & 0x80):
if result > 0x7fffffffffffffff:
result -= (1 << 64)
result |= ~mask
else:
result &= mask
return (result, pos)
shift += 7
if shift >= 64:
raise _DecodeError('Too many bytes when decoding varint.')
return DecodeVarint
_DecodeVarint = _VarintDecoder((1 << 64) - 1)
_DecodeSignedVarint = _SignedVarintDecoder((1 << 64) - 1)
# Use these versions for values which must be limited to 32 bits.
_DecodeVarint32 = _VarintDecoder((1 << 32) - 1)
_DecodeSignedVarint32 = _SignedVarintDecoder((1 << 32) - 1)
def ReadTag(buffer, pos):
"""Read a tag from the buffer, and return a (tag_bytes, new_pos) tuple.
We return the raw bytes of the tag rather than decoding them. The raw
bytes can then be used to look up the proper decoder. This effectively allows
us to trade some work that would be done in pure-python (decoding a varint)
for work that is done in C (searching for a byte string in a hash table).
In a low-level language it would be much cheaper to decode the varint and
use that, but not in Python.
"""
start = pos
while ord(buffer[pos]) & 0x80:
pos += 1
pos += 1
return (buffer[start:pos], pos)
# --------------------------------------------------------------------
def _SimpleDecoder(wire_type, decode_value):
"""Return a constructor for a decoder for fields of a particular type.
Args:
wire_type: The field's wire type.
decode_value: A function which decodes an individual value, e.g.
_DecodeVarint()
"""
def SpecificDecoder(field_number, is_repeated, is_packed, key, new_default):
if is_packed:
local_DecodeVarint = _DecodeVarint
def DecodePackedField(buffer, pos, end, message, field_dict):
value = field_dict.get(key)
if value is None:
value = field_dict.setdefault(key, new_default(message))
(endpoint, pos) = local_DecodeVarint(buffer, pos)
endpoint += pos
if endpoint > end:
raise _DecodeError('Truncated message.')
while pos < endpoint:
(element, pos) = decode_value(buffer, pos)
value.append(element)
if pos > endpoint:
del value[-1] # Discard corrupt value.
raise _DecodeError('Packed element was truncated.')
return pos
return DecodePackedField
elif is_repeated:
tag_bytes = encoder.TagBytes(field_number, wire_type)
tag_len = len(tag_bytes)
def DecodeRepeatedField(buffer, pos, end, message, field_dict):
value = field_dict.get(key)
if value is None:
value = field_dict.setdefault(key, new_default(message))
while 1:
(element, new_pos) = decode_value(buffer, pos)
value.append(element)
# Predict that the next tag is another copy of the same repeated
# field.
pos = new_pos + tag_len
if buffer[new_pos:pos] != tag_bytes or new_pos >= end:
# Prediction failed. Return.
if new_pos > end:
raise _DecodeError('Truncated message.')
return new_pos
return DecodeRepeatedField
else:
def DecodeField(buffer, pos, end, message, field_dict):
(field_dict[key], pos) = decode_value(buffer, pos)
if pos > end:
del field_dict[key] # Discard corrupt value.
raise _DecodeError('Truncated message.')
return pos
return DecodeField
return SpecificDecoder
def _ModifiedDecoder(wire_type, decode_value, modify_value):
"""Like SimpleDecoder but additionally invokes modify_value on every value
before storing it. Usually modify_value is ZigZagDecode.
"""
# Reusing _SimpleDecoder is slightly slower than copying a bunch of code, but
# not enough to make a significant difference.
def InnerDecode(buffer, pos):
(result, new_pos) = decode_value(buffer, pos)
return (modify_value(result), new_pos)
return _SimpleDecoder(wire_type, InnerDecode)
def _StructPackDecoder(wire_type, format):
"""Return a constructor for a decoder for a fixed-width field.
Args:
wire_type: The field's wire type.
format: The format string to pass to struct.unpack().
"""
value_size = struct.calcsize(format)
local_unpack = struct.unpack
# Reusing _SimpleDecoder is slightly slower than copying a bunch of code, but
# not enough to make a significant difference.
# Note that we expect someone up-stack to catch struct.error and convert
# it to _DecodeError -- this way we don't have to set up exception-
# handling blocks every time we parse one value.
def InnerDecode(buffer, pos):
new_pos = pos + value_size
result = local_unpack(format, buffer[pos:new_pos])[0]
return (result, new_pos)
return _SimpleDecoder(wire_type, InnerDecode)
def _FloatDecoder():
"""Returns a decoder for a float field.
This code works around a bug in struct.unpack for non-finite 32-bit
floating-point values.
"""
local_unpack = struct.unpack
def InnerDecode(buffer, pos):
# We expect a 32-bit value in little-endian byte order. Bit 1 is the sign
# bit, bits 2-9 represent the exponent, and bits 10-32 are the significand.
new_pos = pos + 4
float_bytes = buffer[pos:new_pos]
# If this value has all its exponent bits set, then it's non-finite.
# In Python 2.4, struct.unpack will convert it to a finite 64-bit value.
# To avoid that, we parse it specially.
if ((float_bytes[3] in '\x7F\xFF')
and (float_bytes[2] >= '\x80')):
# If at least one significand bit is set...
if float_bytes[0:3] != '\x00\x00\x80':
return (_NAN, new_pos)
# If sign bit is set...
if float_bytes[3] == '\xFF':
return (_NEG_INF, new_pos)
return (_POS_INF, new_pos)
# Note that we expect someone up-stack to catch struct.error and convert
# it to _DecodeError -- this way we don't have to set up exception-
# handling blocks every time we parse one value.
result = local_unpack('<f', float_bytes)[0]
return (result, new_pos)
return _SimpleDecoder(wire_format.WIRETYPE_FIXED32, InnerDecode)
def _DoubleDecoder():
"""Returns a decoder for a double field.
This code works around a bug in struct.unpack for not-a-number.
"""
local_unpack = struct.unpack
def InnerDecode(buffer, pos):
# We expect a 64-bit value in little-endian byte order. Bit 1 is the sign
# bit, bits 2-12 represent the exponent, and bits 13-64 are the significand.
new_pos = pos + 8
double_bytes = buffer[pos:new_pos]
# If this value has all its exponent bits set and at least one significand
# bit set, it's not a number. In Python 2.4, struct.unpack will treat it
# as inf or -inf. To avoid that, we treat it specially.
if ((double_bytes[7] in '\x7F\xFF')
and (double_bytes[6] >= '\xF0')
and (double_bytes[0:7] != '\x00\x00\x00\x00\x00\x00\xF0')):
return (_NAN, new_pos)
# Note that we expect someone up-stack to catch struct.error and convert
# it to _DecodeError -- this way we don't have to set up exception-
# handling blocks every time we parse one value.
result = local_unpack('<d', double_bytes)[0]
return (result, new_pos)
return _SimpleDecoder(wire_format.WIRETYPE_FIXED64, InnerDecode)
# --------------------------------------------------------------------
Int32Decoder = EnumDecoder = _SimpleDecoder(
wire_format.WIRETYPE_VARINT, _DecodeSignedVarint32)
Int64Decoder = _SimpleDecoder(
wire_format.WIRETYPE_VARINT, _DecodeSignedVarint)
UInt32Decoder = _SimpleDecoder(wire_format.WIRETYPE_VARINT, _DecodeVarint32)
UInt64Decoder = _SimpleDecoder(wire_format.WIRETYPE_VARINT, _DecodeVarint)
SInt32Decoder = _ModifiedDecoder(
wire_format.WIRETYPE_VARINT, _DecodeVarint32, wire_format.ZigZagDecode)
SInt64Decoder = _ModifiedDecoder(
wire_format.WIRETYPE_VARINT, _DecodeVarint, wire_format.ZigZagDecode)
# Note that Python conveniently guarantees that when using the '<' prefix on
# formats, they will also have the same size across all platforms (as opposed
# to without the prefix, where their sizes depend on the C compiler's basic
# type sizes).
Fixed32Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<I')
Fixed64Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<Q')
SFixed32Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<i')
SFixed64Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<q')
FloatDecoder = _FloatDecoder()
DoubleDecoder = _DoubleDecoder()
BoolDecoder = _ModifiedDecoder(
wire_format.WIRETYPE_VARINT, _DecodeVarint, bool)
def StringDecoder(field_number, is_repeated, is_packed, key, new_default):
"""Returns a decoder for a string field."""
local_DecodeVarint = _DecodeVarint
local_unicode = unicode
assert not is_packed
if is_repeated:
tag_bytes = encoder.TagBytes(field_number,
wire_format.WIRETYPE_LENGTH_DELIMITED)
tag_len = len(tag_bytes)
def DecodeRepeatedField(buffer, pos, end, message, field_dict):
value = field_dict.get(key)
if value is None:
value = field_dict.setdefault(key, new_default(message))
while 1:
(size, pos) = local_DecodeVarint(buffer, pos)
new_pos = pos + size
if new_pos > end:
raise _DecodeError('Truncated string.')
value.append(local_unicode(buffer[pos:new_pos], 'utf-8'))
# Predict that the next tag is another copy of the same repeated field.
pos = new_pos + tag_len
if buffer[new_pos:pos] != tag_bytes or new_pos == end:
# Prediction failed. Return.
return new_pos
return DecodeRepeatedField
else:
def DecodeField(buffer, pos, end, message, field_dict):
(size, pos) = local_DecodeVarint(buffer, pos)
new_pos = pos + size
if new_pos > end:
raise _DecodeError('Truncated string.')
field_dict[key] = local_unicode(buffer[pos:new_pos], 'utf-8')
return new_pos
return DecodeField
def BytesDecoder(field_number, is_repeated, is_packed, key, new_default):
"""Returns a decoder for a bytes field."""
local_DecodeVarint = _DecodeVarint
assert not is_packed
if is_repeated:
tag_bytes = encoder.TagBytes(field_number,
wire_format.WIRETYPE_LENGTH_DELIMITED)
tag_len = len(tag_bytes)
def DecodeRepeatedField(buffer, pos, end, message, field_dict):
value = field_dict.get(key)
if value is None:
value = field_dict.setdefault(key, new_default(message))
while 1:
(size, pos) = local_DecodeVarint(buffer, pos)
new_pos = pos + size
if new_pos > end:
raise _DecodeError('Truncated string.')
value.append(buffer[pos:new_pos])
# Predict that the next tag is another copy of the same repeated field.
pos = new_pos + tag_len
if buffer[new_pos:pos] != tag_bytes or new_pos == end:
# Prediction failed. Return.
return new_pos
return DecodeRepeatedField
else:
def DecodeField(buffer, pos, end, message, field_dict):
(size, pos) = local_DecodeVarint(buffer, pos)
new_pos = pos + size
if new_pos > end:
raise _DecodeError('Truncated string.')
field_dict[key] = buffer[pos:new_pos]
return new_pos
return DecodeField
def GroupDecoder(field_number, is_repeated, is_packed, key, new_default):
"""Returns a decoder for a group field."""
end_tag_bytes = encoder.TagBytes(field_number,
wire_format.WIRETYPE_END_GROUP)
end_tag_len = len(end_tag_bytes)
assert not is_packed
if is_repeated:
tag_bytes = encoder.TagBytes(field_number,
wire_format.WIRETYPE_START_GROUP)
tag_len = len(tag_bytes)
def DecodeRepeatedField(buffer, pos, end, message, field_dict):
value = field_dict.get(key)
if value is None:
value = field_dict.setdefault(key, new_default(message))
while 1:
value = field_dict.get(key)
if value is None:
value = field_dict.setdefault(key, new_default(message))
# Read sub-message.
pos = value.add()._InternalParse(buffer, pos, end)
# Read end tag.
new_pos = pos+end_tag_len
if buffer[pos:new_pos] != end_tag_bytes or new_pos > end:
raise _DecodeError('Missing group end tag.')
# Predict that the next tag is another copy of the same repeated field.
pos = new_pos + tag_len
if buffer[new_pos:pos] != tag_bytes or new_pos == end:
# Prediction failed. Return.
return new_pos
return DecodeRepeatedField
else:
def DecodeField(buffer, pos, end, message, field_dict):
value = field_dict.get(key)
if value is None:
value = field_dict.setdefault(key, new_default(message))
# Read sub-message.
pos = value._InternalParse(buffer, pos, end)
# Read end tag.
new_pos = pos+end_tag_len
if buffer[pos:new_pos] != end_tag_bytes or new_pos > end:
raise _DecodeError('Missing group end tag.')
return new_pos
return DecodeField
def MessageDecoder(field_number, is_repeated, is_packed, key, new_default):
"""Returns a decoder for a message field."""
local_DecodeVarint = _DecodeVarint
assert not is_packed
if is_repeated:
tag_bytes = encoder.TagBytes(field_number,
wire_format.WIRETYPE_LENGTH_DELIMITED)
tag_len = len(tag_bytes)
def DecodeRepeatedField(buffer, pos, end, message, field_dict):
value = field_dict.get(key)
if value is None:
value = field_dict.setdefault(key, new_default(message))
while 1:
value = field_dict.get(key)
if value is None:
value = field_dict.setdefault(key, new_default(message))
# Read length.
(size, pos) = local_DecodeVarint(buffer, pos)
new_pos = pos + size
if new_pos > end:
raise _DecodeError('Truncated message.')
# Read sub-message.
if value.add()._InternalParse(buffer, pos, new_pos) != new_pos:
# The only reason _InternalParse would return early is if it
# encountered an end-group tag.
raise _DecodeError('Unexpected end-group tag.')
# Predict that the next tag is another copy of the same repeated field.
pos = new_pos + tag_len
if buffer[new_pos:pos] != tag_bytes or new_pos == end:
# Prediction failed. Return.
return new_pos
return DecodeRepeatedField
else:
def DecodeField(buffer, pos, end, message, field_dict):
value = field_dict.get(key)
if value is None:
value = field_dict.setdefault(key, new_default(message))
# Read length.
(size, pos) = local_DecodeVarint(buffer, pos)
new_pos = pos + size
if new_pos > end:
raise _DecodeError('Truncated message.')
# Read sub-message.
if value._InternalParse(buffer, pos, new_pos) != new_pos:
# The only reason _InternalParse would return early is if it encountered
# an end-group tag.
raise _DecodeError('Unexpected end-group tag.')
return new_pos
return DecodeField
# --------------------------------------------------------------------
MESSAGE_SET_ITEM_TAG = encoder.TagBytes(1, wire_format.WIRETYPE_START_GROUP)
def MessageSetItemDecoder(extensions_by_number):
"""Returns a decoder for a MessageSet item.
The parameter is the _extensions_by_number map for the message class.
The message set message looks like this:
message MessageSet {
repeated group Item = 1 {
required int32 type_id = 2;
required string message = 3;
}
}
"""
type_id_tag_bytes = encoder.TagBytes(2, wire_format.WIRETYPE_VARINT)
message_tag_bytes = encoder.TagBytes(3, wire_format.WIRETYPE_LENGTH_DELIMITED)
item_end_tag_bytes = encoder.TagBytes(1, wire_format.WIRETYPE_END_GROUP)
local_ReadTag = ReadTag
local_DecodeVarint = _DecodeVarint
local_SkipField = SkipField
def DecodeItem(buffer, pos, end, message, field_dict):
type_id = -1
message_start = -1
message_end = -1
# Technically, type_id and message can appear in any order, so we need
# a little loop here.
while 1:
(tag_bytes, pos) = local_ReadTag(buffer, pos)
if tag_bytes == type_id_tag_bytes:
(type_id, pos) = local_DecodeVarint(buffer, pos)
elif tag_bytes == message_tag_bytes:
(size, message_start) = local_DecodeVarint(buffer, pos)
pos = message_end = message_start + size
elif tag_bytes == item_end_tag_bytes:
break
else:
pos = SkipField(buffer, pos, end, tag_bytes)
if pos == -1:
raise _DecodeError('Missing group end tag.')
if pos > end:
raise _DecodeError('Truncated message.')
if type_id == -1:
raise _DecodeError('MessageSet item missing type_id.')
if message_start == -1:
raise _DecodeError('MessageSet item missing message.')
extension = extensions_by_number.get(type_id)
if extension is not None:
value = field_dict.get(extension)
if value is None:
value = field_dict.setdefault(
extension, extension.message_type._concrete_class())
if value._InternalParse(buffer, message_start,message_end) != message_end:
# The only reason _InternalParse would return early is if it encountered
# an end-group tag.
raise _DecodeError('Unexpected end-group tag.')
return pos
return DecodeItem
# --------------------------------------------------------------------
# Optimization is not as heavy here because calls to SkipField() are rare,
# except for handling end-group tags.
def _SkipVarint(buffer, pos, end):
"""Skip a varint value. Returns the new position."""
while ord(buffer[pos]) & 0x80:
pos += 1
pos += 1
if pos > end:
raise _DecodeError('Truncated message.')
return pos
def _SkipFixed64(buffer, pos, end):
"""Skip a fixed64 value. Returns the new position."""
pos += 8
if pos > end:
raise _DecodeError('Truncated message.')
return pos
def _SkipLengthDelimited(buffer, pos, end):
"""Skip a length-delimited value. Returns the new position."""
(size, pos) = _DecodeVarint(buffer, pos)
pos += size
if pos > end:
raise _DecodeError('Truncated message.')
return pos
def _SkipGroup(buffer, pos, end):
"""Skip sub-group. Returns the new position."""
while 1:
(tag_bytes, pos) = ReadTag(buffer, pos)
new_pos = SkipField(buffer, pos, end, tag_bytes)
if new_pos == -1:
return pos
pos = new_pos
def _EndGroup(buffer, pos, end):
"""Skipping an END_GROUP tag returns -1 to tell the parent loop to break."""
return -1
def _SkipFixed32(buffer, pos, end):
"""Skip a fixed32 value. Returns the new position."""
pos += 4
if pos > end:
raise _DecodeError('Truncated message.')
return pos
def _RaiseInvalidWireType(buffer, pos, end):
"""Skip function for unknown wire types. Raises an exception."""
raise _DecodeError('Tag had invalid wire type.')
def _FieldSkipper():
"""Constructs the SkipField function."""
WIRETYPE_TO_SKIPPER = [
_SkipVarint,
_SkipFixed64,
_SkipLengthDelimited,
_SkipGroup,
_EndGroup,
_SkipFixed32,
_RaiseInvalidWireType,
_RaiseInvalidWireType,
]
wiretype_mask = wire_format.TAG_TYPE_MASK
local_ord = ord
def SkipField(buffer, pos, end, tag_bytes):
"""Skips a field with the specified tag.
|pos| should point to the byte immediately after the tag.
Returns:
The new position (after the tag value), or -1 if the tag is an end-group
tag (in which case the calling loop should break).
"""
# The wire type is always in the first byte since varints are little-endian.
wire_type = local_ord(tag_bytes[0]) & wiretype_mask
return WIRETYPE_TO_SKIPPER[wire_type](buffer, pos, end)
return SkipField
SkipField = _FieldSkipper()

View File

@ -0,0 +1,769 @@
# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc. All rights reserved.
# http://code.google.com/p/protobuf/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Code for encoding protocol message primitives.
Contains the logic for encoding every logical protocol field type
into one of the 5 physical wire types.
This code is designed to push the Python interpreter's performance to the
limits.
The basic idea is that at startup time, for every field (i.e. every
FieldDescriptor) we construct two functions: a "sizer" and an "encoder". The
sizer takes a value of this field's type and computes its byte size. The
encoder takes a writer function and a value. It encodes the value into byte
strings and invokes the writer function to write those strings. Typically the
writer function is the write() method of a cStringIO.
We try to do as much work as possible when constructing the writer and the
sizer rather than when calling them. In particular:
* We copy any needed global functions to local variables, so that we do not need
to do costly global table lookups at runtime.
* Similarly, we try to do any attribute lookups at startup time if possible.
* Every field's tag is encoded to bytes at startup, since it can't change at
runtime.
* Whatever component of the field size we can compute at startup, we do.
* We *avoid* sharing code if doing so would make the code slower and not sharing
does not burden us too much. For example, encoders for repeated fields do
not just call the encoders for singular fields in a loop because this would
add an extra function call overhead for every loop iteration; instead, we
manually inline the single-value encoder into the loop.
* If a Python function lacks a return statement, Python actually generates
instructions to pop the result of the last statement off the stack, push
None onto the stack, and then return that. If we really don't care what
value is returned, then we can save two instructions by returning the
result of the last statement. It looks funny but it helps.
* We assume that type and bounds checking has happened at a higher level.
"""
__author__ = 'kenton@google.com (Kenton Varda)'
import struct
from google.protobuf.internal import wire_format
# This will overflow and thus become IEEE-754 "infinity". We would use
# "float('inf')" but it doesn't work on Windows pre-Python-2.6.
_POS_INF = 1e10000
_NEG_INF = -_POS_INF
def _VarintSize(value):
"""Compute the size of a varint value."""
if value <= 0x7f: return 1
if value <= 0x3fff: return 2
if value <= 0x1fffff: return 3
if value <= 0xfffffff: return 4
if value <= 0x7ffffffff: return 5
if value <= 0x3ffffffffff: return 6
if value <= 0x1ffffffffffff: return 7
if value <= 0xffffffffffffff: return 8
if value <= 0x7fffffffffffffff: return 9
return 10
def _SignedVarintSize(value):
"""Compute the size of a signed varint value."""
if value < 0: return 10
if value <= 0x7f: return 1
if value <= 0x3fff: return 2
if value <= 0x1fffff: return 3
if value <= 0xfffffff: return 4
if value <= 0x7ffffffff: return 5
if value <= 0x3ffffffffff: return 6
if value <= 0x1ffffffffffff: return 7
if value <= 0xffffffffffffff: return 8
if value <= 0x7fffffffffffffff: return 9
return 10
def _TagSize(field_number):
"""Returns the number of bytes required to serialize a tag with this field
number."""
# Just pass in type 0, since the type won't affect the tag+type size.
return _VarintSize(wire_format.PackTag(field_number, 0))
# --------------------------------------------------------------------
# In this section we define some generic sizers. Each of these functions
# takes parameters specific to a particular field type, e.g. int32 or fixed64.
# It returns another function which in turn takes parameters specific to a
# particular field, e.g. the field number and whether it is repeated or packed.
# Look at the next section to see how these are used.
def _SimpleSizer(compute_value_size):
"""A sizer which uses the function compute_value_size to compute the size of
each value. Typically compute_value_size is _VarintSize."""
def SpecificSizer(field_number, is_repeated, is_packed):
tag_size = _TagSize(field_number)
if is_packed:
local_VarintSize = _VarintSize
def PackedFieldSize(value):
result = 0
for element in value:
result += compute_value_size(element)
return result + local_VarintSize(result) + tag_size
return PackedFieldSize
elif is_repeated:
def RepeatedFieldSize(value):
result = tag_size * len(value)
for element in value:
result += compute_value_size(element)
return result
return RepeatedFieldSize
else:
def FieldSize(value):
return tag_size + compute_value_size(value)
return FieldSize
return SpecificSizer
def _ModifiedSizer(compute_value_size, modify_value):
"""Like SimpleSizer, but modify_value is invoked on each value before it is
passed to compute_value_size. modify_value is typically ZigZagEncode."""
def SpecificSizer(field_number, is_repeated, is_packed):
tag_size = _TagSize(field_number)
if is_packed:
local_VarintSize = _VarintSize
def PackedFieldSize(value):
result = 0
for element in value:
result += compute_value_size(modify_value(element))
return result + local_VarintSize(result) + tag_size
return PackedFieldSize
elif is_repeated:
def RepeatedFieldSize(value):
result = tag_size * len(value)
for element in value:
result += compute_value_size(modify_value(element))
return result
return RepeatedFieldSize
else:
def FieldSize(value):
return tag_size + compute_value_size(modify_value(value))
return FieldSize
return SpecificSizer
def _FixedSizer(value_size):
"""Like _SimpleSizer except for a fixed-size field. The input is the size
of one value."""
def SpecificSizer(field_number, is_repeated, is_packed):
tag_size = _TagSize(field_number)
if is_packed:
local_VarintSize = _VarintSize
def PackedFieldSize(value):
result = len(value) * value_size
return result + local_VarintSize(result) + tag_size
return PackedFieldSize
elif is_repeated:
element_size = value_size + tag_size
def RepeatedFieldSize(value):
return len(value) * element_size
return RepeatedFieldSize
else:
field_size = value_size + tag_size
def FieldSize(value):
return field_size
return FieldSize
return SpecificSizer
# ====================================================================
# Here we declare a sizer constructor for each field type. Each "sizer
# constructor" is a function that takes (field_number, is_repeated, is_packed)
# as parameters and returns a sizer, which in turn takes a field value as
# a parameter and returns its encoded size.
Int32Sizer = Int64Sizer = EnumSizer = _SimpleSizer(_SignedVarintSize)
UInt32Sizer = UInt64Sizer = _SimpleSizer(_VarintSize)
SInt32Sizer = SInt64Sizer = _ModifiedSizer(
_SignedVarintSize, wire_format.ZigZagEncode)
Fixed32Sizer = SFixed32Sizer = FloatSizer = _FixedSizer(4)
Fixed64Sizer = SFixed64Sizer = DoubleSizer = _FixedSizer(8)
BoolSizer = _FixedSizer(1)
def StringSizer(field_number, is_repeated, is_packed):
"""Returns a sizer for a string field."""
tag_size = _TagSize(field_number)
local_VarintSize = _VarintSize
local_len = len
assert not is_packed
if is_repeated:
def RepeatedFieldSize(value):
result = tag_size * len(value)
for element in value:
l = local_len(element.encode('utf-8'))
result += local_VarintSize(l) + l
return result
return RepeatedFieldSize
else:
def FieldSize(value):
l = local_len(value.encode('utf-8'))
return tag_size + local_VarintSize(l) + l
return FieldSize
def BytesSizer(field_number, is_repeated, is_packed):
"""Returns a sizer for a bytes field."""
tag_size = _TagSize(field_number)
local_VarintSize = _VarintSize
local_len = len
assert not is_packed
if is_repeated:
def RepeatedFieldSize(value):
result = tag_size * len(value)
for element in value:
l = local_len(element)
result += local_VarintSize(l) + l
return result
return RepeatedFieldSize
else:
def FieldSize(value):
l = local_len(value)
return tag_size + local_VarintSize(l) + l
return FieldSize
def GroupSizer(field_number, is_repeated, is_packed):
"""Returns a sizer for a group field."""
tag_size = _TagSize(field_number) * 2
assert not is_packed
if is_repeated:
def RepeatedFieldSize(value):
result = tag_size * len(value)
for element in value:
result += element.ByteSize()
return result
return RepeatedFieldSize
else:
def FieldSize(value):
return tag_size + value.ByteSize()
return FieldSize
def MessageSizer(field_number, is_repeated, is_packed):
"""Returns a sizer for a message field."""
tag_size = _TagSize(field_number)
local_VarintSize = _VarintSize
assert not is_packed
if is_repeated:
def RepeatedFieldSize(value):
result = tag_size * len(value)
for element in value:
l = element.ByteSize()
result += local_VarintSize(l) + l
return result
return RepeatedFieldSize
else:
def FieldSize(value):
l = value.ByteSize()
return tag_size + local_VarintSize(l) + l
return FieldSize
# --------------------------------------------------------------------
# MessageSet is special.
def MessageSetItemSizer(field_number):
"""Returns a sizer for extensions of MessageSet.
The message set message looks like this:
message MessageSet {
repeated group Item = 1 {
required int32 type_id = 2;
required string message = 3;
}
}
"""
static_size = (_TagSize(1) * 2 + _TagSize(2) + _VarintSize(field_number) +
_TagSize(3))
local_VarintSize = _VarintSize
def FieldSize(value):
l = value.ByteSize()
return static_size + local_VarintSize(l) + l
return FieldSize
# ====================================================================
# Encoders!
def _VarintEncoder():
"""Return an encoder for a basic varint value (does not include tag)."""
local_chr = chr
def EncodeVarint(write, value):
bits = value & 0x7f
value >>= 7
while value:
write(local_chr(0x80|bits))
bits = value & 0x7f
value >>= 7
return write(local_chr(bits))
return EncodeVarint
def _SignedVarintEncoder():
"""Return an encoder for a basic signed varint value (does not include
tag)."""
local_chr = chr
def EncodeSignedVarint(write, value):
if value < 0:
value += (1 << 64)
bits = value & 0x7f
value >>= 7
while value:
write(local_chr(0x80|bits))
bits = value & 0x7f
value >>= 7
return write(local_chr(bits))
return EncodeSignedVarint
_EncodeVarint = _VarintEncoder()
_EncodeSignedVarint = _SignedVarintEncoder()
def _VarintBytes(value):
"""Encode the given integer as a varint and return the bytes. This is only
called at startup time so it doesn't need to be fast."""
pieces = []
_EncodeVarint(pieces.append, value)
return "".join(pieces)
def TagBytes(field_number, wire_type):
"""Encode the given tag and return the bytes. Only called at startup."""
return _VarintBytes(wire_format.PackTag(field_number, wire_type))
# --------------------------------------------------------------------
# As with sizers (see above), we have a number of common encoder
# implementations.
def _SimpleEncoder(wire_type, encode_value, compute_value_size):
"""Return a constructor for an encoder for fields of a particular type.
Args:
wire_type: The field's wire type, for encoding tags.
encode_value: A function which encodes an individual value, e.g.
_EncodeVarint().
compute_value_size: A function which computes the size of an individual
value, e.g. _VarintSize().
"""
def SpecificEncoder(field_number, is_repeated, is_packed):
if is_packed:
tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
local_EncodeVarint = _EncodeVarint
def EncodePackedField(write, value):
write(tag_bytes)
size = 0
for element in value:
size += compute_value_size(element)
local_EncodeVarint(write, size)
for element in value:
encode_value(write, element)
return EncodePackedField
elif is_repeated:
tag_bytes = TagBytes(field_number, wire_type)
def EncodeRepeatedField(write, value):
for element in value:
write(tag_bytes)
encode_value(write, element)
return EncodeRepeatedField
else:
tag_bytes = TagBytes(field_number, wire_type)
def EncodeField(write, value):
write(tag_bytes)
return encode_value(write, value)
return EncodeField
return SpecificEncoder
def _ModifiedEncoder(wire_type, encode_value, compute_value_size, modify_value):
"""Like SimpleEncoder but additionally invokes modify_value on every value
before passing it to encode_value. Usually modify_value is ZigZagEncode."""
def SpecificEncoder(field_number, is_repeated, is_packed):
if is_packed:
tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
local_EncodeVarint = _EncodeVarint
def EncodePackedField(write, value):
write(tag_bytes)
size = 0
for element in value:
size += compute_value_size(modify_value(element))
local_EncodeVarint(write, size)
for element in value:
encode_value(write, modify_value(element))
return EncodePackedField
elif is_repeated:
tag_bytes = TagBytes(field_number, wire_type)
def EncodeRepeatedField(write, value):
for element in value:
write(tag_bytes)
encode_value(write, modify_value(element))
return EncodeRepeatedField
else:
tag_bytes = TagBytes(field_number, wire_type)
def EncodeField(write, value):
write(tag_bytes)
return encode_value(write, modify_value(value))
return EncodeField
return SpecificEncoder
def _StructPackEncoder(wire_type, format):
"""Return a constructor for an encoder for a fixed-width field.
Args:
wire_type: The field's wire type, for encoding tags.
format: The format string to pass to struct.pack().
"""
value_size = struct.calcsize(format)
def SpecificEncoder(field_number, is_repeated, is_packed):
local_struct_pack = struct.pack
if is_packed:
tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
local_EncodeVarint = _EncodeVarint
def EncodePackedField(write, value):
write(tag_bytes)
local_EncodeVarint(write, len(value) * value_size)
for element in value:
write(local_struct_pack(format, element))
return EncodePackedField
elif is_repeated:
tag_bytes = TagBytes(field_number, wire_type)
def EncodeRepeatedField(write, value):
for element in value:
write(tag_bytes)
write(local_struct_pack(format, element))
return EncodeRepeatedField
else:
tag_bytes = TagBytes(field_number, wire_type)
def EncodeField(write, value):
write(tag_bytes)
return write(local_struct_pack(format, value))
return EncodeField
return SpecificEncoder
def _FloatingPointEncoder(wire_type, format):
"""Return a constructor for an encoder for float fields.
This is like StructPackEncoder, but catches errors that may be due to
passing non-finite floating-point values to struct.pack, and makes a
second attempt to encode those values.
Args:
wire_type: The field's wire type, for encoding tags.
format: The format string to pass to struct.pack().
"""
value_size = struct.calcsize(format)
if value_size == 4:
def EncodeNonFiniteOrRaise(write, value):
# Remember that the serialized form uses little-endian byte order.
if value == _POS_INF:
write('\x00\x00\x80\x7F')
elif value == _NEG_INF:
write('\x00\x00\x80\xFF')
elif value != value: # NaN
write('\x00\x00\xC0\x7F')
else:
raise
elif value_size == 8:
def EncodeNonFiniteOrRaise(write, value):
if value == _POS_INF:
write('\x00\x00\x00\x00\x00\x00\xF0\x7F')
elif value == _NEG_INF:
write('\x00\x00\x00\x00\x00\x00\xF0\xFF')
elif value != value: # NaN
write('\x00\x00\x00\x00\x00\x00\xF8\x7F')
else:
raise
else:
raise ValueError('Can\'t encode floating-point values that are '
'%d bytes long (only 4 or 8)' % value_size)
def SpecificEncoder(field_number, is_repeated, is_packed):
local_struct_pack = struct.pack
if is_packed:
tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
local_EncodeVarint = _EncodeVarint
def EncodePackedField(write, value):
write(tag_bytes)
local_EncodeVarint(write, len(value) * value_size)
for element in value:
# This try/except block is going to be faster than any code that
# we could write to check whether element is finite.
try:
write(local_struct_pack(format, element))
except SystemError:
EncodeNonFiniteOrRaise(write, element)
return EncodePackedField
elif is_repeated:
tag_bytes = TagBytes(field_number, wire_type)
def EncodeRepeatedField(write, value):
for element in value:
write(tag_bytes)
try:
write(local_struct_pack(format, element))
except SystemError:
EncodeNonFiniteOrRaise(write, element)
return EncodeRepeatedField
else:
tag_bytes = TagBytes(field_number, wire_type)
def EncodeField(write, value):
write(tag_bytes)
try:
write(local_struct_pack(format, value))
except SystemError:
EncodeNonFiniteOrRaise(write, value)
return EncodeField
return SpecificEncoder
# ====================================================================
# Here we declare an encoder constructor for each field type. These work
# very similarly to sizer constructors, described earlier.
Int32Encoder = Int64Encoder = EnumEncoder = _SimpleEncoder(
wire_format.WIRETYPE_VARINT, _EncodeSignedVarint, _SignedVarintSize)
UInt32Encoder = UInt64Encoder = _SimpleEncoder(
wire_format.WIRETYPE_VARINT, _EncodeVarint, _VarintSize)
SInt32Encoder = SInt64Encoder = _ModifiedEncoder(
wire_format.WIRETYPE_VARINT, _EncodeVarint, _VarintSize,
wire_format.ZigZagEncode)
# Note that Python conveniently guarantees that when using the '<' prefix on
# formats, they will also have the same size across all platforms (as opposed
# to without the prefix, where their sizes depend on the C compiler's basic
# type sizes).
Fixed32Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<I')
Fixed64Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<Q')
SFixed32Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED32, '<i')
SFixed64Encoder = _StructPackEncoder(wire_format.WIRETYPE_FIXED64, '<q')
FloatEncoder = _FloatingPointEncoder(wire_format.WIRETYPE_FIXED32, '<f')
DoubleEncoder = _FloatingPointEncoder(wire_format.WIRETYPE_FIXED64, '<d')
def BoolEncoder(field_number, is_repeated, is_packed):
"""Returns an encoder for a boolean field."""
false_byte = chr(0)
true_byte = chr(1)
if is_packed:
tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
local_EncodeVarint = _EncodeVarint
def EncodePackedField(write, value):
write(tag_bytes)
local_EncodeVarint(write, len(value))
for element in value:
if element:
write(true_byte)
else:
write(false_byte)
return EncodePackedField
elif is_repeated:
tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_VARINT)
def EncodeRepeatedField(write, value):
for element in value:
write(tag_bytes)
if element:
write(true_byte)
else:
write(false_byte)
return EncodeRepeatedField
else:
tag_bytes = TagBytes(field_number, wire_format.WIRETYPE_VARINT)
def EncodeField(write, value):
write(tag_bytes)
if value:
return write(true_byte)
return write(false_byte)
return EncodeField
def StringEncoder(field_number, is_repeated, is_packed):
"""Returns an encoder for a string field."""
tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
local_EncodeVarint = _EncodeVarint
local_len = len
assert not is_packed
if is_repeated:
def EncodeRepeatedField(write, value):
for element in value:
encoded = element.encode('utf-8')
write(tag)
local_EncodeVarint(write, local_len(encoded))
write(encoded)
return EncodeRepeatedField
else:
def EncodeField(write, value):
encoded = value.encode('utf-8')
write(tag)
local_EncodeVarint(write, local_len(encoded))
return write(encoded)
return EncodeField
def BytesEncoder(field_number, is_repeated, is_packed):
"""Returns an encoder for a bytes field."""
tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
local_EncodeVarint = _EncodeVarint
local_len = len
assert not is_packed
if is_repeated:
def EncodeRepeatedField(write, value):
for element in value:
write(tag)
local_EncodeVarint(write, local_len(element))
write(element)
return EncodeRepeatedField
else:
def EncodeField(write, value):
write(tag)
local_EncodeVarint(write, local_len(value))
return write(value)
return EncodeField
def GroupEncoder(field_number, is_repeated, is_packed):
"""Returns an encoder for a group field."""
start_tag = TagBytes(field_number, wire_format.WIRETYPE_START_GROUP)
end_tag = TagBytes(field_number, wire_format.WIRETYPE_END_GROUP)
assert not is_packed
if is_repeated:
def EncodeRepeatedField(write, value):
for element in value:
write(start_tag)
element._InternalSerialize(write)
write(end_tag)
return EncodeRepeatedField
else:
def EncodeField(write, value):
write(start_tag)
value._InternalSerialize(write)
return write(end_tag)
return EncodeField
def MessageEncoder(field_number, is_repeated, is_packed):
"""Returns an encoder for a message field."""
tag = TagBytes(field_number, wire_format.WIRETYPE_LENGTH_DELIMITED)
local_EncodeVarint = _EncodeVarint
assert not is_packed
if is_repeated:
def EncodeRepeatedField(write, value):
for element in value:
write(tag)
local_EncodeVarint(write, element.ByteSize())
element._InternalSerialize(write)
return EncodeRepeatedField
else:
def EncodeField(write, value):
write(tag)
local_EncodeVarint(write, value.ByteSize())
return value._InternalSerialize(write)
return EncodeField
# --------------------------------------------------------------------
# As before, MessageSet is special.
def MessageSetItemEncoder(field_number):
"""Encoder for extensions of MessageSet.
The message set message looks like this:
message MessageSet {
repeated group Item = 1 {
required int32 type_id = 2;
required string message = 3;
}
}
"""
start_bytes = "".join([
TagBytes(1, wire_format.WIRETYPE_START_GROUP),
TagBytes(2, wire_format.WIRETYPE_VARINT),
_VarintBytes(field_number),
TagBytes(3, wire_format.WIRETYPE_LENGTH_DELIMITED)])
end_bytes = TagBytes(1, wire_format.WIRETYPE_END_GROUP)
local_EncodeVarint = _EncodeVarint
def EncodeField(write, value):
write(start_bytes)
local_EncodeVarint(write, value.ByteSize())
value._InternalSerialize(write)
return write(end_bytes)
return EncodeField

View File

@ -0,0 +1,78 @@
# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc. All rights reserved.
# http://code.google.com/p/protobuf/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Defines a listener interface for observing certain
state transitions on Message objects.
Also defines a null implementation of this interface.
"""
__author__ = 'robinson@google.com (Will Robinson)'
class MessageListener(object):
"""Listens for modifications made to a message. Meant to be registered via
Message._SetListener().
Attributes:
dirty: If True, then calling Modified() would be a no-op. This can be
used to avoid these calls entirely in the common case.
"""
def Modified(self):
"""Called every time the message is modified in such a way that the parent
message may need to be updated. This currently means either:
(a) The message was modified for the first time, so the parent message
should henceforth mark the message as present.
(b) The message's cached byte size became dirty -- i.e. the message was
modified for the first time after a previous call to ByteSize().
Therefore the parent should also mark its byte size as dirty.
Note that (a) implies (b), since new objects start out with a client cached
size (zero). However, we document (a) explicitly because it is important.
Modified() will *only* be called in response to one of these two events --
not every time the sub-message is modified.
Note that if the listener's |dirty| attribute is true, then calling
Modified at the moment would be a no-op, so it can be skipped. Performance-
sensitive callers should check this attribute directly before calling since
it will be true most of the time.
"""
raise NotImplementedError
class NullMessageListener(object):
"""No-op MessageListener implementation."""
def Modified(self):
pass

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,286 @@
# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc. All rights reserved.
# http://code.google.com/p/protobuf/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Provides type checking routines.
This module defines type checking utilities in the forms of dictionaries:
VALUE_CHECKERS: A dictionary of field types and a value validation object.
TYPE_TO_BYTE_SIZE_FN: A dictionary with field types and a size computing
function.
TYPE_TO_SERIALIZE_METHOD: A dictionary with field types and serialization
function.
FIELD_TYPE_TO_WIRE_TYPE: A dictionary with field typed and their
coresponding wire types.
TYPE_TO_DESERIALIZE_METHOD: A dictionary with field types and deserialization
function.
"""
__author__ = 'robinson@google.com (Will Robinson)'
from google.protobuf.internal import decoder
from google.protobuf.internal import encoder
from google.protobuf.internal import wire_format
from google.protobuf import descriptor
_FieldDescriptor = descriptor.FieldDescriptor
def GetTypeChecker(cpp_type, field_type):
"""Returns a type checker for a message field of the specified types.
Args:
cpp_type: C++ type of the field (see descriptor.py).
field_type: Protocol message field type (see descriptor.py).
Returns:
An instance of TypeChecker which can be used to verify the types
of values assigned to a field of the specified type.
"""
if (cpp_type == _FieldDescriptor.CPPTYPE_STRING and
field_type == _FieldDescriptor.TYPE_STRING):
return UnicodeValueChecker()
return _VALUE_CHECKERS[cpp_type]
# None of the typecheckers below make any attempt to guard against people
# subclassing builtin types and doing weird things. We're not trying to
# protect against malicious clients here, just people accidentally shooting
# themselves in the foot in obvious ways.
class TypeChecker(object):
"""Type checker used to catch type errors as early as possible
when the client is setting scalar fields in protocol messages.
"""
def __init__(self, *acceptable_types):
self._acceptable_types = acceptable_types
def CheckValue(self, proposed_value):
if not isinstance(proposed_value, self._acceptable_types):
message = ('%.1024r has type %s, but expected one of: %s' %
(proposed_value, type(proposed_value), self._acceptable_types))
raise TypeError(message)
# IntValueChecker and its subclasses perform integer type-checks
# and bounds-checks.
class IntValueChecker(object):
"""Checker used for integer fields. Performs type-check and range check."""
def CheckValue(self, proposed_value):
if not isinstance(proposed_value, (int, long)):
message = ('%.1024r has type %s, but expected one of: %s' %
(proposed_value, type(proposed_value), (int, long)))
raise TypeError(message)
if not self._MIN <= proposed_value <= self._MAX:
raise ValueError('Value out of range: %d' % proposed_value)
class UnicodeValueChecker(object):
"""Checker used for string fields."""
def CheckValue(self, proposed_value):
if not isinstance(proposed_value, (str, unicode)):
message = ('%.1024r has type %s, but expected one of: %s' %
(proposed_value, type(proposed_value), (str, unicode)))
raise TypeError(message)
# If the value is of type 'str' make sure that it is in 7-bit ASCII
# encoding.
if isinstance(proposed_value, str):
try:
unicode(proposed_value, 'ascii')
except UnicodeDecodeError:
raise ValueError('%.1024r has type str, but isn\'t in 7-bit ASCII '
'encoding. Non-ASCII strings must be converted to '
'unicode objects before being added.' %
(proposed_value))
class Int32ValueChecker(IntValueChecker):
# We're sure to use ints instead of longs here since comparison may be more
# efficient.
_MIN = -2147483648
_MAX = 2147483647
class Uint32ValueChecker(IntValueChecker):
_MIN = 0
_MAX = (1 << 32) - 1
class Int64ValueChecker(IntValueChecker):
_MIN = -(1 << 63)
_MAX = (1 << 63) - 1
class Uint64ValueChecker(IntValueChecker):
_MIN = 0
_MAX = (1 << 64) - 1
# Type-checkers for all scalar CPPTYPEs.
_VALUE_CHECKERS = {
_FieldDescriptor.CPPTYPE_INT32: Int32ValueChecker(),
_FieldDescriptor.CPPTYPE_INT64: Int64ValueChecker(),
_FieldDescriptor.CPPTYPE_UINT32: Uint32ValueChecker(),
_FieldDescriptor.CPPTYPE_UINT64: Uint64ValueChecker(),
_FieldDescriptor.CPPTYPE_DOUBLE: TypeChecker(
float, int, long),
_FieldDescriptor.CPPTYPE_FLOAT: TypeChecker(
float, int, long),
_FieldDescriptor.CPPTYPE_BOOL: TypeChecker(bool, int),
_FieldDescriptor.CPPTYPE_ENUM: Int32ValueChecker(),
_FieldDescriptor.CPPTYPE_STRING: TypeChecker(str),
}
# Map from field type to a function F, such that F(field_num, value)
# gives the total byte size for a value of the given type. This
# byte size includes tag information and any other additional space
# associated with serializing "value".
TYPE_TO_BYTE_SIZE_FN = {
_FieldDescriptor.TYPE_DOUBLE: wire_format.DoubleByteSize,
_FieldDescriptor.TYPE_FLOAT: wire_format.FloatByteSize,
_FieldDescriptor.TYPE_INT64: wire_format.Int64ByteSize,
_FieldDescriptor.TYPE_UINT64: wire_format.UInt64ByteSize,
_FieldDescriptor.TYPE_INT32: wire_format.Int32ByteSize,
_FieldDescriptor.TYPE_FIXED64: wire_format.Fixed64ByteSize,
_FieldDescriptor.TYPE_FIXED32: wire_format.Fixed32ByteSize,
_FieldDescriptor.TYPE_BOOL: wire_format.BoolByteSize,
_FieldDescriptor.TYPE_STRING: wire_format.StringByteSize,
_FieldDescriptor.TYPE_GROUP: wire_format.GroupByteSize,
_FieldDescriptor.TYPE_MESSAGE: wire_format.MessageByteSize,
_FieldDescriptor.TYPE_BYTES: wire_format.BytesByteSize,
_FieldDescriptor.TYPE_UINT32: wire_format.UInt32ByteSize,
_FieldDescriptor.TYPE_ENUM: wire_format.EnumByteSize,
_FieldDescriptor.TYPE_SFIXED32: wire_format.SFixed32ByteSize,
_FieldDescriptor.TYPE_SFIXED64: wire_format.SFixed64ByteSize,
_FieldDescriptor.TYPE_SINT32: wire_format.SInt32ByteSize,
_FieldDescriptor.TYPE_SINT64: wire_format.SInt64ByteSize
}
# Maps from field types to encoder constructors.
TYPE_TO_ENCODER = {
_FieldDescriptor.TYPE_DOUBLE: encoder.DoubleEncoder,
_FieldDescriptor.TYPE_FLOAT: encoder.FloatEncoder,
_FieldDescriptor.TYPE_INT64: encoder.Int64Encoder,
_FieldDescriptor.TYPE_UINT64: encoder.UInt64Encoder,
_FieldDescriptor.TYPE_INT32: encoder.Int32Encoder,
_FieldDescriptor.TYPE_FIXED64: encoder.Fixed64Encoder,
_FieldDescriptor.TYPE_FIXED32: encoder.Fixed32Encoder,
_FieldDescriptor.TYPE_BOOL: encoder.BoolEncoder,
_FieldDescriptor.TYPE_STRING: encoder.StringEncoder,
_FieldDescriptor.TYPE_GROUP: encoder.GroupEncoder,
_FieldDescriptor.TYPE_MESSAGE: encoder.MessageEncoder,
_FieldDescriptor.TYPE_BYTES: encoder.BytesEncoder,
_FieldDescriptor.TYPE_UINT32: encoder.UInt32Encoder,
_FieldDescriptor.TYPE_ENUM: encoder.EnumEncoder,
_FieldDescriptor.TYPE_SFIXED32: encoder.SFixed32Encoder,
_FieldDescriptor.TYPE_SFIXED64: encoder.SFixed64Encoder,
_FieldDescriptor.TYPE_SINT32: encoder.SInt32Encoder,
_FieldDescriptor.TYPE_SINT64: encoder.SInt64Encoder,
}
# Maps from field types to sizer constructors.
TYPE_TO_SIZER = {
_FieldDescriptor.TYPE_DOUBLE: encoder.DoubleSizer,
_FieldDescriptor.TYPE_FLOAT: encoder.FloatSizer,
_FieldDescriptor.TYPE_INT64: encoder.Int64Sizer,
_FieldDescriptor.TYPE_UINT64: encoder.UInt64Sizer,
_FieldDescriptor.TYPE_INT32: encoder.Int32Sizer,
_FieldDescriptor.TYPE_FIXED64: encoder.Fixed64Sizer,
_FieldDescriptor.TYPE_FIXED32: encoder.Fixed32Sizer,
_FieldDescriptor.TYPE_BOOL: encoder.BoolSizer,
_FieldDescriptor.TYPE_STRING: encoder.StringSizer,
_FieldDescriptor.TYPE_GROUP: encoder.GroupSizer,
_FieldDescriptor.TYPE_MESSAGE: encoder.MessageSizer,
_FieldDescriptor.TYPE_BYTES: encoder.BytesSizer,
_FieldDescriptor.TYPE_UINT32: encoder.UInt32Sizer,
_FieldDescriptor.TYPE_ENUM: encoder.EnumSizer,
_FieldDescriptor.TYPE_SFIXED32: encoder.SFixed32Sizer,
_FieldDescriptor.TYPE_SFIXED64: encoder.SFixed64Sizer,
_FieldDescriptor.TYPE_SINT32: encoder.SInt32Sizer,
_FieldDescriptor.TYPE_SINT64: encoder.SInt64Sizer,
}
# Maps from field type to a decoder constructor.
TYPE_TO_DECODER = {
_FieldDescriptor.TYPE_DOUBLE: decoder.DoubleDecoder,
_FieldDescriptor.TYPE_FLOAT: decoder.FloatDecoder,
_FieldDescriptor.TYPE_INT64: decoder.Int64Decoder,
_FieldDescriptor.TYPE_UINT64: decoder.UInt64Decoder,
_FieldDescriptor.TYPE_INT32: decoder.Int32Decoder,
_FieldDescriptor.TYPE_FIXED64: decoder.Fixed64Decoder,
_FieldDescriptor.TYPE_FIXED32: decoder.Fixed32Decoder,
_FieldDescriptor.TYPE_BOOL: decoder.BoolDecoder,
_FieldDescriptor.TYPE_STRING: decoder.StringDecoder,
_FieldDescriptor.TYPE_GROUP: decoder.GroupDecoder,
_FieldDescriptor.TYPE_MESSAGE: decoder.MessageDecoder,
_FieldDescriptor.TYPE_BYTES: decoder.BytesDecoder,
_FieldDescriptor.TYPE_UINT32: decoder.UInt32Decoder,
_FieldDescriptor.TYPE_ENUM: decoder.EnumDecoder,
_FieldDescriptor.TYPE_SFIXED32: decoder.SFixed32Decoder,
_FieldDescriptor.TYPE_SFIXED64: decoder.SFixed64Decoder,
_FieldDescriptor.TYPE_SINT32: decoder.SInt32Decoder,
_FieldDescriptor.TYPE_SINT64: decoder.SInt64Decoder,
}
# Maps from field type to expected wiretype.
FIELD_TYPE_TO_WIRE_TYPE = {
_FieldDescriptor.TYPE_DOUBLE: wire_format.WIRETYPE_FIXED64,
_FieldDescriptor.TYPE_FLOAT: wire_format.WIRETYPE_FIXED32,
_FieldDescriptor.TYPE_INT64: wire_format.WIRETYPE_VARINT,
_FieldDescriptor.TYPE_UINT64: wire_format.WIRETYPE_VARINT,
_FieldDescriptor.TYPE_INT32: wire_format.WIRETYPE_VARINT,
_FieldDescriptor.TYPE_FIXED64: wire_format.WIRETYPE_FIXED64,
_FieldDescriptor.TYPE_FIXED32: wire_format.WIRETYPE_FIXED32,
_FieldDescriptor.TYPE_BOOL: wire_format.WIRETYPE_VARINT,
_FieldDescriptor.TYPE_STRING:
wire_format.WIRETYPE_LENGTH_DELIMITED,
_FieldDescriptor.TYPE_GROUP: wire_format.WIRETYPE_START_GROUP,
_FieldDescriptor.TYPE_MESSAGE:
wire_format.WIRETYPE_LENGTH_DELIMITED,
_FieldDescriptor.TYPE_BYTES:
wire_format.WIRETYPE_LENGTH_DELIMITED,
_FieldDescriptor.TYPE_UINT32: wire_format.WIRETYPE_VARINT,
_FieldDescriptor.TYPE_ENUM: wire_format.WIRETYPE_VARINT,
_FieldDescriptor.TYPE_SFIXED32: wire_format.WIRETYPE_FIXED32,
_FieldDescriptor.TYPE_SFIXED64: wire_format.WIRETYPE_FIXED64,
_FieldDescriptor.TYPE_SINT32: wire_format.WIRETYPE_VARINT,
_FieldDescriptor.TYPE_SINT64: wire_format.WIRETYPE_VARINT,
}

View File

@ -0,0 +1,268 @@
# Protocol Buffers - Google's data interchange format
# Copyright 2008 Google Inc. All rights reserved.
# http://code.google.com/p/protobuf/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Constants and static functions to support protocol buffer wire format."""
__author__ = 'robinson@google.com (Will Robinson)'
import struct
from google.protobuf import descriptor
from google.protobuf import message
TAG_TYPE_BITS = 3 # Number of bits used to hold type info in a proto tag.
TAG_TYPE_MASK = (1 << TAG_TYPE_BITS) - 1 # 0x7
# These numbers identify the wire type of a protocol buffer value.
# We use the least-significant TAG_TYPE_BITS bits of the varint-encoded
# tag-and-type to store one of these WIRETYPE_* constants.
# These values must match WireType enum in google/protobuf/wire_format.h.
WIRETYPE_VARINT = 0
WIRETYPE_FIXED64 = 1
WIRETYPE_LENGTH_DELIMITED = 2
WIRETYPE_START_GROUP = 3
WIRETYPE_END_GROUP = 4
WIRETYPE_FIXED32 = 5
_WIRETYPE_MAX = 5
# Bounds for various integer types.
INT32_MAX = int((1 << 31) - 1)
INT32_MIN = int(-(1 << 31))
UINT32_MAX = (1 << 32) - 1
INT64_MAX = (1 << 63) - 1
INT64_MIN = -(1 << 63)
UINT64_MAX = (1 << 64) - 1
# "struct" format strings that will encode/decode the specified formats.
FORMAT_UINT32_LITTLE_ENDIAN = '<I'
FORMAT_UINT64_LITTLE_ENDIAN = '<Q'
FORMAT_FLOAT_LITTLE_ENDIAN = '<f'
FORMAT_DOUBLE_LITTLE_ENDIAN = '<d'
# We'll have to provide alternate implementations of AppendLittleEndian*() on
# any architectures where these checks fail.
if struct.calcsize(FORMAT_UINT32_LITTLE_ENDIAN) != 4:
raise AssertionError('Format "I" is not a 32-bit number.')
if struct.calcsize(FORMAT_UINT64_LITTLE_ENDIAN) != 8:
raise AssertionError('Format "Q" is not a 64-bit number.')
def PackTag(field_number, wire_type):
"""Returns an unsigned 32-bit integer that encodes the field number and
wire type information in standard protocol message wire format.
Args:
field_number: Expected to be an integer in the range [1, 1 << 29)
wire_type: One of the WIRETYPE_* constants.
"""
if not 0 <= wire_type <= _WIRETYPE_MAX:
raise message.EncodeError('Unknown wire type: %d' % wire_type)
return (field_number << TAG_TYPE_BITS) | wire_type
def UnpackTag(tag):
"""The inverse of PackTag(). Given an unsigned 32-bit number,
returns a (field_number, wire_type) tuple.
"""
return (tag >> TAG_TYPE_BITS), (tag & TAG_TYPE_MASK)
def ZigZagEncode(value):
"""ZigZag Transform: Encodes signed integers so that they can be
effectively used with varint encoding. See wire_format.h for
more details.
"""
if value >= 0:
return value << 1
return (value << 1) ^ (~0)
def ZigZagDecode(value):
"""Inverse of ZigZagEncode()."""
if not value & 0x1:
return value >> 1
return (value >> 1) ^ (~0)
# The *ByteSize() functions below return the number of bytes required to
# serialize "field number + type" information and then serialize the value.
def Int32ByteSize(field_number, int32):
return Int64ByteSize(field_number, int32)
def Int32ByteSizeNoTag(int32):
return _VarUInt64ByteSizeNoTag(0xffffffffffffffff & int32)
def Int64ByteSize(field_number, int64):
# Have to convert to uint before calling UInt64ByteSize().
return UInt64ByteSize(field_number, 0xffffffffffffffff & int64)
def UInt32ByteSize(field_number, uint32):
return UInt64ByteSize(field_number, uint32)
def UInt64ByteSize(field_number, uint64):
return TagByteSize(field_number) + _VarUInt64ByteSizeNoTag(uint64)
def SInt32ByteSize(field_number, int32):
return UInt32ByteSize(field_number, ZigZagEncode(int32))
def SInt64ByteSize(field_number, int64):
return UInt64ByteSize(field_number, ZigZagEncode(int64))
def Fixed32ByteSize(field_number, fixed32):
return TagByteSize(field_number) + 4
def Fixed64ByteSize(field_number, fixed64):
return TagByteSize(field_number) + 8
def SFixed32ByteSize(field_number, sfixed32):
return TagByteSize(field_number) + 4
def SFixed64ByteSize(field_number, sfixed64):
return TagByteSize(field_number) + 8
def FloatByteSize(field_number, flt):
return TagByteSize(field_number) + 4
def DoubleByteSize(field_number, double):
return TagByteSize(field_number) + 8
def BoolByteSize(field_number, b):
return TagByteSize(field_number) + 1
def EnumByteSize(field_number, enum):
return UInt32ByteSize(field_number, enum)
def StringByteSize(field_number, string):
return BytesByteSize(field_number, string.encode('utf-8'))
def BytesByteSize(field_number, b):
return (TagByteSize(field_number)
+ _VarUInt64ByteSizeNoTag(len(b))
+ len(b))
def GroupByteSize(field_number, message):
return (2 * TagByteSize(field_number) # START and END group.
+ message.ByteSize())
def MessageByteSize(field_number, message):
return (TagByteSize(field_number)
+ _VarUInt64ByteSizeNoTag(message.ByteSize())
+ message.ByteSize())
def MessageSetItemByteSize(field_number, msg):
# First compute the sizes of the tags.
# There are 2 tags for the beginning and ending of the repeated group, that
# is field number 1, one with field number 2 (type_id) and one with field
# number 3 (message).
total_size = (2 * TagByteSize(1) + TagByteSize(2) + TagByteSize(3))
# Add the number of bytes for type_id.
total_size += _VarUInt64ByteSizeNoTag(field_number)
message_size = msg.ByteSize()
# The number of bytes for encoding the length of the message.
total_size += _VarUInt64ByteSizeNoTag(message_size)
# The size of the message.
total_size += message_size
return total_size
def TagByteSize(field_number):
"""Returns the bytes required to serialize a tag with this field number."""
# Just pass in type 0, since the type won't affect the tag+type size.
return _VarUInt64ByteSizeNoTag(PackTag(field_number, 0))
# Private helper function for the *ByteSize() functions above.
def _VarUInt64ByteSizeNoTag(uint64):
"""Returns the number of bytes required to serialize a single varint
using boundary value comparisons. (unrolled loop optimization -WPierce)
uint64 must be unsigned.
"""
if uint64 <= 0x7f: return 1
if uint64 <= 0x3fff: return 2
if uint64 <= 0x1fffff: return 3
if uint64 <= 0xfffffff: return 4
if uint64 <= 0x7ffffffff: return 5
if uint64 <= 0x3ffffffffff: return 6
if uint64 <= 0x1ffffffffffff: return 7
if uint64 <= 0xffffffffffffff: return 8
if uint64 <= 0x7fffffffffffffff: return 9
if uint64 > UINT64_MAX:
raise message.EncodeError('Value out of range: %d' % uint64)
return 10
NON_PACKABLE_TYPES = (
descriptor.FieldDescriptor.TYPE_STRING,
descriptor.FieldDescriptor.TYPE_GROUP,
descriptor.FieldDescriptor.TYPE_MESSAGE,
descriptor.FieldDescriptor.TYPE_BYTES
)
def IsTypePackable(field_type):
"""Return true iff packable = true is valid for fields of this type.
Args:
field_type: a FieldDescriptor::Type value.
Returns:
True iff fields of this type are packable.
"""
return field_type not in NON_PACKABLE_TYPES