import cherrypy
import datetime
import os
from .model_base import Model, AccessControlledModel
import girder
from girder import auditLogger, events
from girder.constants import AccessType, CoreEventHandler
from girder.exceptions import FilePathException, ValidationException
from girder.models.setting import Setting
from girder.settings import SettingKey
from girder.utility import acl_mixin, path as path_util
from girder.utility.model_importer import ModelImporter
[docs]class File(acl_mixin.AccessControlMixin, Model):
"""
This model represents a File, which is stored in an assetstore.
"""
[docs] def initialize(self):
from girder.utility import assetstore_utilities
self.name = 'file'
self.ensureIndices(
['itemId', 'assetstoreId', 'exts']
+ assetstore_utilities.fileIndexFields())
self.ensureTextIndex({'name': 1})
self.resourceColl = 'item'
self.resourceParent = 'itemId'
self.exposeFields(level=AccessType.READ, fields=(
'_id', 'mimeType', 'itemId', 'exts', 'name', 'created', 'creatorId',
'size', 'updated', 'linkUrl'))
self.exposeFields(level=AccessType.SITE_ADMIN, fields=('assetstoreId',))
events.bind('model.file.save.created',
CoreEventHandler.FILE_PROPAGATE_SIZE,
self._propagateSizeToItem)
[docs] def remove(self, file, updateItemSize=True, **kwargs):
"""
Use the appropriate assetstore adapter for whatever assetstore the
file is stored in, and call deleteFile on it, then delete the file
record from the database.
:param file: The file document to remove.
:param updateItemSize: Whether to update the item size. Only set this
to False if you plan to delete the item and do not care about
updating its size.
"""
from .item import Item
if file.get('assetstoreId'):
self.getAssetstoreAdapter(file).deleteFile(file)
if file['itemId']:
item = Item().load(file['itemId'], force=True)
if item is not None:
# files that are linkUrls might not have a size field
if file.get('size') is not None:
self.propagateSizeChange(item, -file['size'], updateItemSize)
else:
girder.logger.warning('Broken reference in file %s: no item %s exists' %
(file['_id'], file['itemId']))
super().remove(file)
[docs] def download(self, file, offset=0, headers=True, endByte=None,
contentDisposition=None, extraParameters=None):
"""
Use the appropriate assetstore adapter for whatever assetstore the
file is stored in, and call downloadFile on it. If the file is a link
file rather than a file in an assetstore, we redirect to it.
:param file: The file to download.
:param offset: The start byte within the file.
:type offset: int
:param headers: Whether to set headers (i.e. is this an HTTP request
for a single file, or something else).
:type headers: bool
:param endByte: Final byte to download. If ``None``, downloads to the
end of the file.
:type endByte: int or None
:param contentDisposition: Content-Disposition response header
disposition-type value.
:type contentDisposition: str or None
:type extraParameters: str or None
"""
events.trigger('model.file.download.request', info={
'file': file,
'startByte': offset,
'endByte': endByte})
auditLogger.info('file.download', extra={
'details': {
'fileId': file['_id'],
'startByte': offset,
'endByte': endByte,
'extraParameters': extraParameters
}
})
if file.get('assetstoreId'):
try:
fileDownload = self.getAssetstoreAdapter(file).downloadFile(
file, offset=offset, headers=headers, endByte=endByte,
contentDisposition=contentDisposition,
extraParameters=extraParameters)
def downloadGenerator():
yield from fileDownload()
if endByte is None or endByte >= file['size']:
events.trigger('model.file.download.complete', info={
'file': file,
'startByte': offset,
'endByte': endByte,
'redirect': False})
return downloadGenerator
except cherrypy.HTTPRedirect:
events.trigger('model.file.download.complete', info={
'file': file,
'startByte': offset,
'endByte': endByte,
'redirect': True})
raise
elif file.get('linkUrl'):
if headers:
events.trigger('model.file.download.complete', info={
'file': file,
'startByte': offset,
'endByte': endByte,
'redirect': True})
raise cherrypy.HTTPRedirect(file['linkUrl'])
else:
endByte = endByte or len(file['linkUrl'])
def stream():
yield file['linkUrl'][offset:endByte]
if endByte >= len(file['linkUrl']):
events.trigger('model.file.download.complete', info={
'file': file,
'startByte': offset,
'endByte': endByte,
'redirect': False})
return stream
else:
raise Exception('File has no known download mechanism.')
[docs] def validate(self, doc):
if doc.get('assetstoreId') is None:
if 'linkUrl' not in doc:
raise ValidationException(
'File must have either an assetstore ID or a link URL.',
'linkUrl')
doc['linkUrl'] = doc['linkUrl'].strip()
if not doc['linkUrl'].startswith(('http:', 'https:')):
raise ValidationException(
'Linked file URL must start with http: or https:.',
'linkUrl')
if doc.get('assetstoreType'):
# If assetstore model is overridden, make sure it's a valid model
self._getAssetstoreModel(doc)
if 'name' not in doc or not doc['name']:
raise ValidationException('File name must not be empty.', 'name')
doc['exts'] = [ext.lower() for ext in doc['name'].split('.')[1:]]
return doc
def _getAssetstoreModel(self, file):
from .assetstore import Assetstore
if file.get('assetstoreType'):
try:
if isinstance(file['assetstoreType'], str):
return ModelImporter.model(file['assetstoreType'])
else:
return ModelImporter.model(*file['assetstoreType'])
except Exception:
raise ValidationException(
'Invalid assetstore type: %s.' % (file['assetstoreType'],))
else:
return Assetstore()
[docs] def createLinkFile(self, name, parent, parentType, url, creator, size=None,
mimeType=None, reuseExisting=False):
"""
Create a file that is a link to a URL, rather than something we maintain
in an assetstore.
:param name: The local name for the file.
:type name: str
:param parent: The parent object for this file.
:type parent: girder.models.folder or girder.models.item
:param parentType: The parent type (folder or item)
:type parentType: str
:param url: The URL that this file points to
:param creator: The user creating the file.
:type creator: dict
:param size: The size of the file in bytes. (optional)
:type size: int
:param mimeType: The mimeType of the file. (optional)
:type mimeType: str
:param reuseExisting: If a file with the same name already exists in
this location, return it rather than creating a new file.
:type reuseExisting: bool
"""
from .item import Item
if parentType == 'folder':
# Create a new item with the name of the file.
item = Item().createItem(
name=name, creator=creator, folder=parent, reuseExisting=reuseExisting)
elif parentType == 'item':
item = parent
existing = None
if reuseExisting:
existing = self.findOne({
'itemId': item['_id'],
'name': name
})
if existing:
file = existing
else:
file = {
'created': datetime.datetime.utcnow(),
'itemId': item['_id'],
'assetstoreId': None,
'name': name
}
file.update({
'creatorId': creator['_id'],
'mimeType': mimeType,
'linkUrl': url
})
if size is not None:
file['size'] = int(size)
try:
if existing:
file = self.updateFile(file)
else:
file = self.save(file)
return file
except ValidationException:
if parentType == 'folder':
Item().remove(item)
raise
[docs] def propagateSizeChange(self, item, sizeIncrement, updateItemSize=True):
"""
Propagates a file size change (or file creation) to the necessary
parents in the hierarchy. Internally, this records subtree size in
the item, the parent folder, and the root node under which the item
lives. Should be called anytime a new file is added, a file is
deleted, or a file size changes.
:param item: The parent item of the file.
:type item: dict
:param sizeIncrement: The change in size to propagate.
:type sizeIncrement: int
:param updateItemSize: Whether the item size should be updated. Set to
False if you plan to delete the item immediately and don't care to
update its size.
"""
from .folder import Folder
from .item import Item
if updateItemSize:
# Propagate size up to item
Item().increment(query={
'_id': item['_id']
}, field='size', amount=sizeIncrement, multi=False)
# Propagate size to direct parent folder
Folder().increment(query={
'_id': item['folderId']
}, field='size', amount=sizeIncrement, multi=False)
# Propagate size up to root data node
ModelImporter.model(item['baseParentType']).increment(query={
'_id': item['baseParentId']
}, field='size', amount=sizeIncrement, multi=False)
[docs] def createFile(self, creator, item, name, size, assetstore, mimeType=None,
saveFile=True, reuseExisting=False, assetstoreType=None):
"""
Create a new file record in the database.
:param item: The parent item.
:param creator: The user creating the file.
:param assetstore: The assetstore this file is stored in.
:param name: The filename.
:type name: str
:param size: The size of the file in bytes.
:type size: int
:param mimeType: The mimeType of the file.
:type mimeType: str
:param saveFile: if False, don't save the file, just return it.
:type saveFile: bool
:param reuseExisting: If a file with the same name already exists in
this location, return it rather than creating a new file.
:type reuseExisting: bool
:param assetstoreType: If a model other than assetstore will be used to
initialize the assetstore adapter for this file, use this parameter to
specify it. If it's a core model, pass its string name. If it's a plugin
model, use a 2-tuple of the form (modelName, pluginName).
:type assetstoreType: str or tuple
"""
if reuseExisting:
existing = self.findOne({
'itemId': item['_id'],
'name': name
})
if existing:
return existing
file = {
'created': datetime.datetime.utcnow(),
'creatorId': creator['_id'],
'assetstoreId': assetstore['_id'],
'name': name,
'mimeType': mimeType,
'size': size,
'itemId': item['_id'] if item else None
}
if assetstoreType:
file['assetstoreType'] = assetstoreType
if saveFile:
return self.save(file)
return file
def _propagateSizeToItem(self, event):
"""
This callback updates an item's size to include that of a newly-created
file.
This generally should not be called or overridden directly. This should
not be unregistered, as that would cause item, folder, and collection
sizes to be inaccurate.
"""
# This task is not performed in "createFile", in case
# "saveFile==False". The item size should be updated only when it's
# certain that the file will actually be saved. It is also possible for
# "model.file.save" to set "defaultPrevented", which would prevent the
# item from being saved initially.
from .item import Item
fileDoc = event.info
itemId = fileDoc.get('itemId')
if itemId and fileDoc.get('size'):
item = Item().load(itemId, force=True)
self.propagateSizeChange(item, fileDoc['size'])
[docs] def updateFile(self, file):
"""
Call this when changing properties of an existing file, such as name
or MIME type. This causes the updated stamp to change, and also alerts
the underlying assetstore adapter that file information has changed.
"""
file['updated'] = datetime.datetime.utcnow()
file = self.save(file)
if file.get('assetstoreId'):
self.getAssetstoreAdapter(file).fileUpdated(file)
return file
[docs] def getAssetstoreAdapter(self, file):
"""
Return the assetstore adapter for the given file. Return None if the
file has no assetstore.
"""
from girder.utility import assetstore_utilities
if not file.get('assetstoreId'):
return None
assetstore = self._getAssetstoreModel(file).load(file['assetstoreId'])
return assetstore_utilities.getAssetstoreAdapter(assetstore)
[docs] def copyFile(self, srcFile, creator, item=None):
"""
Copy a file so that we don't need to duplicate stored data.
:param srcFile: The file to copy.
:type srcFile: dict
:param creator: The user copying the file.
:param item: a new item to assign this file to (optional)
:returns: a dict with the new file.
"""
# Copy the source file's dictionary. The individual assetstore
# implementations will need to fix references if they cannot be
# directly duplicated.
file = srcFile.copy()
# Immediately delete the original id so that we get a new one.
del file['_id']
file['copied'] = datetime.datetime.utcnow()
file['copierId'] = creator['_id']
if item:
file['itemId'] = item['_id']
if file.get('assetstoreId'):
self.getAssetstoreAdapter(file).copyFile(srcFile, file)
elif file.get('linkUrl'):
file['linkUrl'] = srcFile['linkUrl']
return self.save(file)
[docs] def isOrphan(self, file):
"""
Returns True if this file is orphaned (its item or attached entity is
missing).
:param file: The file to check.
:type file: dict
"""
if file.get('attachedToId'):
attachedToType = file.get('attachedToType')
if isinstance(attachedToType, str):
modelType = ModelImporter.model(attachedToType)
elif isinstance(attachedToType, list) and len(attachedToType) == 2:
modelType = ModelImporter.model(*attachedToType)
else:
# Invalid 'attachedToType'
return True
if isinstance(modelType, (acl_mixin.AccessControlMixin,
AccessControlledModel)):
attachedDoc = modelType.load(
file.get('attachedToId'), force=True)
else:
attachedDoc = modelType.load(
file.get('attachedToId'))
else:
from .item import Item
attachedDoc = Item().load(file.get('itemId'), force=True)
return not attachedDoc
[docs] def updateSize(self, file):
"""
Returns the size of this file. Does not currently check the underlying
assetstore to verify the size.
:param file: The file.
:type file: dict
"""
fixes = 0
if file.get('assetstoreId'):
size = self.getAssetstoreAdapter(file).getFileSize(file)
if size != file.get('size', 0):
file['size'] = size
self.update({'_id': file['_id']}, {'$set': {'size': size}})
fixes += 1
return file.get('size', 0), fixes
[docs] def open(self, file):
"""
Use this to expose a Girder file as a python file-like object. At the
moment, this is a read-only interface, the equivalent of opening a
system file with ``'rb'`` mode. This can also be used as a context
manager, e.g.:
>>> with File().open(file) as fh:
>>> while True:
>>> chunk = fh.read(CHUNK_LEN)
>>> if not chunk:
>>> break
Using it this way will automatically close the file handle for you when
the ``with`` block is left.
:param file: A Girder file document.
:type file: dict
:return: A file-like object containing the bytes of the file.
:rtype: girder.utility.abstract_assetstore_adapter.FileHandle
"""
return self.getAssetstoreAdapter(file).open(file)
[docs] def getGirderMountFilePath(self, file, validate=True, preferFlat=False):
"""
If possible, get the path of the file on a local girder mount. Flat
paths exclude the item component, which, for some types of access of
relative paths can be preferred.
:param file: The file document.
:param validate: if True, check if the path exists and raise an
exception if it does not.
:param preferFlat: if True and the mount supports it, return the flat
path that doesn't include item information.
:returns: a girder mount path to the file or None if no such path is
available.
"""
mount = Setting().get(SettingKey.GIRDER_MOUNT_INFORMATION)
if mount:
resPath = path_util.getResourcePath('file', file, force=True)
path = os.path.join(mount['path'], resPath.lstrip('/'))
if preferFlat and mount.get('hasFlat'):
flatpath = os.path.join(mount['path'], 'flat', os.path.dirname(resPath).lstrip('/'))
# An item can have multiple files, while flatten returns the
# "first" file, which might not be the file that was requested.
# Perform a simple size check to verify it is the file
# requested. This happens even if validation is False.
if (
os.path.exists(path) and os.path.exists(flatpath)
and os.path.getsize(path) == os.path.getsize(flatpath)):
return flatpath
if not validate or os.path.exists(path):
return path
if validate:
raise FilePathException("This file isn't accessible from a Girder mount.")
[docs] def getLocalFilePath(self, file):
"""
If an assetstore adapter supports it, return a path to the file on the
local file system.
:param file: The file document.
:returns: a local path to the file or None if no such path is known.
"""
adapter = self.getAssetstoreAdapter(file)
try:
return adapter.getLocalFilePath(file)
except FilePathException as exc:
try:
return self.getGirderMountFilePath(file, True)
except Exception:
# If getting a Girder mount path throws, raise the original
# exception
pass
raise exc