Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
8defb21
get name and size from metadata and header of file, avoid input strea…
Paurikova2 May 21, 2025
6d55c95
checkstyle violations
Paurikova2 May 21, 2025
d1fa497
remove temp file, checkstyle, do not load full file
Paurikova2 May 21, 2025
c52090c
add { } after if
Paurikova2 May 21, 2025
8e576a7
added check for max preview file
Paurikova2 May 21, 2025
1e1ea9f
used ZipFile and TarArchived for filepreview generating
Paurikova2 May 27, 2025
1ed0c68
added removed lines
Paurikova2 May 27, 2025
9fea2e2
used 7z for zip and tar files
Paurikova2 May 27, 2025
76be5b3
removed 7z and used zip and tar entry
Paurikova2 May 27, 2025
09bcf79
checkstyle violations
Paurikova2 May 27, 2025
8f07fe2
improved file previrew generating speed, used string builder, xml bui…
Paurikova2 Jun 6, 2025
0ba1947
checkstyle, return boolean from haspreview and previrews from getPrev…
Paurikova2 Jun 6, 2025
719fa77
fix test, better doc, checkstyle
Paurikova2 Jun 6, 2025
2d45b6c
removed empty lines
Paurikova2 Jun 6, 2025
a23fc50
removed maxresult limit
Paurikova2 Jun 6, 2025
058824f
fix long conversion
Paurikova2 Jun 6, 2025
5831fda
fix problem with hibernate session
Paurikova2 Jun 6, 2025
e8001e7
fix problem with hibernate session
Paurikova2 Jun 6, 2025
316434b
fix .tar.gz generating
Paurikova2 Jun 6, 2025
4944c00
Merge branch 'ufal/improve-file-preview-generating' of github.com:dat…
Paurikova2 Jun 6, 2025
d6468d5
removed fix for .tar.gz - not work
Paurikova2 Jun 6, 2025
7f2374b
skip fully entry for tar
Paurikova2 Jun 6, 2025
252f59c
added indexes for speed up queries
Paurikova2 Jun 9, 2025
3c40823
removed unused improt
Paurikova2 Jun 9, 2025
e13b55d
fix comments, added indexes to separated sql file
Paurikova2 Jun 9, 2025
62693ed
added license header
Paurikova2 Jun 9, 2025
6805000
named constant by upper case
Paurikova2 Jun 10, 2025
edeeca0
added empty lines
Paurikova2 Jun 10, 2025
5fbdf5d
removed empty lines
Paurikova2 Jun 10, 2025
06d8f0c
set static attribute to static final
Paurikova2 Jun 10, 2025
95e7dfb
set public method to private, interupt threat in exception
Paurikova2 Jun 10, 2025
3522292
inicialized fileInfo, refactorization of code based on copilot review
Paurikova2 Jun 10, 2025
7066843
restore sql files, restore vanilla comments
Paurikova2 Jun 10, 2025
dd26d98
removed interuption of threat from exception
Paurikova2 Jun 10, 2025
dd48091
Merge branch 'dtq-dev' into ufal/improve-file-preview-generating
milanmajchrak Jun 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/
package org.dspace.content;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.sql.SQLException;
Expand Down Expand Up @@ -315,6 +316,15 @@ public InputStream retrieve(Context context, Bitstream bitstream)
return bitstreamStorageService.retrieve(context, bitstream);
}

/**
 * {@inheritDoc}
 *
 * The READ check on the bitstream is performed only when {@code authorization} is true;
 * in both cases the file itself comes from the bitstream storage service.
 */
@Override
public File retrieveFile(Context context, Bitstream bitstream, boolean authorization)
        throws IOException, SQLException, AuthorizeException {
    if (!authorization) {
        // Caller asked to bypass the authorization check.
        return bitstreamStorageService.retrieveFile(context, bitstream);
    }
    authorizeService.authorizeAction(context, bitstream, Constants.READ);
    return bitstreamStorageService.retrieveFile(context, bitstream);
}

@Override
public boolean isRegisteredBitstream(Bitstream bitstream) {
return bitstreamStorageService.isRegisteredBitstream(bitstream.getInternalId());
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,16 @@ public interface PreviewContentDAO extends GenericDAO<PreviewContent> {
*/
List<PreviewContent> findByBitstream(Context context, UUID bitstreamId) throws SQLException;

/**
* Check whether the given bitstream has any associated preview content.
*
* @param context DSpace context
* @param bitstream The bitstream whose UUID is used to look up preview content
* @return True if preview content exists, false otherwise
* @throws SQLException If a database error occurs
*/
boolean hasPreview(Context context, Bitstream bitstream) throws SQLException;

/**
* Find all preview content based on bitstream that are the root directory.
*
Expand All @@ -41,5 +51,5 @@ public interface PreviewContentDAO extends GenericDAO<PreviewContent> {
* @return List of found preview content
* @throws SQLException If a database error occurs
*/
List<PreviewContent> hasPreview(Context context, Bitstream bitstream) throws SQLException;
List<PreviewContent> getPreview(Context context, Bitstream bitstream) throws SQLException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,16 @@ public List<PreviewContent> findByBitstream(Context context, UUID bitstreamId) t
}

@Override
public List<PreviewContent> hasPreview(Context context, Bitstream bitstream) throws SQLException {
public boolean hasPreview(Context context, Bitstream bitstream) throws SQLException {
    // Count the preview content rows that reference this bitstream;
    // a single hit is enough to report that a preview exists.
    final String countHql = "SELECT COUNT(pc) FROM " + PreviewContent.class.getSimpleName() +
            " pc WHERE pc.bitstream.id = :bitstream_id";
    Query countQuery = createQuery(context, countHql);
    countQuery.setParameter("bitstream_id", bitstream.getID());
    boolean previewExists = count(countQuery) > 0;
    return previewExists;
}

@Override
public List<PreviewContent> getPreview(Context context, Bitstream bitstream) throws SQLException {
// select only data from the previewcontent table whose ID is not a child in the preview2preview table
Query query = getHibernateSession(context).createNativeQuery(
"SELECT pc.* FROM previewcontent pc " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/
package org.dspace.content.service;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.sql.SQLException;
Expand Down Expand Up @@ -154,7 +155,7 @@ public Bitstream register(Context context, int assetstore, String bitstreamPath)
public void setFormat(Context context, Bitstream bitstream, BitstreamFormat bitstreamFormat) throws SQLException;

/**
* Retrieve the contents of the bitstream
* Retrieve the contents of the bitstream.
*
* @param context DSpace context object
* @param bitstream DSpace bitstream
Expand All @@ -166,6 +167,20 @@ public Bitstream register(Context context, int assetstore, String bitstreamPath)
public InputStream retrieve(Context context, Bitstream bitstream)
throws IOException, SQLException, AuthorizeException;

/**
* Retrieve the contents of the bitstream as a {@link File}.
*
* @param context DSpace context object
* @param bitstream DSpace bitstream
* @param authorization true to check READ permission on the bitstream before retrieving it,
*                      false to skip the authorization check
* @return a File from which the bitstream can be read.
* @throws IOException if IO error
* @throws SQLException if database error
* @throws AuthorizeException if authorization error
*/
public File retrieveFile(Context context, Bitstream bitstream, boolean authorization)
throws IOException, SQLException, AuthorizeException;

/**
* Determine if this bitstream is registered (available elsewhere on
* filesystem than in assetstore). More about registered items:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
*/
package org.dspace.content.service;

import java.io.InputStream;
import java.io.File;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -82,14 +82,25 @@ PreviewContent create(Context context, Bitstream bitstream, String name, String
*/
List<PreviewContent> findByBitstream(Context context, UUID bitstream_id) throws SQLException;

/**
* Check whether the given bitstream has any associated preview content.
*
* @param context DSpace context
* @param bitstream The bitstream whose UUID is used to look up preview content
* @return True if preview content exists, false otherwise
* @throws SQLException If a database error occurs
*/
boolean hasPreview(Context context, Bitstream bitstream) throws SQLException;

/**
* Find all preview content based on bitstream that are the root directory.
*
* @param context DSpace context
* @param bitstream The bitstream to get bitstream UUID
* @return List of preview contents
* @throws SQLException If a database error occurs
*/
List<PreviewContent> hasPreview(Context context, Bitstream bitstream) throws SQLException;
List<PreviewContent> getPreview(Context context, Bitstream bitstream) throws SQLException;

/**
* Find all preview contents from database.
Expand All @@ -104,9 +115,11 @@ PreviewContent create(Context context, Bitstream bitstream, String name, String
*
* @param context DSpace context object
* @param bitstream check if this bitstream could be previewed
* @param authorization true if authorization is required else false
* @return true if the bitstream could be previewed, false otherwise
*/
boolean canPreview(Context context, Bitstream bitstream) throws SQLException, AuthorizeException;
boolean canPreview(Context context, Bitstream bitstream, boolean authorization)
throws SQLException, AuthorizeException;

/**
* Return converted ZIP file content into FileInfo classes.
Expand Down Expand Up @@ -143,13 +156,13 @@ PreviewContent create(Context context, Bitstream bitstream, String name, String
FileInfo createFileInfo(PreviewContent pc);

/**
* Convert InputStream of the ZIP file into FileInfo classes.
* Convert File of the ZIP file into FileInfo classes.
*
* @param context DSpace context object
* @param bitstream previewing bitstream
* @param inputStream content of the zip file
* @param file content of the zip file
* @return List of FileInfo classes where is wrapped ZIP file content
*/
List<FileInfo> processInputStreamToFilePreview(Context context, Bitstream bitstream, InputStream inputStream)
List<FileInfo> processFileToFilePreview(Context context, Bitstream bitstream, File file)
throws Exception;
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import org.dspace.content.Bitstream;
import org.dspace.content.Bundle;
import org.dspace.content.Item;
import org.dspace.content.PreviewContent;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.content.service.PreviewContentService;
Expand Down Expand Up @@ -147,21 +146,20 @@ private void generateItemFilePreviews(Context context, UUID itemUUID) throws Exc
for (Bundle bundle : bundles) {
List<Bitstream> bitstreams = bundle.getBitstreams();
for (Bitstream bitstream : bitstreams) {
boolean canPreview = previewContentService.canPreview(context, bitstream);
boolean canPreview = previewContentService.canPreview(context, bitstream, false);
if (!canPreview) {
return;
continue;
}
List<PreviewContent> prContents = previewContentService.hasPreview(context, bitstream);
// Generate new content if we didn't find any
if (!prContents.isEmpty()) {
return;
if (previewContentService.hasPreview(context, bitstream)) {
continue;
}

List<FileInfo> fileInfos = previewContentService.getFilePreviewContent(context, bitstream);
// Do not store HTML content in the database because it could be longer than the limit
// of the database column
if (StringUtils.equals("text/html", bitstream.getFormat(context).getMIMEType())) {
return;
continue;
}

for (FileInfo fi : fileInfos) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/
package org.dspace.storage.bitstore;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
Expand Down Expand Up @@ -45,6 +46,16 @@ public interface BitStoreService {
*/
public InputStream get(Bitstream bitstream) throws IOException;

/**
* Retrieve the bits for the bitstream as a {@link File}.
*
* NOTE(review): the returned file is not necessarily the stored asset itself; the S3
* implementation downloads the object into a temporary file and returns that copy.
* Confirm per implementation before modifying or deleting the returned file.
*
* @param bitstream DSpace Bitstream object
* @return The File
* @throws java.io.IOException If a problem occurs while retrieving the bits, or if no
* asset with ID exists in the store
*/
public File getFile(Bitstream bitstream) throws IOException;

/**
* Store a stream of bits.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/
package org.dspace.storage.bitstore;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.sql.SQLException;
Expand Down Expand Up @@ -214,6 +215,13 @@ public InputStream retrieve(Context context, Bitstream bitstream)
return this.getStore(storeNumber).get(bitstream);
}

/**
 * {@inheritDoc}
 *
 * Delegates to the store identified by the bitstream's store number.
 */
@Override
public File retrieveFile(Context context, Bitstream bitstream)
        throws IOException {
    // Look up the store by the bitstream's store number and ask it for the file.
    return this.getStore(bitstream.getStoreNumber()).getFile(bitstream);
}

@Override
public void cleanup(boolean deleteDbRecords, boolean verbose) throws SQLException, IOException, AuthorizeException {
Context context = new Context(Context.Mode.BATCH_EDIT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ private synchronized static void deleteParents(File file) {
* @return The corresponding file in the file system, or <code>null</code>
* @throws IOException If a problem occurs while determining the file
*/
protected File getFile(Bitstream bitstream) throws IOException {
public File getFile(Bitstream bitstream) throws IOException {
// Check that bitstream is not null
if (bitstream == null) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,29 @@ public InputStream get(Bitstream bitstream) throws IOException {
}
}

/**
 * Download the S3 object backing the bitstream into a local temporary file.
 *
 * The returned file is a temporary copy registered with {@link File#deleteOnExit()}.
 * If the download fails or is interrupted, the partially written temporary file is
 * removed immediately instead of lingering until the JVM exits.
 *
 * @param bitstream DSpace Bitstream object whose internal ID determines the S3 key
 * @return a temporary local file holding the downloaded object
 * @throws IOException if the temporary file cannot be created, or if the download
 *                     fails or the waiting thread is interrupted
 */
@Override
public File getFile(Bitstream bitstream) throws IOException {
    String key = getFullKey(bitstream.getInternalId());
    // Strip -R from bitstream key if it's registered
    if (isRegisteredBitstream(key)) {
        key = key.substring(REGISTERED_FLAG.length());
    }

    // Created outside the try block so the failure path below can clean it up.
    File tempFile = File.createTempFile("s3-disk-copy-" + UUID.randomUUID(), "temp");
    tempFile.deleteOnExit();
    try {
        GetObjectRequest getObjectRequest = new GetObjectRequest(bucketName, key);

        Download download = tm.download(getObjectRequest, tempFile);
        download.waitForCompletion();

        return tempFile;
    } catch (AmazonClientException | InterruptedException e) {
        if (e instanceof InterruptedException) {
            // Restore the interrupt flag so callers further up can still observe it.
            Thread.currentThread().interrupt();
        }
        // Best-effort removal of the partial download; if this fails, deleteOnExit()
        // registered above still applies, so the result is intentionally ignored.
        tempFile.delete();
        log.error("getFile(" + key + ")", e);
        throw new IOException(e);
    }
}

/**
* Store a stream of bits.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/
package org.dspace.storage.bitstore.service;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.sql.SQLException;
Expand Down Expand Up @@ -126,6 +127,18 @@ public UUID register(Context context, Bitstream bitstream, int assetstore, Strin
public InputStream retrieve(Context context, Bitstream bitstream)
throws SQLException, IOException;

/**
* Retrieve the file of the given bitstream. If the bitstream does not
* exist, or is marked deleted, returns null.
*
* NOTE(review): the implementation visible alongside this interface delegates directly
* to the store selected by the bitstream's store number and performs no "deleted"
* check of its own - confirm that the null contract above is actually honoured.
*
* @param context The current context
* @param bitstream The bitstream to retrieve
* @return The file, or null
* @throws IOException If a problem occurs while retrieving the bits
* @throws SQLException If a problem occurs accessing the RDBMS
*/
public File retrieveFile(Context context, Bitstream bitstream) throws SQLException, IOException;

/**
* Clean up the bitstream storage area. This method deletes any bitstreams
* which are more than 1 hour old and marked deleted. The deletions cannot
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
--
-- The contents of this file are subject to the license and copyright
-- detailed in the LICENSE and NOTICE files at the root of the source
-- tree and available online at
--
-- http://www.dspace.org/license/
--

-- ===================================================================
-- PERFORMANCE INDEXES
-- ===================================================================

--
-- Index to speed up queries filtering previewcontent by bitstream_id,
-- used in hasPreview() and getPreview() JOIN with bitstream table.
-- IF NOT EXISTS keeps the migration from failing when an index with
-- this name has already been created (e.g. added by hand beforehand).
--
CREATE INDEX IF NOT EXISTS idx_previewcontent_bitstream_id
ON previewcontent (bitstream_id);

--
-- Index to optimize NOT EXISTS subquery in getPreview(),
-- checking for existence of child_id in preview2preview.
-- IF NOT EXISTS keeps the migration from failing when an index with
-- this name has already been created.
--
CREATE INDEX IF NOT EXISTS idx_preview2preview_child_id
ON preview2preview (child_id);
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
--
-- The contents of this file are subject to the license and copyright
-- detailed in the LICENSE and NOTICE files at the root of the source
-- tree and available online at
--
-- http://www.dspace.org/license/
--

-- ===================================================================
-- PERFORMANCE INDEXES
-- ===================================================================

--
-- Index to speed up queries filtering previewcontent by bitstream_id,
-- used in hasPreview() and getPreview() JOIN with bitstream table.
-- IF NOT EXISTS keeps the migration from failing when an index with
-- this name has already been created (e.g. added by hand beforehand).
--
CREATE INDEX IF NOT EXISTS idx_previewcontent_bitstream_id
ON previewcontent (bitstream_id);

--
-- Index to optimize NOT EXISTS subquery in getPreview(),
-- checking for existence of child_id in preview2preview.
-- IF NOT EXISTS keeps the migration from failing when an index with
-- this name has already been created.
--
CREATE INDEX IF NOT EXISTS idx_preview2preview_child_id
ON preview2preview (child_id);
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,14 @@ public Page<MetadataBitstreamWrapperRest> findByHandle(@Parameter(value = "handl
for (Bitstream bitstream : bitstreams) {
String url = previewContentService.composePreviewURL(context, item, bitstream, contextPath);
List<FileInfo> fileInfos = new ArrayList<>();
boolean canPreview = previewContentService.canPreview(context, bitstream);
boolean canPreview = previewContentService.canPreview(context, bitstream, false);
String mimeType = bitstream.getFormat(context).getMIMEType();
// HTML content could be longer than the limit, so we do not store it in the DB.
// It has to be generated even if property is false.
if (StringUtils.equals(mimeType, TEXT_HTML_MIME_TYPE) || canPreview) {
try {
List<PreviewContent> prContents = previewContentService.hasPreview(context, bitstream);
// Generate new content if we didn't find any
if (prContents.isEmpty()) {
if (!previewContentService.hasPreview(context, bitstream)) {
boolean allowComposePreviewContent = configurationService.getBooleanProperty
("create.file-preview.on-item-page-load", false);
if (allowComposePreviewContent) {
Expand All @@ -131,6 +130,7 @@ public Page<MetadataBitstreamWrapperRest> findByHandle(@Parameter(value = "handl
}
}
} else {
List<PreviewContent> prContents = previewContentService.getPreview(context, bitstream);
for (PreviewContent pc : prContents) {
fileInfos.add(previewContentService.createFileInfo(pc));
}
Expand Down
Loading
Loading