/* FreeRDP: A Remote Desktop Protocol Client * Copy operations. * vi:ts=4 sw=4: * * (c) Copyright 2012 Hewlett-Packard Development Company, L.P. * Licensed under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may obtain * a copy of the License at http://www.apache.org/licenses/LICENSE-2.0. * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing * permissions and limitations under the License. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include #include #ifdef WITH_IPP #include #include #endif /* WITH_IPP */ #include "prim_internal.h" static primitives_t* generic = NULL; /* ------------------------------------------------------------------------- */ /*static inline BOOL memory_regions_overlap_1d(*/ static BOOL memory_regions_overlap_1d(const BYTE* p1, const BYTE* p2, size_t bytes) { const ULONG_PTR p1m = (const ULONG_PTR)p1; const ULONG_PTR p2m = (const ULONG_PTR)p2; if (p1m <= p2m) { if (p1m + bytes > p2m) return TRUE; } else { if (p2m + bytes > p1m) return TRUE; } /* else */ return FALSE; } /* ------------------------------------------------------------------------- */ /*static inline BOOL memory_regions_overlap_2d( */ static BOOL memory_regions_overlap_2d(const BYTE* p1, int p1Step, int p1Size, const BYTE* p2, int p2Step, int p2Size, int width, int height) { ULONG_PTR p1m = (ULONG_PTR)p1; ULONG_PTR p2m = (ULONG_PTR)p2; if (p1m <= p2m) { ULONG_PTR p1mEnd = p1m + (height - 1) * p1Step * 1ULL + width * p1Size * 1ULL; if (p1mEnd > p2m) return TRUE; } else { ULONG_PTR p2mEnd = p2m + (height - 1) * p2Step * 1ULL + width * p2Size * 1ULL; if (p2mEnd > p1m) return TRUE; } /* else */ return FALSE; } /* ------------------------------------------------------------------------- */ static pstatus_t general_copy_8u(const BYTE* pSrc, BYTE* pDst, INT32 len) { if (memory_regions_overlap_1d(pSrc, pDst, (size_t)len)) { memmove((void*)pDst, (const void*)pSrc, (size_t)len); } else { memcpy((void*)pDst, (const void*)pSrc, (size_t)len); } return PRIMITIVES_SUCCESS; } /* ------------------------------------------------------------------------- */ /* Copy a block of pixels from one buffer to another. * The addresses are assumed to have been already offset to the upper-left * corners of the source and destination region of interest. */ static pstatus_t general_copy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDst, INT32 dstStep, INT32 width, INT32 height) { const BYTE* src = (const BYTE*)pSrc; BYTE* dst = (BYTE*)pDst; int rowbytes = width * sizeof(UINT32); if ((width == 0) || (height == 0)) return PRIMITIVES_SUCCESS; if (memory_regions_overlap_2d(pSrc, srcStep, sizeof(UINT32), pDst, dstStep, sizeof(UINT32), width, height)) { do { generic->copy(src, dst, rowbytes); src += srcStep; dst += dstStep; } while (--height); } else { /* TODO: do it in one operation when the rowdata is adjacent. */ do { /* If we find a replacement for memcpy that is consistently * faster, this could be replaced with that. */ memcpy(dst, src, rowbytes); src += srcStep; dst += dstStep; } while (--height); } return PRIMITIVES_SUCCESS; } #ifdef WITH_IPP /* ------------------------------------------------------------------------- */ /* This is just ippiCopy_8u_AC4R without the IppiSize structure parameter. */ static pstatus_t ippiCopy_8u_AC4r(const BYTE* pSrc, INT32 srcStep, BYTE* pDst, INT32 dstStep, INT32 width, INT32 height) { IppiSize roi; roi.width = width; roi.height = height; return (pstatus_t)ippiCopy_8u_AC4R(pSrc, srcStep, pDst, dstStep, roi); } #endif /* WITH_IPP */ /* ------------------------------------------------------------------------- */ void primitives_init_copy(primitives_t* prims) { /* Start with the default. */ prims->copy_8u = general_copy_8u; prims->copy_8u_AC4r = general_copy_8u_AC4r; /* This is just an alias with void* parameters */ prims->copy = (__copy_t)(prims->copy_8u); } void primitives_init_copy_opt(primitives_t* prims) { generic = primitives_get_generic(); primitives_init_copy(prims); /* Pick tuned versions if possible. */ #ifdef WITH_IPP prims->copy_8u = (__copy_8u_t)ippsCopy_8u; prims->copy_8u_AC4r = (__copy_8u_AC4r_t)ippiCopy_8u_AC4r; #endif /* Performance with an SSE2 version with no prefetch seemed to be * all over the map vs. memcpy. * Sometimes it was significantly faster, sometimes dreadfully slower, * and it seemed to vary a lot depending on block size and processor. * Hence, no SSE version is used here unless once can be written that * is consistently faster than memcpy. */ /* This is just an alias with void* parameters */ prims->copy = (__copy_t)(prims->copy_8u); }