diff options
author | James Cowgill <james410@cowgill.org.uk> | 2016-07-04 11:19:11 +0200 |
---|---|---|
committer | James Cowgill <james410@cowgill.org.uk> | 2016-07-04 11:19:11 +0200 |
commit | b3df5144ae0631b8634e535ba90245e8cdfd2a0a (patch) | |
tree | bc955df92f24b7140d3e0d4ec56edcfa74b32c5b /video | |
parent | 36e11d485bf132c7ae9cf5c3433ae40d63adb54d (diff) |
Imported Upstream version 0.18.0
Diffstat (limited to 'video')
112 files changed, 6336 insertions, 3482 deletions
diff --git a/video/csputils.c b/video/csputils.c index 69d3b80..ffa1f82 100644 --- a/video/csputils.c +++ b/video/csputils.c @@ -77,6 +77,7 @@ const struct m_opt_choice_alternatives mp_csp_trc_names[] = { {"gamma2.2", MP_CSP_TRC_GAMMA22}, {"gamma2.8", MP_CSP_TRC_GAMMA28}, {"prophoto", MP_CSP_TRC_PRO_PHOTO}, + {"st2084", MP_CSP_TRC_SMPTE_ST2084}, {0} }; @@ -170,6 +171,9 @@ enum mp_csp_trc avcol_trc_to_mp_csp_trc(int avtrc) case AVCOL_TRC_LINEAR: return MP_CSP_TRC_LINEAR; case AVCOL_TRC_GAMMA22: return MP_CSP_TRC_GAMMA22; case AVCOL_TRC_GAMMA28: return MP_CSP_TRC_GAMMA28; +#if HAVE_AVUTIL_ST2084 + case AVCOL_TRC_SMPTEST2084: return MP_CSP_TRC_SMPTE_ST2084; +#endif default: return MP_CSP_TRC_AUTO; } } @@ -213,12 +217,15 @@ int mp_csp_trc_to_avcol_trc(enum mp_csp_trc trc) { switch (trc) { // We just call it BT.1886 since we're decoding, but it's still BT.709 - case MP_CSP_TRC_BT_1886: return AVCOL_TRC_BT709; - case MP_CSP_TRC_SRGB: return AVCOL_TRC_IEC61966_2_1; - case MP_CSP_TRC_LINEAR: return AVCOL_TRC_LINEAR; - case MP_CSP_TRC_GAMMA22: return AVCOL_TRC_GAMMA22; - case MP_CSP_TRC_GAMMA28: return AVCOL_TRC_GAMMA28; - default: return AVCOL_TRC_UNSPECIFIED; + case MP_CSP_TRC_BT_1886: return AVCOL_TRC_BT709; + case MP_CSP_TRC_SRGB: return AVCOL_TRC_IEC61966_2_1; + case MP_CSP_TRC_LINEAR: return AVCOL_TRC_LINEAR; + case MP_CSP_TRC_GAMMA22: return AVCOL_TRC_GAMMA22; + case MP_CSP_TRC_GAMMA28: return AVCOL_TRC_GAMMA28; +#if HAVE_AVUTIL_ST2084 + case MP_CSP_TRC_SMPTE_ST2084: return AVCOL_TRC_SMPTEST2084; +#endif + default: return AVCOL_TRC_UNSPECIFIED; } } diff --git a/video/csputils.h b/video/csputils.h index 1d8d3b1..19dd88f 100644 --- a/video/csputils.h +++ b/video/csputils.h @@ -78,6 +78,7 @@ enum mp_csp_trc { MP_CSP_TRC_GAMMA22, MP_CSP_TRC_GAMMA28, MP_CSP_TRC_PRO_PHOTO, + MP_CSP_TRC_SMPTE_ST2084, MP_CSP_TRC_COUNT }; diff --git a/video/d3d.h b/video/d3d.h deleted file mode 100644 index 30bee49..0000000 --- a/video/d3d.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef MP_D3D_H_ 
-#define MP_D3D_H_ - -#include <d3d9.h> - -#include "hwdec.h" - -struct mp_d3d_ctx { - struct mp_hwdec_ctx hwctx; - IDirect3DDevice9 *d3d9_device; -}; - -#endif diff --git a/video/d3d11va.c b/video/d3d11va.c deleted file mode 100644 index a9be571..0000000 --- a/video/d3d11va.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. - */ - -#include "mp_image.h" -#include "d3d11va.h" - -struct d3d11va_surface { - HMODULE d3d11_dll; - ID3D11Texture2D *texture; - ID3D11VideoDecoderOutputView *surface; -}; - -ID3D11VideoDecoderOutputView *d3d11_surface_in_mp_image(struct mp_image *mpi) -{ - return mpi && mpi->imgfmt == IMGFMT_D3D11VA ? 
- (ID3D11VideoDecoderOutputView *)mpi->planes[3] : NULL; -} - -ID3D11Texture2D *d3d11_texture_in_mp_image(struct mp_image *mpi) -{ - if (!mpi || mpi->imgfmt != IMGFMT_D3D11VA) - return NULL; - struct d3d11va_surface *surface = (void *)mpi->planes[0]; - return surface->texture; -} - -static void d3d11va_release_img(void *arg) -{ - struct d3d11va_surface *surface = arg; - if (surface->surface) - ID3D11VideoDecoderOutputView_Release(surface->surface); - - if (surface->texture) - ID3D11Texture2D_Release(surface->texture); - - if (surface->d3d11_dll) - FreeLibrary(surface->d3d11_dll); - - talloc_free(surface); -} - -struct mp_image *d3d11va_new_ref(ID3D11VideoDecoderOutputView *view, - int w, int h) -{ - if (!view) - return NULL; - struct d3d11va_surface *surface = talloc_zero(NULL, struct d3d11va_surface); - - surface->d3d11_dll = LoadLibrary(L"d3d11.dll"); - if (!surface->d3d11_dll) - goto fail; - - surface->surface = view; - ID3D11VideoDecoderOutputView_AddRef(surface->surface); - ID3D11VideoDecoderOutputView_GetResource( - surface->surface, (ID3D11Resource **)&surface->texture); - - struct mp_image *mpi = mp_image_new_custom_ref( - &(struct mp_image){0}, surface, d3d11va_release_img); - if (!mpi) - abort(); - - mp_image_setfmt(mpi, IMGFMT_D3D11VA); - mp_image_set_size(mpi, w, h); - mpi->planes[0] = (void *)surface; - mpi->planes[3] = (void *)surface->surface; - - return mpi; -fail: - d3d11va_release_img(surface); - return NULL; -} diff --git a/video/d3d11va.h b/video/d3d11va.h deleted file mode 100644 index db2f295..0000000 --- a/video/d3d11va.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef MPV_D3D11_H -#define MPV_D3D11_H - -#include <d3d11.h> - -struct mp_image; - -ID3D11VideoDecoderOutputView *d3d11_surface_in_mp_image(struct mp_image *mpi); -ID3D11Texture2D *d3d11_texture_in_mp_image(struct mp_image *mpi); -struct mp_image *d3d11va_new_ref(ID3D11VideoDecoderOutputView *view, - int w, int h); - -#endif diff --git a/video/decode/d3d.c b/video/decode/d3d.c index 35d1af9..b978472 100644 --- a/video/decode/d3d.c +++ b/video/decode/d3d.c @@ -15,6 +15,8 @@ * License along with mpv. If not, see <http://www.gnu.org/licenses/>. */ +#include <pthread.h> + #include <libavcodec/avcodec.h> #include "lavc.h" @@ -48,7 +50,6 @@ DEFINE_GUID(DXVA2_ModeVP9_VLD_Profile0, 0x463707f8, 0xa1d0, 0x4585, 0x87 DEFINE_GUID(DXVA2_NoEncrypt, 0x1b81beD0, 0xa0c7, 0x11d3, 0xb9, 0x84, 0x00, 0xc0, 0x4f, 0x2e, 0x73, 0xc5); -static const int PROF_MPEG2_SIMPLE[] = {FF_PROFILE_MPEG2_SIMPLE, 0}; static const int PROF_MPEG2_MAIN[] = {FF_PROFILE_MPEG2_SIMPLE, FF_PROFILE_MPEG2_MAIN, 0}; static const int PROF_H264_HIGH[] = {FF_PROFILE_H264_CONSTRAINED_BASELINE, @@ -70,14 +71,14 @@ struct d3dva_mode { // Prefered modes must come first static const struct d3dva_mode d3dva_modes[] = { // MPEG-1/2 - {MODE2(MPEG2_VLD), AV_CODEC_ID_MPEG2VIDEO, PROF_MPEG2_SIMPLE}, + {MODE2(MPEG2_VLD), AV_CODEC_ID_MPEG2VIDEO, PROF_MPEG2_MAIN}, {MODE2(MPEG2and1_VLD), AV_CODEC_ID_MPEG2VIDEO, PROF_MPEG2_MAIN}, {MODE2(MPEG2and1_VLD), AV_CODEC_ID_MPEG1VIDEO}, // H.264 {MODE2(H264_F), AV_CODEC_ID_H264, PROF_H264_HIGH}, - {MODE (Intel_H264_NoFGT_ClearVideo), AV_CODEC_ID_H264, PROF_H264_HIGH}, {MODE2(H264_E), 
AV_CODEC_ID_H264, PROF_H264_HIGH}, + {MODE (Intel_H264_NoFGT_ClearVideo), AV_CODEC_ID_H264, PROF_H264_HIGH}, {MODE (ModeH264_VLD_WithFMOASO_NoFGT), AV_CODEC_ID_H264, PROF_H264_HIGH}, {MODE (ModeH264_VLD_NoFGT_Flash), AV_CODEC_ID_H264, PROF_H264_HIGH}, @@ -97,6 +98,22 @@ static const struct d3dva_mode d3dva_modes[] = { #undef MODE #undef MODE2 +HMODULE d3d11_dll, d3d9_dll, dxva2_dll; + +static pthread_once_t d3d_load_once = PTHREAD_ONCE_INIT; + +static void d3d_do_load(void) +{ + d3d11_dll = LoadLibrary(L"d3d11.dll"); + d3d9_dll = LoadLibrary(L"d3d9.dll"); + dxva2_dll = LoadLibrary(L"dxva2.dll"); +} + +void d3d_load_dlls(void) +{ + pthread_once(&d3d_load_once, d3d_do_load); +} + int d3d_probe_codec(const char *codec) { enum AVCodecID codecid = mp_codec_to_av_codec_id(codec); @@ -132,12 +149,13 @@ static bool mode_supported(const struct d3dva_mode *mode, struct d3d_decoder_fmt d3d_select_decoder_mode( struct lavc_ctx *s, const GUID *device_guids, UINT n_guids, - DWORD (*get_dxfmt_cb)(struct lavc_ctx *s, const GUID *guid, int depth)) + const struct d3d_decoded_format *formats, int n_formats, + bool (*test_fmt_cb)(struct lavc_ctx *s, const GUID *guid, + const struct d3d_decoded_format *fmt)) { struct d3d_decoder_fmt fmt = { - .guid = &GUID_NULL, - .mpfmt_decoded = IMGFMT_NONE, - .dxfmt_decoded = 0, + .guid = &GUID_NULL, + .format = NULL, }; // this has the right bit-depth, but is unfortunately not the native format @@ -146,8 +164,6 @@ struct d3d_decoder_fmt d3d_select_decoder_mode( return fmt; int depth = IMGFMT_RGB_DEPTH(sw_img_fmt); - int p010 = mp_imgfmt_find(1, 1, 2, 10, MP_IMGFLAG_YUV_NV); - int mpfmt_decoded = depth <= 8 ? 
IMGFMT_NV12 : p010; for (int i = 0; i < MP_ARRAY_SIZE(d3dva_modes); i++) { const struct d3dva_mode *mode = &d3dva_modes[i]; @@ -155,12 +171,23 @@ struct d3d_decoder_fmt d3d_select_decoder_mode( profile_compatible(mode, s->avctx->profile) && mode_supported(mode, device_guids, n_guids)) { - DWORD dxfmt_decoded = get_dxfmt_cb(s, mode->guid, depth); - if (dxfmt_decoded) { - fmt.guid = mode->guid; - fmt.mpfmt_decoded = mpfmt_decoded; - fmt.dxfmt_decoded = dxfmt_decoded; - return fmt; + for (int n = 0; n < n_formats; n++) { + const struct d3d_decoded_format *format = &formats[n]; + + if (depth <= format->depth && test_fmt_cb(s, mode->guid, format)) + { + MP_VERBOSE(s, "Selecting %s ", + d3d_decoder_guid_to_desc(mode->guid)); + if (format->dxfmt >= (1 << 16)) { + MP_VERBOSE(s, "%s\n", mp_tag_str(format->dxfmt)); + } else { + MP_VERBOSE(s, "%d\n", (int)format->dxfmt); + } + + fmt.guid = mode->guid; + fmt.format = format; + return fmt; + } } } } diff --git a/video/decode/d3d.h b/video/decode/d3d.h index bbd6bdf..15c423a 100644 --- a/video/decode/d3d.h +++ b/video/decode/d3d.h @@ -24,16 +24,31 @@ struct mp_image; struct lavc_ctx; +struct d3d_decoded_format { + DWORD dxfmt; // D3DFORMAT or DXGI_FORMAT + const char *name; // informational string repr. of dxfmt_decoded + int depth; // significant bits (not full size) + int mpfmt; // IMGFMT_ with compatible memory layout and semantics +}; + struct d3d_decoder_fmt { const GUID *guid; - int mpfmt_decoded; - DWORD dxfmt_decoded; // D3DFORMAT or DXGI_FORMAT + const struct d3d_decoded_format *format; }; +// Must call d3d_load_dlls() before accessing. Once this is done, the DLLs +// remain loaded forever. 
+extern HMODULE d3d11_dll, d3d9_dll, dxva2_dll; + +void d3d_load_dlls(void); + int d3d_probe_codec(const char *codec); + struct d3d_decoder_fmt d3d_select_decoder_mode( struct lavc_ctx *s, const GUID *device_guids, UINT n_guids, - DWORD (*get_dxfmt_cb)(struct lavc_ctx *s, const GUID *guid, int depth)); + const struct d3d_decoded_format *formats, int n_formats, + bool (*test_fmt_cb)(struct lavc_ctx *s, const GUID *guid, + const struct d3d_decoded_format *fmt)); char *d3d_decoder_guid_to_desc_buf(char *buf, size_t buf_size, const GUID *mode_guid); diff --git a/video/decode/d3d11va.c b/video/decode/d3d11va.c index 622a289..d929e1e 100644 --- a/video/decode/d3d11va.c +++ b/video/decode/d3d11va.c @@ -15,6 +15,7 @@ * License along with mpv. If not, see <http://www.gnu.org/licenses/>. */ +#include <initguid.h> #include <libavcodec/d3d11va.h> #include "lavc.h" @@ -25,7 +26,6 @@ #include "video/mp_image_pool.h" #include "video/hwdec.h" -#include "video/d3d11va.h" #include "d3d.h" #define ADDITIONAL_SURFACES (4 + HWDEC_DELAY_QUEUE_COUNT) @@ -40,7 +40,6 @@ struct d3d11va_decoder { struct priv { struct mp_log *log; - HMODULE d3d11_dll; ID3D11Device *device; ID3D11DeviceContext *device_ctx; ID3D11VideoDevice *video_dev; @@ -50,6 +49,53 @@ struct priv { struct mp_image_pool *sw_pool; }; +struct d3d11va_surface { + ID3D11Texture2D *texture; + ID3D11VideoDecoderOutputView *surface; +}; + +static void d3d11va_release_img(void *arg) +{ + struct d3d11va_surface *surface = arg; + if (surface->surface) + ID3D11VideoDecoderOutputView_Release(surface->surface); + + if (surface->texture) + ID3D11Texture2D_Release(surface->texture); + + talloc_free(surface); +} + +static struct mp_image *d3d11va_new_ref(ID3D11VideoDecoderOutputView *view, + int w, int h) +{ + if (!view) + return NULL; + struct d3d11va_surface *surface = talloc_zero(NULL, struct d3d11va_surface); + + surface->surface = view; + ID3D11VideoDecoderOutputView_AddRef(surface->surface); + ID3D11VideoDecoderOutputView_GetResource( 
+ surface->surface, (ID3D11Resource **)&surface->texture); + + D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC surface_desc; + ID3D11VideoDecoderOutputView_GetDesc(surface->surface, &surface_desc); + + struct mp_image *mpi = + mp_image_new_custom_ref(NULL, surface, d3d11va_release_img); + if (!mpi) + abort(); + + mp_image_setfmt(mpi, IMGFMT_D3D11VA); + mp_image_set_size(mpi, w, h); + mpi->planes[0] = NULL; + mpi->planes[1] = (void *)surface->texture; + mpi->planes[2] = (void *)(intptr_t)surface_desc.Texture2D.ArraySlice; + mpi->planes[3] = (void *)surface->surface; + + return mpi; +} + static struct mp_image *d3d11va_allocate_image(struct lavc_ctx *s, int w, int h) { struct priv *p = s->hwdec_priv; @@ -66,10 +112,14 @@ static struct mp_image *d3d11va_retrieve_image(struct lavc_ctx *s, HRESULT hr; struct priv *p = s->hwdec_priv; ID3D11Texture2D *staging = p->decoder->staging; - ID3D11Texture2D *texture = d3d11_texture_in_mp_image(img); - ID3D11VideoDecoderOutputView *surface = d3d11_surface_in_mp_image(img); - if (!texture || !surface) { + if (img->imgfmt != IMGFMT_D3D11VA) + return img; + + ID3D11Texture2D *texture = (void *)img->planes[1]; + int subindex = (intptr_t)img->planes[2]; + + if (!texture) { MP_ERR(p, "Failed to get Direct3D texture and surface from mp_image\n"); return img; } @@ -82,12 +132,10 @@ static struct mp_image *d3d11va_retrieve_image(struct lavc_ctx *s, } // copy to the staging texture - D3D11_VIDEO_DECODER_OUTPUT_VIEW_DESC surface_desc; - ID3D11VideoDecoderOutputView_GetDesc(surface, &surface_desc); ID3D11DeviceContext_CopySubresourceRegion( p->device_ctx, (ID3D11Resource *)staging, 0, 0, 0, 0, - (ID3D11Resource *)texture, surface_desc.Texture2D.ArraySlice, NULL); + (ID3D11Resource *)texture, subindex, NULL); struct mp_image *sw_img = mp_image_pool_get(p->sw_pool, p->decoder->mpfmt_decoded, @@ -117,27 +165,47 @@ static struct mp_image *d3d11va_retrieve_image(struct lavc_ctx *s, return sw_img; } -struct d3d11_format { - DXGI_FORMAT format; - const char 
*name; - int depth; -}; - #define DFMT(name) MP_CONCAT(DXGI_FORMAT_, name), # name -static const struct d3d11_format d3d11_formats[] = { - {DFMT(NV12), 8}, - {DFMT(P010), 10}, - {DFMT(P016), 16}, +static const struct d3d_decoded_format d3d11_formats[] = { + {DFMT(NV12), 8, IMGFMT_NV12}, + {DFMT(P010), 10, IMGFMT_P010}, + {DFMT(P016), 16, IMGFMT_P010}, }; #undef DFMT -static BOOL d3d11_format_supported(struct lavc_ctx *s, const GUID *guid, - const struct d3d11_format *format) +// Update hw_subfmt to the underlying format. Needed because AVFrame does not +// have such an attribute, so it can't be passed through, and is updated here +// instead. (But in the future, AVHWFramesContext could be used.) +static struct mp_image *d3d11va_update_image_attribs(struct lavc_ctx *s, + struct mp_image *img) +{ + ID3D11Texture2D *texture = (void *)img->planes[1]; + + if (!texture) + return img; + + D3D11_TEXTURE2D_DESC texture_desc; + ID3D11Texture2D_GetDesc(texture, &texture_desc); + for (int n = 0; n < MP_ARRAY_SIZE(d3d11_formats); n++) { + if (d3d11_formats[n].dxfmt == texture_desc.Format) { + img->params.hw_subfmt = d3d11_formats[n].mpfmt; + break; + } + } + + if (img->params.hw_subfmt == IMGFMT_NV12) + mp_image_setfmt(img, IMGFMT_D3D11NV12); + + return img; +} + +static bool d3d11_format_supported(struct lavc_ctx *s, const GUID *guid, + const struct d3d_decoded_format *format) { struct priv *p = s->hwdec_priv; BOOL is_supported = FALSE; HRESULT hr = ID3D11VideoDevice_CheckVideoDecoderFormat( - p->video_dev, guid, format->format, &is_supported); + p->video_dev, guid, format->dxfmt, &is_supported); if (FAILED(hr)) { MP_ERR(p, "Check decoder output format %s for decoder %s: %s\n", format->name, d3d_decoder_guid_to_desc(guid), @@ -151,25 +219,13 @@ static void dump_decoder_info(struct lavc_ctx *s, const GUID *guid) struct priv *p = s->hwdec_priv; char fmts[256] = {0}; for (int i = 0; i < MP_ARRAY_SIZE(d3d11_formats); i++) { - const struct d3d11_format *format = &d3d11_formats[i]; 
+ const struct d3d_decoded_format *format = &d3d11_formats[i]; if (d3d11_format_supported(s, guid, format)) mp_snprintf_cat(fmts, sizeof(fmts), " %s", format->name); } MP_VERBOSE(p, "%s %s\n", d3d_decoder_guid_to_desc(guid), fmts); } -static DWORD get_dxfmt_cb(struct lavc_ctx *s, const GUID *guid, int depth) -{ - for (int i = 0; i < MP_ARRAY_SIZE(d3d11_formats); i++) { - const struct d3d11_format *format = &d3d11_formats[i]; - if (depth <= format->depth && - d3d11_format_supported(s, guid, format)) { - return format->format; - } - } - return 0; -} - static void d3d11va_destroy_decoder(void *arg) { struct d3d11va_decoder *decoder = arg; @@ -188,6 +244,7 @@ static int d3d11va_init_decoder(struct lavc_ctx *s, int w, int h) struct priv *p = s->hwdec_priv; TA_FREEP(&p->decoder); + ID3D11Texture2D *texture = NULL; void *tmp = talloc_new(NULL); UINT n_guids = ID3D11VideoDevice_GetVideoDecoderProfileCount(p->video_dev); @@ -204,31 +261,32 @@ static int d3d11va_init_decoder(struct lavc_ctx *s, int w, int h) } struct d3d_decoder_fmt fmt = - d3d_select_decoder_mode(s, device_guids, n_guids, get_dxfmt_cb); - if (fmt.mpfmt_decoded == IMGFMT_NONE) { + d3d_select_decoder_mode(s, device_guids, n_guids, + d3d11_formats, MP_ARRAY_SIZE(d3d11_formats), + d3d11_format_supported); + if (!fmt.format) { MP_ERR(p, "Failed to find a suitable decoder\n"); goto done; } struct d3d11va_decoder *decoder = talloc_zero(tmp, struct d3d11va_decoder); talloc_set_destructor(decoder, d3d11va_destroy_decoder); - decoder->mpfmt_decoded = fmt.mpfmt_decoded; + decoder->mpfmt_decoded = fmt.format->mpfmt; int n_surfaces = hwdec_get_max_refs(s) + ADDITIONAL_SURFACES; int w_align = w, h_align = h; d3d_surface_align(s, &w_align, &h_align); - ID3D11Texture2D *texture = NULL; D3D11_TEXTURE2D_DESC tex_desc = { .Width = w_align, .Height = h_align, .MipLevels = 1, - .Format = fmt.dxfmt_decoded, + .Format = fmt.format->dxfmt, .SampleDesc.Count = 1, .MiscFlags = 0, .ArraySize = n_surfaces, .Usage = 
D3D11_USAGE_DEFAULT, - .BindFlags = D3D11_BIND_DECODER, + .BindFlags = D3D11_BIND_DECODER | D3D11_BIND_SHADER_RESOURCE, .CPUAccessFlags = 0, }; hr = ID3D11Device_CreateTexture2D(p->device, &tex_desc, NULL, &texture); @@ -290,7 +348,7 @@ static int d3d11va_init_decoder(struct lavc_ctx *s, int w, int h) .Guid = *fmt.guid, .SampleWidth = w, .SampleHeight = h, - .OutputFormat = fmt.dxfmt_decoded, + .OutputFormat = fmt.format->dxfmt, }; UINT n_cfg; hr = ID3D11VideoDevice_GetVideoDecoderConfigCount(p->video_dev, @@ -365,9 +423,6 @@ static void destroy_device(struct lavc_ctx *s) if (p->device_ctx) ID3D11DeviceContext_Release(p->device_ctx); - - if (p->d3d11_dll) - FreeLibrary(p->d3d11_dll); } static bool create_device(struct lavc_ctx *s, BOOL thread_safe) @@ -375,14 +430,14 @@ static bool create_device(struct lavc_ctx *s, BOOL thread_safe) HRESULT hr; struct priv *p = s->hwdec_priv; - p->d3d11_dll = LoadLibrary(L"d3d11.dll"); - if (!p->d3d11_dll) { + d3d_load_dlls(); + if (!d3d11_dll) { MP_ERR(p, "Failed to load D3D11 library\n"); return false; } PFN_D3D11_CREATE_DEVICE CreateDevice = - (void *)GetProcAddress(p->d3d11_dll, "D3D11CreateDevice"); + (void *)GetProcAddress(d3d11_dll, "D3D11CreateDevice"); if (!CreateDevice) { MP_ERR(p, "Failed to get D3D11CreateDevice symbol from DLL: %s\n", mp_LastError_to_str()); @@ -445,8 +500,20 @@ static int d3d11va_init(struct lavc_ctx *s) p->sw_pool = talloc_steal(p, mp_image_pool_new(17)); } - if (!create_device(s, FALSE)) + p->device = hwdec_devices_load(s->hwdec_devs, s->hwdec->type); + if (p->device) { + ID3D11Device_AddRef(p->device); + ID3D11Device_GetImmediateContext(p->device, &p->device_ctx); + if (!p->device_ctx) + goto fail; + MP_VERBOSE(p, "Using VO-supplied device %p.\n", p->device); + } else if (s->hwdec->type == HWDEC_D3D11VA) { + MP_ERR(p, "No Direct3D device provided for native d3d11 decoding\n"); goto fail; + } else { + if (!create_device(s, FALSE)) + goto fail; + } hr = 
ID3D11DeviceContext_QueryInterface(p->device_ctx, &IID_ID3D11VideoContext, @@ -478,16 +545,31 @@ fail: return -1; } -static int d3d11va_probe(struct vd_lavc_hwdec *hwdec, - struct mp_hwdec_info *info, +static int d3d11va_probe(struct lavc_ctx *ctx, struct vd_lavc_hwdec *hwdec, const char *codec) { - hwdec_request_api(info, "d3d11va"); + // d3d11va-copy can do without external context; dxva2 requires it. + if (hwdec->type != HWDEC_D3D11VA_COPY) { + if (!hwdec_devices_load(ctx->hwdec_devs, HWDEC_D3D11VA)) + return HWDEC_ERR_NO_CTX; + } return d3d_probe_codec(codec); } +const struct vd_lavc_hwdec mp_vd_lavc_d3d11va = { + .type = HWDEC_D3D11VA, + .image_format = IMGFMT_D3D11VA, + .probe = d3d11va_probe, + .init = d3d11va_init, + .uninit = d3d11va_uninit, + .init_decoder = d3d11va_init_decoder, + .allocate_image = d3d11va_allocate_image, + .process_image = d3d11va_update_image_attribs, +}; + const struct vd_lavc_hwdec mp_vd_lavc_d3d11va_copy = { .type = HWDEC_D3D11VA_COPY, + .copying = true, .image_format = IMGFMT_D3D11VA, .probe = d3d11va_probe, .init = d3d11va_init, diff --git a/video/decode/dec_video.h b/video/decode/dec_video.h index f4646a9..1030973 100644 --- a/video/decode/dec_video.h +++ b/video/decode/dec_video.h @@ -32,7 +32,7 @@ struct dec_video { struct mpv_global *global; struct MPOpts *opts; const struct vd_functions *vd_driver; - struct mp_hwdec_info *hwdec_info; // video output hwdec handles + struct mp_hwdec_devices *hwdec_devs; // video output hwdec handles struct sh_stream *header; struct mp_codec_params *codec; diff --git a/video/decode/dxva2.c b/video/decode/dxva2.c index c90fa76..fc52aca 100644 --- a/video/decode/dxva2.c +++ b/video/decode/dxva2.c @@ -19,6 +19,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <initguid.h> + #define DXVA2API_USE_BITFIELDS #include <libavcodec/dxva2.h> @@ -30,8 +32,6 @@ #include "video/mp_image_pool.h" #include "video/hwdec.h" -#include "video/d3d.h" -#include 
"video/dxva2.h" #include "d3d.h" #define ADDITIONAL_SURFACES (4 + HWDEC_DELAY_QUEUE_COUNT) @@ -39,8 +39,6 @@ struct priv { struct mp_log *log; - HMODULE d3d9_dll; - HMODULE dxva2_dll; IDirect3D9 *d3d9; IDirect3DDevice9 *device; HANDLE device_handle; @@ -52,6 +50,47 @@ struct priv { int mpfmt_decoded; }; +struct dxva2_surface { + IDirectXVideoDecoder *decoder; + IDirect3DSurface9 *surface; +}; + +static void dxva2_release_img(void *arg) +{ + struct dxva2_surface *surface = arg; + if (surface->surface) + IDirect3DSurface9_Release(surface->surface); + + if (surface->decoder) + IDirectXVideoDecoder_Release(surface->decoder); + + talloc_free(surface); +} + +static struct mp_image *dxva2_new_ref(IDirectXVideoDecoder *decoder, + IDirect3DSurface9 *d3d9_surface, + int w, int h) +{ + if (!decoder || !d3d9_surface) + return NULL; + struct dxva2_surface *surface = talloc_zero(NULL, struct dxva2_surface); + + surface->surface = d3d9_surface; + IDirect3DSurface9_AddRef(surface->surface); + surface->decoder = decoder; + IDirectXVideoDecoder_AddRef(surface->decoder); + + struct mp_image *mpi = + mp_image_new_custom_ref(NULL, surface, dxva2_release_img); + if (!mpi) + abort(); + + mp_image_setfmt(mpi, IMGFMT_DXVA2); + mp_image_set_size(mpi, w, h); + mpi->planes[3] = (void *)surface->surface; + return mpi; +} + static struct mp_image *dxva2_allocate_image(struct lavc_ctx *s, int w, int h) { struct priv *p = s->hwdec_priv; @@ -67,7 +106,8 @@ static struct mp_image *dxva2_retrieve_image(struct lavc_ctx *s, { HRESULT hr; struct priv *p = s->hwdec_priv; - IDirect3DSurface9 *surface = d3d9_surface_in_mp_image(img); + IDirect3DSurface9 *surface = img->imgfmt == IMGFMT_DXVA2 ? 
+ (IDirect3DSurface9 *)img->planes[3] : NULL; if (!surface) { MP_ERR(p, "Failed to get Direct3D surface from mp_image\n"); @@ -108,15 +148,10 @@ static struct mp_image *dxva2_retrieve_image(struct lavc_ctx *s, return sw_img; } -struct d3d9_format { - D3DFORMAT format; - int depth; -}; - -static const struct d3d9_format d3d9_formats[] = { - {MKTAG('N','V','1','2'), 8}, - {MKTAG('P','0','1','0'), 10}, - {MKTAG('P','0','1','6'), 16}, +static const struct d3d_decoded_format d3d9_formats[] = { + {MKTAG('N','V','1','2'), "NV12", 8, IMGFMT_NV12}, + {MKTAG('P','0','1','0'), "P010", 10, IMGFMT_P010}, + {MKTAG('P','0','1','6'), "P016", 16, IMGFMT_P010}, }; static void dump_decoder_info(struct lavc_ctx *s, @@ -133,7 +168,7 @@ static void dump_decoder_info(struct lavc_ctx *s, HRESULT hr = IDirectXVideoDecoderService_GetDecoderRenderTargets( p->decoder_service, guid, &n_formats, &formats); if (FAILED(hr)) { - MP_ERR(p, "Failed to get render targets for decoder %s:%s", + MP_ERR(p, "Failed to get render targets for decoder %s:%s\n", description, mp_HRESULT_to_str(hr)); } @@ -148,9 +183,10 @@ static void dump_decoder_info(struct lavc_ctx *s, } } -static DWORD get_dxfmt_cb(struct lavc_ctx *s, const GUID *guid, int depth) +static bool dxva2_format_supported(struct lavc_ctx *s, const GUID *guid, + const struct d3d_decoded_format *format) { - DWORD ret = 0; + bool ret = false; struct priv *p = s->hwdec_priv; D3DFORMAT *formats = NULL; UINT n_formats = 0; @@ -162,19 +198,12 @@ static DWORD get_dxfmt_cb(struct lavc_ctx *s, const GUID *guid, int depth) return 0; } - for (int i = 0; i < MP_ARRAY_SIZE(d3d9_formats); i++) { - const struct d3d9_format *d3d9_fmt = &d3d9_formats[i]; - if (d3d9_fmt->depth < depth) - continue; - - for (UINT j = 0; j < n_formats; j++) { - if (formats[i] == d3d9_fmt->format) { - ret = formats[i]; - goto done; - } - } + for (int i = 0; i < n_formats; i++) { + ret = formats[i] == format->dxfmt; + if (ret) + break; } -done: + CoTaskMemFree(formats); return ret; } @@ 
-204,14 +233,16 @@ static int dxva2_init_decoder(struct lavc_ctx *s, int w, int h) dump_decoder_info(s, device_guids, n_guids); struct d3d_decoder_fmt fmt = - d3d_select_decoder_mode(s, device_guids, n_guids, get_dxfmt_cb); + d3d_select_decoder_mode(s, device_guids, n_guids, + d3d9_formats, MP_ARRAY_SIZE(d3d9_formats), + dxva2_format_supported); CoTaskMemFree(device_guids); - if (fmt.mpfmt_decoded == IMGFMT_NONE) { + if (!fmt.format) { MP_ERR(p, "Failed to find a suitable decoder\n"); goto done; } - p->mpfmt_decoded = fmt.mpfmt_decoded; + p->mpfmt_decoded = fmt.format->mpfmt; struct mp_image_pool *decoder_pool = talloc_steal(tmp, mp_image_pool_new(n_surfaces)); DXVA2_ConfigPictureDecode *decoder_config = @@ -222,7 +253,7 @@ static int dxva2_init_decoder(struct lavc_ctx *s, int w, int h) DXVA2_VideoDesc video_desc ={ .SampleWidth = w, .SampleHeight = h, - .Format = fmt.dxfmt_decoded, + .Format = fmt.format->dxfmt, }; UINT n_configs = 0; DXVA2_ConfigPictureDecode *configs = NULL; @@ -255,7 +286,7 @@ static int dxva2_init_decoder(struct lavc_ctx *s, int w, int h) hr = IDirectXVideoDecoderService_CreateSurface( p->decoder_service, w_align, h_align, - n_surfaces - 1, fmt.dxfmt_decoded, D3DPOOL_DEFAULT, 0, + n_surfaces - 1, fmt.format->dxfmt, D3DPOOL_DEFAULT, 0, DXVA2_VideoDecoderRenderTarget, surfaces, NULL); if (FAILED(hr)) { MP_ERR(p, "Failed to create %d video surfaces: %s\n", @@ -316,25 +347,20 @@ static void destroy_device(struct lavc_ctx *s) if (p->d3d9) IDirect3D9_Release(p->d3d9); - - if (p->d3d9_dll) - FreeLibrary(p->d3d9_dll); - - if (p->dxva2_dll) - FreeLibrary(p->dxva2_dll); } static bool create_device(struct lavc_ctx *s) { struct priv *p = s->hwdec_priv; - p->d3d9_dll = LoadLibrary(L"d3d9.dll"); - if (!p->d3d9_dll) { + + d3d_load_dlls(); + if (!d3d9_dll) { MP_ERR(p, "Failed to load D3D9 library\n"); return false; } IDirect3D9* (WINAPI *Direct3DCreate9)(UINT) = - (void *)GetProcAddress(p->d3d9_dll, "Direct3DCreate9"); + (void *)GetProcAddress(d3d9_dll, 
"Direct3DCreate9"); if (!Direct3DCreate9) { MP_ERR(p, "Failed to locate Direct3DCreate9\n"); return false; @@ -413,9 +439,7 @@ static int dxva2_init(struct lavc_ctx *s) p->sw_pool = talloc_steal(p, mp_image_pool_new(17)); } - if (s->hwdec_info && s->hwdec_info->hwctx && s->hwdec_info->hwctx->d3d_ctx) - p->device = s->hwdec_info->hwctx->d3d_ctx->d3d9_device; - + p->device = hwdec_devices_load(s->hwdec_devs, s->hwdec->type); if (p->device) { IDirect3D9_AddRef(p->device); MP_VERBOSE(p, "Using VO-supplied device %p.\n", p->device); @@ -427,15 +451,14 @@ static int dxva2_init(struct lavc_ctx *s) goto fail; } - p->dxva2_dll = LoadLibrary(L"dxva2.dll"); - if (!p->dxva2_dll) { + d3d_load_dlls(); + if (!dxva2_dll) { MP_ERR(p, "Failed to load DXVA2 library\n"); goto fail; } HRESULT (WINAPI *CreateDeviceManager9)(UINT *, IDirect3DDeviceManager9 **) = - (void *)GetProcAddress(p->dxva2_dll, - "DXVA2CreateDirect3DDeviceManager9"); + (void *)GetProcAddress(dxva2_dll, "DXVA2CreateDirect3DDeviceManager9"); if (!CreateDeviceManager9) { MP_ERR(p, "Failed to locate DXVA2CreateDirect3DDeviceManager9\n"); goto fail; @@ -484,15 +507,15 @@ fail: return -1; } -static int dxva2_probe(struct vd_lavc_hwdec *hwdec, struct mp_hwdec_info *info, +static int dxva2_probe(struct lavc_ctx *ctx, struct vd_lavc_hwdec *hwdec, const char *codec) { - hwdec_request_api(info, "dxva2"); // dxva2-copy can do without external context; dxva2 requires it. 
- if (hwdec->type != HWDEC_DXVA2_COPY) { - if (!info || !info->hwctx || !info->hwctx->d3d_ctx || - info->hwctx->type == HWDEC_DXVA2_COPY) + if (hwdec->type == HWDEC_DXVA2) { + if (!hwdec_devices_load(ctx->hwdec_devs, HWDEC_DXVA2)) return HWDEC_ERR_NO_CTX; + } else { + hwdec_devices_load(ctx->hwdec_devs, HWDEC_DXVA2_COPY); } return d3d_probe_codec(codec); } @@ -509,6 +532,7 @@ const struct vd_lavc_hwdec mp_vd_lavc_dxva2 = { const struct vd_lavc_hwdec mp_vd_lavc_dxva2_copy = { .type = HWDEC_DXVA2_COPY, + .copying = true, .image_format = IMGFMT_DXVA2, .probe = dxva2_probe, .init = dxva2_init, diff --git a/video/decode/lavc.h b/video/decode/lavc.h index 826edbf..689222d 100644 --- a/video/decode/lavc.h +++ b/video/decode/lavc.h @@ -30,7 +30,7 @@ typedef struct lavc_ctx { int max_delay_queue; // From VO - struct mp_hwdec_info *hwdec_info; + struct mp_hwdec_devices *hwdec_devs; // For free use by hwdec implementation void *hwdec_priv; @@ -49,12 +49,14 @@ struct vd_lavc_hwdec { // If not-0: the IMGFMT_ format that should be accepted in the libavcodec // get_format callback. int image_format; + // Always returns a non-hwaccel image format. + bool copying; // Setting this will queue the given number of frames before calling // process_image() or returning them to the renderer. This can increase // efficiency by not blocking on the hardware pipeline by reading back // immediately after decoding. int delay_queue; - int (*probe)(struct vd_lavc_hwdec *hwdec, struct mp_hwdec_info *info, + int (*probe)(struct lavc_ctx *ctx, struct vd_lavc_hwdec *hwdec, const char *codec); int (*init)(struct lavc_ctx *ctx); int (*init_decoder)(struct lavc_ctx *ctx, int w, int h); @@ -69,6 +71,10 @@ struct vd_lavc_hwdec { void (*unlock)(struct lavc_ctx *ctx); // Optional; if a special hardware decoder is needed (instead of "hwaccel"). const char *(*get_codec)(struct lavc_ctx *ctx, const char *codec); + // Suffix for libavcodec decoder. 
If non-NULL, get_codec() is overridden + // with hwdec_find_decoder. + // Intuitively, this will force the corresponding wrapper decoder. + const char *lavc_suffix; }; enum { @@ -89,4 +95,6 @@ bool hwdec_check_codec_support(const char *codec, const struct hwdec_profile_entry *table); int hwdec_get_max_refs(struct lavc_ctx *ctx); +const char *hwdec_find_decoder(const char *codec, const char *suffix); + #endif diff --git a/video/decode/mediacodec.c b/video/decode/mediacodec.c deleted file mode 100644 index 37ce1b8..0000000 --- a/video/decode/mediacodec.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. - */ - -#include "lavc.h" -#include "common/common.h" - -static const char *const codecs[][2] = { - {"h264", "h264_mediacodec"}, - {0} -}; - -static const char *map_codec(const char *c) -{ - for (int n = 0; codecs[n][0]; n++) { - if (c && strcmp(codecs[n][0], c) == 0) - return codecs[n][1]; - } - return NULL; -} - -static int init_decoder(struct lavc_ctx *ctx, int w, int h) -{ - return 0; -} - -static void uninit(struct lavc_ctx *ctx) -{ -} - -static int init(struct lavc_ctx *ctx) -{ - return 0; -} - -static int probe(struct vd_lavc_hwdec *hwdec, struct mp_hwdec_info *info, - const char *decoder) -{ - return map_codec(decoder) ? 
0 : HWDEC_ERR_NO_CODEC; -} - -static const char *get_codec(struct lavc_ctx *ctx, const char *codec) -{ - return map_codec(codec); -} - -const struct vd_lavc_hwdec mp_vd_lavc_mediacodec = { - .type = HWDEC_MEDIACODEC, - .image_format = IMGFMT_NV12, - .probe = probe, - .init = init, - .uninit = uninit, - .init_decoder = init_decoder, - .get_codec = get_codec, -}; diff --git a/video/decode/rpi.c b/video/decode/rpi.c deleted file mode 100644 index f2ed6d2..0000000 --- a/video/decode/rpi.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. - */ - -#include "lavc.h" -#include "common/common.h" - -static const char *const codecs[][2] = { - {"h264", "h264_mmal"}, - {"mpeg2video", "mpeg2_mmal"}, - {"mpeg4", "mpeg4_mmal"}, - {"vc1", "vc1_mmal"}, - {0} -}; - -static const char *map_codec(const char *c) -{ - for (int n = 0; codecs[n][0]; n++) { - if (c && strcmp(codecs[n][0], c) == 0) - return codecs[n][1]; - } - return NULL; -} - -static int init_decoder(struct lavc_ctx *ctx, int w, int h) -{ - return 0; -} - -static void uninit(struct lavc_ctx *ctx) -{ -} - -static int init(struct lavc_ctx *ctx) -{ - return 0; -} - -static int probe(struct vd_lavc_hwdec *hwdec, struct mp_hwdec_info *info, - const char *codec) -{ - return map_codec(codec) ? 
0 : HWDEC_ERR_NO_CODEC; -} - -static const char *get_codec(struct lavc_ctx *ctx, const char *codec) -{ - return map_codec(codec); -} - -const struct vd_lavc_hwdec mp_vd_lavc_rpi = { - .type = HWDEC_RPI, - .image_format = IMGFMT_MMAL, - .probe = probe, - .init = init, - .uninit = uninit, - .init_decoder = init_decoder, - .get_codec = get_codec, -}; diff --git a/video/decode/vaapi.c b/video/decode/vaapi.c index 2682225..aa8291d 100644 --- a/video/decode/vaapi.c +++ b/video/decode/vaapi.c @@ -72,15 +72,6 @@ struct va_native_display { void (*destroy)(struct priv *p); }; -static const struct va_native_display disp_x11; - -static const struct va_native_display *const native_displays[] = { -#if HAVE_VAAPI_X11 - &disp_x11, -#endif - NULL -}; - #if HAVE_VAAPI_X11 #include <X11/Xlib.h> #include <va/va_x11.h> @@ -108,6 +99,13 @@ static const struct va_native_display disp_x11 = { }; #endif +static const struct va_native_display *const native_displays[] = { +#if HAVE_VAAPI_X11 + &disp_x11, +#endif + NULL +}; + #define HAS_HEVC VA_CHECK_VERSION(0, 38, 0) #define HAS_VP9 (VA_CHECK_VERSION(0, 38, 1) && defined(FF_PROFILE_VP9_0)) @@ -340,6 +338,12 @@ static struct mp_image *allocate_image(struct lavc_ctx *ctx, int w, int h) return img; } +static struct mp_image *update_format(struct lavc_ctx *ctx, struct mp_image *img) +{ + va_surface_init_subformat(img); + return img; +} + static void destroy_va_dummy_ctx(struct priv *p) { va_destroy(p->ctx); @@ -351,7 +355,7 @@ static void destroy_va_dummy_ctx(struct priv *p) // Creates a "private" VADisplay, disconnected from the VO. We just create a // new X connection, because that's simpler. (We could also pass the X -// connection along with struct mp_hwdec_info, if we wanted.) +// connection along with struct mp_hwdec_devices, if we wanted.) 
static bool create_va_dummy_ctx(struct priv *p) { for (int n = 0; native_displays[n]; n++) { @@ -393,21 +397,23 @@ static void uninit(struct lavc_ctx *ctx) ctx->hwdec_priv = NULL; } -static int init_with_vactx(struct lavc_ctx *ctx, struct mp_vaapi_ctx *vactx) +static int init(struct lavc_ctx *ctx, bool direct) { struct priv *p = talloc_ptrtype(NULL, p); *p = (struct priv) { .log = mp_log_new(p, ctx->log, "vaapi"), - .ctx = vactx, .va_context = &p->va_context_storage, .rt_format = VA_RT_FORMAT_YUV420 }; - if (!p->ctx) + if (direct) { + p->ctx = hwdec_devices_get(ctx->hwdec_devs, HWDEC_VAAPI)->ctx; + } else { create_va_dummy_ctx(p); - if (!p->ctx) { - talloc_free(p); - return -1; + if (!p->ctx) { + talloc_free(p); + return -1; + } } p->display = p->ctx->display; @@ -425,25 +431,22 @@ static int init_with_vactx(struct lavc_ctx *ctx, struct mp_vaapi_ctx *vactx) return 0; } -static int init(struct lavc_ctx *ctx) +static int init_direct(struct lavc_ctx *ctx) { - return init_with_vactx(ctx, ctx->hwdec_info->hwctx->vaapi_ctx); + return init(ctx, true); } -static int probe(struct vd_lavc_hwdec *hwdec, struct mp_hwdec_info *info, +static int probe(struct lavc_ctx *ctx, struct vd_lavc_hwdec *hwdec, const char *codec) { - hwdec_request_api(info, "vaapi"); - if (!info || !info->hwctx || !info->hwctx->vaapi_ctx) + if (!hwdec_devices_load(ctx->hwdec_devs, HWDEC_VAAPI)) return HWDEC_ERR_NO_CTX; if (!hwdec_check_codec_support(codec, profiles)) return HWDEC_ERR_NO_CODEC; - if (va_guess_if_emulated(info->hwctx->vaapi_ctx)) - return HWDEC_ERR_EMULATED; return 0; } -static int probe_copy(struct vd_lavc_hwdec *hwdec, struct mp_hwdec_info *info, +static int probe_copy(struct lavc_ctx *ctx, struct vd_lavc_hwdec *hwdec, const char *codec) { struct priv dummy = {mp_null_log}; @@ -460,7 +463,7 @@ static int probe_copy(struct vd_lavc_hwdec *hwdec, struct mp_hwdec_info *info, static int init_copy(struct lavc_ctx *ctx) { - return init_with_vactx(ctx, NULL); + return init(ctx, false); } static 
struct mp_image *copy_image(struct lavc_ctx *ctx, struct mp_image *img) @@ -491,16 +494,18 @@ const struct vd_lavc_hwdec mp_vd_lavc_vaapi = { .type = HWDEC_VAAPI, .image_format = IMGFMT_VAAPI, .probe = probe, - .init = init, + .init = init_direct, .uninit = uninit, .init_decoder = init_decoder, .allocate_image = allocate_image, .lock = intel_shit_lock, .unlock = intel_crap_unlock, + .process_image = update_format, }; const struct vd_lavc_hwdec mp_vd_lavc_vaapi_copy = { .type = HWDEC_VAAPI_COPY, + .copying = true, .image_format = IMGFMT_VAAPI, .probe = probe_copy, .init = init_copy, diff --git a/video/decode/vd_lavc.c b/video/decode/vd_lavc.c index a444f88..fbb04d1 100644 --- a/video/decode/vd_lavc.c +++ b/video/decode/vd_lavc.c @@ -126,9 +126,20 @@ extern const struct vd_lavc_hwdec mp_vd_lavc_vaapi; extern const struct vd_lavc_hwdec mp_vd_lavc_vaapi_copy; extern const struct vd_lavc_hwdec mp_vd_lavc_dxva2; extern const struct vd_lavc_hwdec mp_vd_lavc_dxva2_copy; +extern const struct vd_lavc_hwdec mp_vd_lavc_d3d11va; extern const struct vd_lavc_hwdec mp_vd_lavc_d3d11va_copy; -extern const struct vd_lavc_hwdec mp_vd_lavc_rpi; -extern const struct vd_lavc_hwdec mp_vd_lavc_mediacodec; + +static const struct vd_lavc_hwdec mp_vd_lavc_rpi = { + .type = HWDEC_RPI, + .lavc_suffix = "_mmal", + .image_format = IMGFMT_MMAL, +}; + +static const struct vd_lavc_hwdec mp_vd_lavc_mediacodec = { + .type = HWDEC_MEDIACODEC, + .lavc_suffix = "_mediacodec", + .copying = true, +}; static const struct vd_lavc_hwdec *const hwdec_list[] = { #if HAVE_RPI @@ -144,11 +155,10 @@ static const struct vd_lavc_hwdec *const hwdec_list[] = { &mp_vd_lavc_vaapi, &mp_vd_lavc_vaapi_copy, #endif -#if HAVE_DXVA2_HWACCEL +#if HAVE_D3D_HWACCEL + &mp_vd_lavc_d3d11va, &mp_vd_lavc_dxva2, &mp_vd_lavc_dxva2_copy, -#endif -#if HAVE_D3D11VA_HWACCEL &mp_vd_lavc_d3d11va_copy, #endif #if HAVE_ANDROID @@ -233,18 +243,51 @@ int hwdec_get_max_refs(struct lavc_ctx *ctx) return 2; } -void hwdec_request_api(struct 
mp_hwdec_info *info, const char *api_name) +// This is intended to return the name of a decoder for a given wrapper API. +// Decoder wrappers are usually added to libavcodec with a specific suffix. +// For example the mmal h264 decoder is named h264_mmal. +// This API would e.g. return h264_mmal for +// hwdec_find_decoder("h264", "_mmal"). +// Just concatenating the two names will not always work due to inconsistencies +// (e.g. "mpeg2video" vs. "mpeg2"). +const char *hwdec_find_decoder(const char *codec, const char *suffix) +{ + enum AVCodecID codec_id = mp_codec_to_av_codec_id(codec); + if (codec_id == AV_CODEC_ID_NONE) + return NULL; + AVCodec *cur = NULL; + for (;;) { + cur = av_codec_next(cur); + if (!cur) + break; + if (cur->id == codec_id && av_codec_is_decoder(cur) && + bstr_endswith0(bstr0(cur->name), suffix)) + return cur->name; + } + return NULL; +} + +// Parallel to hwdec_find_decoder(): return whether a hwdec can use the given +// decoder. This can't be answered accurately; it works for wrapper decoders +// only (like mmal), and for real hwaccels this will always return false. 
+static bool hwdec_is_wrapper(struct vd_lavc_hwdec *hwdec, const char *decoder) { - if (info && info->load_api) - info->load_api(info, api_name); + if (!hwdec->lavc_suffix) + return false; + return bstr_endswith0(bstr0(decoder), hwdec->lavc_suffix); } -static int hwdec_probe(struct vd_lavc_hwdec *hwdec, struct mp_hwdec_info *info, +static int hwdec_probe(struct dec_video *vd, struct vd_lavc_hwdec *hwdec, const char *codec) { + vd_ffmpeg_ctx *ctx = vd->priv; int r = 0; if (hwdec->probe) - r = hwdec->probe(hwdec, info, codec); + r = hwdec->probe(ctx, hwdec, codec); + if (r >= 0) { + if (hwdec->lavc_suffix && !hwdec_find_decoder(codec, hwdec->lavc_suffix)) + return HWDEC_ERR_NO_CODEC; + } return r; } @@ -258,7 +301,7 @@ static struct vd_lavc_hwdec *probe_hwdec(struct dec_video *vd, bool autoprobe, MP_VERBOSE(vd, "Requested hardware decoder not compiled.\n"); return NULL; } - int r = hwdec_probe(hwdec, vd->hwdec_info, codec); + int r = hwdec_probe(vd, hwdec, codec); if (r == HWDEC_ERR_EMULATED) { if (autoprobe) return NULL; @@ -284,17 +327,14 @@ static void uninit(struct dec_video *vd) talloc_free(vd->priv); } -static bool force_fallback(struct dec_video *vd) +static void force_fallback(struct dec_video *vd) { vd_ffmpeg_ctx *ctx = vd->priv; - if (!ctx->hwdec) - return false; uninit_avctx(vd); int lev = ctx->hwdec_notified ? MSGL_WARN : MSGL_V; mp_msg(vd->log, lev, "Falling back to software decoding.\n"); init_avctx(vd, ctx->decoder, NULL); - return true; } static void reinit(struct dec_video *vd) @@ -308,14 +348,38 @@ static void reinit(struct dec_video *vd) struct vd_lavc_hwdec *hwdec = NULL; if (hwdec_codec_allowed(vd, codec)) { - if (vd->opts->hwdec_api == HWDEC_AUTO) { + int api = vd->opts->hwdec_api; + if (HWDEC_IS_AUTO(api)) { + // If a specific decoder is forced, we should try a hwdec method + // that works with it, instead of simply failing later at runtime. 
+ // This is good for avoiding trying "normal" hwaccels on wrapper + // decoders (like vaapi on a mmal decoder). Since libavcodec doesn't + // tell us which decoder supports which hwaccel methods without + // actually running it, do it by detecting such wrapper decoders. + // On the other hand, e.g. "--hwdec=rpi" should always force the + // wrapper decoder, so be careful not to break this case. + bool might_be_wrapper = false; + for (int n = 0; hwdec_list[n]; n++) { + struct vd_lavc_hwdec *other = (void *)hwdec_list[n]; + if (hwdec_is_wrapper(other, decoder)) + might_be_wrapper = true; + } for (int n = 0; hwdec_list[n]; n++) { hwdec = probe_hwdec(vd, true, hwdec_list[n]->type, codec); - if (hwdec) + if (hwdec) { + if (might_be_wrapper && !hwdec_is_wrapper(hwdec, decoder)) { + MP_VERBOSE(vd, "This hwaccel is not compatible.\n"); + continue; + } + if (api == HWDEC_AUTO_COPY && !hwdec->copying) { + MP_VERBOSE(vd, "Not using this for auto-copy mode.\n"); + continue; + } break; + } } - } else if (vd->opts->hwdec_api != HWDEC_NONE) { - hwdec = probe_hwdec(vd, false, vd->opts->hwdec_api, codec); + } else if (api != HWDEC_NONE) { + hwdec = probe_hwdec(vd, false, api, codec); } } else { MP_VERBOSE(vd, "Not trying to use hardware decoding: codec %s is not " @@ -326,13 +390,15 @@ static void reinit(struct dec_video *vd) if (hwdec) { if (hwdec->get_codec) decoder = hwdec->get_codec(ctx, decoder); + if (hwdec->lavc_suffix) + decoder = hwdec_find_decoder(codec, hwdec->lavc_suffix); MP_VERBOSE(vd, "Trying hardware decoding.\n"); } else { MP_VERBOSE(vd, "Using software decoding.\n"); } init_avctx(vd, decoder, hwdec); - if (!ctx->avctx) + if (!ctx->avctx && hwdec) force_fallback(vd); } @@ -343,6 +409,7 @@ static int init(struct dec_video *vd, const char *decoder) ctx->log = vd->log; ctx->opts = vd->opts; ctx->decoder = talloc_strdup(ctx, decoder); + ctx->hwdec_devs = vd->hwdec_devs; reinit(vd); @@ -372,8 +439,6 @@ static void init_avctx(struct dec_video *vd, const char *decoder, 
if (!lavc_codec) return; - ctx->hwdec_info = vd->hwdec_info; - ctx->codec_timebase = (AVRational){0}; if (strstr(decoder, "_mmal") || strstr(decoder, "_mediacodec")) ctx->codec_timebase = (AVRational){1, 1000000}; @@ -389,17 +454,21 @@ static void init_avctx(struct dec_video *vd, const char *decoder, avctx->codec_type = AVMEDIA_TYPE_VIDEO; avctx->codec_id = lavc_codec->id; + if (ctx->codec_timebase.num) + avctx->time_base = ctx->codec_timebase; + avctx->refcounted_frames = 1; ctx->pic = av_frame_alloc(); if (!ctx->pic) goto error; if (ctx->hwdec) { - avctx->thread_count = 1; - avctx->get_format = get_format_hwdec; + avctx->thread_count = 1; + if (ctx->hwdec->image_format) + avctx->get_format = get_format_hwdec; if (ctx->hwdec->allocate_image) avctx->get_buffer2 = get_buffer2_hwdec; - if (ctx->hwdec->init(ctx) < 0) + if (ctx->hwdec->init && ctx->hwdec->init(ctx) < 0) goto error; ctx->max_delay_queue = ctx->hwdec->delay_queue; } else { @@ -409,14 +478,8 @@ static void init_avctx(struct dec_video *vd, const char *decoder, avctx->flags |= lavc_param->bitexact ? CODEC_FLAG_BITEXACT : 0; avctx->flags2 |= lavc_param->fast ? CODEC_FLAG2_FAST : 0; - if (lavc_param->show_all) { -#ifdef CODEC_FLAG2_SHOW_ALL - avctx->flags2 |= CODEC_FLAG2_SHOW_ALL; // ffmpeg only? -#endif -#ifdef CODEC_FLAG_OUTPUT_CORRUPT - avctx->flags |= CODEC_FLAG_OUTPUT_CORRUPT; // added with Libav 10 -#endif - } + if (lavc_param->show_all) + avctx->flags |= CODEC_FLAG_OUTPUT_CORRUPT; avctx->skip_loop_filter = lavc_param->skip_loop_filter; avctx->skip_idct = lavc_param->skip_idct; @@ -551,31 +614,29 @@ static enum AVPixelFormat get_format_hwdec(struct AVCodecContext *avctx, ctx->hwdec_request_reinit |= ctx->hwdec_failed; ctx->hwdec_failed = false; - if (ctx->hwdec->image_format) { - for (int i = 0; fmt[i] != AV_PIX_FMT_NONE; i++) { - if (ctx->hwdec->image_format == pixfmt2imgfmt(fmt[i])) { - // There could be more reasons for a change, and it's possible - // that we miss some. 
(Might also depend on the hwaccel type.) - bool change = - ctx->hwdec_w != avctx->coded_width || - ctx->hwdec_h != avctx->coded_height || - ctx->hwdec_fmt != ctx->hwdec->image_format || - ctx->hwdec_profile != avctx->profile || - ctx->hwdec_request_reinit; - ctx->hwdec_w = avctx->coded_width; - ctx->hwdec_h = avctx->coded_height; - ctx->hwdec_fmt = ctx->hwdec->image_format; - ctx->hwdec_profile = avctx->profile; - ctx->hwdec_request_reinit = false; - if (change) { - if (ctx->hwdec->init_decoder(ctx, ctx->hwdec_w, ctx->hwdec_h) < 0) - { - ctx->hwdec_fmt = 0; - break; - } + for (int i = 0; fmt[i] != AV_PIX_FMT_NONE; i++) { + if (ctx->hwdec->image_format == pixfmt2imgfmt(fmt[i])) { + // There could be more reasons for a change, and it's possible + // that we miss some. (Might also depend on the hwaccel type.) + bool change = + ctx->hwdec_w != avctx->coded_width || + ctx->hwdec_h != avctx->coded_height || + ctx->hwdec_fmt != ctx->hwdec->image_format || + ctx->hwdec_profile != avctx->profile || + ctx->hwdec_request_reinit; + ctx->hwdec_w = avctx->coded_width; + ctx->hwdec_h = avctx->coded_height; + ctx->hwdec_fmt = ctx->hwdec->image_format; + ctx->hwdec_profile = avctx->profile; + ctx->hwdec_request_reinit = false; + if (change && ctx->hwdec->init_decoder) { + if (ctx->hwdec->init_decoder(ctx, ctx->hwdec_w, ctx->hwdec_h) < 0) + { + ctx->hwdec_fmt = 0; + break; } - return fmt[i]; } + return fmt[i]; } } @@ -640,7 +701,7 @@ static struct mp_image *read_output(struct dec_video *vd) if (ctx->hwdec && ctx->hwdec->process_image) res = ctx->hwdec->process_image(ctx, res); - return mp_img_swap_to_native(res); + return res ? 
mp_img_swap_to_native(res) : NULL; } static void decode(struct dec_video *vd, struct demux_packet *packet, @@ -701,7 +762,9 @@ static void decode(struct dec_video *vd, struct demux_packet *packet, MP_WARN(vd, "Error while decoding frame!\n"); if (ctx->hwdec) { ctx->hwdec_fail_count += 1; - if (ctx->hwdec_fail_count >= opts->software_fallback) + // The FFmpeg VT hwaccel is buggy and can crash after 1 broken frame. + bool vt = ctx->hwdec && ctx->hwdec->type == HWDEC_VIDEOTOOLBOX; + if (ctx->hwdec_fail_count >= opts->software_fallback || vt) ctx->hwdec_failed = true; } if (!ctx->hwdec_failed && packet) @@ -767,7 +830,8 @@ static struct mp_image *decode_with_fallback(struct dec_video *vd, decode(vd, packet, flags, &mpi); if (ctx->hwdec_failed) { // Failed hardware decoding? Try again in software. - if (force_fallback(vd) && ctx->avctx) + force_fallback(vd); + if (ctx->avctx) decode(vd, packet, flags, &mpi); } @@ -805,8 +869,10 @@ static int control(struct dec_video *vd, int cmd, void *arg) return CONTROL_TRUE; } case VDCTRL_FORCE_HWDEC_FALLBACK: - if (force_fallback(vd)) + if (ctx->hwdec) { + force_fallback(vd); return ctx->avctx ? 
CONTROL_OK : CONTROL_ERROR; + } return CONTROL_FALSE; case VDCTRL_REINIT: reinit(vd); diff --git a/video/decode/vdpau.c b/video/decode/vdpau.c index 313fabf..0003182 100644 --- a/video/decode/vdpau.c +++ b/video/decode/vdpau.c @@ -61,6 +61,17 @@ static struct mp_image *allocate_image(struct lavc_ctx *ctx, int w, int h) return mp_vdpau_get_video_surface(p->mpvdp, chroma, s_w, s_h); } +static struct mp_image *update_format(struct lavc_ctx *ctx, struct mp_image *img) +{ + VdpChromaType chroma = 0; + uint32_t s_w, s_h; + if (av_vdpau_get_surface_parameters(ctx->avctx, &chroma, &s_w, &s_h) >= 0) { + if (chroma == VDP_CHROMA_TYPE_420) + img->params.hw_subfmt = IMGFMT_NV12; + } + return img; +} + static void uninit(struct lavc_ctx *ctx) { struct priv *p = ctx->hwdec_priv; @@ -75,7 +86,7 @@ static int init(struct lavc_ctx *ctx) struct priv *p = talloc_ptrtype(NULL, p); *p = (struct priv) { .log = mp_log_new(p, ctx->log, "vdpau"), - .mpvdp = ctx->hwdec_info->hwctx->vdpau_ctx, + .mpvdp = hwdec_devices_get(ctx->hwdec_devs, HWDEC_VDPAU)->ctx, }; ctx->hwdec_priv = p; @@ -83,14 +94,11 @@ static int init(struct lavc_ctx *ctx) return 0; } -static int probe(struct vd_lavc_hwdec *hwdec, struct mp_hwdec_info *info, +static int probe(struct lavc_ctx *ctx, struct vd_lavc_hwdec *hwdec, const char *codec) { - hwdec_request_api(info, "vdpau"); - if (!info || !info->hwctx || !info->hwctx->vdpau_ctx) + if (!hwdec_devices_load(ctx->hwdec_devs, HWDEC_VDPAU)) return HWDEC_ERR_NO_CTX; - if (mp_vdpau_guess_if_emulated(info->hwctx->vdpau_ctx)) - return HWDEC_ERR_EMULATED; return 0; } @@ -102,4 +110,5 @@ const struct vd_lavc_hwdec mp_vd_lavc_vdpau = { .uninit = uninit, .init_decoder = init_decoder, .allocate_image = allocate_image, + .process_image = update_format, }; diff --git a/video/decode/videotoolbox.c b/video/decode/videotoolbox.c index 2d2f5f7..c69d5e8 100644 --- a/video/decode/videotoolbox.c +++ b/video/decode/videotoolbox.c @@ -27,11 +27,10 @@ #include "config.h" -static int probe(struct 
vd_lavc_hwdec *hwdec, struct mp_hwdec_info *info, +static int probe(struct lavc_ctx *ctx, struct vd_lavc_hwdec *hwdec, const char *codec) { - hwdec_request_api(info, "videotoolbox"); - if (!info || !info->hwctx || !info->hwctx->get_vt_fmt) + if (!hwdec_devices_load(ctx->hwdec_devs, HWDEC_VIDEOTOOLBOX)) return HWDEC_ERR_NO_CTX; switch (mp_codec_to_av_codec_id(codec)) { case AV_CODEC_ID_H264: @@ -89,8 +88,8 @@ static int init_decoder(struct lavc_ctx *ctx, int w, int h) AVVideotoolboxContext *vtctx = av_videotoolbox_alloc_context(); - struct mp_hwdec_ctx *hwctx = ctx->hwdec_info->hwctx; - vtctx->cv_pix_fmt_type = hwctx->get_vt_fmt(hwctx); + struct mp_vt_ctx *vt = hwdec_devices_load(ctx->hwdec_devs, HWDEC_VIDEOTOOLBOX); + vtctx->cv_pix_fmt_type = vt->get_vt_fmt(vt); int err = av_videotoolbox_default_init2(ctx->avctx, vtctx); if (err < 0) { diff --git a/video/dxva2.c b/video/dxva2.c deleted file mode 100644 index d6635ce..0000000 --- a/video/dxva2.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include "common/av_common.h" -#include "dxva2.h" -#include "mp_image.h" -#include "img_format.h" -#include "mp_image_pool.h" - -struct dxva2_surface { - HMODULE d3dlib; - HMODULE dxva2lib; - - IDirectXVideoDecoder *decoder; - IDirect3DSurface9 *surface; -}; - -IDirect3DSurface9 *d3d9_surface_in_mp_image(struct mp_image *mpi) -{ - return mpi && mpi->imgfmt == IMGFMT_DXVA2 ? - (IDirect3DSurface9 *)mpi->planes[3] : NULL; -} - -static void dxva2_release_img(void *arg) -{ - struct dxva2_surface *surface = arg; - if (surface->surface) - IDirect3DSurface9_Release(surface->surface); - - if (surface->decoder) - IDirectXVideoDecoder_Release(surface->decoder); - - if (surface->dxva2lib) - FreeLibrary(surface->dxva2lib); - - if (surface->d3dlib) - FreeLibrary(surface->d3dlib); - - talloc_free(surface); -} - -struct mp_image *dxva2_new_ref(IDirectXVideoDecoder *decoder, - IDirect3DSurface9 *d3d9_surface, int w, int h) -{ - if (!decoder || !d3d9_surface) - return NULL; - struct dxva2_surface *surface = talloc_zero(NULL, struct dxva2_surface); - - // Add additional references to the libraries which might otherwise be freed - // before the surface, which is observed to lead to bad behaviour - surface->d3dlib = LoadLibrary(L"d3d9.dll"); - surface->dxva2lib = LoadLibrary(L"dxva2.dll"); - if (!surface->d3dlib || !surface->dxva2lib) - goto fail; - - surface->surface = d3d9_surface; - IDirect3DSurface9_AddRef(surface->surface); - surface->decoder = decoder; - IDirectXVideoDecoder_AddRef(surface->decoder); - - struct mp_image *mpi = mp_image_new_custom_ref(&(struct mp_image){0}, - surface, dxva2_release_img); - if (!mpi) - abort(); - - mp_image_setfmt(mpi, IMGFMT_DXVA2); - mp_image_set_size(mpi, w, h); - mpi->planes[3] = (void *)surface->surface; - return mpi; -fail: - dxva2_release_img(surface); - return NULL; -} diff --git a/video/dxva2.h b/video/dxva2.h deleted file mode 100644 index 1f2e4a4..0000000 --- a/video/dxva2.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * This file is 
part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef MPV_DXVA2_H -#define MPV_DXVA2_H - -#include <d3d9.h> -#include <dxva2api.h> - -struct mp_image; -struct mp_image_pool; - -IDirect3DSurface9 *d3d9_surface_in_mp_image(struct mp_image *mpi); - -struct mp_image *dxva2_new_ref(IDirectXVideoDecoder *decoder, - IDirect3DSurface9 *d3d9_surface, int w, int h); - -#endif diff --git a/video/filter/refqueue.c b/video/filter/refqueue.c new file mode 100644 index 0000000..04de312 --- /dev/null +++ b/video/filter/refqueue.c @@ -0,0 +1,230 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <assert.h> + +#include "common/common.h" +#include "video/mp_image.h" + +#include "refqueue.h" + +struct mp_refqueue { + int needed_past_frames; + int needed_future_frames; + int flags; + + bool second_field; // current frame has to output a second field yet + bool eof; + + // Queue of input frames, used to determine past/current/future frames. + // queue[0] is the newest frame, queue[num_queue - 1] the oldest. + struct mp_image **queue; + int num_queue; + // queue[pos] is the current frame, unless pos is an invalid index. + int pos; +}; + +struct mp_refqueue *mp_refqueue_alloc(void) +{ + struct mp_refqueue *q = talloc_zero(NULL, struct mp_refqueue); + mp_refqueue_flush(q); + return q; +} + +void mp_refqueue_free(struct mp_refqueue *q) +{ + mp_refqueue_flush(q); + talloc_free(q); +} + +// The minimum number of frames required before and after the current frame. +void mp_refqueue_set_refs(struct mp_refqueue *q, int past, int future) +{ + assert(past >= 0 && future >= 0); + q->needed_past_frames = past; + q->needed_future_frames = MPMAX(future, 1); // at least 1 for determining PTS +} + +// MP_MODE_* flags +void mp_refqueue_set_mode(struct mp_refqueue *q, int flags) +{ + q->flags = flags; +} + +// Whether the current frame should be deinterlaced. +bool mp_refqueue_should_deint(struct mp_refqueue *q) +{ + if (!mp_refqueue_has_output(q) || !(q->flags & MP_MODE_DEINT)) + return false; + + return (q->queue[q->pos]->fields & MP_IMGFIELD_INTERLACED) || + !(q->flags & MP_MODE_INTERLACED_ONLY); +} + +// Whether the current output frame is marked as interlaced. +bool mp_refqueue_is_interlaced(struct mp_refqueue *q) +{ + if (!mp_refqueue_has_output(q)) + return false; + + return q->queue[q->pos]->fields & MP_IMGFIELD_INTERLACED; +} + +// Whether the current output frame (field) is the top field, bottom field +// otherwise. (Assumes the caller forces deinterlacing.) 
+bool mp_refqueue_is_top_field(struct mp_refqueue *q) +{ + if (!mp_refqueue_has_output(q)) + return false; + + return !!(q->queue[q->pos]->fields & MP_IMGFIELD_TOP_FIRST) ^ q->second_field; +} + +// Whether top-field-first mode is enabled. +bool mp_refqueue_top_field_first(struct mp_refqueue *q) +{ + if (!mp_refqueue_has_output(q)) + return false; + + return q->queue[q->pos]->fields & MP_IMGFIELD_TOP_FIRST; +} + +// Discard all state. +void mp_refqueue_flush(struct mp_refqueue *q) +{ + for (int n = 0; n < q->num_queue; n++) + talloc_free(q->queue[n]); + q->num_queue = 0; + q->pos = -1; + q->second_field = false; + q->eof = false; +} + +// Add a new frame to the queue. (Call mp_refqueue_next() to advance the +// current frame and to discard unneeded past frames.) +// Ownership goes to the mp_refqueue. +// Passing NULL means EOF, in which case mp_refqueue_need_input() will return +// false even if not enough future frames are available. +void mp_refqueue_add_input(struct mp_refqueue *q, struct mp_image *img) +{ + q->eof = !img; + if (!img) + return; + + MP_TARRAY_INSERT_AT(q, q->queue, q->num_queue, 0, img); + q->pos++; + + assert(q->pos >= 0 && q->pos < q->num_queue); +} + +bool mp_refqueue_need_input(struct mp_refqueue *q) +{ + return q->pos < q->needed_future_frames && !q->eof; +} + +bool mp_refqueue_has_output(struct mp_refqueue *q) +{ + return q->pos >= 0 && !mp_refqueue_need_input(q); +} + +static bool output_next_field(struct mp_refqueue *q) +{ + if (q->second_field) + return false; + if (!(q->flags & MP_MODE_OUTPUT_FIELDS)) + return false; + if (!mp_refqueue_should_deint(q)) + return false; + + assert(q->pos >= 0); + + // If there's no (reasonable) timestamp, also skip the field. 
+ if (q->pos == 0) + return false; + + double pts = q->queue[q->pos]->pts; + double next_pts = q->queue[q->pos - 1]->pts; + if (pts == MP_NOPTS_VALUE || next_pts == MP_NOPTS_VALUE) + return false; + + double frametime = next_pts - pts; + if (frametime <= 0.0 || frametime >= 1.0) + return false; + + q->queue[q->pos]->pts = pts + frametime / 2; + q->second_field = true; + return true; +} + +// Advance current field, depending on interlace flags. +void mp_refqueue_next_field(struct mp_refqueue *q) +{ + if (!mp_refqueue_has_output(q)) + return; + + if (!output_next_field(q)) + mp_refqueue_next(q); +} + +// Advance to next input frame (skips fields even in field output mode). +void mp_refqueue_next(struct mp_refqueue *q) +{ + if (!mp_refqueue_has_output(q)) + return; + + q->pos--; + q->second_field = false; + + assert(q->pos >= -1 && q->pos < q->num_queue); + + // Discard unneeded past frames. + while (q->num_queue - (q->pos + 1) > q->needed_past_frames) { + assert(q->num_queue > 0); + talloc_free(q->queue[q->num_queue - 1]); + q->num_queue--; + } + + assert(q->pos >= -1 && q->pos < q->num_queue); +} + +// Return a frame by relative position: +// -1: first past frame +// 0: current frame +// 1: first future frame +// Caller doesn't get ownership. Return NULL if unavailable. +struct mp_image *mp_refqueue_get(struct mp_refqueue *q, int pos) +{ + int i = q->pos - pos; + return i >= 0 && i < q->num_queue ? q->queue[i] : NULL; +} + +// Same as mp_refqueue_get(), but return the frame which contains a field +// relative to the current field's position. +struct mp_image *mp_refqueue_get_field(struct mp_refqueue *q, int pos) +{ + // If the current field is the second field (conceptually), then pos=1 + // needs to get the next frame. Similarly, pos=-1 needs to get the current + // frame, so round towards negative infinity. + int round = mp_refqueue_top_field_first(q) != mp_refqueue_is_top_field(q); + int frame = (pos < 0 ? 
pos - (1 - round) : pos + round) / 2; + return mp_refqueue_get(q, frame); +} + +bool mp_refqueue_is_second_field(struct mp_refqueue *q) +{ + return mp_refqueue_has_output(q) && q->second_field; +} diff --git a/video/filter/refqueue.h b/video/filter/refqueue.h new file mode 100644 index 0000000..ef23bee --- /dev/null +++ b/video/filter/refqueue.h @@ -0,0 +1,36 @@ +#ifndef MP_REFQUEUE_H_ +#define MP_REFQUEUE_H_ + +#include <stdbool.h> + +// A helper for deinterlacers which require past/future reference frames. + +struct mp_refqueue; + +struct mp_refqueue *mp_refqueue_alloc(void); +void mp_refqueue_free(struct mp_refqueue *q); + +void mp_refqueue_set_refs(struct mp_refqueue *q, int past, int future); +void mp_refqueue_flush(struct mp_refqueue *q); +void mp_refqueue_add_input(struct mp_refqueue *q, struct mp_image *img); +bool mp_refqueue_need_input(struct mp_refqueue *q); +bool mp_refqueue_has_output(struct mp_refqueue *q); +void mp_refqueue_next(struct mp_refqueue *q); +void mp_refqueue_next_field(struct mp_refqueue *q); +struct mp_image *mp_refqueue_get(struct mp_refqueue *q, int pos); + +enum { + MP_MODE_DEINT = (1 << 0), // deinterlacing enabled + MP_MODE_OUTPUT_FIELDS = (1 << 1), // output fields separately + MP_MODE_INTERLACED_ONLY = (1 << 2), // only deinterlace marked frames +}; + +void mp_refqueue_set_mode(struct mp_refqueue *q, int flags); +bool mp_refqueue_should_deint(struct mp_refqueue *q); +bool mp_refqueue_is_interlaced(struct mp_refqueue *q); +bool mp_refqueue_is_top_field(struct mp_refqueue *q); +bool mp_refqueue_top_field_first(struct mp_refqueue *q); +bool mp_refqueue_is_second_field(struct mp_refqueue *q); +struct mp_image *mp_refqueue_get_field(struct mp_refqueue *q, int pos); + +#endif diff --git a/video/filter/vf.c b/video/filter/vf.c index d8e7f6b..176ac95 100644 --- a/video/filter/vf.c +++ b/video/filter/vf.c @@ -61,6 +61,7 @@ extern const vf_info_t vf_info_vapoursynth_lazy; extern const vf_info_t vf_info_vdpaupp; extern const vf_info_t 
vf_info_vdpaurb; extern const vf_info_t vf_info_buffer; +extern const vf_info_t vf_info_d3d11vpp; // list of available filters: static const vf_info_t *const filter_list[] = { @@ -99,6 +100,9 @@ static const vf_info_t *const filter_list[] = { &vf_info_vdpaupp, &vf_info_vdpaurb, #endif +#if HAVE_D3D_HWACCEL + &vf_info_d3d11vpp, +#endif NULL }; @@ -244,7 +248,7 @@ static struct vf_instance *vf_open(struct vf_chain *c, const char *name, *vf = (vf_instance_t) { .info = desc.p, .log = mp_log_new(vf, c->log, name), - .hwdec = c->hwdec, + .hwdec_devs = c->hwdec_devs, .query_format = vf_default_query_format, .out_pool = talloc_steal(vf, mp_image_pool_new(16)), .chain = c, @@ -514,7 +518,23 @@ static void query_formats(uint8_t *fmts, struct vf_instance *vf) static bool is_conv_filter(struct vf_instance *vf) { - return vf && strcmp(vf->info->name, "scale") == 0; + return vf && (strcmp(vf->info->name, "scale") == 0 || vf->autoinserted); +} + +static const char *find_conv_filter(uint8_t *fmts_in, uint8_t *fmts_out) +{ + for (int n = 0; filter_list[n]; n++) { + if (filter_list[n]->test_conversion) { + for (int a = IMGFMT_START; a < IMGFMT_END; a++) { + for (int b = IMGFMT_START; b < IMGFMT_END; b++) { + if (fmts_in[a - IMGFMT_START] && fmts_out[b - IMGFMT_START] && + filter_list[n]->test_conversion(a, b)) + return filter_list[n]->name; + } + } + } + } + return "scale"; } static void update_formats(struct vf_chain *c, struct vf_instance *vf, @@ -535,7 +555,18 @@ static void update_formats(struct vf_chain *c, struct vf_instance *vf, // filters after vf work, but vf can't output any format the filters // after it accept), try to insert a conversion filter. MP_INFO(c, "Using conversion filter.\n"); - struct vf_instance *conv = vf_open(c, "scale", NULL); + // Determine which output formats the filter _could_ accept. 
For this + // to work after the conversion filter is inserted, it is assumed that + // conversion filters have a single set of in/output formats that can + // be converted to each other. + uint8_t out_formats[IMGFMT_END - IMGFMT_START]; + for (int n = IMGFMT_START; n < IMGFMT_END; n++) { + out_formats[n - IMGFMT_START] = vf->last_outfmts[n - IMGFMT_START]; + vf->last_outfmts[n - IMGFMT_START] = 1; + } + query_formats(fmts, vf); + const char *filter = find_conv_filter(fmts, out_formats); + struct vf_instance *conv = vf_open(c, filter, NULL); if (conv) { conv->autoinserted = true; conv->next = vf->next; diff --git a/video/filter/vf.h b/video/filter/vf.h index c982b61..49296fb 100644 --- a/video/filter/vf.h +++ b/video/filter/vf.h @@ -37,6 +37,7 @@ typedef struct vf_info { const void *priv_defaults; const struct m_option *options; void (*print_help)(struct mp_log *log); + bool (*test_conversion)(int in, int out); } vf_info_t; typedef struct vf_instance { @@ -92,7 +93,7 @@ typedef struct vf_instance { struct mp_image_pool *out_pool; struct vf_priv_s *priv; struct mp_log *log; - struct mp_hwdec_info *hwdec; + struct mp_hwdec_devices *hwdec_devs; struct mp_image **out_queued; int num_out_queued; @@ -120,7 +121,7 @@ struct vf_chain { struct mp_log *log; struct MPOpts *opts; struct mpv_global *global; - struct mp_hwdec_info *hwdec; + struct mp_hwdec_devices *hwdec_devs; // Call when the filter chain wants new processing (for filters with // asynchronous behavior) - must be immutable once filters are created, diff --git a/video/filter/vf_crop.c b/video/filter/vf_crop.c index 89b2b6f..6f9a788 100644 --- a/video/filter/vf_crop.c +++ b/video/filter/vf_crop.c @@ -51,10 +51,23 @@ static int reconfig(struct vf_instance *vf, struct mp_image_params *in, if(vf->priv->crop_y<0) vf->priv->crop_y=(height-vf->priv->crop_h)/2; // rounding: + int orig_x = vf->priv->crop_x; + int orig_y = vf->priv->crop_y; + struct mp_imgfmt_desc fmt = mp_imgfmt_get_desc(in->imgfmt); - vf->priv->crop_x = 
MP_ALIGN_DOWN(vf->priv->crop_x, fmt.align_x); - vf->priv->crop_y = MP_ALIGN_DOWN(vf->priv->crop_y, fmt.align_y); + if (fmt.flags & MP_IMGFLAG_HWACCEL) { + vf->priv->crop_x = 0; + vf->priv->crop_y = 0; + } else { + vf->priv->crop_x = MP_ALIGN_DOWN(vf->priv->crop_x, fmt.align_x); + vf->priv->crop_y = MP_ALIGN_DOWN(vf->priv->crop_y, fmt.align_y); + } + + if (vf->priv->crop_x != orig_x || vf->priv->crop_y != orig_y) { + MP_WARN(vf, "Adjusting crop origin to %d/%d for pixel format alignment.\n", + vf->priv->crop_x, vf->priv->crop_y); + } // check: if(vf->priv->crop_w+vf->priv->crop_x>width || @@ -71,17 +84,19 @@ static int reconfig(struct vf_instance *vf, struct mp_image_params *in, static struct mp_image *filter(struct vf_instance *vf, struct mp_image *mpi) { - mp_image_crop(mpi, vf->priv->crop_x, vf->priv->crop_y, - vf->priv->crop_x + vf->priv->crop_w, - vf->priv->crop_y + vf->priv->crop_h); + if (mpi->fmt.flags & MP_IMGFLAG_HWACCEL) { + mp_image_set_size(mpi, vf->fmt_out.w, vf->fmt_out.h); + } else { + mp_image_crop(mpi, vf->priv->crop_x, vf->priv->crop_y, + vf->priv->crop_x + vf->priv->crop_w, + vf->priv->crop_y + vf->priv->crop_h); + } return mpi; } static int query_format(struct vf_instance *vf, unsigned int fmt) { - if (!IMGFMT_IS_HWACCEL(fmt)) - return vf_next_query_format(vf, fmt); - return 0; + return vf_next_query_format(vf, fmt); } static int vf_open(vf_instance_t *vf){ diff --git a/video/filter/vf_d3d11vpp.c b/video/filter/vf_d3d11vpp.c new file mode 100644 index 0000000..a0aa0ed --- /dev/null +++ b/video/filter/vf_d3d11vpp.c @@ -0,0 +1,537 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <initguid.h> +#include <assert.h> +#include <windows.h> +#include <d3d11.h> + +#include "common/common.h" +#include "osdep/timer.h" +#include "osdep/windows_utils.h" +#include "vf.h" +#include "refqueue.h" +#include "video/hwdec.h" +#include "video/mp_image_pool.h" + +// missing in MinGW +#define D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_BOB 0x2 + +struct vf_priv_s { + ID3D11Device *vo_dev; + + ID3D11DeviceContext *device_ctx; + ID3D11VideoDevice *video_dev; + ID3D11VideoContext *video_ctx; + + ID3D11VideoProcessor *video_proc; + ID3D11VideoProcessorEnumerator *vp_enum; + D3D11_VIDEO_FRAME_FORMAT d3d_frame_format; + + DXGI_FORMAT out_format; + bool out_shared; + bool out_rgb; + + bool require_filtering; + + struct mp_image_params params, out_params; + int c_w, c_h; + + struct mp_image_pool *pool; + + struct mp_refqueue *queue; + + int deint_enabled; + int interlaced_only; +}; + +static void release_tex(void *arg) +{ + ID3D11Texture2D *texture = arg; + + ID3D11Texture2D_Release(texture); +} + +static struct mp_image *alloc_pool(void *pctx, int fmt, int w, int h) +{ + struct vf_instance *vf = pctx; + struct vf_priv_s *p = vf->priv; + HRESULT hr; + + ID3D11Texture2D *texture = NULL; + D3D11_TEXTURE2D_DESC texdesc = { + .Width = w, + .Height = h, + .Format = p->out_format, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc = { .Count = 1 }, + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE, + .MiscFlags = p->out_shared ? 
D3D11_RESOURCE_MISC_SHARED : 0, + }; + hr = ID3D11Device_CreateTexture2D(p->vo_dev, &texdesc, NULL, &texture); + if (FAILED(hr)) + return NULL; + + struct mp_image *mpi = mp_image_new_custom_ref(NULL, texture, release_tex); + if (!mpi) + abort(); + + mp_image_setfmt(mpi, p->out_params.imgfmt); + mp_image_set_size(mpi, w, h); + mpi->params.hw_subfmt = p->out_params.hw_subfmt; + + mpi->planes[1] = (void *)texture; + mpi->planes[2] = (void *)(intptr_t)0; + + return mpi; +} + +static void flush_frames(struct vf_instance *vf) +{ + struct vf_priv_s *p = vf->priv; + mp_refqueue_flush(p->queue); +} + +static int filter_ext(struct vf_instance *vf, struct mp_image *in) +{ + struct vf_priv_s *p = vf->priv; + + mp_refqueue_set_refs(p->queue, 0, 0); + mp_refqueue_set_mode(p->queue, + (p->deint_enabled ? MP_MODE_DEINT : 0) | + MP_MODE_OUTPUT_FIELDS | + (p->interlaced_only ? MP_MODE_INTERLACED_ONLY : 0)); + + mp_refqueue_add_input(p->queue, in); + return 0; +} + +static void destroy_video_proc(struct vf_instance *vf) +{ + struct vf_priv_s *p = vf->priv; + + if (p->video_proc) + ID3D11VideoProcessor_Release(p->video_proc); + p->video_proc = NULL; + + if (p->vp_enum) + ID3D11VideoProcessorEnumerator_Release(p->vp_enum); + p->vp_enum = NULL; +} + +static int recreate_video_proc(struct vf_instance *vf) +{ + struct vf_priv_s *p = vf->priv; + HRESULT hr; + + destroy_video_proc(vf); + + D3D11_VIDEO_PROCESSOR_CONTENT_DESC vpdesc = { + .InputFrameFormat = p->d3d_frame_format, + .InputWidth = p->c_w, + .InputHeight = p->c_h, + .OutputWidth = p->params.w, + .OutputHeight = p->params.h, + }; + hr = ID3D11VideoDevice_CreateVideoProcessorEnumerator(p->video_dev, &vpdesc, + &p->vp_enum); + if (FAILED(hr)) + goto fail; + + D3D11_VIDEO_PROCESSOR_CAPS caps; + hr = ID3D11VideoProcessorEnumerator_GetVideoProcessorCaps(p->vp_enum, &caps); + if (FAILED(hr)) + goto fail; + + MP_VERBOSE(vf, "Found %d rate conversion caps.\n", + (int)caps.RateConversionCapsCount); + + int rindex = -1; + for (int n = 0; n 
< caps.RateConversionCapsCount; n++) { + D3D11_VIDEO_PROCESSOR_RATE_CONVERSION_CAPS rcaps; + hr = ID3D11VideoProcessorEnumerator_GetVideoProcessorRateConversionCaps + (p->vp_enum, n, &rcaps); + if (FAILED(hr)) + goto fail; + MP_VERBOSE(vf, " - %d: 0x%08x\n", n, (unsigned)rcaps.ProcessorCaps); + if (rcaps.ProcessorCaps & D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_BOB) + { + MP_VERBOSE(vf, " (matching)\n"); + if (rindex < 0) + rindex = n; + } + } + + if (rindex < 0) { + MP_WARN(vf, "No video deinterlacing processor found.\n"); + rindex = 0; + } + + hr = ID3D11VideoDevice_CreateVideoProcessor(p->video_dev, p->vp_enum, rindex, + &p->video_proc); + if (FAILED(hr)) { + MP_ERR(vf, "Failed to create D3D11 video processor.\n"); + goto fail; + } + + // Note: libavcodec does not support cropping left/top with hwaccel. + RECT src_rc = { + .right = p->params.w, + .bottom = p->params.h, + }; + ID3D11VideoContext_VideoProcessorSetStreamSourceRect(p->video_ctx, + p->video_proc, + 0, TRUE, &src_rc); + + // This is supposed to stop drivers from fucking up the video quality. + ID3D11VideoContext_VideoProcessorSetStreamAutoProcessingMode(p->video_ctx, + p->video_proc, + 0, FALSE); + + ID3D11VideoContext_VideoProcessorSetStreamOutputRate(p->video_ctx, + p->video_proc, + 0, + D3D11_VIDEO_PROCESSOR_OUTPUT_RATE_NORMAL, + FALSE, 0); + + D3D11_VIDEO_PROCESSOR_COLOR_SPACE csp = { + .YCbCr_Matrix = p->params.colorspace != MP_CSP_BT_601, + .Nominal_Range = p->params.colorlevels == MP_CSP_LEVELS_TV ? 1 : 2, + }; + ID3D11VideoContext_VideoProcessorSetStreamColorSpace(p->video_ctx, + p->video_proc, + 0, &csp); + if (p->out_rgb) { + if (p->params.colorspace != MP_CSP_BT_601 && + p->params.colorspace != MP_CSP_BT_709) + { + MP_WARN(vf, "Unsupported video colorspace (%s/%s). 
Consider " + "disabling hardware decoding, or using " + "--hwdec=d3d11va-copy to get correct output.\n", + m_opt_choice_str(mp_csp_names, p->params.colorspace), + m_opt_choice_str(mp_csp_levels_names, p->params.colorlevels)); + } + } else { + ID3D11VideoContext_VideoProcessorSetOutputColorSpace(p->video_ctx, + p->video_proc, + &csp); + } + + return 0; +fail: + destroy_video_proc(vf); + return -1; +} + +static int render(struct vf_instance *vf) +{ + struct vf_priv_s *p = vf->priv; + int res = -1; + HRESULT hr; + ID3D11VideoProcessorInputView *in_view = NULL; + ID3D11VideoProcessorOutputView *out_view = NULL; + struct mp_image *in = NULL, *out = NULL; + out = mp_image_pool_get(p->pool, p->out_params.imgfmt, p->params.w, p->params.h); + if (!out) + goto cleanup; + + ID3D11Texture2D *d3d_out_tex = (void *)out->planes[1]; + + in = mp_refqueue_get(p->queue, 0); + if (!in) + goto cleanup; + ID3D11Texture2D *d3d_tex = (void *)in->planes[1]; + int d3d_subindex = (intptr_t)in->planes[2]; + + mp_image_copy_attributes(out, in); + + D3D11_VIDEO_FRAME_FORMAT d3d_frame_format; + if (!mp_refqueue_is_interlaced(p->queue)) { + d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_PROGRESSIVE; + } else if (mp_refqueue_top_field_first(p->queue)) { + d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_TOP_FIELD_FIRST; + } else { + d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_BOTTOM_FIELD_FIRST; + } + + D3D11_TEXTURE2D_DESC texdesc; + ID3D11Texture2D_GetDesc(d3d_tex, &texdesc); + if (!p->video_proc || p->c_w != texdesc.Width || p->c_h != texdesc.Height || + p->d3d_frame_format != d3d_frame_format) + { + p->c_w = texdesc.Width; + p->c_h = texdesc.Height; + p->d3d_frame_format = d3d_frame_format; + if (recreate_video_proc(vf) < 0) + goto cleanup; + } + + if (!mp_refqueue_is_interlaced(p->queue)) { + d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_PROGRESSIVE; + } else if (mp_refqueue_is_top_field(p->queue)) { + d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_TOP_FIELD_FIRST; + } 
else { + d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_BOTTOM_FIELD_FIRST; + } + + ID3D11VideoContext_VideoProcessorSetStreamFrameFormat(p->video_ctx, + p->video_proc, + 0, d3d_frame_format); + + D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC indesc = { + .ViewDimension = D3D11_VPIV_DIMENSION_TEXTURE2D, + .Texture2D = { + .ArraySlice = d3d_subindex, + }, + }; + hr = ID3D11VideoDevice_CreateVideoProcessorInputView(p->video_dev, + (ID3D11Resource *)d3d_tex, + p->vp_enum, &indesc, + &in_view); + if (FAILED(hr)) { + MP_ERR(vf, "Could not create ID3D11VideoProcessorInputView\n"); + goto cleanup; + } + + D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC outdesc = { + .ViewDimension = D3D11_VPOV_DIMENSION_TEXTURE2D, + }; + hr = ID3D11VideoDevice_CreateVideoProcessorOutputView(p->video_dev, + (ID3D11Resource *)d3d_out_tex, + p->vp_enum, &outdesc, + &out_view); + if (FAILED(hr)) + goto cleanup; + + D3D11_VIDEO_PROCESSOR_STREAM stream = { + .Enable = TRUE, + .pInputSurface = in_view, + }; + int frame = mp_refqueue_is_second_field(p->queue); + hr = ID3D11VideoContext_VideoProcessorBlt(p->video_ctx, p->video_proc, + out_view, frame, 1, &stream); + if (FAILED(hr)) { + MP_ERR(vf, "VideoProcessorBlt failed.\n"); + goto cleanup; + } + + // Make sure the texture is updated correctly on the shared context. + // (I'm not sure if this is correct, though it won't harm.) 
+ if (p->out_shared) + ID3D11DeviceContext_Flush(p->device_ctx); + + res = 0; +cleanup: + if (in_view) + ID3D11VideoProcessorInputView_Release(in_view); + if (out_view) + ID3D11VideoProcessorOutputView_Release(out_view); + if (res >= 0) { + vf_add_output_frame(vf, out); + } else { + talloc_free(out); + } + mp_refqueue_next_field(p->queue); + return res; +} + +static int filter_out(struct vf_instance *vf) +{ + struct vf_priv_s *p = vf->priv; + + if (!mp_refqueue_has_output(p->queue)) + return 0; + + // no filtering + if (!mp_refqueue_should_deint(p->queue) && !p->require_filtering) { + struct mp_image *in = mp_refqueue_get(p->queue, 0); + vf_add_output_frame(vf, mp_image_new_ref(in)); + mp_refqueue_next(p->queue); + return 0; + } + + return render(vf); +} + +static int reconfig(struct vf_instance *vf, struct mp_image_params *in, + struct mp_image_params *out) +{ + struct vf_priv_s *p = vf->priv; + + flush_frames(vf); + talloc_free(p->pool); + p->pool = NULL; + + destroy_video_proc(vf); + + *out = *in; + + if (vf_next_query_format(vf, IMGFMT_D3D11VA) || + vf_next_query_format(vf, IMGFMT_D3D11NV12)) + { + out->imgfmt = vf_next_query_format(vf, IMGFMT_D3D11VA) + ? 
IMGFMT_D3D11VA : IMGFMT_D3D11NV12; + out->hw_subfmt = IMGFMT_NV12; + p->out_format = DXGI_FORMAT_NV12; + p->out_shared = false; + p->out_rgb = false; + } else { + out->imgfmt = IMGFMT_D3D11RGB; + out->hw_subfmt = IMGFMT_RGB0; + p->out_format = DXGI_FORMAT_B8G8R8A8_UNORM; + p->out_shared = true; + p->out_rgb = true; + } + + p->require_filtering = in->hw_subfmt != out->hw_subfmt; + + p->params = *in; + p->out_params = *out; + + p->pool = mp_image_pool_new(20); + mp_image_pool_set_allocator(p->pool, alloc_pool, vf); + mp_image_pool_set_lru(p->pool); + + return 0; +} + +static void uninit(struct vf_instance *vf) +{ + struct vf_priv_s *p = vf->priv; + + destroy_video_proc(vf); + + flush_frames(vf); + mp_refqueue_free(p->queue); + talloc_free(p->pool); + + if (p->video_ctx) + ID3D11VideoContext_Release(p->video_ctx); + + if (p->video_dev) + ID3D11VideoDevice_Release(p->video_dev); + + if (p->device_ctx) + ID3D11DeviceContext_Release(p->device_ctx); + + if (p->vo_dev) + ID3D11Device_Release(p->vo_dev); +} + +static int query_format(struct vf_instance *vf, unsigned int imgfmt) +{ + if (imgfmt == IMGFMT_D3D11VA || + imgfmt == IMGFMT_D3D11NV12 || + imgfmt == IMGFMT_D3D11RGB) + { + return vf_next_query_format(vf, IMGFMT_D3D11VA) || + vf_next_query_format(vf, IMGFMT_D3D11NV12) || + vf_next_query_format(vf, IMGFMT_D3D11RGB); + } + return 0; +} + +static bool test_conversion(int in, int out) +{ + return (in == IMGFMT_D3D11VA || + in == IMGFMT_D3D11NV12 || + in == IMGFMT_D3D11RGB) && + (out == IMGFMT_D3D11VA || + out == IMGFMT_D3D11NV12 || + out == IMGFMT_D3D11RGB); +} + +static int control(struct vf_instance *vf, int request, void* data) +{ + struct vf_priv_s *p = vf->priv; + switch (request){ + case VFCTRL_GET_DEINTERLACE: + *(int*)data = !!p->deint_enabled; + return true; + case VFCTRL_SET_DEINTERLACE: + p->deint_enabled = !!*(int*)data; + return true; + case VFCTRL_SEEK_RESET: + flush_frames(vf); + return true; + default: + return CONTROL_UNKNOWN; + } +} + +static int 
vf_open(vf_instance_t *vf) +{ + struct vf_priv_s *p = vf->priv; + + vf->reconfig = reconfig; + vf->filter_ext = filter_ext; + vf->filter_out = filter_out; + vf->query_format = query_format; + vf->uninit = uninit; + vf->control = control; + + p->queue = mp_refqueue_alloc(); + + p->vo_dev = hwdec_devices_load(vf->hwdec_devs, HWDEC_D3D11VA); + if (!p->vo_dev) + return 0; + + ID3D11Device_AddRef(p->vo_dev); + + HRESULT hr; + + hr = ID3D11Device_QueryInterface(p->vo_dev, &IID_ID3D11VideoDevice, + (void **)&p->video_dev); + if (FAILED(hr)) + goto fail; + + ID3D11Device_GetImmediateContext(p->vo_dev, &p->device_ctx); + if (!p->device_ctx) + goto fail; + hr = ID3D11DeviceContext_QueryInterface(p->device_ctx, &IID_ID3D11VideoContext, + (void **)&p->video_ctx); + if (FAILED(hr)) + goto fail; + + return 1; + +fail: + uninit(vf); + return 0; +} + +#define OPT_BASE_STRUCT struct vf_priv_s +static const m_option_t vf_opts_fields[] = { + OPT_FLAG("deint", deint_enabled, 0), + OPT_FLAG("interlaced-only", interlaced_only, 0), + {0} +}; + +const vf_info_t vf_info_d3d11vpp = { + .description = "D3D11 Video Post-Process Filter", + .name = "d3d11vpp", + .test_conversion = test_conversion, + .open = vf_open, + .priv_size = sizeof(struct vf_priv_s), + .priv_defaults = &(const struct vf_priv_s) { + .deint_enabled = 1, + .interlaced_only = 1, + }, + .options = vf_opts_fields, +}; diff --git a/video/filter/vf_format.c b/video/filter/vf_format.c index ff7389c..109fda4 100644 --- a/video/filter/vf_format.c +++ b/video/filter/vf_format.c @@ -38,6 +38,7 @@ struct vf_priv_s { int colorlevels; int primaries; int gamma; + float peak; int chroma_location; int stereo_in; int stereo_out; @@ -94,6 +95,8 @@ static int reconfig(struct vf_instance *vf, struct mp_image_params *in, out->primaries = p->primaries; if (p->gamma) out->gamma = p->gamma; + if (p->peak) + out->peak = p->peak; if (p->chroma_location) out->chroma_location = p->chroma_location; if (p->stereo_in) @@ -142,6 +145,7 @@ static const 
m_option_t vf_opts_fields[] = { OPT_CHOICE_C("colorlevels", colorlevels, 0, mp_csp_levels_names), OPT_CHOICE_C("primaries", primaries, 0, mp_csp_prim_names), OPT_CHOICE_C("gamma", gamma, 0, mp_csp_trc_names), + OPT_FLOAT("peak", peak, 0), OPT_CHOICE_C("chroma-location", chroma_location, 0, mp_chroma_names), OPT_CHOICE_C("stereo-in", stereo_in, 0, mp_stereo3d_names), OPT_CHOICE_C("stereo-out", stereo_out, 0, mp_stereo3d_names), diff --git a/video/filter/vf_vavpp.c b/video/filter/vf_vavpp.c index ae1d6b5..0365b55 100644 --- a/video/filter/vf_vavpp.c +++ b/video/filter/vf_vavpp.c @@ -23,6 +23,7 @@ #include "config.h" #include "options/options.h" #include "vf.h" +#include "refqueue.h" #include "video/vaapi.h" #include "video/hwdec.h" #include "video/mp_image_pool.h" @@ -40,13 +41,6 @@ struct surface_refs { int num_surfaces; }; -static void add_surface(void *ta_ctx, struct surface_refs *refs, struct mp_image *s) -{ - VASurfaceID id = va_surface_id(s); - if (id != VA_INVALID_ID) - MP_TARRAY_APPEND(ta_ctx, refs->surfaces, refs->num_surfaces, id); -} - struct pipeline { VABufferID *filters; int num_filters; @@ -71,16 +65,7 @@ struct vf_priv_s { struct mp_image_pool *pool; int current_rt_format; - int needed_future_frames; - int needed_past_frames; - - // Queue of input frames, used to determine past/current/future frames. - // queue[0] is the newest frame, queue[num_queue - 1] the oldest. - struct mp_image **queue; - int num_queue; - // queue[current_pos] is the current frame, unless current_pos is not a - // valid index. 
- int current_pos; + struct mp_refqueue *queue; }; static const struct vf_priv_s vf_priv_default = { @@ -90,6 +75,18 @@ static const struct vf_priv_s vf_priv_default = { .interlaced_only = 1, }; +static void add_surfaces(struct vf_priv_s *p, struct surface_refs *refs, int dir) +{ + for (int n = 0; ; n++) { + struct mp_image *s = mp_refqueue_get(p->queue, (1 + n) * dir); + if (!s) + break; + VASurfaceID id = va_surface_id(s); + if (id != VA_INVALID_ID) + MP_TARRAY_APPEND(p, refs->surfaces, refs->num_surfaces, id); + } +} + // The array items must match with the "deint" suboption values. static const int deint_algorithm[] = { [0] = VAProcDeinterlacingNone, @@ -103,72 +100,82 @@ static const int deint_algorithm[] = { static void flush_frames(struct vf_instance *vf) { struct vf_priv_s *p = vf->priv; - for (int n = 0; n < p->num_queue; n++) - talloc_free(p->queue[n]); - p->num_queue = 0; - p->current_pos = -1; + mp_refqueue_flush(p->queue); } -static bool update_pipeline(struct vf_instance *vf, bool deint) +static void update_pipeline(struct vf_instance *vf) { struct vf_priv_s *p = vf->priv; VABufferID *filters = p->buffers; int num_filters = p->num_buffers; - if (p->deint_type && !deint) { + if (p->deint_type && !p->do_deint) { filters++; num_filters--; } if (filters == p->pipe.filters && num_filters == p->pipe.num_filters) - return true; + return; /* cached state is correct */ p->pipe.forward.num_surfaces = p->pipe.backward.num_surfaces = 0; p->pipe.num_input_colors = p->pipe.num_output_colors = 0; p->pipe.num_filters = 0; p->pipe.filters = NULL; if (!num_filters) - return false; - VAProcPipelineCaps caps; - caps.input_color_standards = p->pipe.input_colors; - caps.output_color_standards = p->pipe.output_colors; - caps.num_input_color_standards = VAProcColorStandardCount; - caps.num_output_color_standards = VAProcColorStandardCount; + goto nodeint; + VAProcPipelineCaps caps = { + .input_color_standards = p->pipe.input_colors, + .output_color_standards = 
p->pipe.output_colors, + .num_input_color_standards = VAProcColorStandardCount, + .num_output_color_standards = VAProcColorStandardCount, + }; VAStatus status = vaQueryVideoProcPipelineCaps(p->display, p->context, filters, num_filters, &caps); if (!check_error(vf, status, "vaQueryVideoProcPipelineCaps()")) - return false; + goto nodeint; p->pipe.filters = filters; p->pipe.num_filters = num_filters; p->pipe.num_input_colors = caps.num_input_color_standards; p->pipe.num_output_colors = caps.num_output_color_standards; - p->needed_future_frames = caps.num_forward_references; - p->needed_past_frames = caps.num_backward_references; - return true; -} + mp_refqueue_set_refs(p->queue, caps.num_backward_references, + caps.num_forward_references); + mp_refqueue_set_mode(p->queue, + (p->do_deint ? MP_MODE_DEINT : 0) | + (p->deint_type >= 2 ? MP_MODE_OUTPUT_FIELDS : 0) | + (p->interlaced_only ? MP_MODE_INTERLACED_ONLY : 0)); + return; -static inline int get_deint_field(struct vf_priv_s *p, int i, - struct mp_image *mpi) -{ - if (!p->do_deint || !(mpi->fields & MP_IMGFIELD_INTERLACED)) - return VA_FRAME_PICTURE; - return !!(mpi->fields & MP_IMGFIELD_TOP_FIRST) ^ i ? 
VA_TOP_FIELD : VA_BOTTOM_FIELD; +nodeint: + mp_refqueue_set_refs(p->queue, 0, 0); + mp_refqueue_set_mode(p->queue, 0); } -static struct mp_image *render(struct vf_instance *vf, struct mp_image *in, - unsigned int flags) +static struct mp_image *render(struct vf_instance *vf) { struct vf_priv_s *p = vf->priv; + + struct mp_image *in = mp_refqueue_get(p->queue, 0); + struct mp_image *img = NULL; + bool need_end_picture = false; + bool success = false; + VASurfaceID in_id = va_surface_id(in); if (!p->pipe.filters || in_id == VA_INVALID_ID) - return NULL; + goto cleanup; int r_w, r_h; va_surface_get_uncropped_size(in, &r_w, &r_h); - struct mp_image *img = mp_image_pool_get(p->pool, IMGFMT_VAAPI, r_w, r_h); + img = mp_image_pool_get(p->pool, IMGFMT_VAAPI, r_w, r_h); if (!img) - return NULL; + goto cleanup; mp_image_set_size(img, in->w, in->h); - - bool need_end_picture = false; - bool success = false; + mp_image_copy_attributes(img, in); + + unsigned int flags = va_get_colorspace_flag(p->params.colorspace); + if (!mp_refqueue_is_interlaced(p->queue)) { + flags |= VA_FRAME_PICTURE; + } else if (mp_refqueue_is_top_field(p->queue)) { + flags |= VA_TOP_FIELD; + } else { + flags |= VA_BOTTOM_FIELD; + } VASurfaceID id = va_surface_id(img); if (id == VA_INVALID_ID) @@ -194,7 +201,7 @@ static struct mp_image *render(struct vf_instance *vf, struct mp_image *in, goto cleanup; filter_params->flags = flags & VA_TOP_FIELD ? 
0 : VA_DEINTERLACING_BOTTOM_FIELD; - if (!(in->fields & MP_IMGFIELD_TOP_FIRST)) + if (!mp_refqueue_top_field_first(p->queue)) filter_params->flags |= VA_DEINTERLACING_BOTTOM_FIELD_FIRST; vaUnmapBuffer(p->display, *(p->pipe.filters)); @@ -211,19 +218,11 @@ static struct mp_image *render(struct vf_instance *vf, struct mp_image *in, param->filters = p->pipe.filters; param->num_filters = p->pipe.num_filters; - for (int n = 0; n < p->needed_future_frames; n++) { - int idx = p->current_pos - 1 - n; - if (idx >= 0 && idx < p->num_queue) - add_surface(p, &p->pipe.forward, p->queue[idx]); - } + add_surfaces(p, &p->pipe.forward, 1); param->forward_references = p->pipe.forward.surfaces; param->num_forward_references = p->pipe.forward.num_surfaces; - for (int n = 0; n < p->needed_past_frames; n++) { - int idx = p->current_pos + 1 + n; - if (idx >= 0 && idx < p->num_queue) - add_surface(p, &p->pipe.backward, p->queue[idx]); - } + add_surfaces(p, &p->pipe.backward, -1); param->backward_references = p->pipe.backward.surfaces; param->num_backward_references = p->pipe.backward.num_surfaces; @@ -244,47 +243,6 @@ cleanup: return NULL; } -static void output_frames(struct vf_instance *vf) -{ - struct vf_priv_s *p = vf->priv; - - struct mp_image *in = p->queue[p->current_pos]; - double prev_pts = p->current_pos + 1 < p->num_queue - ? 
p->queue[p->current_pos + 1]->pts : MP_NOPTS_VALUE; - - bool deint = p->do_deint && p->deint_type > 0; - if (!update_pipeline(vf, deint) || !p->pipe.filters) { // no filtering - vf_add_output_frame(vf, mp_image_new_ref(in)); - return; - } - unsigned int csp = va_get_colorspace_flag(p->params.colorspace); - unsigned int field = get_deint_field(p, 0, in); - if (field == VA_FRAME_PICTURE && p->interlaced_only) { - vf_add_output_frame(vf, mp_image_new_ref(in)); - return; - } - struct mp_image *out1 = render(vf, in, field | csp); - if (!out1) { // cannot render - vf_add_output_frame(vf, mp_image_new_ref(in)); - return; - } - mp_image_copy_attributes(out1, in); - vf_add_output_frame(vf, out1); - // first-field only - if (field == VA_FRAME_PICTURE || (p->do_deint && p->deint_type < 2)) - return; - double add = (in->pts - prev_pts) * 0.5; - if (prev_pts == MP_NOPTS_VALUE || add <= 0.0 || add > 0.5) // no pts, skip it - return; - struct mp_image *out2 = render(vf, in, get_deint_field(p, 1, in) | csp); - if (!out2) // cannot render - return; - mp_image_copy_attributes(out2, in); - out2->pts = in->pts + add; - vf_add_output_frame(vf, out2); - return; -} - static struct mp_image *upload(struct vf_instance *vf, struct mp_image *in) { struct vf_priv_s *p = vf->priv; @@ -303,45 +261,40 @@ static int filter_ext(struct vf_instance *vf, struct mp_image *in) { struct vf_priv_s *p = vf->priv; - if (in) { - int rt_format = in->imgfmt == IMGFMT_VAAPI ? 
va_surface_rt_format(in) - : VA_RT_FORMAT_YUV420; - if (!p->pool || p->current_rt_format != rt_format) { - talloc_free(p->pool); - p->pool = mp_image_pool_new(20); - va_pool_set_allocator(p->pool, p->va, rt_format); - p->current_rt_format = rt_format; - } - if (in->imgfmt != IMGFMT_VAAPI) { - struct mp_image *tmp = upload(vf, in); - talloc_free(in); - in = tmp; - if (!in) - return -1; - } - } + update_pipeline(vf); - if (in) { - MP_TARRAY_INSERT_AT(p, p->queue, p->num_queue, 0, in); - p->current_pos++; - assert(p->num_queue != 1 || p->current_pos == 0); + if (in && in->imgfmt != IMGFMT_VAAPI) { + struct mp_image *tmp = upload(vf, in); + talloc_free(in); + in = tmp; + if (!in) + return -1; } - // Discard unneeded past frames. - // Note that we keep at least 1 past frame (for PTS calculations). - while (p->num_queue - (p->current_pos + 1) > MPMAX(p->needed_past_frames, 1)) { - assert(p->num_queue > 0); - talloc_free(p->queue[p->num_queue - 1]); - p->num_queue--; - } + mp_refqueue_add_input(p->queue, in); + return 0; +} + +static int filter_out(struct vf_instance *vf) +{ + struct vf_priv_s *p = vf->priv; - if (p->current_pos < p->needed_future_frames && in) - return 0; // wait until future frames have been filled + if (!mp_refqueue_has_output(p->queue)) + return 0; - if (p->current_pos >= 0 && p->current_pos < p->num_queue) { - output_frames(vf); - p->current_pos--; + // no filtering + if (!p->pipe.num_filters || !mp_refqueue_should_deint(p->queue)) { + struct mp_image *in = mp_refqueue_get(p->queue, 0); + vf_add_output_frame(vf, mp_image_new_ref(in)); + mp_refqueue_next(p->queue); + return 0; } + + struct mp_image *out = render(vf); + mp_refqueue_next_field(p->queue); + if (!out) + return -1; // cannot render + vf_add_output_frame(vf, out); return 0; } @@ -350,10 +303,25 @@ static int reconfig(struct vf_instance *vf, struct mp_image_params *in, { struct vf_priv_s *p = vf->priv; + flush_frames(vf); + talloc_free(p->pool); + p->pool = NULL; + p->params = *in; + + 
p->current_rt_format = VA_RT_FORMAT_YUV420; + p->pool = mp_image_pool_new(20); + va_pool_set_allocator(p->pool, p->va, p->current_rt_format); + + struct mp_image *probe = mp_image_pool_get(p->pool, IMGFMT_VAAPI, in->w, in->h); + if (!probe) + return -1; + va_surface_init_subformat(probe); *out = *in; - out->imgfmt = IMGFMT_VAAPI; - flush_frames(vf); + out->imgfmt = probe->params.imgfmt; + out->hw_subfmt = probe->params.hw_subfmt; + talloc_free(probe); + return 0; } @@ -368,6 +336,7 @@ static void uninit(struct vf_instance *vf) vaDestroyConfig(p->display, p->config); talloc_free(p->pool); flush_frames(vf); + mp_refqueue_free(p->queue); } static int query_format(struct vf_instance *vf, unsigned int imgfmt) @@ -476,19 +445,20 @@ static bool initialize(struct vf_instance *vf) static int vf_open(vf_instance_t *vf) { + struct vf_priv_s *p = vf->priv; + vf->reconfig = reconfig; vf->filter_ext = filter_ext; + vf->filter_out = filter_out; vf->query_format = query_format; vf->uninit = uninit; vf->control = control; - struct vf_priv_s *p = vf->priv; - if (!vf->hwdec) - return false; - hwdec_request_api(vf->hwdec, "vaapi"); - p->va = vf->hwdec->hwctx ? vf->hwdec->hwctx->vaapi_ctx : NULL; - if (!p->va || !p->va->display) - return false; + p->queue = mp_refqueue_alloc(); + + p->va = hwdec_devices_load(vf->hwdec_devs, HWDEC_VAAPI); + if (!p->va) + return 0; p->display = p->va->display; if (initialize(vf)) return true; diff --git a/video/filter/vf_vdpaupp.c b/video/filter/vf_vdpaupp.c index 882b80d..92a40ec 100644 --- a/video/filter/vf_vdpaupp.c +++ b/video/filter/vf_vdpaupp.c @@ -1,20 +1,18 @@ /* * This file is part of mpv. * - * Parts based on fragments of vo_vdpau.c: Copyright (C) 2009 Uoti Urpala - * - * mpv is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
+ * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. * * mpv is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * GNU Lesser General Public License for more details. * - * You should have received a copy of the GNU General Public License along - * with mpv. If not, see <http://www.gnu.org/licenses/>. + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. */ #include <stdio.h> @@ -33,6 +31,7 @@ #include "video/vdpau.h" #include "video/vdpau_mixer.h" #include "vf.h" +#include "refqueue.h" // Note: this filter does no actual filtering; it merely sets appropriate // flags on vdpau images (mp_vdpau_mixer_frame) to do the appropriate @@ -40,13 +39,7 @@ struct vf_priv_s { struct mp_vdpau_ctx *ctx; - - // This is needed to supply past/future fields and to calculate the - // interpolated timestamp. 
- struct mp_image *buffered[3]; - int num_buffered; - - int prev_pos; // last field that was output + struct mp_refqueue *queue; int def_deintmode; int deint_enabled; @@ -54,130 +47,92 @@ struct vf_priv_s { struct mp_vdpau_mixer_opts opts; }; -static void forget_frames(struct vf_instance *vf) +static int filter_ext(struct vf_instance *vf, struct mp_image *mpi) { struct vf_priv_s *p = vf->priv; - for (int n = 0; n < p->num_buffered; n++) - talloc_free(p->buffered[n]); - p->num_buffered = 0; - p->prev_pos = 0; -} -#define FIELD_VALID(p, f) ((f) >= 0 && (f) < (p)->num_buffered * 2) + if (p->opts.deint >= 2) { + mp_refqueue_set_refs(p->queue, 1, 1); // 2 past fields, 1 future field + } else { + mp_refqueue_set_refs(p->queue, 0, 0); + } + mp_refqueue_set_mode(p->queue, + (p->deint_enabled ? MP_MODE_DEINT : 0) | + (p->interlaced_only ? MP_MODE_INTERLACED_ONLY : 0) | + (p->opts.deint >= 2 ? MP_MODE_OUTPUT_FIELDS : 0)); + + if (mpi) { + struct mp_image *new = mp_vdpau_upload_video_surface(p->ctx, mpi); + talloc_free(mpi); + if (!new) + return -1; + mpi = new; + + if (mp_vdpau_mixed_frame_get(mpi)) { + MP_ERR(vf, "Can't apply vdpaupp filter multiple times.\n"); + vf_add_output_frame(vf, mpi); + return -1; + } + } + + mp_refqueue_add_input(p->queue, mpi); + return 0; +} static VdpVideoSurface ref_field(struct vf_priv_s *p, struct mp_vdpau_mixer_frame *frame, int pos) { - if (!FIELD_VALID(p, pos)) - return VDP_INVALID_HANDLE; - struct mp_image *mpi = mp_image_new_ref(p->buffered[pos / 2]); + struct mp_image *mpi = mp_image_new_ref(mp_refqueue_get_field(p->queue, pos)); if (!mpi) return VDP_INVALID_HANDLE; talloc_steal(frame, mpi); return (uintptr_t)mpi->planes[3]; } -// pos==0 means last field of latest frame, 1 earlier field of latest frame, -// 2 last field of previous frame and so on -static bool output_field(struct vf_instance *vf, int pos, bool deint) +static int filter_out(struct vf_instance *vf) { struct vf_priv_s *p = vf->priv; - if (!FIELD_VALID(p, pos)) - return 
false; + if (!mp_refqueue_has_output(p->queue)) + return 0; - struct mp_image *mpi = mp_vdpau_mixed_frame_create(p->buffered[pos / 2]); + struct mp_image *mpi = + mp_vdpau_mixed_frame_create(mp_refqueue_get_field(p->queue, 0)); if (!mpi) - return false; // skip output on OOM + return -1; // OOM struct mp_vdpau_mixer_frame *frame = mp_vdpau_mixed_frame_get(mpi); - frame->field = VDP_VIDEO_MIXER_PICTURE_STRUCTURE_FRAME; - if (p->opts.deint && deint) { - int top_field_first = !!(mpi->fields & MP_IMGFIELD_TOP_FIRST); - frame->field = top_field_first ^ (pos & 1) ? - VDP_VIDEO_MIXER_PICTURE_STRUCTURE_BOTTOM_FIELD: - VDP_VIDEO_MIXER_PICTURE_STRUCTURE_TOP_FIELD; + if (!mp_refqueue_should_deint(p->queue)) { + frame->field = VDP_VIDEO_MIXER_PICTURE_STRUCTURE_FRAME; + } else if (mp_refqueue_is_top_field(p->queue)) { + frame->field = VDP_VIDEO_MIXER_PICTURE_STRUCTURE_TOP_FIELD; + } else { + frame->field = VDP_VIDEO_MIXER_PICTURE_STRUCTURE_BOTTOM_FIELD; } - frame->future[0] = ref_field(p, frame, pos - 1); - frame->current = ref_field(p, frame, pos); - frame->past[0] = ref_field(p, frame, pos + 1); - frame->past[1] = ref_field(p, frame, pos + 2); + frame->future[0] = ref_field(p, frame, 1); + frame->current = ref_field(p, frame, 0); + frame->past[0] = ref_field(p, frame, -1); + frame->past[1] = ref_field(p, frame, -2); frame->opts = p->opts; mpi->planes[3] = (void *)(uintptr_t)frame->current; - // Interpolate timestamps of extra fields (these always have even indexes) - int idx = pos / 2; - if (idx > 0 && !(pos & 1) && p->opts.deint >= 2 && deint) { - double pts1 = p->buffered[idx - 1]->pts; - double pts2 = p->buffered[idx]->pts; - double diff = pts1 - pts2; - mpi->pts = diff > 0 && diff < 0.5 ? (pts1 + pts2) / 2 : pts2; - } + mp_refqueue_next_field(p->queue); vf_add_output_frame(vf, mpi); - return true; -} - -static int filter_ext(struct vf_instance *vf, struct mp_image *mpi) -{ - struct vf_priv_s *p = vf->priv; - int maxbuffer = p->opts.deint >= 2 ? 
3 : 2; - bool eof = !mpi; - - if (mpi) { - struct mp_image *new = mp_vdpau_upload_video_surface(p->ctx, mpi); - talloc_free(mpi); - if (!new) - return -1; - mpi = new; - - if (mp_vdpau_mixed_frame_get(mpi)) { - MP_ERR(vf, "Can't apply vdpaupp filter multiple times.\n"); - vf_add_output_frame(vf, mpi); - return -1; - } - - while (p->num_buffered >= maxbuffer) { - talloc_free(p->buffered[p->num_buffered - 1]); - p->num_buffered--; - } - for (int n = p->num_buffered; n > 0; n--) - p->buffered[n] = p->buffered[n - 1]; - p->buffered[0] = mpi; - p->num_buffered++; - p->prev_pos += 2; - } - - bool deint = (mpi && (mpi->fields & MP_IMGFIELD_INTERLACED)) || !p->interlaced_only; - - while (1) { - int current = p->prev_pos - 1; - if (!FIELD_VALID(p, current)) - break; - // No field-splitting deinterlace -> only output first field (odd index) - if ((current & 1) || (deint && p->opts.deint >= 2)) { - // Wait for enough future frames being buffered. - // (Past frames are always around if available at all.) 
- if (!eof && !FIELD_VALID(p, current - 1)) - break; - if (!output_field(vf, current, deint)) - break; - } - p->prev_pos = current; - } - return 0; } static int reconfig(struct vf_instance *vf, struct mp_image_params *in, struct mp_image_params *out) { - forget_frames(vf); + struct vf_priv_s *p = vf->priv; + mp_refqueue_flush(p->queue); *out = *in; out->imgfmt = IMGFMT_VDPAU; + out->hw_subfmt = 0; return 0; } @@ -194,7 +149,7 @@ static int control(vf_instance_t *vf, int request, void *data) switch (request) { case VFCTRL_SEEK_RESET: - forget_frames(vf); + mp_refqueue_flush(p->queue); return CONTROL_OK; case VFCTRL_GET_DEINTERLACE: *(int *)data = !!p->deint_enabled; @@ -209,7 +164,9 @@ static int control(vf_instance_t *vf, int request, void *data) static void uninit(struct vf_instance *vf) { - forget_frames(vf); + struct vf_priv_s *p = vf->priv; + + mp_refqueue_free(p->queue); } static int vf_open(vf_instance_t *vf) @@ -218,15 +175,14 @@ static int vf_open(vf_instance_t *vf) vf->reconfig = reconfig; vf->filter_ext = filter_ext; - vf->filter = NULL; + vf->filter_out = filter_out; vf->query_format = query_format; vf->control = control; vf->uninit = uninit; - if (!vf->hwdec) - return 0; - hwdec_request_api(vf->hwdec, "vdpau"); - p->ctx = vf->hwdec->hwctx ? 
vf->hwdec->hwctx->vdpau_ctx : NULL; + p->queue = mp_refqueue_alloc(); + + p->ctx = hwdec_devices_load(vf->hwdec_devs, HWDEC_VDPAU); if (!p->ctx) return 0; diff --git a/video/filter/vf_vdpaurb.c b/video/filter/vf_vdpaurb.c index 62f7f34..2e6da79 100644 --- a/video/filter/vf_vdpaurb.c +++ b/video/filter/vf_vdpaurb.c @@ -35,10 +35,7 @@ struct vf_priv_s { static int filter_ext(struct vf_instance *vf, struct mp_image *mpi) { - VdpStatus vdp_st; struct vf_priv_s *p = vf->priv; - struct mp_vdpau_ctx *ctx = p->ctx; - struct vdp_functions *vdp = &ctx->vdp; if (!mpi) { return 0; @@ -56,21 +53,14 @@ static int filter_ext(struct vf_instance *vf, struct mp_image *mpi) return -1; } - struct mp_image *out = vf_alloc_out_image(vf); - if (!out) { + struct mp_hwdec_ctx *hwctx = &p->ctx->hwctx; + + struct mp_image *out = hwctx->download_image(hwctx, mpi, vf->out_pool); + if (!out || out->imgfmt != IMGFMT_NV12) { mp_image_unrefp(&mpi); + mp_image_unrefp(&out); return -1; } - mp_image_copy_attributes(out, mpi); - - VdpVideoSurface surface = (uintptr_t)mpi->planes[3]; - assert(surface > 0); - - vdp_st = vdp->video_surface_get_bits_y_cb_cr(surface, - VDP_YCBCR_FORMAT_NV12, - (void * const *)out->planes, - out->stride); - CHECK_VDP_WARNING(vf, "Error when calling vdp_output_surface_get_bits_y_cb_cr"); vf_add_output_frame(vf, out); mp_image_unrefp(&mpi); @@ -83,6 +73,7 @@ static int reconfig(struct vf_instance *vf, struct mp_image_params *in, *out = *in; if (in->imgfmt == IMGFMT_VDPAU) { out->imgfmt = IMGFMT_NV12; + out->hw_subfmt = 0; } return 0; } @@ -101,14 +92,9 @@ static int vf_open(vf_instance_t *vf) vf->reconfig = reconfig; vf->query_format = query_format; - if (!vf->hwdec) { + p->ctx = hwdec_devices_load(vf->hwdec_devs, HWDEC_VDPAU); + if (!p->ctx) return 0; - } - hwdec_request_api(vf->hwdec, "vdpau"); - p->ctx = vf->hwdec->hwctx ? 
vf->hwdec->hwctx->vdpau_ctx : NULL; - if (!p->ctx) { - return 0; - } return 1; } diff --git a/video/fmt-conversion.c b/video/fmt-conversion.c index 5334206..1fca8bf 100644 --- a/video/fmt-conversion.c +++ b/video/fmt-conversion.c @@ -109,13 +109,17 @@ static const struct { #endif {IMGFMT_VAAPI, AV_PIX_FMT_VAAPI_VLD}, {IMGFMT_DXVA2, AV_PIX_FMT_DXVA2_VLD}, -#if HAVE_D3D11VA_HWACCEL +#if HAVE_D3D_HWACCEL {IMGFMT_D3D11VA, AV_PIX_FMT_D3D11VA_VLD}, #endif #if HAVE_AV_PIX_FMT_MMAL {IMGFMT_MMAL, AV_PIX_FMT_MMAL}, #endif +#ifdef AV_PIX_FMT_P010 + {IMGFMT_P010, AV_PIX_FMT_P010}, +#endif + {0, AV_PIX_FMT_NONE} }; diff --git a/video/hwdec.c b/video/hwdec.c new file mode 100644 index 0000000..6db8d57 --- /dev/null +++ b/video/hwdec.c @@ -0,0 +1,90 @@ +#include <pthread.h> +#include <assert.h> + +#include "hwdec.h" + +struct mp_hwdec_devices { + pthread_mutex_t lock; + + struct mp_hwdec_ctx *hwctx; + + void (*load_api)(void *ctx, enum hwdec_type type); + void *load_api_ctx; +}; + +struct mp_hwdec_devices *hwdec_devices_create(void) +{ + struct mp_hwdec_devices *devs = talloc_zero(NULL, struct mp_hwdec_devices); + pthread_mutex_init(&devs->lock, NULL); + return devs; +} + +void hwdec_devices_destroy(struct mp_hwdec_devices *devs) +{ + if (!devs) + return; + assert(!devs->hwctx); // must have been hwdec_devices_remove()ed + assert(!devs->load_api); // must have been unset + pthread_mutex_destroy(&devs->lock); + talloc_free(devs); +} + +struct mp_hwdec_ctx *hwdec_devices_get(struct mp_hwdec_devices *devs, + enum hwdec_type type) +{ + struct mp_hwdec_ctx *res = NULL; + pthread_mutex_lock(&devs->lock); + if (devs->hwctx && devs->hwctx->type == type) + res = devs->hwctx; + pthread_mutex_unlock(&devs->lock); + return res; +} + +struct mp_hwdec_ctx *hwdec_devices_get_first(struct mp_hwdec_devices *devs) +{ + pthread_mutex_lock(&devs->lock); + struct mp_hwdec_ctx *res = devs->hwctx; + pthread_mutex_unlock(&devs->lock); + return res; +} + +void hwdec_devices_add(struct mp_hwdec_devices 
*devs, struct mp_hwdec_ctx *ctx) +{ + pthread_mutex_lock(&devs->lock); + // We support only 1 device; ignore the rest. + if (!devs->hwctx) + devs->hwctx = ctx; + pthread_mutex_unlock(&devs->lock); +} + +void hwdec_devices_remove(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx) +{ + pthread_mutex_lock(&devs->lock); + if (devs->hwctx == ctx) + devs->hwctx = NULL; + pthread_mutex_unlock(&devs->lock); +} + +void hwdec_devices_set_loader(struct mp_hwdec_devices *devs, + void (*load_api)(void *ctx, enum hwdec_type type), void *load_api_ctx) +{ + devs->load_api = load_api; + devs->load_api_ctx = load_api_ctx; +} + +// Cause VO to lazily load the requested device, and will block until this is +// done (even if not available). +void hwdec_devices_request(struct mp_hwdec_devices *devs, enum hwdec_type type) +{ + if (devs->load_api && !hwdec_devices_get_first(devs)) + devs->load_api(devs->load_api_ctx, type); +} + +void *hwdec_devices_load(struct mp_hwdec_devices *devs, enum hwdec_type type) +{ + if (!devs) + return NULL; + hwdec_devices_request(devs, type); + struct mp_hwdec_ctx *hwctx = hwdec_devices_get(devs, type); + return hwctx ? 
hwctx->ctx : NULL; +} diff --git a/video/hwdec.h b/video/hwdec.h index 898b035..5d563c9 100644 --- a/video/hwdec.h +++ b/video/hwdec.h @@ -7,32 +7,38 @@ struct mp_image_pool; // keep in sync with --hwdec option (see mp_hwdec_names) enum hwdec_type { - HWDEC_AUTO = -1, HWDEC_NONE = 0, - HWDEC_VDPAU = 1, - HWDEC_VIDEOTOOLBOX = 3, - HWDEC_VAAPI = 4, - HWDEC_VAAPI_COPY = 5, - HWDEC_DXVA2 = 6, - HWDEC_DXVA2_COPY = 7, - HWDEC_D3D11VA_COPY = 8, - HWDEC_RPI = 9, - HWDEC_MEDIACODEC = 10, + HWDEC_AUTO, + HWDEC_AUTO_COPY, + HWDEC_VDPAU, + HWDEC_VIDEOTOOLBOX, + HWDEC_VAAPI, + HWDEC_VAAPI_COPY, + HWDEC_DXVA2, + HWDEC_DXVA2_COPY, + HWDEC_D3D11VA, + HWDEC_D3D11VA_COPY, + HWDEC_RPI, + HWDEC_MEDIACODEC, }; +#define HWDEC_IS_AUTO(x) ((x) == HWDEC_AUTO || (x) == HWDEC_AUTO_COPY) + // hwdec_type names (options.c) extern const struct m_opt_choice_alternatives mp_hwdec_names[]; struct mp_hwdec_ctx { - enum hwdec_type type; - - void *priv; // for free use by hwdec implementation + enum hwdec_type type; // (never HWDEC_NONE or HWDEC_IS_AUTO) + const char *driver_name; // NULL if unknown/not loaded - // API-specific, not needed by all backends. - struct mp_vdpau_ctx *vdpau_ctx; - struct mp_vaapi_ctx *vaapi_ctx; - struct mp_d3d_ctx *d3d_ctx; - uint32_t (*get_vt_fmt)(struct mp_hwdec_ctx *ctx); + // This is never NULL. Its meaning depends on the .type field: + // HWDEC_VDPAU: struct mp_vaapi_ctx* + // HWDEC_VIDEOTOOLBOX: struct mp_vt_ctx* + // HWDEC_VAAPI: struct mp_vaapi_ctx* + // HWDEC_D3D11VA: ID3D11Device* + // HWDEC_DXVA2: IDirect3DDevice9* + // HWDEC_DXVA2_COPY: IDirect3DDevice9* + void *ctx; // Optional. // Allocates a software image from the pool, downloads the hw image from @@ -44,24 +50,50 @@ struct mp_hwdec_ctx { struct mp_image_pool *swpool); }; -// Used to communicate hardware decoder API handles from VO to video decoder. -// The VO can set the context pointer for supported APIs. 
-struct mp_hwdec_info { - // (Since currently only 1 hwdec API is loaded at a time, this pointer - // simply maps to the loaded one.) - struct mp_hwdec_ctx *hwctx; - - // Can be used to lazily load a requested API. - // api_name is e.g. "vdpau" (like the fields above, without "_ctx") - // Can be NULL, is idempotent, caller checks hwctx fields for success/access. - // Due to threading, the callback is the only code that is allowed to - // change fields in this struct after initialization. - void (*load_api)(struct mp_hwdec_info *info, const char *api_name); - void *load_api_ctx; +struct mp_vt_ctx { + void *priv; + uint32_t (*get_vt_fmt)(struct mp_vt_ctx *ctx); }; -// Trivial helper to call info->load_api(). -// Implemented in vd_lavc.c. -void hwdec_request_api(struct mp_hwdec_info *info, const char *api_name); +// Used to communicate hardware decoder device handles from VO to video decoder. +struct mp_hwdec_devices; + +struct mp_hwdec_devices *hwdec_devices_create(void); +void hwdec_devices_destroy(struct mp_hwdec_devices *devs); + +// Return the device context for the given API type. Returns NULL if none +// available. Logically, the returned pointer remains valid until VO +// uninitialization is started (all users of it must be uninitialized before). +// hwdec_devices_request() may be used before this to lazily load devices. +struct mp_hwdec_ctx *hwdec_devices_get(struct mp_hwdec_devices *devs, + enum hwdec_type type); + +// For code which still strictly assumes there is 1 (or none) device. +struct mp_hwdec_ctx *hwdec_devices_get_first(struct mp_hwdec_devices *devs); + +// Add this to the list of internal devices. Adding the same pointer twice must +// be avoided. +void hwdec_devices_add(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx); + +// Remove this from the list of internal devices. Idempotent/ignores entries +// not added yet. 
+void hwdec_devices_remove(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx); + +// Can be used to enable lazy loading of an API with hwdec_devices_request(). +// If used at all, this must be set/unset during initialization/uninitialization, +// as concurrent use with hwdec_devices_request() is a race condition. +void hwdec_devices_set_loader(struct mp_hwdec_devices *devs, + void (*load_api)(void *ctx, enum hwdec_type type), void *load_api_ctx); + +// Cause VO to lazily load the requested device, and will block until this is +// done (even if not available). +void hwdec_devices_request(struct mp_hwdec_devices *devs, enum hwdec_type type); + +// Convenience function: +// - return NULL if devs==NULL +// - call hwdec_devices_request(devs, type) +// - call hwdec_devices_get(devs, type) +// - then return the mp_hwdec_ctx.ctx field +void *hwdec_devices_load(struct mp_hwdec_devices *devs, enum hwdec_type type); #endif diff --git a/video/image_writer.c b/video/image_writer.c index 6c1c994..5ba89c8 100644 --- a/video/image_writer.c +++ b/video/image_writer.c @@ -136,9 +136,21 @@ static bool write_lavc(struct image_writer_ctx *ctx, mp_image_t *image, FILE *fp pic->color_primaries = mp_csp_prim_to_avcol_pri(image->params.primaries); pic->color_trc = mp_csp_trc_to_avcol_trc(image->params.gamma); } + +#if HAVE_AVCODEC_NEW_CODEC_API + int ret = avcodec_send_frame(avctx, pic); + if (ret < 0) + goto error_exit; + avcodec_send_frame(avctx, NULL); // send EOF + ret = avcodec_receive_packet(avctx, &pkt); + if (ret < 0) + goto error_exit; + got_output = 1; +#else int ret = avcodec_encode_video2(avctx, &pkt, pic, &got_output); if (ret < 0) goto error_exit; +#endif fwrite(pkt.data, pkt.size, 1, fp); diff --git a/video/img_format.c b/video/img_format.c index fe2ca14..24545a8 100644 --- a/video/img_format.c +++ b/video/img_format.c @@ -36,6 +36,8 @@ struct mp_imgfmt_entry { static const struct mp_imgfmt_entry mp_imgfmt_list[] = { // not in ffmpeg {"vdpau_output", 
IMGFMT_VDPAU_OUTPUT}, + {"d3d11_nv12", IMGFMT_D3D11NV12}, + {"d3d11_rgb", IMGFMT_D3D11RGB}, // FFmpeg names have an annoying "_vld" suffix {"videotoolbox", IMGFMT_VIDEOTOOLBOX}, {"vaapi", IMGFMT_VAAPI}, @@ -120,12 +122,20 @@ static struct mp_imgfmt_desc mp_only_imgfmt_desc(int mpfmt) { switch (mpfmt) { case IMGFMT_VDPAU_OUTPUT: + case IMGFMT_D3D11RGB: return (struct mp_imgfmt_desc) { .id = mpfmt, .avformat = AV_PIX_FMT_NONE, .flags = MP_IMGFLAG_BE | MP_IMGFLAG_LE | MP_IMGFLAG_RGB | MP_IMGFLAG_HWACCEL, }; + case IMGFMT_D3D11NV12: + return (struct mp_imgfmt_desc) { + .id = mpfmt, + .avformat = AV_PIX_FMT_NONE, + .flags = MP_IMGFLAG_BE | MP_IMGFLAG_LE | MP_IMGFLAG_YUV | + MP_IMGFLAG_HWACCEL, + }; } return (struct mp_imgfmt_desc) {0}; } diff --git a/video/img_format.h b/video/img_format.h index 605dc92..b6f5830 100644 --- a/video/img_format.h +++ b/video/img_format.h @@ -157,6 +157,9 @@ enum mp_imgfmt { IMGFMT_NV12, IMGFMT_NV21, + // Like IMGFMT_NV12, but with 16 bits per component + IMGFMT_P010, + // RGB/BGR Formats // Byte accessed (low address to high address) @@ -195,7 +198,17 @@ enum mp_imgfmt { IMGFMT_VDPAU, // VdpVideoSurface IMGFMT_VDPAU_OUTPUT, // VdpOutputSurface IMGFMT_VAAPI, - IMGFMT_D3D11VA, // ID3D11VideoDecoderOutputView (NV12/P010/P016) + // NV12/P010/P016 + // plane 1: ID3D11Texture2D + // plane 2: slice index casted to pointer + // plane 3: ID3D11VideoDecoderOutputView (can be absent in filters/VO) + IMGFMT_D3D11VA, + // Like IMGFMT_D3D11VA, but format is restricted to NV12. + IMGFMT_D3D11NV12, + // Like IMGFMT_D3D11VA, but format is restricted to a certain RGB format. + // Also, it must have a share handle, have been flushed, and not be a + // texture array slice. 
+ IMGFMT_D3D11RGB, IMGFMT_DXVA2, // IDirect3DSurface9 (NV12/P010/P016) IMGFMT_MMAL, // MMAL_BUFFER_HEADER_T IMGFMT_VIDEOTOOLBOX, // CVPixelBufferRef diff --git a/video/mp_image.c b/video/mp_image.c index 565de18..d5b9748 100644 --- a/video/mp_image.c +++ b/video/mp_image.c @@ -81,12 +81,13 @@ static bool mp_image_alloc_planes(struct mp_image *mpi) void mp_image_setfmt(struct mp_image *mpi, int out_fmt) { + struct mp_image_params params = mpi->params; struct mp_imgfmt_desc fmt = mp_imgfmt_get_desc(out_fmt); - mpi->params.imgfmt = fmt.id; + params.imgfmt = fmt.id; mpi->fmt = fmt; mpi->imgfmt = fmt.id; mpi->num_planes = fmt.num_planes; - mp_image_set_size(mpi, mpi->w, mpi->h); + mpi->params = params; } static void mp_image_destructor(void *ptr) @@ -94,6 +95,7 @@ static void mp_image_destructor(void *ptr) mp_image_t *mpi = ptr; for (int p = 0; p < MP_MAX_PLANES; p++) av_buffer_unref(&mpi->bufs[p]); + av_buffer_unref(&mpi->hwctx); } int mp_chroma_div_up(int size, int shift) @@ -119,7 +121,6 @@ void mp_image_set_size(struct mp_image *mpi, int w, int h) assert(w >= 0 && h >= 0); mpi->w = mpi->params.w = w; mpi->h = mpi->params.h = h; - mpi->params.p_w = mpi->params.p_h = 1; } void mp_image_set_params(struct mp_image *image, @@ -163,17 +164,12 @@ void mp_image_steal_data(struct mp_image *dst, struct mp_image *src) assert(dst->imgfmt == src->imgfmt && dst->w == src->w && dst->h == src->h); assert(dst->bufs[0] && src->bufs[0]); - for (int p = 0; p < MP_MAX_PLANES; p++) { - dst->planes[p] = src->planes[p]; - dst->stride[p] = src->stride[p]; - } - mp_image_copy_attributes(dst, src); + mp_image_destructor(dst); // unref old + talloc_free_children(dst); - for (int p = 0; p < MP_MAX_PLANES; p++) { - av_buffer_unref(&dst->bufs[p]); - dst->bufs[p] = src->bufs[p]; - src->bufs[p] = NULL; - } + *dst = *src; + + *src = (struct mp_image){0}; talloc_free(src); } @@ -199,6 +195,11 @@ struct mp_image *mp_image_new_ref(struct mp_image *img) fail = true; } } + if (new->hwctx) { + new->hwctx 
= av_buffer_ref(new->hwctx); + if (!new->hwctx) + fail = true; + } if (!fail) return new; @@ -229,9 +230,10 @@ struct mp_image *mp_image_new_dummy_ref(struct mp_image *img) { struct mp_image *new = talloc_ptrtype(NULL, new); talloc_set_destructor(new, mp_image_destructor); - *new = *img; + *new = img ? *img : (struct mp_image){0}; for (int p = 0; p < MP_MAX_PLANES; p++) new->bufs[p] = NULL; + new->hwctx = NULL; return new; } @@ -539,7 +541,7 @@ bool mp_image_params_valid(const struct mp_image_params *p) if (p->w <= 0 || p->h <= 0 || (p->w + 128LL) * (p->h + 128LL) >= INT_MAX / 8) return false; - if (p->p_w <= 0 || p->p_h <= 0) + if (p->p_w < 0 || p->p_h < 0) return false; if (p->rotate < 0 || p->rotate >= 360) @@ -566,6 +568,7 @@ bool mp_image_params_equal(const struct mp_image_params *p1, p1->colorlevels == p2->colorlevels && p1->primaries == p2->primaries && p1->gamma == p2->gamma && + p1->peak == p2->peak && p1->chroma_location == p2->chroma_location && p1->rotate == p2->rotate && p1->stereo_in == p2->stereo_in && @@ -660,16 +663,25 @@ void mp_image_params_guess_csp(struct mp_image_params *params) params->primaries = MP_CSP_PRIM_AUTO; params->gamma = MP_CSP_TRC_AUTO; } + + // Guess the reference peak (independent of the colorspace) + if (params->gamma == MP_CSP_TRC_SMPTE_ST2084) { + if (!params->peak) + params->peak = 10000; // As per the spec + } } // Copy properties and data of the AVFrame into the mp_image, without taking // care of memory management issues. 
-void mp_image_copy_fields_from_av_frame(struct mp_image *dst, - struct AVFrame *src) +static void mp_image_copy_fields_from_av_frame(struct mp_image *dst, + struct AVFrame *src) { mp_image_setfmt(dst, pixfmt2imgfmt(src->format)); mp_image_set_size(dst, src->width, src->height); + dst->params.p_w = src->sample_aspect_ratio.num; + dst->params.p_h = src->sample_aspect_ratio.den; + for (int i = 0; i < 4; i++) { dst->planes[i] = src->data[i]; dst->stride[i] = src->linesize[i]; @@ -688,13 +700,16 @@ void mp_image_copy_fields_from_av_frame(struct mp_image *dst, // Copy properties and data of the mp_image into the AVFrame, without taking // care of memory management issues. -void mp_image_copy_fields_to_av_frame(struct AVFrame *dst, - struct mp_image *src) +static void mp_image_copy_fields_to_av_frame(struct AVFrame *dst, + struct mp_image *src) { dst->format = imgfmt2pixfmt(src->imgfmt); dst->width = src->w; dst->height = src->h; + dst->sample_aspect_ratio.num = src->params.p_w; + dst->sample_aspect_ratio.den = src->params.p_h; + for (int i = 0; i < 4; i++) { dst->data[i] = src->planes[i]; dst->linesize[i] = src->stride[i]; @@ -720,34 +735,41 @@ struct mp_image *mp_image_from_av_frame(struct AVFrame *av_frame) mp_image_copy_fields_from_av_frame(&t, av_frame); for (int p = 0; p < MP_MAX_PLANES; p++) t.bufs[p] = av_frame->buf[p]; +#if HAVE_AVUTIL_HAS_HWCONTEXT + t.hwctx = av_frame->hw_frames_ctx; +#endif return mp_image_new_ref(&t); } // Convert the mp_image reference to a AVFrame reference. -// Warning: img is unreferenced (i.e. free'd). This is asymmetric to -// mp_image_from_av_frame(). It was done as some sort of optimization, -// but now these semantics are pointless. -// On failure, img is only unreffed. 
-struct AVFrame *mp_image_to_av_frame_and_unref(struct mp_image *img) +struct AVFrame *mp_image_to_av_frame(struct mp_image *img) { - struct mp_image *new_ref = mp_image_new_ref(img); // ensure it's refcounted - talloc_free(img); - if (!new_ref) - return NULL; + struct mp_image *new_ref = mp_image_new_ref(img); AVFrame *frame = av_frame_alloc(); - if (!frame) { + if (!frame || !new_ref) { talloc_free(new_ref); + av_frame_free(&frame); return NULL; } mp_image_copy_fields_to_av_frame(frame, new_ref); - for (int p = 0; p < MP_MAX_PLANES; p++) { + for (int p = 0; p < MP_MAX_PLANES; p++) frame->buf[p] = new_ref->bufs[p]; - new_ref->bufs[p] = NULL; - } +#if HAVE_AVUTIL_HAS_HWCONTEXT + frame->hw_frames_ctx = new_ref->hwctx; +#endif + *new_ref = (struct mp_image){0}; talloc_free(new_ref); return frame; } +// Same as mp_image_to_av_frame(), but unref img. (It does so even on failure.) +struct AVFrame *mp_image_to_av_frame_and_unref(struct mp_image *img) +{ + AVFrame *frame = mp_image_to_av_frame(img); + talloc_free(img); + return frame; +} + void memcpy_pic(void *dst, const void *src, int bytesPerLine, int height, int dstStride, int srcStride) { diff --git a/video/mp_image.h b/video/mp_image.h index c00c78a..18d2596 100644 --- a/video/mp_image.h +++ b/video/mp_image.h @@ -42,11 +42,12 @@ struct mp_image_params { uint64_t hw_subfmt; // underlying format for some hwaccel pixfmts // (will use the HW API's format identifiers) int w, h; // image dimensions - int p_w, p_h; // define pixel aspect ratio (never 0/0) + int p_w, p_h; // define pixel aspect ratio (undefined: 0/0) enum mp_csp colorspace; enum mp_csp_levels colorlevels; enum mp_csp_prim primaries; enum mp_csp_trc gamma; + float peak; // 0 = auto/unknown enum mp_chroma_location chroma_location; // The image should be rotated clockwise (0-359 degrees). int rotate; @@ -100,6 +101,8 @@ typedef struct mp_image { // All mp_* functions manage this automatically; do not mess with it. // (See also AVFrame.buf.) 
struct AVBufferRef *bufs[MP_MAX_PLANES]; + // Points to AVHWFramesContext* (same as AVFrame.hw_frames_ctx) + struct AVBufferRef *hwctx; } mp_image_t; int mp_chroma_div_up(int size, int shift); @@ -152,11 +155,8 @@ void mp_image_set_attributes(struct mp_image *image, const struct mp_image_params *params); struct AVFrame; -void mp_image_copy_fields_from_av_frame(struct mp_image *dst, - struct AVFrame *src); -void mp_image_copy_fields_to_av_frame(struct AVFrame *dst, - struct mp_image *src); struct mp_image *mp_image_from_av_frame(struct AVFrame *av_frame); +struct AVFrame *mp_image_to_av_frame(struct mp_image *img); struct AVFrame *mp_image_to_av_frame_and_unref(struct mp_image *img); void memcpy_pic(void *dst, const void *src, int bytesPerLine, int height, diff --git a/video/out/bitmap_packer.c b/video/out/bitmap_packer.c index 4896076..3f75a72 100644 --- a/video/out/bitmap_packer.c +++ b/video/out/bitmap_packer.c @@ -22,6 +22,7 @@ #include <stdlib.h> #include <assert.h> #include <stdio.h> +#include <limits.h> #include <libavutil/common.h> @@ -46,10 +47,7 @@ void packer_reset(struct bitmap_packer *packer) void packer_get_bb(struct bitmap_packer *packer, struct pos out_bb[2]) { out_bb[0] = (struct pos) {0}; - out_bb[1] = (struct pos) { - FFMIN(packer->used_width + packer->padding, packer->w), - FFMIN(packer->used_height + packer->padding, packer->h), - }; + out_bb[1] = (struct pos) {packer->used_width, packer->used_height}; } #define HEIGHT_SORT_BITS 4 @@ -138,8 +136,12 @@ int packer_pack(struct bitmap_packer *packer) struct pos *in = packer->in; int xmax = 0, ymax = 0; for (int i = 0; i < packer->count; i++) { - if (in[i].x <= packer->padding || in[i].y <= packer->padding) + if (in[i].x <= 0 || in[i].y <= 0) { in[i] = (struct pos){0, 0}; + } else { + in[i].x += packer->padding * 2; + in[i].y += packer->padding * 2; + } if (in[i].x < 0 || in [i].x > 65535 || in[i].y < 0 || in[i].y > 65535) { fprintf(stderr, "Invalid OSD / subtitle bitmap size\n"); abort(); @@ -147,8 
+149,6 @@ int packer_pack(struct bitmap_packer *packer) xmax = FFMAX(xmax, in[i].x); ymax = FFMAX(ymax, in[i].y); } - xmax = FFMAX(0, xmax - packer->padding); - ymax = FFMAX(0, ymax - packer->padding); if (xmax > packer->w) packer->w = 1 << (av_log2(xmax - 1) + 1); if (ymax > packer->h) @@ -156,21 +156,27 @@ int packer_pack(struct bitmap_packer *packer) while (1) { int used_width = 0; int y = pack_rectangles(in, packer->result, packer->count, - packer->w + packer->padding, - packer->h + packer->padding, + packer->w, packer->h, packer->scratch, &used_width); if (y >= 0) { - // No padding at edges packer->used_width = FFMIN(used_width, packer->w); packer->used_height = FFMIN(y, packer->h); assert(packer->w == 0 || IS_POWER_OF_2(packer->w)); assert(packer->h == 0 || IS_POWER_OF_2(packer->h)); + if (packer->padding) { + for (int i = 0; i < packer->count; i++) { + packer->result[i].x += packer->padding; + packer->result[i].y += packer->padding; + } + } return packer->w != w_orig || packer->h != h_orig; } - if (packer->w <= packer->h && packer->w != packer->w_max) - packer->w = FFMIN(packer->w * 2, packer->w_max); - else if (packer->h != packer->h_max) - packer->h = FFMIN(packer->h * 2, packer->h_max); + int w_max = packer->w_max > 0 ? packer->w_max : INT_MAX; + int h_max = packer->h_max > 0 ? 
packer->h_max : INT_MAX; + if (packer->w <= packer->h && packer->w != w_max) + packer->w = FFMIN(packer->w * 2, w_max); + else if (packer->h != h_max) + packer->h = FFMIN(packer->h * 2, h_max); else { packer->w = w_orig; packer->h = h_orig; @@ -201,9 +207,8 @@ int packer_pack_from_subbitmaps(struct bitmap_packer *packer, if (b->format == SUBBITMAP_EMPTY) return 0; packer_set_size(packer, b->num_parts); - int a = packer->padding; for (int i = 0; i < b->num_parts; i++) - packer->in[i] = (struct pos){b->parts[i].w + a, b->parts[i].h + a}; + packer->in[i] = (struct pos){b->parts[i].w, b->parts[i].h}; return packer_pack(packer); } diff --git a/video/out/bitmap_packer.h b/video/out/bitmap_packer.h index b86c3ec..8fd2fce 100644 --- a/video/out/bitmap_packer.h +++ b/video/out/bitmap_packer.h @@ -23,7 +23,6 @@ struct bitmap_packer { int asize; }; -struct ass_image; struct sub_bitmaps; // Clear all internal state. Leave the following fields: w_max, h_max diff --git a/video/out/cocoa/events_view.m b/video/out/cocoa/events_view.m index 6fec712..4a0c4bf 100644 --- a/video/out/cocoa/events_view.m +++ b/video/out/cocoa/events_view.m @@ -358,8 +358,13 @@ { NSPasteboard *pboard = [sender draggingPasteboard]; if ([[pboard types] containsObject:NSURLPboardType]) { - NSURL *file_url = [NSURL URLFromPasteboard:pboard]; - [self.adapter handleFilesArray:@[[file_url absoluteString]]]; + NSArray *pbitems = [pboard readObjectsForClasses:@[[NSURL class]] + options:@{}]; + NSMutableArray* ar = [[[NSMutableArray alloc] init] autorelease]; + for (NSURL* url in pbitems) { + [ar addObject:[url path]]; + } + [self.adapter handleFilesArray:ar]; return YES; } else if ([[pboard types] containsObject:NSFilenamesPboardType]) { NSArray *pbitems = [pboard propertyListForType:NSFilenamesPboardType]; diff --git a/video/out/cocoa/window.m b/video/out/cocoa/window.m index 646281d..d89e296 100644 --- a/video/out/cocoa/window.m +++ b/video/out/cocoa/window.m @@ -56,6 +56,11 @@ [self.adapter setNeedsResize]; } 
+- (void)windowDidChangeScreen:(NSNotification *)notification +{ + [self.adapter windowDidChangeScreen:notification]; +} + - (void)windowDidChangeScreenProfile:(NSNotification *)notification { [self.adapter didChangeWindowedScreenProfile:[self screen]]; diff --git a/video/out/cocoa_common.m b/video/out/cocoa_common.m index 30b832d..21e1246 100644 --- a/video/out/cocoa_common.m +++ b/video/out/cocoa_common.m @@ -49,6 +49,9 @@ #include "common/msg.h" +static CVReturn displayLinkCallback(CVDisplayLinkRef displayLink, const CVTimeStamp* now, + const CVTimeStamp* outputTime, CVOptionFlags flagsIn, + CVOptionFlags* flagsOut, void* displayLinkContext); static int vo_cocoa_fullscreen(struct vo *vo); static void cocoa_rm_fs_screen_profile_observer(struct vo_cocoa_state *s); static void cocoa_add_screen_reconfiguration_observer(struct vo *vo); @@ -370,6 +373,7 @@ static void vo_cocoa_update_screens_pointers(struct vo *vo) static void vo_cocoa_update_screen_fps(struct vo *vo) { struct vo_cocoa_state *s = vo->cocoa; + NSScreen *screen = vo->opts->fullscreen ? s->fs_screen : s->current_screen; NSDictionary* sinfo = [screen deviceDescription]; NSNumber* sid = [sinfo objectForKey:@"NSScreenNumber"]; @@ -377,16 +381,24 @@ static void vo_cocoa_update_screen_fps(struct vo *vo) CVDisplayLinkRef link; CVDisplayLinkCreateWithCGDisplay(did, &link); - s->screen_fps = CVDisplayLinkGetActualOutputVideoRefreshPeriod(link); + CVDisplayLinkSetOutputCallback(link, &displayLinkCallback, NULL); + CVDisplayLinkStart(link); + CVDisplayLinkSetCurrentCGDisplay(link, did); + + double display_period = CVDisplayLinkGetActualOutputVideoRefreshPeriod(link); - if (s->screen_fps == 0) { + if (display_period > 0) { + s->screen_fps = 1/display_period; + } else { // Fallback to using Nominal refresh rate from DisplayLink, // CVDisplayLinkGet *Actual* OutputVideoRefreshPeriod seems to // return 0 on some Apple devices. Use the nominal refresh period // instead. 
const CVTime t = CVDisplayLinkGetNominalOutputVideoRefreshPeriod(link); - if (!(t.flags & kCVTimeIsIndefinite)) + if (!(t.flags & kCVTimeIsIndefinite)) { s->screen_fps = (t.timeScale / (double) t.timeValue); + MP_VERBOSE(vo, "Falling back to %f for display sync.\n", s->screen_fps); + } } CVDisplayLinkRelease(link); @@ -394,6 +406,13 @@ static void vo_cocoa_update_screen_fps(struct vo *vo) flag_events(vo, VO_EVENT_WIN_STATE); } +static CVReturn displayLinkCallback(CVDisplayLinkRef displayLink, const CVTimeStamp* now, + const CVTimeStamp* outputTime, CVOptionFlags flagsIn, + CVOptionFlags* flagsOut, void* displayLinkContext) +{ + return kCVReturnSuccess; +} + static void vo_cocoa_update_screen_info(struct vo *vo, struct mp_rect *out_rc) { struct vo_cocoa_state *s = vo->cocoa; @@ -931,6 +950,11 @@ int vo_cocoa_control(struct vo *vo, int *events, int request, void *arg) [[EventsResponder sharedInstance] handleFilesArray:files]; } +- (void)windowDidChangeScreen:(NSNotification *)notification +{ + vo_cocoa_update_screen_info(self.vout, NULL); +} + - (void)didChangeWindowedScreenProfile:(NSScreen *)screen { flag_events(self.vout, VO_EVENT_ICC_PROFILE_CHANGED); diff --git a/video/out/drm_common.c b/video/out/drm_common.c index c105a14..a39db93 100644 --- a/video/out/drm_common.c +++ b/video/out/drm_common.c @@ -222,7 +222,7 @@ void kms_destroy(struct kms *kms) static void vt_switcher_sighandler(int sig) { unsigned char event = sig == RELEASE_SIGNAL ? 
EVT_RELEASE : EVT_ACQUIRE; - write(vt_switcher_pipe[1], &event, sizeof(event)); + (void)write(vt_switcher_pipe[1], &event, sizeof(event)); } static bool has_signal_installed(int signo) @@ -312,7 +312,7 @@ void vt_switcher_release(struct vt_switcher *s, void vt_switcher_interrupt_poll(struct vt_switcher *s) { unsigned char event = EVT_INTERRUPT; - write(vt_switcher_pipe[1], &event, sizeof(event)); + (void)write(vt_switcher_pipe[1], &event, sizeof(event)); } void vt_switcher_destroy(struct vt_switcher *s) diff --git a/video/out/opengl/angle_common.c b/video/out/opengl/angle_common.c new file mode 100644 index 0000000..21cc924 --- /dev/null +++ b/video/out/opengl/angle_common.c @@ -0,0 +1,13 @@ +#include "angle_common.h" + +// Test if Direct3D11 can be used by us. Basically, this prevents trying to use +// D3D11 on Win7, and then failing somewhere in the process. +bool d3d11_check_decoding(ID3D11Device *dev) +{ + HRESULT hr; + // We assume that NV12 is always supported, if hw decoding is supported at + // all. + UINT supported = 0; + hr = ID3D11Device_CheckFormatSupport(dev, DXGI_FORMAT_NV12, &supported); + return !FAILED(hr) && (supported & D3D11_BIND_DECODER); +} diff --git a/video/out/opengl/angle_common.h b/video/out/opengl/angle_common.h new file mode 100644 index 0000000..14ecd6a --- /dev/null +++ b/video/out/opengl/angle_common.h @@ -0,0 +1,13 @@ +#ifndef MP_ANGLE_COMMON_H +#define MP_ANGLE_COMMON_H + +#include <initguid.h> +#include <assert.h> +#include <windows.h> +#include <d3d11.h> + +#include <stdbool.h> + +bool d3d11_check_decoding(ID3D11Device *dev); + +#endif
\ No newline at end of file diff --git a/video/out/opengl/angle_dynamic.c b/video/out/opengl/angle_dynamic.c new file mode 100644 index 0000000..f4540c4 --- /dev/null +++ b/video/out/opengl/angle_dynamic.c @@ -0,0 +1,33 @@ +#include <pthread.h> +#include <windows.h> + +#define ANGLE_NO_ALIASES +#include "angle_dynamic.h" + +#include "common/common.h" + +#define ANGLE_DECL(NAME, VAR) \ + VAR; +ANGLE_FNS(ANGLE_DECL) + +static bool angle_loaded; +static pthread_once_t angle_load_once = PTHREAD_ONCE_INIT; + +static void angle_do_load(void) +{ + // Note: we let this handle "leak", as the functions remain valid forever. + HANDLE angle_dll = LoadLibraryW(L"LIBEGL.DLL"); + if (!angle_dll) + return; +#define ANGLE_LOAD_ENTRY(NAME, VAR) \ + MP_CONCAT(PFN_, NAME) = (void *)GetProcAddress(angle_dll, #NAME); \ + if (!MP_CONCAT(PFN_, NAME)) return; + ANGLE_FNS(ANGLE_LOAD_ENTRY) + angle_loaded = true; +} + +bool angle_load(void) +{ + pthread_once(&angle_load_once, angle_do_load); + return angle_loaded; +} diff --git a/video/out/opengl/angle_dynamic.h b/video/out/opengl/angle_dynamic.h new file mode 100644 index 0000000..87ad85c --- /dev/null +++ b/video/out/opengl/angle_dynamic.h @@ -0,0 +1,82 @@ +// Based on Khronos headers, thus MIT licensed. 
+ +#ifndef MP_ANGLE_DYNAMIC_H +#define MP_ANGLE_DYNAMIC_H + +#include <stdbool.h> + +#include <EGL/egl.h> +#include <EGL/eglext.h> + +#define ANGLE_FNS(FN) \ + FN(eglBindAPI, EGLBoolean (*EGLAPIENTRY PFN_eglBindAPI)(EGLenum)) \ + FN(eglBindTexImage, EGLBoolean (*EGLAPIENTRY PFN_eglBindTexImage) \ + (EGLDisplay, EGLSurface, EGLint)) \ + FN(eglChooseConfig, EGLBoolean (*EGLAPIENTRY PFN_eglChooseConfig) \ + (EGLDisplay, const EGLint *, EGLConfig *, EGLint, EGLint *)) \ + FN(eglCreateContext, EGLContext (*EGLAPIENTRY PFN_eglCreateContext) \ + (EGLDisplay, EGLConfig, EGLContext, const EGLint *)) \ + FN(eglCreatePbufferFromClientBuffer, EGLSurface (*EGLAPIENTRY \ + PFN_eglCreatePbufferFromClientBuffer)(EGLDisplay, EGLenum, \ + EGLClientBuffer, EGLConfig, const EGLint *)) \ + FN(eglCreateWindowSurface, EGLSurface (*EGLAPIENTRY \ + PFN_eglCreateWindowSurface)(EGLDisplay, EGLConfig, \ + EGLNativeWindowType, const EGLint *)) \ + FN(eglDestroyContext, EGLBoolean (*EGLAPIENTRY PFN_eglDestroyContext) \ + (EGLDisplay, EGLContext)) \ + FN(eglDestroySurface, EGLBoolean (*EGLAPIENTRY PFN_eglDestroySurface) \ + (EGLDisplay, EGLSurface)) \ + FN(eglGetConfigAttrib, EGLBoolean (*EGLAPIENTRY PFN_eglGetConfigAttrib) \ + (EGLDisplay, EGLConfig, EGLint, EGLint *)) \ + FN(eglGetCurrentContext, EGLContext (*EGLAPIENTRY \ + PFN_eglGetCurrentContext)(void)) \ + FN(eglGetCurrentDisplay, EGLDisplay (*EGLAPIENTRY \ + PFN_eglGetCurrentDisplay)(void)) \ + FN(eglGetDisplay, EGLDisplay (*EGLAPIENTRY PFN_eglGetDisplay) \ + (EGLNativeDisplayType)) \ + FN(eglGetError, EGLint (*EGLAPIENTRY PFN_eglGetError)(void)) \ + FN(eglGetProcAddress, void *(*EGLAPIENTRY \ + PFN_eglGetProcAddress)(const char *)) \ + FN(eglInitialize, EGLBoolean (*EGLAPIENTRY PFN_eglInitialize) \ + (EGLDisplay, EGLint *, EGLint *)) \ + FN(eglMakeCurrent, EGLBoolean (*EGLAPIENTRY PFN_eglMakeCurrent) \ + (EGLDisplay, EGLSurface, EGLSurface, EGLContext)) \ + FN(eglQueryString, const char *(*EGLAPIENTRY PFN_eglQueryString) \ + 
(EGLDisplay, EGLint)) \ + FN(eglSwapBuffers, EGLBoolean (*EGLAPIENTRY PFN_eglSwapBuffers) \ + (EGLDisplay, EGLSurface)) \ + FN(eglReleaseTexImage, EGLBoolean (*EGLAPIENTRY PFN_eglReleaseTexImage) \ + (EGLDisplay, EGLSurface, EGLint)) \ + FN(eglTerminate, EGLBoolean (*EGLAPIENTRY PFN_eglTerminate)(EGLDisplay)) + +#define ANGLE_EXT_DECL(NAME, VAR) \ + extern VAR; +ANGLE_FNS(ANGLE_EXT_DECL) + +bool angle_load(void); + +// Source compatibility to statically linked ANGLE. +#ifndef ANGLE_NO_ALIASES +#define eglBindAPI PFN_eglBindAPI +#define eglBindTexImage PFN_eglBindTexImage +#define eglChooseConfig PFN_eglChooseConfig +#define eglCreateContext PFN_eglCreateContext +#define eglCreatePbufferFromClientBuffer PFN_eglCreatePbufferFromClientBuffer +#define eglCreateWindowSurface PFN_eglCreateWindowSurface +#define eglDestroyContext PFN_eglDestroyContext +#define eglDestroySurface PFN_eglDestroySurface +#define eglGetConfigAttrib PFN_eglGetConfigAttrib +#define eglGetCurrentContext PFN_eglGetCurrentContext +#define eglGetCurrentDisplay PFN_eglGetCurrentDisplay +#define eglGetDisplay PFN_eglGetDisplay +#define eglGetError PFN_eglGetError +#define eglGetProcAddress PFN_eglGetProcAddress +#define eglInitialize PFN_eglInitialize +#define eglMakeCurrent PFN_eglMakeCurrent +#define eglQueryString PFN_eglQueryString +#define eglReleaseTexImage PFN_eglReleaseTexImage +#define eglSwapBuffers PFN_eglSwapBuffers +#define eglTerminate PFN_eglTerminate +#endif + +#endif diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c index 46cbc2f..dd44165 100644 --- a/video/out/opengl/common.c +++ b/video/out/opengl/common.c @@ -72,6 +72,8 @@ struct gl_functions { int provides; // bitfield of MPGL_CAP_* constants int ver_core; // introduced as required function int ver_es_core; // introduced as required GL ES function + int ver_exclude; // not applicable to versions >= ver_exclude + int ver_es_exclude; // same for GLES const struct gl_function *functions; }; @@ -144,15 +146,23 @@ 
static const struct gl_functions gl_functions[] = { .ver_core = 210, .provides = MPGL_CAP_ROW_LENGTH | MPGL_CAP_1D_TEX, .functions = (const struct gl_function[]) { - DEF_FN(DrawBuffer), DEF_FN(GetTexLevelParameteriv), - DEF_FN(MapBuffer), DEF_FN(ReadBuffer), DEF_FN(TexImage1D), DEF_FN(UnmapBuffer), {0} }, }, + // GL 2.1 has this as extension only. + { + .ver_exclude = 300, + .ver_es_exclude = 300, + .extension = "GL_ARB_map_buffer_range", + .functions = (const struct gl_function[]) { + DEF_FN(MapBufferRange), + {0} + }, + }, // GL 3.0+ and ES 3.x core only functions. { .ver_core = 300, @@ -161,6 +171,7 @@ static const struct gl_functions gl_functions[] = { DEF_FN(BindBufferBase), DEF_FN(BlitFramebuffer), DEF_FN(GetStringi), + DEF_FN(MapBufferRange), // for ES 3.0 DEF_FN(ReadBuffer), DEF_FN(UnmapBuffer), @@ -203,6 +214,7 @@ static const struct gl_functions gl_functions[] = { DEF_FN(DeleteFramebuffers), DEF_FN(CheckFramebufferStatus), DEF_FN(FramebufferTexture2D), + DEF_FN(GetFramebufferAttachmentParameteriv), {0} }, }, @@ -227,6 +239,32 @@ static const struct gl_functions gl_functions[] = { .provides = MPGL_CAP_TEX_RG, }, { + .ver_core = 300, + .ver_es_core = 300, + .extension = "GL_EXT_texture_rg", + .provides = MPGL_CAP_TEX_RG, + }, + // GL_R16 etc. 
+ { + .extension = "GL_EXT_texture_norm16", + .provides = MPGL_CAP_EXT16, + .ver_exclude = 1, // never in desktop GL + }, + // Float texture support for GL 2.x + { + .extension = "GL_ARB_texture_float", + .provides = MPGL_CAP_ARB_FLOAT, + .ver_exclude = 300, + .ver_es_exclude = 1, + }, + // 16 bit float textures that can be rendered to in GLES + { + .extension = "GL_EXT_color_buffer_half_float", + .provides = MPGL_CAP_EXT_CR_HFLOAT, + .ver_exclude = 1, + .ver_es_exclude = 320, + }, + { .ver_core = 320, .extension = "GL_ARB_sync", .functions = (const struct gl_function[]) { @@ -236,6 +274,47 @@ static const struct gl_functions gl_functions[] = { {0} }, }, + { + .ver_core = 330, + .extension = "GL_ARB_timer_query", + .functions = (const struct gl_function[]) { + DEF_FN(GenQueries), + DEF_FN(DeleteQueries), + DEF_FN(BeginQuery), + DEF_FN(EndQuery), + DEF_FN(QueryCounter), + DEF_FN(IsQuery), + DEF_FN(GetQueryObjectiv), + DEF_FN(GetQueryObjecti64v), + DEF_FN(GetQueryObjectuiv), + DEF_FN(GetQueryObjectui64v), + {0} + }, + }, + { + .extension = "GL_EXT_disjoint_timer_query", + .functions = (const struct gl_function[]) { + DEF_FN_NAME(GenQueries, "glGenQueriesEXT"), + DEF_FN_NAME(DeleteQueries, "glDeleteQueriesEXT"), + DEF_FN_NAME(BeginQuery, "glBeginQueryEXT"), + DEF_FN_NAME(EndQuery, "glEndQueryEXT"), + DEF_FN_NAME(QueryCounter, "glQueryCounterEXT"), + DEF_FN_NAME(IsQuery, "glIsQueryEXT"), + DEF_FN_NAME(GetQueryObjectiv, "glGetQueryObjectivEXT"), + DEF_FN_NAME(GetQueryObjecti64v, "glGetQueryObjecti64vEXT"), + DEF_FN_NAME(GetQueryObjectuiv, "glGetQueryObjectuivEXT"), + DEF_FN_NAME(GetQueryObjectui64v, "glGetQueryObjectui64vEXT"), + {0} + }, + }, + { + .ver_core = 430, + .ver_es_core = 300, + .functions = (const struct gl_function[]) { + DEF_FN(InvalidateFramebuffer), + {0} + }, + }, // Swap control, always an OS specific extension // The OSX code loads this manually. 
{ @@ -270,6 +349,7 @@ static const struct gl_functions gl_functions[] = { DEF_FN(VDPAUInitNV), DEF_FN(VDPAUFiniNV), DEF_FN(VDPAURegisterOutputSurfaceNV), + DEF_FN(VDPAURegisterVideoSurfaceNV), DEF_FN(VDPAUUnregisterSurfaceNV), DEF_FN(VDPAUSurfaceAccessNV), DEF_FN(VDPAUMapSurfacesNV), @@ -327,14 +407,10 @@ static const struct gl_functions gl_functions[] = { {0} }, }, - // uniform buffer object extensions, requires OpenGL 3.1. { - .ver_core = 310, - .ver_es_core = 300, - .extension = "GL_ARB_uniform_buffer_object", + .extension = "GL_ANGLE_translated_shader_source", .functions = (const struct gl_function[]) { - DEF_FN(GetUniformBlockIndex), - DEF_FN(UniformBlockBinding), + DEF_FN(GetTranslatedShaderSourceANGLE), {0} }, }, @@ -348,11 +424,9 @@ static const struct gl_functions gl_functions[] = { // Fill the GL struct with function pointers and extensions from the current // GL context. Called by the backend. -// getProcAddress: function to resolve function names, may be NULL +// get_fn: function to resolve function names // ext2: an extra extension string // log: used to output messages -// Note: if you create a CONTEXT_FORWARD_COMPATIBLE_BIT_ARB with OpenGL 3.0, -// you must append "GL_ARB_compatibility" to ext2. void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), void *fn_ctx, const char *ext2, struct mp_log *log) { @@ -428,6 +502,13 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), // NOTE: Function entrypoints can exist, even if they do not work. // We must always check extension strings and versions. 
+ if (gl->version && section->ver_exclude && + gl->version >= section->ver_exclude) + continue; + if (gl->es && section->ver_es_exclude && + gl->es >= section->ver_es_exclude) + continue; + bool exists = false, must_exist = false; if (ver_core) must_exist = version >= ver_core; @@ -448,13 +529,15 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), void *ptr = get_fn(fn_ctx, fn->name); if (!ptr) { all_loaded = false; - mp_warn(log, "Required function '%s' not " - "found for %s OpenGL %d.%d.\n", fn->name, - section->extension ? section->extension : "builtin", - MPGL_VER_GET_MAJOR(ver_core), - MPGL_VER_GET_MINOR(ver_core)); - if (must_exist) + if (must_exist) { + mp_err(log, "GL %d.%d function %s not found.\n", + MPGL_VER_GET_MAJOR(ver_core), + MPGL_VER_GET_MINOR(ver_core), fn->name); goto error; + } else { + mp_warn(log, "Function %s from extension %s not found.\n", + fn->name, section->extension); + } break; } assert(i < MAX_FN_COUNT); @@ -469,8 +552,8 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), if (loaded[i]) *funcptr = loaded[i]; } - mp_verbose(log, "Loaded functions for %d/%s.\n", ver_core, - section->extension ? section->extension : "builtin"); + if (!must_exist && section->extension) + mp_verbose(log, "Loaded extension %s.\n", section->extension); } } @@ -494,14 +577,6 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), mp_verbose(log, "Detected suspected software renderer.\n"); } - // Detect 16F textures that work with GL_LINEAR filtering. - if ((!gl->es && (gl->version >= 300 || check_ext(gl, "GL_ARB_texture_float"))) || - (gl->es && (gl->version >= 310 || check_ext(gl, "GL_OES_texture_half_float_linear")))) - { - mp_verbose(log, "Filterable half-float textures supported.\n"); - gl->mpgl_caps |= MPGL_CAP_FLOAT_TEX; - } - // Provided for simpler handling if no framebuffer support is available. 
if (!gl->BindFramebuffer) gl->BindFramebuffer = &dummy_glBindFramebuffer; diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h index f790dcb..e3ebd66 100644 --- a/video/out/opengl/common.h +++ b/video/out/opengl/common.h @@ -53,7 +53,6 @@ enum { MPGL_CAP_ROW_LENGTH = (1 << 4), // GL_[UN]PACK_ROW_LENGTH MPGL_CAP_FB = (1 << 5), MPGL_CAP_VAO = (1 << 6), - MPGL_CAP_FLOAT_TEX = (1 << 9), MPGL_CAP_TEX_RG = (1 << 10), // GL_ARB_texture_rg / GL 3.x MPGL_CAP_VDPAU = (1 << 11), // GL_NV_vdpau_interop MPGL_CAP_APPLE_RGB_422 = (1 << 12), // GL_APPLE_rgb_422 @@ -61,6 +60,10 @@ enum { MPGL_CAP_3D_TEX = (1 << 15), MPGL_CAP_DEBUG = (1 << 16), MPGL_CAP_DXINTEROP = (1 << 17), // WGL_NV_DX_interop + MPGL_CAP_EXT16 = (1 << 18), // GL_EXT_texture_norm16 + MPGL_CAP_ARB_FLOAT = (1 << 19), // GL_ARB_texture_float + MPGL_CAP_EXT_CR_HFLOAT = (1 << 20), // GL_EXT_color_buffer_half_float + MPGL_CAP_SW = (1 << 30), // indirect or sw renderer }; @@ -88,7 +91,7 @@ struct GL { char *extensions; // Equivalent to GL_EXTENSIONS int mpgl_caps; // Bitfield of MPGL_CAP_* constants bool debug_context; // use of e.g. 
GLX_CONTEXT_DEBUG_BIT_ARB - int fb_r, fb_g, fb_b; // frame buffer bit depth (0 if unknown) + GLuint main_fb; // framebuffer to render to (normally 0) void (GLAPIENTRY *Viewport)(GLint, GLint, GLsizei, GLsizei); void (GLAPIENTRY *Clear)(GLbitfield); @@ -98,7 +101,6 @@ struct GL { void (GLAPIENTRY *Enable)(GLenum); void (GLAPIENTRY *Disable)(GLenum); const GLubyte *(GLAPIENTRY * GetString)(GLenum); - void (GLAPIENTRY *DrawBuffer)(GLenum); void (GLAPIENTRY *BlendFuncSeparate)(GLenum, GLenum, GLenum, GLenum); void (GLAPIENTRY *Flush)(void); void (GLAPIENTRY *Finish)(void); @@ -123,7 +125,8 @@ struct GL { void (GLAPIENTRY *DeleteBuffers)(GLsizei, const GLuint *); void (GLAPIENTRY *BindBuffer)(GLenum, GLuint); void (GLAPIENTRY *BindBufferBase)(GLenum, GLuint, GLuint); - GLvoid * (GLAPIENTRY * MapBuffer)(GLenum, GLenum); + GLvoid * (GLAPIENTRY *MapBufferRange)(GLenum, GLintptr, GLsizeiptr, + GLbitfield); GLboolean (GLAPIENTRY *UnmapBuffer)(GLenum); void (GLAPIENTRY *BufferData)(GLenum, intptr_t, const GLvoid *, GLenum); void (GLAPIENTRY *ActiveTexture)(GLenum); @@ -166,6 +169,8 @@ struct GL { GLint); void (GLAPIENTRY *BlitFramebuffer)(GLint, GLint, GLint, GLint, GLint, GLint, GLint, GLint, GLbitfield, GLenum); + void (GLAPIENTRY *GetFramebufferAttachmentParameteriv)(GLenum, GLenum, + GLenum, GLint *); void (GLAPIENTRY *Uniform1f)(GLint, GLfloat); void (GLAPIENTRY *Uniform2f)(GLint, GLfloat, GLfloat); @@ -177,14 +182,29 @@ struct GL { void (GLAPIENTRY *UniformMatrix3fv)(GLint, GLsizei, GLboolean, const GLfloat *); + void (GLAPIENTRY *InvalidateFramebuffer)(GLenum, GLsizei, const GLenum *); + GLsync (GLAPIENTRY *FenceSync)(GLenum, GLbitfield); GLenum (GLAPIENTRY *ClientWaitSync)(GLsync, GLbitfield, GLuint64); void (GLAPIENTRY *DeleteSync)(GLsync sync); + void (GLAPIENTRY *GenQueries)(GLsizei, GLuint *); + void (GLAPIENTRY *DeleteQueries)(GLsizei, const GLuint *); + void (GLAPIENTRY *BeginQuery)(GLenum, GLuint); + void (GLAPIENTRY *EndQuery)(GLenum); + void (GLAPIENTRY 
*QueryCounter)(GLuint, GLenum); + GLboolean (GLAPIENTRY *IsQuery)(GLuint); + void (GLAPIENTRY *GetQueryObjectiv)(GLuint, GLenum, GLint *); + void (GLAPIENTRY *GetQueryObjecti64v)(GLuint, GLenum, GLint64 *); + void (GLAPIENTRY *GetQueryObjectuiv)(GLuint, GLenum, GLuint *); + void (GLAPIENTRY *GetQueryObjectui64v)(GLuint, GLenum, GLuint64 *); + void (GLAPIENTRY *VDPAUInitNV)(const GLvoid *, const GLvoid *); void (GLAPIENTRY *VDPAUFiniNV)(void); GLvdpauSurfaceNV (GLAPIENTRY *VDPAURegisterOutputSurfaceNV) (GLvoid *, GLenum, GLsizei, const GLuint *); + GLvdpauSurfaceNV (GLAPIENTRY *VDPAURegisterVideoSurfaceNV) + (GLvoid *, GLenum, GLsizei, const GLuint *); void (GLAPIENTRY *VDPAUUnregisterSurfaceNV)(GLvdpauSurfaceNV); void (GLAPIENTRY *VDPAUSurfaceAccessNV)(GLvdpauSurfaceNV, GLenum); void (GLAPIENTRY *VDPAUMapSurfacesNV)(GLsizei, const GLvdpauSurfaceNV *); @@ -208,8 +228,8 @@ struct GL { GLint (GLAPIENTRY *GetVideoSync)(GLuint *); GLint (GLAPIENTRY *WaitVideoSync)(GLint, GLint, unsigned int *); - GLuint (GLAPIENTRY *GetUniformBlockIndex)(GLuint, const GLchar *); - void (GLAPIENTRY *UniformBlockBinding)(GLuint, GLuint, GLuint); + void (GLAPIENTRY *GetTranslatedShaderSourceANGLE)(GLuint, GLsizei, + GLsizei*, GLchar* source); void (GLAPIENTRY *DebugMessageCallback)(MP_GLDEBUGPROC callback, const void *userParam); diff --git a/video/out/opengl/context.c b/video/out/opengl/context.c index 77e9709..186211d 100644 --- a/video/out/opengl/context.c +++ b/video/out/opengl/context.c @@ -42,6 +42,7 @@ extern const struct mpgl_driver mpgl_driver_cocoa; extern const struct mpgl_driver mpgl_driver_wayland; extern const struct mpgl_driver mpgl_driver_w32; extern const struct mpgl_driver mpgl_driver_angle; +extern const struct mpgl_driver mpgl_driver_angle_es2; extern const struct mpgl_driver mpgl_driver_dxinterop; extern const struct mpgl_driver mpgl_driver_rpi; @@ -54,6 +55,7 @@ static const struct mpgl_driver *const backends[] = { #endif #if HAVE_EGL_ANGLE &mpgl_driver_angle, + 
&mpgl_driver_angle_es2, #endif #if HAVE_GL_WIN32 &mpgl_driver_w32, diff --git a/video/out/opengl/context_angle.c b/video/out/opengl/context_angle.c index b922ce8..cc14fc3 100644 --- a/video/out/opengl/context_angle.c +++ b/video/out/opengl/context_angle.c @@ -18,15 +18,26 @@ #include <windows.h> #include <EGL/egl.h> #include <EGL/eglext.h> +#include <d3d11.h> +#include <dxgi.h> + +#include "angle_dynamic.h" #include "common/common.h" #include "video/out/w32_common.h" #include "context.h" +#ifndef EGL_OPTIMAL_SURFACE_ORIENTATION_ANGLE +#define EGL_OPTIMAL_SURFACE_ORIENTATION_ANGLE 0x33A7 +#define EGL_SURFACE_ORIENTATION_ANGLE 0x33A8 +#define EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE 0x0002 +#endif + struct priv { EGLDisplay egl_display; EGLContext egl_context; EGLSurface egl_surface; + bool use_es2; }; static void angle_uninit(MPGLContext *ctx) @@ -39,6 +50,8 @@ static void angle_uninit(MPGLContext *ctx) eglDestroyContext(p->egl_display, p->egl_context); } p->egl_context = EGL_NO_CONTEXT; + if (p->egl_display) + eglTerminate(p->egl_display); vo_w32_uninit(ctx->vo); } @@ -90,6 +103,74 @@ static bool create_context_egl(MPGLContext *ctx, EGLConfig config, int version) return true; } +static void d3d_init(struct MPGLContext *ctx) +{ + HRESULT hr; + struct priv *p = ctx->priv; + struct vo *vo = ctx->vo; + IDXGIDevice *dxgi_dev = NULL; + IDXGIAdapter *dxgi_adapter = NULL; + IDXGIFactory *dxgi_factory = NULL; + + PFNEGLQUERYDISPLAYATTRIBEXTPROC eglQueryDisplayAttribEXT = + (PFNEGLQUERYDISPLAYATTRIBEXTPROC)eglGetProcAddress("eglQueryDisplayAttribEXT"); + PFNEGLQUERYDEVICEATTRIBEXTPROC eglQueryDeviceAttribEXT = + (PFNEGLQUERYDEVICEATTRIBEXTPROC)eglGetProcAddress("eglQueryDeviceAttribEXT"); + if (!eglQueryDisplayAttribEXT || !eglQueryDeviceAttribEXT) { + MP_VERBOSE(vo, "Missing EGL_EXT_device_query\n"); + goto done; + } + + EGLAttrib dev_attr; + if (!eglQueryDisplayAttribEXT(p->egl_display, EGL_DEVICE_EXT, &dev_attr)) { + MP_VERBOSE(vo, "Missing EGL_EXT_device_query\n"); + goto 
done; + } + + // If ANGLE is in D3D11 mode, get the underlying ID3D11Device + EGLDeviceEXT dev = (EGLDeviceEXT)dev_attr; + EGLAttrib d3d11_dev_attr; + if (eglQueryDeviceAttribEXT(dev, EGL_D3D11_DEVICE_ANGLE, &d3d11_dev_attr)) { + ID3D11Device *d3d11_dev = (ID3D11Device*)d3d11_dev_attr; + + hr = ID3D11Device_QueryInterface(d3d11_dev, &IID_IDXGIDevice, + (void**)&dxgi_dev); + if (FAILED(hr)) { + MP_ERR(vo, "Device is not a IDXGIDevice\n"); + goto done; + } + + hr = IDXGIDevice_GetAdapter(dxgi_dev, &dxgi_adapter); + if (FAILED(hr)) { + MP_ERR(vo, "Couldn't get IDXGIAdapter\n"); + goto done; + } + + hr = IDXGIAdapter_GetParent(dxgi_adapter, &IID_IDXGIFactory, + (void**)&dxgi_factory); + if (FAILED(hr)) { + MP_ERR(vo, "Couldn't get IDXGIFactory\n"); + goto done; + } + + // Prevent DXGI from making changes to the VO window, otherwise in + // non-DirectComposition mode it will hook the Alt+Enter keystroke and + // make it trigger an ugly transition to exclusive fullscreen mode + // instead of running the user-set command. 
+ IDXGIFactory_MakeWindowAssociation(dxgi_factory, vo_w32_hwnd(vo), + DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER | + DXGI_MWA_NO_PRINT_SCREEN); + } + +done: + if (dxgi_dev) + IDXGIDevice_Release(dxgi_dev); + if (dxgi_adapter) + IDXGIAdapter_Release(dxgi_adapter); + if (dxgi_factory) + IDXGIFactory_Release(dxgi_factory); +} + static void *get_proc_address(const GLubyte *proc_name) { return eglGetProcAddress(proc_name); @@ -100,6 +181,11 @@ static int angle_init(struct MPGLContext *ctx, int flags) struct priv *p = ctx->priv; struct vo *vo = ctx->vo; + if (!angle_load()) { + MP_VERBOSE(vo, "Failed to load LIBEGL.DLL\n"); + goto fail; + } + if (!vo_w32_init(vo)) goto fail; @@ -142,6 +228,10 @@ static int angle_init(struct MPGLContext *ctx, int flags) goto fail; } + const char *exts = eglQueryString(p->egl_display, EGL_EXTENSIONS); + if (exts) + MP_DBG(ctx->vo, "EGL extensions: %s\n", exts); + eglBindAPI(EGL_OPENGL_ES_API); if (eglGetError() != EGL_SUCCESS) { MP_FATAL(vo, "Couldn't bind GLES API\n"); @@ -152,22 +242,53 @@ static int angle_init(struct MPGLContext *ctx, int flags) if (!config) goto fail; + int window_attribs_len = 0; + EGLint *window_attribs = NULL; + + EGLint flip_val; + if (eglGetConfigAttrib(p->egl_display, config, + EGL_OPTIMAL_SURFACE_ORIENTATION_ANGLE, &flip_val)) + { + if (flip_val == EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE) { + MP_TARRAY_APPEND(NULL, window_attribs, window_attribs_len, + EGL_SURFACE_ORIENTATION_ANGLE); + MP_TARRAY_APPEND(NULL, window_attribs, window_attribs_len, + EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE); + ctx->flip_v = true; + MP_VERBOSE(vo, "Rendering flipped.\n"); + } + } + + // EGL_DIRECT_COMPOSITION_ANGLE enables the use of flip-mode present, which + // avoids a copy of the video image and lowers vsync jitter, though the + // extension is only present on Windows 8 and up. 
+ if (strstr(exts, "EGL_ANGLE_direct_composition")) { + MP_TARRAY_APPEND(NULL, window_attribs, window_attribs_len, + EGL_DIRECT_COMPOSITION_ANGLE); + MP_TARRAY_APPEND(NULL, window_attribs, window_attribs_len, EGL_TRUE); + MP_VERBOSE(vo, "Using DirectComposition.\n"); + } + + MP_TARRAY_APPEND(NULL, window_attribs, window_attribs_len, EGL_NONE); p->egl_surface = eglCreateWindowSurface(p->egl_display, config, - vo_w32_hwnd(vo), NULL); + vo_w32_hwnd(vo), window_attribs); + talloc_free(window_attribs); if (p->egl_surface == EGL_NO_SURFACE) { MP_FATAL(ctx->vo, "Could not create EGL surface!\n"); goto fail; } - if (!create_context_egl(ctx, config, 3) && + if (!(!p->use_es2 && create_context_egl(ctx, config, 3)) && !create_context_egl(ctx, config, 2)) { MP_FATAL(ctx->vo, "Could not create EGL context!\n"); goto fail; } - mpgl_load_functions(ctx->gl, get_proc_address, NULL, vo->log); + // Configure the underlying Direct3D device + d3d_init(ctx); + mpgl_load_functions(ctx->gl, get_proc_address, NULL, vo->log); return 0; fail: @@ -175,6 +296,17 @@ fail: return -1; } +static int angle_init_es2(struct MPGLContext *ctx, int flags) +{ + struct priv *p = ctx->priv; + p->use_es2 = true; + if (ctx->vo->probing) { + MP_VERBOSE(ctx->vo, "Not using this by default.\n"); + return -1; + } + return angle_init(ctx, flags); +} + static int angle_reconfig(struct MPGLContext *ctx) { vo_w32_config(ctx->vo); @@ -201,3 +333,13 @@ const struct mpgl_driver mpgl_driver_angle = { .control = angle_control, .uninit = angle_uninit, }; + +const struct mpgl_driver mpgl_driver_angle_es2 = { + .name = "angle-es2", + .priv_size = sizeof(struct priv), + .init = angle_init_es2, + .reconfig = angle_reconfig, + .swap_buffers = angle_swap_buffers, + .control = angle_control, + .uninit = angle_uninit, +}; diff --git a/video/out/opengl/context_cocoa.c b/video/out/opengl/context_cocoa.c index 271bdb7..ea7a9b5 100644 --- a/video/out/opengl/context_cocoa.c +++ b/video/out/opengl/context_cocoa.c @@ -33,14 +33,6 @@ 
static int set_swap_interval(int enabled) return (err == kCGLNoError) ? 0 : -1; } -static int cgl_color_size(struct MPGLContext *ctx) -{ - struct cgl_context *p = ctx->priv; - GLint value; - CGLDescribePixelFormat(p->pix, 0, kCGLPFAColorSize, &value); - return value > 16 ? 8 : 5; -} - static void *cocoa_glgetaddr(const char *s) { void *ret = NULL; @@ -123,7 +115,6 @@ static bool create_gl_context(struct MPGLContext *ctx, int vo_flags) CGLSetParameter(p->ctx, kCGLCPSurfaceOpacity, &(GLint){0}); mpgl_load_functions(ctx->gl, (void *)cocoa_glgetaddr, NULL, ctx->vo->log); - ctx->gl->fb_r = ctx->gl->fb_g = ctx->gl->fb_b = cgl_color_size(ctx); CGLReleasePixelFormat(p->pix); diff --git a/video/out/opengl/context_dxinterop.c b/video/out/opengl/context_dxinterop.c index 4dfc3c2..95b9296 100644 --- a/video/out/opengl/context_dxinterop.c +++ b/video/out/opengl/context_dxinterop.c @@ -27,6 +27,9 @@ // For WGL_ACCESS_WRITE_DISCARD_NV, etc. #include <GL/wglext.h> +EXTERN_C IMAGE_DOS_HEADER __ImageBase; +#define HINST_THISCOMPONENT ((HINSTANCE)&__ImageBase) + // mingw-w64 header typo? #ifndef IDirect3DSwapChain9Ex_GetBackBuffer #define IDirect3DSwapChain9Ex_GetBackBuffer IDirect3DSwapChain9EX_GetBackBuffer @@ -51,21 +54,14 @@ struct priv { HGLRC os_ctx; // OpenGL resources - GLuint framebuffer; GLuint texture; - // Is the shared framebuffer currently bound? - bool fb_bound; - // Is the shared texture currently attached? - bool tex_attached; // Did we lose the device? 
bool lost_device; // Requested and current parameters int requested_swapinterval; int width, height, swapinterval; - - void (GLAPIENTRY *real_gl_bind_framebuffer)(GLenum, GLuint); }; static __thread struct MPGLContext *current_ctx; @@ -99,7 +95,7 @@ static int os_ctx_create(struct MPGLContext *ctx) .cbSize = sizeof(WNDCLASSEXW), .style = CS_OWNDC, .lpfnWndProc = DefWindowProc, - .hInstance = GetModuleHandleW(NULL), + .hInstance = HINST_THISCOMPONENT, .lpszClassName = os_wnd_class, }); @@ -107,7 +103,7 @@ static int os_ctx_create(struct MPGLContext *ctx) // possible to use the VO window, but MSDN recommends against drawing to // the same window with flip mode present and other APIs, so play it safe. p->os_wnd = CreateWindowExW(0, os_wnd_class, os_wnd_class, 0, 0, 0, 200, - 200, NULL, NULL, GetModuleHandleW(NULL), NULL); + 200, NULL, NULL, HINST_THISCOMPONENT, NULL); p->os_dc = GetDC(p->os_wnd); if (!p->os_dc) { MP_FATAL(ctx->vo, "Couldn't create window for offscreen rendering\n"); @@ -224,18 +220,6 @@ static void os_ctx_destroy(MPGLContext *ctx) DestroyWindow(p->os_wnd); } -static void try_attach_texture(MPGLContext *ctx) -{ - struct priv *p = ctx->priv; - struct GL *gl = ctx->gl; - - if (p->fb_bound && !p->tex_attached) { - gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, p->texture, 0); - p->tex_attached = true; - } -} - static int d3d_size_dependent_create(MPGLContext *ctx) { struct priv *p = ctx->priv; @@ -275,25 +259,6 @@ static int d3d_size_dependent_create(MPGLContext *ctx) MP_VERBOSE(ctx->vo, "DX_interop backbuffer format: %u\n", (unsigned)bb_desc.Format); - // Note: This backend has only been tested on an 8-bit display. It's - // unknown whether this code is enough to support other formats or if more - // work is needed. 
- switch (bb_desc.Format) { - case D3DFMT_X1R5G5B5: case D3DFMT_A1R5G5B5: - ctx->gl->fb_r = ctx->gl->fb_g = ctx->gl->fb_b = 5; - break; - case D3DFMT_R5G6B5: - ctx->gl->fb_r = 5; ctx->gl->fb_g = 6; ctx->gl->fb_b = 5; - break; - case D3DFMT_R8G8B8: case D3DFMT_A8R8G8B8: case D3DFMT_X8R8G8B8: - case D3DFMT_A8B8G8R8: case D3DFMT_X8B8G8R8: default: - ctx->gl->fb_r = ctx->gl->fb_g = ctx->gl->fb_b = 8; - break; - case D3DFMT_A2R10G10B10: case D3DFMT_A2B10G10R10: - ctx->gl->fb_r = ctx->gl->fb_g = ctx->gl->fb_b = 10; - break; - } - // Create a rendertarget with the same format as the backbuffer for // rendering from OpenGL HANDLE share_handle = NULL; @@ -312,7 +277,6 @@ static int d3d_size_dependent_create(MPGLContext *ctx) // Create the OpenGL-side texture gl->GenTextures(1, &p->texture); - p->tex_attached = false; // Now share the rendertarget with OpenGL as a texture p->rtarget_h = gl->DXRegisterObjectNV(p->device_h, p->rtarget, p->texture, @@ -331,9 +295,10 @@ static int d3d_size_dependent_create(MPGLContext *ctx) return -1; } - // Only attach the shared texture if the shared framebuffer is bound. If - // it's not, the texture will be attached when glBindFramebuffer is called. 
- try_attach_texture(ctx); + gl->BindFramebuffer(GL_FRAMEBUFFER, gl->main_fb); + gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, p->texture, 0); + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); return 0; } @@ -476,27 +441,6 @@ static void dxinterop_uninit(MPGLContext *ctx) pump_message_loop(); } -static GLAPIENTRY void dxinterop_bind_framebuffer(GLenum target, - GLuint framebuffer) -{ - if (!current_ctx) - return; - struct priv *p = current_ctx->priv; - - // Keep track of whether the shared framebuffer is bound - if (target == GL_FRAMEBUFFER || target == GL_DRAW_FRAMEBUFFER) - p->fb_bound = (framebuffer == 0); - - // Pretend the shared framebuffer is the primary framebuffer - if (framebuffer == 0) - framebuffer = p->framebuffer; - - p->real_gl_bind_framebuffer(target, framebuffer); - - // Attach the shared texture if it is not attached already - try_attach_texture(current_ctx); -} - static void dxinterop_reset(struct MPGLContext *ctx) { struct priv *p = ctx->priv; @@ -570,16 +514,10 @@ static int dxinterop_init(struct MPGLContext *ctx, int flags) goto fail; // Create the shared framebuffer - gl->GenFramebuffers(1, &p->framebuffer); + gl->GenFramebuffers(1, &gl->main_fb); - // Hook glBindFramebuffer to return the shared framebuffer instead of the - // primary one current_ctx = ctx; - p->real_gl_bind_framebuffer = gl->BindFramebuffer; - gl->BindFramebuffer = dxinterop_bind_framebuffer; - gl->SwapInterval = dxinterop_swap_interval; - gl->MPGetNativeDisplay = dxinterop_get_native_display; if (d3d_create(ctx) < 0) @@ -587,9 +525,6 @@ static int dxinterop_init(struct MPGLContext *ctx, int flags) if (d3d_size_dependent_create(ctx) < 0) goto fail; - // Bind the shared framebuffer. This will also attach the shared texture. - gl->BindFramebuffer(GL_FRAMEBUFFER, 0); - // The OpenGL and Direct3D coordinate systems are flipped vertically // relative to each other. 
Flip the video during rendering so it can be // copied to the Direct3D backbuffer with a simple (and fast) StretchRect. diff --git a/video/out/opengl/context_rpi.c b/video/out/opengl/context_rpi.c index c01c173..c0ca733 100644 --- a/video/out/opengl/context_rpi.c +++ b/video/out/opengl/context_rpi.c @@ -19,7 +19,6 @@ #include <assert.h> #include "common/common.h" -#include "video/out/x11_common.h" #include "context.h" #include "context_rpi.h" diff --git a/video/out/opengl/context_w32.c b/video/out/opengl/context_w32.c index c647d97..3a0118e 100644 --- a/video/out/opengl/context_w32.c +++ b/video/out/opengl/context_w32.c @@ -209,14 +209,6 @@ static void create_ctx(void *ptr) if (!w32_ctx->context) create_context_w32_old(ctx); - int pfmt = GetPixelFormat(w32_ctx->hdc); - PIXELFORMATDESCRIPTOR pfd; - if (DescribePixelFormat(w32_ctx->hdc, pfmt, sizeof(pfd), &pfd)) { - ctx->gl->fb_r = pfd.cRedBits; - ctx->gl->fb_g = pfd.cGreenBits; - ctx->gl->fb_b = pfd.cBlueBits; - } - wglMakeCurrent(w32_ctx->hdc, NULL); } diff --git a/video/out/opengl/context_wayland.c b/video/out/opengl/context_wayland.c index a100073..e74132b 100644 --- a/video/out/opengl/context_wayland.c +++ b/video/out/opengl/context_wayland.c @@ -25,10 +25,14 @@ static void egl_resize(struct vo_wayland_state *wl) int32_t y = wl->window.sh_y; int32_t width = wl->window.sh_width; int32_t height = wl->window.sh_height; + int32_t scale = 1; if (!wl->egl_context.egl_window) return; + if (wl->display.current_output) + scale = wl->display.current_output->scale; + // get the real size of the window // this improves moving the window while resizing it wl_egl_window_get_attached_size(wl->egl_context.egl_window, @@ -46,14 +50,15 @@ static void egl_resize(struct vo_wayland_state *wl) if (y != 0) y = wl->window.height - height; - wl_egl_window_resize(wl->egl_context.egl_window, width, height, x, y); + wl_surface_set_buffer_scale(wl->window.video_surface, scale); + wl_egl_window_resize(wl->egl_context.egl_window, scale*width, 
scale*height, x, y); wl->window.width = width; wl->window.height = height; /* set size for mplayer */ - wl->vo->dwidth = wl->window.width; - wl->vo->dheight = wl->window.height; + wl->vo->dwidth = scale*wl->window.width; + wl->vo->dheight = scale*wl->window.height; wl->vo->want_redraw = true; wl->window.events = 0; diff --git a/video/out/opengl/context_x11.c b/video/out/opengl/context_x11.c index d9a584e..11700ef 100644 --- a/video/out/opengl/context_x11.c +++ b/video/out/opengl/context_x11.c @@ -271,10 +271,6 @@ static int glx_init(struct MPGLContext *ctx, int flags) if (!success) goto uninit; - glXGetFBConfigAttrib(vo->x11->display, fbc, GLX_RED_SIZE, &ctx->gl->fb_r); - glXGetFBConfigAttrib(vo->x11->display, fbc, GLX_GREEN_SIZE, &ctx->gl->fb_g); - glXGetFBConfigAttrib(vo->x11->display, fbc, GLX_BLUE_SIZE, &ctx->gl->fb_b); - return 0; uninit: diff --git a/video/out/opengl/context_x11egl.c b/video/out/opengl/context_x11egl.c index e6069b7..2e4fd5f 100644 --- a/video/out/opengl/context_x11egl.c +++ b/video/out/opengl/context_x11egl.c @@ -21,6 +21,11 @@ #include <EGL/egl.h> #include <EGL/eglext.h> +#ifndef EGL_VERSION_1_5 +#define EGL_CONTEXT_OPENGL_PROFILE_MASK 0x30FD +#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT 0x00000001 +#endif + #include "common/common.h" #include "video/out/x11_common.h" #include "context.h" @@ -79,9 +84,15 @@ static bool create_context_egl(MPGLContext *ctx, EGLConfig config, EGLint context_attributes[] = { // aka EGL_CONTEXT_MAJOR_VERSION_KHR EGL_CONTEXT_CLIENT_VERSION, es ? 
2 : 3, + EGL_NONE, EGL_NONE, EGL_NONE }; + if (!es) { + context_attributes[2] = EGL_CONTEXT_OPENGL_PROFILE_MASK; + context_attributes[3] = EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT; + } + p->egl_surface = eglCreateWindowSurface(p->egl_display, config, window, NULL); if (p->egl_surface == EGL_NO_SURFACE) { @@ -152,7 +163,6 @@ static int mpegl_init(struct MPGLContext *ctx, int flags) void *(*gpa)(const GLubyte*) = (void *(*)(const GLubyte*))eglGetProcAddress; mpgl_load_functions(ctx->gl, gpa, egl_exts, vo->log); - mp_egl_get_depth(ctx->gl, config); ctx->native_display_type = "x11"; ctx->native_display = vo->x11->display; diff --git a/video/out/opengl/egl_helpers.c b/video/out/opengl/egl_helpers.c index d86b5be..7e236f1 100644 --- a/video/out/opengl/egl_helpers.c +++ b/video/out/opengl/egl_helpers.c @@ -18,13 +18,3 @@ #include "egl_helpers.h" #include "common.h" -void mp_egl_get_depth(struct GL *gl, EGLConfig fbc) -{ - EGLint tokens[] = {EGL_RED_SIZE, EGL_GREEN_SIZE, EGL_BLUE_SIZE}; - int *ptrs[] = {&gl->fb_r, &gl->fb_g, &gl->fb_b}; - for (int n = 0; n < MP_ARRAY_SIZE(tokens); n++) { - EGLint depth = 0; - if (eglGetConfigAttrib(eglGetCurrentDisplay(), fbc, tokens[n], &depth)) - *ptrs[n] = depth; - } -} diff --git a/video/out/opengl/egl_helpers.h b/video/out/opengl/egl_helpers.h index f9961fe..3806ef1 100644 --- a/video/out/opengl/egl_helpers.h +++ b/video/out/opengl/egl_helpers.h @@ -4,7 +4,4 @@ #include <EGL/egl.h> #include <EGL/eglext.h> -struct GL; -void mp_egl_get_depth(struct GL *gl, EGLConfig fbc); - #endif diff --git a/video/out/opengl/formats.c b/video/out/opengl/formats.c new file mode 100644 index 0000000..2e3dad0 --- /dev/null +++ b/video/out/opengl/formats.c @@ -0,0 +1,272 @@ +#include "common/common.h" +#include "formats.h" + +enum { + // --- GL type aliases (for readability) + T_U8 = GL_UNSIGNED_BYTE, + T_U16 = GL_UNSIGNED_SHORT, + T_FL = GL_FLOAT, +}; + +// List of allowed formats, and their usability for bilinear filtering and FBOs. 
+// This is limited to combinations that are useful for our renderer. +const struct gl_format gl_formats[] = { + // These are used for desktop GL 3+, and GLES 3+ with GL_EXT_texture_norm16. + {GL_R8, GL_RED, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, + {GL_RG8, GL_RG, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, + {GL_RGB8, GL_RGB, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, + {GL_RGBA8, GL_RGBA, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, + {GL_R16, GL_RED, T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16}, + {GL_RG16, GL_RG, T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16}, + {GL_RGB16, GL_RGB, T_U16, F_CF | F_GL3 | F_GL2F}, + {GL_RGBA16, GL_RGBA, T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16}, + + // Specifically not color-renderable. + {GL_RGB16, GL_RGB, T_U16, F_TF | F_EXT16}, + + // GL2 legacy. Ignores possibly present FBO extensions (no CF flag set). + {GL_LUMINANCE8, GL_LUMINANCE, T_U8, F_TF | F_GL2}, + {GL_LUMINANCE8_ALPHA8, GL_LUMINANCE_ALPHA, T_U8, F_TF | F_GL2}, + {GL_RGB8, GL_RGB, T_U8, F_TF | F_GL2}, + {GL_RGBA8, GL_RGBA, T_U8, F_TF | F_GL2}, + {GL_LUMINANCE16, GL_LUMINANCE, T_U16, F_TF | F_GL2}, + {GL_LUMINANCE16_ALPHA16, GL_LUMINANCE_ALPHA, T_U16, F_TF | F_GL2}, + {GL_RGB16, GL_RGB, T_U16, F_TF | F_GL2}, + {GL_RGBA16, GL_RGBA, T_U16, F_TF | F_GL2}, + + // ES2 legacy + {GL_LUMINANCE, GL_LUMINANCE, T_U8, F_TF | F_ES2}, + {GL_LUMINANCE_ALPHA, GL_LUMINANCE_ALPHA, T_U8, F_TF | F_ES2}, + {GL_RGB, GL_RGB, T_U8, F_TF | F_ES2}, + {GL_RGBA, GL_RGBA, T_U8, F_TF | F_ES2}, + + // Non-normalized integer formats. + // Follows ES 3.0 as to which are color-renderable. 
+ {GL_R8UI, GL_RED_INTEGER, T_U8, F_CR | F_GL3 | F_ES3}, + {GL_RG8UI, GL_RG_INTEGER, T_U8, F_CR | F_GL3 | F_ES3}, + {GL_RGB8UI, GL_RGB_INTEGER, T_U8, F_GL3 | F_ES3}, + {GL_RGBA8UI, GL_RGBA_INTEGER, T_U8, F_CR | F_GL3 | F_ES3}, + {GL_R16UI, GL_RED_INTEGER, T_U16, F_CR | F_GL3 | F_ES3}, + {GL_RG16UI, GL_RG_INTEGER, T_U16, F_CR | F_GL3 | F_ES3}, + {GL_RGB16UI, GL_RGB_INTEGER, T_U16, F_GL3 | F_ES3}, + {GL_RGBA16UI, GL_RGBA_INTEGER, T_U16, F_CR | F_GL3 | F_ES3}, + + // On GL3+ or GL2.1 with GL_ARB_texture_float, floats work fully. + {GL_R16F, GL_RED, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, + {GL_RG16F, GL_RG, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, + {GL_RGB16F, GL_RGB, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, + {GL_RGBA16F, GL_RGBA, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, + {GL_R32F, GL_RED, T_FL, F_CF | F_GL3 | F_GL2F}, + {GL_RG32F, GL_RG, T_FL, F_CF | F_GL3 | F_GL2F}, + {GL_RGB32F, GL_RGB, T_FL, F_CF | F_GL3 | F_GL2F}, + {GL_RGBA32F, GL_RGBA, T_FL, F_CF | F_GL3 | F_GL2F}, + + // Note: we simply don't support float anything on ES2, despite extensions. + // We also don't bother with non-filterable float formats, and we ignore + // 32 bit float formats that are not blendable when rendering to them. + + // On ES3.2+, both 16 bit floats work fully (except 3-component formats). + // F_EXTF16 implies extensions that also enable 16 bit floats fully. + {GL_R16F, GL_RED, T_FL, F_F16 | F_CF | F_ES32 | F_EXTF16}, + {GL_RG16F, GL_RG, T_FL, F_F16 | F_CF | F_ES32 | F_EXTF16}, + {GL_RGB16F, GL_RGB, T_FL, F_F16 | F_TF | F_ES32 | F_EXTF16}, + {GL_RGBA16F, GL_RGBA, T_FL, F_F16 | F_CF | F_ES32 | F_EXTF16}, + + // On ES3.0+, 16 bit floats are texture-filterable. + // Don't bother with 32 bit floats; they exist but are neither CR nor TF. + {GL_R16F, GL_RED, T_FL, F_F16 | F_TF | F_ES3}, + {GL_RG16F, GL_RG, T_FL, F_F16 | F_TF | F_ES3}, + {GL_RGB16F, GL_RGB, T_FL, F_F16 | F_TF | F_ES3}, + {GL_RGBA16F, GL_RGBA, T_FL, F_F16 | F_TF | F_ES3}, + + // These might be useful as FBO formats. 
+ {GL_RGB10_A2, GL_RGBA, + GL_UNSIGNED_INT_2_10_10_10_REV, F_CF | F_GL3 | F_ES3}, + {GL_RGBA12, GL_RGBA, T_U16, F_CF | F_GL2 | F_GL3}, + {GL_RGB10, GL_RGB, T_U16, F_CF | F_GL2 | F_GL3}, + + // Special formats. + {GL_RGB8, GL_RGB, + GL_UNSIGNED_SHORT_5_6_5, F_TF | F_GL2 | F_GL3}, + {GL_RGB_RAW_422_APPLE, GL_RGB_422_APPLE, + GL_UNSIGNED_SHORT_8_8_APPLE, F_TF | F_APPL}, + {GL_RGB_RAW_422_APPLE, GL_RGB_422_APPLE, + GL_UNSIGNED_SHORT_8_8_REV_APPLE, F_TF | F_APPL}, + + {0} +}; + +// Pairs of mpv formats and OpenGL types that match directly. Code using this +// is supposed to look through the gl_formats table, and there is supposed to +// be exactly 1 matching entry (which tells you format/internal format). +static const int special_formats[][2] = { + {IMGFMT_RGB565, GL_UNSIGNED_SHORT_5_6_5}, + {IMGFMT_UYVY, GL_UNSIGNED_SHORT_8_8_APPLE}, + {IMGFMT_YUYV, GL_UNSIGNED_SHORT_8_8_REV_APPLE}, + {0} +}; + +// Return an or-ed combination of all F_ flags that apply. +int gl_format_feature_flags(GL *gl) +{ + return (gl->version == 210 ? F_GL2 : 0) + | (gl->version >= 300 ? F_GL3 : 0) + | (gl->es == 200 ? F_ES2 : 0) + | (gl->es >= 300 ? F_ES3 : 0) + | (gl->es >= 320 ? F_ES32 : 0) + | (gl->mpgl_caps & MPGL_CAP_EXT16 ? F_EXT16 : 0) + | ((gl->es >= 300 && + (gl->mpgl_caps & MPGL_CAP_EXT_CR_HFLOAT)) ? F_EXTF16 : 0) + | ((gl->version == 210 && + (gl->mpgl_caps & MPGL_CAP_ARB_FLOAT) && + (gl->mpgl_caps & MPGL_CAP_TEX_RG) && + (gl->mpgl_caps & MPGL_CAP_FB)) ? F_GL2F : 0) + | (gl->mpgl_caps & MPGL_CAP_APPLE_RGB_422 ? F_APPL : 0); +} + +// Return the entry for the given internal format. Return NULL if unsupported. 
+const struct gl_format *gl_find_internal_format(GL *gl, GLint internal_format) +{ + int features = gl_format_feature_flags(gl); + for (int n = 0; gl_formats[n].type; n++) { + const struct gl_format *f = &gl_formats[n]; + if (f->internal_format == internal_format && (f->flags & features)) + return f; + } + return NULL; +} + +const struct gl_format *gl_find_special_format(GL *gl, int mpfmt) +{ + int features = gl_format_feature_flags(gl); + for (int n = 0; special_formats[n][0]; n++) { + if (special_formats[n][0] == mpfmt) { + GLenum type = special_formats[n][1]; + for (int i = 0; gl_formats[i].type; i++) { + const struct gl_format *f = &gl_formats[i]; + if (f->type == type && (f->flags & features)) + return f; + } + break; + } + } + return NULL; +} + +// type: one of MPGL_TYPE_* +// flags: bitset of F_*, all flags must be present +const struct gl_format *gl_find_format(GL *gl, int type, int flags, + int bytes_per_component, int n_components) +{ + if (!bytes_per_component || !n_components || !type) + return NULL; + int features = gl_format_feature_flags(gl); + for (int n = 0; gl_formats[n].type; n++) { + const struct gl_format *f = &gl_formats[n]; + if ((f->flags & features) && + ((f->flags & flags) == flags) && + gl_format_type(f) == type && + gl_component_size(f->type) == bytes_per_component && + gl_format_components(f->format) == n_components) + return f; + } + return NULL; +} + +// Return a texture-filterable unsigned normalized fixed point format. +const struct gl_format *gl_find_unorm_format(GL *gl, int bytes_per_component, + int n_components) +{ + return gl_find_format(gl, MPGL_TYPE_UNORM, F_TF, bytes_per_component, + n_components); +} + +// Return an unsigned integer format. +const struct gl_format *gl_find_uint_format(GL *gl, int bytes_per_component, + int n_components) +{ + return gl_find_format(gl, MPGL_TYPE_UINT, 0, bytes_per_component, + n_components); +} + +// Return a 16 bit float format. 
Note that this will return a GL_FLOAT format +// with 32 bit per component; just the internal representation is smaller. +// Some GL versions will allow upload with GL_HALF_FLOAT as well. +const struct gl_format *gl_find_float16_format(GL *gl, int n_components) +{ + return gl_find_format(gl, MPGL_TYPE_FLOAT, F_F16, 4, n_components); +} + +int gl_format_type(const struct gl_format *format) +{ + if (!format) + return 0; + if (format->type == GL_FLOAT) + return MPGL_TYPE_FLOAT; + if (gl_integer_format_to_base(format->format)) + return MPGL_TYPE_UINT; + return MPGL_TYPE_UNORM; +} + +// Return an integer pixel "format" to a base internal format. +// Return 0 if it's not an integer format. +GLenum gl_integer_format_to_base(GLenum format) +{ + switch (format) { + case GL_RED_INTEGER: return GL_RED; + case GL_RG_INTEGER: return GL_RG; + case GL_RGB_INTEGER: return GL_RGB; + case GL_RGBA_INTEGER: return GL_RGBA; + } + return 0; +} + +// Return the number of bytes per component this format implies. +// Returns 0 for formats with non-byte alignments and formats which +// merge multiple components (like GL_UNSIGNED_SHORT_5_6_5). +int gl_component_size(GLenum type) +{ + switch (type) { + case GL_UNSIGNED_BYTE: return 1; + case GL_UNSIGNED_SHORT: return 2; + case GL_FLOAT: return 4; + } + return 0; +} + +// Return the number of a pixel "format". +int gl_format_components(GLenum format) +{ + switch (format) { + case GL_RED: + case GL_RED_INTEGER: + case GL_LUMINANCE: + return 1; + case GL_RG: + case GL_RG_INTEGER: + case GL_LUMINANCE_ALPHA: + return 2; + case GL_RGB: + case GL_RGB_INTEGER: + return 3; + case GL_RGBA: + case GL_RGBA_INTEGER: + return 4; + } + return 0; +} + +// return the number of bytes per pixel for the given format +// does not handle all possible variants, just those used by mpv +int gl_bytes_per_pixel(GLenum format, GLenum type) +{ + // Formats with merged components are special. 
+ switch (type) { + case GL_UNSIGNED_INT_2_10_10_10_REV: return 4; + case GL_UNSIGNED_SHORT_5_6_5: return 2; + case GL_UNSIGNED_SHORT_8_8_APPLE: return 2; + case GL_UNSIGNED_SHORT_8_8_REV_APPLE: return 2; + } + + return gl_format_components(format) * gl_component_size(type); +} diff --git a/video/out/opengl/formats.h b/video/out/opengl/formats.h new file mode 100644 index 0000000..6ced4a7 --- /dev/null +++ b/video/out/opengl/formats.h @@ -0,0 +1,59 @@ +#ifndef MPGL_FORMATS_H_ +#define MPGL_FORMATS_H_ + +#include "common.h" + +struct gl_format { + GLint internal_format; // glTexImage argument + GLenum format; // glTexImage argument + GLenum type; // e.g. GL_UNSIGNED_SHORT + int flags; +}; + +extern const struct gl_format gl_formats[]; + +enum { + // --- gl_format.flags + + // Version flags. If at least 1 flag matches, the format entry is considered + // supported on the current GL context. + F_GL2 = 1 << 0, // GL2.1-only + F_GL3 = 1 << 1, // GL3.0 or later + F_ES2 = 1 << 2, // ES2-only + F_ES3 = 1 << 3, // ES3.0 or later + F_ES32 = 1 << 4, // ES3.2 or later + F_EXT16 = 1 << 5, // ES with GL_EXT_texture_norm16 + F_EXTF16 = 1 << 6, // GL_EXT_color_buffer_half_float + F_GL2F = 1 << 7, // GL2.1-only with texture_rg + texture_float + FBOs + F_APPL = 1 << 8, // GL_APPLE_rgb_422 + + // Feature flags. They are additional and signal presence of features. + F_CR = 1 << 16, // color-renderable + F_TF = 1 << 17, // texture-filterable with GL_LINEAR + F_CF = F_CR | F_TF, + F_F16 = 1 << 18, // uses half-floats (16 bit) internally, even though + // the format is still GL_FLOAT (32 bit) + + // --- Other constants. 
+ MPGL_TYPE_UNORM = 1, + MPGL_TYPE_UINT = 2, + MPGL_TYPE_FLOAT = 3, +}; + +int gl_format_feature_flags(GL *gl); +const struct gl_format *gl_find_internal_format(GL *gl, GLint internal_format); +const struct gl_format *gl_find_special_format(GL *gl, int mpfmt); +const struct gl_format *gl_find_format(GL *gl, int type, int flags, + int bytes_per_component, int n_components); +const struct gl_format *gl_find_unorm_format(GL *gl, int bytes_per_component, + int n_components); +const struct gl_format *gl_find_uint_format(GL *gl, int bytes_per_component, + int n_components); +const struct gl_format *gl_find_float16_format(GL *gl, int n_components); +int gl_format_type(const struct gl_format *format); +GLenum gl_integer_format_to_base(GLenum format); +int gl_component_size(GLenum type); +int gl_format_components(GLenum format); +int gl_bytes_per_pixel(GLenum format, GLenum type); + +#endif diff --git a/video/out/opengl/header_fixes.h b/video/out/opengl/header_fixes.h index 885c277..9953f7e 100644 --- a/video/out/opengl/header_fixes.h +++ b/video/out/opengl/header_fixes.h @@ -62,6 +62,10 @@ #define GL_DEBUG_SEVERITY_NOTIFICATION 0x826B #endif +#ifndef GL_BACK_LEFT +#define GL_BACK_LEFT 0x0402 +#endif + #if HAVE_ANDROID_GL #define GL_UNSIGNED_BYTE_3_3_2 0x8032 #define GL_UNSIGNED_BYTE_2_3_3_REV 0x8362 @@ -80,12 +84,34 @@ #define GL_TEXTURE_LUMINANCE_SIZE 0x8060 #define GL_R16 0x822A #define GL_RG16 0x822C +#define GL_LUMINANCE8 0x8040 +#define GL_LUMINANCE8_ALPHA8 0x8045 #define GL_LUMINANCE16 0x8042 #define GL_LUMINANCE16_ALPHA16 0x8048 #define GL_UNSIGNED_SHORT_8_8_APPLE 0x85BA #define GL_UNSIGNED_SHORT_8_8_REV_APPLE 0x85BB #endif +// GL_ARB_timer_query and EXT_disjoint_timer_query +#ifndef GL_TIME_ELAPSED +// Same as GL_TIME_ELAPSED_EXT +#define GL_TIME_ELAPSED 0x88BF +#endif + +// GL_OES_EGL_image_external, GL_NV_EGL_stream_consumer_external +#ifndef GL_TEXTURE_EXTERNAL_OES +#define GL_TEXTURE_EXTERNAL_OES 0x8D65 +#endif + +// GL_ANGLE_translated_shader_source +#ifndef 
GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE +#define GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE 0x93A0 +#endif + +#ifndef GL_RGB_RAW_422_APPLE +#define GL_RGB_RAW_422_APPLE 0x8A51 +#endif + #undef MP_GET_GL_WORKAROUNDS #endif // MP_GET_GL_WORKAROUNDS diff --git a/video/out/opengl/hwdec.c b/video/out/opengl/hwdec.c index b58af9b..8c82861 100644 --- a/video/out/opengl/hwdec.c +++ b/video/out/opengl/hwdec.c @@ -29,6 +29,8 @@ extern const struct gl_hwdec_driver gl_hwdec_vaglx; extern const struct gl_hwdec_driver gl_hwdec_videotoolbox; extern const struct gl_hwdec_driver gl_hwdec_vdpau; extern const struct gl_hwdec_driver gl_hwdec_dxva2egl; +extern const struct gl_hwdec_driver gl_hwdec_d3d11egl; +extern const struct gl_hwdec_driver gl_hwdec_d3d11eglrgb; extern const struct gl_hwdec_driver gl_hwdec_dxva2gldx; extern const struct gl_hwdec_driver gl_hwdec_dxva2; @@ -45,8 +47,10 @@ static const struct gl_hwdec_driver *const mpgl_hwdec_drivers[] = { #if HAVE_VIDEOTOOLBOX_GL &gl_hwdec_videotoolbox, #endif -#if HAVE_DXVA2_HWACCEL +#if HAVE_D3D_HWACCEL #if HAVE_EGL_ANGLE + &gl_hwdec_d3d11egl, + &gl_hwdec_d3d11eglrgb, &gl_hwdec_dxva2egl, #endif #if HAVE_GL_DXINTEROP @@ -59,6 +63,7 @@ static const struct gl_hwdec_driver *const mpgl_hwdec_drivers[] = { static struct gl_hwdec *load_hwdec_driver(struct mp_log *log, GL *gl, struct mpv_global *global, + struct mp_hwdec_devices *devs, const struct gl_hwdec_driver *drv, bool is_auto) { @@ -68,7 +73,7 @@ static struct gl_hwdec *load_hwdec_driver(struct mp_log *log, GL *gl, .log = mp_log_new(hwdec, log, drv->name), .global = global, .gl = gl, - .gl_texture_target = GL_TEXTURE_2D, + .devs = devs, .probing = is_auto, }; mp_verbose(log, "Loading hwdec driver '%s'\n", drv->name); @@ -80,14 +85,16 @@ static struct gl_hwdec *load_hwdec_driver(struct mp_log *log, GL *gl, return hwdec; } -struct gl_hwdec *gl_hwdec_load_api_id(struct mp_log *log, GL *gl, - struct mpv_global *g, int id) +struct gl_hwdec *gl_hwdec_load_api(struct mp_log *log, GL *gl, + 
struct mpv_global *g, + struct mp_hwdec_devices *devs, + enum hwdec_type api) { - bool is_auto = id == HWDEC_AUTO; + bool is_auto = HWDEC_IS_AUTO(api); for (int n = 0; mpgl_hwdec_drivers[n]; n++) { const struct gl_hwdec_driver *drv = mpgl_hwdec_drivers[n]; - if (is_auto || id == drv->api) { - struct gl_hwdec *r = load_hwdec_driver(log, gl, g, drv, is_auto); + if (is_auto || api == drv->api) { + struct gl_hwdec *r = load_hwdec_driver(log, gl, g, devs, drv, is_auto); if (r) return r; } @@ -95,19 +102,6 @@ struct gl_hwdec *gl_hwdec_load_api_id(struct mp_log *log, GL *gl, return NULL; } -// Like gl_hwdec_load_api_id(), but use option names. -struct gl_hwdec *gl_hwdec_load_api(struct mp_log *log, GL *gl, - struct mpv_global *g, const char *api_name) -{ - int id = HWDEC_NONE; - for (const struct m_opt_choice_alternatives *c = mp_hwdec_names; c->name; c++) - { - if (strcmp(c->name, api_name) == 0) - id = c->value; - } - return gl_hwdec_load_api_id(log, gl, g, id); -} - void gl_hwdec_uninit(struct gl_hwdec *hwdec) { if (hwdec) diff --git a/video/out/opengl/hwdec.h b/video/out/opengl/hwdec.h index 5126d7f..29ccd18 100644 --- a/video/out/opengl/hwdec.h +++ b/video/out/opengl/hwdec.h @@ -4,54 +4,61 @@ #include "common.h" #include "video/hwdec.h" -struct mp_hwdec_info; - struct gl_hwdec { const struct gl_hwdec_driver *driver; struct mp_log *log; struct mpv_global *global; GL *gl; - struct mp_hwdec_ctx *hwctx; + struct mp_hwdec_devices *devs; + // GLSL extensions required to sample textures from this. + const char **glsl_extensions; // For free use by hwdec driver void *priv; // For working around the vdpau vs. vaapi mess. bool probing; - // hwdec backends must set this to an IMGFMT_ that has an equivalent - // internal representation in gl_video.c as the hardware texture. - // It's used to build the rendering chain. For example, setting it to - // IMGFMT_RGB0 indicates that the video texture is RGB. 
- int converted_imgfmt; - // Normally this is GL_TEXTURE_2D, but the hwdec driver can set it to - // GL_TEXTURE_RECTANGLE. This is needed because VideoToolbox is shit. - GLenum gl_texture_target; +}; + +struct gl_hwdec_plane { + GLuint gl_texture; + GLenum gl_target; + int tex_w, tex_h; // allocated texture size + char swizzle[5]; // component order (if length is 0, use defaults) +}; + +struct gl_hwdec_frame { + struct gl_hwdec_plane planes[4]; + bool vdpau_fields; }; struct gl_hwdec_driver { - // Name of the interop backend. This is used for logging only. + // Name of the interop backend. This is used for informational purposes only. const char *name; // Used to explicitly request a specific API. enum hwdec_type api; // The hardware surface IMGFMT_ that must be passed to map_image later. int imgfmt; - // Create the hwdec device. It must fill in hw->info, if applicable. - // This also must set hw->converted_imgfmt. + // Create the hwdec device. It must add it to hw->devs, if applicable. int (*create)(struct gl_hwdec *hw); // Prepare for rendering video. (E.g. create textures.) // Called on initialization, and every time the video size changes. // *params must be set to the format the hw textures return. - // This also can update hw->converted_imgfmt. int (*reinit)(struct gl_hwdec *hw, struct mp_image_params *params); // Return textures that contain a copy or reference of the given hw_image. - int (*map_image)(struct gl_hwdec *hw, struct mp_image *hw_image, - GLuint *out_textures); + // The textures mirror the format returned by the reinit params argument. + // The textures must remain valid until unmap is called. + // hw_image remains referenced by the caller until unmap is called. + int (*map_frame)(struct gl_hwdec *hw, struct mp_image *hw_image, + struct gl_hwdec_frame *out_frame); + // Must be idempotent. 
+ void (*unmap)(struct gl_hwdec *hw); void (*destroy)(struct gl_hwdec *hw); }; struct gl_hwdec *gl_hwdec_load_api(struct mp_log *log, GL *gl, - struct mpv_global *g, const char *api_name); -struct gl_hwdec *gl_hwdec_load_api_id(struct mp_log *log, GL *gl, - struct mpv_global *g, int id); + struct mpv_global *g, + struct mp_hwdec_devices *devs, + enum hwdec_type api); void gl_hwdec_uninit(struct gl_hwdec *hwdec); diff --git a/video/out/opengl/hwdec_d3d11egl.c b/video/out/opengl/hwdec_d3d11egl.c new file mode 100644 index 0000000..549d3f5 --- /dev/null +++ b/video/out/opengl/hwdec_d3d11egl.c @@ -0,0 +1,335 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <initguid.h> +#include <assert.h> +#include <windows.h> +#include <d3d11.h> + +#include <EGL/egl.h> +#include <EGL/eglext.h> + +#include "angle_common.h" +#include "angle_dynamic.h" + +#include "common/common.h" +#include "osdep/timer.h" +#include "osdep/windows_utils.h" +#include "hwdec.h" +#include "video/hwdec.h" + +#ifndef EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE +#define EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE 0x3AAB +#endif + +struct priv { + struct mp_hwdec_ctx hwctx; + + ID3D11Device *d3d11_device; + EGLDisplay egl_display; + + EGLStreamKHR egl_stream; + GLuint gl_textures[3]; + + // EGL_KHR_stream + EGLStreamKHR (EGLAPIENTRY *CreateStreamKHR)(EGLDisplay dpy, + const EGLint *attrib_list); + EGLBoolean (EGLAPIENTRY *DestroyStreamKHR)(EGLDisplay dpy, + EGLStreamKHR stream); + + // EGL_KHR_stream_consumer_gltexture + EGLBoolean (EGLAPIENTRY *StreamConsumerAcquireKHR) + (EGLDisplay dpy, EGLStreamKHR stream); + EGLBoolean (EGLAPIENTRY *StreamConsumerReleaseKHR) + (EGLDisplay dpy, EGLStreamKHR stream); + + // EGL_NV_stream_consumer_gltexture_yuv + EGLBoolean (EGLAPIENTRY *StreamConsumerGLTextureExternalAttribsNV) + (EGLDisplay dpy, EGLStreamKHR stream, EGLAttrib *attrib_list); + + // EGL_ANGLE_stream_producer_d3d_texture_nv12 + EGLBoolean (EGLAPIENTRY *CreateStreamProducerD3DTextureNV12ANGLE) + (EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list); + EGLBoolean (EGLAPIENTRY *StreamPostD3DTextureNV12ANGLE) + (EGLDisplay dpy, EGLStreamKHR stream, void *texture, + const EGLAttrib *attrib_list); +}; + +static void destroy_objects(struct gl_hwdec *hw) +{ + struct priv *p = hw->priv; + GL *gl = hw->gl; + + if (p->egl_stream) + p->DestroyStreamKHR(p->egl_display, p->egl_stream); + p->egl_stream = 0; + + for (int n = 0; n < 3; n++) { + gl->DeleteTextures(1, &p->gl_textures[n]); + p->gl_textures[n] = 0; + } +} + +static void destroy(struct gl_hwdec *hw) +{ + struct priv *p = hw->priv; + + destroy_objects(hw); + + hwdec_devices_remove(hw->devs, 
&p->hwctx); + + if (p->d3d11_device) + ID3D11Device_Release(p->d3d11_device); + p->d3d11_device = NULL; +} + +static int create(struct gl_hwdec *hw) +{ + if (!angle_load()) + return -1; + + EGLDisplay egl_display = eglGetCurrentDisplay(); + if (!egl_display) + return -1; + + if (!eglGetCurrentContext()) + return -1; + + const char *exts = eglQueryString(egl_display, EGL_EXTENSIONS); + if (!exts || !strstr(exts, "EGL_ANGLE_d3d_share_handle_client_buffer") || + !strstr(exts, "EGL_ANGLE_stream_producer_d3d_texture_nv12") || + !(strstr(hw->gl->extensions, "GL_OES_EGL_image_external_essl3") || + hw->gl->es == 200) || + !strstr(exts, "EGL_EXT_device_query") || + !(hw->gl->mpgl_caps & MPGL_CAP_TEX_RG)) + return -1; + + HRESULT hr; + struct priv *p = talloc_zero(hw, struct priv); + hw->priv = p; + + p->egl_display = egl_display; + + p->CreateStreamKHR = (void *)eglGetProcAddress("eglCreateStreamKHR"); + p->DestroyStreamKHR = (void *)eglGetProcAddress("eglDestroyStreamKHR"); + p->StreamConsumerAcquireKHR = + (void *)eglGetProcAddress("eglStreamConsumerAcquireKHR"); + p->StreamConsumerReleaseKHR = + (void *)eglGetProcAddress("eglStreamConsumerReleaseKHR"); + p->StreamConsumerGLTextureExternalAttribsNV = + (void *)eglGetProcAddress("eglStreamConsumerGLTextureExternalAttribsNV"); + p->CreateStreamProducerD3DTextureNV12ANGLE = + (void *)eglGetProcAddress("eglCreateStreamProducerD3DTextureNV12ANGLE"); + p->StreamPostD3DTextureNV12ANGLE = + (void *)eglGetProcAddress("eglStreamPostD3DTextureNV12ANGLE"); + + if (!p->CreateStreamKHR || !p->DestroyStreamKHR || + !p->StreamConsumerAcquireKHR || !p->StreamConsumerReleaseKHR || + !p->StreamConsumerGLTextureExternalAttribsNV || + !p->CreateStreamProducerD3DTextureNV12ANGLE || + !p->StreamPostD3DTextureNV12ANGLE) + { + MP_ERR(hw, "Failed to load some EGLStream functions.\n"); + goto fail; + } + + static const char *es2_exts[] = {"GL_NV_EGL_stream_consumer_external", 0}; + static const char *es3_exts[] = 
{"GL_NV_EGL_stream_consumer_external", + "GL_OES_EGL_image_external_essl3", 0}; + hw->glsl_extensions = hw->gl->es == 200 ? es2_exts : es3_exts; + + PFNEGLQUERYDISPLAYATTRIBEXTPROC p_eglQueryDisplayAttribEXT = + (void *)eglGetProcAddress("eglQueryDisplayAttribEXT"); + PFNEGLQUERYDEVICEATTRIBEXTPROC p_eglQueryDeviceAttribEXT = + (void *)eglGetProcAddress("eglQueryDeviceAttribEXT"); + if (!p_eglQueryDisplayAttribEXT || !p_eglQueryDeviceAttribEXT) + goto fail; + + EGLAttrib device = 0; + if (!p_eglQueryDisplayAttribEXT(egl_display, EGL_DEVICE_EXT, &device)) + goto fail; + EGLAttrib d3d_device = 0; + if (!p_eglQueryDeviceAttribEXT((EGLDeviceEXT)device, + EGL_D3D11_DEVICE_ANGLE, &d3d_device)) + { + MP_ERR(hw, "Could not get EGL_D3D11_DEVICE_ANGLE from ANGLE.\n"); + goto fail; + } + + p->d3d11_device = (ID3D11Device *)d3d_device; + if (!p->d3d11_device) + goto fail; + ID3D11Device_AddRef(p->d3d11_device); + + if (!d3d11_check_decoding(p->d3d11_device)) { + MP_VERBOSE(hw, "D3D11 video decoding not supported on this system.\n"); + goto fail; + } + + ID3D10Multithread *multithread; + hr = ID3D11Device_QueryInterface(p->d3d11_device, &IID_ID3D10Multithread, + (void **)&multithread); + if (FAILED(hr)) { + MP_ERR(hw, "Failed to get Multithread interface: %s\n", + mp_HRESULT_to_str(hr)); + goto fail; + } + ID3D10Multithread_SetMultithreadProtected(multithread, TRUE); + ID3D10Multithread_Release(multithread); + + p->hwctx = (struct mp_hwdec_ctx){ + .type = HWDEC_D3D11VA, + .driver_name = hw->driver->name, + .ctx = p->d3d11_device, + }; + hwdec_devices_add(hw->devs, &p->hwctx); + + return 0; +fail: + destroy(hw); + return -1; +} + +static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) +{ + struct priv *p = hw->priv; + GL *gl = hw->gl; + + destroy_objects(hw); + + if (params->hw_subfmt != IMGFMT_NV12) { + MP_FATAL(hw, "Format not supported.\n"); + return -1; + } + + // Hope that the given texture unit range is not "in use" by anything. 
+ // The texture units need to be bound during init only, and are free for + // use again after the initialization here is done. + int texunits = 0; // [texunits, texunits + num_planes) + int num_planes = 2; + int gl_target = GL_TEXTURE_EXTERNAL_OES; + + p->egl_stream = p->CreateStreamKHR(p->egl_display, (EGLint[]){EGL_NONE}); + if (!p->egl_stream) + goto fail; + + for (int n = 0; n < num_planes; n++) { + gl->ActiveTexture(GL_TEXTURE0 + texunits + n); + gl->GenTextures(1, &p->gl_textures[n]); + gl->BindTexture(gl_target, p->gl_textures[n]); + gl->TexParameteri(gl_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(gl_target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(gl_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(gl_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } + + EGLAttrib attrs[] = { + EGL_COLOR_BUFFER_TYPE, EGL_YUV_BUFFER_EXT, + EGL_YUV_NUMBER_OF_PLANES_EXT, num_planes, + EGL_YUV_PLANE0_TEXTURE_UNIT_NV, texunits + 0, + EGL_YUV_PLANE1_TEXTURE_UNIT_NV, texunits + 1, + EGL_NONE, + }; + + if (!p->StreamConsumerGLTextureExternalAttribsNV(p->egl_display, p->egl_stream, + attrs)) + goto fail; + + if (!p->CreateStreamProducerD3DTextureNV12ANGLE(p->egl_display, p->egl_stream, + (EGLAttrib[]){EGL_NONE})) + goto fail; + + params->imgfmt = params->hw_subfmt; + + for (int n = 0; n < num_planes; n++) { + gl->ActiveTexture(GL_TEXTURE0 + texunits + n); + gl->BindTexture(gl_target, 0); + } + gl->ActiveTexture(GL_TEXTURE0); + return 0; +fail: + MP_ERR(hw, "Failed to create EGLStream\n"); + if (p->egl_stream) + p->DestroyStreamKHR(p->egl_display, p->egl_stream); + p->egl_stream = 0; + gl->ActiveTexture(GL_TEXTURE0); + return -1; +} + +static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, + struct gl_hwdec_frame *out_frame) +{ + struct priv *p = hw->priv; + + if (!p->gl_textures[0]) + return -1; + + ID3D11Texture2D *d3d_tex = (void *)hw_image->planes[1]; + int d3d_subindex = (intptr_t)hw_image->planes[2]; + if 
(!d3d_tex) + return -1; + + EGLAttrib attrs[] = { + EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE, d3d_subindex, + EGL_NONE, + }; + if (!p->StreamPostD3DTextureNV12ANGLE(p->egl_display, p->egl_stream, + (void *)d3d_tex, attrs)) + return -1; + + if (!p->StreamConsumerAcquireKHR(p->egl_display, p->egl_stream)) + return -1; + + D3D11_TEXTURE2D_DESC texdesc; + ID3D11Texture2D_GetDesc(d3d_tex, &texdesc); + + *out_frame = (struct gl_hwdec_frame){ + .planes = { + { + .gl_texture = p->gl_textures[0], + .gl_target = GL_TEXTURE_EXTERNAL_OES, + .tex_w = texdesc.Width, + .tex_h = texdesc.Height, + }, + { + .gl_texture = p->gl_textures[1], + .gl_target = GL_TEXTURE_EXTERNAL_OES, + .tex_w = texdesc.Width / 2, + .tex_h = texdesc.Height / 2, + }, + }, + }; + return 0; +} + +static void unmap(struct gl_hwdec *hw) +{ + struct priv *p = hw->priv; + if (p->egl_stream) + p->StreamConsumerReleaseKHR(p->egl_display, p->egl_stream); +} + +const struct gl_hwdec_driver gl_hwdec_d3d11egl = { + .name = "d3d11-egl", + .api = HWDEC_D3D11VA, + .imgfmt = IMGFMT_D3D11NV12, + .create = create, + .reinit = reinit, + .map_frame = map_frame, + .unmap = unmap, + .destroy = destroy, +}; diff --git a/video/out/opengl/hwdec_d3d11eglrgb.c b/video/out/opengl/hwdec_d3d11eglrgb.c new file mode 100644 index 0000000..2e61189 --- /dev/null +++ b/video/out/opengl/hwdec_d3d11eglrgb.c @@ -0,0 +1,268 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <initguid.h> +#include <assert.h> +#include <windows.h> +#include <d3d11.h> + +#include <EGL/egl.h> +#include <EGL/eglext.h> + +#include "angle_common.h" +#include "angle_dynamic.h" + +#include "common/common.h" +#include "osdep/timer.h" +#include "osdep/windows_utils.h" +#include "hwdec.h" +#include "video/hwdec.h" + +#ifndef EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE +#define EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE 0x3AAB +#endif + +struct priv { + struct mp_hwdec_ctx hwctx; + + ID3D11Device *d3d11_device; + + EGLDisplay egl_display; + EGLConfig egl_config; + EGLSurface egl_surface; + + GLuint gl_texture; +}; + +static void unmap(struct gl_hwdec *hw) +{ + struct priv *p = hw->priv; + if (p->egl_surface) { + eglReleaseTexImage(p->egl_display, p->egl_surface, EGL_BACK_BUFFER); + eglDestroySurface(p->egl_display, p->egl_surface); + } + p->egl_surface = NULL; +} + +static void destroy_objects(struct gl_hwdec *hw) +{ + struct priv *p = hw->priv; + GL *gl = hw->gl; + + unmap(hw); + + gl->DeleteTextures(1, &p->gl_texture); + p->gl_texture = 0; +} + +static void destroy(struct gl_hwdec *hw) +{ + struct priv *p = hw->priv; + + destroy_objects(hw); + + hwdec_devices_remove(hw->devs, &p->hwctx); + + if (p->d3d11_device) + ID3D11Device_Release(p->d3d11_device); + p->d3d11_device = NULL; +} + +static int create(struct gl_hwdec *hw) +{ + if (!angle_load()) + return -1; + + EGLDisplay egl_display = eglGetCurrentDisplay(); + if (!egl_display) + return -1; + + if (!eglGetCurrentContext()) + return -1; + + const char *exts = eglQueryString(egl_display, EGL_EXTENSIONS); + if (!exts || !strstr(exts, "EGL_ANGLE_d3d_share_handle_client_buffer")) + return -1; + + HRESULT hr; + struct priv *p = talloc_zero(hw, struct priv); + hw->priv = p; + + p->egl_display = egl_display; + + HANDLE d3d11_dll = GetModuleHandleW(L"d3d11.dll"); + if 
(!d3d11_dll) { + if (!hw->probing) + MP_ERR(hw, "Failed to load D3D11 library\n"); + goto fail; + } + + PFN_D3D11_CREATE_DEVICE CreateDevice = + (void *)GetProcAddress(d3d11_dll, "D3D11CreateDevice"); + if (!CreateDevice) + goto fail; + + hr = CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, + D3D11_CREATE_DEVICE_VIDEO_SUPPORT, NULL, 0, + D3D11_SDK_VERSION, &p->d3d11_device, NULL, NULL); + if (FAILED(hr)) { + int lev = hw->probing ? MSGL_V : MSGL_ERR; + mp_msg(hw->log, lev, "Failed to create D3D11 Device: %s\n", + mp_HRESULT_to_str(hr)); + goto fail; + } + + ID3D10Multithread *multithread; + hr = ID3D11Device_QueryInterface(p->d3d11_device, &IID_ID3D10Multithread, + (void **)&multithread); + if (FAILED(hr)) { + ID3D10Multithread_Release(multithread); + MP_ERR(hw, "Failed to get Multithread interface: %s\n", + mp_HRESULT_to_str(hr)); + goto fail; + } + ID3D10Multithread_SetMultithreadProtected(multithread, TRUE); + ID3D10Multithread_Release(multithread); + + if (!d3d11_check_decoding(p->d3d11_device)) { + MP_VERBOSE(hw, "D3D11 video decoding not supported on this system.\n"); + goto fail; + } + + EGLint attrs[] = { + EGL_BUFFER_SIZE, 32, + EGL_RED_SIZE, 8, + EGL_GREEN_SIZE, 8, + EGL_BLUE_SIZE, 8, + EGL_SURFACE_TYPE, EGL_PBUFFER_BIT, + EGL_ALPHA_SIZE, 8, + EGL_BIND_TO_TEXTURE_RGBA, EGL_TRUE, + EGL_NONE + }; + EGLint count; + if (!eglChooseConfig(p->egl_display, attrs, &p->egl_config, 1, &count) || + !count) { + MP_ERR(hw, "Failed to get EGL surface configuration\n"); + goto fail; + } + + p->hwctx = (struct mp_hwdec_ctx){ + .type = HWDEC_D3D11VA, + .driver_name = hw->driver->name, + .ctx = p->d3d11_device, + }; + hwdec_devices_add(hw->devs, &p->hwctx); + + return 0; +fail: + destroy(hw); + return -1; +} + +static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) +{ + struct priv *p = hw->priv; + GL *gl = hw->gl; + + destroy_objects(hw); + + gl->GenTextures(1, &p->gl_texture); + gl->BindTexture(GL_TEXTURE_2D, p->gl_texture); + 
gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + gl->BindTexture(GL_TEXTURE_2D, 0); + + params->imgfmt = IMGFMT_RGB0; + return 0; +} + +static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, + struct gl_hwdec_frame *out_frame) +{ + struct priv *p = hw->priv; + GL *gl = hw->gl; + HRESULT hr; + + if (!p->gl_texture) + return -1; + + ID3D11Texture2D *d3d_tex = (void *)hw_image->planes[1]; + if (!d3d_tex) + return -1; + + IDXGIResource *res; + hr = IUnknown_QueryInterface(d3d_tex, &IID_IDXGIResource, (void **)&res); + if (FAILED(hr)) + return -1; + + HANDLE share_handle = NULL; + hr = IDXGIResource_GetSharedHandle(res, &share_handle); + if (FAILED(hr)) + share_handle = NULL; + + IDXGIResource_Release(res); + + if (!share_handle) + return -1; + + D3D11_TEXTURE2D_DESC texdesc; + ID3D11Texture2D_GetDesc(d3d_tex, &texdesc); + + EGLint attrib_list[] = { + EGL_WIDTH, texdesc.Width, + EGL_HEIGHT, texdesc.Height, + EGL_TEXTURE_FORMAT, EGL_TEXTURE_RGBA, + EGL_TEXTURE_TARGET, EGL_TEXTURE_2D, + EGL_NONE + }; + p->egl_surface = eglCreatePbufferFromClientBuffer( + p->egl_display, EGL_D3D_TEXTURE_2D_SHARE_HANDLE_ANGLE, + share_handle, p->egl_config, attrib_list); + if (p->egl_surface == EGL_NO_SURFACE) { + MP_ERR(hw, "Failed to create EGL surface\n"); + return -1; + } + + gl->BindTexture(GL_TEXTURE_2D, p->gl_texture); + eglBindTexImage(p->egl_display, p->egl_surface, EGL_BACK_BUFFER); + gl->BindTexture(GL_TEXTURE_2D, 0); + + *out_frame = (struct gl_hwdec_frame){ + .planes = { + { + .gl_texture = p->gl_texture, + .gl_target = GL_TEXTURE_2D, + .tex_w = texdesc.Width, + .tex_h = texdesc.Height, + }, + }, + }; + return 0; +} + +const struct gl_hwdec_driver gl_hwdec_d3d11eglrgb = { + .name = "d3d11-egl-rgb", + .api = HWDEC_D3D11VA, + .imgfmt 
= IMGFMT_D3D11RGB, + .create = create, + .reinit = reinit, + .map_frame = map_frame, + .unmap = unmap, + .destroy = destroy, +}; diff --git a/video/out/opengl/hwdec_dxva2.c b/video/out/opengl/hwdec_dxva2.c index f72c817..d832bb4 100644 --- a/video/out/opengl/hwdec_dxva2.c +++ b/video/out/opengl/hwdec_dxva2.c @@ -1,8 +1,9 @@ +#include <d3d9.h> + #include "common/common.h" #include "hwdec.h" #include "utils.h" -#include "video/d3d.h" #include "video/hwdec.h" // This does not provide real (zero-copy) interop - it merely exists for @@ -10,36 +11,38 @@ // may help with OpenGL fullscreen mode. struct priv { - struct mp_d3d_ctx ctx; + struct mp_hwdec_ctx hwctx; }; static void destroy(struct gl_hwdec *hw) { struct priv *p = hw->priv; - if (p->ctx.d3d9_device) - IDirect3DDevice9_Release(p->ctx.d3d9_device); + hwdec_devices_remove(hw->devs, &p->hwctx); + if (p->hwctx.ctx) + IDirect3DDevice9_Release((IDirect3DDevice9 *)p->hwctx.ctx); } static int create(struct gl_hwdec *hw) { GL *gl = hw->gl; - if (hw->hwctx || !gl->MPGetNativeDisplay) + if (!gl->MPGetNativeDisplay) return -1; struct priv *p = talloc_zero(hw, struct priv); hw->priv = p; - p->ctx.d3d9_device = gl->MPGetNativeDisplay("IDirect3DDevice9"); - if (!p->ctx.d3d9_device) + IDirect3DDevice9 *d3d = gl->MPGetNativeDisplay("IDirect3DDevice9"); + if (!d3d) return -1; - p->ctx.hwctx.type = HWDEC_DXVA2_COPY; - p->ctx.hwctx.d3d_ctx = &p->ctx; - - MP_VERBOSE(hw, "Using libmpv supplied device %p.\n", p->ctx.d3d9_device); + MP_VERBOSE(hw, "Using libmpv supplied device %p.\n", d3d); - hw->hwctx = &p->ctx.hwctx; - hw->converted_imgfmt = 0; + p->hwctx = (struct mp_hwdec_ctx){ + .type = HWDEC_DXVA2_COPY, + .driver_name = hw->driver->name, + .ctx = d3d, + }; + hwdec_devices_add(hw->devs, &p->hwctx); return 0; } @@ -48,8 +51,8 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) return -1; } -static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, - GLuint *out_textures) +static int map_frame(struct 
gl_hwdec *hw, struct mp_image *hw_image, + struct gl_hwdec_frame *out_frame) { return -1; } @@ -60,6 +63,6 @@ const struct gl_hwdec_driver gl_hwdec_dxva2 = { .imgfmt = -1, .create = create, .reinit = reinit, - .map_image = map_image, + .map_frame = map_frame, .destroy = destroy, }; diff --git a/video/out/opengl/hwdec_dxva2egl.c b/video/out/opengl/hwdec_dxva2egl.c index eed9426..d67a85b 100644 --- a/video/out/opengl/hwdec_dxva2egl.c +++ b/video/out/opengl/hwdec_dxva2egl.c @@ -17,19 +17,21 @@ #include <assert.h> #include <windows.h> +#include <d3d9.h> + #include <EGL/egl.h> #include <EGL/eglext.h> +#include "angle_dynamic.h" + #include "common/common.h" #include "osdep/timer.h" #include "osdep/windows_utils.h" #include "hwdec.h" -#include "video/dxva2.h" -#include "video/d3d.h" #include "video/hwdec.h" struct priv { - struct mp_d3d_ctx ctx; + struct mp_hwdec_ctx hwctx; HMODULE d3d9_dll; IDirect3D9Ex *d3d9ex; @@ -77,6 +79,8 @@ static void destroy(struct gl_hwdec *hw) destroy_textures(hw); + hwdec_devices_remove(hw->devs, &p->hwctx); + if (p->query9) IDirect3DQuery9_Release(p->query9); @@ -92,13 +96,16 @@ static void destroy(struct gl_hwdec *hw) static int create(struct gl_hwdec *hw) { - if (hw->hwctx) + if (!angle_load()) return -1; EGLDisplay egl_display = eglGetCurrentDisplay(); if (!egl_display) return -1; + if (!eglGetCurrentContext()) + return -1; + const char *exts = eglQueryString(egl_display, EGL_EXTENSIONS); if (!exts || !strstr(exts, "EGL_ANGLE_d3d_share_handle_client_buffer")) { @@ -202,13 +209,13 @@ static int create(struct gl_hwdec *hw) goto fail; } - hw->converted_imgfmt = IMGFMT_RGB0; - - p->ctx.d3d9_device = (IDirect3DDevice9 *)p->device9ex; - p->ctx.hwctx.type = HWDEC_DXVA2; - p->ctx.hwctx.d3d_ctx = &p->ctx; + p->hwctx = (struct mp_hwdec_ctx){ + .type = HWDEC_DXVA2, + .driver_name = hw->driver->name, + .ctx = (IDirect3DDevice9 *)p->device9ex, + }; + hwdec_devices_add(hw->devs, &p->hwctx); - hw->hwctx = &p->ctx.hwctx; return 0; fail: destroy(hw); @@ 
-223,8 +230,6 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) destroy_textures(hw); - assert(params->imgfmt == hw->driver->imgfmt); - HANDLE share_handle = NULL; hr = IDirect3DDevice9Ex_CreateTexture(p->device9ex, params->w, params->h, @@ -269,14 +274,15 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); gl->BindTexture(GL_TEXTURE_2D, 0); + params->imgfmt = IMGFMT_RGB0; return 0; fail: destroy_textures(hw); return -1; } -static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, - GLuint *out_textures) +static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, + struct gl_hwdec_frame *out_frame) { struct priv *p = hw->priv; GL *gl = hw->gl; @@ -285,7 +291,7 @@ static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, HRESULT hr; RECT rc = {0, 0, hw_image->w, hw_image->h}; - IDirect3DSurface9* hw_surface = d3d9_surface_in_mp_image(hw_image); + IDirect3DSurface9* hw_surface = (IDirect3DSurface9 *)hw_image->planes[3]; hr = IDirect3DDevice9Ex_StretchRect(p->device9ex, hw_surface, &rc, p->surface9, &rc, @@ -329,7 +335,16 @@ static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, eglBindTexImage(p->egl_display, p->egl_surface, EGL_BACK_BUFFER); gl->BindTexture(GL_TEXTURE_2D, 0); - out_textures[0] = p->gl_texture; + *out_frame = (struct gl_hwdec_frame){ + .planes = { + { + .gl_texture = p->gl_texture, + .gl_target = GL_TEXTURE_2D, + .tex_w = hw_image->w, + .tex_h = hw_image->h, + }, + }, + }; return 0; } @@ -339,6 +354,6 @@ const struct gl_hwdec_driver gl_hwdec_dxva2egl = { .imgfmt = IMGFMT_DXVA2, .create = create, .reinit = reinit, - .map_image = map_image, + .map_frame = map_frame, .destroy = destroy, }; diff --git a/video/out/opengl/hwdec_dxva2gldx.c b/video/out/opengl/hwdec_dxva2gldx.c index 69be0cc..4cd8c1c 100644 --- a/video/out/opengl/hwdec_dxva2gldx.c +++ b/video/out/opengl/hwdec_dxva2gldx.c @@ -15,14 
+15,13 @@ * License along with mpv. If not, see <http://www.gnu.org/licenses/>. */ +#include <d3d9.h> #include <assert.h> #include "common/common.h" #include "osdep/windows_utils.h" #include "hwdec.h" #include "video/hwdec.h" -#include "video/d3d.h" -#include "video/dxva2.h" // for WGL_ACCESS_READ_ONLY_NV #include <GL/wglext.h> @@ -30,7 +29,7 @@ #define SHARED_SURFACE_D3DFMT D3DFMT_X8R8G8B8 #define SHARED_SURFACE_MPFMT IMGFMT_RGB0 struct priv { - struct mp_d3d_ctx ctx; + struct mp_hwdec_ctx hwctx; IDirect3DDevice9Ex *device; HANDLE device_h; @@ -74,6 +73,8 @@ static void destroy(struct gl_hwdec *hw) struct priv *p = hw->priv; destroy_objects(hw); + hwdec_devices_remove(hw->devs, &p->hwctx); + if (p->device) IDirect3DDevice9Ex_Release(p->device); } @@ -81,10 +82,8 @@ static void destroy(struct gl_hwdec *hw) static int create(struct gl_hwdec *hw) { GL *gl = hw->gl; - if (hw->hwctx || !gl->MPGetNativeDisplay || - !(gl->mpgl_caps & MPGL_CAP_DXINTEROP)) { + if (!gl->MPGetNativeDisplay || !(gl->mpgl_caps & MPGL_CAP_DXINTEROP)) return -1; - } struct priv *p = talloc_zero(hw, struct priv); hw->priv = p; @@ -100,13 +99,13 @@ static int create(struct gl_hwdec *hw) if (!p->device) return -1; IDirect3DDevice9Ex_AddRef(p->device); - p->ctx.d3d9_device = (IDirect3DDevice9 *)p->device; - - p->ctx.hwctx.type = HWDEC_DXVA2; - p->ctx.hwctx.d3d_ctx = &p->ctx; - hw->hwctx = &p->ctx.hwctx; - hw->converted_imgfmt = SHARED_SURFACE_MPFMT; + p->hwctx = (struct mp_hwdec_ctx){ + .type = HWDEC_DXVA2, + .driver_name = hw->driver->name, + .ctx = (IDirect3DDevice9 *)p->device, + }; + hwdec_devices_add(hw->devs, &p->hwctx); return 0; } @@ -118,8 +117,6 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) destroy_objects(hw); - assert(params->imgfmt == hw->driver->imgfmt); - HANDLE share_handle = NULL; hr = IDirect3DDevice9Ex_CreateRenderTarget( p->device, @@ -162,14 +159,16 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) goto fail; } + params->imgfmt 
= SHARED_SURFACE_MPFMT; + return 0; fail: destroy_objects(hw); return -1; } -static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, - GLuint *out_textures) +static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, + struct gl_hwdec_frame *out_frame) { assert(hw_image && hw_image->imgfmt == hw->driver->imgfmt); GL *gl = hw->gl; @@ -182,7 +181,7 @@ static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, return -1; } - IDirect3DSurface9* hw_surface = d3d9_surface_in_mp_image(hw_image); + IDirect3DSurface9* hw_surface = (IDirect3DSurface9 *)hw_image->planes[3]; RECT rc = {0, 0, hw_image->w, hw_image->h}; hr = IDirect3DDevice9Ex_StretchRect(p->device, hw_surface, &rc, @@ -199,7 +198,16 @@ static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, return -1; } - out_textures[0] = p->texture; + *out_frame = (struct gl_hwdec_frame){ + .planes = { + { + .gl_texture = p->texture, + .gl_target = GL_TEXTURE_2D, + .tex_w = hw_image->w, + .tex_h = hw_image->h, + }, + }, + }; return 0; } @@ -209,6 +217,6 @@ const struct gl_hwdec_driver gl_hwdec_dxva2gldx = { .imgfmt = IMGFMT_DXVA2, .create = create, .reinit = reinit, - .map_image = map_image, + .map_frame = map_frame, .destroy = destroy, }; diff --git a/video/out/opengl/hwdec_osx.c b/video/out/opengl/hwdec_osx.c index addc16f..6ddfa66 100644 --- a/video/out/opengl/hwdec_osx.c +++ b/video/out/opengl/hwdec_osx.c @@ -33,6 +33,7 @@ struct vt_gl_plane_format { GLenum gl_format; GLenum gl_type; GLenum gl_internal_format; + char swizzle[5]; }; struct vt_format { @@ -43,9 +44,11 @@ struct vt_format { }; struct priv { + struct mp_hwdec_ctx hwctx; + struct mp_vt_ctx vtctx; + CVPixelBufferRef pbuf; GLuint gl_planes[MP_MAX_PLANES]; - struct mp_hwdec_ctx hwctx; }; static struct vt_format vt_formats[] = { @@ -63,7 +66,7 @@ static struct vt_format vt_formats[] = { .imgfmt = IMGFMT_UYVY, .planes = 1, .gl = { - { GL_RGB_422_APPLE, GL_UNSIGNED_SHORT_8_8_APPLE, GL_RGB } + { GL_RGB_422_APPLE, 
GL_UNSIGNED_SHORT_8_8_APPLE, GL_RGB, "gbra" } } }, { @@ -147,9 +150,9 @@ static bool check_hwdec(struct gl_hwdec *hw) return true; } -static uint32_t get_vt_fmt(struct mp_hwdec_ctx *ctx) +static uint32_t get_vt_fmt(struct mp_vt_ctx *vtctx) { - struct gl_hwdec *hw = ctx->priv; + struct gl_hwdec *hw = vtctx->priv; struct vt_format *f = vt_get_gl_format_from_imgfmt(hw->global->opts->videotoolbox_format); return f ? f->cvpixfmt : (uint32_t)-1; @@ -161,21 +164,21 @@ static int create(struct gl_hwdec *hw) return -1; struct priv *p = talloc_zero(hw, struct priv); - struct vt_format *f = vt_get_gl_format_from_imgfmt(IMGFMT_NV12); - if (!f) - return -1; - hw->priv = p; - hw->converted_imgfmt = f->imgfmt; - hw->hwctx = &p->hwctx; - hw->hwctx->download_image = download_image; - hw->hwctx->type = HWDEC_VIDEOTOOLBOX; - hw->hwctx->get_vt_fmt = get_vt_fmt; - hw->gl_texture_target = GL_TEXTURE_RECTANGLE; hw->gl->GenTextures(MP_MAX_PLANES, p->gl_planes); - hw->hwctx->priv = hw; + p->vtctx = (struct mp_vt_ctx){ + .priv = hw, + .get_vt_fmt = get_vt_fmt, + }; + p->hwctx = (struct mp_hwdec_ctx){ + .type = HWDEC_VIDEOTOOLBOX, + .download_image = download_image, + .ctx = &p->vtctx, + }; + hwdec_devices_add(hw->devs, &p->hwctx); + return 0; } @@ -189,16 +192,13 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) return -1; } - hw->converted_imgfmt = f->imgfmt; + params->imgfmt = f->imgfmt; return 0; } -static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, - GLuint *out_textures) +static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, + struct gl_hwdec_frame *out_frame) { - if (!check_hwdec(hw)) - return -1; - struct priv *p = hw->priv; GL *gl = hw->gl; @@ -222,11 +222,13 @@ static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, const int planes = CVPixelBufferGetPlaneCount(p->pbuf); assert(planar && planes == f->planes || f->planes == 1); + GLenum gl_target = GL_TEXTURE_RECTANGLE; + for (int i = 0; i < f->planes; i++) { - 
gl->BindTexture(hw->gl_texture_target, p->gl_planes[i]); + gl->BindTexture(gl_target, p->gl_planes[i]); CGLError err = CGLTexImageIOSurface2D( - CGLGetCurrentContext(), hw->gl_texture_target, + CGLGetCurrentContext(), gl_target, f->gl[i].gl_internal_format, IOSurfaceGetWidthOfPlane(surface, i), IOSurfaceGetHeightOfPlane(surface, i), @@ -236,9 +238,16 @@ static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, MP_ERR(hw, "error creating IOSurface texture for plane %d: %s (%x)\n", i, CGLErrorString(err), gl->GetError()); - gl->BindTexture(hw->gl_texture_target, 0); + gl->BindTexture(gl_target, 0); - out_textures[i] = p->gl_planes[i]; + out_frame->planes[i] = (struct gl_hwdec_plane){ + .gl_texture = p->gl_planes[i], + .gl_target = gl_target, + .tex_w = IOSurfaceGetWidthOfPlane(surface, i), + .tex_h = IOSurfaceGetHeightOfPlane(surface, i), + }; + snprintf(out_frame->planes[i].swizzle, sizeof(out_frame->planes[i].swizzle), + "%s", f->gl[i].swizzle); } return 0; @@ -251,6 +260,8 @@ static void destroy(struct gl_hwdec *hw) CVPixelBufferRelease(p->pbuf); gl->DeleteTextures(MP_MAX_PLANES, p->gl_planes); + + hwdec_devices_remove(hw->devs, &p->hwctx); } const struct gl_hwdec_driver gl_hwdec_videotoolbox = { @@ -259,6 +270,6 @@ const struct gl_hwdec_driver gl_hwdec_videotoolbox = { .imgfmt = IMGFMT_VIDEOTOOLBOX, .create = create, .reinit = reinit, - .map_image = map_image, + .map_frame = map_frame, .destroy = destroy, }; diff --git a/video/out/opengl/hwdec_vaegl.c b/video/out/opengl/hwdec_vaegl.c index 7b34d6b..6c52cdd 100644 --- a/video/out/opengl/hwdec_vaegl.c +++ b/video/out/opengl/hwdec_vaegl.c @@ -114,7 +114,7 @@ struct priv { EGLImageKHR images[4]; VAImage current_image; bool buffer_acquired; - struct mp_image *current_ref; + int current_mpfmt; EGLImageKHR (EGLAPIENTRY *CreateImageKHR)(EGLDisplay, EGLContext, EGLenum, EGLClientBuffer, @@ -125,7 +125,7 @@ struct priv { static bool test_format(struct gl_hwdec *hw); -static void unref_image(struct gl_hwdec *hw) 
+static void unmap_frame(struct gl_hwdec *hw) { struct priv *p = hw->priv; VAStatus status; @@ -149,8 +149,6 @@ static void unref_image(struct gl_hwdec *hw) p->current_image.image_id = VA_INVALID_ID; } - mp_image_unrefp(&p->current_ref); - va_unlock(p->ctx); } @@ -167,35 +165,13 @@ static void destroy_textures(struct gl_hwdec *hw) static void destroy(struct gl_hwdec *hw) { struct priv *p = hw->priv; - unref_image(hw); + unmap_frame(hw); destroy_textures(hw); + if (p->ctx) + hwdec_devices_remove(hw->devs, &p->ctx->hwctx); va_destroy(p->ctx); } -// Create an empty dummy VPP. This works around a weird bug that affects the -// VA surface format, as it is reported by vaDeriveImage(). Before a VPP -// context or a decoder context is created, the surface format will be reported -// as YV12. Surfaces created after context creation will report NV12 (even -// though surface creation does not take a context as argument!). Existing -// surfaces will change their format from YV12 to NV12 as soon as the decoder -// renders to them! Because we want know the surface format in advance (to -// simplify our renderer configuration logic), we hope that this hack gives -// us reasonable behavior. -// See: https://bugs.freedesktop.org/show_bug.cgi?id=79848 -static void insane_hack(struct gl_hwdec *hw) -{ - struct priv *p = hw->priv; - VAConfigID config; - if (vaCreateConfig(p->display, VAProfileNone, VAEntrypointVideoProc, - NULL, 0, &config) == VA_STATUS_SUCCESS) - { - // We want to keep this until the VADisplay is destroyed. It will - // implicitly free the context. 
- VAContextID context; - vaCreateContext(p->display, config, 0, 0, 0, NULL, 0, &context); - } -} - static int create(struct gl_hwdec *hw) { GL *gl = hw->gl; @@ -205,9 +181,7 @@ static int create(struct gl_hwdec *hw) p->current_image.buf = p->current_image.image_id = VA_INVALID_ID; p->log = hw->log; - if (hw->hwctx) - return -1; - if (!eglGetCurrentDisplay()) + if (!eglGetCurrentContext()) return -1; const char *exts = eglQueryString(eglGetCurrentDisplay(), EGL_EXTENSIONS); @@ -248,13 +222,13 @@ static int create(struct gl_hwdec *hw) MP_VERBOSE(p, "using VAAPI EGL interop\n"); - insane_hack(hw); if (!test_format(hw)) { destroy(hw); return -1; } - hw->hwctx = &p->ctx->hwctx; + p->ctx->hwctx.driver_name = hw->driver->name; + hwdec_devices_add(hw->devs, &p->ctx->hwctx); return 0; } @@ -266,8 +240,6 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) // Recreate them to get rid of all previous image data (possibly). destroy_textures(hw); - assert(params->imgfmt == hw->driver->imgfmt); - gl->GenTextures(4, p->gl_textures); for (int n = 0; n < 4; n++) { gl->BindTexture(GL_TEXTURE_2D, p->gl_textures[n]); @@ -278,6 +250,20 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) } gl->BindTexture(GL_TEXTURE_2D, 0); + p->current_mpfmt = va_fourcc_to_imgfmt(params->hw_subfmt); + if (p->current_mpfmt != IMGFMT_NV12 && + p->current_mpfmt != IMGFMT_420P) + { + MP_FATAL(p, "unsupported VA image format %s\n", + mp_tag_str(params->hw_subfmt)); + return -1; + } + + MP_VERBOSE(p, "format: %s %s\n", mp_tag_str(params->hw_subfmt), + mp_imgfmt_to_name(p->current_mpfmt)); + + params->imgfmt = p->current_mpfmt; + return 0; } @@ -289,17 +275,15 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) attribs[num_attribs] = EGL_NONE; \ } while(0) -static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, - GLuint *out_textures) +static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, + struct gl_hwdec_frame 
*out_frame) { struct priv *p = hw->priv; GL *gl = hw->gl; VAStatus status; VAImage *va_image = &p->current_image; - unref_image(hw); - - mp_image_setrefp(&p->current_ref, hw_image); + unmap_frame(hw); va_lock(p->ctx); @@ -308,21 +292,9 @@ static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, goto err; int mpfmt = va_fourcc_to_imgfmt(va_image->format.fourcc); - if (mpfmt != IMGFMT_NV12 && mpfmt != IMGFMT_420P) { - MP_FATAL(p, "unsupported VA image format %s\n", - mp_tag_str(va_image->format.fourcc)); - goto err; - } - - if (!hw->converted_imgfmt) { - MP_VERBOSE(p, "format: %s %s\n", mp_tag_str(va_image->format.fourcc), - mp_imgfmt_to_name(mpfmt)); - hw->converted_imgfmt = mpfmt; - } - - if (hw->converted_imgfmt != mpfmt) { + if (p->current_mpfmt != mpfmt) { MP_FATAL(p, "mid-stream hwdec format change (%s -> %s) not supported\n", - mp_imgfmt_to_name(hw->converted_imgfmt), mp_imgfmt_to_name(mpfmt)); + mp_imgfmt_to_name(p->current_mpfmt), mp_imgfmt_to_name(mpfmt)); goto err; } @@ -361,12 +333,17 @@ static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, gl->BindTexture(GL_TEXTURE_2D, p->gl_textures[n]); p->EGLImageTargetTexture2DOES(GL_TEXTURE_2D, p->images[n]); - out_textures[n] = p->gl_textures[n]; + out_frame->planes[n] = (struct gl_hwdec_plane){ + .gl_texture = p->gl_textures[n], + .gl_target = GL_TEXTURE_2D, + .tex_w = mp_image_plane_w(&layout, n), + .tex_h = mp_image_plane_h(&layout, n), + }; } gl->BindTexture(GL_TEXTURE_2D, 0); if (va_image->format.fourcc == VA_FOURCC_YV12) - MPSWAP(GLuint, out_textures[1], out_textures[2]); + MPSWAP(struct gl_hwdec_plane, out_frame->planes[1], out_frame->planes[2]); va_unlock(p->ctx); return 0; @@ -374,7 +351,7 @@ static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, err: va_unlock(p->ctx); MP_FATAL(p, "mapping VAAPI EGL image failed\n"); - unref_image(hw); + unmap_frame(hw); return -1; } @@ -387,12 +364,13 @@ static bool test_format(struct gl_hwdec *hw) va_pool_set_allocator(alloc, 
p->ctx, VA_RT_FORMAT_YUV420); struct mp_image *surface = mp_image_pool_get(alloc, IMGFMT_VAAPI, 64, 64); if (surface) { + va_surface_init_subformat(surface); struct mp_image_params params = surface->params; if (reinit(hw, ¶ms) >= 0) { - GLuint textures[4]; - ok = map_image(hw, surface, textures) >= 0; + struct gl_hwdec_frame frame = {0}; + ok = map_frame(hw, surface, &frame) >= 0; } - unref_image(hw); + unmap_frame(hw); } talloc_free(surface); talloc_free(alloc); @@ -406,6 +384,7 @@ const struct gl_hwdec_driver gl_hwdec_vaegl = { .imgfmt = IMGFMT_VAAPI, .create = create, .reinit = reinit, - .map_image = map_image, + .map_frame = map_frame, + .unmap = unmap_frame, .destroy = destroy, }; diff --git a/video/out/opengl/hwdec_vaglx.c b/video/out/opengl/hwdec_vaglx.c index 77b1f27..2e3017c 100644 --- a/video/out/opengl/hwdec_vaglx.c +++ b/video/out/opengl/hwdec_vaglx.c @@ -64,13 +64,13 @@ static void destroy(struct gl_hwdec *hw) { struct priv *p = hw->priv; destroy_texture(hw); + if (p->ctx) + hwdec_devices_remove(hw->devs, &p->ctx->hwctx); va_destroy(p->ctx); } static int create(struct gl_hwdec *hw) { - if (hw->hwctx) - return -1; Display *x11disp = glXGetCurrentDisplay(); if (!x11disp) return -1; @@ -126,8 +126,8 @@ static int create(struct gl_hwdec *hw) return -1; } - hw->hwctx = &p->ctx->hwctx; - hw->converted_imgfmt = IMGFMT_RGB0; + p->ctx->hwctx.driver_name = hw->driver->name; + hwdec_devices_add(hw->devs, &p->ctx->hwctx); return 0; } @@ -138,8 +138,6 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) destroy_texture(hw); - assert(params->imgfmt == hw->driver->imgfmt); - gl->GenTextures(1, &p->gl_texture); gl->BindTexture(GL_TEXTURE_2D, p->gl_texture); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); @@ -168,11 +166,13 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) p->glXBindTexImage(p->xdisplay, p->glxpixmap, GLX_FRONT_EXT, NULL); gl->BindTexture(GL_TEXTURE_2D, 0); + params->imgfmt = IMGFMT_RGB0; 
+ return 0; } -static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, - GLuint *out_textures) +static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, + struct gl_hwdec_frame *out_frame) { struct priv *p = hw->priv; VAStatus status; @@ -189,7 +189,16 @@ static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, CHECK_VA_STATUS(p, "vaPutSurface()"); va_unlock(p->ctx); - out_textures[0] = p->gl_texture; + *out_frame = (struct gl_hwdec_frame){ + .planes = { + { + .gl_texture = p->gl_texture, + .gl_target = GL_TEXTURE_2D, + .tex_w = hw_image->w, + .tex_h = hw_image->h, + }, + }, + }; return 0; } @@ -199,6 +208,6 @@ const struct gl_hwdec_driver gl_hwdec_vaglx = { .imgfmt = IMGFMT_VAAPI, .create = create, .reinit = reinit, - .map_image = map_image, + .map_frame = map_frame, .destroy = destroy, }; diff --git a/video/out/opengl/hwdec_vdpau.c b/video/out/opengl/hwdec_vdpau.c index b1d4962..83f664a 100644 --- a/video/out/opengl/hwdec_vdpau.c +++ b/video/out/opengl/hwdec_vdpau.c @@ -36,20 +36,35 @@ struct priv { struct mp_vdpau_ctx *ctx; uint64_t preemption_counter; struct mp_image_params image_params; - GLuint gl_texture; + GLuint gl_textures[4]; bool vdpgl_initialized; GLvdpauSurfaceNV vdpgl_surface; VdpOutputSurface vdp_surface; struct mp_vdpau_mixer *mixer; + bool direct_mode; bool mapped; }; +static void unmap(struct gl_hwdec *hw) +{ + struct priv *p = hw->priv; + GL *gl = hw->gl; + + if (p->mapped) { + gl->VDPAUUnmapSurfacesNV(1, &p->vdpgl_surface); + if (p->direct_mode) { + gl->VDPAUUnregisterSurfaceNV(p->vdpgl_surface); + p->vdpgl_surface = 0; + } + } + p->mapped = false; +} + static void mark_vdpau_objects_uninitialized(struct gl_hwdec *hw) { struct priv *p = hw->priv; p->vdp_surface = VDP_INVALID_HANDLE; - p->mixer->video_mixer = VDP_INVALID_HANDLE; p->mapped = false; } @@ -60,16 +75,15 @@ static void destroy_objects(struct gl_hwdec *hw) struct vdp_functions *vdp = &p->ctx->vdp; VdpStatus vdp_st; - if (p->mapped) - 
gl->VDPAUUnmapSurfacesNV(1, &p->vdpgl_surface); - p->mapped = false; + unmap(hw); if (p->vdpgl_surface) gl->VDPAUUnregisterSurfaceNV(p->vdpgl_surface); p->vdpgl_surface = 0; - glDeleteTextures(1, &p->gl_texture); - p->gl_texture = 0; + glDeleteTextures(4, p->gl_textures); + for (int n = 0; n < 4; n++) + p->gl_textures[n] = 0; if (p->vdp_surface != VDP_INVALID_HANDLE) { vdp_st = vdp->output_surface_destroy(p->vdp_surface); @@ -77,14 +91,14 @@ static void destroy_objects(struct gl_hwdec *hw) } p->vdp_surface = VDP_INVALID_HANDLE; - glCheckError(gl, hw->log, "Before uninitializing OpenGL interop"); + gl_check_error(gl, hw->log, "Before uninitializing OpenGL interop"); if (p->vdpgl_initialized) gl->VDPAUFiniNV(); p->vdpgl_initialized = false; - glCheckError(gl, hw->log, "After uninitializing OpenGL interop"); + gl_check_error(gl, hw->log, "After uninitializing OpenGL interop"); } static void destroy(struct gl_hwdec *hw) @@ -93,14 +107,14 @@ static void destroy(struct gl_hwdec *hw) destroy_objects(hw); mp_vdpau_mixer_destroy(p->mixer); + if (p->ctx) + hwdec_devices_remove(hw->devs, &p->ctx->hwctx); mp_vdpau_destroy(p->ctx); } static int create(struct gl_hwdec *hw) { GL *gl = hw->gl; - if (hw->hwctx) - return -1; Display *x11disp = glXGetCurrentDisplay(); if (!x11disp) return -1; @@ -120,8 +134,8 @@ static int create(struct gl_hwdec *hw) destroy(hw); return -1; } - hw->hwctx = &p->ctx->hwctx; - hw->converted_imgfmt = IMGFMT_RGB0; + p->ctx->hwctx.driver_name = hw->driver->name; + hwdec_devices_add(hw->devs, &p->ctx->hwctx); return 0; } @@ -144,39 +158,50 @@ static int reinit(struct gl_hwdec *hw, struct mp_image_params *params) p->vdpgl_initialized = true; - vdp_st = vdp->output_surface_create(p->ctx->vdp_device, - VDP_RGBA_FORMAT_B8G8R8A8, - params->w, params->h, &p->vdp_surface); - CHECK_VDP_ERROR(p, "Error when calling vdp_output_surface_create"); - - gl->GenTextures(1, &p->gl_texture); - gl->BindTexture(GL_TEXTURE_2D, p->gl_texture); - gl->TexParameteri(GL_TEXTURE_2D, 
GL_TEXTURE_MIN_FILTER, GL_LINEAR); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + p->direct_mode = params->hw_subfmt == IMGFMT_NV12; + + gl->GenTextures(4, p->gl_textures); + for (int n = 0; n < 4; n++) { + gl->BindTexture(GL_TEXTURE_2D, p->gl_textures[n]); + GLenum filter = p->direct_mode ? GL_NEAREST : GL_LINEAR; + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } gl->BindTexture(GL_TEXTURE_2D, 0); - p->vdpgl_surface = gl->VDPAURegisterOutputSurfaceNV(BRAINDEATH(p->vdp_surface), - GL_TEXTURE_2D, - 1, &p->gl_texture); - if (!p->vdpgl_surface) - return -1; + if (p->direct_mode) { + params->imgfmt = IMGFMT_NV12; + } else { + vdp_st = vdp->output_surface_create(p->ctx->vdp_device, + VDP_RGBA_FORMAT_B8G8R8A8, + params->w, params->h, &p->vdp_surface); + CHECK_VDP_ERROR(p, "Error when calling vdp_output_surface_create"); + + p->vdpgl_surface = gl->VDPAURegisterOutputSurfaceNV(BRAINDEATH(p->vdp_surface), + GL_TEXTURE_2D, + 1, p->gl_textures); + if (!p->vdpgl_surface) + return -1; - gl->VDPAUSurfaceAccessNV(p->vdpgl_surface, GL_READ_ONLY); + gl->VDPAUSurfaceAccessNV(p->vdpgl_surface, GL_READ_ONLY); + + params->imgfmt = IMGFMT_RGB0; + } - glCheckError(gl, hw->log, "After initializing vdpau OpenGL interop"); + gl_check_error(gl, hw->log, "After initializing vdpau OpenGL interop"); return 0; } -static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, - GLuint *out_textures) +static int map_frame(struct gl_hwdec *hw, struct mp_image *hw_image, + struct gl_hwdec_frame *out_frame) { struct priv *p = hw->priv; GL *gl = hw->gl; - - assert(hw_image && hw_image->imgfmt == 
IMGFMT_VDPAU); + struct vdp_functions *vdp = &p->ctx->vdp; + VdpStatus vdp_st; int pe = mp_vdpau_handle_preemption(p->ctx, &p->preemption_counter); if (pe < 1) { @@ -187,17 +212,58 @@ static int map_image(struct gl_hwdec *hw, struct mp_image *hw_image, return -1; } - if (!p->vdpgl_surface) - return -1; + if (p->direct_mode) { + VdpVideoSurface surface = (intptr_t)hw_image->planes[3]; - if (p->mapped) - gl->VDPAUUnmapSurfacesNV(1, &p->vdpgl_surface); + // We need the uncropped size. + VdpChromaType s_chroma_type; + uint32_t s_w, s_h; + vdp_st = vdp->video_surface_get_parameters(surface, &s_chroma_type, &s_w, &s_h); + CHECK_VDP_ERROR(hw, "Error when calling vdp_video_surface_get_parameters"); + + p->vdpgl_surface = gl->VDPAURegisterVideoSurfaceNV(BRAINDEATH(surface), + GL_TEXTURE_2D, + 4, p->gl_textures); + if (!p->vdpgl_surface) + return -1; - mp_vdpau_mixer_render(p->mixer, NULL, p->vdp_surface, NULL, hw_image, NULL); + gl->VDPAUSurfaceAccessNV(p->vdpgl_surface, GL_READ_ONLY); + gl->VDPAUMapSurfacesNV(1, &p->vdpgl_surface); + + p->mapped = true; + *out_frame = (struct gl_hwdec_frame){ + .vdpau_fields = true, + }; + for (int n = 0; n < 4; n++) { + bool chroma = n >= 2; + out_frame->planes[n] = (struct gl_hwdec_plane){ + .gl_texture = p->gl_textures[n], + .gl_target = GL_TEXTURE_2D, + .tex_w = s_w / (chroma ? 2 : 1), + .tex_h = s_h / (chroma ? 
4 : 2), + }; + }; + } else { + if (!p->vdpgl_surface) + return -1; + + mp_vdpau_mixer_render(p->mixer, NULL, p->vdp_surface, NULL, hw_image, NULL); + + gl->VDPAUMapSurfacesNV(1, &p->vdpgl_surface); + + p->mapped = true; + *out_frame = (struct gl_hwdec_frame){ + .planes = { + { + .gl_texture = p->gl_textures[0], + .gl_target = GL_TEXTURE_2D, + .tex_w = p->image_params.w, + .tex_h = p->image_params.h, + }, + }, + }; + } - gl->VDPAUMapSurfacesNV(1, &p->vdpgl_surface); - p->mapped = true; - out_textures[0] = p->gl_texture; return 0; } @@ -207,6 +273,7 @@ const struct gl_hwdec_driver gl_hwdec_vdpau = { .imgfmt = IMGFMT_VDPAU, .create = create, .reinit = reinit, - .map_image = map_image, + .map_frame = map_frame, + .unmap = unmap, .destroy = destroy, }; diff --git a/video/out/opengl/lcms.c b/video/out/opengl/lcms.c index 7db8da6..eaeb86f 100644 --- a/video/out/opengl/lcms.c +++ b/video/out/opengl/lcms.c @@ -16,6 +16,7 @@ */ #include <string.h> +#include <math.h> #include "mpv_talloc.h" @@ -25,10 +26,10 @@ #include "common/common.h" #include "misc/bstr.h" #include "common/msg.h" +#include "options/m_config.h" #include "options/m_option.h" #include "options/path.h" - -#include "video.h" +#include "video/csputils.h" #include "lcms.h" #include "osdep/io.h" @@ -42,14 +43,14 @@ struct gl_lcms { void *icc_data; size_t icc_size; - char *icc_path; + bool using_memory_profile; bool changed; enum mp_csp_prim prev_prim; enum mp_csp_trc prev_trc; struct mp_log *log; struct mpv_global *global; - struct mp_icc_opts opts; + struct mp_icc_opts *opts; }; static bool parse_3dlut_size(const char *arg, int *p1, int *p2, int *p3) @@ -80,6 +81,7 @@ const struct m_sub_options mp_icc_conf = { OPT_FLAG("icc-profile-auto", profile_auto, 0), OPT_STRING("icc-cache-dir", cache_dir, 0), OPT_INT("icc-intent", intent, 0), + OPT_INTRANGE("icc-contrast", contrast, 0, 0, 100000), OPT_STRING_VALIDATE("3dlut-size", size_str, 0, validate_3dlut_size_opt), OPT_REMOVED("icc-cache", "see icc-cache-dir"), @@ 
-99,25 +101,28 @@ static void lcms2_error_handler(cmsContext ctx, cmsUInt32Number code, MP_ERR(p, "lcms2: %s\n", msg); } -static bool load_profile(struct gl_lcms *p) +static void load_profile(struct gl_lcms *p) { - if (p->icc_data && p->icc_size) - return true; + talloc_free(p->icc_data); + p->icc_data = NULL; + p->icc_size = 0; + p->using_memory_profile = false; - if (!p->icc_path) - return false; + if (!p->opts->profile || !p->opts->profile[0]) + return; - char *fname = mp_get_user_path(NULL, p->global, p->icc_path); + char *fname = mp_get_user_path(NULL, p->global, p->opts->profile); MP_VERBOSE(p, "Opening ICC profile '%s'\n", fname); struct bstr iccdata = stream_read_file(fname, p, p->global, 100000000); // 100 MB talloc_free(fname); if (!iccdata.len) - return false; + return; + + talloc_free(p->icc_data); p->icc_data = iccdata.start; p->icc_size = iccdata.len; - return true; } struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, @@ -128,44 +133,55 @@ struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, .global = global, .log = log, .changed = true, + .opts = m_sub_options_copy(p, &mp_icc_conf, mp_icc_conf.defaults), }; return p; } void gl_lcms_set_options(struct gl_lcms *p, struct mp_icc_opts *opts) { - p->opts = *opts; - p->icc_path = talloc_strdup(p, p->opts.profile); - load_profile(p); + struct mp_icc_opts *old_opts = p->opts; + p->opts = m_sub_options_copy(p, &mp_icc_conf, opts); + + if ((p->using_memory_profile && !p->opts->profile_auto) || + !bstr_equals(bstr0(p->opts->profile), bstr0(old_opts->profile))) + { + load_profile(p); + } + p->changed = true; // probably + + talloc_free(old_opts); } // Warning: profile.start must point to a ta allocation, and the function // takes over ownership. -void gl_lcms_set_memory_profile(struct gl_lcms *p, bstr *profile) +// Returns whether the internal profile was changed. 
+bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile) { - if (!p->opts.profile_auto) { - talloc_free(profile->start); - return; + if (!p->opts->profile_auto || (p->opts->profile && p->opts->profile[0])) { + talloc_free(profile.start); + return false; } - if (!p->icc_path && p->icc_data && profile->start && - profile->len == p->icc_size && - memcmp(profile->start, p->icc_data, p->icc_size) == 0) + if (p->using_memory_profile && + p->icc_data && profile.start && + profile.len == p->icc_size && + memcmp(profile.start, p->icc_data, p->icc_size) == 0) { - talloc_free(profile->start); - return; + talloc_free(profile.start); + return false; } p->changed = true; - - talloc_free(p->icc_path); - p->icc_path = NULL; + p->using_memory_profile = true; talloc_free(p->icc_data); - p->icc_data = talloc_steal(p, profile->start); - p->icc_size = profile->len; + p->icc_data = talloc_steal(p, profile.start); + p->icc_size = profile.len; + + return true; } // Return and _reset_ whether the profile or config has changed since the last @@ -180,7 +196,15 @@ bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, return change; } -static cmsHPROFILE get_vid_profile(cmsContext cms, cmsHPROFILE disp_profile, +// Whether a profile is set. (gl_lcms_get_lut3d() is expected to return a lut, +// but it could still fail due to runtime errors, such as invalid icc data.) 
+bool gl_lcms_has_profile(struct gl_lcms *p) +{ + return p->icc_size > 0; +} + +static cmsHPROFILE get_vid_profile(struct gl_lcms *p, cmsContext cms, + cmsHPROFILE disp_profile, enum mp_csp_prim prim, enum mp_csp_trc trc) { // The input profile for the transformation is dependent on the video @@ -213,21 +237,47 @@ static cmsHPROFILE get_vid_profile(cmsContext cms, cmsHPROFILE disp_profile, case MP_CSP_TRC_BT_1886: { // To build an appropriate BT.1886 transformation we need access to - // the display's black point, so we use the reverse mappings + // the display's black point, so we LittleCMS' detection function. + // Relative colorimetric is used since we want to approximate the + // BT.1886 to the target device's actual black point even in e.g. + // perceptual mode + const int intent = MP_INTENT_RELATIVE_COLORIMETRIC; + cmsCIEXYZ bp_XYZ; + if (!cmsDetectBlackPoint(&bp_XYZ, disp_profile, intent, 0)) + return false; + + // Map this XYZ value back into the (linear) source space cmsToneCurve *linear = cmsBuildGamma(cms, 1.0); cmsHPROFILE rev_profile = cmsCreateRGBProfileTHR(cms, &wp_xyY, &prim_xyY, (cmsToneCurve*[3]){linear, linear, linear}); - cmsHTRANSFORM disp2src = cmsCreateTransformTHR(cms, - disp_profile, TYPE_RGB_16, rev_profile, TYPE_RGB_DBL, - INTENT_RELATIVE_COLORIMETRIC, 0); + cmsHPROFILE xyz_profile = cmsCreateXYZProfile(); + cmsHTRANSFORM xyz2src = cmsCreateTransformTHR(cms, + xyz_profile, TYPE_XYZ_DBL, rev_profile, TYPE_RGB_DBL, + intent, 0); cmsFreeToneCurve(linear); cmsCloseProfile(rev_profile); - if (!disp2src) + cmsCloseProfile(xyz_profile); + if (!xyz2src) return false; - uint64_t disp_black[3] = {0}; double src_black[3]; - cmsDoTransform(disp2src, disp_black, src_black, 1); + cmsDoTransform(xyz2src, &bp_XYZ, src_black, 1); + cmsDeleteTransform(xyz2src); + + // Contrast limiting + if (p->opts->contrast > 0) { + for (int i = 0; i < 3; i++) + src_black[i] = MPMAX(src_black[i], 1.0 / p->opts->contrast); + } + + // Built-in contrast failsafe + double 
contrast = 3.0 / (src_black[0] + src_black[1] + src_black[2]); + if (contrast > 100000) { + MP_WARN(p, "ICC profile detected contrast very high (>100000)," + " falling back to contrast 1000 for sanity. Set the" + " icc-contrast option to silence this warning.\n"); + src_black[0] = src_black[1] = src_black[2] = 1.0 / 1000; + } // Build the parametric BT.1886 transfer curve, one per channel for (int i = 0; i < 3; i++) { @@ -265,10 +315,10 @@ bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, int s_r, s_g, s_b; bool result = false; - if (!parse_3dlut_size(p->opts.size_str, &s_r, &s_g, &s_b)) + if (!parse_3dlut_size(p->opts->size_str, &s_r, &s_g, &s_b)) return false; - if (!p->icc_data && !p->icc_path) + if (!gl_lcms_has_profile(p)) return false; void *tmp = talloc_new(NULL); @@ -277,13 +327,14 @@ bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, cmsContext cms = NULL; char *cache_file = NULL; - if (p->opts.cache_dir && p->opts.cache_dir[0]) { + if (p->opts->cache_dir && p->opts->cache_dir[0]) { // Gamma is included in the header to help uniquely identify it, // because we may change the parameter in the future or make it // customizable, same for the primaries. 
char *cache_info = talloc_asprintf(tmp, - "ver=1.3, intent=%d, size=%dx%dx%d, prim=%d, trc=%d\n", - p->opts.intent, s_r, s_g, s_b, prim, trc); + "ver=1.3, intent=%d, size=%dx%dx%d, prim=%d, trc=%d, " + "contrast=%d\n", + p->opts->intent, s_r, s_g, s_b, prim, trc, p->opts->contrast); uint8_t hash[32]; struct AVSHA *sha = av_sha_alloc(); @@ -295,7 +346,7 @@ bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, av_sha_final(sha, hash); av_free(sha); - char *cache_dir = mp_get_user_path(tmp, p->global, p->opts.cache_dir); + char *cache_dir = mp_get_user_path(tmp, p->global, p->opts->cache_dir); cache_file = talloc_strdup(tmp, ""); for (int i = 0; i < sizeof(hash); i++) cache_file = talloc_asprintf_append(cache_file, "%02X", hash[i]); @@ -305,7 +356,7 @@ bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, } // check cache - if (cache_file) { + if (cache_file && stat(cache_file, &(struct stat){0}) == 0) { MP_VERBOSE(p, "Opening 3D LUT cache in file '%s'.\n", cache_file); struct bstr cachedata = stream_read_file(cache_file, tmp, p->global, 1000000000); // 1 GB @@ -327,7 +378,7 @@ bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, if (!profile) goto error_exit; - cmsHPROFILE vid_profile = get_vid_profile(cms, profile, prim, trc); + cmsHPROFILE vid_profile = get_vid_profile(p, cms, profile, prim, trc); if (!vid_profile) { cmsCloseProfile(profile); goto error_exit; @@ -335,8 +386,9 @@ bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, cmsHTRANSFORM trafo = cmsCreateTransformTHR(cms, vid_profile, TYPE_RGB_16, profile, TYPE_RGB_16, - p->opts.intent, - cmsFLAGS_HIGHRESPRECALC); + p->opts->intent, + cmsFLAGS_HIGHRESPRECALC | + cmsFLAGS_BLACKPOINTCOMPENSATION); cmsCloseProfile(profile); cmsCloseProfile(vid_profile); @@ -406,7 +458,7 @@ struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, } void gl_lcms_set_options(struct gl_lcms *p, struct mp_icc_opts *opts) { } -void 
gl_lcms_set_memory_profile(struct gl_lcms *p, bstr *profile) { } +bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile) {return false;} bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, enum mp_csp_trc trc) @@ -414,6 +466,11 @@ bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, return false; } +bool gl_lcms_has_profile(struct gl_lcms *p) +{ + return false; +} + bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, enum mp_csp_prim prim, enum mp_csp_trc trc) { diff --git a/video/out/opengl/lcms.h b/video/out/opengl/lcms.h index ee2a48b..094514a 100644 --- a/video/out/opengl/lcms.h +++ b/video/out/opengl/lcms.h @@ -13,9 +13,14 @@ struct mp_icc_opts { char *cache_dir; char *size_str; int intent; + int contrast; +}; + +struct lut3d { + uint16_t *data; + int size[3]; }; -struct lut3d; struct mp_log; struct mpv_global; struct gl_lcms; @@ -23,7 +28,8 @@ struct gl_lcms; struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, struct mpv_global *global); void gl_lcms_set_options(struct gl_lcms *p, struct mp_icc_opts *opts); -void gl_lcms_set_memory_profile(struct gl_lcms *p, bstr *profile); +bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile); +bool gl_lcms_has_profile(struct gl_lcms *p); bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **, enum mp_csp_prim prim, enum mp_csp_trc trc); bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, diff --git a/video/out/opengl/nnedi3.c b/video/out/opengl/nnedi3.c deleted file mode 100644 index 3c12fcc..0000000 --- a/video/out/opengl/nnedi3.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. - * - * The shader portions may have been derived from existing LGPLv3 shaders - * (see below), possibly making this file effectively LGPLv3. - */ - -#include "nnedi3.h" - -#if HAVE_NNEDI - -#include <assert.h> -#include <stdint.h> -#include <float.h> - -#include <libavutil/bswap.h> - -#include "video.h" - -/* - * NNEDI3, an intra-field deinterlacer - * - * The original filter was authored by Kevin Stone (aka. tritical) and is - * licensed under GPL2 terms: - * http://bengal.missouri.edu/~kes25c/ - * - * A LGPLv3 licensed OpenCL kernel was created by SEt: - * http://forum.doom9.org/showthread.php?t=169766 - * - * A HLSL port further modified by madshi, Shiandow and Zach Saw could be - * found at (also LGPLv3 licensed): - * https://github.com/zachsaw/MPDN_Extensions - * - */ - -#define GLSL(x) gl_sc_add(sc, #x "\n"); -#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__) -#define GLSLH(x) gl_sc_hadd(sc, #x "\n"); -#define GLSLHF(...) 
gl_sc_haddf(sc, __VA_ARGS__) - -const struct nnedi3_opts nnedi3_opts_def = { - .neurons = 1, - .window = 0, - .upload = NNEDI3_UPLOAD_UBO, -}; - -#define OPT_BASE_STRUCT struct nnedi3_opts -const struct m_sub_options nnedi3_conf = { - .opts = (const m_option_t[]) { - OPT_CHOICE("neurons", neurons, 0, - ({"16", 0}, - {"32", 1}, - {"64", 2}, - {"128", 3})), - OPT_CHOICE("window", window, 0, - ({"8x4", 0}, - {"8x6", 1})), - OPT_CHOICE("upload", upload, 0, - ({"ubo", NNEDI3_UPLOAD_UBO}, - {"shader", NNEDI3_UPLOAD_SHADER})), - {0} - }, - .size = sizeof(struct nnedi3_opts), - .defaults = &nnedi3_opts_def, -}; - -const static char nnedi3_weights[40320 * 4 + 1] = -#include "video/out/opengl/nnedi3_weights.inc" -; - -const int nnedi3_weight_offsets[9] = - {0, 1088, 3264, 7616, 16320, 17920, 21120, 27520, 40320}; - -const int nnedi3_neurons[4] = {16, 32, 64, 128}; -const int nnedi3_window_width[2] = {8, 8}; -const int nnedi3_window_height[2] = {4, 6}; - -const float* get_nnedi3_weights(const struct nnedi3_opts *conf, int *size) -{ - int idx = conf->window * 4 + conf->neurons; - const int offset = nnedi3_weight_offsets[idx]; - *size = (nnedi3_weight_offsets[idx + 1] - offset) * 4; - return (const float*)(nnedi3_weights + offset * 4); -} - -void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num, - int step, float tex_mul, const struct nnedi3_opts *conf, - struct gl_transform *transform, GLenum tex_target) -{ - assert(0 <= step && step < 2); - - if (!conf) - conf = &nnedi3_opts_def; - - const int neurons = nnedi3_neurons[conf->neurons]; - const int width = nnedi3_window_width[conf->window]; - const int height = nnedi3_window_height[conf->window]; - - const int offset = nnedi3_weight_offsets[conf->window * 4 + conf->neurons]; - const uint32_t *weights = (const int*)(nnedi3_weights + offset * 4); - - GLSLF("// nnedi3 (step %d, neurons %d, window %dx%d, mode %d)\n", - step, neurons, width, height, conf->upload); - - // This is required since each row will be 
encoded into vec4s - assert(width % 4 == 0); - const int sample_count = width * height / 4; - - if (conf->upload == NNEDI3_UPLOAD_UBO) { - char buf[32]; - snprintf(buf, sizeof(buf), "vec4 weights[%d];", - neurons * (sample_count * 2 + 1)); - gl_sc_uniform_buffer(sc, "NNEDI3_WEIGHTS", buf, 0); - if (!gl->es && gl->glsl_version < 140) - gl_sc_enable_extension(sc, "GL_ARB_uniform_buffer_object"); - } else if (conf->upload == NNEDI3_UPLOAD_SHADER) { - // Somehow necessary for hard coding approach. - GLSLH(#pragma optionNV(fastprecision on)) - } - - GLSLHF("float nnedi3(%s tex, vec2 pos, vec2 tex_size, vec2 pixel_size, int plane, float tex_mul) {\n", mp_sampler_type(tex_target)); - - if (step == 0) { - *transform = (struct gl_transform){{{1.0,0.0}, {0.0,2.0}}, {0.0,-0.5}}; - - GLSLH(if (fract(pos.y * tex_size.y) < 0.5) - return texture(tex, pos + vec2(0, 0.25) * pixel_size)[plane] * tex_mul;) - GLSLHF("#define GET(i, j) " - "(texture(tex, pos+vec2((i)-(%f),(j)-(%f)+0.25) * pixel_size)[plane]*tex_mul)\n", - width / 2.0 - 1, (height - 1) / 2.0); - } else { - *transform = (struct gl_transform){{{2.0,0.0}, {0.0,1.0}}, {-0.5,0.0}}; - - GLSLH(if (fract(pos.x * tex_size.x) < 0.5) - return texture(tex, pos + vec2(0.25, 0) * pixel_size)[plane] * tex_mul;) - GLSLHF("#define GET(i, j) " - "(texture(tex, pos+vec2((j)-(%f)+0.25,(i)-(%f)) * pixel_size)[plane]*tex_mul)\n", - (height - 1) / 2.0, width / 2.0 - 1); - } - - GLSLHF("vec4 samples[%d];\n", sample_count); - - for (int y = 0; y < height; y++) - for (int x = 0; x < width; x += 4) { - GLSLHF("samples[%d] = vec4(GET(%d.0, %d.0), GET(%d.0, %d.0)," - "GET(%d.0, %d.0), GET(%d.0, %d.0));\n", - (y * width + x) / 4, x, y, x+1, y, x+2, y, x+3, y); - } - - GLSLHF("float sum = 0.0, sumsq = 0.0;" - "for (int i = 0; i < %d; i++) {" - "sum += dot(samples[i], vec4(1.0));" - "sumsq += dot(samples[i], samples[i]);" - "}\n", sample_count); - - GLSLHF("float mstd0 = sum / %d.0;\n" - "float mstd1 = sumsq / %d.0 - mstd0 * mstd0;\n" - "float mstd2 = 
mix(0.0, inversesqrt(mstd1), mstd1 >= %.12e);\n" - "mstd1 *= mstd2;\n", - width * height, width * height, FLT_EPSILON); - - GLSLHF("float vsum = 0.0, wsum = 0.0, sum1, sum2;\n"); - - if (conf->upload == NNEDI3_UPLOAD_SHADER) { - GLSLH(#define T(x) intBitsToFloat(x)) - GLSLH(#define W(i,w0,w1,w2,w3) dot(samples[i],vec4(T(w0),T(w1),T(w2),T(w3)))) - - GLSLHF("#define WS(w0,w1) " - "sum1 = exp(sum1 * mstd2 + T(w0));" - "sum2 = sum2 * mstd2 + T(w1);" - "wsum += sum1;" - "vsum += sum1*(sum2/(1.0+abs(sum2)));\n"); - - for (int n = 0; n < neurons; n++) { - const uint32_t *weights_ptr = weights + (sample_count * 2 + 1) * 4 * n; - for (int s = 0; s < 2; s++) { - GLSLHF("sum%d", s + 1); - for (int i = 0; i < sample_count; i++) { - GLSLHF("%cW(%d,%d,%d,%d,%d)", i == 0 ? '=' : '+', i, - (int)av_le2ne32(weights_ptr[0]), - (int)av_le2ne32(weights_ptr[1]), - (int)av_le2ne32(weights_ptr[2]), - (int)av_le2ne32(weights_ptr[3])); - weights_ptr += 4; - } - GLSLHF(";"); - } - GLSLHF("WS(%d,%d);\n", (int)av_le2ne32(weights_ptr[0]), - (int)av_le2ne32(weights_ptr[1])); - } - } else if (conf->upload == NNEDI3_UPLOAD_UBO) { - GLSLH(int idx = 0;) - - GLSLHF("for (int n = 0; n < %d; n++) {\n", neurons); - - for (int s = 0; s < 2; s++) { - GLSLHF("sum%d = 0.0;\n" - "for (int i = 0; i < %d; i++) {" - "sum%d += dot(samples[i], weights[idx++]);" - "}\n", - s + 1, sample_count, s + 1); - } - - GLSLH(sum1 = exp(sum1 * mstd2 + weights[idx][0]); - sum2 = sum2 * mstd2 + weights[idx++][1]; - wsum += sum1; - vsum += sum1*(sum2/(1.0+abs(sum2)));) - - GLSLHF("}\n"); - } - - GLSLH(return clamp(mstd0 + 5.0 * vsum / wsum * mstd1, 0.0, 1.0);) - - GLSLHF("}\n"); // nnedi3 - - GLSL(color = vec4(1.0);) - - for (int i = 0; i < planes; i++) { - GLSLF("color[%d] = nnedi3(texture%d, texcoord%d, texture_size%d, pixel_size%d, %d, %f);\n", - i, tex_num, tex_num, tex_num, tex_num, i, tex_mul); - } -} - -#else - -const struct m_sub_options nnedi3_conf = {0}; - - -const float* get_nnedi3_weights(const struct nnedi3_opts 
*conf, int *size) -{ - return NULL; -} - -void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num, - int step, float tex_mul, const struct nnedi3_opts *conf, - struct gl_transform *transform, GLenum tex_target) -{ -} - -#endif diff --git a/video/out/opengl/nnedi3.h b/video/out/opengl/nnedi3.h deleted file mode 100644 index c3895a0..0000000 --- a/video/out/opengl/nnedi3.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef MP_GL_NNEDI3_H -#define MP_GL_NNEDI3_H - -#include "config.h" -#include "common.h" -#include "utils.h" - -#define HAVE_NNEDI HAVE_GPL3 - -#define NNEDI3_UPLOAD_UBO 0 -#define NNEDI3_UPLOAD_SHADER 1 - -struct nnedi3_opts { - int neurons; - int window; - int upload; -}; - -extern const struct nnedi3_opts nnedi3_opts_def; -extern const struct m_sub_options nnedi3_conf; - -const float* get_nnedi3_weights(const struct nnedi3_opts *conf, int *size); - -void pass_nnedi3(GL *gl, struct gl_shader_cache *sc, int planes, int tex_num, - int step, float tex_mul, const struct nnedi3_opts *conf, - struct gl_transform *transform, GLenum tex_target); - -#endif diff --git a/video/out/opengl/nnedi3_weights.bin b/video/out/opengl/nnedi3_weights.bin Binary files differdeleted file mode 100644 index e1659d8..0000000 --- a/video/out/opengl/nnedi3_weights.bin +++ /dev/null diff --git a/video/out/opengl/osd.c b/video/out/opengl/osd.c index c554425..7b1ec16 100644 --- a/video/out/opengl/osd.c +++ b/video/out/opengl/osd.c @@ -17,19 +17,16 @@ #include <stdlib.h> #include <assert.h> +#include <limits.h> + #include <libavutil/common.h> #include "video/out/bitmap_packer.h" +#include "formats.h" #include "utils.h" #include "osd.h" -struct osd_fmt_entry { - GLint internal_format; - GLint format; - GLenum type; -}; - // glBlendFuncSeparate() arguments static const int blend_factors[SUBBITMAP_COUNT][4] = { [SUBBITMAP_LIBASS] = {GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, @@ -38,21 +35,6 @@ static const int blend_factors[SUBBITMAP_COUNT][4] = { GL_ONE, GL_ONE_MINUS_SRC_ALPHA}, }; -static const struct osd_fmt_entry osd_to_gl3_formats[SUBBITMAP_COUNT] = { - [SUBBITMAP_LIBASS] = {GL_RED, GL_RED, GL_UNSIGNED_BYTE}, - [SUBBITMAP_RGBA] = {GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE}, -}; - -static const struct osd_fmt_entry osd_to_gles3_formats[SUBBITMAP_COUNT] = { - [SUBBITMAP_LIBASS] = {GL_R8, GL_RED, GL_UNSIGNED_BYTE}, - [SUBBITMAP_RGBA] = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, -}; - -static const 
struct osd_fmt_entry osd_to_gl2_formats[SUBBITMAP_COUNT] = { - [SUBBITMAP_LIBASS] = {GL_LUMINANCE, GL_LUMINANCE, GL_UNSIGNED_BYTE}, - [SUBBITMAP_RGBA] = {GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE}, -}; - struct vertex { float position[2]; float texcoord[2]; @@ -77,16 +59,17 @@ struct mpgl_osd_part { struct sub_bitmap *subparts; struct vertex *vertices; struct bitmap_packer *packer; + void *upload; }; struct mpgl_osd { struct mp_log *log; struct osd_state *osd; GL *gl; + GLint max_tex_wh; bool use_pbo; - bool scaled; struct mpgl_osd_part *parts[MAX_OSD_PARTS]; - const struct osd_fmt_entry *fmt_table; + const struct gl_format *fmt_table[SUBBITMAP_COUNT]; bool formats[SUBBITMAP_COUNT]; struct gl_vao vao; int64_t change_counter; @@ -98,37 +81,32 @@ struct mpgl_osd { struct mpgl_osd *mpgl_osd_init(GL *gl, struct mp_log *log, struct osd_state *osd) { - GLint max_texture_size; - gl->GetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size); - struct mpgl_osd *ctx = talloc_ptrtype(NULL, ctx); *ctx = (struct mpgl_osd) { .log = log, .osd = osd, .gl = gl, - .fmt_table = osd_to_gl3_formats, .scratch = talloc_zero_size(ctx, 1), }; - if (gl->es >= 300) { - ctx->fmt_table = osd_to_gles3_formats; - } else if (!(gl->mpgl_caps & MPGL_CAP_TEX_RG)) { - ctx->fmt_table = osd_to_gl2_formats; - } + gl->GetIntegerv(GL_MAX_TEXTURE_SIZE, &ctx->max_tex_wh); + + ctx->fmt_table[SUBBITMAP_LIBASS] = gl_find_unorm_format(gl, 1, 1); + ctx->fmt_table[SUBBITMAP_RGBA] = gl_find_unorm_format(gl, 1, 4); for (int n = 0; n < MAX_OSD_PARTS; n++) { struct mpgl_osd_part *p = talloc_ptrtype(ctx, p); *p = (struct mpgl_osd_part) { .packer = talloc_struct(p, struct bitmap_packer, { - .w_max = max_texture_size, - .h_max = max_texture_size, + .w_max = ctx->max_tex_wh, + .h_max = ctx->max_tex_wh, }), }; ctx->parts[n] = p; } for (int n = 0; n < SUBBITMAP_COUNT; n++) - ctx->formats[n] = ctx->fmt_table[n].type != 0; + ctx->formats[n] = !!ctx->fmt_table[n]; gl_vao_init(&ctx->vao, gl, sizeof(struct vertex), vertex_vao); @@ -149,6 
+127,7 @@ void mpgl_osd_destroy(struct mpgl_osd *ctx) gl->DeleteTextures(1, &p->texture); if (gl->DeleteBuffers) gl->DeleteBuffers(1, &p->buffer); + talloc_free(p->upload); } talloc_free(ctx); } @@ -158,38 +137,79 @@ void mpgl_osd_set_options(struct mpgl_osd *ctx, bool pbo) ctx->use_pbo = pbo; } -static bool upload_pbo(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, - struct sub_bitmaps *imgs) +static bool upload(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, + struct sub_bitmaps *imgs, bool pbo) { GL *gl = ctx->gl; bool success = true; - struct osd_fmt_entry fmt = ctx->fmt_table[imgs->format]; - int pix_stride = glFmt2bpp(fmt.format, fmt.type); + const struct gl_format *fmt = ctx->fmt_table[imgs->format]; + size_t pix_stride = gl_bytes_per_pixel(fmt->format, fmt->type); + size_t buffer_size = pix_stride * osd->h * osd->w; + + char *data = NULL; + void *texdata = NULL; + + if (pbo) { + if (!osd->buffer) { + gl->GenBuffers(1, &osd->buffer); + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, osd->buffer); + gl->BufferData(GL_PIXEL_UNPACK_BUFFER, buffer_size, NULL, + GL_DYNAMIC_COPY); + } - if (!osd->buffer) { - gl->GenBuffers(1, &osd->buffer); gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, osd->buffer); - gl->BufferData(GL_PIXEL_UNPACK_BUFFER, osd->w * osd->h * pix_stride, - NULL, GL_DYNAMIC_COPY); - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + data = gl->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, buffer_size, + GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT); + if (!data) { + success = false; + goto done; + } + } else { + if (!imgs->packed) { + if (!osd->upload) + osd->upload = talloc_size(NULL, buffer_size); + data = osd->upload; + texdata = data; + } } - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, osd->buffer); - char *data = gl->MapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); - if (!data) { - success = false; + int copy_w = 0; + int copy_h = 0; + size_t stride = 0; + if (imgs->packed) { + copy_w = imgs->packed_w; + copy_h = imgs->packed_h; + stride = imgs->packed->stride[0]; + 
texdata = imgs->packed->planes[0]; + if (pbo) { + memcpy_pic(data, texdata, pix_stride * copy_w, copy_h, + osd->w * pix_stride, stride); + stride = osd->w * pix_stride; + texdata = NULL; + } } else { struct pos bb[2]; packer_get_bb(osd->packer, bb); - size_t stride = osd->w * pix_stride; + copy_w = bb[1].x; + copy_h = bb[1].y; + stride = osd->w * pix_stride; packer_copy_subbitmaps(osd->packer, imgs, data, pix_stride, stride); - if (!gl->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER)) + } + + if (pbo) { + if (!gl->UnmapBuffer(GL_PIXEL_UNPACK_BUFFER)) { success = false; - glUploadTex(gl, GL_TEXTURE_2D, fmt.format, fmt.type, NULL, stride, - bb[0].x, bb[0].y, bb[1].x - bb[0].x, bb[1].y - bb[0].y, 0); + goto done; + } } - gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + gl_upload_tex(gl, GL_TEXTURE_2D, fmt->format, fmt->type, texdata, stride, + 0, 0, copy_w, copy_h); + + if (pbo) + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + +done: if (!success) { MP_FATAL(ctx, "Error: can't upload subtitles! " "Remove the 'pbo' suboption.\n"); @@ -198,24 +218,13 @@ static bool upload_pbo(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, return success; } -static void upload_tex(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, - struct sub_bitmaps *imgs) +static int next_pow2(int v) { - struct osd_fmt_entry fmt = ctx->fmt_table[imgs->format]; - if (osd->packer->padding) { - struct pos bb[2]; - packer_get_bb(osd->packer, bb); - glClearTex(ctx->gl, GL_TEXTURE_2D, fmt.format, fmt.type, - bb[0].x, bb[0].y, bb[1].x - bb[0].y, bb[1].y - bb[0].y, - 0, &ctx->scratch); - } - for (int n = 0; n < osd->packer->count; n++) { - struct sub_bitmap *s = &imgs->parts[n]; - struct pos p = osd->packer->result[n]; - - glUploadTex(ctx->gl, GL_TEXTURE_2D, fmt.format, fmt.type, - s->bitmap, s->stride, p.x, p.y, s->w, s->h, 0); + for (int x = 0; x < 30; x++) { + if ((1 << x) >= v) + return 1 << x; } + return INT_MAX; } static bool upload_osd(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, @@ -223,32 +232,46 @@ static bool 
upload_osd(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, { GL *gl = ctx->gl; - // assume 2x2 filter on scaling - osd->packer->padding = ctx->scaled || imgs->scaled; - int r = packer_pack_from_subbitmaps(osd->packer, imgs); - if (r < 0) { + int req_w = 0; + int req_h = 0; + + if (imgs->packed) { + req_w = next_pow2(imgs->packed_w); + req_h = next_pow2(imgs->packed_h); + } else { + // assume 2x2 filter on scaling + osd->packer->padding = imgs->scaled; + int r = packer_pack_from_subbitmaps(osd->packer, imgs); + if (r < 0) { + MP_ERR(ctx, "OSD bitmaps do not fit on a surface with the maximum " + "supported size %dx%d.\n", osd->packer->w_max, osd->packer->h_max); + return false; + } + req_w = osd->packer->w; + req_h = osd->packer->h; + } + + if (req_w > ctx->max_tex_wh || req_h > ctx->max_tex_wh) { MP_ERR(ctx, "OSD bitmaps do not fit on a surface with the maximum " - "supported size %dx%d.\n", osd->packer->w_max, osd->packer->h_max); + "supported size %dx%d.\n", ctx->max_tex_wh, ctx->max_tex_wh); return false; } - struct osd_fmt_entry fmt = ctx->fmt_table[imgs->format]; - assert(fmt.type != 0); + const struct gl_format *fmt = ctx->fmt_table[imgs->format]; + assert(fmt); if (!osd->texture) gl->GenTextures(1, &osd->texture); gl->BindTexture(GL_TEXTURE_2D, osd->texture); - if (osd->packer->w > osd->w || osd->packer->h > osd->h - || osd->format != imgs->format) - { + if (req_w > osd->w || req_h > osd->h || osd->format != imgs->format) { osd->format = imgs->format; - osd->w = FFMAX(32, osd->packer->w); - osd->h = FFMAX(32, osd->packer->h); + osd->w = FFMAX(32, req_w); + osd->h = FFMAX(32, req_h); - gl->TexImage2D(GL_TEXTURE_2D, 0, fmt.internal_format, osd->w, osd->h, - 0, fmt.format, fmt.type, NULL); + gl->TexImage2D(GL_TEXTURE_2D, 0, fmt->internal_format, osd->w, osd->h, + 0, fmt->format, fmt->type, NULL); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); @@ -258,13 +281,16 @@ static 
bool upload_osd(struct mpgl_osd *ctx, struct mpgl_osd_part *osd, if (gl->DeleteBuffers) gl->DeleteBuffers(1, &osd->buffer); osd->buffer = 0; + + talloc_free(osd->upload); + osd->upload = NULL; } bool uploaded = false; if (ctx->use_pbo) - uploaded = upload_pbo(ctx, osd, imgs); + uploaded = upload(ctx, osd, imgs, true); if (!uploaded) - upload_tex(ctx, osd, imgs); + upload(ctx, osd, imgs, false); gl->BindTexture(GL_TEXTURE_2D, 0); @@ -280,18 +306,26 @@ static void gen_osd_cb(void *pctx, struct sub_bitmaps *imgs) struct mpgl_osd_part *osd = ctx->parts[imgs->render_index]; + bool ok = true; if (imgs->change_id != osd->change_id) { if (!upload_osd(ctx, osd, imgs)) - osd->packer->count = 0; + ok = false; osd->change_id = imgs->change_id; ctx->change_counter += 1; } - osd->num_subparts = osd->packer->count; + osd->num_subparts = ok ? imgs->num_parts : 0; MP_TARRAY_GROW(osd, osd->subparts, osd->num_subparts); memcpy(osd->subparts, imgs->parts, osd->num_subparts * sizeof(osd->subparts[0])); + + if (!imgs->packed) { + for (int n = 0; n < osd->num_subparts; n++) { + osd->subparts[n].src_x = osd->packer->result[n].x; + osd->subparts[n].src_y = osd->packer->result[n].y; + } + } } static void write_quad(struct vertex *va, struct gl_transform t, @@ -319,7 +353,6 @@ static int generate_verts(struct mpgl_osd_part *part, struct gl_transform t) for (int n = 0; n < part->num_subparts; n++) { struct sub_bitmap *b = &part->subparts[n]; - struct pos pos = part->packer->result[n]; struct vertex *va = part->vertices; // NOTE: the blend color is used with SUBBITMAP_LIBASS only, so it @@ -330,7 +363,7 @@ static int generate_verts(struct mpgl_osd_part *part, struct gl_transform t) write_quad(&va[n * 6], t, b->x, b->y, b->x + b->dw, b->y + b->dh, - pos.x, pos.y, pos.x + b->w, pos.y + b->h, + b->src_x, b->src_y, b->src_x + b->w, b->src_y + b->h, part->w, part->h, color); } diff --git a/video/out/opengl/superxbr.c b/video/out/opengl/superxbr.c deleted file mode 100644 index 323ed18..0000000 --- 
a/video/out/opengl/superxbr.c +++ /dev/null @@ -1,244 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. - */ - -#include "superxbr.h" - -#include <assert.h> - -#define GLSL(x) gl_sc_add(sc, #x "\n"); -#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__) -#define GLSLH(x) gl_sc_hadd(sc, #x "\n"); -#define GLSLHF(...) gl_sc_haddf(sc, __VA_ARGS__) - -struct superxbr_opts { - float sharpness; - float edge_strength; -}; - -const struct superxbr_opts superxbr_opts_def = { - .sharpness = 1.0f, - .edge_strength = 0.6f, -}; - -#define OPT_BASE_STRUCT struct superxbr_opts -const struct m_sub_options superxbr_conf = { - .opts = (const m_option_t[]) { - OPT_FLOATRANGE("sharpness", sharpness, 0, 0.0, 2.0), - OPT_FLOATRANGE("edge-strength", edge_strength, 0, 0.0, 1.0), - {0} - }, - .size = sizeof(struct superxbr_opts), - .defaults = &superxbr_opts_def, -}; - -/* - - ******* Super XBR Shader ******* - - Copyright (c) 2015 Hyllian - sergiogdb@gmail.com - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do 
so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. - -*/ - -struct step_params { - const float dstr, ostr; // sharpness strength modifiers - const int d1[3][3]; // 1-distance diagonal mask - const int d2[2][2]; // 2-distance diagonal mask - const int o1[3]; // 1-distance orthogonal mask - const int o2[3]; // 2-distance orthogonal mask -}; - -const struct step_params params[3] = { - { .dstr = 0.129633, - .ostr = 0.175068, - .d1 = {{0, 1, 0}, - {1, 2, 1}, - {0, 1, 0}}, - .d2 = {{-1, 0}, - { 0, -1}}, - - .o1 = {1, 2, 1}, - .o2 = { 0, 0}, - }, { - .dstr = 0.175068, - .ostr = 0.129633, - .d1 = {{0, 1, 0}, - {1, 4, 1}, - {0, 1, 0}}, - .d2 = {{ 0, 0}, - { 0, 0}}, - - .o1 = {1, 4, 1}, - .o2 = { 0, 0}, - } -}; - -// Compute a single step of the superxbr process, assuming the input can be -// sampled using i(x,y). 
Dumps its output into 'res' -static void superxbr_step_h(struct gl_shader_cache *sc, - const struct superxbr_opts *conf, - const struct step_params *mask) -{ - GLSLHF("{ // step\n"); - - // Convolute along the diagonal and orthogonal lines - GLSLH(vec4 d1 = vec4( i(0,0), i(1,1), i(2,2), i(3,3) );) - GLSLH(vec4 d2 = vec4( i(0,3), i(1,2), i(2,1), i(3,0) );) - GLSLH(vec4 h1 = vec4( i(0,1), i(1,1), i(2,1), i(3,1) );) - GLSLH(vec4 h2 = vec4( i(0,2), i(1,2), i(2,2), i(3,2) );) - GLSLH(vec4 v1 = vec4( i(1,0), i(1,1), i(1,2), i(1,3) );) - GLSLH(vec4 v2 = vec4( i(2,0), i(2,1), i(2,2), i(2,3) );) - - GLSLHF("float dw = %f;\n", conf->sharpness * mask->dstr); - GLSLHF("float ow = %f;\n", conf->sharpness * mask->ostr); - GLSLH(vec4 dk = vec4(-dw, dw+0.5, dw+0.5, -dw);) // diagonal kernel - GLSLH(vec4 ok = vec4(-ow, ow+0.5, ow+0.5, -ow);) // ortho kernel - - // Convoluted results - GLSLH(float d1c = dot(d1, dk);) - GLSLH(float d2c = dot(d2, dk);) - GLSLH(float vc = dot(v1+v2, ok)/2.0;) - GLSLH(float hc = dot(h1+h2, ok)/2.0;) - - // Compute diagonal edge strength using diagonal mask - GLSLH(float d_edge = 0;) - for (int x = 0; x < 3; x++) { - for (int y = 0; y < 3; y++) { - if (mask->d1[x][y]) { - // 1-distance diagonal neighbours - GLSLHF("d_edge += %d * abs(i(%d,%d) - i(%d,%d));\n", - mask->d1[x][y], x+1, y, x, y+1); - GLSLHF("d_edge -= %d * abs(i(%d,%d) - i(%d,%d));\n", - mask->d1[x][y], 3-y, x+1, 3-(y+1), x); // rotated - } - if (x < 2 && y < 2 && mask->d2[x][y]) { - // 2-distance diagonal neighbours - GLSLHF("d_edge += %d * abs(i(%d,%d) - i(%d,%d));\n", - mask->d2[x][y], x+2, y, x, y+2); - GLSLHF("d_edge -= %d * abs(i(%d,%d) - i(%d,%d));\n", - mask->d2[x][y], 3-y, x+2, 3-(y+2), x); // rotated - } - } - } - - // Compute orthogonal edge strength using orthogonal mask - GLSLH(float o_edge = 0;) - for (int x = 1; x < 3; x++) { - for (int y = 0; y < 3; y++) { - if (mask->o1[y]) { - // 1-distance neighbours - GLSLHF("o_edge += %d * abs(i(%d,%d) - i(%d,%d));\n", - mask->o1[y], x, 
y, x, y+1); // vertical - GLSLHF("o_edge -= %d * abs(i(%d,%d) - i(%d,%d));\n", - mask->o1[y], y, x, y+1, x); // horizontal - } - if (y < 2 && mask->o2[y]) { - // 2-distance neighbours - GLSLHF("o_edge += %d * abs(i(%d,%d) - i(%d,%d));\n", - mask->o2[y], x, y, x, y+2); // vertical - GLSLHF("o_edge -= %d * abs(i(%d,%d) - i(%d,%d));\n", - mask->o2[x], y, x, y+2, x); // horizontal - } - } - } - - // Pick the two best directions and mix them together - GLSLHF("float str = smoothstep(0.0, %f + 1e-6, abs(tex_mul*d_edge));\n", - conf->edge_strength); - GLSLH(res = mix(mix(d2c, d1c, step(0.0, d_edge)), \ - mix(hc, vc, step(0.0, o_edge)), 1.0 - str);) - - // Anti-ringing using center square - GLSLH(float lo = min(min( i(1,1), i(2,1) ), min( i(1,2), i(2,2) ));) - GLSLH(float hi = max(max( i(1,1), i(2,1) ), max( i(1,2), i(2,2) ));) - GLSLH(res = clamp(res, lo, hi);) - - GLSLHF("} // step\n"); -} - -void pass_superxbr(struct gl_shader_cache *sc, int id, int step, float tex_mul, - const struct superxbr_opts *conf, - struct gl_transform *transform) -{ - if (!conf) - conf = &superxbr_opts_def; - - assert(0 <= step && step < 2); - GLSLF("// superxbr (step %d)\n", step); - GLSLHF("#define tex texture%d\n", id); - GLSLHF("#define tex_size texture_size%d\n", id); - GLSLHF("#define tex_mul %f\n", tex_mul); - GLSLHF("#define pt pixel_size%d\n", id); - - // We use a sub-function in the header so we can return early - GLSLHF("float superxbr(vec2 pos) {\n"); - GLSLH(float i[4*4];) - GLSLH(float res;) - GLSLH(#define i(x,y) i[(x)*4+(y)]) - - if (step == 0) { - *transform = (struct gl_transform){{{2.0,0.0}, {0.0,2.0}}, {-0.5,-0.5}}; - GLSLH(vec2 dir = fract(pos * tex_size) - 0.5;) - - // Optimization: Discard (skip drawing) unused pixels, except those - // at the edge. 
- GLSLH(vec2 dist = tex_size * min(pos, vec2(1.0) - pos);) - GLSLH(if (dir.x * dir.y < 0.0 && dist.x > 1.0 && dist.y > 1.0) - return 0.0;) - - GLSLH(if (dir.x < 0.0 || dir.y < 0.0 || dist.x < 1.0 || dist.y < 1.0) - return texture(tex, pos - pt * dir).x;) - - // Load the input samples - GLSLH(for (int x = 0; x < 4; x++)) - GLSLH(for (int y = 0; y < 4; y++)) - GLSLH(i(x,y) = texture(tex, pos + pt * vec2(x-1.25, y-1.25)).x;) - } else { - *transform = (struct gl_transform){{{1.0,0.0}, {0.0,1.0}}, {0.0,0.0}}; - - GLSLH(vec2 dir = fract(pos * tex_size / 2.0) - 0.5;) - GLSLH(if (dir.x * dir.y > 0.0) - return texture(tex, pos).x;) - - GLSLH(for (int x = 0; x < 4; x++)) - GLSLH(for (int y = 0; y < 4; y++)) - GLSLH(i(x,y) = texture(tex, pos + pt * vec2(x+y-3, y-x)).x;) - } - - superxbr_step_h(sc, conf, &params[step]); - GLSLH(return res;) - GLSLHF("}\n"); - - GLSLF("color.x = tex_mul * superxbr(texcoord%d);\n", id); -} diff --git a/video/out/opengl/superxbr.h b/video/out/opengl/superxbr.h deleted file mode 100644 index 7aa46ef..0000000 --- a/video/out/opengl/superxbr.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef MP_GL_SUPERXBR_H -#define MP_GL_SUPERXBR_H - -#include "common.h" -#include "utils.h" - -extern const struct superxbr_opts superxbr_opts_def; -extern const struct m_sub_options superxbr_conf; - -void pass_superxbr(struct gl_shader_cache *sc, int id, int step, float tex_mul, - const struct superxbr_opts *conf, - struct gl_transform *transform); - -#endif diff --git a/video/out/opengl/user_shaders.c b/video/out/opengl/user_shaders.c new file mode 100644 index 0000000..8f915a5 --- /dev/null +++ b/video/out/opengl/user_shaders.c @@ -0,0 +1,195 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <ctype.h> + +#include "user_shaders.h" + +static bool parse_rpn_szexpr(struct bstr line, struct szexp out[MAX_SZEXP_SIZE]) +{ + int pos = 0; + + while (line.len > 0) { + struct bstr word = bstr_strip(bstr_splitchar(line, &line, ' ')); + if (word.len == 0) + continue; + + if (pos >= MAX_SZEXP_SIZE) + return false; + + struct szexp *exp = &out[pos++]; + + if (bstr_eatend0(&word, ".w") || bstr_eatend0(&word, ".width")) { + exp->tag = SZEXP_VAR_W; + exp->val.varname = word; + continue; + } + + if (bstr_eatend0(&word, ".h") || bstr_eatend0(&word, ".height")) { + exp->tag = SZEXP_VAR_H; + exp->val.varname = word; + continue; + } + + switch (word.start[0]) { + case '+': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_ADD; continue; + case '-': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_SUB; continue; + case '*': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_MUL; continue; + case '/': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_DIV; continue; + case '!': exp->tag = SZEXP_OP1; exp->val.op = SZEXP_OP_NOT; continue; + case '>': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_GT; continue; + case '<': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_LT; continue; + } + + if (isdigit(word.start[0])) { + exp->tag = SZEXP_CONST; + if (bstr_sscanf(word, "%f", &exp->val.cval) != 1) + return false; + continue; + } + + // Some sort of illegal expression + return false; + } + + return true; +} + +// Returns false if no more shaders could be parsed +bool parse_user_shader_pass(struct mp_log *log, struct bstr *body, + struct gl_user_shader *out) +{ + if (!body || !out || !body->start || body->len == 0) + return false; + + *out = (struct gl_user_shader){ + .offset = identity_trans, + .width = {{ SZEXP_VAR_W, { .varname = bstr0("HOOKED") }}}, + .height = {{ SZEXP_VAR_H, { .varname = bstr0("HOOKED") }}}, + .cond = {{ SZEXP_CONST, { .cval = 1.0 }}}, + }; + + int hook_idx = 0; + int bind_idx = 0; + + // Skip all garbage (e.g. 
comments) before the first header + int pos = bstr_find(*body, bstr0("//!")); + if (pos < 0) { + mp_warn(log, "Shader appears to contain no passes!\n"); + return false; + } + *body = bstr_cut(*body, pos); + + // First parse all the headers + while (true) { + struct bstr rest; + struct bstr line = bstr_strip(bstr_getline(*body, &rest)); + + // Check for the presence of the magic line beginning + if (!bstr_eatstart0(&line, "//!")) + break; + + *body = rest; + + // Parse the supported commands + if (bstr_eatstart0(&line, "HOOK")) { + if (hook_idx == SHADER_MAX_HOOKS) { + mp_err(log, "Passes may only hook up to %d textures!\n", + SHADER_MAX_HOOKS); + return false; + } + out->hook_tex[hook_idx++] = bstr_strip(line); + continue; + } + + if (bstr_eatstart0(&line, "BIND")) { + if (bind_idx == SHADER_MAX_BINDS) { + mp_err(log, "Passes may only bind up to %d textures!\n", + SHADER_MAX_BINDS); + return false; + } + out->bind_tex[bind_idx++] = bstr_strip(line); + continue; + } + + if (bstr_eatstart0(&line, "SAVE")) { + out->save_tex = bstr_strip(line); + continue; + } + + if (bstr_eatstart0(&line, "OFFSET")) { + float ox, oy; + if (bstr_sscanf(line, "%f %f", &ox, &oy) != 2) { + mp_err(log, "Error while parsing OFFSET!\n"); + return false; + } + out->offset.t[0] = ox; + out->offset.t[1] = oy; + continue; + } + + if (bstr_eatstart0(&line, "WIDTH")) { + if (!parse_rpn_szexpr(line, out->width)) { + mp_err(log, "Error while parsing WIDTH!\n"); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "HEIGHT")) { + if (!parse_rpn_szexpr(line, out->height)) { + mp_err(log, "Error while parsing HEIGHT!\n"); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "WHEN")) { + if (!parse_rpn_szexpr(line, out->cond)) { + mp_err(log, "Error while parsing WHEN!\n"); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "COMPONENTS")) { + if (bstr_sscanf(line, "%d", &out->components) != 1) { + mp_err(log, "Error while parsing COMPONENTS!\n"); + return false; + 
} + continue; + } + + // Unknown command type + mp_err(log, "Unrecognized command '%.*s'!\n", BSTR_P(line)); + return false; + } + + // The rest of the file up until the next magic line beginning (if any) + // shall be the shader body + if (bstr_split_tok(*body, "//!", &out->pass_body, body)) { + // Make sure the magic line is part of the rest + body->start -= 3; + body->len += 3; + } + + // Sanity checking + if (hook_idx == 0) + mp_warn(log, "Pass has no hooked textures (will be ignored)!\n"); + + return true; +} diff --git a/video/out/opengl/user_shaders.h b/video/out/opengl/user_shaders.h new file mode 100644 index 0000000..b8c287b --- /dev/null +++ b/video/out/opengl/user_shaders.h @@ -0,0 +1,74 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MP_GL_USER_SHADERS_H +#define MP_GL_USER_SHADERS_H + +#include "common.h" +#include "utils.h" + +#define SHADER_API 1 +#define SHADER_MAX_HOOKS 16 +#define SHADER_MAX_BINDS 6 +#define MAX_SZEXP_SIZE 32 + +enum szexp_op { + SZEXP_OP_ADD, + SZEXP_OP_SUB, + SZEXP_OP_MUL, + SZEXP_OP_DIV, + SZEXP_OP_NOT, + SZEXP_OP_GT, + SZEXP_OP_LT, +}; + +enum szexp_tag { + SZEXP_END = 0, // End of an RPN expression + SZEXP_CONST, // Push a constant value onto the stack + SZEXP_VAR_W, // Get the width/height of a named texture (variable) + SZEXP_VAR_H, + SZEXP_OP2, // Pop two elements and push the result of a dyadic operation + SZEXP_OP1, // Pop one element and push the result of a monadic operation +}; + +struct szexp { + enum szexp_tag tag; + union { + float cval; + struct bstr varname; + enum szexp_op op; + } val; +}; + +struct gl_user_shader { + struct bstr hook_tex[SHADER_MAX_HOOKS]; + struct bstr bind_tex[SHADER_MAX_BINDS]; + struct bstr save_tex; + struct bstr pass_body; + struct gl_transform offset; + struct szexp width[MAX_SZEXP_SIZE]; + struct szexp height[MAX_SZEXP_SIZE]; + struct szexp cond[MAX_SZEXP_SIZE]; + int components; +}; + +// Parse the next shader pass from 'body'. 
Returns false if the end of the +// string was reached +bool parse_user_shader_pass(struct mp_log *log, struct bstr *body, + struct gl_user_shader *out); + +#endif diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index 40f1beb..73b411e 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -24,6 +24,7 @@ #include <assert.h> #include "common/common.h" +#include "formats.h" #include "utils.h" // GLU has this as gluErrorString (we don't use GLU, as it is legacy-OpenGL) @@ -39,7 +40,7 @@ static const char *gl_error_to_string(GLenum error) } } -void glCheckError(GL *gl, struct mp_log *log, const char *info) +void gl_check_error(GL *gl, struct mp_log *log, const char *info) { for (;;) { GLenum error = gl->GetError(); @@ -50,52 +51,6 @@ void glCheckError(GL *gl, struct mp_log *log, const char *info) } } -// return the number of bytes per pixel for the given format -// does not handle all possible variants, just those used by mpv -int glFmt2bpp(GLenum format, GLenum type) -{ - int component_size = 0; - switch (type) { - case GL_UNSIGNED_BYTE_3_3_2: - case GL_UNSIGNED_BYTE_2_3_3_REV: - return 1; - case GL_UNSIGNED_SHORT_5_5_5_1: - case GL_UNSIGNED_SHORT_1_5_5_5_REV: - case GL_UNSIGNED_SHORT_5_6_5: - case GL_UNSIGNED_SHORT_5_6_5_REV: - return 2; - case GL_UNSIGNED_BYTE: - component_size = 1; - break; - case GL_UNSIGNED_SHORT: - component_size = 2; - break; - } - switch (format) { - case GL_LUMINANCE: - case GL_ALPHA: - return component_size; - case GL_RGB_422_APPLE: - return 2; - case GL_RGB: - case GL_BGR: - case GL_RGB_INTEGER: - return 3 * component_size; - case GL_RGBA: - case GL_BGRA: - case GL_RGBA_INTEGER: - return 4 * component_size; - case GL_RED: - case GL_RED_INTEGER: - return component_size; - case GL_RG: - case GL_LUMINANCE_ALPHA: - case GL_RG_INTEGER: - return 2 * component_size; - } - abort(); // unknown -} - static int get_alignment(int stride) { if (stride % 8 == 0) @@ -112,28 +67,26 @@ static int get_alignment(int stride) // 
format, type: texture parameters // dataptr, stride: image data // x, y, width, height: part of the image to upload -// slice: height of an upload slice, 0 for all at once -void glUploadTex(GL *gl, GLenum target, GLenum format, GLenum type, - const void *dataptr, int stride, - int x, int y, int w, int h, int slice) +void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, + const void *dataptr, int stride, + int x, int y, int w, int h) { + int bpp = gl_bytes_per_pixel(format, type); const uint8_t *data = dataptr; int y_max = y + h; - if (w <= 0 || h <= 0) + if (w <= 0 || h <= 0 || !bpp) return; - if (slice <= 0) - slice = h; if (stride < 0) { data += (h - 1) * stride; stride = -stride; } gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(stride)); - bool use_rowlength = slice > 1 && (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH); - if (use_rowlength) { + int slice = h; + if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) { // this is not always correct, but should work for MPlayer - gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride / glFmt2bpp(format, type)); + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride / bpp); } else { - if (stride != glFmt2bpp(format, type) * w) + if (stride != bpp * w) slice = 1; // very inefficient, but at least it works } for (; y + slice <= y_max; y += slice) { @@ -142,37 +95,12 @@ void glUploadTex(GL *gl, GLenum target, GLenum format, GLenum type, } if (y < y_max) gl->TexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data); - if (use_rowlength) + if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); } -// Like glUploadTex, but upload a byte array with all elements set to val. -// If scratch is not NULL, points to a resizeable talloc memory block than can -// be freely used by the function (for avoiding temporary memory allocations). 
-void glClearTex(GL *gl, GLenum target, GLenum format, GLenum type, - int x, int y, int w, int h, uint8_t val, void **scratch) -{ - int bpp = glFmt2bpp(format, type); - int stride = w * bpp; - int size = h * stride; - if (size < 1) - return; - void *data = scratch ? *scratch : NULL; - if (talloc_get_size(data) < size) - data = talloc_realloc(NULL, data, char *, size); - memset(data, val, size); - gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(stride)); - gl->TexSubImage2D(target, 0, x, y, w, h, format, type, data); - gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); - if (scratch) { - *scratch = data; - } else { - talloc_free(data); - } -} - -mp_image_t *glGetWindowScreenshot(GL *gl) +mp_image_t *gl_read_window_contents(GL *gl) { if (gl->es) return NULL; // ES can't read from front buffer @@ -307,32 +235,6 @@ void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num) gl_vao_unbind(vao); } -struct gl_format { - GLenum format; - GLenum type; - GLint internal_format; -}; - -static const struct gl_format gl_formats[] = { - // GLES 3.0 - {GL_RGB, GL_UNSIGNED_BYTE, GL_RGB}, - {GL_RGBA, GL_UNSIGNED_BYTE, GL_RGBA}, - {GL_RGB, GL_UNSIGNED_BYTE, GL_RGB8}, - {GL_RGBA, GL_UNSIGNED_BYTE, GL_RGBA8}, - {GL_RGB, GL_UNSIGNED_SHORT, GL_RGB16}, - {GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, GL_RGB10_A2}, - // not texture filterable in GLES 3.0 - {GL_RGB, GL_FLOAT, GL_RGB16F}, - {GL_RGBA, GL_FLOAT, GL_RGBA16F}, - {GL_RGB, GL_FLOAT, GL_RGB32F}, - {GL_RGBA, GL_FLOAT, GL_RGBA32F}, - // Desktop GL - {GL_RGB, GL_UNSIGNED_SHORT, GL_RGB10}, - {GL_RGBA, GL_UNSIGNED_SHORT, GL_RGBA12}, - {GL_RGBA, GL_UNSIGNED_SHORT, GL_RGBA16}, - {0} -}; - // Create a texture and a FBO using the texture as color attachments. // iformat: texture internal format // Returns success. 
@@ -363,6 +265,7 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h, if (fbo->rw == cw && fbo->rh == ch && fbo->iformat == iformat) { fbo->lw = w; fbo->lh = h; + fbotex_invalidate(fbo); return true; } @@ -373,19 +276,18 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h, if (flags & FBOTEX_FUZZY_H) h = MP_ALIGN_UP(h, 256); - GLenum filter = fbo->tex_filter; + mp_verbose(log, "Create FBO: %dx%d (%dx%d)\n", lw, lh, w, h); - struct gl_format format = { - .format = GL_RGBA, - .type = GL_UNSIGNED_BYTE, - .internal_format = iformat, - }; - for (int n = 0; gl_formats[n].format; n++) { - if (gl_formats[n].internal_format == format.internal_format) { - format = gl_formats[n]; - break; - } + const struct gl_format *format = gl_find_internal_format(gl, iformat); + if (!format || (format->flags & F_CF) != F_CF) { + mp_verbose(log, "Format 0x%x not supported.\n", (unsigned)iformat); + return false; } + assert(gl->mpgl_caps & MPGL_CAP_FB); + + GLenum filter = fbo->tex_filter; + + fbotex_uninit(fbo); *fbo = (struct fbotex) { .gl = gl, @@ -396,24 +298,18 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h, .iformat = iformat, }; - mp_verbose(log, "Create FBO: %dx%d -> %dx%d\n", fbo->lw, fbo->lh, - fbo->rw, fbo->rh); - - if (!(gl->mpgl_caps & MPGL_CAP_FB)) - return false; - gl->GenFramebuffers(1, &fbo->fbo); gl->GenTextures(1, &fbo->texture); gl->BindTexture(GL_TEXTURE_2D, fbo->texture); - gl->TexImage2D(GL_TEXTURE_2D, 0, format.internal_format, fbo->rw, fbo->rh, 0, - format.format, format.type, NULL); + gl->TexImage2D(GL_TEXTURE_2D, 0, format->internal_format, fbo->rw, fbo->rh, 0, + format->format, format->type, NULL); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); gl->BindTexture(GL_TEXTURE_2D, 0); fbotex_set_filter(fbo, filter ? 
filter : GL_LINEAR); - glCheckError(gl, log, "after creating framebuffer texture"); + gl_check_error(gl, log, "after creating framebuffer texture"); gl->BindFramebuffer(GL_FRAMEBUFFER, fbo->fbo); gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, @@ -428,7 +324,7 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h, gl->BindFramebuffer(GL_FRAMEBUFFER, 0); - glCheckError(gl, log, "after creating framebuffer"); + gl_check_error(gl, log, "after creating framebuffer"); return res; } @@ -457,6 +353,20 @@ void fbotex_uninit(struct fbotex *fbo) } } +// Mark framebuffer contents as unneeded. +void fbotex_invalidate(struct fbotex *fbo) +{ + GL *gl = fbo->gl; + + if (!fbo->fbo || !gl->InvalidateFramebuffer) + return; + + gl->BindFramebuffer(GL_FRAMEBUFFER, fbo->fbo); + gl->InvalidateFramebuffer(GL_FRAMEBUFFER, 1, + (GLenum[]){GL_COLOR_ATTACHMENT0}); + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); +} + // Standard parallel 2D projection, except y1 < y0 means that the coordinate // system is flipped, not the projection. void gl_transform_ortho(struct gl_transform *t, float x0, float x1, @@ -510,24 +420,19 @@ void gl_set_debug_logger(GL *gl, struct mp_log *log) gl->DebugMessageCallback(log ? gl_debug_cb : NULL, log); } -#define SC_ENTRIES 32 -#define SC_UNIFORM_ENTRIES 20 +// Force cache flush if more than this number of shaders is created. 
+#define SC_MAX_ENTRIES 48 enum uniform_type { UT_invalid, UT_i, UT_f, UT_m, - UT_buffer, }; union uniform_val { GLfloat f[9]; GLint i[4]; - struct { - char* text; - GLint binding; - } buffer; }; struct sc_uniform { @@ -539,10 +444,15 @@ struct sc_uniform { union uniform_val v; }; +struct sc_cached_uniform { + GLint loc; + union uniform_val v; +}; + struct sc_entry { GLuint gl_shader; - GLint uniform_locs[SC_UNIFORM_ENTRIES]; - union uniform_val cached_v[SC_UNIFORM_ENTRIES]; + struct sc_cached_uniform *uniforms; + int num_uniforms; bstr frag; bstr vert; struct gl_vao *vao; @@ -552,18 +462,24 @@ struct gl_shader_cache { GL *gl; struct mp_log *log; - // this is modified during use (gl_sc_add() etc.) + // permanent + char **exts; + int num_exts; + + // this is modified during use (gl_sc_add() etc.) and reset for each shader bstr prelude_text; bstr header_text; bstr text; struct gl_vao *vao; - struct sc_entry entries[SC_ENTRIES]; + struct sc_entry *entries; int num_entries; - struct sc_uniform uniforms[SC_UNIFORM_ENTRIES]; + struct sc_uniform *uniforms; int num_uniforms; + bool error_state; // true if an error occurred + // temporary buffers (avoids frequent reallocations) bstr tmp[5]; }; @@ -583,21 +499,21 @@ void gl_sc_reset(struct gl_shader_cache *sc) sc->prelude_text.len = 0; sc->header_text.len = 0; sc->text.len = 0; - for (int n = 0; n < sc->num_uniforms; n++) { + for (int n = 0; n < sc->num_uniforms; n++) talloc_free(sc->uniforms[n].name); - if (sc->uniforms[n].type == UT_buffer) - talloc_free(sc->uniforms[n].v.buffer.text); - } sc->num_uniforms = 0; } static void sc_flush_cache(struct gl_shader_cache *sc) { + MP_VERBOSE(sc, "flushing shader cache\n"); + for (int n = 0; n < sc->num_entries; n++) { struct sc_entry *e = &sc->entries[n]; sc->gl->DeleteProgram(e->gl_shader); talloc_free(e->vert.start); talloc_free(e->frag.start); + talloc_free(e->uniforms); } sc->num_entries = 0; } @@ -611,9 +527,23 @@ void gl_sc_destroy(struct gl_shader_cache *sc) talloc_free(sc); 
} +bool gl_sc_error_state(struct gl_shader_cache *sc) +{ + return sc->error_state; +} + +void gl_sc_reset_error(struct gl_shader_cache *sc) +{ + sc->error_state = false; +} + void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name) { - bstr_xappend_asprintf(sc, &sc->prelude_text, "#extension %s : enable\n", name); + for (int n = 0; n < sc->num_exts; n++) { + if (strcmp(sc->exts[n], name) == 0) + return; + } + MP_TARRAY_APPEND(sc, sc->exts, sc->num_exts, talloc_strdup(sc, name)); } #define bstr_xappend0(sc, b, s) bstr_xappend(sc, b, bstr0(s)) @@ -644,6 +574,11 @@ void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) va_end(ap); } +void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text) +{ + bstr_xappend(sc, &sc->header_text, text); +} + static struct sc_uniform *find_uniform(struct gl_shader_cache *sc, const char *name) { @@ -652,10 +587,12 @@ static struct sc_uniform *find_uniform(struct gl_shader_cache *sc, return &sc->uniforms[n]; } // not found -> add it - assert(sc->num_uniforms < SC_UNIFORM_ENTRIES); // just don't have too many - struct sc_uniform *new = &sc->uniforms[sc->num_uniforms++]; - *new = (struct sc_uniform) { .loc = -1, .name = talloc_strdup(NULL, name) }; - return new; + struct sc_uniform new = { + .loc = -1, + .name = talloc_strdup(NULL, name), + }; + MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new); + return &sc->uniforms[sc->num_uniforms - 1]; } const char* mp_sampler_type(GLenum texture_target) @@ -664,6 +601,7 @@ const char* mp_sampler_type(GLenum texture_target) case GL_TEXTURE_1D: return "sampler1D"; case GL_TEXTURE_2D: return "sampler2D"; case GL_TEXTURE_RECTANGLE: return "sampler2DRect"; + case GL_TEXTURE_EXTERNAL_OES: return "samplerExternalOES"; case GL_TEXTURE_3D: return "sampler3D"; default: abort(); } @@ -765,15 +703,6 @@ void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, transpose3x3(&u->v.f[0]); } -void gl_sc_uniform_buffer(struct gl_shader_cache *sc, char *name, - const char 
*text, int binding) -{ - struct sc_uniform *u = find_uniform(sc, name); - u->type = UT_buffer; - u->v.buffer.text = talloc_strdup(sc, text); - u->v.buffer.binding = binding; -} - // This will call glBindAttribLocation() on the shader before it's linked // (OpenGL requires this to happen before linking). Basically, it associates // the input variable names with the fields in the vao. @@ -800,25 +729,21 @@ static const char *vao_glsl_type(const struct gl_vao_entry *e) // Assumes program is current (gl->UseProgram(program)). static void update_uniform(GL *gl, struct sc_entry *e, struct sc_uniform *u, int n) { - if (u->type == UT_buffer) { - GLuint idx = gl->GetUniformBlockIndex(e->gl_shader, u->name); - gl->UniformBlockBinding(e->gl_shader, idx, u->v.buffer.binding); - return; - } - GLint loc = e->uniform_locs[n]; + struct sc_cached_uniform *un = &e->uniforms[n]; + GLint loc = un->loc; if (loc < 0) return; switch (u->type) { case UT_i: assert(u->size == 1); - if (memcmp(e->cached_v[n].i, u->v.i, sizeof(u->v.i)) != 0) { - memcpy(e->cached_v[n].i, u->v.i, sizeof(u->v.i)); + if (memcmp(un->v.i, u->v.i, sizeof(u->v.i)) != 0) { + memcpy(un->v.i, u->v.i, sizeof(u->v.i)); gl->Uniform1i(loc, u->v.i[0]); } break; case UT_f: - if (memcmp(e->cached_v[n].f, u->v.f, sizeof(u->v.f)) != 0) { - memcpy(e->cached_v[n].f, u->v.f, sizeof(u->v.f)); + if (memcmp(un->v.f, u->v.f, sizeof(u->v.f)) != 0) { + memcpy(un->v.f, u->v.f, sizeof(u->v.f)); switch (u->size) { case 1: gl->Uniform1f(loc, u->v.f[0]); break; case 2: gl->Uniform2f(loc, u->v.f[0], u->v.f[1]); break; @@ -830,8 +755,8 @@ static void update_uniform(GL *gl, struct sc_entry *e, struct sc_uniform *u, int } break; case UT_m: - if (memcmp(e->cached_v[n].f, u->v.f, sizeof(u->v.f)) != 0) { - memcpy(e->cached_v[n].f, u->v.f, sizeof(u->v.f)); + if (memcmp(un->v.f, u->v.f, sizeof(u->v.f)) != 0) { + memcpy(un->v.f, u->v.f, sizeof(u->v.f)); switch (u->size) { case 2: gl->UniformMatrix2fv(loc, 1, GL_FALSE, &u->v.f[0]); break; case 3: 
gl->UniformMatrix3fv(loc, 1, GL_FALSE, &u->v.f[0]); break; @@ -870,9 +795,22 @@ static void compile_attach_shader(struct gl_shader_cache *sc, GLuint program, typestr, status, logstr); talloc_free(logstr); } + if (gl->GetTranslatedShaderSourceANGLE && mp_msg_test(sc->log, MSGL_DEBUG)) { + GLint len = 0; + gl->GetShaderiv(shader, GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE, &len); + if (len > 0) { + GLchar *sstr = talloc_zero_size(NULL, len + 1); + gl->GetTranslatedShaderSourceANGLE(shader, len, NULL, sstr); + MP_DBG(sc, "Translated shader:\n"); + mp_log_source(sc->log, MSGL_DEBUG, sstr); + } + } gl->AttachShader(program, shader); gl->DeleteShader(shader); + + if (!status) + sc->error_state = true; } static void link_shader(struct gl_shader_cache *sc, GLuint program) @@ -891,6 +829,9 @@ static void link_shader(struct gl_shader_cache *sc, GLuint program) MP_MSG(sc, pri, "shader link log (status=%d): %s\n", status, logstr); talloc_free(logstr); } + + if (!status) + sc->error_state = true; } static GLuint create_program(struct gl_shader_cache *sc, const char *vertex, @@ -939,8 +880,14 @@ void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc) // set up shader text (header + uniforms + body) bstr *header = &sc->tmp[0]; ADD(header, "#version %d%s\n", gl->glsl_version, gl->es >= 300 ? " es" : ""); - if (gl->es) + for (int n = 0; n < sc->num_exts; n++) + ADD(header, "#extension %s : enable\n", sc->exts[n]); + if (gl->es) { ADD(header, "precision mediump float;\n"); + ADD(header, "precision mediump sampler2D;\n"); + if (gl->mpgl_caps & MPGL_CAP_3D_TEX) + ADD(header, "precision mediump sampler3D;\n"); + } ADD_BSTR(header, sc->prelude_text); char *vert_in = gl->glsl_version >= 130 ? "in" : "attribute"; char *vert_out = gl->glsl_version >= 130 ? 
"out" : "varying"; @@ -975,7 +922,6 @@ void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc) // fragment shader; still requires adding used uniforms and VAO elements bstr *frag = &sc->tmp[4]; ADD_BSTR(frag, *header); - ADD(frag, "#define RG %s\n", gl->mpgl_caps & MPGL_CAP_TEX_RG ? "rg" : "ra"); if (gl->glsl_version >= 130) { ADD(frag, "#define texture1D texture\n"); ADD(frag, "#define texture3D texture\n"); @@ -986,11 +932,7 @@ void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc) ADD_BSTR(frag, *frag_vaos); for (int n = 0; n < sc->num_uniforms; n++) { struct sc_uniform *u = &sc->uniforms[n]; - if (u->type == UT_buffer) { - ADD(frag, "uniform %s { %s };\n", u->name, u->v.buffer.text); - } else { - ADD(frag, "uniform %s %s;\n", u->glsl_type, u->name); - } + ADD(frag, "uniform %s %s;\n", u->glsl_type, u->name); } // Additional helpers. @@ -1023,8 +965,9 @@ void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc) } } if (!entry) { - if (sc->num_entries == SC_ENTRIES) + if (sc->num_entries == SC_MAX_ENTRIES) sc_flush_cache(sc); + MP_TARRAY_GROW(sc, sc->entries, sc->num_entries); entry = &sc->entries[sc->num_entries++]; *entry = (struct sc_entry){ .vert = bstrdup(NULL, *vert), @@ -1035,15 +978,146 @@ void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc) if (!entry->gl_shader) { entry->gl_shader = create_program(sc, vert->start, frag->start); for (int n = 0; n < sc->num_uniforms; n++) { - entry->uniform_locs[n] = gl->GetUniformLocation(entry->gl_shader, - sc->uniforms[n].name); + struct sc_cached_uniform un = { + .loc = gl->GetUniformLocation(entry->gl_shader, + sc->uniforms[n].name), + }; + MP_TARRAY_APPEND(sc, entry->uniforms, entry->num_uniforms, un); } } gl->UseProgram(entry->gl_shader); + assert(sc->num_uniforms == entry->num_uniforms); + for (int n = 0; n < sc->num_uniforms; n++) update_uniform(gl, entry, &sc->uniforms[n], n); gl_sc_reset(sc); } + +// Maximum number of simultaneous query objects to keep around. 
Reducing this +// number might cause rendering to block until the result of a previous query is +// available +#define QUERY_OBJECT_NUM 8 + +// How many samples to keep around, for the sake of average and peak +// calculations. This corresponds to a few seconds (exact time variable) +#define QUERY_SAMPLE_SIZE 256 + +struct gl_timer { + GL *gl; + GLuint query[QUERY_OBJECT_NUM]; + int query_idx; + + GLuint64 samples[QUERY_SAMPLE_SIZE]; + int sample_idx; + int sample_count; + + uint64_t avg_sum; + uint64_t peak; +}; + +int gl_timer_sample_count(struct gl_timer *timer) +{ + return timer->sample_count; +} + +uint64_t gl_timer_last_us(struct gl_timer *timer) +{ + return timer->samples[(timer->sample_idx - 1) % QUERY_SAMPLE_SIZE] / 1000; +} + +uint64_t gl_timer_avg_us(struct gl_timer *timer) +{ + if (timer->sample_count <= 0) + return 0; + + return timer->avg_sum / timer->sample_count / 1000; +} + +uint64_t gl_timer_peak_us(struct gl_timer *timer) +{ + return timer->peak / 1000; +} + +struct gl_timer *gl_timer_create(GL *gl) +{ + struct gl_timer *timer = talloc_ptrtype(NULL, timer); + *timer = (struct gl_timer){ .gl = gl }; + + if (gl->GenQueries) + gl->GenQueries(QUERY_OBJECT_NUM, timer->query); + + return timer; +} + +void gl_timer_free(struct gl_timer *timer) +{ + if (!timer) + return; + + GL *gl = timer->gl; + if (gl && gl->DeleteQueries) { + // this is a no-op on already uninitialized queries + gl->DeleteQueries(QUERY_OBJECT_NUM, timer->query); + } + + talloc_free(timer); +} + +static void gl_timer_record(struct gl_timer *timer, GLuint64 new) +{ + // Input res into the buffer and grab the previous value + GLuint64 old = timer->samples[timer->sample_idx]; + timer->samples[timer->sample_idx++] = new; + timer->sample_idx %= QUERY_SAMPLE_SIZE; + + // Update average and sum + timer->avg_sum = timer->avg_sum + new - old; + timer->sample_count = MPMIN(timer->sample_count + 1, QUERY_SAMPLE_SIZE); + + // Update peak if necessary + if (new >= timer->peak) { + timer->peak = 
new; + } else if (timer->peak == old) { + // It's possible that the last peak was the value we just removed, + // if so we need to scan for the new peak + uint64_t peak = new; + for (int i = 0; i < QUERY_SAMPLE_SIZE; i++) + peak = MPMAX(peak, timer->samples[i]); + timer->peak = peak; + } +} + +// If no free query is available, this can block. Shouldn't ever happen in +// practice, though. (If it does, consider increasing QUERY_OBJECT_NUM) +// IMPORTANT: only one gl_timer object may ever be active at a single time. +// The caling code *MUST* ensure this +void gl_timer_start(struct gl_timer *timer) +{ + GL *gl = timer->gl; + if (!gl->BeginQuery) + return; + + // Get the next query object + GLuint id = timer->query[timer->query_idx++]; + timer->query_idx %= QUERY_OBJECT_NUM; + + // If this query object already holds a result, we need to get and + // record it first + if (gl->IsQuery(id)) { + GLuint64 elapsed; + gl->GetQueryObjectui64v(id, GL_QUERY_RESULT, &elapsed); + gl_timer_record(timer, elapsed); + } + + gl->BeginQuery(GL_TIME_ELAPSED, id); +} + +void gl_timer_stop(struct gl_timer *timer) +{ + GL *gl = timer->gl; + if (gl->EndQuery) + gl->EndQuery(GL_TIME_ELAPSED); +} diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h index 170e24d..9b4fd84 100644 --- a/video/out/opengl/utils.h +++ b/video/out/opengl/utils.h @@ -20,19 +20,17 @@ #define MP_GL_UTILS_ #include "common.h" +#include "math.h" struct mp_log; -void glCheckError(GL *gl, struct mp_log *log, const char *info); +void gl_check_error(GL *gl, struct mp_log *log, const char *info); -int glFmt2bpp(GLenum format, GLenum type); -void glUploadTex(GL *gl, GLenum target, GLenum format, GLenum type, - const void *dataptr, int stride, - int x, int y, int w, int h, int slice); -void glClearTex(GL *gl, GLenum target, GLenum format, GLenum type, - int x, int y, int w, int h, uint8_t val, void **scratch); +void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, + const void *dataptr, int stride, + 
int x, int y, int w, int h); -mp_image_t *glGetWindowScreenshot(GL *gl); +mp_image_t *gl_read_window_contents(GL *gl); const char* mp_sampler_type(GLenum texture_target); @@ -84,6 +82,7 @@ bool fbotex_change(struct fbotex *fbo, GL *gl, struct mp_log *log, int w, int h, #define FBOTEX_FUZZY_H 2 #define FBOTEX_FUZZY (FBOTEX_FUZZY_W | FBOTEX_FUZZY_H) void fbotex_set_filter(struct fbotex *fbo, GLenum gl_filter); +void fbotex_invalidate(struct fbotex *fbo); // A 3x2 matrix, with the translation part separate. struct gl_transform { @@ -115,6 +114,13 @@ struct mp_rect_f { float x0, y0, x1, y1; }; +// Semantic equality (fuzzy comparison) +static inline bool mp_rect_f_seq(struct mp_rect_f a, struct mp_rect_f b) +{ + return fabs(a.x0 - b.x0) < 1e-6 && fabs(a.x1 - b.x1) < 1e-6 && + fabs(a.y0 - b.y0) < 1e-6 && fabs(a.y1 - b.y1) < 1e-6; +} + static inline void gl_transform_rect(struct gl_transform t, struct mp_rect_f *r) { gl_transform_vec(t, &r->x0, &r->y0); @@ -141,10 +147,13 @@ struct gl_shader_cache; struct gl_shader_cache *gl_sc_create(GL *gl, struct mp_log *log); void gl_sc_destroy(struct gl_shader_cache *sc); +bool gl_sc_error_state(struct gl_shader_cache *sc); +void gl_sc_reset_error(struct gl_shader_cache *sc); void gl_sc_add(struct gl_shader_cache *sc, const char *text); void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...); void gl_sc_hadd(struct gl_shader_cache *sc, const char *text); void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...); +void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text); void gl_sc_uniform_sampler(struct gl_shader_cache *sc, char *name, GLenum target, int unit); void gl_sc_uniform_sampler_ui(struct gl_shader_cache *sc, char *name, int unit); @@ -156,11 +165,21 @@ void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, bool transpose, GLfloat *v); void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, bool transpose, GLfloat *v); -void gl_sc_uniform_buffer(struct gl_shader_cache *sc, 
char *name, - const char *text, int binding); void gl_sc_set_vao(struct gl_shader_cache *sc, struct gl_vao *vao); void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name); void gl_sc_gen_shader_and_reset(struct gl_shader_cache *sc); void gl_sc_reset(struct gl_shader_cache *sc); +struct gl_timer; + +struct gl_timer *gl_timer_create(GL *gl); +void gl_timer_free(struct gl_timer *timer); +void gl_timer_start(struct gl_timer *timer); +void gl_timer_stop(struct gl_timer *timer); + +int gl_timer_sample_count(struct gl_timer *timer); +uint64_t gl_timer_last_us(struct gl_timer *timer); +uint64_t gl_timer_avg_us(struct gl_timer *timer); +uint64_t gl_timer_peak_us(struct gl_timer *timer); + #endif diff --git a/video/out/opengl/video.c b/video/out/opengl/video.c index 8807b65..f46fdc1 100644 --- a/video/out/opengl/video.c +++ b/video/out/opengl/video.c @@ -17,6 +17,7 @@ #include <assert.h> #include <math.h> +#include <stdarg.h> #include <stdbool.h> #include <string.h> #include <assert.h> @@ -31,24 +32,22 @@ #include "common/global.h" #include "options/options.h" #include "common.h" +#include "formats.h" #include "utils.h" #include "hwdec.h" #include "osd.h" #include "stream/stream.h" -#include "superxbr.h" -#include "nnedi3.h" #include "video_shaders.h" +#include "user_shaders.h" #include "video/out/filter_kernels.h" #include "video/out/aspect.h" #include "video/out/bitmap_packer.h" #include "video/out/dither.h" #include "video/out/vo.h" -// Maximal number of passes that prescaler can be applied. -#define MAX_PRESCALE_PASSES 5 - -// Maximal number of steps each pass of prescaling contains -#define MAX_PRESCALE_STEPS 2 +// Maximal number of saved textures (for user script purposes) +#define MAX_TEXTURE_HOOKS 16 +#define MAX_SAVED_TEXTURES 32 // scale/cscale arguments that map directly to shader filter routines. // Note that the convolution filters are not included in this list. 
@@ -91,6 +90,7 @@ static const struct gl_vao_entry vertex_vao[] = { struct texplane { int w, h; + int tex_w, tex_h; GLint gl_internal_format; GLenum gl_target; bool use_integer; @@ -98,12 +98,14 @@ struct texplane { GLenum gl_type; GLuint gl_texture; int gl_buffer; + char swizzle[5]; }; struct video_image { struct texplane planes[4]; bool image_flipped; struct mp_image *mpi; // original input image + bool hwdec_mapped; }; enum plane_type { @@ -125,10 +127,29 @@ struct img_tex { GLenum gl_target; bool use_integer; int tex_w, tex_h; // source texture size - int w, h; // logical size (with pre_transform applied) - struct gl_transform pre_transform; // source texture space + int w, h; // logical size (after transformation) struct gl_transform transform; // rendering transformation - bool texture_la; // it's a GL_LUMINANCE_ALPHA texture (access with .ra not .rg) + char swizzle[5]; +}; + +// A named img_tex, for user scripting purposes +struct saved_tex { + const char *name; + struct img_tex tex; +}; + +// A texture hook. 
This is some operation that transforms a named texture as +// soon as it's generated +struct tex_hook { + char *hook_tex; + char *save_tex; + char *bind_tex[TEXUNIT_VIDEO_NUM]; + int components; // how many components are relevant (0 = same as input) + void *priv; // this can be set to whatever the hook wants + void (*hook)(struct gl_video *p, struct img_tex tex, // generates GLSL + struct gl_transform *trans, void *priv); + void (*free)(struct tex_hook *hook); + bool (*cond)(struct gl_video *p, struct img_tex tex, void *priv); }; struct fbosurface { @@ -140,7 +161,7 @@ struct fbosurface { struct cached_file { char *path; - char *body; + struct bstr body; }; struct gl_video { @@ -149,15 +170,15 @@ struct gl_video { struct mpv_global *global; struct mp_log *log; struct gl_video_opts opts; + struct gl_video_opts *opts_alloc; struct gl_lcms *cms; bool gl_debug; int texture_16bit_depth; // actual bits available in 16 bit textures + int fb_depth; // actual bits available in GL main framebuffer struct gl_shader_cache *sc; - GLenum gl_target; // texture target (GL_TEXTURE_2D, ...) 
for video and FBOs - struct gl_vao vao; struct osd_state *osd_state; @@ -170,7 +191,9 @@ struct gl_video { GLuint dither_texture; int dither_size; - GLuint nnedi3_weights_buffer; + struct gl_timer *upload_timer; + struct gl_timer *render_timer; + struct gl_timer *present_timer; struct mp_image_params real_image_params; // configured format struct mp_image_params image_params; // texture format (mind hwdec case) @@ -188,21 +211,13 @@ struct gl_video { bool forced_dumb_mode; struct fbotex merge_fbo[4]; - struct fbotex deband_fbo[4]; struct fbotex scale_fbo[4]; struct fbotex integer_fbo[4]; struct fbotex indirect_fbo; struct fbotex blend_subs_fbo; - struct fbotex unsharp_fbo; struct fbotex output_fbo; struct fbosurface surfaces[FBOSURFACES_MAX]; - - // these are duplicated so we can keep rendering back and forth between - // them to support an unlimited number of shader passes per step - struct fbotex pre_fbo[2]; - struct fbotex post_fbo[2]; - - struct fbotex prescale_fbo[MAX_PRESCALE_PASSES][MAX_PRESCALE_STEPS]; + struct fbotex vdpau_deinterleave_fbo[2]; int surface_idx; int surface_now; @@ -229,6 +244,14 @@ struct gl_video { bool use_linear; float user_gamma; + // hooks and saved textures + struct saved_tex saved_tex[MAX_SAVED_TEXTURES]; + int saved_tex_num; + struct tex_hook tex_hooks[MAX_TEXTURE_HOOKS]; + int tex_hook_num; + struct fbotex hook_fbos[MAX_SAVED_TEXTURES]; + int hook_fbo_num; + int frames_uploaded; int frames_rendered; AVLFG lfg; @@ -237,7 +260,7 @@ struct gl_video { int last_dither_matrix_size; float *last_dither_matrix; - struct cached_file files[10]; + struct cached_file *files; int num_files; struct gl_hwdec *hwdec; @@ -245,89 +268,7 @@ struct gl_video { bool dsi_warned; bool custom_shader_fn_warned; -}; - -struct fmt_entry { - int mp_format; - GLint internal_format; - GLenum format; - GLenum type; -}; - -// Very special formats, for which OpenGL happens to have direct support -static const struct fmt_entry mp_to_gl_formats[] = { - {IMGFMT_RGB565, 
GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, - {0}, -}; - -static const struct fmt_entry gl_byte_formats[] = { - {0, GL_RED, GL_RED, GL_UNSIGNED_BYTE}, // 1 x 8 - {0, GL_RG, GL_RG, GL_UNSIGNED_BYTE}, // 2 x 8 - {0, GL_RGB, GL_RGB, GL_UNSIGNED_BYTE}, // 3 x 8 - {0, GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE}, // 4 x 8 - {0, GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // 1 x 16 - {0, GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // 2 x 16 - {0, GL_RGB16, GL_RGB, GL_UNSIGNED_SHORT}, // 3 x 16 - {0, GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // 4 x 16 -}; - -static const struct fmt_entry gl_byte_formats_gles3[] = { - {0, GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // 1 x 8 - {0, GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // 2 x 8 - {0, GL_RGB8, GL_RGB, GL_UNSIGNED_BYTE}, // 3 x 8 - {0, GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // 4 x 8 - // There are no filterable texture formats that can be uploaded as - // GL_UNSIGNED_SHORT, so apparently we're out of luck. - {0, 0, 0, 0}, // 1 x 16 - {0, 0, 0, 0}, // 2 x 16 - {0, 0, 0, 0}, // 3 x 16 - {0, 0, 0, 0}, // 4 x 16 -}; - -static const struct fmt_entry gl_ui_byte_formats_gles3[] = { - {0, GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // 1 x 8 - {0, GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // 2 x 8 - {0, GL_RGB8UI, GL_RGB_INTEGER, GL_UNSIGNED_BYTE}, // 3 x 8 - {0, GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // 4 x 8 - {0, GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // 1 x 16 - {0, GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // 2 x 16 - {0, GL_RGB16UI, GL_RGB_INTEGER, GL_UNSIGNED_SHORT}, // 3 x 16 - {0, GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // 4 x 16 -}; - -static const struct fmt_entry gl_byte_formats_gles2[] = { - {0, GL_LUMINANCE, GL_LUMINANCE, GL_UNSIGNED_BYTE}, // 1 x 8 - {0, GL_LUMINANCE_ALPHA, GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE}, // 2 x 8 - {0, GL_RGB, GL_RGB, GL_UNSIGNED_BYTE}, // 3 x 8 - {0, GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE}, // 4 x 8 - {0, 0, 0, 0}, // 1 x 16 - {0, 0, 0, 0}, // 2 x 16 - {0, 0, 0, 0}, // 3 x 16 - {0, 0, 0, 0}, // 4 x 16 -}; - 
-static const struct fmt_entry gl_byte_formats_legacy[] = { - {0, GL_LUMINANCE, GL_LUMINANCE, GL_UNSIGNED_BYTE}, // 1 x 8 - {0, GL_LUMINANCE_ALPHA, GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE}, // 2 x 8 - {0, GL_RGB, GL_RGB, GL_UNSIGNED_BYTE}, // 3 x 8 - {0, GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE}, // 4 x 8 - {0, GL_LUMINANCE16, GL_LUMINANCE, GL_UNSIGNED_SHORT},// 1 x 16 - {0, GL_LUMINANCE16_ALPHA16, GL_LUMINANCE_ALPHA, GL_UNSIGNED_SHORT},// 2 x 16 - {0, GL_RGB16, GL_RGB, GL_UNSIGNED_SHORT},// 3 x 16 - {0, GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT},// 4 x 16 -}; - -static const struct fmt_entry gl_float16_formats[] = { - {0, GL_R16F, GL_RED, GL_FLOAT}, // 1 x f - {0, GL_RG16F, GL_RG, GL_FLOAT}, // 2 x f - {0, GL_RGB16F, GL_RGB, GL_FLOAT}, // 3 x f - {0, GL_RGBA16F, GL_RGBA, GL_FLOAT}, // 4 x f -}; - -static const struct fmt_entry gl_apple_formats[] = { - {IMGFMT_UYVY, GL_RGB, GL_RGB_422_APPLE, GL_UNSIGNED_SHORT_8_8_APPLE}, - {IMGFMT_YUYV, GL_RGB, GL_RGB_422_APPLE, GL_UNSIGNED_SHORT_8_8_REV_APPLE}, - {0} + bool broken_frame; // temporary error state }; struct packed_fmt_entry { @@ -359,6 +300,7 @@ static const struct packed_fmt_entry mp_packed_formats[] = { }; const struct gl_video_opts gl_video_opts_def = { + .dither_algo = DITHER_FRUIT, .dither_depth = -1, .dither_size = 6, .temporal_dither_period = 1, @@ -375,14 +317,16 @@ const struct gl_video_opts gl_video_opts_def = { .scaler_resizes_only = 1, .scaler_lut_size = 6, .interpolation_threshold = 0.0001, - .alpha_mode = 3, + .alpha_mode = ALPHA_BLEND_TILES, .background = {0, 0, 0, 255}, .gamma = 1.0f, - .prescale_passes = 1, - .prescale_downscaling_threshold = 2.0f, + .target_brightness = 250, + .hdr_tone_mapping = TONE_MAPPING_HABLE, + .tone_mapping_param = NAN, }; const struct gl_video_opts gl_video_opts_hq_def = { + .dither_algo = DITHER_FRUIT, .dither_depth = 0, .dither_size = 6, .temporal_dither_period = 1, @@ -401,13 +345,13 @@ const struct gl_video_opts gl_video_opts_hq_def = { .scaler_resizes_only = 1, .scaler_lut_size = 
6, .interpolation_threshold = 0.0001, - .alpha_mode = 3, + .alpha_mode = ALPHA_BLEND_TILES, .background = {0, 0, 0, 255}, .gamma = 1.0f, - .blend_subs = 0, .deband = 1, - .prescale_passes = 1, - .prescale_downscaling_threshold = 2.0f, + .target_brightness = 250, + .hdr_tone_mapping = TONE_MAPPING_HABLE, + .tone_mapping_param = NAN, }; static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt, @@ -436,6 +380,14 @@ const struct m_sub_options gl_video_conf = { OPT_FLAG("gamma-auto", gamma_auto, 0), OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names), OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names), + OPT_INTRANGE("target-brightness", target_brightness, 0, 1, 100000), + OPT_CHOICE("hdr-tone-mapping", hdr_tone_mapping, 0, + ({"clip", TONE_MAPPING_CLIP}, + {"reinhard", TONE_MAPPING_REINHARD}, + {"hable", TONE_MAPPING_HABLE}, + {"gamma", TONE_MAPPING_GAMMA}, + {"linear", TONE_MAPPING_LINEAR})), + OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0), OPT_FLAG("pbo", pbo, 0), SCALER_OPTS("scale", SCALER_SCALE), SCALER_OPTS("dscale", SCALER_DSCALE), @@ -449,9 +401,7 @@ const struct m_sub_options gl_video_conf = { OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0), OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0), OPT_CHOICE("fbo-format", fbo_format, 0, - ({"rgb", GL_RGB}, - {"rgba", GL_RGBA}, - {"rgb8", GL_RGB8}, + ({"rgb8", GL_RGB8}, {"rgba8", GL_RGBA8}, {"rgb10", GL_RGB10}, {"rgb10_a2", GL_RGB10_A2}, @@ -466,42 +416,33 @@ const struct m_sub_options gl_video_conf = { OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16, ({"no", -1}, {"auto", 0})), OPT_CHOICE("dither", dither_algo, 0, - ({"fruit", 0}, {"ordered", 1}, {"no", -1})), + ({"fruit", DITHER_FRUIT}, + {"ordered", DITHER_ORDERED}, + {"no", DITHER_NONE})), OPT_INTRANGE("dither-size-fruit", dither_size, 0, 2, 8), OPT_FLAG("temporal-dither", temporal_dither, 0), OPT_INTRANGE("temporal-dither-period", temporal_dither_period, 0, 1, 128), 
OPT_CHOICE("alpha", alpha_mode, 0, - ({"no", 0}, - {"yes", 1}, - {"blend", 2}, - {"blend-tiles", 3})), + ({"no", ALPHA_NO}, + {"yes", ALPHA_YES}, + {"blend", ALPHA_BLEND}, + {"blend-tiles", ALPHA_BLEND_TILES})), OPT_FLAG("rectangle-textures", use_rectangle, 0), OPT_COLOR("background", background, 0), OPT_FLAG("interpolation", interpolation, 0), OPT_FLOAT("interpolation-threshold", interpolation_threshold, 0), OPT_CHOICE("blend-subtitles", blend_subs, 0, - ({"no", 0}, - {"yes", 1}, - {"video", 2})), + ({"no", BLEND_SUBS_NO}, + {"yes", BLEND_SUBS_YES}, + {"video", BLEND_SUBS_VIDEO})), OPT_STRING("scale-shader", scale_shader, 0), OPT_STRINGLIST("pre-shaders", pre_shaders, 0), OPT_STRINGLIST("post-shaders", post_shaders, 0), + OPT_STRINGLIST("user-shaders", user_shaders, 0), OPT_FLAG("deband", deband, 0), OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0), OPT_FLOAT("sharpen", unsharp, 0), - OPT_CHOICE("prescale-luma", prescale_luma, 0, - ({"none", 0}, - {"superxbr", 1} -#if HAVE_NNEDI - , {"nnedi3", 2} -#endif - )), - OPT_INTRANGE("prescale-passes", - prescale_passes, 0, 1, MAX_PRESCALE_PASSES), - OPT_FLOATRANGE("prescale-downscaling-threshold", - prescale_downscaling_threshold, 0, 0.0, 32.0), - OPT_SUBSTRUCT("superxbr", superxbr_opts, superxbr_conf, 0), - OPT_SUBSTRUCT("nnedi3", nnedi3_opts, nnedi3_conf, 0), + OPT_SUBSTRUCT("", icc_opts, mp_icc_conf, 0), OPT_REMOVED("approx-gamma", "this is always enabled now"), OPT_REMOVED("cscale-down", "chroma is never downscaled"), @@ -509,6 +450,7 @@ const struct m_sub_options gl_video_conf = { OPT_REMOVED("indirect", "this is set automatically whenever sane"), OPT_REMOVED("srgb", "use target-prim=bt709:target-trc=srgb instead"), OPT_REMOVED("source-shader", "use :deband to enable debanding"), + OPT_REMOVED("prescale-luma", "use user shaders for prescaling"), OPT_REPLACED("lscale", "scale"), OPT_REPLACED("lscale-down", "scale-down"), @@ -524,7 +466,6 @@ const struct m_sub_options gl_video_conf = { 
OPT_REPLACED("smoothmotion-threshold", "tscale-param1"), OPT_REPLACED("scale-down", "dscale"), OPT_REPLACED("fancy-downscaling", "correct-downscaling"), - OPT_REPLACED("prescale", "prescale-luma"), {0} }, @@ -535,78 +476,44 @@ const struct m_sub_options gl_video_conf = { static void uninit_rendering(struct gl_video *p); static void uninit_scaler(struct gl_video *p, struct scaler *scaler); static void check_gl_features(struct gl_video *p); -static bool init_format(int fmt, struct gl_video *init); -static void gl_video_upload_image(struct gl_video *p, struct mp_image *mpi); -static void assign_options(struct gl_video_opts *dst, struct gl_video_opts *src); +static bool init_format(struct gl_video *p, int fmt, bool test_only); +static void init_image_desc(struct gl_video *p, int fmt); +static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi); +static void set_options(struct gl_video *p, struct gl_video_opts *src); +static const char *handle_scaler_opt(const char *name, bool tscale); +static void reinit_from_options(struct gl_video *p); static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]); +static void gl_video_setup_hooks(struct gl_video *p); #define GLSL(x) gl_sc_add(p->sc, #x "\n"); #define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__) #define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__) -// Return a fixed point texture format with given characteristics. 
-static const struct fmt_entry *find_tex_format(GL *gl, int bytes_per_comp, - int n_channels) -{ - assert(bytes_per_comp == 1 || bytes_per_comp == 2); - assert(n_channels >= 1 && n_channels <= 4); - const struct fmt_entry *fmts = gl_byte_formats; - if (gl->es >= 300) { - fmts = gl_byte_formats_gles3; - } else if (gl->es) { - fmts = gl_byte_formats_gles2; - } else if (!(gl->mpgl_caps & MPGL_CAP_TEX_RG)) { - fmts = gl_byte_formats_legacy; - } - return &fmts[n_channels - 1 + (bytes_per_comp - 1) * 4]; -} - -static bool is_integer_format(const struct fmt_entry *fmt) -{ - // Tests only the formats which we actually declare somewhere. - switch (fmt->format) { - case GL_RED_INTEGER: - case GL_RG_INTEGER: - case GL_RGB_INTEGER: - case GL_RGBA_INTEGER: - return true; - } - return false; -} - -static const char *load_cached_file(struct gl_video *p, const char *path) +static struct bstr load_cached_file(struct gl_video *p, const char *path) { if (!path || !path[0]) - return NULL; + return (struct bstr){0}; for (int n = 0; n < p->num_files; n++) { if (strcmp(p->files[n].path, path) == 0) return p->files[n].body; } // not found -> load it - if (p->num_files == MP_ARRAY_SIZE(p->files)) { - // empty cache when it overflows - for (int n = 0; n < p->num_files; n++) { - talloc_free(p->files[n].path); - talloc_free(p->files[n].body); - } - p->num_files = 0; - } - struct bstr s = stream_read_file(path, p, p->global, 100000); // 100 kB + struct bstr s = stream_read_file(path, p, p->global, 1024000); // 1024 kB if (s.len) { - struct cached_file *new = &p->files[p->num_files++]; - *new = (struct cached_file) { + struct cached_file new = { .path = talloc_strdup(p, path), - .body = s.start + .body = s, }; - return new->body; + MP_TARRAY_APPEND(p, p->files, p->num_files, new); + return new.body; } - return NULL; + return (struct bstr){0}; } static void debug_check_gl(struct gl_video *p, const char *msg) { if (p->gl_debug) - glCheckError(p->gl, p->log, msg); + gl_check_error(p->gl, p->log, 
msg); } void gl_video_set_debug(struct gl_video *p, bool enable) @@ -628,13 +535,23 @@ static void gl_video_reset_surfaces(struct gl_video *p) p->output_fbo_valid = false; } +static void gl_video_reset_hooks(struct gl_video *p) +{ + for (int i = 0; i < p->tex_hook_num; i++) { + if (p->tex_hooks[i].free) + p->tex_hooks[i].free(&p->tex_hooks[i]); + } + + p->tex_hook_num = 0; +} + static inline int fbosurface_wrap(int id) { id = id % FBOSURFACES_MAX; return id < 0 ? id + FBOSURFACES_MAX : id; } -static void recreate_osd(struct gl_video *p) +static void reinit_osd(struct gl_video *p) { mpgl_osd_destroy(p->osd); p->osd = NULL; @@ -644,17 +561,6 @@ static void recreate_osd(struct gl_video *p) } } -static void reinit_rendering(struct gl_video *p) -{ - MP_VERBOSE(p, "Reinit rendering.\n"); - - debug_check_gl(p, "before scaler initialization"); - - uninit_rendering(p); - - recreate_osd(p); -} - static void uninit_rendering(struct gl_video *p) { GL *gl = p->gl; @@ -665,45 +571,41 @@ static void uninit_rendering(struct gl_video *p) gl->DeleteTextures(1, &p->dither_texture); p->dither_texture = 0; - gl->DeleteBuffers(1, &p->nnedi3_weights_buffer); - p->nnedi3_weights_buffer = 0; - for (int n = 0; n < 4; n++) { fbotex_uninit(&p->merge_fbo[n]); - fbotex_uninit(&p->deband_fbo[n]); fbotex_uninit(&p->scale_fbo[n]); fbotex_uninit(&p->integer_fbo[n]); } fbotex_uninit(&p->indirect_fbo); fbotex_uninit(&p->blend_subs_fbo); - fbotex_uninit(&p->unsharp_fbo); - - for (int n = 0; n < 2; n++) { - fbotex_uninit(&p->pre_fbo[n]); - fbotex_uninit(&p->post_fbo[n]); - } - - for (int pass = 0; pass < MAX_PRESCALE_PASSES; pass++) { - for (int step = 0; step < MAX_PRESCALE_STEPS; step++) - fbotex_uninit(&p->prescale_fbo[pass][step]); - } for (int n = 0; n < FBOSURFACES_MAX; n++) fbotex_uninit(&p->surfaces[n].fbotex); + for (int n = 0; n < MAX_SAVED_TEXTURES; n++) + fbotex_uninit(&p->hook_fbos[n]); + + for (int n = 0; n < 2; n++) + fbotex_uninit(&p->vdpau_deinterleave_fbo[n]); + 
gl_video_reset_surfaces(p); + gl_video_reset_hooks(p); + + gl_sc_reset_error(p->sc); } -void gl_video_update_profile(struct gl_video *p) +// Warning: profile.start must point to a ta allocation, and the function +// takes over ownership. +void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data) { - if (p->use_lut_3d) - return; - - p->use_lut_3d = true; - check_gl_features(p); + if (gl_lcms_set_memory_profile(p->cms, icc_data)) + reinit_from_options(p); +} - reinit_rendering(p); +bool gl_video_icc_auto_enabled(struct gl_video *p) +{ + return p->opts.icc_opts ? p->opts.icc_opts->profile_auto : false; } static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim, @@ -711,14 +613,15 @@ static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim, { GL *gl = p->gl; - if (!p->cms || !p->use_lut_3d) + if (!p->use_lut_3d) return false; - if (!gl_lcms_has_changed(p->cms, prim, trc)) + if (p->lut_3d_texture && !gl_lcms_has_changed(p->cms, prim, trc)) return true; struct lut3d *lut3d = NULL; if (!gl_lcms_get_lut3d(p->cms, &lut3d, prim, trc) || !lut3d) { + p->use_lut_3d = false; return false; } @@ -738,12 +641,14 @@ static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim, debug_check_gl(p, "after 3d lut creation"); + talloc_free(lut3d); + return true; } // Fill an img_tex struct from an FBO + some metadata -static struct img_tex img_tex_fbo(struct fbotex *fbo, struct gl_transform t, - enum plane_type type, int components) +static struct img_tex img_tex_fbo(struct fbotex *fbo, enum plane_type type, + int components) { assert(type != PLANE_NONE); return (struct img_tex){ @@ -756,8 +661,7 @@ static struct img_tex img_tex_fbo(struct fbotex *fbo, struct gl_transform t, .tex_h = fbo->rh, .w = fbo->lw, .h = fbo->lh, - .pre_transform = identity_trans, - .transform = t, + .transform = identity_trans, .components = components, }; } @@ -797,18 +701,19 @@ static void get_plane_source_transform(struct gl_video *p, int w, int h, } // Places 
a video_image's image textures + associated metadata into tex[]. The -// number of textures is equal to p->plane_count. +// number of textures is equal to p->plane_count. Any necessary plane offsets +// are stored in off. (e.g. chroma position) static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, - struct img_tex tex[4]) + struct img_tex tex[4], struct gl_transform off[4]) { assert(vimg->mpi); // Determine the chroma offset - struct gl_transform chroma = (struct gl_transform){{{0}}}; - float ls_w = 1.0 / (1 << p->image_desc.chroma_xs); float ls_h = 1.0 / (1 << p->image_desc.chroma_ys); + struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}}; + if (p->image_params.chroma_location != MP_CHROMA_CENTER) { int cx, cy; mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy); @@ -821,11 +726,7 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0; } - // Make sure luma/chroma sizes are aligned. - // Example: For 4:2:0 with size 3x3, the subsampled chroma plane is 2x2 - // so luma (3,3) has to align with chroma (2,2). - chroma.m[0][0] = ls_w * (float)vimg->planes[0].w / vimg->planes[1].w; - chroma.m[1][1] = ls_h * (float)vimg->planes[0].h / vimg->planes[1].h; + // FIXME: account for rotation in the chroma offset // The existing code assumes we just have a single tex multiplier for // all of the planes. This may change in the future @@ -856,17 +757,18 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, .gl_target = t->gl_target, .multiplier = tex_mul, .use_integer = t->use_integer, - .tex_w = t->w, - .tex_h = t->h, + .tex_w = t->tex_w, + .tex_h = t->tex_h, .w = t->w, .h = t->h, - .transform = type == PLANE_CHROMA ? 
chroma : identity_trans, .components = p->image_desc.components[n], - .texture_la = t->gl_format == GL_LUMINANCE_ALPHA, }; - get_plane_source_transform(p, t->w, t->h, &tex[n].pre_transform); + snprintf(tex[n].swizzle, sizeof(tex[n].swizzle), "%s", t->swizzle); + get_plane_source_transform(p, t->w, t->h, &tex[n].transform); if (p->image_params.rotate % 180 == 90) MPSWAP(int, tex[n].w, tex[n].h); + + off[n] = type == PLANE_CHROMA ? chroma : identity_trans; } } @@ -874,19 +776,21 @@ static void init_video(struct gl_video *p) { GL *gl = p->gl; - init_format(p->image_params.imgfmt, p); - p->gl_target = p->opts.use_rectangle ? GL_TEXTURE_RECTANGLE : GL_TEXTURE_2D; - - check_gl_features(p); - - if (p->hwdec_active) { + if (p->hwdec && p->hwdec->driver->imgfmt == p->image_params.imgfmt) { if (p->hwdec->driver->reinit(p->hwdec, &p->image_params) < 0) MP_ERR(p, "Initializing texture for hardware decoding failed.\n"); - init_format(p->image_params.imgfmt, p); - p->image_params.imgfmt = p->image_desc.id; - p->gl_target = p->hwdec->gl_texture_target; + init_image_desc(p, p->image_params.imgfmt); + const char **exts = p->hwdec->glsl_extensions; + for (int n = 0; exts && exts[n]; n++) + gl_sc_enable_extension(p->sc, (char *)exts[n]); + p->hwdec_active = true; + } else { + init_format(p, p->image_params.imgfmt, false); } + // Format-dependent checks. + check_gl_features(p); + mp_image_params_guess_csp(&p->image_params); int eq_caps = MP_CSP_EQ_CAPS_GAMMA; @@ -900,42 +804,65 @@ static void init_video(struct gl_video *p) debug_check_gl(p, "before video texture creation"); - struct video_image *vimg = &p->image; + if (!p->hwdec_active) { + struct video_image *vimg = &p->image; - struct mp_image layout = {0}; - mp_image_set_params(&layout, &p->image_params); + GLenum gl_target = + p->opts.use_rectangle ? 
GL_TEXTURE_RECTANGLE : GL_TEXTURE_2D; - for (int n = 0; n < p->plane_count; n++) { - struct texplane *plane = &vimg->planes[n]; + struct mp_image layout = {0}; + mp_image_set_params(&layout, &p->image_params); - plane->gl_target = p->gl_target; + for (int n = 0; n < p->plane_count; n++) { + struct texplane *plane = &vimg->planes[n]; - plane->w = mp_image_plane_w(&layout, n); - plane->h = mp_image_plane_h(&layout, n); + plane->gl_target = gl_target; + + plane->w = plane->tex_w = mp_image_plane_w(&layout, n); + plane->h = plane->tex_h = mp_image_plane_h(&layout, n); - if (!p->hwdec_active) { gl->ActiveTexture(GL_TEXTURE0 + n); gl->GenTextures(1, &plane->gl_texture); - gl->BindTexture(p->gl_target, plane->gl_texture); + gl->BindTexture(gl_target, plane->gl_texture); - gl->TexImage2D(p->gl_target, 0, plane->gl_internal_format, + gl->TexImage2D(gl_target, 0, plane->gl_internal_format, plane->w, plane->h, 0, plane->gl_format, plane->gl_type, NULL); int filter = plane->use_integer ? GL_NEAREST : GL_LINEAR; - gl->TexParameteri(p->gl_target, GL_TEXTURE_MIN_FILTER, filter); - gl->TexParameteri(p->gl_target, GL_TEXTURE_MAG_FILTER, filter); - gl->TexParameteri(p->gl_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - gl->TexParameteri(p->gl_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - } + gl->TexParameteri(gl_target, GL_TEXTURE_MIN_FILTER, filter); + gl->TexParameteri(gl_target, GL_TEXTURE_MAG_FILTER, filter); + gl->TexParameteri(gl_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(gl_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n, plane->w, plane->h); + MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n, plane->w, plane->h); + } + gl->ActiveTexture(GL_TEXTURE0); } - gl->ActiveTexture(GL_TEXTURE0); debug_check_gl(p, "after video texture creation"); - reinit_rendering(p); + gl_video_setup_hooks(p); +} + +// Release any texture mappings associated with the current frame. 
+static void unmap_current_image(struct gl_video *p) +{ + struct video_image *vimg = &p->image; + + if (vimg->hwdec_mapped) { + assert(p->hwdec_active); + if (p->hwdec->driver->unmap) + p->hwdec->driver->unmap(p->hwdec); + memset(vimg->planes, 0, sizeof(vimg->planes)); + vimg->hwdec_mapped = false; + } +} + +static void unref_current_image(struct gl_video *p) +{ + unmap_current_image(p); + mp_image_unrefp(&p->image.mpi); } static void uninit_video(struct gl_video *p) @@ -946,21 +873,21 @@ static void uninit_video(struct gl_video *p) struct video_image *vimg = &p->image; + unref_current_image(p); + for (int n = 0; n < p->plane_count; n++) { struct texplane *plane = &vimg->planes[n]; - if (!p->hwdec_active) - gl->DeleteTextures(1, &plane->gl_texture); - plane->gl_texture = 0; + gl->DeleteTextures(1, &plane->gl_texture); gl->DeleteBuffers(1, &plane->gl_buffer); - plane->gl_buffer = 0; } - mp_image_unrefp(&vimg->mpi); + *vimg = (struct video_image){0}; // Invalidate image_params to ensure that gl_video_config() will call // init_video() on uninitialized gl_video. 
p->real_image_params = (struct mp_image_params){0}; p->image_params = p->real_image_params; + p->hwdec_active = false; } static void pass_prepare_src_tex(struct gl_video *p) @@ -975,9 +902,11 @@ static void pass_prepare_src_tex(struct gl_video *p) char texture_name[32]; char texture_size[32]; + char texture_rot[32]; char pixel_size[32]; snprintf(texture_name, sizeof(texture_name), "texture%d", n); snprintf(texture_size, sizeof(texture_size), "texture_size%d", n); + snprintf(texture_rot, sizeof(texture_rot), "texture_rot%d", n); snprintf(pixel_size, sizeof(pixel_size), "pixel_size%d", n); if (s->use_integer) { @@ -991,6 +920,7 @@ static void pass_prepare_src_tex(struct gl_video *p) f[1] = s->tex_h; } gl_sc_uniform_vec2(sc, texture_size, f); + gl_sc_uniform_mat2(sc, texture_rot, true, (float *)s->transform.m); gl_sc_uniform_vec2(sc, pixel_size, (GLfloat[]){1.0f / f[0], 1.0f / f[1]}); @@ -1022,7 +952,6 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h, if (!s->gl_tex) continue; struct gl_transform tr = s->transform; - gl_transform_trans(s->pre_transform, &tr); float tx = (n / 2) * s->w; float ty = (n % 2) * s->h; gl_transform_vec(tr, &tx, &ty); @@ -1038,7 +967,6 @@ static void render_pass_quad(struct gl_video *p, int vp_w, int vp_h, debug_check_gl(p, "after rendering"); } -// flags: see render_pass_quad static void finish_pass_direct(struct gl_video *p, GLint fbo, int vp_w, int vp_h, const struct mp_rect *dst) { @@ -1067,6 +995,34 @@ static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo, &(struct mp_rect){0, 0, w, h}); } +// Copy a texture to the vec4 color, while increasing offset. Also applies +// the texture multiplier to the sampled color +static void copy_img_tex(struct gl_video *p, int *offset, struct img_tex img) +{ + int count = img.components; + assert(*offset + count <= 4); + + int id = pass_bind(p, img); + char src[5] = {0}; + char dst[5] = {0}; + const char *tex_fmt = img.swizzle[0] ? 
img.swizzle : "rgba"; + const char *dst_fmt = "rgba"; + for (int i = 0; i < count; i++) { + src[i] = tex_fmt[i]; + dst[i] = dst_fmt[*offset + i]; + } + + if (img.use_integer) { + uint64_t tex_max = 1ull << p->image_desc.component_full_bits; + img.multiplier *= 1.0 / (tex_max - 1); + } + + GLSLF("color.%s = %f * vec4(texture(texture%d, texcoord%d)).%s;\n", + dst, img.multiplier, id, id, src); + + *offset += count; +} + static void skip_unused(struct gl_video *p, int num_components) { for (int i = num_components; i < 4; i++) @@ -1083,9 +1039,202 @@ static void uninit_scaler(struct gl_video *p, struct scaler *scaler) scaler->initialized = false; } -static void load_shader(struct gl_video *p, const char *body) +static void hook_prelude(struct gl_video *p, const char *name, int id, + struct img_tex tex) { - gl_sc_hadd(p->sc, body); + GLSLHF("#define %s_raw texture%d\n", name, id); + GLSLHF("#define %s_pos texcoord%d\n", name, id); + GLSLHF("#define %s_size texture_size%d\n", name, id); + GLSLHF("#define %s_rot texture_rot%d\n", name, id); + GLSLHF("#define %s_pt pixel_size%d\n", name, id); + + // Set up the sampling functions + GLSLHF("#define %s_tex(pos) (%f * vec4(texture(%s_raw, pos)).%s)\n", + name, tex.multiplier, name, tex.swizzle[0] ? 
tex.swizzle : "rgba"); + + // Since the extra matrix multiplication impacts performance, + // skip it unless the texture was actually rotated + if (gl_transform_eq(tex.transform, identity_trans)) { + GLSLHF("#define %s_texOff(off) %s_tex(%s_pos + %s_pt * vec2(off))\n", + name, name, name, name); + } else { + GLSLHF("#define %s_texOff(off) " + "%s_tex(%s_pos + %s_rot * vec2(off)/%s_size)\n", + name, name, name, name, name); + } +} + +static bool saved_tex_find(struct gl_video *p, const char *name, + struct img_tex *out) +{ + if (!name || !out) + return false; + + for (int i = 0; i < p->saved_tex_num; i++) { + if (strcmp(p->saved_tex[i].name, name) == 0) { + *out = p->saved_tex[i].tex; + return true; + } + } + + return false; +} + +static void saved_tex_store(struct gl_video *p, const char *name, + struct img_tex tex) +{ + assert(name); + + for (int i = 0; i < p->saved_tex_num; i++) { + if (strcmp(p->saved_tex[i].name, name) == 0) { + p->saved_tex[i].tex = tex; + return; + } + } + + assert(p->saved_tex_num < MAX_SAVED_TEXTURES); + p->saved_tex[p->saved_tex_num++] = (struct saved_tex) { + .name = name, + .tex = tex + }; +} + +// Process hooks for a plane, saving the result and returning a new img_tex +// If 'trans' is NULL, the shader is forbidden from transforming tex +static struct img_tex pass_hook(struct gl_video *p, const char *name, + struct img_tex tex, struct gl_transform *trans) +{ + if (!name) + return tex; + + saved_tex_store(p, name, tex); + + MP_DBG(p, "Running hooks for %s\n", name); + for (int i = 0; i < p->tex_hook_num; i++) { + struct tex_hook *hook = &p->tex_hooks[i]; + + if (strcmp(hook->hook_tex, name) != 0) + continue; + + // Check the hook's condition + if (hook->cond && !hook->cond(p, tex, hook->priv)) { + MP_DBG(p, "Skipping hook on %s due to condition.\n", name); + continue; + } + + // Bind all necessary textures and add them to the prelude + for (int t = 0; t < TEXUNIT_VIDEO_NUM; t++) { + const char *bind_name = hook->bind_tex[t]; + struct 
img_tex bind_tex; + + if (!bind_name) + continue; + + // This is a special name that means "currently hooked texture" + if (strcmp(bind_name, "HOOKED") == 0) { + int id = pass_bind(p, tex); + hook_prelude(p, "HOOKED", id, tex); + hook_prelude(p, name, id, tex); + continue; + } + + if (!saved_tex_find(p, bind_name, &bind_tex)) { + // Clean up texture bindings and move on to the next hook + MP_DBG(p, "Skipping hook on %s due to no texture named %s.\n", + name, bind_name); + p->pass_tex_num -= t; + goto next_hook; + } + + hook_prelude(p, bind_name, pass_bind(p, bind_tex), bind_tex); + } + + // Run the actual hook. This generates a series of GLSL shader + // instructions sufficient for drawing the hook's output + struct gl_transform hook_off = identity_trans; + hook->hook(p, tex, &hook_off, hook->priv); + + int comps = hook->components ? hook->components : tex.components; + skip_unused(p, comps); + + // Compute the updated FBO dimensions and store the result + struct mp_rect_f sz = {0, 0, tex.w, tex.h}; + gl_transform_rect(hook_off, &sz); + int w = lroundf(fabs(sz.x1 - sz.x0)); + int h = lroundf(fabs(sz.y1 - sz.y0)); + + assert(p->hook_fbo_num < MAX_SAVED_TEXTURES); + struct fbotex *fbo = &p->hook_fbos[p->hook_fbo_num++]; + finish_pass_fbo(p, fbo, w, h, 0); + + const char *store_name = hook->save_tex ? 
hook->save_tex : name; + struct img_tex saved_tex = img_tex_fbo(fbo, tex.type, comps); + + // If the texture we're saving overwrites the "current" texture, also + // update the tex parameter so that the future loop cycles will use the + // updated values, and export the offset + if (strcmp(store_name, name) == 0) { + if (!trans && !gl_transform_eq(hook_off, identity_trans)) { + MP_ERR(p, "Hook tried changing size of unscalable texture %s!\n", + name); + return tex; + } + + tex = saved_tex; + if (trans) + gl_transform_trans(hook_off, trans); + } + + saved_tex_store(p, store_name, saved_tex); + +next_hook: ; + } + + return tex; +} + +// This can be used at any time in the middle of rendering to specify an +// optional hook point, which if triggered will render out to a new FBO and +// load the result back into vec4 color. Offsets applied by the hooks are +// accumulated in tex_trans, and the FBO is dimensioned according +// to p->texture_w/h +static void pass_opt_hook_point(struct gl_video *p, const char *name, + struct gl_transform *tex_trans) +{ + if (!name) + return; + + for (int i = 0; i < p->tex_hook_num; i++) { + struct tex_hook *hook = &p->tex_hooks[i]; + + if (strcmp(hook->hook_tex, name) == 0) + goto found; + + for (int b = 0; b < TEXUNIT_VIDEO_NUM; b++) { + if (hook->bind_tex[b] && strcmp(hook->bind_tex[b], name) == 0) + goto found; + } + } + + // Nothing uses this texture, don't bother storing it + return; + +found: + assert(p->hook_fbo_num < MAX_SAVED_TEXTURES); + struct fbotex *fbo = &p->hook_fbos[p->hook_fbo_num++]; + finish_pass_fbo(p, fbo, p->texture_w, p->texture_h, 0); + + struct img_tex img = img_tex_fbo(fbo, PLANE_RGB, p->components); + img = pass_hook(p, name, img, tex_trans); + copy_img_tex(p, &(int){0}, img); + p->texture_w = img.w; + p->texture_h = img.h; + p->components = img.components; +} + +static void load_shader(struct gl_video *p, struct bstr body) +{ + gl_sc_hadd_bstr(p->sc, body); gl_sc_uniform_f(p->sc, "random", 
(double)av_lfg_get(&p->lfg) / UINT32_MAX); gl_sc_uniform_f(p->sc, "frame", p->frames_uploaded); gl_sc_uniform_vec2(p->sc, "image_size", (GLfloat[]){p->image_params.w, @@ -1105,34 +1254,6 @@ static const char *get_custom_shader_fn(struct gl_video *p, const char *body) return "sample_pixel"; } -// Applies an arbitrary number of shaders in sequence, using the given pair -// of FBOs as intermediate buffers. Returns whether any shaders were applied. -static bool apply_shaders(struct gl_video *p, char **shaders, int w, int h, - struct fbotex textures[2]) -{ - if (!shaders) - return false; - bool success = false; - int tex = 0; - for (int n = 0; shaders[n]; n++) { - const char *body = load_cached_file(p, shaders[n]); - if (!body) - continue; - finish_pass_fbo(p, &textures[tex], w, h, 0); - int id = pass_bind(p, img_tex_fbo(&textures[tex], identity_trans, - PLANE_RGB, p->components)); - GLSLHF("#define pixel_size pixel_size%d\n", id); - load_shader(p, body); - const char *fn_name = get_custom_shader_fn(p, body); - GLSLF("// custom shader\n"); - GLSLF("color = %s(texture%d, texcoord%d, texture_size%d);\n", - fn_name, id, id, id); - tex = (tex+1) % 2; - success = true; - } - return success; -} - // Semantic equality static bool double_seq(double a, double b) { @@ -1175,6 +1296,9 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler, uninit_scaler(p, scaler); scaler->conf = *conf; + bool is_tscale = scaler->index == SCALER_TSCALE; + scaler->conf.kernel.name = (char *)handle_scaler_opt(conf->kernel.name, is_tscale); + scaler->conf.window.name = (char *)handle_scaler_opt(conf->window.name, is_tscale); scaler->scale_factor = scale_factor; scaler->insufficient = false; scaler->initialized = true; @@ -1229,7 +1353,7 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler, } int width = size / elems_per_pixel; assert(size == width * elems_per_pixel); - const struct fmt_entry *fmt = &gl_float16_formats[elems_per_pixel - 1]; + const struct gl_format 
*fmt = gl_find_float16_format(gl, elems_per_pixel); GLenum target = scaler->gl_target; gl->ActiveTexture(GL_TEXTURE0 + TEXUNIT_SCALERS + scaler->index); @@ -1288,7 +1412,8 @@ static void pass_sample_separated(struct gl_video *p, struct img_tex src, finish_pass_fbo(p, &scaler->sep_fbo, src.w, h, FBOTEX_FUZZY_H); // Second pass (scale only in the x dir) - src = img_tex_fbo(&scaler->sep_fbo, t_x, src.type, src.components); + src = img_tex_fbo(&scaler->sep_fbo, src.type, src.components); + src.transform = t_x; sampler_prelude(p->sc, pass_bind(p, src)); GLSLF("// pass 2\n"); pass_sample_separated_gen(p->sc, scaler, 1, 0); @@ -1322,10 +1447,10 @@ static void pass_sample(struct gl_video *p, struct img_tex tex, } else if (strcmp(name, "oversample") == 0) { pass_sample_oversample(p->sc, scaler, w, h); } else if (strcmp(name, "custom") == 0) { - const char *body = load_cached_file(p, p->opts.scale_shader); - if (body) { + struct bstr body = load_cached_file(p, p->opts.scale_shader); + if (body.start) { load_shader(p, body); - const char *fn_name = get_custom_shader_fn(p, body); + const char *fn_name = get_custom_shader_fn(p, body.start); GLSLF("// custom scale-shader\n"); GLSLF("color = %s(tex, pos, size);\n", fn_name); } else { @@ -1349,316 +1474,474 @@ static void pass_sample(struct gl_video *p, struct img_tex tex, skip_unused(p, tex.components); } -// Get the number of passes for prescaler, with given display size. 
-static int get_prescale_passes(struct gl_video *p, struct img_tex tex[4]) +// Returns true if two img_texs are semantically equivalent (same metadata) +static bool img_tex_equiv(struct img_tex a, struct img_tex b) { - if (!p->opts.prescale_luma) - return 0; + return a.type == b.type && + a.components == b.components && + a.multiplier == b.multiplier && + a.gl_target == b.gl_target && + a.use_integer == b.use_integer && + a.tex_w == b.tex_w && + a.tex_h == b.tex_h && + a.w == b.w && + a.h == b.h && + gl_transform_eq(a.transform, b.transform) && + strcmp(a.swizzle, b.swizzle) == 0; +} - // Return 0 if no luma planes exist - for (int n = 0; ; n++) { - if (n > 4) - return 0; +static void pass_add_hook(struct gl_video *p, struct tex_hook hook) +{ + if (p->tex_hook_num < MAX_TEXTURE_HOOKS) { + p->tex_hooks[p->tex_hook_num++] = hook; + } else { + MP_ERR(p, "Too many hooks! Limit is %d.\n", MAX_TEXTURE_HOOKS); - if (tex[n].type == PLANE_LUMA) - break; + if (hook.free) + hook.free(&hook); } +} - // The downscaling threshold check is turned off. - if (p->opts.prescale_downscaling_threshold < 1.0f) - return p->opts.prescale_passes; +// Adds a hook multiple times, one per name. The last name must be NULL to +// signal the end of the argument list. +#define HOOKS(...) ((char*[]){__VA_ARGS__, NULL}) +static void pass_add_hooks(struct gl_video *p, struct tex_hook hook, + char **names) +{ + for (int i = 0; names[i] != NULL; i++) { + hook.hook_tex = names[i]; + pass_add_hook(p, hook); + } +} - double scale_factors[2]; - get_scale_factors(p, true, scale_factors); +static void deband_hook(struct gl_video *p, struct img_tex tex, + struct gl_transform *trans, void *priv) +{ + pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg); +} - int passes = 0; - for (; passes < p->opts.prescale_passes; passes ++) { - // The scale factor happens to be the same for superxbr and nnedi3. 
- scale_factors[0] /= 2; - scale_factors[1] /= 2; +static void unsharp_hook(struct gl_video *p, struct img_tex tex, + struct gl_transform *trans, void *priv) +{ + GLSLF("#define tex HOOKED\n"); + GLSLF("#define pos HOOKED_pos\n"); + GLSLF("#define pt HOOKED_pt\n"); + pass_sample_unsharp(p->sc, p->opts.unsharp); +} - if (1.0f / scale_factors[0] > p->opts.prescale_downscaling_threshold) - break; - if (1.0f / scale_factors[1] > p->opts.prescale_downscaling_threshold) - break; - } +static void user_hook_old(struct gl_video *p, struct img_tex tex, + struct gl_transform *trans, void *priv) +{ + const char *body = priv; + assert(body); - return passes; + GLSLHF("#define pixel_size HOOKED_pt\n"); + load_shader(p, bstr0(body)); + const char *fn_name = get_custom_shader_fn(p, body); + GLSLF("// custom shader\n"); + GLSLF("color = %s(HOOKED_raw, HOOKED_pos, HOOKED_size);\n", fn_name); } -// Upload the NNEDI3 UBO weights only if needed -static void upload_nnedi3_weights(struct gl_video *p) +// Returns whether successful. 
'result' is left untouched on failure +static bool eval_szexpr(struct gl_video *p, struct img_tex tex, + struct szexp expr[MAX_SZEXP_SIZE], + float *result) { - GL *gl = p->gl; + float stack[MAX_SZEXP_SIZE] = {0}; + int idx = 0; // points to next element to push - if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO && - !p->nnedi3_weights_buffer) - { - gl->GenBuffers(1, &p->nnedi3_weights_buffer); - gl->BindBufferBase(GL_UNIFORM_BUFFER, 0, p->nnedi3_weights_buffer); + for (int i = 0; i < MAX_SZEXP_SIZE; i++) { + switch (expr[i].tag) { + case SZEXP_END: + goto done; - int size; - const float *weights = get_nnedi3_weights(p->opts.nnedi3_opts, &size); + case SZEXP_CONST: + // Since our SZEXPs are bound by MAX_SZEXP_SIZE, it should be + // impossible to overflow the stack + assert(idx < MAX_SZEXP_SIZE); + stack[idx++] = expr[i].val.cval; + continue; - MP_VERBOSE(p, "Uploading NNEDI3 weights via UBO (size=%d)\n", size); + case SZEXP_OP1: + if (idx < 1) { + MP_WARN(p, "Stack underflow in RPN expression!\n"); + return false; + } - // We don't know the endianness of GPU, just assume it's LE - gl->BufferData(GL_UNIFORM_BUFFER, size, weights, GL_STATIC_DRAW); - } -} + switch (expr[i].val.op) { + case SZEXP_OP_NOT: stack[idx-1] = !stack[idx-1]; break; + default: abort(); + } + continue; -// Applies a single pass of the prescaler, and accumulates the offset in -// pass_transform. -static void pass_prescale_luma(struct gl_video *p, struct img_tex *tex, - struct gl_transform *pass_transform, - struct fbotex fbo[MAX_PRESCALE_STEPS]) -{ - // Happens to be the same for superxbr and nnedi3. 
- const int num_steps = 2; + case SZEXP_OP2: + if (idx < 2) { + MP_WARN(p, "Stack underflow in RPN expression!\n"); + return false; + } - for (int step = 0; step < num_steps; step++) { - struct gl_transform step_transform = {{{0}}}; - int id = pass_bind(p, *tex); - int planes = tex->components; + // Pop the operands in reverse order + float op2 = stack[--idx]; + float op1 = stack[--idx]; + float res = 0.0; + switch (expr[i].val.op) { + case SZEXP_OP_ADD: res = op1 + op2; break; + case SZEXP_OP_SUB: res = op1 - op2; break; + case SZEXP_OP_MUL: res = op1 * op2; break; + case SZEXP_OP_DIV: res = op1 / op2; break; + case SZEXP_OP_GT: res = op1 > op2; break; + case SZEXP_OP_LT: res = op1 < op2; break; + default: abort(); + } - switch(p->opts.prescale_luma) { - case 1: - assert(planes == 1); - pass_superxbr(p->sc, id, step, tex->multiplier, - p->opts.superxbr_opts, &step_transform); - break; - case 2: - upload_nnedi3_weights(p); - pass_nnedi3(p->gl, p->sc, planes, id, step, tex->multiplier, - p->opts.nnedi3_opts, &step_transform, tex->gl_target); - break; - default: - abort(); - } + if (!isfinite(res)) { + MP_WARN(p, "Illegal operation in RPN expression!\n"); + return false; + } - int new_w = tex->w * (int)step_transform.m[0][0], - new_h = tex->h * (int)step_transform.m[1][1]; + stack[idx++] = res; + continue; - skip_unused(p, planes); - finish_pass_fbo(p, &fbo[step], new_w, new_h, 0); - *tex = img_tex_fbo(&fbo[step], identity_trans, tex->type, tex->components); + case SZEXP_VAR_W: + case SZEXP_VAR_H: { + struct bstr name = expr[i].val.varname; + struct img_tex var_tex; + + // The size of OUTPUT is determined. It could be useful for certain + // user shaders to skip passes. + if (bstr_equals0(name, "OUTPUT")) { + int vp_w = p->dst_rect.x1 - p->dst_rect.x0; + int vp_h = p->dst_rect.y1 - p->dst_rect.y0; + stack[idx++] = (expr[i].tag == SZEXP_VAR_W) ? 
vp_w : vp_h; + continue; + } - // Accumulate the local transform - gl_transform_trans(step_transform, pass_transform); - } -} + // HOOKED is a special case + if (bstr_equals0(name, "HOOKED")) { + var_tex = tex; + goto found_tex; + } -// Copy a texture to the vec4 color, while increasing offset. Also applies -// the texture multiplier to the sampled color -static void copy_img_tex(struct gl_video *p, int *offset, struct img_tex img) -{ - int count = img.components; - assert(*offset + count <= 4); + for (int o = 0; o < p->saved_tex_num; o++) { + if (bstr_equals0(name, p->saved_tex[o].name)) { + var_tex = p->saved_tex[o].tex; + goto found_tex; + } + } - int id = pass_bind(p, img); - char src[5] = {0}; - char dst[5] = {0}; - const char *tex_fmt = img.texture_la ? "ragg" : "rgba"; - const char *dst_fmt = "rgba"; - for (int i = 0; i < count; i++) { - src[i] = tex_fmt[i]; - dst[i] = dst_fmt[*offset + i]; - } + MP_WARN(p, "Texture %.*s not found in RPN expression!\n", BSTR_P(name)); + return false; - if (img.use_integer) { - uint64_t tex_max = 1ull << p->image_desc.component_full_bits; - img.multiplier *= 1.0 / (tex_max - 1); +found_tex: + stack[idx++] = (expr[i].tag == SZEXP_VAR_W) ? 
var_tex.w : var_tex.h; + continue; + } + } } - GLSLF("color.%s = %f * vec4(texture(texture%d, texcoord%d)).%s;\n", - dst, img.multiplier, id, id, src); +done: + // Return the single stack element + if (idx != 1) { + MP_WARN(p, "Malformed stack after RPN expression!\n"); + return false; + } - *offset += count; + *result = stack[0]; + return true; } -// sample from video textures, set "color" variable to yuv value -static void pass_read_video(struct gl_video *p) +static bool user_hook_cond(struct gl_video *p, struct img_tex tex, void *priv) { - struct img_tex tex[4]; - pass_get_img_tex(p, &p->image, tex); + struct gl_user_shader *shader = priv; + assert(shader); - // Most of the steps here don't actually apply image transformations yet, - // save for the actual upscaling - so as a code convenience we store them - // separately - struct gl_transform transforms[4]; - struct gl_transform tex_trans = identity_trans; - for (int i = 0; i < 4; i++) { - transforms[i] = tex[i].transform; - tex[i].transform = identity_trans; - } + float res = false; + eval_szexpr(p, tex, shader->cond, &res); + return res; +} - int prescale_passes = get_prescale_passes(p, tex); +static void user_hook(struct gl_video *p, struct img_tex tex, + struct gl_transform *trans, void *priv) +{ + struct gl_user_shader *shader = priv; + assert(shader); - int dst_w = p->texture_w << prescale_passes, - dst_h = p->texture_h << prescale_passes; + load_shader(p, shader->pass_body); + GLSLF("// custom hook\n"); + GLSLF("color = hook();\n"); - bool needs_deband[4]; - int scaler_id[4]; // ID if needed, -1 otherwise - int needs_prescale[4]; // number of prescaling passes left + // Make sure we at least create a legal FBO on failure, since it's better + // to do this and display an error message than just crash OpenGL + float w = 1.0, h = 1.0; - // Determine what needs to be done for which plane - for (int i=0; i < 4; i++) { - enum plane_type type = tex[i].type; - if (type == PLANE_NONE) { - needs_deband[i] = false; 
- needs_prescale[i] = 0; - scaler_id[i] = -1; - continue; - } + eval_szexpr(p, tex, shader->width, &w); + eval_szexpr(p, tex, shader->height, &h); - needs_deband[i] = type != PLANE_ALPHA ? p->opts.deband : false; - needs_prescale[i] = type == PLANE_LUMA ? prescale_passes : 0; + *trans = (struct gl_transform){{{w / tex.w, 0}, {0, h / tex.h}}}; + gl_transform_trans(shader->offset, trans); +} - scaler_id[i] = -1; - switch (type) { - case PLANE_RGB: - case PLANE_LUMA: - case PLANE_XYZ: - scaler_id[i] = SCALER_SCALE; - break; +static void user_hook_free(struct tex_hook *hook) +{ + talloc_free(hook->hook_tex); + talloc_free(hook->save_tex); + for (int i = 0; i < TEXUNIT_VIDEO_NUM; i++) + talloc_free(hook->bind_tex[i]); + talloc_free(hook->priv); +} - case PLANE_CHROMA: - scaler_id[i] = SCALER_CSCALE; - break; +static void pass_hook_user_shaders_old(struct gl_video *p, char *name, + char **shaders) +{ + assert(name); + if (!shaders) + return; - case PLANE_ALPHA: // always use bilinear for alpha - default: - continue; + for (int n = 0; shaders[n] != NULL; n++) { + char *body = load_cached_file(p, shaders[n]).start; + if (body) { + pass_add_hook(p, (struct tex_hook) { + .hook_tex = name, + .bind_tex = {"HOOKED"}, + .hook = user_hook_old, + .priv = body, + }); } - - // We can skip scaling if the texture is already at the required size - if (tex[i].w == dst_w && tex[i].h == dst_h) - scaler_id[i] = -1; } +} - // Process all the planes that need some action performed - while (true) { - // Find next plane to operate on - int n = -1; - for (int i = 0; i < 4; i++) { - if (tex[i].type != PLANE_NONE && - (scaler_id[i] >= 0 || needs_deband[i] || needs_prescale[i])) - { - n = i; - break; +static void pass_hook_user_shaders(struct gl_video *p, char **shaders) +{ + if (!shaders) + return; + + for (int n = 0; shaders[n] != NULL; n++) { + struct bstr file = load_cached_file(p, shaders[n]); + struct gl_user_shader out; + while (parse_user_shader_pass(p->log, &file, &out)) { + struct 
tex_hook hook = { + .components = out.components, + .hook = user_hook, + .free = user_hook_free, + .cond = user_hook_cond, + }; + + for (int i = 0; i < SHADER_MAX_HOOKS; i++) { + hook.hook_tex = bstrdup0(p, out.hook_tex[i]); + if (!hook.hook_tex) + continue; + + struct gl_user_shader *out_copy = talloc_ptrtype(p, out_copy); + *out_copy = out; + hook.priv = out_copy; + for (int o = 0; o < SHADER_MAX_BINDS; o++) + hook.bind_tex[o] = bstrdup0(p, out.bind_tex[o]); + hook.save_tex = bstrdup0(p, out.save_tex), + pass_add_hook(p, hook); } } + } +} - if (n == -1) // no textures left - break; +static void gl_video_setup_hooks(struct gl_video *p) +{ + gl_video_reset_hooks(p); + + if (p->opts.deband) { + pass_add_hooks(p, (struct tex_hook) {.hook = deband_hook, + .bind_tex = {"HOOKED"}}, + HOOKS("LUMA", "CHROMA", "RGB", "XYZ")); + } + + if (p->opts.unsharp != 0.0) { + pass_add_hook(p, (struct tex_hook) { + .hook_tex = "MAIN", + .bind_tex = {"HOOKED"}, + .hook = unsharp_hook, + }); + } + + pass_hook_user_shaders_old(p, "MAIN", p->opts.pre_shaders); + pass_hook_user_shaders_old(p, "SCALED", p->opts.post_shaders); + pass_hook_user_shaders(p, p->opts.user_shaders); +} + +// sample from video textures, set "color" variable to yuv value +static void pass_read_video(struct gl_video *p) +{ + struct img_tex tex[4]; + struct gl_transform offsets[4]; + pass_get_img_tex(p, &p->image, tex, offsets); + + // To keep the code as simple as possibly, we currently run all shader + // stages even if they would be unnecessary (e.g. no hooks for a texture). + // In the future, deferred img_tex should optimize this away. + + // Merge semantically identical textures. 
This loop is done from back + // to front so that merged textures end up in the right order while + // simultaneously allowing us to skip unnecessary merges + for (int n = 3; n >= 0; n--) { + if (tex[n].type == PLANE_NONE) + continue; - // Figure out if it needs to be merged with anything else first - int o = -1; - for (int i = n+1; i < 4; i++) { - if (tex[i].type == tex[n].type - && tex[i].w == tex[n].w - && tex[i].h == tex[n].h - && gl_transform_eq(transforms[i], transforms[n])) + int first = n; + int num = 0; + + for (int i = 0; i < n; i++) { + if (img_tex_equiv(tex[n], tex[i]) && + gl_transform_eq(offsets[n], offsets[i])) { - o = i; - break; + GLSLF("// merging plane %d ...\n", i); + copy_img_tex(p, &num, tex[i]); + first = MPMIN(first, i); + memset(&tex[i], 0, sizeof(tex[i])); } } - // Multiple planes share the same dimensions and type, merge them for - // upscaling/debanding efficiency - if (o != -1) { - GLSLF("// merging plane %d into %d\n", o, n); - - int num = 0; + if (num > 0) { + GLSLF("// merging plane %d ... 
into %d\n", n, first); copy_img_tex(p, &num, tex[n]); - copy_img_tex(p, &num, tex[o]); finish_pass_fbo(p, &p->merge_fbo[n], tex[n].w, tex[n].h, 0); - tex[n] = img_tex_fbo(&p->merge_fbo[n], identity_trans, - tex[n].type, num); - - memset(&tex[o], 0, sizeof(tex[o])); - continue; + tex[first] = img_tex_fbo(&p->merge_fbo[n], tex[n].type, num); + memset(&tex[n], 0, sizeof(tex[n])); } + } - // The steps after this point (debanding, upscaling) can't handle - // integer textures, so the plane is still in that format by this point - // we need to ensure it gets converted + // If any textures are still in integer format by this point, we need + // to introduce an explicit conversion pass to avoid breaking hooks/scaling + for (int n = 0; n < 4; n++) { if (tex[n].use_integer) { GLSLF("// use_integer fix for plane %d\n", n); copy_img_tex(p, &(int){0}, tex[n]); finish_pass_fbo(p, &p->integer_fbo[n], tex[n].w, tex[n].h, 0); - tex[n] = img_tex_fbo(&p->integer_fbo[n], identity_trans, - tex[n].type, tex[n].components); - continue; + tex[n] = img_tex_fbo(&p->integer_fbo[n], tex[n].type, + tex[n].components); } + } - // Plane is not yet debanded - if (needs_deband[n]) { - GLSLF("// debanding plane %d\n", n); + // Dispatch the hooks for all of these textures, saving and perhaps + // modifying them in the process + for (int n = 0; n < 4; n++) { + const char *name; + switch (tex[n].type) { + case PLANE_RGB: name = "RGB"; break; + case PLANE_LUMA: name = "LUMA"; break; + case PLANE_CHROMA: name = "CHROMA"; break; + case PLANE_ALPHA: name = "ALPHA"; break; + case PLANE_XYZ: name = "XYZ"; break; + default: continue; + } - int id = pass_bind(p, tex[n]); - pass_sample_deband(p->sc, p->opts.deband_opts, id, tex[n].multiplier, - tex[n].gl_target, &p->lfg); - skip_unused(p, tex[n].components); - finish_pass_fbo(p, &p->deband_fbo[n], tex[n].w, tex[n].h, 0); - tex[n] = img_tex_fbo(&p->deband_fbo[n], identity_trans, - tex[n].type, tex[n].components); + tex[n] = pass_hook(p, name, tex[n], 
&offsets[n]); + } - needs_deband[n] = false; - continue; + // At this point all planes are finalized but they may not be at the + // required size yet. Furthermore, they may have texture offsets that + // require realignment. For lack of something better to do, we assume + // the rgb/luma texture is the "reference" and scale everything else + // to match. + for (int n = 0; n < 4; n++) { + switch (tex[n].type) { + case PLANE_RGB: + case PLANE_XYZ: + case PLANE_LUMA: break; + default: continue; } - // Plane still needs prescaling passes - if (needs_prescale[n]) { - GLSLF("// prescaling plane %d (%d left)\n", n, needs_prescale[n]); - pass_prescale_luma(p, &tex[n], &tex_trans, - p->prescale_fbo[needs_prescale[n]-1]); - needs_prescale[n]--; - - // We can skip scaling if we arrived at our target res - if (tex[n].w == dst_w && tex[n].h == dst_h) - scaler_id[n] = -1; - - // If we're done prescaling, we need to adjust all of the - // other transforms to make sure the planes still align - if (needs_prescale[n] == 0) { - for (int i = 0; i < 4; i++) { - if (n == i) - continue; - - transforms[i].t[0] -= tex_trans.t[0] / tex_trans.m[0][0]; - transforms[i].t[1] -= tex_trans.t[1] / tex_trans.m[1][1]; - } - } + p->texture_w = tex[n].w; + p->texture_h = tex[n].h; + p->texture_offset = offsets[n]; + break; + } + + // Compute the reference rect + struct mp_rect_f src = {0.0, 0.0, p->image_params.w, p->image_params.h}; + struct mp_rect_f ref = src; + gl_transform_rect(p->texture_offset, &ref); + MP_DBG(p, "ref rect: {%f %f} {%f %f}\n", ref.x0, ref.y0, ref.x1, ref.y1); + + // Explicitly scale all of the textures that don't match + for (int n = 0; n < 4; n++) { + if (tex[n].type == PLANE_NONE) continue; - } - // Plane is not yet upscaled - if (scaler_id[n] >= 0) { - const struct scaler_config *conf = &p->opts.scaler[scaler_id[n]]; - struct scaler *scaler = &p->scaler[scaler_id[n]]; - - // This is the only step that actually uses the transform - tex[n].transform = transforms[n]; - - // 
Bilinear scaling is a no-op due to GPU sampling - if (strcmp(conf->kernel.name, "bilinear") != 0) { - GLSLF("// upscaling plane %d\n", n); - pass_sample(p, tex[n], scaler, conf, 1.0, dst_w, dst_h); - finish_pass_fbo(p, &p->scale_fbo[n], dst_w, dst_h, FBOTEX_FUZZY); - tex[n] = img_tex_fbo(&p->scale_fbo[n], identity_trans, - tex[n].type, tex[n].components); - transforms[n] = identity_trans; - } + // If the planes are aligned identically, we will end up with the + // exact same source rectangle. + struct mp_rect_f rect = src; + gl_transform_rect(offsets[n], &rect); + MP_DBG(p, "rect[%d]: {%f %f} {%f %f}\n", n, + rect.x0, rect.y0, rect.x1, rect.y1); + + if (mp_rect_f_seq(ref, rect)) + continue; + + // If the rectangles differ, then our planes have a different + // alignment and/or size. First of all, we have to compute the + // corrections required to meet the target rectangle + struct gl_transform fix = { + .m = {{(ref.x1 - ref.x0) / (rect.x1 - rect.x0), 0.0}, + {0.0, (ref.y1 - ref.y0) / (rect.y1 - rect.y0)}}, + .t = {ref.x0, ref.y0}, + }; + + // Since the scale in texture space is different from the scale in + // absolute terms, we have to scale the coefficients down to be + // relative to the texture's physical dimensions and local offset + struct gl_transform scale = { + .m = {{(float)tex[n].w / p->texture_w, 0.0}, + {0.0, (float)tex[n].h / p->texture_h}}, + .t = {-rect.x0, -rect.y0}, + }; + gl_transform_trans(scale, &fix); + MP_DBG(p, "-> fix[%d] = {%f %f} + off {%f %f}\n", n, + fix.m[0][0], fix.m[1][1], fix.t[0], fix.t[1]); + + // Since the texture transform is a function of the texture coordinates + // to texture space, rather than the other way around, we have to + // actually apply the *inverse* of this. Fortunately, calculating + // the inverse is relatively easy here. 
+ fix.m[0][0] = 1.0 / fix.m[0][0]; + fix.m[1][1] = 1.0 / fix.m[1][1]; + fix.t[0] = fix.m[0][0] * -fix.t[0]; + fix.t[1] = fix.m[1][1] * -fix.t[1]; + gl_transform_trans(fix, &tex[n].transform); + + int scaler_id = -1; + const char *name = NULL; + switch (tex[n].type) { + case PLANE_RGB: + case PLANE_LUMA: + case PLANE_XYZ: + scaler_id = SCALER_SCALE; + // these aren't worth hooking, fringe hypothetical cases only + break; + case PLANE_CHROMA: + scaler_id = SCALER_CSCALE; + name = "CHROMA_SCALED"; + break; + case PLANE_ALPHA: + // alpha always uses bilinear + name = "ALPHA_SCALED"; + } - scaler_id[n] = -1; + if (scaler_id < 0) continue; + + const struct scaler_config *conf = &p->opts.scaler[scaler_id]; + struct scaler *scaler = &p->scaler[scaler_id]; + + // bilinear scaling is a free no-op thanks to GPU sampling + if (strcmp(conf->kernel.name, "bilinear") != 0) { + GLSLF("// upscaling plane %d\n", n); + pass_sample(p, tex[n], scaler, conf, 1.0, p->texture_w, p->texture_h); + finish_pass_fbo(p, &p->scale_fbo[n], p->texture_w, p->texture_h, + FBOTEX_FUZZY); + tex[n] = img_tex_fbo(&p->scale_fbo[n], tex[n].type, tex[n].components); } - // Execution should never reach this point - abort(); + // Run any post-scaling hooks + tex[n] = pass_hook(p, name, tex[n], NULL); } // All planes are of the same size and properly aligned at this point @@ -1668,10 +1951,6 @@ static void pass_read_video(struct gl_video *p) if (tex[i].type != PLANE_NONE) copy_img_tex(p, &coord, tex[i]); } - - p->texture_w = dst_w; - p->texture_h = dst_h; - p->texture_offset = tex_trans; p->components = coord; } @@ -1679,7 +1958,7 @@ static void pass_read_video(struct gl_video *p) // transformations. 
Returns the ID of the texture unit it was bound to static int pass_read_fbo(struct gl_video *p, struct fbotex *fbo) { - struct img_tex tex = img_tex_fbo(fbo, identity_trans, PLANE_RGB, p->components); + struct img_tex tex = img_tex_fbo(fbo, PLANE_RGB, p->components); copy_img_tex(p, &(int){0}, tex); return pass_bind(p, tex); @@ -1752,9 +2031,9 @@ static void pass_convert_yuv(struct gl_video *p) } p->components = 3; - if (!p->has_alpha || p->opts.alpha_mode == 0) { // none + if (!p->has_alpha || p->opts.alpha_mode == ALPHA_NO) { GLSL(color.a = 1.0;) - } else if (p->opts.alpha_mode == 2) { // blend against black + } else if (p->opts.alpha_mode == ALPHA_BLEND) { GLSL(color = vec4(color.rgb * color.a, 1.0);) } else { // alpha present in image p->components = 4; @@ -1805,9 +2084,12 @@ static void pass_scale_main(struct gl_video *p) struct scaler_config scaler_conf = p->opts.scaler[SCALER_SCALE]; if (p->opts.scaler_resizes_only && !downscaling && !upscaling) { scaler_conf.kernel.name = "bilinear"; - // bilinear is going to be used, just remove all sub-pixel offsets. 
- p->texture_offset.t[0] = (int)p->texture_offset.t[0]; - p->texture_offset.t[1] = (int)p->texture_offset.t[1]; + // For scaler-resizes-only, we round the texture offset to + // the nearest round value in order to prevent ugly blurriness + // (in exchange for slightly shifting the image by up to half a + // subpixel) + p->texture_offset.t[0] = roundf(p->texture_offset.t[0]); + p->texture_offset.t[1] = roundf(p->texture_offset.t[1]); } if (downscaling && p->opts.scaler[SCALER_DSCALE].kernel.name) { scaler_conf = p->opts.scaler[SCALER_DSCALE]; @@ -1826,8 +2108,10 @@ static void pass_scale_main(struct gl_video *p) // Pre-conversion, like linear light/sigmoidization GLSLF("// scaler pre-conversion\n"); - if (p->use_linear) + if (p->use_linear) { pass_linearize(p->sc, p->image_params.gamma); + pass_opt_hook_point(p, "LINEAR", NULL); + } bool use_sigmoid = p->use_linear && p->opts.sigmoid_upscaling && upscaling; float sig_center, sig_slope, sig_offset, sig_scale; @@ -1842,8 +2126,11 @@ static void pass_scale_main(struct gl_video *p) sig_scale = 1.0/(1+expf(sig_slope * (sig_center-1))) - sig_offset; GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0)/%f;\n", sig_center, sig_scale, sig_offset, sig_slope); + pass_opt_hook_point(p, "SIGMOID", NULL); } + pass_opt_hook_point(p, "PREKERNEL", NULL); + int vp_w = p->dst_rect.x1 - p->dst_rect.x0; int vp_h = p->dst_rect.y1 - p->dst_rect.y0; struct gl_transform transform; @@ -1851,14 +2138,16 @@ static void pass_scale_main(struct gl_video *p) GLSLF("// main scaling\n"); finish_pass_fbo(p, &p->indirect_fbo, p->texture_w, p->texture_h, 0); - struct img_tex src = img_tex_fbo(&p->indirect_fbo, transform, - PLANE_RGB, p->components); + struct img_tex src = img_tex_fbo(&p->indirect_fbo, PLANE_RGB, p->components); + gl_transform_trans(transform, &src.transform); pass_sample(p, src, scaler, &scaler_conf, scale_factor, vp_w, vp_h); // Changes the texture size to display size after main scaler. 
p->texture_w = vp_w; p->texture_h = vp_h; + pass_opt_hook_point(p, "POSTKERNEL", NULL); + GLSLF("// scaler post-conversion\n"); if (use_sigmoid) { // Inverse of the transformation above @@ -1869,41 +2158,87 @@ static void pass_scale_main(struct gl_video *p) // Adapts the colors from the given color space to the display device's native // gamut. -static void pass_colormanage(struct gl_video *p, enum mp_csp_prim prim_src, +static void pass_colormanage(struct gl_video *p, float peak_src, + enum mp_csp_prim prim_src, enum mp_csp_trc trc_src) { GLSLF("// color management\n"); enum mp_csp_trc trc_dst = p->opts.target_trc; enum mp_csp_prim prim_dst = p->opts.target_prim; + float peak_dst = p->opts.target_brightness; if (p->use_lut_3d) { // The 3DLUT is always generated against the original source space enum mp_csp_prim prim_orig = p->image_params.primaries; enum mp_csp_trc trc_orig = p->image_params.gamma; + // One exception: SMPTE ST.2084 is not implemented by LittleCMS + // for technical limitation reasons, so we use a gamma 2.2 input curve + // here instead. We could pick any value we want here, the difference + // is just coding efficiency. + if (trc_orig == MP_CSP_TRC_SMPTE_ST2084) + trc_orig = MP_CSP_TRC_GAMMA22; + if (gl_video_get_lut3d(p, prim_orig, trc_orig)) { prim_dst = prim_orig; trc_dst = trc_orig; - } else { - p->use_lut_3d = false; } } - if (prim_dst == MP_CSP_PRIM_AUTO) + // When auto-guessing the output color params, just pick the source color + // params to preserve the authentic "look and feel" of wrong/naive players. 
+ // Some exceptions apply to source spaces that even hardcore technoluddites + // would probably not enjoy viewing unaltered + if (prim_dst == MP_CSP_PRIM_AUTO) { prim_dst = prim_src; + + // Avoid outputting very wide gamut content automatically, since the + // majority target audience has standard gamut displays + if (prim_dst == MP_CSP_PRIM_BT_2020 || prim_dst == MP_CSP_PRIM_PRO_PHOTO) + prim_dst = MP_CSP_PRIM_BT_709; + } + if (trc_dst == MP_CSP_TRC_AUTO) { trc_dst = trc_src; - // Avoid outputting linear light at all costs + // Avoid outputting linear light at all costs. First try + // falling back to the image gamma (e.g. in the case that the input + // was linear light due to linear-scaling) if (trc_dst == MP_CSP_TRC_LINEAR) trc_dst = p->image_params.gamma; - if (trc_dst == MP_CSP_TRC_LINEAR) + + // Failing that, pick gamma 2.2 as a reasonable default. This is also + // picked as a default for outputting HDR content + if (trc_dst == MP_CSP_TRC_LINEAR || trc_dst == MP_CSP_TRC_SMPTE_ST2084) trc_dst = MP_CSP_TRC_GAMMA22; } - bool need_gamma = trc_src != trc_dst || prim_src != prim_dst; + if (!peak_src) { + // If the source has no information known, it's display-referred + // (and should be treated relative to the specified desired peak_dst) + peak_src = peak_dst; + } + + // All operations from here on require linear light as a starting point, + // so we linearize even if trc_src == trc_dst when one of the other + // operations needs it + bool need_gamma = trc_src != trc_dst || prim_src != prim_dst || + peak_src != peak_dst; if (need_gamma) pass_linearize(p->sc, trc_src); + // Adapt and tone map for a different reference peak brightness + if (peak_src != peak_dst) + { + GLSLF("// HDR tone mapping\n"); + float rel_peak = peak_src / peak_dst; + // Normalize such that 1 is the target brightness (and values above + // 1 are out of range) + GLSLF("color.rgb *= vec3(%f);\n", rel_peak); + // Tone map back down to the range [0,1] + pass_tone_map(p->sc, rel_peak, 
p->opts.hdr_tone_mapping, + p->opts.tone_mapping_param); + } + // Adapt to the right colorspace if necessary if (prim_src != prim_dst) { struct mp_csp_primaries csp_src = mp_get_csp_primaries(prim_src), @@ -1914,8 +2249,14 @@ static void pass_colormanage(struct gl_video *p, enum mp_csp_prim prim_src, GLSL(color.rgb = cms_matrix * color.rgb;) } - if (need_gamma) + if (need_gamma) { + // If the target encoding function has a fixed peak, we need to + // un-normalize back to the encoding signal range + if (trc_dst == MP_CSP_TRC_SMPTE_ST2084) + GLSLF("color.rgb *= vec3(%f);\n", peak_dst / 10000); + pass_delinearize(p->sc, trc_dst); + } if (p->use_lut_3d) { gl_sc_uniform_sampler(p->sc, "lut_3d", GL_TEXTURE_3D, TEXUNIT_3DLUT); @@ -1928,11 +2269,11 @@ static void pass_dither(struct gl_video *p) GL *gl = p->gl; // Assume 8 bits per component if unknown. - int dst_depth = gl->fb_g ? gl->fb_g : 8; + int dst_depth = p->fb_depth; if (p->opts.dither_depth > 0) dst_depth = p->opts.dither_depth; - if (p->opts.dither_depth < 0 || p->opts.dither_algo < 0) + if (p->opts.dither_depth < 0 || p->opts.dither_algo == DITHER_NONE) return; if (!p->dither_texture) { @@ -1940,12 +2281,12 @@ static void pass_dither(struct gl_video *p) int tex_size; void *tex_data; - GLint tex_iformat; - GLint tex_format; + GLint tex_iformat = 0; + GLint tex_format = 0; GLenum tex_type; unsigned char temp[256]; - if (p->opts.dither_algo == 0) { + if (p->opts.dither_algo == DITHER_FRUIT) { int sizeb = p->opts.dither_size; int size = 1 << sizeb; @@ -1956,15 +2297,14 @@ static void pass_dither(struct gl_video *p) p->last_dither_matrix_size = size; } - const struct fmt_entry *fmt = find_tex_format(gl, 2, 1); - tex_size = size; // Prefer R16 texture since they provide higher precision. 
- if (fmt->internal_format) { + const struct gl_format *fmt = gl_find_unorm_format(gl, 2, 1); + if (!fmt || gl->es) + fmt = gl_find_float16_format(gl, 1); + tex_size = size; + if (fmt) { tex_iformat = fmt->internal_format; tex_format = fmt->format; - } else { - tex_iformat = gl_float16_formats[0].internal_format; - tex_format = gl_float16_formats[0].format; } tex_type = GL_FLOAT; tex_data = p->last_dither_matrix; @@ -1972,7 +2312,7 @@ static void pass_dither(struct gl_video *p) assert(sizeof(temp) >= 8 * 8); mp_make_ordered_dither_matrix(temp, 8); - const struct fmt_entry *fmt = find_tex_format(gl, 1, 1); + const struct gl_format *fmt = gl_find_unorm_format(gl, 1, 1); tex_size = 8; tex_iformat = fmt->internal_format; tex_format = fmt->format; @@ -1987,7 +2327,7 @@ static void pass_dither(struct gl_video *p) gl->BindTexture(GL_TEXTURE_2D, p->dither_texture); gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1); gl->TexImage2D(GL_TEXTURE_2D, 0, tex_iformat, tex_size, tex_size, 0, - tex_format, tex_type, tex_data); + tex_format, tex_type, tex_data); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); @@ -2057,9 +2397,12 @@ static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts, default: abort(); } - // Subtitle color management, they're assumed to be sRGB by default - if (cms) - pass_colormanage(p, MP_CSP_PRIM_BT_709, MP_CSP_TRC_SRGB); + // Subtitle color management, they're assumed to be display-referred + // sRGB by default + if (cms) { + pass_colormanage(p, p->opts.target_brightness, + MP_CSP_PRIM_BT_709, MP_CSP_TRC_SRGB); + } gl_sc_set_vao(p->sc, mpgl_osd_get_vao(p->osd)); gl_sc_gen_shader_and_reset(p->sc); mpgl_osd_draw_part(p->osd, vp_w, vp_h, n); @@ -2073,19 +2416,19 @@ static void pass_render_frame_dumb(struct gl_video *p, int fbo) p->gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); struct img_tex tex[4]; - 
pass_get_img_tex(p, &p->image, tex); + struct gl_transform off[4]; + pass_get_img_tex(p, &p->image, tex, off); struct gl_transform transform; compute_src_transform(p, &transform); - struct gl_transform tchroma = transform; - tchroma.t[0] /= 1 << p->image_desc.chroma_xs; - tchroma.t[1] /= 1 << p->image_desc.chroma_ys; - int index = 0; for (int i = 0; i < p->plane_count; i++) { - gl_transform_trans(tex[i].type == PLANE_CHROMA ? tchroma : transform, - &tex[i].transform); + struct gl_transform trel = {{{(float)p->texture_w / tex[i].w, 0.0}, + {0.0, (float)p->texture_h / tex[i].h}}}; + gl_transform_trans(trel, &tex[i].transform); + gl_transform_trans(transform, &tex[i].transform); + gl_transform_trans(off[i], &tex[i].transform); copy_img_tex(p, &index, tex[i]); } @@ -2101,6 +2444,8 @@ static void pass_render_frame(struct gl_video *p) p->texture_h = p->image_params.h; p->texture_offset = identity_trans; p->components = 0; + p->saved_tex_num = 0; + p->hook_fbo_num = 0; if (p->image_params.rotate % 180 == 90) MPSWAP(int, p->texture_w, p->texture_h); @@ -2108,16 +2453,23 @@ static void pass_render_frame(struct gl_video *p) if (p->dumb_mode) return; + // start the render timer here. 
it will continue to the end of this + // function, to render the time needed to draw (excluding screen + // presentation) + gl_timer_start(p->render_timer); + p->use_linear = p->opts.linear_scaling || p->opts.sigmoid_upscaling; pass_read_video(p); + pass_opt_hook_point(p, "NATIVE", &p->texture_offset); pass_convert_yuv(p); + pass_opt_hook_point(p, "MAINPRESUB", &p->texture_offset); // For subtitles double vpts = p->image.mpi->pts; if (vpts == MP_NOPTS_VALUE) vpts = p->osd_pts; - if (p->osd && p->opts.blend_subs == 2) { + if (p->osd && p->opts.blend_subs == BLEND_SUBS_VIDEO) { double scale[2]; get_scale_factors(p, false, scale); struct mp_osd_res rect = { @@ -2130,20 +2482,13 @@ static void pass_render_frame(struct gl_video *p) GLSL(color = texture(texture0, texcoord0);) pass_read_fbo(p, &p->blend_subs_fbo); } - - apply_shaders(p, p->opts.pre_shaders, p->texture_w, p->texture_h, p->pre_fbo); - - if (p->opts.unsharp != 0.0) { - finish_pass_fbo(p, &p->unsharp_fbo, p->texture_w, p->texture_h, 0); - int id = pass_read_fbo(p, &p->unsharp_fbo); - pass_sample_unsharp(p->sc, id, p->opts.unsharp); - } + pass_opt_hook_point(p, "MAIN", &p->texture_offset); pass_scale_main(p); int vp_w = p->dst_rect.x1 - p->dst_rect.x0, vp_h = p->dst_rect.y1 - p->dst_rect.y0; - if (p->osd && p->opts.blend_subs == 1) { + if (p->osd && p->opts.blend_subs == BLEND_SUBS_YES) { // Recreate the real video size from the src/dst rects struct mp_osd_res rect = { .w = vp_w, .h = vp_h, @@ -2157,22 +2502,26 @@ static void pass_render_frame(struct gl_video *p) rect.ml *= scale[0]; rect.mr *= scale[0]; rect.mt *= scale[1]; rect.mb *= scale[1]; // We should always blend subtitles in non-linear light - if (p->use_linear) + if (p->use_linear) { pass_delinearize(p->sc, p->image_params.gamma); + p->use_linear = false; + } finish_pass_fbo(p, &p->blend_subs_fbo, p->texture_w, p->texture_h, FBOTEX_FUZZY); pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect, p->texture_w, p->texture_h, p->blend_subs_fbo.fbo, false); 
pass_read_fbo(p, &p->blend_subs_fbo); - if (p->use_linear) - pass_linearize(p->sc, p->image_params.gamma); } - apply_shaders(p, p->opts.post_shaders, p->texture_w, p->texture_h, p->post_fbo); + pass_opt_hook_point(p, "SCALED", NULL); + + gl_timer_stop(p->render_timer); } static void pass_draw_to_screen(struct gl_video *p, int fbo) { + gl_timer_start(p->present_timer); + if (p->dumb_mode) pass_render_frame_dumb(p, fbo); @@ -2183,19 +2532,23 @@ static void pass_draw_to_screen(struct gl_video *p, int fbo) GLSL(color.rgb = pow(color.rgb, vec3(user_gamma));) } - pass_colormanage(p, p->image_params.primaries, + pass_colormanage(p, p->image_params.peak, p->image_params.primaries, p->use_linear ? MP_CSP_TRC_LINEAR : p->image_params.gamma); // Draw checkerboard pattern to indicate transparency - if (p->has_alpha && p->opts.alpha_mode == 3) { + if (p->has_alpha && p->opts.alpha_mode == ALPHA_BLEND_TILES) { GLSLF("// transparency checkerboard\n"); GLSL(bvec2 tile = lessThan(fract(gl_FragCoord.xy / 32.0), vec2(0.5));) GLSL(vec3 background = vec3(tile.x == tile.y ? 
1.0 : 0.75);) GLSL(color.rgb = mix(background, color.rgb, color.a);) } + pass_opt_hook_point(p, "OUTPUT", NULL); + pass_dither(p); finish_pass_direct(p, fbo, p->vp_w, p->vp_h, &p->dst_rect); + + gl_timer_stop(p->present_timer); } // Draws an interpolate frame to fbo, based on the frame timing in t @@ -2214,7 +2567,8 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, // First of all, figure out if we have a frame availble at all, and draw // it manually + reset the queue if not if (p->surfaces[p->surface_now].pts == MP_NOPTS_VALUE) { - gl_video_upload_image(p, t->current); + if (!gl_video_upload_image(p, t->current)) + return; pass_render_frame(p); finish_pass_fbo(p, &p->surfaces[p->surface_now].fbotex, vp_w, vp_h, FBOTEX_FUZZY); @@ -2273,7 +2627,8 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, continue; if (f->pts > p->surfaces[p->surface_idx].pts) { - gl_video_upload_image(p, f); + if (!gl_video_upload_image(p, f)) + return; pass_render_frame(p); finish_pass_fbo(p, &p->surfaces[surface_dst].fbotex, vp_w, vp_h, FBOTEX_FUZZY); @@ -2349,7 +2704,7 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, for (int i = 0; i < size; i++) { struct img_tex img = img_tex_fbo(&p->surfaces[fbosurface_wrap(surface_bse+i)].fbotex, - identity_trans, PLANE_RGB, p->components); + PLANE_RGB, p->components); // Since the code in pass_sample_separated currently assumes // the textures are bound in-order and starting at 0, we just // assert to make sure this is the case (which it should always be) @@ -2366,12 +2721,24 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, p->frames_drawn += 1; } +static void timer_dbg(struct gl_video *p, const char *name, struct gl_timer *t) +{ + if (gl_timer_sample_count(t) > 0) { + MP_DBG(p, "%s time: last %dus avg %dus peak %dus\n", name, + (int)gl_timer_last_us(t), + (int)gl_timer_avg_us(t), + (int)gl_timer_peak_us(t)); + } +} + // 
(fbo==0 makes BindFramebuffer select the screen backbuffer) void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo) { GL *gl = p->gl; struct video_image *vimg = &p->image; + p->broken_frame = false; + gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); bool has_frame = frame->current || vimg->mpi; @@ -2402,7 +2769,8 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo) if (is_new || !p->output_fbo_valid) { p->output_fbo_valid = false; - gl_video_upload_image(p, frame->current); + if (!gl_video_upload_image(p, frame->current)) + goto done; pass_render_frame(p); // For the non-interplation case, we draw to a single "cache" @@ -2438,6 +2806,10 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo) } } +done: + + unmap_current_image(p); + debug_check_gl(p, "after video rendering"); gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); @@ -2447,8 +2819,15 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo) p->osd_pts, p->osd_rect, p->vp_w, p->vp_h, fbo, true); debug_check_gl(p, "after OSD rendering"); } - gl->UseProgram(0); + + if (gl_sc_error_state(p->sc) || p->broken_frame) { + // Make the screen solid blue to make it visually clear that an + // error has occurred + gl->ClearColor(0.0, 0.05, 0.5, 1.0); + gl->Clear(GL_COLOR_BUFFER_BIT); + } + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); // The playloop calls this last before waiting some time until it decides @@ -2457,6 +2836,11 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo) gl->Flush(); p->frames_rendered++; + + // Report performance metrics + timer_dbg(p, "upload", p->upload_timer); + timer_dbg(p, "render", p->render_timer); + timer_dbg(p, "present", p->present_timer); } // vp_w/vp_h is the implicit size of the target framebuffer. 
@@ -2472,11 +2856,30 @@ void gl_video_resize(struct gl_video *p, int vp_w, int vp_h, p->vp_h = vp_h; gl_video_reset_surfaces(p); + gl_video_setup_hooks(p); if (p->osd) mpgl_osd_resize(p->osd, p->osd_rect, p->image_params.stereo_out); } +static struct voctrl_performance_entry gl_video_perfentry(struct gl_timer *t) +{ + return (struct voctrl_performance_entry) { + .last = gl_timer_last_us(t), + .avg = gl_timer_avg_us(t), + .peak = gl_timer_peak_us(t), + }; +} + +struct voctrl_performance_data gl_video_perfdata(struct gl_video *p) +{ + return (struct voctrl_performance_data) { + .upload = gl_video_perfentry(p->upload_timer), + .render = gl_video_perfentry(p->render_timer), + .present = gl_video_perfentry(p->present_timer), + }; +} + static bool unmap_image(struct gl_video *p, struct mp_image *mpi) { GL *gl = p->gl; @@ -2504,15 +2907,17 @@ static bool map_image(struct gl_video *p, struct mp_image *mpi) for (int n = 0; n < p->plane_count; n++) { struct texplane *plane = &vimg->planes[n]; mpi->stride[n] = mp_image_plane_w(mpi, n) * p->image_desc.bytes[n]; + size_t buffer_size = mp_image_plane_h(mpi, n) * mpi->stride[n]; if (!plane->gl_buffer) { gl->GenBuffers(1, &plane->gl_buffer); gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, plane->gl_buffer); - size_t buffer_size = mp_image_plane_h(mpi, n) * mpi->stride[n]; gl->BufferData(GL_PIXEL_UNPACK_BUFFER, buffer_size, NULL, GL_DYNAMIC_DRAW); } gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, plane->gl_buffer); - mpi->planes[n] = gl->MapBuffer(GL_PIXEL_UNPACK_BUFFER, GL_WRITE_ONLY); + mpi->planes[n] = gl->MapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, + buffer_size, GL_MAP_WRITE_BIT | + GL_MAP_INVALIDATE_BUFFER_BIT); gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); if (!mpi->planes[n]) { unmap_image(p, mpi); @@ -2523,30 +2928,102 @@ static bool map_image(struct gl_video *p, struct mp_image *mpi) return true; } -static void gl_video_upload_image(struct gl_video *p, struct mp_image *mpi) +// This assumes nv12, with textures set to GL_NEAREST filtering. 
+static void reinterleave_vdpau(struct gl_video *p, struct gl_hwdec_frame *frame) +{ + struct gl_hwdec_frame res = {0}; + for (int n = 0; n < 2; n++) { + struct fbotex *fbo = &p->vdpau_deinterleave_fbo[n]; + // This is an array of the 2 to-merge planes. + struct gl_hwdec_plane *src = &frame->planes[n * 2]; + int w = src[0].tex_w; + int h = src[0].tex_h; + int ids[2]; + for (int t = 0; t < 2; t++) { + ids[t] = pass_bind(p, (struct img_tex){ + .gl_tex = src[t].gl_texture, + .gl_target = src[t].gl_target, + .multiplier = 1.0, + .transform = identity_trans, + .tex_w = w, + .tex_h = h, + .w = w, + .h = h, + }); + } + + GLSLF("color = fract(gl_FragCoord.y / 2) < 0.5\n"); + GLSLF(" ? texture(texture%d, texcoord%d)\n", ids[0], ids[0]); + GLSLF(" : texture(texture%d, texcoord%d);", ids[1], ids[1]); + + fbotex_change(fbo, p->gl, p->log, w, h * 2, n == 0 ? GL_R8 : GL_RG8, 0); + + finish_pass_direct(p, fbo->fbo, fbo->rw, fbo->rh, + &(struct mp_rect){0, 0, w, h * 2}); + + res.planes[n] = (struct gl_hwdec_plane){ + .gl_texture = fbo->texture, + .gl_target = GL_TEXTURE_2D, + .tex_w = w, + .tex_h = h * 2, + }; + } + *frame = res; +} + +// Returns false on failure. +static bool gl_video_upload_image(struct gl_video *p, struct mp_image *mpi) { GL *gl = p->gl; struct video_image *vimg = &p->image; + unref_current_image(p); + mpi = mp_image_new_ref(mpi); if (!mpi) - abort(); + goto error; - talloc_free(vimg->mpi); vimg->mpi = mpi; p->osd_pts = mpi->pts; p->frames_uploaded++; if (p->hwdec_active) { - GLuint imgtex[4] = {0}; - bool ok = p->hwdec->driver->map_image(p->hwdec, vimg->mpi, imgtex) >= 0; - for (int n = 0; n < p->plane_count; n++) - vimg->planes[n].gl_texture = ok ? 
imgtex[n] : -1; - return; + // Hardware decoding + struct gl_hwdec_frame gl_frame = {0}; + gl_timer_start(p->upload_timer); + bool ok = p->hwdec->driver->map_frame(p->hwdec, vimg->mpi, &gl_frame) >= 0; + gl_timer_stop(p->upload_timer); + vimg->hwdec_mapped = true; + if (ok) { + struct mp_image layout = {0}; + mp_image_set_params(&layout, &p->image_params); + if (gl_frame.vdpau_fields) + reinterleave_vdpau(p, &gl_frame); + for (int n = 0; n < p->plane_count; n++) { + struct gl_hwdec_plane *plane = &gl_frame.planes[n]; + vimg->planes[n] = (struct texplane){ + .w = mp_image_plane_w(&layout, n), + .h = mp_image_plane_h(&layout, n), + .tex_w = plane->tex_w, + .tex_h = plane->tex_h, + .gl_target = plane->gl_target, + .gl_texture = plane->gl_texture, + }; + snprintf(vimg->planes[n].swizzle, sizeof(vimg->planes[n].swizzle), + "%s", plane->swizzle); + } + } else { + MP_FATAL(p, "Mapping hardware decoded surface failed.\n"); + goto error; + } + return true; } + // Software decoding assert(mpi->num_planes == p->plane_count); + gl_timer_start(p->upload_timer); + mp_image_t pbo_mpi = *mpi; bool pbo = map_image(p, &pbo_mpi); if (pbo) { @@ -2567,28 +3044,36 @@ static void gl_video_upload_image(struct gl_video *p, struct mp_image *mpi) gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, plane->gl_buffer); gl->ActiveTexture(GL_TEXTURE0 + n); gl->BindTexture(plane->gl_target, plane->gl_texture); - glUploadTex(gl, plane->gl_target, plane->gl_format, plane->gl_type, - mpi->planes[n], mpi->stride[n], 0, 0, plane->w, plane->h, 0); + gl_upload_tex(gl, plane->gl_target, plane->gl_format, plane->gl_type, + mpi->planes[n], mpi->stride[n], 0, 0, plane->w, plane->h); } gl->ActiveTexture(GL_TEXTURE0); if (pbo) gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + gl_timer_stop(p->upload_timer); + + return true; + +error: + unref_current_image(p); + p->broken_frame = true; + return false; } -static bool test_fbo(struct gl_video *p) +static bool test_fbo(struct gl_video *p, GLint format) { GL *gl = p->gl; bool 
success = false; - MP_VERBOSE(p, "Testing user-set FBO format (0x%x)\n", - (unsigned)p->opts.fbo_format); + MP_VERBOSE(p, "Testing FBO format 0x%x\n", (unsigned)format); struct fbotex fbo = {0}; - if (fbotex_init(&fbo, p->gl, p->log, 16, 16, p->opts.fbo_format)) { + if (fbotex_init(&fbo, p->gl, p->log, 16, 16, format)) { gl->BindFramebuffer(GL_FRAMEBUFFER, fbo.fbo); gl->BindFramebuffer(GL_FRAMEBUFFER, 0); success = true; } fbotex_uninit(&fbo); - glCheckError(gl, p->log, "FBO test"); + gl_check_error(gl, p->log, "FBO test"); return success; } @@ -2603,7 +3088,7 @@ static bool check_dumb_mode(struct gl_video *p) return true; if (o->target_prim || o->target_trc || o->linear_scaling || o->correct_downscaling || o->sigmoid_upscaling || o->interpolation || - o->blend_subs || o->deband || o->unsharp || o->prescale_luma) + o->blend_subs || o->deband || o->unsharp) return false; // check remaining scalers (tscale is already implicitly excluded above) for (int i = 0; i < SCALER_COUNT; i++) { @@ -2617,6 +3102,8 @@ static bool check_dumb_mode(struct gl_video *p) return false; if (o->post_shaders && o->post_shaders[0]) return false; + if (o->user_shaders && o->user_shaders[0]) + return false; if (p->use_lut_3d) return false; return true; @@ -2626,24 +3113,31 @@ static bool check_dumb_mode(struct gl_video *p) static void check_gl_features(struct gl_video *p) { GL *gl = p->gl; - bool have_float_tex = gl->mpgl_caps & MPGL_CAP_FLOAT_TEX; - bool have_fbo = gl->mpgl_caps & MPGL_CAP_FB; + bool have_float_tex = !!gl_find_float16_format(gl, 1); bool have_3d_tex = gl->mpgl_caps & MPGL_CAP_3D_TEX; - bool have_mix = gl->glsl_version >= 130; + bool have_mglsl = gl->glsl_version >= 130; // modern GLSL (1st class arrays etc.) bool have_texrg = gl->mpgl_caps & MPGL_CAP_TEX_RG; - - if (have_fbo) { - if (!p->opts.fbo_format) { - p->opts.fbo_format = GL_RGBA16; - if (gl->es) - p->opts.fbo_format = have_float_tex ? 
GL_RGBA16F : GL_RGB10_A2; + bool have_tex16 = !gl->es || (gl->mpgl_caps & MPGL_CAP_EXT16); + + const GLint auto_fbo_fmts[] = {GL_RGBA16, GL_RGBA16F, GL_RGB10_A2, + GL_RGBA8, 0}; + GLint user_fbo_fmts[] = {p->opts.fbo_format, 0}; + const GLint *fbo_fmts = user_fbo_fmts[0] ? user_fbo_fmts : auto_fbo_fmts; + bool have_fbo = false; + for (int n = 0; fbo_fmts[n]; n++) { + GLint fmt = fbo_fmts[n]; + const struct gl_format *f = gl_find_internal_format(gl, fmt); + if (f && (f->flags & F_CF) == F_CF && test_fbo(p, fmt)) { + MP_VERBOSE(p, "Using FBO format 0x%x.\n", (unsigned)fmt); + have_fbo = true; + p->opts.fbo_format = fmt; + break; } - have_fbo = test_fbo(p); } - if (gl->es && p->opts.pbo) { + if (!gl->MapBufferRange && p->opts.pbo) { p->opts.pbo = 0; - MP_WARN(p, "Disabling PBOs (GLES unsupported).\n"); + MP_WARN(p, "Disabling PBOs (GL2.1/GLES2 unsupported).\n"); } p->forced_dumb_mode = p->opts.dumb_mode || !have_fbo || !have_texrg; @@ -2666,12 +3160,14 @@ static void check_gl_features(struct gl_video *p) .alpha_mode = p->opts.alpha_mode, .use_rectangle = p->opts.use_rectangle, .background = p->opts.background, - .dither_algo = -1, + .dither_algo = DITHER_NONE, + .target_brightness = p->opts.target_brightness, + .hdr_tone_mapping = p->opts.hdr_tone_mapping, + .tone_mapping_param = p->opts.tone_mapping_param, }; for (int n = 0; n < SCALER_COUNT; n++) new_opts.scaler[n] = gl_video_opts_def.scaler[n]; - assign_options(&p->opts, &new_opts); - p->opts.deband_opts = m_config_alloc_struct(NULL, &deband_conf); + set_options(p, &new_opts); return; } p->dumb_mode = false; @@ -2687,59 +3183,45 @@ static void check_gl_features(struct gl_video *p) char *reason = NULL; if (!have_float_tex) reason = "(float tex. missing)"; + if (!have_mglsl) + reason = "(GLSL version too old)"; if (reason) { + MP_WARN(p, "Disabling scaler #%d %s %s.\n", n, + p->opts.scaler[n].kernel.name, reason); + // p->opts is a copy of p->opts_alloc => we can just mess with it. 
p->opts.scaler[n].kernel.name = "bilinear"; - MP_WARN(p, "Disabling scaler #%d %s.\n", n, reason); + if (n == SCALER_TSCALE) + p->opts.interpolation = 0; } } } // GLES3 doesn't provide filtered 16 bit integer textures // GLES2 doesn't even provide 3D textures - if (p->use_lut_3d && (!have_3d_tex || gl->es)) { + if (p->use_lut_3d && (!have_3d_tex || !have_tex16)) { p->use_lut_3d = false; - MP_WARN(p, "Disabling color management (GLES unsupported).\n"); + MP_WARN(p, "Disabling color management (no RGB16 3D textures).\n"); } int use_cms = p->opts.target_prim != MP_CSP_PRIM_AUTO || p->opts.target_trc != MP_CSP_TRC_AUTO || p->use_lut_3d; // mix() is needed for some gamma functions - if (!have_mix && (p->opts.linear_scaling || p->opts.sigmoid_upscaling)) { + if (!have_mglsl && (p->opts.linear_scaling || p->opts.sigmoid_upscaling)) { p->opts.linear_scaling = false; p->opts.sigmoid_upscaling = false; MP_WARN(p, "Disabling linear/sigmoid scaling (GLSL version too old).\n"); } - if (!have_mix && use_cms) { + if (!have_mglsl && use_cms) { p->opts.target_prim = MP_CSP_PRIM_AUTO; p->opts.target_trc = MP_CSP_TRC_AUTO; p->use_lut_3d = false; MP_WARN(p, "Disabling color management (GLSL version too old).\n"); } - if (!have_mix && p->opts.deband) { + if (!have_mglsl && p->opts.deband) { p->opts.deband = 0; MP_WARN(p, "Disabling debanding (GLSL version too old).\n"); } - - if (p->opts.prescale_luma == 2) { - if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_UBO) { - // Check features for uniform buffer objects. - if (!gl->BindBufferBase || !gl->GetUniformBlockIndex) { - MP_WARN(p, "Disabling NNEDI3 (%s required).\n", - gl->es ? "OpenGL ES 3.0" : "OpenGL 3.1"); - p->opts.prescale_luma = 0; - } - } else if (p->opts.nnedi3_opts->upload == NNEDI3_UPLOAD_SHADER) { - // Check features for hard coding approach. - if ((!gl->es && gl->glsl_version < 330) || - (gl->es && gl->glsl_version < 300)) - { - MP_WARN(p, "Disabling NNEDI3 (%s required).\n", - gl->es ? 
"OpenGL ES 3.0" : "OpenGL 3.3"); - p->opts.prescale_luma = 0; - } - } - } } static void init_gl(struct gl_video *p) @@ -2748,9 +3230,6 @@ static void init_gl(struct gl_video *p) debug_check_gl(p, "before init_gl"); - MP_VERBOSE(p, "Reported display depth: R=%d, G=%d, B=%d\n", - gl->fb_r, gl->fb_g, gl->fb_b); - gl->Disable(GL_DITHER); gl_vao_init(&p->vao, gl, sizeof(struct vertex), vertex_vao); @@ -2759,8 +3238,8 @@ static void init_gl(struct gl_video *p) // Test whether we can use 10 bit. Hope that testing a single format/channel // is good enough (instead of testing all 1-4 channels variants etc.). - const struct fmt_entry *fmt = find_tex_format(gl, 2, 1); - if (gl->GetTexLevelParameteriv && fmt->format) { + const struct gl_format *fmt = gl_find_unorm_format(gl, 2, 1); + if (gl->GetTexLevelParameteriv && fmt) { GLuint tex; gl->GenTextures(1, &tex); gl->BindTexture(GL_TEXTURE_2D, tex); @@ -2781,6 +3260,34 @@ static void init_gl(struct gl_video *p) gl->DeleteTextures(1, &tex); } + if ((gl->es >= 300 || gl->version) && (gl->mpgl_caps & MPGL_CAP_FB)) { + gl->BindFramebuffer(GL_FRAMEBUFFER, gl->main_fb); + + GLenum obj = gl->version ? GL_BACK_LEFT : GL_BACK; + if (gl->main_fb) + obj = GL_COLOR_ATTACHMENT0; + + GLint depth_r = -1, depth_g = -1, depth_b = -1; + + gl->GetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, obj, + GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE, &depth_r); + gl->GetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, obj, + GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &depth_g); + gl->GetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, obj, + GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE, &depth_b); + + MP_VERBOSE(p, "Reported display depth: R=%d, G=%d, B=%d\n", + depth_r, depth_g, depth_b); + + p->fb_depth = depth_g > 0 ? 
depth_g : 8; + + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + } + + p->upload_timer = gl_timer_create(p->gl); + p->render_timer = gl_timer_create(p->gl); + p->present_timer = gl_timer_create(p->gl); + debug_check_gl(p, "after init_gl"); } @@ -2799,16 +3306,25 @@ void gl_video_uninit(struct gl_video *p) gl->DeleteTextures(1, &p->lut_3d_texture); + gl_timer_free(p->upload_timer); + gl_timer_free(p->render_timer); + gl_timer_free(p->present_timer); + mpgl_osd_destroy(p->osd); gl_set_debug_logger(gl, NULL); - assign_options(&p->opts, &(struct gl_video_opts){0}); talloc_free(p); } void gl_video_set_gl_state(struct gl_video *p) { + // This resets certain important state to defaults. + gl_video_unset_gl_state(p); +} + +void gl_video_unset_gl_state(struct gl_video *p) +{ GL *gl = p->gl; gl->ActiveTexture(GL_TEXTURE0); @@ -2817,11 +3333,6 @@ void gl_video_set_gl_state(struct gl_video *p) gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); } -void gl_video_unset_gl_state(struct gl_video *p) -{ - /* nop */ -} - void gl_video_reset(struct gl_video *p) { gl_video_reset_surfaces(p); @@ -2833,40 +3344,42 @@ bool gl_video_showing_interpolated_frame(struct gl_video *p) } // dest = src.<w> (always using 4 components) -static void packed_fmt_swizzle(char w[5], const struct fmt_entry *texfmt, - const struct packed_fmt_entry *fmt) +static void packed_fmt_swizzle(char w[5], const struct packed_fmt_entry *fmt) { - const char *comp = "rgba"; - - // Normally, we work with GL_RG - if (texfmt && texfmt->internal_format == GL_LUMINANCE_ALPHA) - comp = "ragb"; - for (int c = 0; c < 4; c++) - w[c] = comp[MPMAX(fmt->components[c] - 1, 0)]; + w[c] = "rgba"[MPMAX(fmt->components[c] - 1, 0)]; w[4] = '\0'; } -// Like find_tex_format(), but takes bits (not bytes), and but if no fixed point -// format is available, return an unsigned integer format. 
-static const struct fmt_entry *find_plane_format(GL *gl, int bytes_per_comp, - int n_channels) +// Like gl_find_unorm_format(), but takes bits (not bytes), and if no fixed +// point format is available, return an unsigned integer format. +static const struct gl_format *find_plane_format(GL *gl, int bits, int n_channels) { - const struct fmt_entry *e = find_tex_format(gl, bytes_per_comp, n_channels); - if (e->format || gl->es < 300) - return e; - return &gl_ui_byte_formats_gles3[n_channels - 1 + (bytes_per_comp - 1) * 4]; + int bytes = (bits + 7) / 8; + const struct gl_format *f = gl_find_unorm_format(gl, bytes, n_channels); + if (f) + return f; + return gl_find_uint_format(gl, bytes, n_channels); } -static bool init_format(int fmt, struct gl_video *init) +static void init_image_desc(struct gl_video *p, int fmt) { - struct GL *gl = init->gl; + p->image_desc = mp_imgfmt_get_desc(fmt); - init->hwdec_active = false; - if (init->hwdec && init->hwdec->driver->imgfmt == fmt) { - fmt = init->hwdec->converted_imgfmt; - init->hwdec_active = true; - } + p->plane_count = p->image_desc.num_planes; + p->is_yuv = p->image_desc.flags & MP_IMGFLAG_YUV; + p->has_alpha = p->image_desc.flags & MP_IMGFLAG_ALPHA; + p->use_integer_conversion = false; + p->color_swizzle[0] = '\0'; + p->is_packed_yuv = fmt == IMGFMT_UYVY || fmt == IMGFMT_YUYV; + p->hwdec_active = false; +} + +// test_only=true checks if the format is supported +// test_only=false also initializes some rendering parameters accordingly +static bool init_format(struct gl_video *p, int fmt, bool test_only) +{ + struct GL *gl = p->gl; struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(fmt); if (!desc.id) @@ -2875,22 +3388,20 @@ static bool init_format(int fmt, struct gl_video *init) if (desc.num_planes > 4) return false; - const struct fmt_entry *plane_format[4] = {0}; - - init->color_swizzle[0] = '\0'; - init->has_alpha = false; + const struct gl_format *plane_format[4] = {0}; + char color_swizzle[5] = ""; + const struct 
packed_fmt_entry *packed_format = {0}; // YUV/planar formats if (desc.flags & (MP_IMGFLAG_YUV_P | MP_IMGFLAG_RGB_P)) { int bits = desc.component_bits; if ((desc.flags & MP_IMGFLAG_NE) && bits >= 8 && bits <= 16) { - init->has_alpha = desc.num_planes > 3; - plane_format[0] = find_plane_format(gl, (bits + 7) / 8, 1); - for (int p = 1; p < desc.num_planes; p++) - plane_format[p] = plane_format[0]; + plane_format[0] = find_plane_format(gl, bits, 1); + for (int n = 1; n < desc.num_planes; n++) + plane_format[n] = plane_format[0]; // RGB/planar if (desc.flags & MP_IMGFLAG_RGB_P) - snprintf(init->color_swizzle, sizeof(init->color_swizzle), "brga"); + snprintf(color_swizzle, sizeof(color_swizzle), "brga"); goto supported; } } @@ -2899,50 +3410,37 @@ static bool init_format(int fmt, struct gl_video *init) if (desc.flags & MP_IMGFLAG_YUV_NV) { int bits = desc.component_bits; if ((desc.flags & MP_IMGFLAG_NE) && bits >= 8 && bits <= 16) { - plane_format[0] = find_plane_format(gl, (bits + 7) / 8, 1); - plane_format[1] = find_plane_format(gl, (bits + 7) / 8, 2); + plane_format[0] = find_plane_format(gl, bits, 1); + plane_format[1] = find_plane_format(gl, bits, 2); if (desc.flags & MP_IMGFLAG_YUV_NV_SWAP) - snprintf(init->color_swizzle, sizeof(init->color_swizzle), "rbga"); + snprintf(color_swizzle, sizeof(color_swizzle), "rbga"); goto supported; } } // XYZ (same organization as RGB packed, but requires conversion matrix) if (fmt == IMGFMT_XYZ12) { - plane_format[0] = find_tex_format(gl, 2, 3); + plane_format[0] = gl_find_unorm_format(gl, 2, 3); goto supported; } - // Packed RGB special formats - for (const struct fmt_entry *e = mp_to_gl_formats; e->mp_format; e++) { - if (!gl->es && e->mp_format == fmt) { - plane_format[0] = e; - goto supported; - } - } - // Packed RGB(A) formats for (const struct packed_fmt_entry *e = mp_packed_formats; e->fmt; e++) { if (e->fmt == fmt) { int n_comp = desc.bytes[0] / e->component_size; - plane_format[0] = find_tex_format(gl, e->component_size, 
n_comp); - packed_fmt_swizzle(init->color_swizzle, plane_format[0], e); - init->has_alpha = e->components[3] != 0; + plane_format[0] = gl_find_unorm_format(gl, e->component_size, n_comp); + packed_format = e; goto supported; } } - // Packed YUV Apple formats - if (init->gl->mpgl_caps & MPGL_CAP_APPLE_RGB_422) { - for (const struct fmt_entry *e = gl_apple_formats; e->mp_format; e++) { - if (e->mp_format == fmt) { - init->is_packed_yuv = true; - snprintf(init->color_swizzle, sizeof(init->color_swizzle), - "gbra"); - plane_format[0] = e; - goto supported; - } - } + // Special formats for which OpenGL happens to have direct support. + plane_format[0] = gl_find_special_format(gl, fmt); + if (plane_format[0]) { + // Packed YUV Apple formats color permutation + if (plane_format[0]->format == GL_RGB_422_APPLE) + snprintf(color_swizzle, sizeof(color_swizzle), "gbra"); + goto supported; } // Unsupported format @@ -2951,46 +3449,56 @@ static bool init_format(int fmt, struct gl_video *init) supported: if (desc.component_bits > 8 && desc.component_bits < 16) { - if (init->texture_16bit_depth < 16) + if (p->texture_16bit_depth < 16) return false; } int use_integer = -1; - for (int p = 0; p < desc.num_planes; p++) { - if (!plane_format[p]->format) + for (int n = 0; n < desc.num_planes; n++) { + if (!plane_format[n]) return false; - int use_int_plane = !!is_integer_format(plane_format[p]); + int use_int_plane = !!gl_integer_format_to_base(plane_format[n]->format); if (use_integer < 0) use_integer = use_int_plane; if (use_integer != use_int_plane) return false; // mixed planes not supported } - init->use_integer_conversion = use_integer; - if (init->use_integer_conversion && init->forced_dumb_mode) + if (use_integer && p->forced_dumb_mode) return false; - for (int p = 0; p < desc.num_planes; p++) { - struct texplane *plane = &init->image.planes[p]; - const struct fmt_entry *format = plane_format[p]; - assert(format); - plane->gl_format = format->format; - plane->gl_internal_format 
= format->internal_format; - plane->gl_type = format->type; - plane->use_integer = init->use_integer_conversion; - } + if (!test_only) { + for (int n = 0; n < desc.num_planes; n++) { + struct texplane *plane = &p->image.planes[n]; + const struct gl_format *format = plane_format[n]; + assert(format); + plane->gl_format = format->format; + plane->gl_internal_format = format->internal_format; + plane->gl_type = format->type; + plane->use_integer = use_integer; + snprintf(plane->swizzle, sizeof(plane->swizzle), "rgba"); + if (packed_format) + packed_fmt_swizzle(plane->swizzle, packed_format); + if (plane->gl_format == GL_LUMINANCE_ALPHA) + MPSWAP(char, plane->swizzle[1], plane->swizzle[3]); + } + + init_image_desc(p, fmt); - init->is_yuv = desc.flags & MP_IMGFLAG_YUV; - init->plane_count = desc.num_planes; - init->image_desc = desc; + p->use_integer_conversion = use_integer; + snprintf(p->color_swizzle, sizeof(p->color_swizzle), "%s", color_swizzle); + } return true; } bool gl_video_check_format(struct gl_video *p, int mp_format) { - struct gl_video tmp = *p; - return init_format(mp_format, &tmp); + if (init_format(p, mp_format, true)) + return true; + if (p->hwdec && p->hwdec->driver->imgfmt == mp_format) + return true; + return false; } void gl_video_config(struct gl_video *p, struct mp_image_params *params) @@ -3013,11 +3521,10 @@ void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd) mpgl_osd_destroy(p->osd); p->osd = NULL; p->osd_state = osd; - recreate_osd(p); + reinit_osd(p); } -struct gl_video *gl_video_init(GL *gl, struct mp_log *log, struct mpv_global *g, - struct gl_lcms *cms) +struct gl_video *gl_video_init(GL *gl, struct mp_log *log, struct mpv_global *g) { if (gl->version < 210 && gl->es < 200) { mp_err(log, "At least OpenGL 2.1 or OpenGL ES 2.0 required.\n"); @@ -3029,17 +3536,15 @@ struct gl_video *gl_video_init(GL *gl, struct mp_log *log, struct mpv_global *g, .gl = gl, .global = g, .log = log, - .cms = cms, - .opts = 
gl_video_opts_def, - .gl_target = GL_TEXTURE_2D, + .cms = gl_lcms_init(p, log, g), .texture_16bit_depth = 16, .sc = gl_sc_create(gl, log), }; + set_options(p, NULL); for (int n = 0; n < SCALER_COUNT; n++) p->scaler[n] = (struct scaler){.index = n}; gl_video_set_debug(p, true); init_gl(p); - recreate_osd(p); return p; } @@ -3062,62 +3567,32 @@ static const char *handle_scaler_opt(const char *name, bool tscale) return NULL; } -static char **dup_str_array(void *parent, char **src) -{ - if (!src) - return NULL; - - char **res = talloc_new(parent); - int num = 0; - for (int n = 0; src && src[n]; n++) - MP_TARRAY_APPEND(res, res, num, talloc_strdup(res, src[n])); - MP_TARRAY_APPEND(res, res, num, NULL); - return res; -} - -static void assign_options(struct gl_video_opts *dst, struct gl_video_opts *src) +static void set_options(struct gl_video *p, struct gl_video_opts *src) { - talloc_free(dst->scale_shader); - talloc_free(dst->pre_shaders); - talloc_free(dst->post_shaders); - talloc_free(dst->deband_opts); - talloc_free(dst->superxbr_opts); - talloc_free(dst->nnedi3_opts); - - *dst = *src; - - if (src->deband_opts) - dst->deband_opts = m_sub_options_copy(NULL, &deband_conf, src->deband_opts); - - if (src->superxbr_opts) { - dst->superxbr_opts = m_sub_options_copy(NULL, &superxbr_conf, - src->superxbr_opts); - } - - if (src->nnedi3_opts) { - dst->nnedi3_opts = m_sub_options_copy(NULL, &nnedi3_conf, - src->nnedi3_opts); - } - - for (int n = 0; n < SCALER_COUNT; n++) { - dst->scaler[n].kernel.name = - (char *)handle_scaler_opt(dst->scaler[n].kernel.name, - n == SCALER_TSCALE); - } - - dst->scale_shader = talloc_strdup(NULL, dst->scale_shader); - dst->pre_shaders = dup_str_array(NULL, dst->pre_shaders); - dst->post_shaders = dup_str_array(NULL, dst->post_shaders); + talloc_free(p->opts_alloc); + p->opts_alloc = m_sub_options_copy(p, &gl_video_conf, src); + p->opts = *p->opts_alloc; } // Set the options, and possibly update the filter chain too. 
// Note: assumes all options are valid and verified by the option parser. void gl_video_set_options(struct gl_video *p, struct gl_video_opts *opts) { - assign_options(&p->opts, opts); + set_options(p, opts); + reinit_from_options(p); +} + +static void reinit_from_options(struct gl_video *p) +{ + p->use_lut_3d = false; + + gl_lcms_set_options(p->cms, p->opts.icc_opts); + p->use_lut_3d = gl_lcms_has_profile(p->cms); check_gl_features(p); uninit_rendering(p); + gl_video_setup_hooks(p); + reinit_osd(p); if (p->opts.interpolation && !p->global->opts->video_sync && !p->dsi_warned) { MP_WARN(p, "Interpolation now requires enabling display-sync mode.\n" @@ -3239,5 +3714,5 @@ void gl_video_set_ambient_lux(struct gl_video *p, int lux) void gl_video_set_hwdec(struct gl_video *p, struct gl_hwdec *hwdec) { p->hwdec = hwdec; - mp_image_unrefp(&p->image.mpi); + unref_current_image(p); } diff --git a/video/out/opengl/video.h b/video/out/opengl/video.h index 4702f8c..140a468 100644 --- a/video/out/opengl/video.h +++ b/video/out/opengl/video.h @@ -35,11 +35,6 @@ #define TEXUNIT_3DLUT (TEXUNIT_SCALERS+SCALER_COUNT) #define TEXUNIT_DITHER (TEXUNIT_3DLUT+1) -struct lut3d { - uint16_t *data; - int size[3]; -}; - struct scaler_fun { char *name; float params[2]; @@ -78,6 +73,33 @@ enum scaler_unit { SCALER_COUNT }; +enum dither_algo { + DITHER_NONE = 0, + DITHER_FRUIT, + DITHER_ORDERED, +}; + +enum alpha_mode { + ALPHA_NO = 0, + ALPHA_YES, + ALPHA_BLEND, + ALPHA_BLEND_TILES, +}; + +enum blend_subs_mode { + BLEND_SUBS_NO = 0, + BLEND_SUBS_YES, + BLEND_SUBS_VIDEO, +}; + +enum tone_mapping { + TONE_MAPPING_CLIP, + TONE_MAPPING_REINHARD, + TONE_MAPPING_HABLE, + TONE_MAPPING_GAMMA, + TONE_MAPPING_LINEAR, +}; + struct gl_video_opts { int dumb_mode; struct scaler_config scaler[4]; @@ -86,6 +108,9 @@ struct gl_video_opts { int gamma_auto; int target_prim; int target_trc; + int target_brightness; + int hdr_tone_mapping; + float tone_mapping_param; int linear_scaling; int correct_downscaling; int 
sigmoid_upscaling; @@ -108,14 +133,11 @@ struct gl_video_opts { char *scale_shader; char **pre_shaders; char **post_shaders; + char **user_shaders; int deband; struct deband_opts *deband_opts; float unsharp; - int prescale_luma; - int prescale_passes; - float prescale_downscaling_threshold; - struct superxbr_opts *superxbr_opts; - struct nnedi3_opts *nnedi3_opts; + struct mp_icc_opts *icc_opts; }; extern const struct m_sub_options gl_video_conf; @@ -125,19 +147,18 @@ extern const struct gl_video_opts gl_video_opts_def; struct gl_video; struct vo_frame; -struct gl_video *gl_video_init(GL *gl, struct mp_log *log, struct mpv_global *g, - struct gl_lcms *cms); +struct gl_video *gl_video_init(GL *gl, struct mp_log *log, struct mpv_global *g); void gl_video_uninit(struct gl_video *p); void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd); void gl_video_set_options(struct gl_video *p, struct gl_video_opts *opts); bool gl_video_check_format(struct gl_video *p, int mp_format); void gl_video_config(struct gl_video *p, struct mp_image_params *params); void gl_video_set_output_depth(struct gl_video *p, int r, int g, int b); -void gl_video_update_profile(struct gl_video *p); void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, int fbo); void gl_video_resize(struct gl_video *p, int vp_w, int vp_h, struct mp_rect *src, struct mp_rect *dst, struct mp_osd_res *osd); +struct voctrl_performance_data gl_video_perfdata(struct gl_video *p); struct mp_csp_equalizer; struct mp_csp_equalizer *gl_video_eq_ptr(struct gl_video *p); void gl_video_eq_update(struct gl_video *p); @@ -147,6 +168,8 @@ void gl_video_set_debug(struct gl_video *p, bool enable); float gl_video_scale_ambient_lux(float lmin, float lmax, float rmin, float rmax, float lux); void gl_video_set_ambient_lux(struct gl_video *p, int lux); +void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data); +bool gl_video_icc_auto_enabled(struct gl_video *p); void gl_video_set_gl_state(struct 
gl_video *p); void gl_video_unset_gl_state(struct gl_video *p); diff --git a/video/out/opengl/video_shaders.c b/video/out/opengl/video_shaders.c index bea1bbf..1f37f4f 100644 --- a/video/out/opengl/video_shaders.c +++ b/video/out/opengl/video_shaders.c @@ -45,7 +45,7 @@ static void pass_sample_separated_get_weights(struct gl_shader_cache *sc, int N = scaler->kernel->size; if (N == 2) { - GLSL(vec2 c1 = texture(lut, vec2(0.5, fcoord_lut)).RG;) + GLSL(vec2 c1 = texture(lut, vec2(0.5, fcoord_lut)).rg;) GLSL(float weights[2] = float[](c1.r, c1.g);) } else if (N == 6) { GLSL(vec4 c1 = texture(lut, vec2(0.25, fcoord_lut));) @@ -177,7 +177,7 @@ static void bicubic_calcweights(struct gl_shader_cache *sc, const char *t, const GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s); GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s); GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t); - GLSLF("%s.xy += vec2(1 + %s, 1 - %s);\n", t, s, s); + GLSLF("%s.xy += vec2(1.0 + %s, 1.0 - %s);\n", t, s, s); } void pass_sample_bicubic_fast(struct gl_shader_cache *sc) @@ -187,8 +187,8 @@ void pass_sample_bicubic_fast(struct gl_shader_cache *sc) bicubic_calcweights(sc, "parmx", "fcoord.x"); bicubic_calcweights(sc, "parmy", "fcoord.y"); GLSL(vec4 cdelta;) - GLSL(cdelta.xz = parmx.RG * vec2(-pt.x, pt.x);) - GLSL(cdelta.yw = parmy.RG * vec2(-pt.y, pt.y);) + GLSL(cdelta.xz = parmx.rg * vec2(-pt.x, pt.x);) + GLSL(cdelta.yw = parmy.rg * vec2(-pt.y, pt.y);) // first y-interpolation GLSL(vec4 ar = texture(tex, pos + cdelta.xy);) GLSL(vec4 ag = texture(tex, pos + cdelta.xw);) @@ -208,34 +208,25 @@ void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler, GLSLF("{\n"); GLSL(vec2 pos = pos + vec2(0.5) * pt;) // round to nearest GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) - // We only need to sample from the four corner pixels since we're using - // nearest neighbour and can compute the exact transition point - GLSL(vec2 baseNW = pos - fcoord * 
pt;) - GLSL(vec2 baseNE = baseNW + vec2(pt.x, 0.0);) - GLSL(vec2 baseSW = baseNW + vec2(0.0, pt.y);) - GLSL(vec2 baseSE = baseNW + pt;) // Determine the mixing coefficient vector gl_sc_uniform_vec2(sc, "output_size", (float[2]){w, h}); - GLSL(vec2 coeff = vec2((baseSE - pos) * output_size);) - GLSL(coeff = clamp(coeff, 0.0, 1.0);) + GLSL(vec2 coeff = fcoord * output_size/size;) float threshold = scaler->conf.kernel.params[0]; - if (threshold > 0) { // also rules out NAN - GLSLF("coeff = mix(coeff, vec2(0.0), " - "lessThanEqual(coeff, vec2(%f)));\n", threshold); - GLSLF("coeff = mix(coeff, vec2(1.0), " - "greaterThanEqual(coeff, vec2(%f)));\n", 1.0 - threshold); - } + threshold = isnan(threshold) ? 0.0 : threshold; + GLSLF("coeff = (coeff - %f) / %f;\n", threshold, 1.0 - 2 * threshold); + GLSL(coeff = clamp(coeff, 0.0, 1.0);) // Compute the right blend of colors - GLSL(vec4 left = mix(texture(tex, baseSW), - texture(tex, baseNW), - coeff.y);) - GLSL(vec4 right = mix(texture(tex, baseSE), - texture(tex, baseNE), - coeff.y);) - GLSL(color = mix(right, left, coeff.x);) + GLSL(color = texture(tex, pos + pt * (coeff - fcoord));) GLSLF("}\n"); } +// Common constants for SMPTE ST.2084 (HDR) +static const float HDR_M1 = 2610./4096 * 1./4, + HDR_M2 = 2523./4096 * 128, + HDR_C1 = 3424./4096, + HDR_C2 = 2413./4096 * 32, + HDR_C3 = 2392./4096 * 32; + // Linearize (expand), given a TRC as input void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) { @@ -267,6 +258,15 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) pow(color.rgb, vec3(1.8)), lessThan(vec3(0.03125), color.rgb));) break; + case MP_CSP_TRC_SMPTE_ST2084: + GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", HDR_M2); + GLSLF("color.rgb = max(color.rgb - vec3(%f), vec3(0.0)) \n" + " / (vec3(%f) - vec3(%f) * color.rgb);\n", + HDR_C1, HDR_C2, HDR_C3); + GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", HDR_M1); + break; + default: + abort(); } } @@ -301,12 +301,67 @@ void 
pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) pow(color.rgb, vec3(1.0/1.8)), lessThanEqual(vec3(0.001953), color.rgb));) break; + case MP_CSP_TRC_SMPTE_ST2084: + GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", HDR_M1); + GLSLF("color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n" + " / (vec3(1.0) + vec3(%f) * color.rgb);\n", + HDR_C1, HDR_C2, HDR_C3); + GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", HDR_M2); + break; + default: + abort(); + } +} + +// Tone map from a known peak brightness to the range [0,1] +void pass_tone_map(struct gl_shader_cache *sc, float peak, + enum tone_mapping algo, float param) +{ + switch (algo) { + case TONE_MAPPING_CLIP: + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) + break; + + case TONE_MAPPING_REINHARD: { + float contrast = isnan(param) ? 0.5 : param, + offset = (1.0 - contrast) / contrast; + GLSLF("color.rgb = color.rgb / (color.rgb + vec3(%f));\n", offset); + GLSLF("color.rgb *= vec3(%f);\n", (peak + offset) / peak); + break; + } + + case TONE_MAPPING_HABLE: { + float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30; + GLSLHF("vec3 hable(vec3 x) {\n"); + GLSLHF("return ((x * (%f*x + %f)+%f)/(x * (%f*x + %f) + %f)) - %f;\n", + A, C*B, D*E, A, B, D*F, E/F); + GLSLHF("}\n"); + + GLSLF("color.rgb = hable(color.rgb) / hable(vec3(%f));\n", peak); + break; + } + + case TONE_MAPPING_GAMMA: { + float gamma = isnan(param) ? 1.8 : param; + GLSLF("color.rgb = pow(color.rgb / vec3(%f), vec3(%f));\n", + peak, 1.0/gamma); + break; + } + + case TONE_MAPPING_LINEAR: { + float coeff = isnan(param) ? 1.0 : param; + GLSLF("color.rgb = vec3(%f) * color.rgb;\n", coeff / peak); + break; + } + + default: + abort(); } } // Wide usage friendly PRNG, shamelessly stolen from a GLSL tricks forum post. // Obtain random numbers by calling rand(h), followed by h = permute(h) to -// update the state. +// update the state. Assumes the texture was hooked. 
static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg) { GLSLH(float mod289(float x) { return x - floor(x / 289.0) * 289.0; }) @@ -314,7 +369,7 @@ static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg) GLSLH(float rand(float x) { return fract(x / 41.0); }) // Initialize the PRNG by hashing the position + a random uniform - GLSL(vec3 _m = vec3(pos, random) + vec3(1.0);) + GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);) GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);) gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX); } @@ -347,44 +402,40 @@ const struct m_sub_options deband_conf = { .defaults = &deband_opts_def, }; -// Stochastically sample a debanded result from a given texture +// Stochastically sample a debanded result from a hooked texture. void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, - int tex_num, float tex_mul, GLenum tex_target, AVLFG *lfg) + AVLFG *lfg) { - // Set up common variables and initialize the PRNG + // Initialize the PRNG GLSLF("{\n"); - sampler_prelude(sc, tex_num); prng_init(sc, lfg); // Helper: Compute a stochastic approximation of the avg color around a // pixel - GLSLHF("vec4 average(%s tex, vec2 pos, vec2 pt, float range, inout float h) {", - mp_sampler_type(tex_target)); + GLSLHF("vec4 average(float range, inout float h) {\n"); // Compute a random rangle and distance GLSLH(float dist = rand(h) * range; h = permute(h);) GLSLH(float dir = rand(h) * 6.2831853; h = permute(h);) - - GLSLHF("pt *= dist;\n"); - GLSLH(vec2 o = vec2(cos(dir), sin(dir));) + GLSLH(vec2 o = dist * vec2(cos(dir), sin(dir));) // Sample at quarter-turn intervals around the source pixel GLSLH(vec4 ref[4];) - GLSLH(ref[0] = texture(tex, pos + pt * vec2( o.x, o.y));) - GLSLH(ref[1] = texture(tex, pos + pt * vec2(-o.y, o.x));) - GLSLH(ref[2] = texture(tex, pos + pt * vec2(-o.x, -o.y));) - GLSLH(ref[3] = texture(tex, pos + pt * vec2( o.y, -o.x));) + GLSLH(ref[0] = HOOKED_texOff(vec2( o.x, o.y));) 
+ GLSLH(ref[1] = HOOKED_texOff(vec2(-o.y, o.x));) + GLSLH(ref[2] = HOOKED_texOff(vec2(-o.x, -o.y));) + GLSLH(ref[3] = HOOKED_texOff(vec2( o.y, -o.x));) // Return the (normalized) average - GLSLHF("return %f * (ref[0] + ref[1] + ref[2] + ref[3])/4.0;\n", tex_mul); - GLSLH(}) + GLSLH(return (ref[0] + ref[1] + ref[2] + ref[3])/4.0;) + GLSLHF("}\n"); // Sample the source pixel - GLSLF("color = %f * texture(tex, pos);\n", tex_mul); + GLSL(color = HOOKED_tex(HOOKED_pos);) GLSLF("vec4 avg, diff;\n"); for (int i = 1; i <= opts->iterations; i++) { // Sample the average pixel and use it instead of the original if // the difference is below the given threshold - GLSLF("avg = average(tex, pos, pt, %f, h);\n", i * opts->range); + GLSLF("avg = average(%f, h);\n", i * opts->range); GLSL(diff = abs(color - avg);) GLSLF("color = mix(avg, color, greaterThan(diff, vec4(%f)));\n", opts->threshold / (i * 16384.0)); @@ -399,23 +450,21 @@ void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, GLSLF("}\n"); } -void pass_sample_unsharp(struct gl_shader_cache *sc, int tex_num, float param) -{ +// Assumes the texture was hooked +void pass_sample_unsharp(struct gl_shader_cache *sc, float param) { GLSLF("// unsharp\n"); - sampler_prelude(sc, tex_num); - GLSLF("{\n"); - GLSL(vec2 st1 = pt * 1.2;) - GLSL(vec4 p = texture(tex, pos);) - GLSL(vec4 sum1 = texture(tex, pos + st1 * vec2(+1, +1)) - + texture(tex, pos + st1 * vec2(+1, -1)) - + texture(tex, pos + st1 * vec2(-1, +1)) - + texture(tex, pos + st1 * vec2(-1, -1));) - GLSL(vec2 st2 = pt * 1.5;) - GLSL(vec4 sum2 = texture(tex, pos + st2 * vec2(+1, 0)) - + texture(tex, pos + st2 * vec2( 0, +1)) - + texture(tex, pos + st2 * vec2(-1, 0)) - + texture(tex, pos + st2 * vec2( 0, -1));) + GLSL(float st1 = 1.2;) + GLSL(vec4 p = HOOKED_tex(HOOKED_pos);) + GLSL(vec4 sum1 = HOOKED_texOff(st1 * vec2(+1, +1)) + + HOOKED_texOff(st1 * vec2(+1, -1)) + + HOOKED_texOff(st1 * vec2(-1, +1)) + + HOOKED_texOff(st1 * vec2(-1, -1));) + GLSL(float 
st2 = 1.5;) + GLSL(vec4 sum2 = HOOKED_texOff(st2 * vec2(+1, 0)) + + HOOKED_texOff(st2 * vec2( 0, +1)) + + HOOKED_texOff(st2 * vec2(-1, 0)) + + HOOKED_texOff(st2 * vec2( 0, -1));) GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;) GLSLF("color = p + t * %f;\n", param); GLSLF("}\n"); diff --git a/video/out/opengl/video_shaders.h b/video/out/opengl/video_shaders.h index e010fdb..0ee3d81 100644 --- a/video/out/opengl/video_shaders.h +++ b/video/out/opengl/video_shaders.h @@ -38,9 +38,12 @@ void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler, void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc); void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc); +void pass_tone_map(struct gl_shader_cache *sc, float peak, + enum tone_mapping algo, float param); + void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, - int tex_num, float tex_mul, GLenum tex_target, AVLFG *lfg); + AVLFG *lfg); -void pass_sample_unsharp(struct gl_shader_cache *sc, int tex_num, float param); +void pass_sample_unsharp(struct gl_shader_cache *sc, float param); #endif diff --git a/video/out/vo.c b/video/out/vo.c index 3e7999a..07476ad 100644 --- a/video/out/vo.c +++ b/video/out/vo.c @@ -43,6 +43,7 @@ #include "options/m_config.h" #include "common/msg.h" #include "common/global.h" +#include "video/hwdec.h" #include "video/mp_image.h" #include "sub/osd.h" #include "osdep/io.h" @@ -258,12 +259,12 @@ static struct vo *vo_create(bool probing, struct mpv_global *global, mp_input_set_mouse_transform(vo->input_ctx, NULL, NULL); if (vo->driver->encode != !!vo->encode_lavc_ctx) goto error; - struct m_config *config = m_config_from_obj_desc(vo, vo->log, &desc); - if (m_config_apply_defaults(config, name, vo->opts->vo_defs) < 0) + vo->config = m_config_from_obj_desc(vo, vo->log, &desc); + if (m_config_apply_defaults(vo->config, name, vo->opts->vo_defs) < 0) goto error; - if (m_config_set_obj_params(config, args) < 0) + 
if (m_config_set_obj_params(vo->config, args) < 0) goto error; - vo->priv = config->optstruct; + vo->priv = vo->config->optstruct; if (pthread_create(&vo->in->thread, NULL, vo_thread, vo)) goto error; @@ -595,14 +596,14 @@ static void wait_event_fd(struct vo *vo, int64_t until_time) if (fds[1].revents & POLLIN) { char buf[100]; - read(in->wakeup_pipe[0], buf, sizeof(buf)); // flush + (void)read(in->wakeup_pipe[0], buf, sizeof(buf)); // flush } } static void wakeup_event_fd(struct vo *vo) { struct vo_internal *in = vo->in; - write(in->wakeup_pipe[1], &(char){0}, 1); + (void)write(in->wakeup_pipe[1], &(char){0}, 1); } #else static void wait_event_fd(struct vo *vo, int64_t until_time){} diff --git a/video/out/vo.h b/video/out/vo.h index 49a7546..9c29d5f 100644 --- a/video/out/vo.h +++ b/video/out/vo.h @@ -61,9 +61,8 @@ enum mp_voctrl { VOCTRL_SET_EQUALIZER, // struct voctrl_set_equalizer_args* VOCTRL_GET_EQUALIZER, // struct voctrl_get_equalizer_args* - /* for hardware decoding */ - VOCTRL_GET_HWDEC_INFO, // struct mp_hwdec_info** - VOCTRL_LOAD_HWDEC_API, // private to vo_opengl + /* private to vo_opengl */ + VOCTRL_LOAD_HWDEC_API, // Redraw the image previously passed to draw_image() (basically, repeat // the previous draw_image call). 
If this is handled, the OSD should also @@ -78,6 +77,8 @@ enum mp_voctrl { VOCTRL_UPDATE_WINDOW_TITLE, // char* VOCTRL_UPDATE_PLAYBACK_STATE, // struct voctrl_playback_state* + VOCTRL_PERFORMANCE_DATA, // struct voctrl_performance_data* + VOCTRL_SET_CURSOR_VISIBILITY, // bool* VOCTRL_KILL_SCREENSAVER, @@ -132,11 +133,22 @@ struct voctrl_get_equalizer_args { // VOCTRL_UPDATE_PLAYBACK_STATE struct voctrl_playback_state { + bool taskbar_progress; bool playing; bool paused; int percent_pos; }; +// VOCTRL_PERFORMANCE_DATA +struct voctrl_performance_entry { + // Times are in microseconds + uint64_t last, avg, peak; +}; + +struct voctrl_performance_data { + struct voctrl_performance_entry upload, render, present; +}; + enum { // VO does handle mp_image_params.rotate in 90 degree steps VO_CAP_ROTATE90 = 1 << 0, @@ -296,12 +308,14 @@ struct vo { struct vo_w32_state *w32; struct vo_cocoa_state *cocoa; struct vo_wayland_state *wayland; + struct mp_hwdec_devices *hwdec_devs; struct input_ctx *input_ctx; struct osd_state *osd; struct encode_lavc_context *encode_lavc_ctx; struct vo_internal *in; struct mp_vo_opts *opts; struct vo_extra extra; + struct m_config *config; // --- The following fields are generally only changed during initialization. 
diff --git a/video/out/vo_direct3d.c b/video/out/vo_direct3d.c index e074572..5190095 100644 --- a/video/out/vo_direct3d.c +++ b/video/out/vo_direct3d.c @@ -34,7 +34,6 @@ #include "video/csputils.h" #include "video/mp_image.h" #include "video/img_format.h" -#include "video/d3d.h" #include "common/msg.h" #include "common/common.h" #include "w32_common.h" @@ -192,10 +191,6 @@ typedef struct d3d_priv { struct mp_csp_equalizer video_eq; struct osdpart *osd[MAX_OSD_PARTS]; - - struct mp_hwdec_info hwdec_info; - struct mp_hwdec_ctx hwdec_ctx; - struct mp_d3d_ctx hwdec_d3d; } d3d_priv; struct fmt_entry { @@ -743,9 +738,6 @@ static bool change_d3d_backbuffer(d3d_priv *priv) MP_VERBOSE(priv, "Creating Direct3D device failed.\n"); return 0; } - - // (race condition if this is called when recovering from a "lost" device) - priv->hwdec_d3d.d3d9_device = priv->d3d_device; } else { if (FAILED(IDirect3DDevice9_Reset(priv->d3d_device, &present_params))) { MP_ERR(priv, "Reseting Direct3D device failed.\n"); @@ -779,8 +771,6 @@ static bool change_d3d_backbuffer(d3d_priv *priv) static void destroy_d3d(d3d_priv *priv) { - priv->hwdec_d3d.d3d9_device = NULL; - destroy_d3d_surfaces(priv); for (int n = 0; n < NUM_SHADERS; n++) { @@ -1225,9 +1215,6 @@ static int preinit(struct vo *vo) priv->vo = vo; priv->log = vo->log; - priv->hwdec_info.hwctx = &priv->hwdec_ctx; - priv->hwdec_ctx.d3d_ctx = &priv->hwdec_d3d; - for (int n = 0; n < MAX_OSD_PARTS; n++) { struct osdpart *osd = talloc_ptrtype(priv, osd); *osd = (struct osdpart) { @@ -1275,11 +1262,6 @@ static int control(struct vo *vo, uint32_t request, void *data) d3d_priv *priv = vo->priv; switch (request) { - case VOCTRL_GET_HWDEC_INFO: { - struct mp_hwdec_info **arg = data; - *arg = &priv->hwdec_info; - return true; - } case VOCTRL_REDRAW_FRAME: d3d_draw_frame(priv); return VO_TRUE; diff --git a/video/out/vo_lavc.c b/video/out/vo_lavc.c index bd07d10..188a575 100644 --- a/video/out/vo_lavc.c +++ b/video/out/vo_lavc.c @@ -37,6 +37,7 @@ 
struct priv { uint8_t *buffer; size_t buffer_size; AVStream *stream; + AVCodecContext *codec; int have_first_packet; int harddup; @@ -108,14 +109,14 @@ static int reconfig(struct vo *vo, struct mp_image_params *params) * warning here. We choose to ignore that; just because ffmpeg currently * uses a plain 'int' for these struct fields, it doesn't mean it always * will */ - if (width == vc->stream->codec->width && - height == vc->stream->codec->height) { - if (aspect.num != vc->stream->codec->sample_aspect_ratio.num || - aspect.den != vc->stream->codec->sample_aspect_ratio.den) { + if (width == vc->codec->width && + height == vc->codec->height) { + if (aspect.num != vc->codec->sample_aspect_ratio.num || + aspect.den != vc->codec->sample_aspect_ratio.den) { /* aspect-only changes are not critical */ MP_WARN(vo, "unsupported pixel aspect ratio change from %d:%d to %d:%d\n", - vc->stream->codec->sample_aspect_ratio.num, - vc->stream->codec->sample_aspect_ratio.den, + vc->codec->sample_aspect_ratio.num, + vc->codec->sample_aspect_ratio.den, aspect.num, aspect.den); } goto done; @@ -144,18 +145,20 @@ static int reconfig(struct vo *vo, struct mp_image_params *params) goto error; } - vc->stream = encode_lavc_alloc_stream(vo->encode_lavc_ctx, - AVMEDIA_TYPE_VIDEO); - vc->stream->sample_aspect_ratio = vc->stream->codec->sample_aspect_ratio = + if (encode_lavc_alloc_stream(vo->encode_lavc_ctx, + AVMEDIA_TYPE_VIDEO, + &vc->stream, &vc->codec) < 0) + goto error; + vc->stream->sample_aspect_ratio = vc->codec->sample_aspect_ratio = aspect; - vc->stream->codec->width = width; - vc->stream->codec->height = height; - vc->stream->codec->pix_fmt = pix_fmt; + vc->codec->width = width; + vc->codec->height = height; + vc->codec->pix_fmt = pix_fmt; - encode_lavc_set_csp(vo->encode_lavc_ctx, vc->stream, params->colorspace); - encode_lavc_set_csp_levels(vo->encode_lavc_ctx, vc->stream, params->colorlevels); + encode_lavc_set_csp(vo->encode_lavc_ctx, vc->codec, params->colorspace); + 
encode_lavc_set_csp_levels(vo->encode_lavc_ctx, vc->codec, params->colorlevels); - if (encode_lavc_open_codec(vo->encode_lavc_ctx, vc->stream) < 0) + if (encode_lavc_open_codec(vo->encode_lavc_ctx, vc->codec) < 0) goto error; vc->buffer_size = 6 * width * height + 200; @@ -204,7 +207,7 @@ static void write_packet(struct vo *vo, int size, AVPacket *packet) packet->stream_index = vc->stream->index; if (packet->pts != AV_NOPTS_VALUE) { packet->pts = av_rescale_q(packet->pts, - vc->stream->codec->time_base, + vc->codec->time_base, vc->stream->time_base); } else { MP_VERBOSE(vo, "codec did not provide pts\n"); @@ -213,12 +216,12 @@ static void write_packet(struct vo *vo, int size, AVPacket *packet) } if (packet->dts != AV_NOPTS_VALUE) { packet->dts = av_rescale_q(packet->dts, - vc->stream->codec->time_base, + vc->codec->time_base, vc->stream->time_base); } if (packet->duration > 0) { packet->duration = av_rescale_q(packet->duration, - vc->stream->codec->time_base, + vc->codec->time_base, vc->stream->time_base); } else { // HACK: libavformat calculates dts wrong if the initial packet @@ -226,15 +229,16 @@ static void write_packet(struct vo *vo, int size, AVPacket *packet) // have b-frames! 
if (!packet->duration) if (!vc->have_first_packet) - if (vc->stream->codec->has_b_frames - || vc->stream->codec->max_b_frames) + if (vc->codec->has_b_frames + || vc->codec->max_b_frames) if (vc->stream->time_base.num * 1000LL <= vc->stream->time_base.den) packet->duration = FFMAX(1, av_rescale_q(1, - vc->stream->codec->time_base, vc->stream->time_base)); + vc->codec->time_base, vc->stream->time_base)); } - if (encode_lavc_write_frame(vo->encode_lavc_ctx, packet) < 0) { + if (encode_lavc_write_frame(vo->encode_lavc_ctx, + vc->stream, packet) < 0) { MP_ERR(vo, "error writing\n"); return; } @@ -246,30 +250,19 @@ static void write_packet(struct vo *vo, int size, AVPacket *packet) static int encode_video(struct vo *vo, AVFrame *frame, AVPacket *packet) { struct priv *vc = vo->priv; - if (encode_lavc_oformat_flags(vo->encode_lavc_ctx) & AVFMT_RAWPICTURE) { - if (!frame) - return 0; - memcpy(vc->buffer, frame, sizeof(AVPicture)); - MP_DBG(vo, "got pts %f\n", - frame->pts * (double) vc->stream->codec->time_base.num / - (double) vc->stream->codec->time_base.den); - packet->size = sizeof(AVPicture); - return packet->size; - } else { - int got_packet = 0; - int status = avcodec_encode_video2(vc->stream->codec, packet, - frame, &got_packet); - int size = (status < 0) ? status : got_packet ? packet->size : 0; - - if (frame) - MP_DBG(vo, "got pts %f; out size: %d\n", - frame->pts * (double) vc->stream->codec->time_base.num / - (double) vc->stream->codec->time_base.den, size); - - if (got_packet) - encode_lavc_write_stats(vo->encode_lavc_ctx, vc->stream); - return size; - } + int got_packet = 0; + int status = avcodec_encode_video2(vc->codec, packet, + frame, &got_packet); + int size = (status < 0) ? status : got_packet ? 
packet->size : 0; + + if (frame) + MP_DBG(vo, "got pts %f; out size: %d\n", + frame->pts * (double) vc->codec->time_base.num / + (double) vc->codec->time_base.den, size); + + if (got_packet) + encode_lavc_write_stats(vo->encode_lavc_ctx, vc->codec); + return size; } static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi) @@ -295,7 +288,7 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi) pts = vc->expected_next_pts; } - avc = vc->stream->codec; + avc = vc->codec; if (vc->worst_time_base.den == 0) { //if (avc->time_base.num / avc->time_base.den >= vc->stream->time_base.num / vc->stream->time_base.den) @@ -376,7 +369,7 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi) } vc->lastpts = outpts; ectx->last_video_in_pts = pts; - frameipts = floor((outpts + encode_lavc_getoffset(ectx, vc->stream)) + frameipts = floor((outpts + encode_lavc_getoffset(ectx, vc->codec)) / timeunit + 0.5); // calculate expected pts of next video frame @@ -396,7 +389,7 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi) MP_INFO(vo, "--oneverdrop increased pts by %d\n", (int) (vc->lastipts - frameipts + step)); frameipts = vc->lastipts + step; - vc->lastpts = frameipts * timeunit - encode_lavc_getoffset(ectx, vc->stream); + vc->lastpts = frameipts * timeunit - encode_lavc_getoffset(ectx, vc->codec); } } @@ -417,16 +410,15 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi) skipframes = 0; if (thisduration > skipframes) { - AVFrame *frame = av_frame_alloc(); + AVFrame *frame = mp_image_to_av_frame(vc->lastimg); + if (!frame) + abort(); // this is a nop, unless the worst time base is the STREAM time base frame->pts = av_rescale_q(vc->lastipts + skipframes, vc->worst_time_base, avc->time_base); - enum AVPictureType savetype = frame->pict_type; - mp_image_copy_fields_to_av_frame(frame, vc->lastimg); - frame->pict_type = savetype; - // keep this at avcodec_get_frame_defaults default + frame->pict_type = 0; // keep this at 
unknown/undefined frame->quality = avc->global_quality; diff --git a/video/out/vo_opengl.c b/video/out/vo_opengl.c index dfef6ec..08b9b11 100644 --- a/video/out/vo_opengl.c +++ b/video/out/vo_opengl.c @@ -45,7 +45,6 @@ #include "filter_kernels.h" #include "video/hwdec.h" #include "opengl/video.h" -#include "opengl/lcms.h" #define NUM_VSYNC_FENCES 10 @@ -56,14 +55,15 @@ struct gl_priv { GL *gl; struct gl_video *renderer; - struct gl_lcms *cms; struct gl_hwdec *hwdec; - struct mp_hwdec_info hwdec_info; + + int events; + + void *original_opts; // Options struct gl_video_opts *renderer_opts; - struct mp_icc_opts *icc_opts; int use_glFinish; int waitvsync; int use_gl_debug; @@ -130,7 +130,7 @@ static void draw_frame(struct vo *vo, struct vo_frame *frame) p->vsync_fences[p->num_vsync_fences++] = fence; } - gl_video_render_frame(p->renderer, frame, 0); + gl_video_render_frame(p->renderer, frame, gl->main_fb); if (p->use_glFinish) gl->Finish(); @@ -196,34 +196,31 @@ static int reconfig(struct vo *vo, struct mp_image_params *params) return 0; } -static void request_hwdec_api(struct gl_priv *p, const char *api_name) +static void request_hwdec_api(struct vo *vo, void *api) { + struct gl_priv *p = vo->priv; + if (p->hwdec) return; - p->hwdec = gl_hwdec_load_api(p->vo->log, p->gl, p->vo->global, api_name); + p->hwdec = gl_hwdec_load_api(p->vo->log, p->gl, p->vo->global, + vo->hwdec_devs, (intptr_t)api); gl_video_set_hwdec(p->renderer, p->hwdec); - if (p->hwdec) - p->hwdec_info.hwctx = p->hwdec->hwctx; } -static void call_request_hwdec_api(struct mp_hwdec_info *info, - const char *api_name) +static void call_request_hwdec_api(void *ctx, enum hwdec_type type) { - struct vo *vo = info->load_api_ctx; - assert(&((struct gl_priv *)vo->priv)->hwdec_info == info); // Roundabout way to run hwdec loading on the VO thread. // Redirects to request_hwdec_api(). 
- vo_control(vo, VOCTRL_LOAD_HWDEC_API, (void *)api_name); + vo_control(ctx, VOCTRL_LOAD_HWDEC_API, (void *)(intptr_t)type); } -static void get_and_update_icc_profile(struct gl_priv *p, int *events) +static void get_and_update_icc_profile(struct gl_priv *p) { - bool has_profile = p->icc_opts->profile && p->icc_opts->profile[0]; - if (p->icc_opts->profile_auto && !has_profile) { + if (gl_video_icc_auto_enabled(p->renderer)) { MP_VERBOSE(p, "Querying ICC profile...\n"); bstr icc = bstr0(NULL); - int r = mpgl_control(p->glctx, events, VOCTRL_GET_ICC_PROFILE, &icc); + int r = mpgl_control(p->glctx, &p->events, VOCTRL_GET_ICC_PROFILE, &icc); if (r != VO_NOTAVAIL) { if (r == VO_FALSE) { @@ -232,19 +229,15 @@ static void get_and_update_icc_profile(struct gl_priv *p, int *events) MP_ERR(p, "icc-profile-auto not implemented on this platform.\n"); } - gl_lcms_set_memory_profile(p->cms, &icc); - has_profile = true; + gl_video_set_icc_profile(p->renderer, icc); } } - - if (has_profile) - gl_video_update_profile(p->renderer); } -static void get_and_update_ambient_lighting(struct gl_priv *p, int *events) +static void get_and_update_ambient_lighting(struct gl_priv *p) { int lux; - int r = mpgl_control(p->glctx, events, VOCTRL_GET_AMBIENT_LUX, &lux); + int r = mpgl_control(p->glctx, &p->events, VOCTRL_GET_AMBIENT_LUX, &lux); if (r == VO_TRUE) { gl_video_set_ambient_lux(p->renderer, lux); } @@ -254,36 +247,31 @@ static void get_and_update_ambient_lighting(struct gl_priv *p, int *events) } } -static bool reparse_cmdline(struct gl_priv *p, char *args) +static const struct m_option options[]; + +static const struct m_sub_options opengl_conf = { + .opts = options, + .size = sizeof(struct gl_priv), +}; + +static bool reparse_cmdline(struct vo *vo, char *args) { - struct m_config *cfg = NULL; - struct gl_priv *opts = NULL; + struct gl_priv *p = vo->priv; int r = 0; - // list of options which can be changed at runtime -#define OPT_BASE_STRUCT struct gl_priv - static const struct m_option 
change_otps[] = { - OPT_SUBSTRUCT("", renderer_opts, gl_video_conf, 0), - {0} - }; -#undef OPT_BASE_STRUCT + struct gl_priv *opts = p; if (strcmp(args, "-") == 0) { - opts = p; + opts = p->original_opts; } else { - const struct gl_priv *vodef = p->vo->driver->priv_defaults; - cfg = m_config_new(NULL, p->vo->log, sizeof(*opts), vodef, change_otps); - opts = cfg->optstruct; - r = m_config_parse_suboptions(cfg, "opengl", args); + r = m_config_parse_suboptions(vo->config, "opengl", args); } - if (r >= 0) { - gl_video_set_options(p->renderer, opts->renderer_opts); - gl_video_configure_queue(p->renderer, p->vo); - p->vo->want_redraw = true; - } + gl_video_set_options(p->renderer, opts->renderer_opts); + get_and_update_icc_profile(p); + gl_video_configure_queue(p->renderer, p->vo); + p->vo->want_redraw = true; - talloc_free(cfg); return r >= 0; } @@ -314,7 +302,7 @@ static int control(struct vo *vo, uint32_t request, void *data) return VO_NOTIMPL; } case VOCTRL_SCREENSHOT_WIN: { - struct mp_image *screen = glGetWindowScreenshot(p->gl); + struct mp_image *screen = gl_read_window_contents(p->gl); // set image parameters according to the display, if possible if (screen) { screen->params.primaries = p->renderer_opts->target_prim; @@ -325,17 +313,12 @@ static int control(struct vo *vo, uint32_t request, void *data) *(struct mp_image **)data = screen; return true; } - case VOCTRL_GET_HWDEC_INFO: { - struct mp_hwdec_info **arg = data; - *arg = &p->hwdec_info; - return true; - } case VOCTRL_LOAD_HWDEC_API: - request_hwdec_api(p, data); + request_hwdec_api(vo, data); return true; case VOCTRL_SET_COMMAND_LINE: { char *arg = data; - return reparse_cmdline(p, arg); + return reparse_cmdline(vo, arg); } case VOCTRL_RESET: gl_video_reset(p->renderer); @@ -346,18 +329,23 @@ static int control(struct vo *vo, uint32_t request, void *data) vo_wakeup(vo); } return true; + case VOCTRL_PERFORMANCE_DATA: + *(struct voctrl_performance_data *)data = gl_video_perfdata(p->renderer); + return true; 
} int events = 0; int r = mpgl_control(p->glctx, &events, request, data); if (events & VO_EVENT_ICC_PROFILE_CHANGED) { - get_and_update_icc_profile(p, &events); + get_and_update_icc_profile(p); vo->want_redraw = true; } if (events & VO_EVENT_AMBIENT_LIGHTING_CHANGED) { - get_and_update_ambient_lighting(p, &events); + get_and_update_ambient_lighting(p); vo->want_redraw = true; } + events |= p->events; + p->events = 0; if (events & VO_EVENT_RESIZE) resize(p); if (events & VO_EVENT_EXPOSE) @@ -373,6 +361,10 @@ static void uninit(struct vo *vo) gl_video_uninit(p->renderer); gl_hwdec_uninit(p->hwdec); + if (vo->hwdec_devs) { + hwdec_devices_set_loader(vo->hwdec_devs, NULL, NULL); + hwdec_devices_destroy(vo->hwdec_devs); + } mpgl_uninit(p->glctx); } @@ -411,32 +403,30 @@ static int preinit(struct vo *vo) MP_VERBOSE(vo, "swap_control extension missing.\n"); } - p->cms = gl_lcms_init(p, vo->log, vo->global); - if (!p->cms) - goto err_out; - p->renderer = gl_video_init(p->gl, vo->log, vo->global, p->cms); + p->renderer = gl_video_init(p->gl, vo->log, vo->global); if (!p->renderer) goto err_out; gl_video_set_osd_source(p->renderer, vo->osd); gl_video_set_options(p->renderer, p->renderer_opts); gl_video_configure_queue(p->renderer, vo); - gl_lcms_set_options(p->cms, p->icc_opts); - get_and_update_icc_profile(p, &(int){0}); + get_and_update_icc_profile(p); - p->hwdec_info.load_api = call_request_hwdec_api; - p->hwdec_info.load_api_ctx = vo; + vo->hwdec_devs = hwdec_devices_create(); + + hwdec_devices_set_loader(vo->hwdec_devs, call_request_hwdec_api, vo); int hwdec = vo->opts->hwdec_preload_api; if (hwdec == HWDEC_NONE) hwdec = vo->global->opts->hwdec_api; if (hwdec != HWDEC_NONE) { - p->hwdec = gl_hwdec_load_api_id(p->vo->log, p->gl, vo->global, hwdec); + p->hwdec = gl_hwdec_load_api(p->vo->log, p->gl, vo->global, + vo->hwdec_devs, hwdec); gl_video_set_hwdec(p->renderer, p->hwdec); - if (p->hwdec) - p->hwdec_info.hwctx = p->hwdec->hwctx; } + p->original_opts = 
m_sub_options_copy(p, &opengl_conf, p); + return 0; err_out: @@ -459,7 +449,6 @@ static const struct m_option options[] = { OPT_INTRANGE("vsync-fences", opt_vsync_fences, 0, 0, NUM_VSYNC_FENCES), OPT_SUBSTRUCT("", renderer_opts, gl_video_conf, 0), - OPT_SUBSTRUCT("", icc_opts, mp_icc_conf, 0), {0}, }; @@ -494,7 +483,6 @@ const struct vo_driver video_out_opengl_hq = { .priv_size = sizeof(struct gl_priv), .priv_defaults = &(const struct gl_priv){ .renderer_opts = (struct gl_video_opts *)&gl_video_opts_hq_def, - .es = -1, }, .options = options, }; diff --git a/video/out/vo_opengl_cb.c b/video/out/vo_opengl_cb.c index 40930fb..4ac0c96 100644 --- a/video/out/vo_opengl_cb.c +++ b/video/out/vo_opengl_cb.c @@ -89,13 +89,16 @@ struct mpv_opengl_cb_context { struct vo *active; int hwdec_api; + // --- This is only mutable while initialized=false, during which nothing + // except the OpenGL context manager is allowed to access it. + struct mp_hwdec_devices *hwdec_devs; + // --- All of these can only be accessed from the thread where the host // application's OpenGL context is current - i.e. only while the // host application is calling certain mpv_opengl_cb_* APIs. 
GL *gl; struct gl_video *renderer; struct gl_hwdec *hwdec; - struct mp_hwdec_info hwdec_info; // it's also semi-immutable after init }; static void update(struct vo_priv *p); @@ -176,15 +179,14 @@ int mpv_opengl_cb_init_gl(struct mpv_opengl_cb_context *ctx, const char *exts, mpgl_load_functions2(ctx->gl, get_proc_address, get_proc_address_ctx, exts, ctx->log); - ctx->renderer = gl_video_init(ctx->gl, ctx->log, ctx->global, NULL); + ctx->renderer = gl_video_init(ctx->gl, ctx->log, ctx->global); if (!ctx->renderer) return MPV_ERROR_UNSUPPORTED; - ctx->hwdec = gl_hwdec_load_api_id(ctx->log, ctx->gl, ctx->global, - ctx->hwdec_api); + ctx->hwdec_devs = hwdec_devices_create(); + ctx->hwdec = gl_hwdec_load_api(ctx->log, ctx->gl, ctx->global, + ctx->hwdec_devs, ctx->hwdec_api); gl_video_set_hwdec(ctx->renderer, ctx->hwdec); - if (ctx->hwdec) - ctx->hwdec_info.hwctx = ctx->hwdec->hwctx; pthread_mutex_lock(&ctx->lock); // We don't know the exact caps yet - use a known superset @@ -222,6 +224,8 @@ int mpv_opengl_cb_uninit_gl(struct mpv_opengl_cb_context *ctx) ctx->renderer = NULL; gl_hwdec_uninit(ctx->hwdec); ctx->hwdec = NULL; + hwdec_devices_destroy(ctx->hwdec_devs); + ctx->hwdec_devs = NULL; talloc_free(ctx->gl); ctx->gl = NULL; talloc_free(ctx->new_opts_cfg); @@ -274,6 +278,8 @@ int mpv_opengl_cb_draw(mpv_opengl_cb_context *ctx, int fbo, int vp_w, int vp_h) ctx->gl->debug_context = opts->use_gl_debug; gl_video_set_debug(ctx->renderer, opts->use_gl_debug); } + if (gl_video_icc_auto_enabled(ctx->renderer)) + MP_ERR(ctx, "icc-profile-auto is not available with opengl-cb\n"); } ctx->reconfigured = false; ctx->update_new_opts = false; @@ -514,11 +520,6 @@ static int control(struct vo *vo, uint32_t request, void *data) char *arg = data; return reparse_cmdline(p, arg); } - case VOCTRL_GET_HWDEC_INFO: { - struct mp_hwdec_info **arg = data; - *arg = p->ctx ? 
&p->ctx->hwdec_info : NULL; - return true; - } } return VO_NOTIMPL; @@ -561,6 +562,8 @@ static int preinit(struct vo *vo) p->ctx->eq_changed = true; pthread_mutex_unlock(&p->ctx->lock); + vo->hwdec_devs = p->ctx->hwdec_devs; + return 0; } diff --git a/video/out/vo_rpi.c b/video/out/vo_rpi.c index 9d782fc..cd37362 100644 --- a/video/out/vo_rpi.c +++ b/video/out/vo_rpi.c @@ -30,11 +30,10 @@ #include <libavutil/rational.h> -#include "osdep/atomics.h" - #include "common/common.h" #include "common/msg.h" #include "options/m_config.h" +#include "osdep/timer.h" #include "vo.h" #include "win_state.h" #include "video/mp_image.h" @@ -69,11 +68,10 @@ struct priv { // for RAM input MMAL_POOL_T *swpool; - atomic_bool update_display; - - pthread_mutex_t vsync_mutex; - pthread_cond_t vsync_cond; + pthread_mutex_t display_mutex; + pthread_cond_t display_cond; int64_t vsync_counter; + bool reload_display; int background_layer; int video_layer; @@ -89,6 +87,8 @@ struct priv { #define ALIGN_W 32 #define ALIGN_H 16 +static void recreate_renderer(struct vo *vo); + // Make mpi point to buffer, assuming MMAL_ENCODING_I420. // buffer can be NULL. // Return the required buffer space. @@ -255,16 +255,18 @@ static int create_overlays(struct vo *vo) struct priv *p = vo->priv; destroy_overlays(vo); - if (vo->opts->fullscreen) { - // Use the whole screen. - VC_RECT_T dst = {.width = p->w, .height = p->h}; - VC_RECT_T src = {.width = 1 << 16, .height = 1 << 16}; - VC_DISPMANX_ALPHA_T alpha = { - .flags = DISPMANX_FLAGS_ALPHA_FIXED_ALL_PIXELS, - .opacity = 0xFF, - }; + if (!p->display) + return -1; + + if (vo->opts->fullscreen && p->background) { + // Use the whole screen. 
+ VC_RECT_T dst = {.width = p->w, .height = p->h}; + VC_RECT_T src = {.width = 1 << 16, .height = 1 << 16}; + VC_DISPMANX_ALPHA_T alpha = { + .flags = DISPMANX_FLAGS_ALPHA_FIXED_ALL_PIXELS, + .opacity = 0xFF, + }; - if (p->background) { p->window = vc_dispmanx_element_add(p->update, p->display, p->background_layer, &dst, 0, &src, @@ -275,7 +277,6 @@ static int create_overlays(struct vo *vo) return -1; } } - } if (p->enable_osd) { VC_RECT_T dst = {.x = p->x, .y = p->y, @@ -362,16 +363,23 @@ static int set_geometry(struct vo *vo) static void wait_next_vsync(struct vo *vo) { struct priv *p = vo->priv; - pthread_mutex_lock(&p->vsync_mutex); + pthread_mutex_lock(&p->display_mutex); + struct timespec end = mp_rel_time_to_timespec(0.050); int64_t old = p->vsync_counter; - while (old == p->vsync_counter) - pthread_cond_wait(&p->vsync_cond, &p->vsync_mutex); - pthread_mutex_unlock(&p->vsync_mutex); + while (old == p->vsync_counter && !p->reload_display) { + if (pthread_cond_timedwait(&p->display_cond, &p->display_mutex, &end)) + break; + } + pthread_mutex_unlock(&p->display_mutex); } static void flip_page(struct vo *vo) { struct priv *p = vo->priv; + + if (!p->renderer_enabled) + return; + struct mp_image *mpi = p->next_image; p->next_image = NULL; @@ -407,6 +415,9 @@ static void draw_frame(struct vo *vo, struct vo_frame *frame) { struct priv *p = vo->priv; + if (!p->renderer_enabled) + return; + mp_image_t *mpi = NULL; if (!frame->redraw && !frame->repeat) mpi = mp_image_new_ref(frame->current); @@ -435,8 +446,7 @@ static void draw_frame(struct vo *vo, struct vo_frame *frame) } mmal_buffer_header_reset(buffer); - struct mp_image *new_ref = mp_image_new_custom_ref(&(struct mp_image){0}, - buffer, + struct mp_image *new_ref = mp_image_new_custom_ref(NULL, buffer, free_mmal_buffer); if (!new_ref) { mmal_buffer_header_release(buffer); @@ -509,6 +519,9 @@ static int reconfig(struct vo *vo, struct mp_image_params *params) MMAL_PORT_T *input = p->renderer->input[0]; bool opaque = 
params->imgfmt == IMGFMT_MMAL; + if (!p->display) + return -1; + disable_renderer(vo); input->format->encoding = opaque ? MMAL_ENCODING_OPAQUE : MMAL_ENCODING_I420; @@ -563,6 +576,9 @@ static struct mp_image *take_screenshot(struct vo *vo) { struct priv *p = vo->priv; + if (!p->display) + return NULL; + struct mp_image *img = mp_image_alloc(IMGFMT_BGR0, p->w, p->h); if (!img) return NULL; @@ -615,14 +631,15 @@ static int control(struct vo *vo, uint32_t request, void *data) case VOCTRL_SCREENSHOT_WIN: *(struct mp_image **)data = take_screenshot(vo); return VO_TRUE; - case VOCTRL_CHECK_EVENTS: - if (atomic_load(&p->update_display)) { - atomic_store(&p->update_display, false); - update_display_size(vo); - if (p->renderer_enabled) - set_geometry(vo); - } + case VOCTRL_CHECK_EVENTS: { + pthread_mutex_lock(&p->display_mutex); + bool reload_required = p->reload_display; + p->reload_display = false; + pthread_mutex_unlock(&p->display_mutex); + if (reload_required) + recreate_renderer(vo); return VO_TRUE; + } case VOCTRL_GET_DISPLAY_FPS: *(double *)data = p->display_fps; return VO_TRUE; @@ -636,7 +653,10 @@ static void tv_callback(void *callback_data, uint32_t reason, uint32_t param1, { struct vo *vo = callback_data; struct priv *p = vo->priv; - atomic_store(&p->update_display, true); + pthread_mutex_lock(&p->display_mutex); + p->reload_display = true; + pthread_cond_signal(&p->display_cond); + pthread_mutex_unlock(&p->display_mutex); vo_wakeup(vo); } @@ -644,10 +664,59 @@ static void vsync_callback(DISPMANX_UPDATE_HANDLE_T u, void *arg) { struct vo *vo = arg; struct priv *p = vo->priv; - pthread_mutex_lock(&p->vsync_mutex); + pthread_mutex_lock(&p->display_mutex); p->vsync_counter += 1; - pthread_cond_signal(&p->vsync_cond); - pthread_mutex_unlock(&p->vsync_mutex); + pthread_cond_signal(&p->display_cond); + pthread_mutex_unlock(&p->display_mutex); +} + +static void destroy_dispmanx(struct vo *vo) +{ + struct priv *p = vo->priv; + + disable_renderer(vo); + 
destroy_overlays(vo); + + if (p->display) { + vc_dispmanx_vsync_callback(p->display, NULL, NULL); + vc_dispmanx_display_close(p->display); + } + p->display = 0; +} + +static int recreate_dispmanx(struct vo *vo) +{ + struct priv *p = vo->priv; + + p->display = vc_dispmanx_display_open(p->display_nr); + p->update = vc_dispmanx_update_start(0); + if (!p->display || !p->update) { + MP_FATAL(vo, "Could not get DISPMANX objects.\n"); + if (p->display) + vc_dispmanx_display_close(p->display); + p->display = 0; + p->update = 0; + return -1; + } + + update_display_size(vo); + + vc_dispmanx_vsync_callback(p->display, vsync_callback, vo); + + return 0; +} + +static void recreate_renderer(struct vo *vo) +{ + MP_WARN(vo, "Recreating renderer after display change.\n"); + + destroy_dispmanx(vo); + recreate_dispmanx(vo); + + if (vo->params) { + if (reconfig(vo, vo->params) < 0) + MP_FATAL(vo, "Recreation failed.\n"); + } } static void uninit(struct vo *vo) @@ -658,25 +727,18 @@ static void uninit(struct vo *vo) talloc_free(p->next_image); - destroy_overlays(vo); + destroy_dispmanx(vo); if (p->update) vc_dispmanx_update_submit_sync(p->update); - if (p->renderer) { - disable_renderer(vo); + if (p->renderer) mmal_component_release(p->renderer); - } - - if (p->display) { - vc_dispmanx_vsync_callback(p->display, NULL, NULL); - vc_dispmanx_display_close(p->display); - } mmal_vc_deinit(); - pthread_cond_destroy(&p->vsync_cond); - pthread_mutex_destroy(&p->vsync_mutex); + pthread_cond_destroy(&p->display_cond); + pthread_mutex_destroy(&p->display_mutex); } static int preinit(struct vo *vo) @@ -696,12 +758,14 @@ static int preinit(struct vo *vo) return -1; } - p->display = vc_dispmanx_display_open(p->display_nr); - p->update = vc_dispmanx_update_start(0); - if (!p->display || !p->update) { - MP_FATAL(vo, "Could not get DISPMANX objects.\n"); + pthread_mutex_init(&p->display_mutex, NULL); + pthread_cond_init(&p->display_cond, NULL); + + if (recreate_dispmanx(vo) < 0) + goto fail; + + if 
(update_display_size(vo) < 0) goto fail; - } if (mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &p->renderer)) { @@ -709,16 +773,8 @@ static int preinit(struct vo *vo) goto fail; } - if (update_display_size(vo) < 0) - goto fail; - vc_tv_register_callback(tv_callback, vo); - pthread_mutex_init(&p->vsync_mutex, NULL); - pthread_cond_init(&p->vsync_cond, NULL); - - vc_dispmanx_vsync_callback(p->display, vsync_callback, vo); - return 0; fail: diff --git a/video/out/vo_sdl.c b/video/out/vo_sdl.c index 9d34564..dd18f6e 100644 --- a/video/out/vo_sdl.c +++ b/video/out/vo_sdl.c @@ -58,30 +58,27 @@ const struct formatmap_entry formats[] = { {SDL_PIXELFORMAT_UYVY, IMGFMT_UYVY, 0}, //{SDL_PIXELFORMAT_YVYU, IMGFMT_YVYU, 0}, #if BYTE_ORDER == BIG_ENDIAN - {SDL_PIXELFORMAT_RGBX8888, IMGFMT_RGBA, 0}, // has no alpha -> bad for OSD - {SDL_PIXELFORMAT_BGRX8888, IMGFMT_BGRA, 0}, // has no alpha -> bad for OSD + {SDL_PIXELFORMAT_RGB888, IMGFMT_0RGB, 0}, // RGB888 means XRGB8888 + {SDL_PIXELFORMAT_RGBX8888, IMGFMT_RGB0, 0}, // has no alpha -> bad for OSD + {SDL_PIXELFORMAT_BGR888, IMGFMT_0BGR, 0}, // BGR888 means XBGR8888 + {SDL_PIXELFORMAT_BGRX8888, IMGFMT_BGR0, 0}, // has no alpha -> bad for OSD {SDL_PIXELFORMAT_ARGB8888, IMGFMT_ARGB, 1}, // matches SUBBITMAP_RGBA {SDL_PIXELFORMAT_RGBA8888, IMGFMT_RGBA, 1}, {SDL_PIXELFORMAT_ABGR8888, IMGFMT_ABGR, 1}, {SDL_PIXELFORMAT_BGRA8888, IMGFMT_BGRA, 1}, - {SDL_PIXELFORMAT_RGB24, IMGFMT_RGB24, 0}, - {SDL_PIXELFORMAT_BGR24, IMGFMT_BGR24, 0}, - {SDL_PIXELFORMAT_RGB888, IMGFMT_RGB24, 0}, - {SDL_PIXELFORMAT_BGR888, IMGFMT_BGR24, 0}, - {SDL_PIXELFORMAT_BGR565, IMGFMT_RGB565, 0}, #else - {SDL_PIXELFORMAT_RGBX8888, IMGFMT_ABGR, 0}, // has no alpha -> bad for OSD - {SDL_PIXELFORMAT_BGRX8888, IMGFMT_ARGB, 0}, // has no alpha -> bad for OSD + {SDL_PIXELFORMAT_RGB888, IMGFMT_BGR0, 0}, // RGB888 means XRGB8888 + {SDL_PIXELFORMAT_RGBX8888, IMGFMT_0BGR, 0}, // has no alpha -> bad for OSD + {SDL_PIXELFORMAT_BGR888, IMGFMT_RGB0, 0}, // BGR888 
means XBGR8888 + {SDL_PIXELFORMAT_BGRX8888, IMGFMT_0RGB, 0}, // has no alpha -> bad for OSD {SDL_PIXELFORMAT_ARGB8888, IMGFMT_BGRA, 1}, // matches SUBBITMAP_RGBA {SDL_PIXELFORMAT_RGBA8888, IMGFMT_ABGR, 1}, {SDL_PIXELFORMAT_ABGR8888, IMGFMT_RGBA, 1}, {SDL_PIXELFORMAT_BGRA8888, IMGFMT_ARGB, 1}, +#endif {SDL_PIXELFORMAT_RGB24, IMGFMT_RGB24, 0}, {SDL_PIXELFORMAT_BGR24, IMGFMT_BGR24, 0}, - {SDL_PIXELFORMAT_RGB888, IMGFMT_BGR24, 0}, - {SDL_PIXELFORMAT_BGR888, IMGFMT_RGB24, 0}, {SDL_PIXELFORMAT_RGB565, IMGFMT_RGB565, 0}, -#endif }; struct keymap_entry { diff --git a/video/out/vo_vaapi.c b/video/out/vo_vaapi.c index 5275d4d..dc8aaac 100644 --- a/video/out/vo_vaapi.c +++ b/video/out/vo_vaapi.c @@ -68,7 +68,6 @@ struct priv { struct vo *vo; VADisplay display; struct mp_vaapi_ctx *mpvaapi; - struct mp_hwdec_info hwdec_info; struct mp_image_params image_params; struct mp_rect src_rect; @@ -515,11 +514,6 @@ static int control(struct vo *vo, uint32_t request, void *data) struct priv *p = vo->priv; switch (request) { - case VOCTRL_GET_HWDEC_INFO: { - struct mp_hwdec_info **arg = data; - *arg = &p->hwdec_info; - return true; - } case VOCTRL_SET_EQUALIZER: { struct voctrl_set_equalizer_args *eq = data; return set_equalizer(p, eq->name, eq->value); @@ -561,6 +555,11 @@ static void uninit(struct vo *vo) free_subpicture(p, &part->image); } + if (vo->hwdec_devs) { + hwdec_devices_remove(vo->hwdec_devs, &p->mpvaapi->hwctx); + hwdec_devices_destroy(vo->hwdec_devs); + } + va_destroy(p->mpvaapi); vo_x11_uninit(vo); @@ -591,8 +590,6 @@ static int preinit(struct vo *vo) goto fail; } - p->hwdec_info.hwctx = &p->mpvaapi->hwctx; - if (va_guess_if_emulated(p->mpvaapi)) { MP_WARN(vo, "VA-API is most likely emulated via VDPAU.\n" "It's better to use VDPAU directly with: --vo=vdpau\n"); @@ -645,6 +642,10 @@ static int preinit(struct vo *vo) p->va_num_display_attrs = 0; p->mp_display_attr = talloc_zero_array(vo, int, p->va_num_display_attrs); } + + vo->hwdec_devs = hwdec_devices_create(); + 
hwdec_devices_add(vo->hwdec_devs, &p->mpvaapi->hwctx); + return 0; fail: diff --git a/video/out/vo_vdpau.c b/video/out/vo_vdpau.c index b85780e..15472b2 100644 --- a/video/out/vo_vdpau.c +++ b/video/out/vo_vdpau.c @@ -71,7 +71,6 @@ struct vdpctx { struct vdp_functions *vdp; VdpDevice vdp_device; uint64_t preemption_counter; - struct mp_hwdec_info hwdec_info; struct m_color colorkey; @@ -448,7 +447,6 @@ static void mark_vdpau_objects_uninitialized(struct vo *vo) forget_frames(vo, false); vc->black_pixel = VDP_INVALID_HANDLE; - vc->video_mixer->video_mixer = VDP_INVALID_HANDLE; vc->flip_queue = VDP_INVALID_HANDLE; vc->flip_target = VDP_INVALID_HANDLE; for (int i = 0; i < MAX_OUTPUT_SURFACES; i++) @@ -1029,6 +1027,9 @@ static void uninit(struct vo *vo) { struct vdpctx *vc = vo->priv; + hwdec_devices_remove(vo->hwdec_devs, &vc->mpvdp->hwctx); + hwdec_devices_destroy(vo->hwdec_devs); + /* Destroy all vdpau objects */ mp_vdpau_mixer_destroy(vc->video_mixer); destroy_vdpau_objects(vo); @@ -1054,7 +1055,8 @@ static int preinit(struct vo *vo) return -1; } - vc->hwdec_info.hwctx = &vc->mpvdp->hwctx; + vo->hwdec_devs = hwdec_devices_create(); + hwdec_devices_add(vo->hwdec_devs, &vc->mpvdp->hwctx); vc->video_mixer = mp_vdpau_mixer_create(vc->mpvdp, vo->log); @@ -1118,11 +1120,6 @@ static int control(struct vo *vo, uint32_t request, void *data) check_preemption(vo); switch (request) { - case VOCTRL_GET_HWDEC_INFO: { - struct mp_hwdec_info **arg = data; - *arg = &vc->hwdec_info; - return true; - } case VOCTRL_GET_PANSCAN: return VO_TRUE; case VOCTRL_SET_PANSCAN: diff --git a/video/out/vo_wayland.c b/video/out/vo_wayland.c index 57d6c7f..2997b38 100644 --- a/video/out/vo_wayland.c +++ b/video/out/vo_wayland.c @@ -249,10 +249,15 @@ static bool resize(struct priv *p) if (!p->video_bufpool.back_buffer || SHM_BUFFER_IS_BUSY(p->video_bufpool.back_buffer)) return false; // skip resizing if we can't guarantee pixel perfectness! 
+ int32_t scale = 1; int32_t x = wl->window.sh_x; int32_t y = wl->window.sh_y; - wl->vo->dwidth = wl->window.sh_width; - wl->vo->dheight = wl->window.sh_height; + + if (wl->display.current_output) + scale = wl->display.current_output->scale; + + wl->vo->dwidth = scale*wl->window.sh_width; + wl->vo->dheight = scale*wl->window.sh_height; vo_get_src_dst_rects(p->vo, &p->src, &p->dst, &p->osd); p->src_w = p->src.x1 - p->src.x0; @@ -273,6 +278,7 @@ static bool resize(struct priv *p) if (y != 0) y = wl->window.height - p->dst_h; + wl_surface_set_buffer_scale(wl->window.video_surface, scale); mp_sws_set_from_cmdline(p->sws, p->vo->opts->sws_opts); p->sws->src = p->in_format; p->sws->dst = (struct mp_image_params) { @@ -301,7 +307,7 @@ static bool resize(struct priv *p) if (!p->enable_alpha) { struct wl_region *opaque = wl_compositor_create_region(wl->display.compositor); - wl_region_add(opaque, 0, 0, p->dst_w, p->dst_h); + wl_region_add(opaque, 0, 0, p->dst_w/scale, p->dst_h/scale); wl_surface_set_opaque_region(wl->window.video_surface, opaque); wl_region_destroy(opaque); } @@ -464,14 +470,19 @@ static const bool osd_formats[SUBBITMAP_COUNT] = { static void draw_osd(struct vo *vo) { + int32_t scale = 1; struct priv *p = vo->priv; + if (p->wl && p->wl->display.current_output) + scale = p->wl->display.current_output->scale; + // detach all buffers and attach all needed buffers in osd_draw // only the most recent attach & commit is applied once the parent surface // is committed for (int i = 0; i < MAX_OSD_PARTS; ++i) { struct wl_surface *s = p->osd_surfaces[i]; wl_surface_attach(s, NULL, 0, 0); + wl_surface_set_buffer_scale(s, scale); wl_surface_damage(s, 0, 0, p->dst_w, p->dst_h); wl_surface_commit(s); } diff --git a/video/out/vo_xv.c b/video/out/vo_xv.c index e02ea2d..1e7ae7c 100644 --- a/video/out/vo_xv.c +++ b/video/out/vo_xv.c @@ -577,6 +577,15 @@ static bool allocate_xvimage(struct vo *vo, int foo) return false; XSync(x11->display, False); } + + if 
((ctx->xvimage[foo]->width != aligned_w) || + (ctx->xvimage[foo]->height != ctx->image_height)) { + MP_ERR(vo, "Got XvImage with incorrect size: %ux%u (expected %ux%u)\n", + ctx->xvimage[foo]->width, ctx->xvimage[foo]->height, + aligned_w, ctx->image_height); + return false; + } + struct mp_image img = get_xv_buffer(vo, foo); img.w = aligned_w; mp_image_clear(&img, 0, 0, img.w, img.h); diff --git a/video/out/w32_common.c b/video/out/w32_common.c index d26de3b..f3b59f1 100644 --- a/video/out/w32_common.c +++ b/video/out/w32_common.c @@ -15,13 +15,13 @@ * with mpv. If not, see <http://www.gnu.org/licenses/>. */ +#include <initguid.h> #include <stdio.h> #include <limits.h> #include <pthread.h> #include <assert.h> #include <windows.h> #include <windowsx.h> -#include <initguid.h> #include <ole2.h> #include <shobjidl.h> #include <avrt.h> @@ -45,6 +45,9 @@ #include "misc/rendezvous.h" #include "mpv_talloc.h" +EXTERN_C IMAGE_DOS_HEADER __ImageBase; +#define HINST_THISCOMPONENT ((HINSTANCE)&__ImageBase) + static const wchar_t classname[] = L"mpv"; static __thread struct vo_w32_state *w32_thread_context; @@ -1067,15 +1070,31 @@ static void reinit_window_state(struct vo_w32_state *w32) RECT cr = r; add_window_borders(w32->window, &r); + // Check on client area size instead of window size on --fit-border=no + long o_w; + long o_h; + if( w32->opts->fit_border ) { + o_w = r.right - r.left; + o_h = r.bottom - r.top; + } else { + o_w = cr.right - cr.left; + o_h = cr.bottom - cr.top; + } - if (!w32->current_fs && - ((r.right - r.left) >= screen_w || (r.bottom - r.top) >= screen_h)) + if ( !w32->current_fs && ( o_w > screen_w || o_h > screen_h ) ) { MP_VERBOSE(w32, "requested window size larger than the screen\n"); // Use the aspect of the client area, not the full window size. // Basically, try to compute the maximum window size. 
- long n_w = screen_w - (r.right - cr.right) - (cr.left - r.left) - 1; - long n_h = screen_h - (r.bottom - cr.bottom) - (cr.top - r.top) - 1; + long n_w; + long n_h; + if( w32->opts->fit_border ) { + n_w = screen_w - (r.right - cr.right) - (cr.left - r.left); + n_h = screen_h - (r.bottom - cr.bottom) - (cr.top - r.top); + } else { + n_w = screen_w; + n_h = screen_h; + } // Letterbox double asp = (cr.right - cr.left) / (double)(cr.bottom - cr.top); double s_asp = n_w / (double)n_h; @@ -1084,15 +1103,28 @@ static void reinit_window_state(struct vo_w32_state *w32) } else { n_w = n_h * asp; } + // Save new size + w32->dw = n_w; + w32->dh = n_h; + // Get old window center + long o_cx = r.left + (r.right - r.left) / 2; + long o_cy = r.top + (r.bottom - r.top) / 2; + // Add window borders to the new window size r = (RECT){.right = n_w, .bottom = n_h}; add_window_borders(w32->window, &r); - // Center the final window + // Get top and left border size for client area position calculation + long b_top = -r.top; + long b_left = -r.left; + // Center the final window around the old window center n_w = r.right - r.left; n_h = r.bottom - r.top; - r.left = w32->screenrc.x0 + screen_w / 2 - n_w / 2; - r.top = w32->screenrc.y0 + screen_h / 2 - n_h / 2; + r.left = o_cx - n_w / 2; + r.top = o_cy - n_h / 2; r.right = r.left + n_w; r.bottom = r.top + n_h; + // Save new client area position + w32->window_x = r.left + b_left; + w32->window_y = r.top + b_top; } MP_VERBOSE(w32, "reset window bounds: %d:%d:%d:%d\n", @@ -1116,6 +1148,7 @@ static void gui_thread_reconfig(void *ptr) vo_apply_window_geometry(vo, &geo); bool reset_size = w32->o_dwidth != vo->dwidth || w32->o_dheight != vo->dheight; + bool pos_init = false; w32->o_dwidth = vo->dwidth; w32->o_dheight = vo->dheight; @@ -1132,6 +1165,7 @@ static void gui_thread_reconfig(void *ptr) } else { w32->window_bounds_initialized = true; reset_size = true; + pos_init = true; w32->window_x = w32->prev_x = geo.win.x0; w32->window_y = w32->prev_y 
= geo.win.y0; } @@ -1147,6 +1181,12 @@ static void gui_thread_reconfig(void *ptr) vo->dheight = r.bottom; } + // Recenter window around old position on new video size + // excluding the case when initial positon handled by win_state. + if (!pos_init) { + w32->window_x += w32->dw / 2 - vo->dwidth / 2; + w32->window_y += w32->dh / 2 - vo->dheight / 2; + } w32->dw = vo->dwidth; w32->dh = vo->dheight; @@ -1184,14 +1224,12 @@ static void *gui_thread(void *ptr) thread_disable_ime(); - HINSTANCE hInstance = GetModuleHandleW(NULL); - WNDCLASSEXW wcex = { .cbSize = sizeof wcex, .style = CS_HREDRAW | CS_VREDRAW, .lpfnWndProc = WndProc, - .hInstance = hInstance, - .hIcon = LoadIconW(hInstance, L"IDI_ICON1"), + .hInstance = HINST_THISCOMPONENT, + .hIcon = LoadIconW(HINST_THISCOMPONENT, L"IDI_ICON1"), .hCursor = LoadCursor(NULL, IDC_ARROW), .lpszClassName = classname, }; @@ -1209,13 +1247,13 @@ static void *gui_thread(void *ptr) classname, WS_CHILD | WS_VISIBLE, 0, 0, r.right, r.bottom, - w32->parent, 0, hInstance, NULL); + w32->parent, 0, HINST_THISCOMPONENT, NULL); } else { w32->window = CreateWindowExW(0, classname, classname, update_style(w32, 0), CW_USEDEFAULT, SW_HIDE, 100, 100, - 0, 0, hInstance, NULL); + 0, 0, HINST_THISCOMPONENT, NULL); } if (!w32->window) { @@ -1374,9 +1412,13 @@ static int gui_thread_control(struct vo_w32_state *w32, int request, void *arg) if (!w32->window_bounds_initialized) return VO_FALSE; if (w32->current_fs) { + w32->prev_x += w32->prev_width / 2 - s[0] / 2; + w32->prev_y += w32->prev_height / 2 - s[1] / 2; w32->prev_width = s[0]; w32->prev_height = s[1]; } else { + w32->window_x += w32->dw / 2 - s[0] / 2; + w32->window_y += w32->dh / 2 - s[1] / 2; w32->dw = s[0]; w32->dh = s[1]; } @@ -1419,7 +1461,7 @@ static int gui_thread_control(struct vo_w32_state *w32, int request, void *arg) if (!w32->taskbar_list3 || !w32->tbtnCreated) return VO_TRUE; - if (!pstate->playing) { + if (!pstate->playing || !pstate->taskbar_progress) { 
ITaskbarList3_SetProgressState(w32->taskbar_list3, w32->window, TBPF_NOPROGRESS); return VO_TRUE; diff --git a/video/out/wayland_common.c b/video/out/wayland_common.c index b9dac90..0e44ddd 100644 --- a/video/out/wayland_common.c +++ b/video/out/wayland_common.c @@ -189,9 +189,22 @@ static void output_handle_mode(void *data, output->refresh_rate = refresh; } +static void output_handle_done(void* data, struct wl_output *wl_output) +{ +} + +static void output_handle_scale(void* data, struct wl_output *wl_output, + int32_t factor) +{ + struct vo_wayland_output *output = data; + output->scale = factor; +} + static const struct wl_output_listener output_listener = { output_handle_geometry, - output_handle_mode + output_handle_mode, + output_handle_done, + output_handle_scale }; @@ -211,6 +224,8 @@ static void surface_handle_enter(void *data, break; } } + + wl->window.events |= VO_EVENT_WIN_STATE; } static void surface_handle_leave(void *data, @@ -401,11 +416,15 @@ static void pointer_handle_motion(void *data, wl_fixed_t sx_w, wl_fixed_t sy_w) { + int32_t scale = 1; struct vo_wayland_state *wl = data; + if (wl->display.current_output) + scale = wl->display.current_output->scale; + wl->cursor.pointer = pointer; - wl->window.mouse_x = wl_fixed_to_int(sx_w); - wl->window.mouse_y = wl_fixed_to_int(sy_w); + wl->window.mouse_x = scale*wl_fixed_to_int(sx_w); + wl->window.mouse_y = scale*wl_fixed_to_int(sy_w); mp_input_set_mouse_pos(wl->vo->input_ctx, wl->window.mouse_x, wl->window.mouse_y); @@ -521,7 +540,7 @@ static void data_device_handle_data_offer(void *data, { struct vo_wayland_state *wl = data; if (wl->input.offer) { - MP_ERR(wl, "There is already a dnd entry point.\n"); + MP_DBG(wl, "There is already a dnd entry point.\n"); wl_data_offer_destroy(wl->input.offer); } @@ -606,7 +625,8 @@ static void registry_handle_global (void *data, if (strcmp(interface, "wl_compositor") == 0) { wl->display.compositor = wl_registry_bind(reg, id, - &wl_compositor_interface, 1); + 
&wl_compositor_interface, + MPMIN(3, version)); } else if (strcmp(interface, "wl_shell") == 0) { @@ -625,7 +645,9 @@ static void registry_handle_global (void *data, talloc_zero(wl, struct vo_wayland_output); output->id = id; - output->output = wl_registry_bind(reg, id, &wl_output_interface, 1); + output->scale = 1; + output->output = wl_registry_bind(reg, id, &wl_output_interface, + MPMIN(2, version)); wl_output_add_listener(output->output, &output_listener, output); wl_list_insert(&wl->display.output_list, &output->link); @@ -739,7 +761,6 @@ static void schedule_resize(struct vo_wayland_state *wl, { int32_t minimum_size = 150; int32_t x, y; - float temp_aspect = width / (float) MPMAX(height, 1); float win_aspect = wl->window.aspect; if (win_aspect <= 0) win_aspect = 1; @@ -770,12 +791,6 @@ static void schedule_resize(struct vo_wayland_state *wl, case WL_SHELL_SURFACE_RESIZE_BOTTOM_RIGHT: height = (1 / win_aspect) * width; break; - default: - if (wl->window.aspect < temp_aspect) - width = wl->window.aspect * height; - else - height = (1 / win_aspect) * width; - break; } if (edges & WL_SHELL_SURFACE_RESIZE_LEFT) @@ -792,7 +807,7 @@ static void schedule_resize(struct vo_wayland_state *wl, wl->window.sh_height = height; wl->window.sh_x = x; wl->window.sh_y = y; - wl->window.events |= VO_EVENT_RESIZE; + wl->window.events |= VO_EVENT_WIN_STATE | VO_EVENT_RESIZE; wl->vo->dwidth = width; wl->vo->dheight = height; } @@ -1023,10 +1038,11 @@ int vo_wayland_init (struct vo *vo) "\tvendor: %s\n" "\tmodel: %s\n" "\tw: %d, h: %d\n" - "\tHz: %d\n", + "\tscale: %d\n" + "\tHz: %f\n", o->make, o->model, - o->width, o->height, - o->refresh_rate / 1000); + o->width, o->height, o->scale, + o->refresh_rate / 1000.0f); } vo->event_fd = wl->display.display_fd; @@ -1276,7 +1292,7 @@ int vo_wayland_control (struct vo *vo, int *events, int request, void *arg) break; // refresh rate is stored in milli-Hertz (mHz) - double fps = wl->display.current_output->refresh_rate / 1000; + double fps = 
wl->display.current_output->refresh_rate / 1000.0f; *(double*) arg = fps; return VO_TRUE; } diff --git a/video/out/wayland_common.h b/video/out/wayland_common.h index d23b2f2..ec3f72c 100644 --- a/video/out/wayland_common.h +++ b/video/out/wayland_common.h @@ -41,6 +41,7 @@ struct vo_wayland_output { uint32_t flags; int32_t width; int32_t height; + int32_t scale; int32_t refresh_rate; // fps (mHz) const char *make; const char *model; diff --git a/video/out/x11_common.c b/video/out/x11_common.c index 034f785..647a910 100644 --- a/video/out/x11_common.c +++ b/video/out/x11_common.c @@ -280,6 +280,9 @@ static void vo_set_cursor_hidden(struct vo *vo, bool cursor_hidden) static int x11_errorhandler(Display *display, XErrorEvent *event) { struct mp_log *log = x11_error_output; + if (!log) + return 0; + char msg[60]; XGetErrorText(display, event->error_code, (char *) &msg, sizeof(msg)); @@ -746,8 +749,8 @@ void vo_x11_uninit(struct vo *vo) if (x11->xim) XCloseIM(x11->xim); if (x11->display) { - x11_error_output = NULL; XSetErrorHandler(NULL); + x11_error_output = NULL; XCloseDisplay(x11->display); } @@ -950,6 +953,22 @@ static int get_mods(unsigned int state) return modifiers; } +static void vo_x11_update_composition_hint(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + + long hint = 0; + switch (vo->opts->x11_bypass_compositor) { + case 0: hint = 0; break; // leave default + case 1: hint = 1; break; // always bypass + case 2: hint = x11->fs ? 
1 : 0; break; // bypass in FS + case 3: hint = 2; break; // always enable + } + + XChangeProperty(x11->display, x11->window, XA(x11,_NET_WM_BYPASS_COMPOSITOR), + XA_CARDINAL, 32, PropModeReplace, (unsigned char *)&hint, 1); +} + static void vo_x11_check_net_wm_state_fullscreen_change(struct vo *vo) { struct vo_x11_state *x11 = vo->x11; @@ -986,6 +1005,8 @@ static void vo_x11_check_net_wm_state_fullscreen_change(struct vo *vo) x11->size_changed_during_fs = false; x11->pos_changed_during_fs = false; + + vo_x11_update_composition_hint(vo); } } } @@ -1437,15 +1458,11 @@ static void vo_x11_create_window(struct vo *vo, XVisualInfo *vis, } if (!x11->parent) { - if (vo->opts->x11_bypass_compositor) { - long v = 1; // request disabling compositor - XChangeProperty(x11->display, x11->window, - XA(x11,_NET_WM_BYPASS_COMPOSITOR), XA_CARDINAL, 32, - PropModeReplace, (unsigned char *)&v, 1); - } + vo_x11_update_composition_hint(vo); vo_x11_set_wm_icon(x11); vo_x11_update_window_title(vo); vo_x11_dnd_init_window(vo); + vo_x11_set_property_utf8(vo, XA(x11, _GTK_THEME_VARIANT), "dark"); } vo_x11_xembed_update(x11, 0); } @@ -1485,6 +1502,8 @@ static void vo_x11_map_window(struct vo *vo, struct mp_rect rc) XA_CARDINAL, 32, PropModeReplace, (unsigned char *)&v, 1); } + vo_x11_update_composition_hint(vo); + // map window int events = StructureNotifyMask | ExposureMask | PropertyChangeMask | LeaveWindowMask | EnterWindowMask; @@ -1731,6 +1750,8 @@ static void vo_x11_fullscreen(struct vo *vo) x11->size_changed_during_fs = false; x11->pos_changed_during_fs = false; + + vo_x11_update_composition_hint(vo); } int vo_x11_control(struct vo *vo, int *events, int request, void *arg) diff --git a/video/vaapi.c b/video/vaapi.c index 61d94ef..f8d0fab 100644 --- a/video/vaapi.c +++ b/video/vaapi.c @@ -128,8 +128,7 @@ struct mp_vaapi_ctx *va_initialize(VADisplay *display, struct mp_log *plog, .display = display, .hwctx = { .type = HWDEC_VAAPI, - .priv = res, - .vaapi_ctx = res, + .ctx = res, 
.download_image = ctx_download_image, }, }; @@ -487,6 +486,38 @@ struct mp_image *va_surface_download(struct mp_image *src, return NULL; } +// Set the hw_subfmt from the surface's real format. Because of this bug: +// https://bugs.freedesktop.org/show_bug.cgi?id=79848 +// it should be assumed that the real format is only known after an arbitrary +// vaCreateContext() call has been made, or even better, after the surface +// has been rendered to. +// If the hw_subfmt is already set, this is a NOP. +void va_surface_init_subformat(struct mp_image *mpi) +{ + VAStatus status; + if (mpi->params.hw_subfmt) + return; + struct va_surface *p = va_surface_in_mp_image(mpi); + if (!p) + return; + + VAImage va_image = { .image_id = VA_INVALID_ID }; + + va_lock(p->ctx); + + status = vaDeriveImage(p->display, va_surface_id(mpi), &va_image); + if (status != VA_STATUS_SUCCESS) + goto err; + + mpi->params.hw_subfmt = va_image.format.fourcc; + + status = vaDestroyImage(p->display, va_image.image_id); + CHECK_VA_STATUS(p->ctx, "vaDestroyImage()"); + +err: + va_unlock(p->ctx); +} + struct pool_alloc_ctx { struct mp_vaapi_ctx *vaapi; int rt_format; diff --git a/video/vaapi.h b/video/vaapi.h index 11ff2c9..3f0d1dc 100644 --- a/video/vaapi.h +++ b/video/vaapi.h @@ -69,6 +69,8 @@ struct mp_image *va_surface_download(struct mp_image *src, int va_surface_alloc_imgfmt(struct mp_image *img, int imgfmt); int va_surface_upload(struct mp_image *va_dst, struct mp_image *sw_src); +void va_surface_init_subformat(struct mp_image *mpi); + bool va_guess_if_emulated(struct mp_vaapi_ctx *ctx); #endif diff --git a/video/vdpau.c b/video/vdpau.c index 9dfbc2b..dffb02e 100644 --- a/video/vdpau.c +++ b/video/vdpau.c @@ -28,11 +28,61 @@ #include "mp_image_pool.h" #include "vdpau_mixer.h" +static struct mp_image *download_image_yuv(struct mp_hwdec_ctx *hwctx, + struct mp_image *mpi, + struct mp_image_pool *swpool) +{ + struct mp_vdpau_ctx *ctx = hwctx->ctx; + struct vdp_functions *vdp = &ctx->vdp; + VdpStatus 
vdp_st; + + if (mpi->imgfmt != IMGFMT_VDPAU || mp_vdpau_mixed_frame_get(mpi)) + return NULL; + + VdpVideoSurface surface = (uintptr_t)mpi->planes[3]; + + VdpChromaType s_chroma_type; + uint32_t s_w, s_h; + vdp_st = vdp->video_surface_get_parameters(surface, &s_chroma_type, &s_w, &s_h); + CHECK_VDP_ERROR_NORETURN(ctx, + "Error when calling vdp_video_surface_get_parameters"); + if (vdp_st != VDP_STATUS_OK) + return NULL; + + // Don't bother supporting other types for now. + if (s_chroma_type != VDP_CHROMA_TYPE_420) + return NULL; + + // The allocation needs to be uncropped, because get_bits writes to it. + struct mp_image *out = mp_image_pool_get(swpool, IMGFMT_NV12, s_w, s_h); + if (!out) + return NULL; + + mp_image_set_size(out, mpi->w, mpi->h); + mp_image_copy_attributes(out, mpi); + + vdp_st = vdp->video_surface_get_bits_y_cb_cr(surface, + VDP_YCBCR_FORMAT_NV12, + (void * const *)out->planes, + out->stride); + CHECK_VDP_ERROR_NORETURN(ctx, + "Error when calling vdp_output_surface_get_bits_y_cb_cr"); + if (vdp_st != VDP_STATUS_OK) { + talloc_free(out); + return NULL; + } + + return out; +} + static struct mp_image *download_image(struct mp_hwdec_ctx *hwctx, struct mp_image *mpi, struct mp_image_pool *swpool) { - struct mp_vdpau_ctx *ctx = hwctx->vdpau_ctx; + if (mpi->imgfmt != IMGFMT_VDPAU && mpi->imgfmt != IMGFMT_VDPAU_OUTPUT) + return NULL; + + struct mp_vdpau_ctx *ctx = hwctx->ctx; struct vdp_functions *vdp = &ctx->vdp; VdpStatus vdp_st; @@ -40,6 +90,10 @@ static struct mp_image *download_image(struct mp_hwdec_ctx *hwctx, int w, h; mp_image_params_get_dsize(&mpi->params, &w, &h); + res = download_image_yuv(hwctx, mpi, swpool); + if (res) + return res; + // Abuse this lock for our own purposes. It could use its own lock instead. 
pthread_mutex_lock(&ctx->pool_lock); @@ -268,8 +322,7 @@ static struct mp_image *create_ref(struct mp_vdpau_ctx *ctx, int index) struct surface_ref *ref = talloc_ptrtype(NULL, ref); *ref = (struct surface_ref){ctx, index}; struct mp_image *res = - mp_image_new_custom_ref(&(struct mp_image){0}, ref, - release_decoder_surface); + mp_image_new_custom_ref(NULL, ref, release_decoder_surface); if (res) { mp_image_setfmt(res, e->rgb ? IMGFMT_VDPAU_OUTPUT : IMGFMT_VDPAU); mp_image_set_size(res, e->w, e->h); @@ -396,8 +449,7 @@ struct mp_vdpau_ctx *mp_vdpau_create_device_x11(struct mp_log *log, Display *x11 .preemption_counter = 1, .hwctx = { .type = HWDEC_VDPAU, - .priv = ctx, - .vdpau_ctx = ctx, + .ctx = ctx, .download_image = download_image, }, .getimg_surface = VDP_INVALID_HANDLE, diff --git a/video/vdpau.h b/video/vdpau.h index db73a87..389e1c7 100644 --- a/video/vdpau.h +++ b/video/vdpau.h @@ -23,6 +23,9 @@ #define CHECK_VDP_ERROR(ctx, message) \ CHECK_VDP_ERROR_ST(ctx, message, return -1;) +#define CHECK_VDP_ERROR_NORETURN(ctx, message) \ + CHECK_VDP_ERROR_ST(ctx, message, ;) + #define CHECK_VDP_WARNING(ctx, message) \ do { \ if (vdp_st != VDP_STATUS_OK) \ diff --git a/video/vdpau_mixer.c b/video/vdpau_mixer.c index 7025aef..d6f93a9 100644 --- a/video/vdpau_mixer.c +++ b/video/vdpau_mixer.c @@ -71,6 +71,7 @@ struct mp_vdpau_mixer *mp_vdpau_mixer_create(struct mp_vdpau_ctx *vdp_ctx, .capabilities = MP_CSP_EQ_CAPS_COLORMATRIX, }, }; + mp_vdpau_handle_preemption(mixer->ctx, &mixer->preemption_counter); return mixer; } @@ -228,6 +229,13 @@ int mp_vdpau_mixer_render(struct mp_vdpau_mixer *mixer, if (!video_rect) video_rect = &fallback_rect; + int pe = mp_vdpau_handle_preemption(mixer->ctx, &mixer->preemption_counter); + if (pe < 1) { + mixer->video_mixer = VDP_INVALID_HANDLE; + if (pe < 0) + return -1; + } + if (video->imgfmt == IMGFMT_VDPAU_OUTPUT) { VdpOutputSurface surface = (uintptr_t)video->planes[3]; int flags = VDP_OUTPUT_SURFACE_RENDER_ROTATE_0; diff --git 
a/video/vdpau_mixer.h b/video/vdpau_mixer.h index 97bef86..716b57e 100644 --- a/video/vdpau_mixer.h +++ b/video/vdpau_mixer.h @@ -30,6 +30,7 @@ struct mp_vdpau_mixer_frame { struct mp_vdpau_mixer { struct mp_log *log; struct mp_vdpau_ctx *ctx; + uint64_t preemption_counter; bool initialized; struct mp_image_params image_params; |