Diffstat (limited to 'video/out'): 113 files changed, 14216 insertions(+), 5982 deletions(-)
diff --git a/video/out/cocoa/window.m b/video/out/cocoa/window.m index 6d464a1..2feaab9 100644 --- a/video/out/cocoa/window.m +++ b/video/out/cocoa/window.m @@ -386,8 +386,11 @@ - (NSRect)constrainFrameRect:(NSRect)nf toScreen:(NSScreen *)screen { - if (_is_animating && ![self.adapter isInFullScreenMode]) + if ((_is_animating && ![self.adapter isInFullScreenMode]) || + (!_is_animating && [self.adapter isInFullScreenMode])) + { return nf; + } screen = screen ?: self.screen ?: [NSScreen mainScreen]; NSRect of = [self frame]; diff --git a/video/out/d3d11/context.c b/video/out/d3d11/context.c new file mode 100644 index 0000000..b02d2e8 --- /dev/null +++ b/video/out/d3d11/context.c @@ -0,0 +1,244 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "common/msg.h" +#include "options/m_config.h" +#include "osdep/windows_utils.h" + +#include "video/out/gpu/context.h" +#include "video/out/gpu/d3d11_helpers.h" +#include "video/out/gpu/spirv.h" +#include "video/out/w32_common.h" +#include "ra_d3d11.h" + +struct d3d11_opts { + int feature_level; + int warp; + int flip; + int sync_interval; +}; + +#define OPT_BASE_STRUCT struct d3d11_opts +const struct m_sub_options d3d11_conf = { + .opts = (const struct m_option[]) { + OPT_CHOICE("d3d11-warp", warp, 0, + ({"auto", -1}, + {"no", 0}, + {"yes", 1})), + OPT_CHOICE("d3d11-feature-level", feature_level, 0, + ({"12_1", D3D_FEATURE_LEVEL_12_1}, + {"12_0", D3D_FEATURE_LEVEL_12_0}, + {"11_1", D3D_FEATURE_LEVEL_11_1}, + {"11_0", D3D_FEATURE_LEVEL_11_0}, + {"10_1", D3D_FEATURE_LEVEL_10_1}, + {"10_0", D3D_FEATURE_LEVEL_10_0}, + {"9_3", D3D_FEATURE_LEVEL_9_3}, + {"9_2", D3D_FEATURE_LEVEL_9_2}, + {"9_1", D3D_FEATURE_LEVEL_9_1})), + OPT_FLAG("d3d11-flip", flip, 0), + OPT_INTRANGE("d3d11-sync-interval", sync_interval, 0, 0, 4), + {0} + }, + .defaults = &(const struct d3d11_opts) { + .feature_level = D3D_FEATURE_LEVEL_12_1, + .warp = -1, + .flip = 1, + .sync_interval = 1, + }, + .size = sizeof(struct d3d11_opts) +}; + +struct priv { + struct d3d11_opts *opts; + + struct ra_tex *backbuffer; + ID3D11Device *device; + IDXGISwapChain *swapchain; +}; + +static struct mp_image *d3d11_screenshot(struct ra_swapchain *sw) +{ + struct priv *p = sw->ctx->priv; + if (!p->swapchain) + return NULL; + return mp_d3d11_screenshot(p->swapchain); +} + +static struct ra_tex *get_backbuffer(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ID3D11Texture2D *backbuffer = NULL; + struct ra_tex *tex = NULL; + HRESULT hr; + + hr = IDXGISwapChain_GetBuffer(p->swapchain, 0, &IID_ID3D11Texture2D, + (void**)&backbuffer); + if (FAILED(hr)) { + MP_ERR(ctx, "Couldn't get swapchain image\n"); + goto done; + } + + tex = ra_d3d11_wrap_tex(ctx->ra, (ID3D11Resource *)backbuffer); +done: + 
SAFE_RELEASE(backbuffer); + return tex; +} + +static bool resize(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + HRESULT hr; + + ra_tex_free(ctx->ra, &p->backbuffer); + + hr = IDXGISwapChain_ResizeBuffers(p->swapchain, 0, ctx->vo->dwidth, + ctx->vo->dheight, DXGI_FORMAT_UNKNOWN, 0); + if (FAILED(hr)) { + MP_FATAL(ctx, "Couldn't resize swapchain: %s\n", mp_HRESULT_to_str(hr)); + return false; + } + + p->backbuffer = get_backbuffer(ctx); + + return true; +} + +static bool d3d11_reconfig(struct ra_ctx *ctx) +{ + vo_w32_config(ctx->vo); + return resize(ctx); +} + +static int d3d11_color_depth(struct ra_swapchain *sw) +{ + return 8; +} + +static bool d3d11_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) +{ + struct priv *p = sw->priv; + *out_fbo = (struct ra_fbo) { + .tex = p->backbuffer, + .flip = false, + }; + return true; +} + +static bool d3d11_submit_frame(struct ra_swapchain *sw, + const struct vo_frame *frame) +{ + ra_d3d11_flush(sw->ctx->ra); + return true; +} + +static void d3d11_swap_buffers(struct ra_swapchain *sw) +{ + struct priv *p = sw->priv; + IDXGISwapChain_Present(p->swapchain, p->opts->sync_interval, 0); +} + +static int d3d11_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + int ret = vo_w32_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) { + if (!resize(ctx)) + return VO_ERROR; + } + return ret; +} + +static void d3d11_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + ra_tex_free(ctx->ra, &p->backbuffer); + SAFE_RELEASE(p->swapchain); + vo_w32_uninit(ctx->vo); + SAFE_RELEASE(p->device); + + // Destory the RA last to prevent objects we hold from showing up in D3D's + // leak checker + ctx->ra->fns->destroy(ctx->ra); +} + +static const struct ra_swapchain_fns d3d11_swapchain = { + .color_depth = d3d11_color_depth, + .screenshot = d3d11_screenshot, + .start_frame = d3d11_start_frame, + .submit_frame = d3d11_submit_frame, + .swap_buffers = d3d11_swap_buffers, +}; + +static 
bool d3d11_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + p->opts = mp_get_config_group(ctx, ctx->global, &d3d11_conf); + + struct ra_swapchain *sw = ctx->swapchain = talloc_zero(ctx, struct ra_swapchain); + sw->priv = p; + sw->ctx = ctx; + sw->fns = &d3d11_swapchain; + + struct d3d11_device_opts dopts = { + .debug = ctx->opts.debug, + .allow_warp = p->opts->warp != 0, + .force_warp = p->opts->warp == 1, + .max_feature_level = p->opts->feature_level, + .max_frame_latency = ctx->opts.swapchain_depth, + }; + if (!mp_d3d11_create_present_device(ctx->log, &dopts, &p->device)) + goto error; + + if (!spirv_compiler_init(ctx)) + goto error; + ctx->ra = ra_d3d11_create(p->device, ctx->log, ctx->spirv); + if (!ctx->ra) + goto error; + + if (!vo_w32_init(ctx->vo)) + goto error; + + struct d3d11_swapchain_opts scopts = { + .window = vo_w32_hwnd(ctx->vo), + .width = ctx->vo->dwidth, + .height = ctx->vo->dheight, + .flip = p->opts->flip, + // Add one frame for the backbuffer and one frame of "slack" to reduce + // contention with the window manager when acquiring the backbuffer + .length = ctx->opts.swapchain_depth + 2, + .usage = DXGI_USAGE_RENDER_TARGET_OUTPUT, + }; + if (!mp_d3d11_create_swapchain(p->device, ctx->log, &scopts, &p->swapchain)) + goto error; + + p->backbuffer = get_backbuffer(ctx); + + return true; + +error: + d3d11_uninit(ctx); + return false; +} + +const struct ra_ctx_fns ra_ctx_d3d11 = { + .type = "d3d11", + .name = "d3d11", + .reconfig = d3d11_reconfig, + .control = d3d11_control, + .init = d3d11_init, + .uninit = d3d11_uninit, +}; diff --git a/video/out/d3d11/hwdec_d3d11va.c b/video/out/d3d11/hwdec_d3d11va.c new file mode 100644 index 0000000..d83fdc5 --- /dev/null +++ b/video/out/d3d11/hwdec_d3d11va.c @@ -0,0 +1,249 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <windows.h> +#include <d3d11.h> +#include <d3d11_1.h> + +#include "config.h" + +#include "common/common.h" +#include "options/m_config.h" +#include "osdep/windows_utils.h" +#include "video/hwdec.h" +#include "video/d3d.h" +#include "video/out/d3d11/ra_d3d11.h" +#include "video/out/gpu/hwdec.h" + +struct d3d11va_opts { + int zero_copy; +}; + +#define OPT_BASE_STRUCT struct d3d11va_opts +const struct m_sub_options d3d11va_conf = { + .opts = (const struct m_option[]) { + OPT_FLAG("d3d11va-zero-copy", zero_copy, 0), + {0} + }, + .defaults = &(const struct d3d11va_opts) { + .zero_copy = 0, + }, + .size = sizeof(struct d3d11va_opts) +}; + +struct priv_owner { + struct d3d11va_opts *opts; + + struct mp_hwdec_ctx hwctx; + ID3D11Device *device; + ID3D11Device1 *device1; +}; + +struct priv { + // 1-copy path + ID3D11DeviceContext1 *ctx; + ID3D11Texture2D *copy_tex; + + // zero-copy path + int num_planes; + const struct ra_format *fmt[4]; +}; + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + hwdec_devices_remove(hw->devs, &p->hwctx); + SAFE_RELEASE(p->device); + SAFE_RELEASE(p->device1); +} + +static int init(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + HRESULT hr; + + if (!ra_is_d3d11(hw->ra)) + return -1; + p->device = ra_d3d11_get_device(hw->ra); + if (!p->device) 
+ return -1; + + p->opts = mp_get_config_group(hw->priv, hw->global, &d3d11va_conf); + + // D3D11VA requires Direct3D 11.1, so this should always succeed + hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D11Device1, + (void**)&p->device1); + if (FAILED(hr)) { + MP_ERR(hw, "Failed to get D3D11.1 interface: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + + ID3D10Multithread *multithread; + hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D10Multithread, + (void **)&multithread); + if (FAILED(hr)) { + MP_ERR(hw, "Failed to get Multithread interface: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + ID3D10Multithread_SetMultithreadProtected(multithread, TRUE); + ID3D10Multithread_Release(multithread); + + p->hwctx = (struct mp_hwdec_ctx){ + .driver_name = hw->driver->name, + .av_device_ref = d3d11_wrap_device_ref(p->device), + }; + hwdec_devices_add(hw->devs, &p->hwctx); + return 0; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + for (int i = 0; i < 4; i++) + ra_tex_free(mapper->ra, &mapper->tex[i]); + SAFE_RELEASE(p->copy_tex); + SAFE_RELEASE(p->ctx); +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *o = mapper->owner->priv; + struct priv *p = mapper->priv; + HRESULT hr; + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt; + mapper->dst_params.hw_subfmt = 0; + + struct ra_imgfmt_desc desc = {0}; + + if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc)) + return -1; + + if (o->opts->zero_copy) { + // In the zero-copy path, we create the ra_tex objects in the map + // operation, so we just need to store the format of each plane + p->num_planes = desc.num_planes; + for (int i = 0; i < desc.num_planes; i++) + p->fmt[i] = desc.planes[i]; + } else { + struct mp_image layout = {0}; + mp_image_set_params(&layout, &mapper->dst_params); + + DXGI_FORMAT copy_fmt; + switch (mapper->dst_params.imgfmt) { + case 
IMGFMT_NV12: copy_fmt = DXGI_FORMAT_NV12; break; + case IMGFMT_P010: copy_fmt = DXGI_FORMAT_P010; break; + default: return -1; + } + + D3D11_TEXTURE2D_DESC copy_desc = { + .Width = mapper->dst_params.w, + .Height = mapper->dst_params.h, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc.Count = 1, + .Format = copy_fmt, + .BindFlags = D3D11_BIND_SHADER_RESOURCE, + }; + hr = ID3D11Device_CreateTexture2D(o->device, ©_desc, NULL, + &p->copy_tex); + if (FAILED(hr)) { + MP_FATAL(mapper, "Could not create shader resource texture\n"); + return -1; + } + + for (int i = 0; i < desc.num_planes; i++) { + mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, p->copy_tex, + mp_image_plane_w(&layout, i), mp_image_plane_h(&layout, i), 0, + desc.planes[i]); + if (!mapper->tex[i]) { + MP_FATAL(mapper, "Could not create RA texture view\n"); + return -1; + } + } + + // A ref to the immediate context is needed for CopySubresourceRegion + ID3D11Device1_GetImmediateContext1(o->device1, &p->ctx); + } + + return 0; +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + ID3D11Texture2D *tex = (void *)mapper->src->planes[0]; + int subresource = (intptr_t)mapper->src->planes[1]; + + if (p->copy_tex) { + ID3D11DeviceContext1_CopySubresourceRegion1(p->ctx, + (ID3D11Resource *)p->copy_tex, 0, 0, 0, 0, + (ID3D11Resource *)tex, subresource, (&(D3D11_BOX) { + .left = 0, + .top = 0, + .front = 0, + .right = mapper->dst_params.w, + .bottom = mapper->dst_params.h, + .back = 1, + }), D3D11_COPY_DISCARD); + } else { + D3D11_TEXTURE2D_DESC desc2d; + ID3D11Texture2D_GetDesc(tex, &desc2d); + + for (int i = 0; i < p->num_planes; i++) { + // The video decode texture may include padding, so the size of the + // ra_tex needs to be determined by the actual size of the Tex2D + bool chroma = i >= 1; + int w = desc2d.Width / (chroma ? 2 : 1); + int h = desc2d.Height / (chroma ? 
2 : 1); + + mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, tex, + w, h, subresource, p->fmt[i]); + if (!mapper->tex[i]) + return -1; + } + } + + return 0; +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + if (p->copy_tex) + return; + for (int i = 0; i < 4; i++) + ra_tex_free(mapper->ra, &mapper->tex[i]); +} + +const struct ra_hwdec_driver ra_hwdec_d3d11va = { + .name = "d3d11va", + .priv_size = sizeof(struct priv_owner), + .imgfmts = {IMGFMT_D3D11VA, IMGFMT_D3D11NV12, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + .unmap = mapper_unmap, + }, +}; diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c new file mode 100644 index 0000000..63dc5b9 --- /dev/null +++ b/video/out/d3d11/ra_d3d11.c @@ -0,0 +1,2371 @@ +#include <windows.h> +#include <versionhelpers.h> +#include <d3d11_1.h> +#include <d3d11sdklayers.h> +#include <dxgi1_2.h> +#include <d3dcompiler.h> +#include <crossc.h> + +#include "common/msg.h" +#include "osdep/io.h" +#include "osdep/subprocess.h" +#include "osdep/timer.h" +#include "osdep/windows_utils.h" +#include "video/out/gpu/spirv.h" +#include "video/out/gpu/utils.h" + +#include "ra_d3d11.h" + +#ifndef D3D11_1_UAV_SLOT_COUNT +#define D3D11_1_UAV_SLOT_COUNT (64) +#endif + +struct dll_version { + uint16_t major; + uint16_t minor; + uint16_t build; + uint16_t revision; +}; + +struct ra_d3d11 { + struct spirv_compiler *spirv; + + ID3D11Device *dev; + ID3D11Device1 *dev1; + ID3D11DeviceContext *ctx; + ID3D11DeviceContext1 *ctx1; + pD3DCompile D3DCompile; + + struct dll_version d3d_compiler_ver; + + // Debug interfaces (--gpu-debug) + ID3D11Debug *debug; + ID3D11InfoQueue *iqueue; + + // Device capabilities + D3D_FEATURE_LEVEL fl; + bool has_clear_view; + bool has_timestamp_queries; + int max_uavs; + + // Streaming dynamic vertex buffer, 
which is used for all renderpasses + ID3D11Buffer *vbuf; + size_t vbuf_size; + size_t vbuf_used; + + // clear() renderpass resources (only used when has_clear_view is false) + ID3D11PixelShader *clear_ps; + ID3D11VertexShader *clear_vs; + ID3D11InputLayout *clear_layout; + ID3D11Buffer *clear_vbuf; + ID3D11Buffer *clear_cbuf; + + // blit() renderpass resources + ID3D11PixelShader *blit_float_ps; + ID3D11VertexShader *blit_vs; + ID3D11InputLayout *blit_layout; + ID3D11Buffer *blit_vbuf; + ID3D11SamplerState *blit_sampler; +}; + +struct d3d_tex { + // res mirrors one of tex1d, tex2d or tex3d for convenience. It does not + // hold an additional reference to the texture object. + ID3D11Resource *res; + + ID3D11Texture1D *tex1d; + ID3D11Texture2D *tex2d; + ID3D11Texture3D *tex3d; + int array_slice; + + ID3D11ShaderResourceView *srv; + ID3D11RenderTargetView *rtv; + ID3D11UnorderedAccessView *uav; + ID3D11SamplerState *sampler; +}; + +struct d3d_buf { + ID3D11Buffer *buf; + ID3D11Buffer *staging; + ID3D11UnorderedAccessView *uav; + void *data; // Data for mapped staging texture +}; + +struct d3d_rpass { + ID3D11PixelShader *ps; + ID3D11VertexShader *vs; + ID3D11ComputeShader *cs; + ID3D11InputLayout *layout; + ID3D11BlendState *bstate; +}; + +struct d3d_timer { + ID3D11Query *ts_start; + ID3D11Query *ts_end; + ID3D11Query *disjoint; + uint64_t result; // Latches the result from the previous use of the timer +}; + +struct d3d_fmt { + const char *name; + int components; + int bytes; + int bits[4]; + DXGI_FORMAT fmt; + enum ra_ctype ctype; + bool unordered; +}; + +static const char clear_vs[] = "\ +float4 main(float2 pos : POSITION) : SV_Position\n\ +{\n\ + return float4(pos, 0.0, 1.0);\n\ +}\n\ +"; + +static const char clear_ps[] = "\ +cbuffer ps_cbuf : register(b0) {\n\ + float4 color : packoffset(c0);\n\ +}\n\ +\n\ +float4 main(float4 pos : SV_Position) : SV_Target\n\ +{\n\ + return color;\n\ +}\n\ +"; + +struct blit_vert { + float x, y, u, v; +}; + +static const char 
blit_vs[] = "\ +void main(float2 pos : POSITION, float2 coord : TEXCOORD0,\n\ + out float4 out_pos : SV_Position, out float2 out_coord : TEXCOORD0)\n\ +{\n\ + out_pos = float4(pos, 0.0, 1.0);\n\ + out_coord = coord;\n\ +}\n\ +"; + +static const char blit_float_ps[] = "\ +Texture2D<float4> tex : register(t0);\n\ +SamplerState samp : register(s0);\n\ +\n\ +float4 main(float4 pos : SV_Position, float2 coord : TEXCOORD0) : SV_Target\n\ +{\n\ + return tex.Sample(samp, coord);\n\ +}\n\ +"; + +#define DXFMT(f, t) .fmt = DXGI_FORMAT_##f##_##t, .ctype = RA_CTYPE_##t +static struct d3d_fmt formats[] = { + { "r8", 1, 1, { 8}, DXFMT(R8, UNORM) }, + { "rg8", 2, 2, { 8, 8}, DXFMT(R8G8, UNORM) }, + { "rgba8", 4, 4, { 8, 8, 8, 8}, DXFMT(R8G8B8A8, UNORM) }, + { "r16", 1, 2, {16}, DXFMT(R16, UNORM) }, + { "rg16", 2, 4, {16, 16}, DXFMT(R16G16, UNORM) }, + { "rgba16", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, UNORM) }, + + { "r32ui", 1, 4, {32}, DXFMT(R32, UINT) }, + { "rg32ui", 2, 8, {32, 32}, DXFMT(R32G32, UINT) }, + { "rgb32ui", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, UINT) }, + { "rgba32ui", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, UINT) }, + + { "r16hf", 1, 2, {16}, DXFMT(R16, FLOAT) }, + { "rg16hf", 2, 4, {16, 16}, DXFMT(R16G16, FLOAT) }, + { "rgba16hf", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, FLOAT) }, + { "r32f", 1, 4, {32}, DXFMT(R32, FLOAT) }, + { "rg32f", 2, 8, {32, 32}, DXFMT(R32G32, FLOAT) }, + { "rgb32f", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, FLOAT) }, + { "rgba32f", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, FLOAT) }, + + { "rgb10_a2", 4, 4, {10, 10, 10, 2}, DXFMT(R10G10B10A2, UNORM) }, + { "bgra8", 4, 4, { 8, 8, 8, 8}, DXFMT(B8G8R8A8, UNORM), .unordered = true }, +}; + +static bool dll_version_equal(struct dll_version a, struct dll_version b) +{ + return a.major == b.major && + a.minor == b.minor && + a.build == b.build && + a.revision == b.revision; +} + +static DXGI_FORMAT fmt_to_dxgi(const struct ra_format *fmt) +{ + struct d3d_fmt *d3d = fmt->priv; + 
return d3d->fmt; +} + +static void setup_formats(struct ra *ra) +{ + // All formats must be usable as a 2D texture + static const UINT sup_basic = D3D11_FORMAT_SUPPORT_TEXTURE2D; + // SHADER_SAMPLE indicates support for linear sampling, point always works + static const UINT sup_filter = D3D11_FORMAT_SUPPORT_SHADER_SAMPLE; + // RA requires renderable surfaces to be blendable as well + static const UINT sup_render = D3D11_FORMAT_SUPPORT_RENDER_TARGET | + D3D11_FORMAT_SUPPORT_BLENDABLE; + + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + for (int i = 0; i < MP_ARRAY_SIZE(formats); i++) { + struct d3d_fmt *d3dfmt = &formats[i]; + UINT support = 0; + hr = ID3D11Device_CheckFormatSupport(p->dev, d3dfmt->fmt, &support); + if (FAILED(hr)) + continue; + if ((support & sup_basic) != sup_basic) + continue; + + struct ra_format *fmt = talloc_zero(ra, struct ra_format); + *fmt = (struct ra_format) { + .name = d3dfmt->name, + .priv = d3dfmt, + .ctype = d3dfmt->ctype, + .ordered = !d3dfmt->unordered, + .num_components = d3dfmt->components, + .pixel_size = d3dfmt->bytes, + .linear_filter = (support & sup_filter) == sup_filter, + .renderable = (support & sup_render) == sup_render, + }; + + if (support & D3D11_FORMAT_SUPPORT_TEXTURE1D) + ra->caps |= RA_CAP_TEX_1D; + + for (int j = 0; j < d3dfmt->components; j++) + fmt->component_size[j] = fmt->component_depth[j] = d3dfmt->bits[j]; + + fmt->glsl_format = ra_fmt_glsl_format(fmt); + + MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt); + } +} + +static bool tex_init(struct ra *ra, struct ra_tex *tex) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *tex_p = tex->priv; + struct ra_tex_params *params = &tex->params; + HRESULT hr; + + // A SRV is required for renderpasses and blitting, since blitting can use + // a renderpass internally + if (params->render_src || params->blit_src) { + // Always specify the SRV format for simplicity. 
This will match the + // texture format for textures created with tex_create, but it can be + // different for wrapped planar video textures. + D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = { + .Format = fmt_to_dxgi(params->format), + }; + switch (params->dimensions) { + case 1: + if (tex_p->array_slice >= 0) { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY; + srvdesc.Texture1DArray.MipLevels = 1; + srvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice; + srvdesc.Texture1DArray.ArraySize = 1; + } else { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; + srvdesc.Texture1D.MipLevels = 1; + } + break; + case 2: + if (tex_p->array_slice >= 0) { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; + srvdesc.Texture2DArray.MipLevels = 1; + srvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice; + srvdesc.Texture2DArray.ArraySize = 1; + } else { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvdesc.Texture2D.MipLevels = 1; + } + break; + case 3: + // D3D11 does not have Texture3D arrays + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; + srvdesc.Texture3D.MipLevels = 1; + break; + } + hr = ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc, + &tex_p->srv); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create SRV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + // Samplers are required for renderpasses, but not blitting, since the blit + // code uses its own point sampler + if (params->render_src) { + D3D11_SAMPLER_DESC sdesc = { + .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP, + .ComparisonFunc = D3D11_COMPARISON_NEVER, + .MinLOD = 0, + .MaxLOD = D3D11_FLOAT32_MAX, + .MaxAnisotropy = 1, + }; + if (params->src_linear) + sdesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + if (params->src_repeat) { + sdesc.AddressU = sdesc.AddressV = sdesc.AddressW = + D3D11_TEXTURE_ADDRESS_WRAP; + } + // The runtime pools sampler 
state objects internally, so we don't have + // to worry about resource usage when creating one for every ra_tex + hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &tex_p->sampler); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create sampler: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + // Like SRVs, an RTV is required for renderpass output and blitting + if (params->render_dst || params->blit_dst) { + hr = ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, NULL, + &tex_p->rtv); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create RTV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) { + hr = ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, NULL, + &tex_p->uav); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + return true; +error: + return false; +} + +static void tex_destroy(struct ra *ra, struct ra_tex *tex) +{ + if (!tex) + return; + struct d3d_tex *tex_p = tex->priv; + + SAFE_RELEASE(tex_p->srv); + SAFE_RELEASE(tex_p->rtv); + SAFE_RELEASE(tex_p->uav); + SAFE_RELEASE(tex_p->sampler); + SAFE_RELEASE(tex_p->res); + talloc_free(tex); +} + +static struct ra_tex *tex_create(struct ra *ra, + const struct ra_tex_params *params) +{ + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); + tex->params = *params; + tex->params.initial_data = NULL; + + struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex); + DXGI_FORMAT fmt = fmt_to_dxgi(params->format); + + D3D11_SUBRESOURCE_DATA *pdata = NULL; + if (params->initial_data) { + pdata = &(D3D11_SUBRESOURCE_DATA) { + .pSysMem = params->initial_data, + .SysMemPitch = params->w * params->format->pixel_size, + }; + if (params->dimensions >= 3) + pdata->SysMemSlicePitch = pdata->SysMemPitch * params->h; + } + + D3D11_USAGE usage = D3D11_USAGE_DEFAULT; + D3D11_BIND_FLAG bind_flags = 0; + + if 
(params->render_src || params->blit_src) + bind_flags |= D3D11_BIND_SHADER_RESOURCE; + if (params->render_dst || params->blit_dst) + bind_flags |= D3D11_BIND_RENDER_TARGET; + if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) + bind_flags |= D3D11_BIND_UNORDERED_ACCESS; + + // Apparently IMMUTABLE textures are efficient, so try to infer whether we + // can use one + if (params->initial_data && !params->render_dst && !params->storage_dst && + !params->blit_dst && !params->host_mutable) + usage = D3D11_USAGE_IMMUTABLE; + + switch (params->dimensions) { + case 1:; + D3D11_TEXTURE1D_DESC desc1d = { + .Width = params->w, + .MipLevels = 1, + .ArraySize = 1, + .Format = fmt, + .Usage = usage, + .BindFlags = bind_flags, + }; + hr = ID3D11Device_CreateTexture1D(p->dev, &desc1d, pdata, &tex_p->tex1d); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create Texture1D: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + tex_p->res = (ID3D11Resource *)tex_p->tex1d; + break; + case 2:; + D3D11_TEXTURE2D_DESC desc2d = { + .Width = params->w, + .Height = params->h, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc.Count = 1, + .Format = fmt, + .Usage = usage, + .BindFlags = bind_flags, + }; + hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, pdata, &tex_p->tex2d); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create Texture2D: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + tex_p->res = (ID3D11Resource *)tex_p->tex2d; + break; + case 3:; + D3D11_TEXTURE3D_DESC desc3d = { + .Width = params->w, + .Height = params->h, + .Depth = params->d, + .MipLevels = 1, + .Format = fmt, + .Usage = usage, + .BindFlags = bind_flags, + }; + hr = ID3D11Device_CreateTexture3D(p->dev, &desc3d, pdata, &tex_p->tex3d); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create Texture3D: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + tex_p->res = (ID3D11Resource *)tex_p->tex3d; + break; + default: + abort(); + } + + tex_p->array_slice = -1; + + if (!tex_init(ra, tex)) + goto error; + + return tex; 
+ +error: + tex_destroy(ra, tex); + return NULL; +} + +struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res) +{ + HRESULT hr; + + struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); + struct ra_tex_params *params = &tex->params; + struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex); + + DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN; + D3D11_USAGE usage = D3D11_USAGE_DEFAULT; + D3D11_BIND_FLAG bind_flags = 0; + + D3D11_RESOURCE_DIMENSION type; + ID3D11Resource_GetType(res, &type); + switch (type) { + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: + hr = ID3D11Resource_QueryInterface(res, &IID_ID3D11Texture2D, + (void**)&tex_p->tex2d); + if (FAILED(hr)) { + MP_ERR(ra, "Resource is not a ID3D11Texture2D\n"); + goto error; + } + tex_p->res = (ID3D11Resource *)tex_p->tex2d; + + D3D11_TEXTURE2D_DESC desc2d; + ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d); + if (desc2d.MipLevels != 1) { + MP_ERR(ra, "Mipmapped textures not supported for wrapping\n"); + goto error; + } + if (desc2d.ArraySize != 1) { + MP_ERR(ra, "Texture arrays not supported for wrapping\n"); + goto error; + } + if (desc2d.SampleDesc.Count != 1) { + MP_ERR(ra, "Multisampled textures not supported for wrapping\n"); + goto error; + } + + params->dimensions = 2; + params->w = desc2d.Width; + params->h = desc2d.Height; + params->d = 1; + usage = desc2d.Usage; + bind_flags = desc2d.BindFlags; + fmt = desc2d.Format; + break; + default: + // We could wrap Texture1D/3D as well, but keep it simple, since this + // function is only used for swapchain backbuffers at the moment + MP_ERR(ra, "Resource is not suitable to wrap\n"); + goto error; + } + + for (int i = 0; i < ra->num_formats; i++) { + DXGI_FORMAT target_fmt = fmt_to_dxgi(ra->formats[i]); + if (fmt == target_fmt) { + params->format = ra->formats[i]; + break; + } + } + if (!params->format) { + MP_ERR(ra, "Could not find a suitable RA format for wrapped resource\n"); + goto error; + } + + if (bind_flags & D3D11_BIND_SHADER_RESOURCE) + 
params->render_src = params->blit_src = true; + if (bind_flags & D3D11_BIND_RENDER_TARGET) + params->render_dst = params->blit_dst = true; + if (bind_flags & D3D11_BIND_UNORDERED_ACCESS) + params->storage_dst = true; + + if (usage != D3D11_USAGE_DEFAULT) { + MP_ERR(ra, "Resource is not D3D11_USAGE_DEFAULT\n"); + goto error; + } + + tex_p->array_slice = -1; + + if (!tex_init(ra, tex)) + goto error; + + return tex; +error: + tex_destroy(ra, tex); + return NULL; +} + +struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res, + int w, int h, int array_slice, + const struct ra_format *fmt) +{ + struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); + struct ra_tex_params *params = &tex->params; + struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex); + + tex_p->tex2d = res; + tex_p->res = (ID3D11Resource *)tex_p->tex2d; + ID3D11Texture2D_AddRef(res); + + D3D11_TEXTURE2D_DESC desc2d; + ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d); + if (!(desc2d.BindFlags & D3D11_BIND_SHADER_RESOURCE)) { + MP_ERR(ra, "Video resource is not bindable\n"); + goto error; + } + + params->dimensions = 2; + params->w = w; + params->h = h; + params->d = 1; + params->render_src = true; + params->src_linear = true; + // fmt can be different to the texture format for planar video textures + params->format = fmt; + + if (desc2d.ArraySize > 1) { + tex_p->array_slice = array_slice; + } else { + tex_p->array_slice = -1; + } + + if (!tex_init(ra, tex)) + goto error; + + return tex; +error: + tex_destroy(ra, tex); + return NULL; +} + +static bool tex_upload(struct ra *ra, const struct ra_tex_upload_params *params) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_tex *tex = params->tex; + struct d3d_tex *tex_p = tex->priv; + + if (!params->src) { + MP_ERR(ra, "Pixel buffers are not supported\n"); + return false; + } + + const char *src = params->src; + ptrdiff_t stride = tex->params.dimensions >= 2 ? tex->params.w : 0; + ptrdiff_t pitch = tex->params.dimensions >= 3 ? 
stride * tex->params.h : 0; + bool invalidate = true; + D3D11_BOX *rc = NULL; + + if (tex->params.dimensions == 2) { + stride = params->stride; + + if (params->rc && (params->rc->x0 != 0 || params->rc->y0 != 0 || + params->rc->x1 != tex->params.w || params->rc->y1 != tex->params.h)) + { + rc = &(D3D11_BOX) { + .left = params->rc->x0, + .top = params->rc->y0, + .front = 0, + .right = params->rc->x1, + .bottom = params->rc->y1, + .back = 1, + }; + invalidate = params->invalidate; + } + } + + int subresource = tex_p->array_slice >= 0 ? tex_p->array_slice : 0; + if (p->ctx1) { + ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res, + subresource, rc, src, stride, pitch, + invalidate ? D3D11_COPY_DISCARD : 0); + } else { + ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, subresource, + rc, src, stride, pitch); + } + + return true; +} + +static void buf_destroy(struct ra *ra, struct ra_buf *buf) +{ + if (!buf) + return; + struct ra_d3d11 *p = ra->priv; + struct d3d_buf *buf_p = buf->priv; + + if (buf_p->data) + ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)buf_p->staging, 0); + SAFE_RELEASE(buf_p->buf); + SAFE_RELEASE(buf_p->staging); + SAFE_RELEASE(buf_p->uav); + talloc_free(buf); +} + +static struct ra_buf *buf_create(struct ra *ra, + const struct ra_buf_params *params) +{ + // D3D11 does not support permanent mapping or pixel buffers + if (params->host_mapped || params->type == RA_BUF_TYPE_TEX_UPLOAD) + return NULL; + + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + struct ra_buf *buf = talloc_zero(NULL, struct ra_buf); + buf->params = *params; + buf->params.initial_data = NULL; + + struct d3d_buf *buf_p = buf->priv = talloc_zero(buf, struct d3d_buf); + + D3D11_SUBRESOURCE_DATA *pdata = NULL; + if (params->initial_data) + pdata = &(D3D11_SUBRESOURCE_DATA) { .pSysMem = params->initial_data }; + + D3D11_BUFFER_DESC desc = { .ByteWidth = params->size }; + switch (params->type) { + case RA_BUF_TYPE_SHADER_STORAGE: + desc.BindFlags = 
D3D11_BIND_UNORDERED_ACCESS; + desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float)); + desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; + break; + case RA_BUF_TYPE_UNIFORM: + desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float[4])); + break; + } + + hr = ID3D11Device_CreateBuffer(p->dev, &desc, pdata, &buf_p->buf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create buffer: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + if (params->host_mutable) { + // D3D11 doesn't allow constant buffer updates that aren't aligned to a + // full constant boundary (vec4,) and some drivers don't allow partial + // constant buffer updates at all, but the RA consumer is allowed to + // partially update an ra_buf. The best way to handle partial updates + // without causing a pipeline stall is probably to keep a copy of the + // data in a staging buffer. + + desc.Usage = D3D11_USAGE_STAGING; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + desc.BindFlags = 0; + hr = ID3D11Device_CreateBuffer(p->dev, &desc, NULL, &buf_p->staging); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create staging buffer: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + } + + if (params->type == RA_BUF_TYPE_SHADER_STORAGE) { + D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = { + .Format = DXGI_FORMAT_R32_TYPELESS, + .ViewDimension = D3D11_UAV_DIMENSION_BUFFER, + .Buffer = { + .NumElements = desc.ByteWidth / sizeof(float), + .Flags = D3D11_BUFFER_UAV_FLAG_RAW, + }, + }; + hr = ID3D11Device_CreateUnorderedAccessView(p->dev, + (ID3D11Resource *)buf_p->buf, &udesc, &buf_p->uav); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + return buf; +error: + buf_destroy(ra, buf); + return NULL; +} + +static void buf_resolve(struct ra *ra, struct ra_buf *buf) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_buf *buf_p = buf->priv; + + assert(buf->params.host_mutable); + if (!buf_p->data) + 
return; + + ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)buf_p->staging, 0); + buf_p->data = NULL; + + // Synchronize the GPU buffer with the staging buffer + ID3D11DeviceContext_CopyResource(p->ctx, (ID3D11Resource *)buf_p->buf, + (ID3D11Resource *)buf_p->staging); +} + +static void buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, + const void *data, size_t size) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_buf *buf_p = buf->priv; + HRESULT hr; + + if (!buf_p->data) { + // If this is the first update after the buffer was created or after it + // has been used in a renderpass, it will be unmapped, so map it + D3D11_MAPPED_SUBRESOURCE map = {0}; + hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource *)buf_p->staging, + 0, D3D11_MAP_WRITE, 0, &map); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to map resource\n"); + return; + } + buf_p->data = map.pData; + } + + char *cdata = buf_p->data; + memcpy(cdata + offset, data, size); +} + +static const char *get_shader_target(struct ra *ra, enum glsl_shader type) +{ + struct ra_d3d11 *p = ra->priv; + switch (p->fl) { + default: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_5_0"; + case GLSL_SHADER_FRAGMENT: return "ps_5_0"; + case GLSL_SHADER_COMPUTE: return "cs_5_0"; + } + break; + case D3D_FEATURE_LEVEL_10_1: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_4_1"; + case GLSL_SHADER_FRAGMENT: return "ps_4_1"; + case GLSL_SHADER_COMPUTE: return "cs_4_1"; + } + break; + case D3D_FEATURE_LEVEL_10_0: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_4_0"; + case GLSL_SHADER_FRAGMENT: return "ps_4_0"; + case GLSL_SHADER_COMPUTE: return "cs_4_0"; + } + break; + case D3D_FEATURE_LEVEL_9_3: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_3"; + case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_3"; + } + break; + case D3D_FEATURE_LEVEL_9_2: + case D3D_FEATURE_LEVEL_9_1: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_1"; + case GLSL_SHADER_FRAGMENT: 
return "ps_4_0_level_9_1"; + } + break; + } + return NULL; +} + +static const char *shader_type_name(enum glsl_shader type) +{ + switch (type) { + case GLSL_SHADER_VERTEX: return "vertex"; + case GLSL_SHADER_FRAGMENT: return "fragment"; + case GLSL_SHADER_COMPUTE: return "compute"; + default: return "unknown"; + } +} + +static bool setup_clear_rpass(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + ID3DBlob *vs_blob = NULL; + ID3DBlob *ps_blob = NULL; + HRESULT hr; + + hr = p->D3DCompile(clear_vs, sizeof(clear_vs), NULL, NULL, NULL, "main", + get_shader_target(ra, GLSL_SHADER_VERTEX), + D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to compile clear() vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreateVertexShader(p->dev, + ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), + NULL, &p->clear_vs); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = p->D3DCompile(clear_ps, sizeof(clear_ps), NULL, NULL, NULL, "main", + get_shader_target(ra, GLSL_SHADER_FRAGMENT), + D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &ps_blob, NULL); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to compile clear() pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreatePixelShader(p->dev, + ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob), + NULL, &p->clear_ps); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_INPUT_ELEMENT_DESC in_descs[] = { + { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 }, + }; + hr = ID3D11Device_CreateInputLayout(p->dev, in_descs, + MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob), + ID3D10Blob_GetBufferSize(vs_blob), &p->clear_layout); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() IA layout: %s\n", + 
mp_HRESULT_to_str(hr)); + goto error; + } + + // clear() always draws to a quad covering the whole viewport + static const float verts[] = { + -1, -1, + 1, -1, + 1, 1, + -1, 1, + -1, -1, + 1, 1, + }; + D3D11_BUFFER_DESC vdesc = { + .ByteWidth = sizeof(verts), + .Usage = D3D11_USAGE_IMMUTABLE, + .BindFlags = D3D11_BIND_VERTEX_BUFFER, + }; + D3D11_SUBRESOURCE_DATA vdata = { + .pSysMem = verts, + }; + hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, &vdata, &p->clear_vbuf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() vertex buffer: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_BUFFER_DESC cdesc = { + .ByteWidth = sizeof(float[4]), + .BindFlags = D3D11_BIND_CONSTANT_BUFFER, + }; + hr = ID3D11Device_CreateBuffer(p->dev, &cdesc, NULL, &p->clear_cbuf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() constant buffer: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(ps_blob); + return true; +error: + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(ps_blob); + return false; +} + +static void clear_rpass(struct ra *ra, struct ra_tex *tex, float color[4], + struct mp_rect *rc) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *tex_p = tex->priv; + struct ra_tex_params *params = &tex->params; + + ID3D11DeviceContext_UpdateSubresource(p->ctx, + (ID3D11Resource *)p->clear_cbuf, 0, NULL, color, 0, 0); + + ID3D11DeviceContext_IASetInputLayout(p->ctx, p->clear_layout); + ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->clear_vbuf, + &(UINT) { sizeof(float[2]) }, &(UINT) { 0 }); + ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx, + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + ID3D11DeviceContext_VSSetShader(p->ctx, p->clear_vs, NULL, 0); + + ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) { + .Width = params->w, + .Height = params->h, + .MinDepth = 0, + .MaxDepth = 1, + })); + ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) { + .left = rc->x0, + .top = rc->y0, + .right 
= rc->x1, + .bottom = rc->y1, + })); + ID3D11DeviceContext_PSSetShader(p->ctx, p->clear_ps, NULL, 0); + ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1, &p->clear_cbuf); + + ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &tex_p->rtv, NULL); + ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL, + D3D11_DEFAULT_SAMPLE_MASK); + + ID3D11DeviceContext_Draw(p->ctx, 6, 0); + + ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1, + &(ID3D11Buffer *){ NULL }); + ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL); +} + +static void clear(struct ra *ra, struct ra_tex *tex, float color[4], + struct mp_rect *rc) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *tex_p = tex->priv; + struct ra_tex_params *params = &tex->params; + + if (!tex_p->rtv) + return; + + if (rc->x0 || rc->y0 || rc->x1 != params->w || rc->y1 != params->h) { + if (p->has_clear_view) { + ID3D11DeviceContext1_ClearView(p->ctx1, (ID3D11View *)tex_p->rtv, + color, (&(D3D11_RECT) { + .left = rc->x0, + .top = rc->y0, + .right = rc->x1, + .bottom = rc->y1, + }), 1); + } else { + clear_rpass(ra, tex, color, rc); + } + } else { + ID3D11DeviceContext_ClearRenderTargetView(p->ctx, tex_p->rtv, color); + } +} + +static bool setup_blit_rpass(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + ID3DBlob *vs_blob = NULL; + ID3DBlob *float_ps_blob = NULL; + HRESULT hr; + + hr = p->D3DCompile(blit_vs, sizeof(blit_vs), NULL, NULL, NULL, "main", + get_shader_target(ra, GLSL_SHADER_VERTEX), + D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to compile blit() vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreateVertexShader(p->dev, + ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), + NULL, &p->blit_vs); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = p->D3DCompile(blit_float_ps, sizeof(blit_float_ps), NULL, 
NULL, NULL, + "main", get_shader_target(ra, GLSL_SHADER_FRAGMENT), + D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &float_ps_blob, NULL); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to compile blit() pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreatePixelShader(p->dev, + ID3D10Blob_GetBufferPointer(float_ps_blob), + ID3D10Blob_GetBufferSize(float_ps_blob), + NULL, &p->blit_float_ps); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_INPUT_ELEMENT_DESC in_descs[] = { + { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 8 }, + }; + hr = ID3D11Device_CreateInputLayout(p->dev, in_descs, + MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob), + ID3D10Blob_GetBufferSize(vs_blob), &p->blit_layout); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() IA layout: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_BUFFER_DESC vdesc = { + .ByteWidth = sizeof(struct blit_vert[6]), + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = D3D11_BIND_VERTEX_BUFFER, + }; + hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, NULL, &p->blit_vbuf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() vertex buffer: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + // Blit always uses point sampling, regardless of the source texture + D3D11_SAMPLER_DESC sdesc = { + .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP, + .ComparisonFunc = D3D11_COMPARISON_NEVER, + .MinLOD = 0, + .MaxLOD = D3D11_FLOAT32_MAX, + .MaxAnisotropy = 1, + }; + hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &p->blit_sampler); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() sampler: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(float_ps_blob); + return true; +error: + SAFE_RELEASE(vs_blob); + 
SAFE_RELEASE(float_ps_blob); + return false; +} + +static void blit_rpass(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, + struct mp_rect *dst_rc, struct mp_rect *src_rc) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *dst_p = dst->priv; + struct d3d_tex *src_p = src->priv; + + float u_min = (double)src_rc->x0 / src->params.w; + float u_max = (double)src_rc->x1 / src->params.w; + float v_min = (double)src_rc->y0 / src->params.h; + float v_max = (double)src_rc->y1 / src->params.h; + + struct blit_vert verts[6] = { + { .x = -1, .y = -1, .u = u_min, .v = v_max }, + { .x = 1, .y = -1, .u = u_max, .v = v_max }, + { .x = 1, .y = 1, .u = u_max, .v = v_min }, + { .x = -1, .y = 1, .u = u_min, .v = v_min }, + }; + verts[4] = verts[0]; + verts[5] = verts[2]; + ID3D11DeviceContext_UpdateSubresource(p->ctx, + (ID3D11Resource *)p->blit_vbuf, 0, NULL, verts, 0, 0); + + ID3D11DeviceContext_IASetInputLayout(p->ctx, p->blit_layout); + ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->blit_vbuf, + &(UINT) { sizeof(verts[0]) }, &(UINT) { 0 }); + ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx, + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + ID3D11DeviceContext_VSSetShader(p->ctx, p->blit_vs, NULL, 0); + + ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) { + .TopLeftX = dst_rc->x0, + .TopLeftY = dst_rc->y0, + .Width = mp_rect_w(*dst_rc), + .Height = mp_rect_h(*dst_rc), + .MinDepth = 0, + .MaxDepth = 1, + })); + ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) { + .left = dst_rc->x0, + .top = dst_rc->y0, + .right = dst_rc->x1, + .bottom = dst_rc->y1, + })); + + ID3D11DeviceContext_PSSetShader(p->ctx, p->blit_float_ps, NULL, 0); + ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1, &src_p->srv); + ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1, &p->blit_sampler); + + ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &dst_p->rtv, NULL); + ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL, + D3D11_DEFAULT_SAMPLE_MASK); + + 
ID3D11DeviceContext_Draw(p->ctx, 6, 0); + + ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1, + &(ID3D11ShaderResourceView *) { NULL }); + ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1, + &(ID3D11SamplerState *) { NULL }); + ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL); +} + +static void blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, + struct mp_rect *dst_rc_ptr, struct mp_rect *src_rc_ptr) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *dst_p = dst->priv; + struct d3d_tex *src_p = src->priv; + struct mp_rect dst_rc = *dst_rc_ptr; + struct mp_rect src_rc = *src_rc_ptr; + + assert(dst->params.dimensions == 2); + assert(src->params.dimensions == 2); + + // A zero-sized target rectangle is a no-op + if (!mp_rect_w(dst_rc) || !mp_rect_h(dst_rc)) + return; + + // ra.h seems to imply that both dst_rc and src_rc can be flipped, but it's + // easier for blit_rpass() if only src_rc can be flipped, so unflip dst_rc. + if (dst_rc.x0 > dst_rc.x1) { + MPSWAP(int, dst_rc.x0, dst_rc.x1); + MPSWAP(int, src_rc.x0, src_rc.x1); + } + if (dst_rc.y0 > dst_rc.y1) { + MPSWAP(int, dst_rc.y0, dst_rc.y1); + MPSWAP(int, src_rc.y0, src_rc.y1); + } + + // If format conversion, stretching or flipping is required, a renderpass + // must be used + if (dst->params.format != src->params.format || + mp_rect_w(dst_rc) != mp_rect_w(src_rc) || + mp_rect_h(dst_rc) != mp_rect_h(src_rc)) + { + blit_rpass(ra, dst, src, &dst_rc, &src_rc); + } else { + int dst_sr = dst_p->array_slice >= 0 ? dst_p->array_slice : 0; + int src_sr = src_p->array_slice >= 0 ? 
src_p->array_slice : 0; + ID3D11DeviceContext_CopySubresourceRegion(p->ctx, dst_p->res, dst_sr, + dst_rc.x0, dst_rc.y0, 0, src_p->res, src_sr, (&(D3D11_BOX) { + .left = src_rc.x0, + .top = src_rc.y0, + .front = 0, + .right = src_rc.x1, + .bottom = src_rc.y1, + .back = 1, + })); + } +} + +static int desc_namespace(enum ra_vartype type) +{ + // Images and SSBOs both use UAV bindings + if (type == RA_VARTYPE_IMG_W) + type = RA_VARTYPE_BUF_RW; + return type; +} + +static bool compile_glsl(struct ra *ra, enum glsl_shader type, + const char *glsl, ID3DBlob **out) +{ + struct ra_d3d11 *p = ra->priv; + struct spirv_compiler *spirv = p->spirv; + void *ta_ctx = talloc_new(NULL); + crossc_compiler *cross = NULL; + const char *hlsl = NULL; + ID3DBlob *errors = NULL; + bool success = false; + HRESULT hr; + + int cross_shader_model; + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + cross_shader_model = 50; + } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) { + cross_shader_model = 41; + } else { + cross_shader_model = 40; + } + + int64_t start_us = mp_time_us(); + + bstr spv_module; + if (!spirv->fns->compile_glsl(spirv, ta_ctx, type, glsl, &spv_module)) + goto done; + + int64_t shaderc_us = mp_time_us(); + + cross = crossc_hlsl_create((uint32_t*)spv_module.start, + spv_module.len / sizeof(uint32_t)); + + crossc_hlsl_set_shader_model(cross, cross_shader_model); + crossc_set_flip_vert_y(cross, type == GLSL_SHADER_VERTEX); + + hlsl = crossc_compile(cross); + if (!hlsl) { + MP_ERR(ra, "SPIRV-Cross failed: %s\n", crossc_strerror(cross)); + goto done; + } + + int64_t cross_us = mp_time_us(); + + hr = p->D3DCompile(hlsl, strlen(hlsl), NULL, NULL, NULL, "main", + get_shader_target(ra, type), D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, out, + &errors); + if (FAILED(hr)) { + MP_ERR(ra, "D3DCompile failed: %s\n%.*s", mp_HRESULT_to_str(hr), + (int)ID3D10Blob_GetBufferSize(errors), + (char*)ID3D10Blob_GetBufferPointer(errors)); + goto done; + } + + int64_t d3dcompile_us = mp_time_us(); + + MP_VERBOSE(ra, 
"Compiled a %s shader in %lldus\n", shader_type_name(type), + d3dcompile_us - start_us); + MP_VERBOSE(ra, "shaderc: %lldus, SPIRV-Cross: %lldus, D3DCompile: %lldus\n", + shaderc_us - start_us, + cross_us - shaderc_us, + d3dcompile_us - cross_us); + + success = true; +done:; + int level = success ? MSGL_DEBUG : MSGL_ERR; + MP_MSG(ra, level, "GLSL source:\n"); + mp_log_source(ra->log, level, glsl); + if (hlsl) { + MP_MSG(ra, level, "HLSL source:\n"); + mp_log_source(ra->log, level, hlsl); + } + SAFE_RELEASE(errors); + crossc_destroy(cross); + talloc_free(ta_ctx); + return success; +} + +static void renderpass_destroy(struct ra *ra, struct ra_renderpass *pass) +{ + if (!pass) + return; + struct d3d_rpass *pass_p = pass->priv; + + SAFE_RELEASE(pass_p->vs); + SAFE_RELEASE(pass_p->ps); + SAFE_RELEASE(pass_p->cs); + SAFE_RELEASE(pass_p->layout); + SAFE_RELEASE(pass_p->bstate); + talloc_free(pass); +} + +static D3D11_BLEND map_ra_blend(enum ra_blend blend) +{ + switch (blend) { + default: + case RA_BLEND_ZERO: return D3D11_BLEND_ZERO; + case RA_BLEND_ONE: return D3D11_BLEND_ONE; + case RA_BLEND_SRC_ALPHA: return D3D11_BLEND_SRC_ALPHA; + case RA_BLEND_ONE_MINUS_SRC_ALPHA: return D3D11_BLEND_INV_SRC_ALPHA; + }; +} + +static size_t vbuf_upload(struct ra *ra, void *data, size_t size) +{ + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + // Arbitrary size limit in case there is an insane number of vertices + if (size > 1e9) { + MP_ERR(ra, "Vertex buffer is too large\n"); + return -1; + } + + // If the vertex data doesn't fit, realloc the vertex buffer + if (size > p->vbuf_size) { + size_t new_size = p->vbuf_size; + // Arbitrary base size + if (!new_size) + new_size = 64 * 1024; + while (new_size < size) + new_size *= 2; + + ID3D11Buffer *new_buf; + D3D11_BUFFER_DESC vbuf_desc = { + .ByteWidth = new_size, + .Usage = D3D11_USAGE_DYNAMIC, + .BindFlags = D3D11_BIND_VERTEX_BUFFER, + .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE, + }; + hr = ID3D11Device_CreateBuffer(p->dev, &vbuf_desc, 
NULL, &new_buf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create vertex buffer: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + + SAFE_RELEASE(p->vbuf); + p->vbuf = new_buf; + p->vbuf_size = new_size; + p->vbuf_used = 0; + } + + bool discard = false; + size_t offset = p->vbuf_used; + if (offset + size > p->vbuf_size) { + // We reached the end of the buffer, so discard and wrap around + discard = true; + offset = 0; + } + + D3D11_MAPPED_SUBRESOURCE map = { 0 }; + hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource *)p->vbuf, 0, + discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE, + 0, &map); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to map vertex buffer: %s\n", mp_HRESULT_to_str(hr)); + return -1; + } + + char *cdata = map.pData; + memcpy(cdata + offset, data, size); + + ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)p->vbuf, 0); + + p->vbuf_used = offset + size; + return offset; +} + +static const char cache_magic[4] = "RD11"; +static const int cache_version = 2; + +struct cache_header { + char magic[sizeof(cache_magic)]; + int cache_version; + char compiler[SPIRV_NAME_MAX_LEN]; + int spv_compiler_version; + uint32_t cross_version; + struct dll_version d3d_compiler_version; + int feature_level; + size_t vert_bytecode_len; + size_t frag_bytecode_len; + size_t comp_bytecode_len; +}; + +static void load_cached_program(struct ra *ra, + const struct ra_renderpass_params *params, + bstr *vert_bc, + bstr *frag_bc, + bstr *comp_bc) +{ + struct ra_d3d11 *p = ra->priv; + struct spirv_compiler *spirv = p->spirv; + bstr cache = params->cached_program; + + if (cache.len < sizeof(struct cache_header)) + return; + + struct cache_header *header = (struct cache_header *)cache.start; + cache = bstr_cut(cache, sizeof(*header)); + + if (strncmp(header->magic, cache_magic, sizeof(cache_magic)) != 0) + return; + if (header->cache_version != cache_version) + return; + if (strncmp(header->compiler, spirv->name, sizeof(header->compiler)) != 0) + return; + if 
(header->spv_compiler_version != spirv->compiler_version) + return; + if (header->cross_version != crossc_version()) + return; + if (!dll_version_equal(header->d3d_compiler_version, p->d3d_compiler_ver)) + return; + if (header->feature_level != p->fl) + return; + + if (header->vert_bytecode_len && vert_bc) { + *vert_bc = bstr_splice(cache, 0, header->vert_bytecode_len); + MP_VERBOSE(ra, "Using cached vertex shader\n"); + } + cache = bstr_cut(cache, header->vert_bytecode_len); + + if (header->frag_bytecode_len && frag_bc) { + *frag_bc = bstr_splice(cache, 0, header->frag_bytecode_len); + MP_VERBOSE(ra, "Using cached fragment shader\n"); + } + cache = bstr_cut(cache, header->frag_bytecode_len); + + if (header->comp_bytecode_len && comp_bc) { + *comp_bc = bstr_splice(cache, 0, header->comp_bytecode_len); + MP_VERBOSE(ra, "Using cached compute shader\n"); + } + cache = bstr_cut(cache, header->comp_bytecode_len); +} + +static void save_cached_program(struct ra *ra, struct ra_renderpass *pass, + bstr vert_bc, + bstr frag_bc, + bstr comp_bc) +{ + struct ra_d3d11 *p = ra->priv; + struct spirv_compiler *spirv = p->spirv; + + struct cache_header header = { + .cache_version = cache_version, + .spv_compiler_version = p->spirv->compiler_version, + .cross_version = crossc_version(), + .d3d_compiler_version = p->d3d_compiler_ver, + .feature_level = p->fl, + .vert_bytecode_len = vert_bc.len, + .frag_bytecode_len = frag_bc.len, + .comp_bytecode_len = comp_bc.len, + }; + strncpy(header.magic, cache_magic, sizeof(header.magic)); + strncpy(header.compiler, spirv->name, sizeof(header.compiler)); + + struct bstr *prog = &pass->params.cached_program; + bstr_xappend(pass, prog, (bstr){ (char *) &header, sizeof(header) }); + bstr_xappend(pass, prog, vert_bc); + bstr_xappend(pass, prog, frag_bc); + bstr_xappend(pass, prog, comp_bc); +} + +static struct ra_renderpass *renderpass_create_raster(struct ra *ra, + struct ra_renderpass *pass, const struct ra_renderpass_params *params) +{ + struct 
ra_d3d11 *p = ra->priv; + struct d3d_rpass *pass_p = pass->priv; + ID3DBlob *vs_blob = NULL; + ID3DBlob *ps_blob = NULL; + HRESULT hr; + + // load_cached_program will load compiled shader bytecode into vert_bc and + // frag_bc if the cache is valid. If not, vert_bc/frag_bc will remain NULL. + bstr vert_bc = {0}; + bstr frag_bc = {0}; + load_cached_program(ra, params, &vert_bc, &frag_bc, NULL); + + if (!vert_bc.start) { + if (!compile_glsl(ra, GLSL_SHADER_VERTEX, params->vertex_shader, + &vs_blob)) + goto error; + vert_bc = (bstr){ + ID3D10Blob_GetBufferPointer(vs_blob), + ID3D10Blob_GetBufferSize(vs_blob), + }; + } + + hr = ID3D11Device_CreateVertexShader(p->dev, vert_bc.start, vert_bc.len, + NULL, &pass_p->vs); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + if (!frag_bc.start) { + if (!compile_glsl(ra, GLSL_SHADER_FRAGMENT, params->frag_shader, + &ps_blob)) + goto error; + frag_bc = (bstr){ + ID3D10Blob_GetBufferPointer(ps_blob), + ID3D10Blob_GetBufferSize(ps_blob), + }; + } + + hr = ID3D11Device_CreatePixelShader(p->dev, frag_bc.start, frag_bc.len, + NULL, &pass_p->ps); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_INPUT_ELEMENT_DESC *in_descs = talloc_array(pass, + D3D11_INPUT_ELEMENT_DESC, params->num_vertex_attribs); + for (int i = 0; i < params->num_vertex_attribs; i++) { + struct ra_renderpass_input *inp = ¶ms->vertex_attribs[i]; + + DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN; + switch (inp->type) { + case RA_VARTYPE_FLOAT: + switch (inp->dim_v) { + case 1: fmt = DXGI_FORMAT_R32_FLOAT; break; + case 2: fmt = DXGI_FORMAT_R32G32_FLOAT; break; + case 3: fmt = DXGI_FORMAT_R32G32B32_FLOAT; break; + case 4: fmt = DXGI_FORMAT_R32G32B32A32_FLOAT; break; + } + break; + case RA_VARTYPE_BYTE_UNORM: + switch (inp->dim_v) { + case 1: fmt = DXGI_FORMAT_R8_UNORM; break; + case 2: fmt = DXGI_FORMAT_R8G8_UNORM; break; + // There 
is no 3-component 8-bit DXGI format + case 4: fmt = DXGI_FORMAT_R8G8B8A8_UNORM; break; + } + break; + } + if (fmt == DXGI_FORMAT_UNKNOWN) { + MP_ERR(ra, "Could not find suitable vertex input format\n"); + goto error; + } + + in_descs[i] = (D3D11_INPUT_ELEMENT_DESC) { + // The semantic name doesn't mean much and is just used to verify + // the input description matches the shader. SPIRV-Cross always + // uses TEXCOORD, so we should too. + .SemanticName = "TEXCOORD", + .SemanticIndex = i, + .AlignedByteOffset = inp->offset, + .Format = fmt, + }; + } + + hr = ID3D11Device_CreateInputLayout(p->dev, in_descs, + params->num_vertex_attribs, vert_bc.start, vert_bc.len, + &pass_p->layout); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create IA layout: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + talloc_free(in_descs); + in_descs = NULL; + + D3D11_BLEND_DESC bdesc = { + .RenderTarget[0] = { + .BlendEnable = params->enable_blend, + .SrcBlend = map_ra_blend(params->blend_src_rgb), + .DestBlend = map_ra_blend(params->blend_dst_rgb), + .BlendOp = D3D11_BLEND_OP_ADD, + .SrcBlendAlpha = map_ra_blend(params->blend_src_alpha), + .DestBlendAlpha = map_ra_blend(params->blend_dst_alpha), + .BlendOpAlpha = D3D11_BLEND_OP_ADD, + .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL, + }, + }; + hr = ID3D11Device_CreateBlendState(p->dev, &bdesc, &pass_p->bstate); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blend state: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + save_cached_program(ra, pass, vert_bc, frag_bc, (bstr){0}); + + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(ps_blob); + return pass; + +error: + renderpass_destroy(ra, pass); + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(ps_blob); + return NULL; +} + +static struct ra_renderpass *renderpass_create_compute(struct ra *ra, + struct ra_renderpass *pass, const struct ra_renderpass_params *params) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_rpass *pass_p = pass->priv; + ID3DBlob *cs_blob = NULL; + HRESULT hr; + + bstr 
comp_bc = {0}; + load_cached_program(ra, params, NULL, NULL, &comp_bc); + + if (!comp_bc.start) { + if (!compile_glsl(ra, GLSL_SHADER_COMPUTE, params->compute_shader, + &cs_blob)) + goto error; + comp_bc = (bstr){ + ID3D10Blob_GetBufferPointer(cs_blob), + ID3D10Blob_GetBufferSize(cs_blob), + }; + } + hr = ID3D11Device_CreateComputeShader(p->dev, comp_bc.start, comp_bc.len, + NULL, &pass_p->cs); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create compute shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + save_cached_program(ra, pass, (bstr){0}, (bstr){0}, comp_bc); + + SAFE_RELEASE(cs_blob); + return pass; +error: + renderpass_destroy(ra, pass); + SAFE_RELEASE(cs_blob); + return NULL; +} + +static struct ra_renderpass *renderpass_create(struct ra *ra, + const struct ra_renderpass_params *params) +{ + struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass); + pass->params = *ra_renderpass_params_copy(pass, params); + pass->params.cached_program = (bstr){0}; + pass->priv = talloc_zero(pass, struct d3d_rpass); + + if (params->type == RA_RENDERPASS_TYPE_COMPUTE) { + return renderpass_create_compute(ra, pass, params); + } else { + return renderpass_create_raster(ra, pass, params); + } +} + +static void renderpass_run_raster(struct ra *ra, + const struct ra_renderpass_run_params *params, + ID3D11Buffer *ubos[], int ubos_len, + ID3D11SamplerState *samplers[], + ID3D11ShaderResourceView *srvs[], + int samplers_len, + ID3D11UnorderedAccessView *uavs[], + int uavs_len) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_renderpass *pass = params->pass; + struct d3d_rpass *pass_p = pass->priv; + + UINT vbuf_offset = vbuf_upload(ra, params->vertex_data, + pass->params.vertex_stride * params->vertex_count); + if (vbuf_offset == (UINT)-1) + return; + + ID3D11DeviceContext_IASetInputLayout(p->ctx, pass_p->layout); + ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->vbuf, + &pass->params.vertex_stride, &vbuf_offset); + 
ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx, + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + ID3D11DeviceContext_VSSetShader(p->ctx, pass_p->vs, NULL, 0); + + ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) { + .TopLeftX = params->viewport.x0, + .TopLeftY = params->viewport.y0, + .Width = mp_rect_w(params->viewport), + .Height = mp_rect_h(params->viewport), + .MinDepth = 0, + .MaxDepth = 1, + })); + ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) { + .left = params->scissors.x0, + .top = params->scissors.y0, + .right = params->scissors.x1, + .bottom = params->scissors.y1, + })); + ID3D11DeviceContext_PSSetShader(p->ctx, pass_p->ps, NULL, 0); + ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, ubos_len, ubos); + ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, samplers_len, srvs); + ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, samplers_len, samplers); + + struct ra_tex *target = params->target; + struct d3d_tex *target_p = target->priv; + ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(p->ctx, 1, + &target_p->rtv, NULL, 1, uavs_len, uavs, NULL); + ID3D11DeviceContext_OMSetBlendState(p->ctx, pass_p->bstate, NULL, + D3D11_DEFAULT_SAMPLE_MASK); + + ID3D11DeviceContext_Draw(p->ctx, params->vertex_count, 0); + + // Unbind everything. It's easier to do this than to actually track state, + // and if we leave the RTV bound, it could trip up D3D's conflict checker. 
+ for (int i = 0; i < ubos_len; i++) + ubos[i] = NULL; + for (int i = 0; i < samplers_len; i++) { + samplers[i] = NULL; + srvs[i] = NULL; + } + for (int i = 0; i < uavs_len; i++) + uavs[i] = NULL; + ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, ubos_len, ubos); + ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, samplers_len, srvs); + ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, samplers_len, samplers); + ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(p->ctx, 0, + NULL, NULL, 1, uavs_len, uavs, NULL); +} + +static void renderpass_run_compute(struct ra *ra, + const struct ra_renderpass_run_params *params, + ID3D11Buffer *ubos[], int ubos_len, + ID3D11SamplerState *samplers[], + ID3D11ShaderResourceView *srvs[], + int samplers_len, + ID3D11UnorderedAccessView *uavs[], + int uavs_len) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_renderpass *pass = params->pass; + struct d3d_rpass *pass_p = pass->priv; + + ID3D11DeviceContext_CSSetShader(p->ctx, pass_p->cs, NULL, 0); + ID3D11DeviceContext_CSSetConstantBuffers(p->ctx, 0, ubos_len, ubos); + ID3D11DeviceContext_CSSetShaderResources(p->ctx, 0, samplers_len, srvs); + ID3D11DeviceContext_CSSetSamplers(p->ctx, 0, samplers_len, samplers); + ID3D11DeviceContext_CSSetUnorderedAccessViews(p->ctx, 0, uavs_len, uavs, + NULL); + + ID3D11DeviceContext_Dispatch(p->ctx, params->compute_groups[0], + params->compute_groups[1], + params->compute_groups[2]); + + for (int i = 0; i < ubos_len; i++) + ubos[i] = NULL; + for (int i = 0; i < samplers_len; i++) { + samplers[i] = NULL; + srvs[i] = NULL; + } + for (int i = 0; i < uavs_len; i++) + uavs[i] = NULL; + ID3D11DeviceContext_CSSetConstantBuffers(p->ctx, 0, ubos_len, ubos); + ID3D11DeviceContext_CSSetShaderResources(p->ctx, 0, samplers_len, srvs); + ID3D11DeviceContext_CSSetSamplers(p->ctx, 0, samplers_len, samplers); + ID3D11DeviceContext_CSSetUnorderedAccessViews(p->ctx, 0, uavs_len, uavs, + NULL); +} + +static void renderpass_run(struct ra *ra, + const struct 
ra_renderpass_run_params *params) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_renderpass *pass = params->pass; + enum ra_renderpass_type type = pass->params.type; + + ID3D11Buffer *ubos[D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT] = {0}; + int ubos_len = 0; + + ID3D11SamplerState *samplers[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0}; + ID3D11ShaderResourceView *srvs[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0}; + int samplers_len = 0; + + ID3D11UnorderedAccessView *uavs[D3D11_1_UAV_SLOT_COUNT] = {0}; + int uavs_len = 0; + + // In a raster pass, one of the UAV slots is used by the runtime for the RTV + int uavs_max = type == RA_RENDERPASS_TYPE_COMPUTE ? p->max_uavs + : p->max_uavs - 1; + + // Gather the input variables used in this pass. These will be mapped to + // HLSL registers. + for (int i = 0; i < params->num_values; i++) { + struct ra_renderpass_input_val *val = ¶ms->values[i]; + int binding = pass->params.inputs[val->index].binding; + switch (pass->params.inputs[val->index].type) { + case RA_VARTYPE_BUF_RO: + if (binding > MP_ARRAY_SIZE(ubos)) { + MP_ERR(ra, "Too many constant buffers in pass\n"); + return; + } + struct ra_buf *buf_ro = *(struct ra_buf **)val->data; + buf_resolve(ra, buf_ro); + struct d3d_buf *buf_ro_p = buf_ro->priv; + ubos[binding] = buf_ro_p->buf; + ubos_len = MPMAX(ubos_len, binding + 1); + break; + case RA_VARTYPE_BUF_RW: + if (binding > uavs_max) { + MP_ERR(ra, "Too many UAVs in pass\n"); + return; + } + struct ra_buf *buf_rw = *(struct ra_buf **)val->data; + buf_resolve(ra, buf_rw); + struct d3d_buf *buf_rw_p = buf_rw->priv; + uavs[binding] = buf_rw_p->uav; + uavs_len = MPMAX(uavs_len, binding + 1); + break; + case RA_VARTYPE_TEX: + if (binding > MP_ARRAY_SIZE(samplers)) { + MP_ERR(ra, "Too many textures in pass\n"); + return; + } + struct ra_tex *tex = *(struct ra_tex **)val->data; + struct d3d_tex *tex_p = tex->priv; + samplers[binding] = tex_p->sampler; + srvs[binding] = tex_p->srv; + samplers_len = MPMAX(samplers_len, 
binding + 1); + break; + case RA_VARTYPE_IMG_W: + if (binding > uavs_max) { + MP_ERR(ra, "Too many UAVs in pass\n"); + return; + } + struct ra_tex *img = *(struct ra_tex **)val->data; + struct d3d_tex *img_p = img->priv; + uavs[binding] = img_p->uav; + uavs_len = MPMAX(uavs_len, binding + 1); + break; + } + } + + if (type == RA_RENDERPASS_TYPE_COMPUTE) { + renderpass_run_compute(ra, params, ubos, ubos_len, samplers, srvs, + samplers_len, uavs, uavs_len); + } else { + renderpass_run_raster(ra, params, ubos, ubos_len, samplers, srvs, + samplers_len, uavs, uavs_len); + } +} + +static void timer_destroy(struct ra *ra, ra_timer *ratimer) +{ + if (!ratimer) + return; + struct d3d_timer *timer = ratimer; + + SAFE_RELEASE(timer->ts_start); + SAFE_RELEASE(timer->ts_end); + SAFE_RELEASE(timer->disjoint); + talloc_free(timer); +} + +static ra_timer *timer_create(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + if (!p->has_timestamp_queries) + return NULL; + + struct d3d_timer *timer = talloc_zero(NULL, struct d3d_timer); + HRESULT hr; + + hr = ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &timer->ts_start); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create start query: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &timer->ts_end); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create end query: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + // Measuring duration in D3D11 requires three queries: start and end + // timestamps, and a disjoint query containing a flag which says whether + // the timestamps are usable or if a discontinuity occured between them, + // like a change in power state or clock speed. The disjoint query also + // contains the timer frequency, so the timestamps are useless without it. 
+ hr = ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP_DISJOINT }, &timer->disjoint); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create timer query: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + return timer; +error: + timer_destroy(ra, timer); + return NULL; +} + +static uint64_t timestamp_to_ns(uint64_t timestamp, uint64_t freq) +{ + static const uint64_t ns_per_s = 1000000000llu; + return timestamp / freq * ns_per_s + timestamp % freq * ns_per_s / freq; +} + +static uint64_t timer_get_result(struct ra *ra, ra_timer *ratimer) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_timer *timer = ratimer; + HRESULT hr; + + UINT64 start, end; + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT dj; + + hr = ID3D11DeviceContext_GetData(p->ctx, + (ID3D11Asynchronous *)timer->ts_end, &end, sizeof(end), + D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (FAILED(hr) || hr == S_FALSE) + return 0; + hr = ID3D11DeviceContext_GetData(p->ctx, + (ID3D11Asynchronous *)timer->ts_start, &start, sizeof(start), + D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (FAILED(hr) || hr == S_FALSE) + return 0; + hr = ID3D11DeviceContext_GetData(p->ctx, + (ID3D11Asynchronous *)timer->disjoint, &dj, sizeof(dj), + D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (FAILED(hr) || hr == S_FALSE || dj.Disjoint || !dj.Frequency) + return 0; + + return timestamp_to_ns(end - start, dj.Frequency); +} + +static void timer_start(struct ra *ra, ra_timer *ratimer) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_timer *timer = ratimer; + + // Latch the last result of this ra_timer (returned by timer_stop) + timer->result = timer_get_result(ra, ratimer); + + ID3D11DeviceContext_Begin(p->ctx, (ID3D11Asynchronous *)timer->disjoint); + ID3D11DeviceContext_End(p->ctx, (ID3D11Asynchronous *)timer->ts_start); +} + +static uint64_t timer_stop(struct ra *ra, ra_timer *ratimer) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_timer *timer = ratimer; + + ID3D11DeviceContext_End(p->ctx, (ID3D11Asynchronous *)timer->ts_end); + 
ID3D11DeviceContext_End(p->ctx, (ID3D11Asynchronous *)timer->disjoint); + + return timer->result; +} + +static int map_msg_severity(D3D11_MESSAGE_SEVERITY sev) +{ + switch (sev) { + case D3D11_MESSAGE_SEVERITY_CORRUPTION: + return MSGL_FATAL; + case D3D11_MESSAGE_SEVERITY_ERROR: + return MSGL_ERR; + case D3D11_MESSAGE_SEVERITY_WARNING: + return MSGL_WARN; + default: + case D3D11_MESSAGE_SEVERITY_INFO: + case D3D11_MESSAGE_SEVERITY_MESSAGE: + return MSGL_DEBUG; + } +} + +static void debug_marker(struct ra *ra, const char *msg) +{ + struct ra_d3d11 *p = ra->priv; + void *talloc_ctx = talloc_new(NULL); + HRESULT hr; + + if (!p->iqueue) + goto done; + + // Copy debug-layer messages to mpv's log output + bool printed_header = false; + uint64_t messages = ID3D11InfoQueue_GetNumStoredMessages(p->iqueue); + for (uint64_t i = 0; i < messages; i++) { + size_t len; + hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, NULL, &len); + if (FAILED(hr) || !len) + goto done; + + D3D11_MESSAGE *d3dmsg = talloc_size(talloc_ctx, len); + hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, d3dmsg, &len); + if (FAILED(hr)) + goto done; + + int msgl = map_msg_severity(d3dmsg->Severity); + if (mp_msg_test(ra->log, msgl)) { + if (!printed_header) + MP_INFO(ra, "%s:\n", msg); + printed_header = true; + + MP_MSG(ra, msgl, "%d: %.*s\n", (int)d3dmsg->ID, + (int)d3dmsg->DescriptionByteLength, d3dmsg->pDescription); + talloc_free(d3dmsg); + } + } + + ID3D11InfoQueue_ClearStoredMessages(p->iqueue); +done: + talloc_free(talloc_ctx); +} + +static void destroy(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + + // Release everything except the interfaces needed to perform leak checking + SAFE_RELEASE(p->clear_ps); + SAFE_RELEASE(p->clear_vs); + SAFE_RELEASE(p->clear_layout); + SAFE_RELEASE(p->clear_vbuf); + SAFE_RELEASE(p->clear_cbuf); + SAFE_RELEASE(p->blit_float_ps); + SAFE_RELEASE(p->blit_vs); + SAFE_RELEASE(p->blit_layout); + SAFE_RELEASE(p->blit_vbuf); + SAFE_RELEASE(p->blit_sampler); + 
SAFE_RELEASE(p->vbuf); + SAFE_RELEASE(p->ctx1); + SAFE_RELEASE(p->dev1); + SAFE_RELEASE(p->dev); + + if (p->debug && p->ctx) { + // Destroy the device context synchronously so referenced objects don't + // show up in the leak check + ID3D11DeviceContext_ClearState(p->ctx); + ID3D11DeviceContext_Flush(p->ctx); + } + SAFE_RELEASE(p->ctx); + + if (p->debug) { + // Report any leaked objects + debug_marker(ra, "after destroy"); + ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_DETAIL); + debug_marker(ra, "after leak check"); + ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_SUMMARY); + debug_marker(ra, "after leak summary"); + } + SAFE_RELEASE(p->debug); + SAFE_RELEASE(p->iqueue); + + talloc_free(ra); +} + +static struct ra_fns ra_fns_d3d11 = { + .destroy = destroy, + .tex_create = tex_create, + .tex_destroy = tex_destroy, + .tex_upload = tex_upload, + .buf_create = buf_create, + .buf_destroy = buf_destroy, + .buf_update = buf_update, + .clear = clear, + .blit = blit, + .uniform_layout = std140_layout, + .desc_namespace = desc_namespace, + .renderpass_create = renderpass_create, + .renderpass_destroy = renderpass_destroy, + .renderpass_run = renderpass_run, + .timer_create = timer_create, + .timer_destroy = timer_destroy, + .timer_start = timer_start, + .timer_stop = timer_stop, + .debug_marker = debug_marker, +}; + +void ra_d3d11_flush(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + ID3D11DeviceContext_Flush(p->ctx); +} + +static void init_debug_layer(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Debug, + (void**)&p->debug); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to get debug device: %s\n", mp_HRESULT_to_str(hr)); + return; + } + + hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11InfoQueue, + (void**)&p->iqueue); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to get info queue: %s\n", mp_HRESULT_to_str(hr)); + return; + } + + // Store an unlimited amount of messages in the 
buffer. This is fine + // because we flush stored messages regularly (in debug_marker.) + ID3D11InfoQueue_SetMessageCountLimit(p->iqueue, -1); + + // Filter some annoying messages + D3D11_MESSAGE_ID deny_ids[] = { + // This error occurs during context creation when we try to figure out + // the real maximum texture size by attempting to create a texture + // larger than the current feature level allows. + D3D11_MESSAGE_ID_CREATETEXTURE2D_INVALIDDIMENSIONS, + + // These are normal. The RA timer queue habitually reuses timer objects + // without retrieving the results. + D3D11_MESSAGE_ID_QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS, + D3D11_MESSAGE_ID_QUERY_END_ABANDONING_PREVIOUS_RESULTS, + }; + D3D11_INFO_QUEUE_FILTER filter = { + .DenyList = { + .NumIDs = MP_ARRAY_SIZE(deny_ids), + .pIDList = deny_ids, + }, + }; + ID3D11InfoQueue_PushStorageFilter(p->iqueue, &filter); +} + +static struct dll_version get_dll_version(HMODULE dll) +{ + void *ctx = talloc_new(NULL); + struct dll_version ret = { 0 }; + + HRSRC rsrc = FindResourceW(dll, MAKEINTRESOURCEW(VS_VERSION_INFO), + MAKEINTRESOURCEW(VS_FILE_INFO)); + if (!rsrc) + goto done; + DWORD size = SizeofResource(dll, rsrc); + HGLOBAL res = LoadResource(dll, rsrc); + if (!res) + goto done; + void *ptr = LockResource(res); + if (!ptr) + goto done; + void *copy = talloc_memdup(ctx, ptr, size); + + VS_FIXEDFILEINFO *ffi; + UINT ffi_len; + if (!VerQueryValueW(copy, L"\\", (void**)&ffi, &ffi_len)) + goto done; + if (ffi_len < sizeof(*ffi)) + goto done; + + ret.major = HIWORD(ffi->dwFileVersionMS); + ret.minor = LOWORD(ffi->dwFileVersionMS); + ret.build = HIWORD(ffi->dwFileVersionLS); + ret.revision = LOWORD(ffi->dwFileVersionLS); + +done: + talloc_free(ctx); + return ret; +} + +static bool load_d3d_compiler(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + HMODULE d3dcompiler = NULL; + + // Try the inbox D3DCompiler first (Windows 8.1 and up) + if (IsWindows8Point1OrGreater()) { + d3dcompiler = 
LoadLibraryExW(L"d3dcompiler_47.dll", NULL, + LOAD_LIBRARY_SEARCH_SYSTEM32); + } + // Check for a packaged version of d3dcompiler_47.dll + if (!d3dcompiler) + d3dcompiler = LoadLibraryW(L"d3dcompiler_47.dll"); + // Try d3dcompiler_46.dll from the Windows 8 SDK + if (!d3dcompiler) + d3dcompiler = LoadLibraryW(L"d3dcompiler_46.dll"); + // Try d3dcompiler_43.dll from the June 2010 DirectX SDK + if (!d3dcompiler) + d3dcompiler = LoadLibraryW(L"d3dcompiler_43.dll"); + // Can't find any compiler DLL, so give up + if (!d3dcompiler) + return false; + + p->d3d_compiler_ver = get_dll_version(d3dcompiler); + + p->D3DCompile = (pD3DCompile)GetProcAddress(d3dcompiler, "D3DCompile"); + if (!p->D3DCompile) + return false; + return true; +} + +static void find_max_texture_dimension(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + + D3D11_TEXTURE2D_DESC desc = { + .Width = ra->max_texture_wh, + .Height = ra->max_texture_wh, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc.Count = 1, + .Format = DXGI_FORMAT_R8_UNORM, + .BindFlags = D3D11_BIND_SHADER_RESOURCE, + }; + while (true) { + desc.Height = desc.Width *= 2; + if (desc.Width >= 0x8000000u) + return; + if (FAILED(ID3D11Device_CreateTexture2D(p->dev, &desc, NULL, NULL))) + return; + ra->max_texture_wh = desc.Width; + } +} + +struct ra *ra_d3d11_create(ID3D11Device *dev, struct mp_log *log, + struct spirv_compiler *spirv) +{ + HRESULT hr; + + struct ra *ra = talloc_zero(NULL, struct ra); + ra->log = log; + ra->fns = &ra_fns_d3d11; + + // Even Direct3D 10level9 supports 3D textures + ra->caps = RA_CAP_TEX_3D | RA_CAP_DIRECT_UPLOAD | RA_CAP_BUF_RO | + RA_CAP_BLIT | spirv->ra_caps; + + ra->glsl_version = spirv->glsl_version; + ra->glsl_vulkan = true; + + struct ra_d3d11 *p = ra->priv = talloc_zero(ra, struct ra_d3d11); + p->spirv = spirv; + + int minor = 0; + ID3D11Device_AddRef(dev); + p->dev = dev; + ID3D11Device_GetImmediateContext(p->dev, &p->ctx); + hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device1, + 
(void**)&p->dev1); + if (SUCCEEDED(hr)) { + minor = 1; + ID3D11Device1_GetImmediateContext1(p->dev1, &p->ctx1); + + D3D11_FEATURE_DATA_D3D11_OPTIONS fopts = { 0 }; + hr = ID3D11Device_CheckFeatureSupport(p->dev, + D3D11_FEATURE_D3D11_OPTIONS, &fopts, sizeof(fopts)); + if (SUCCEEDED(hr)) { + p->has_clear_view = fopts.ClearView; + } + } + + MP_VERBOSE(ra, "Using Direct3D 11.%d runtime\n", minor); + + p->fl = ID3D11Device_GetFeatureLevel(p->dev); + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + ra->max_texture_wh = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; + } else if (p->fl >= D3D_FEATURE_LEVEL_10_0) { + ra->max_texture_wh = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; + } else if (p->fl >= D3D_FEATURE_LEVEL_9_3) { + ra->max_texture_wh = D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION; + } else { + ra->max_texture_wh = D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION; + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_0) + ra->caps |= RA_CAP_GATHER; + if (p->fl >= D3D_FEATURE_LEVEL_10_0) + ra->caps |= RA_CAP_FRAGCOORD; + + // Some 10_0 hardware has compute shaders, but only 11_0 has image load/store + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + ra->caps |= RA_CAP_COMPUTE | RA_CAP_BUF_RW; + ra->max_shmem = 32 * 1024; + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_1) { + p->max_uavs = D3D11_1_UAV_SLOT_COUNT; + } else { + p->max_uavs = D3D11_PS_CS_UAV_REGISTER_COUNT; + } + + if (ID3D11Device_GetCreationFlags(p->dev) & D3D11_CREATE_DEVICE_DEBUG) + init_debug_layer(ra); + + // Some level 9_x devices don't have timestamp queries + hr = ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, NULL); + p->has_timestamp_queries = SUCCEEDED(hr); + + // According to MSDN, the above texture sizes are just minimums and drivers + // may support larger textures. 
See: + // https://msdn.microsoft.com/en-us/library/windows/desktop/ff476874.aspx + find_max_texture_dimension(ra); + MP_VERBOSE(ra, "Maximum Texture2D size: %dx%d\n", ra->max_texture_wh, + ra->max_texture_wh); + + if (!load_d3d_compiler(ra)) { + MP_FATAL(ra, "Could not find D3DCompiler DLL\n"); + goto error; + } + + MP_VERBOSE(ra, "D3DCompiler version: %u.%u.%u.%u\n", + p->d3d_compiler_ver.major, p->d3d_compiler_ver.minor, + p->d3d_compiler_ver.build, p->d3d_compiler_ver.revision); + + setup_formats(ra); + + // The rasterizer state never changes, so set it up here + ID3D11RasterizerState *rstate; + D3D11_RASTERIZER_DESC rdesc = { + .FillMode = D3D11_FILL_SOLID, + .CullMode = D3D11_CULL_NONE, + .FrontCounterClockwise = FALSE, + .DepthClipEnable = TRUE, // Required for 10level9 + .ScissorEnable = TRUE, + }; + hr = ID3D11Device_CreateRasterizerState(p->dev, &rdesc, &rstate); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create rasterizer state: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + ID3D11DeviceContext_RSSetState(p->ctx, rstate); + SAFE_RELEASE(rstate); + + // If the device doesn't support ClearView, we have to set up a + // shader-based clear() implementation + if (!p->has_clear_view && !setup_clear_rpass(ra)) + goto error; + + if (!setup_blit_rpass(ra)) + goto error; + + return ra; + +error: + destroy(ra); + return NULL; +} + +ID3D11Device *ra_d3d11_get_device(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + ID3D11Device_AddRef(p->dev); + return p->dev; +} + +bool ra_is_d3d11(struct ra *ra) +{ + return ra->fns == &ra_fns_d3d11; +} diff --git a/video/out/d3d11/ra_d3d11.h b/video/out/d3d11/ra_d3d11.h new file mode 100644 index 0000000..54033b6 --- /dev/null +++ b/video/out/d3d11/ra_d3d11.h @@ -0,0 +1,35 @@ +#pragma once + +#include <stdbool.h> +#include <windows.h> +#include <d3d11.h> +#include <dxgi1_2.h> + +#include "video/out/gpu/ra.h" +#include "video/out/gpu/spirv.h" + +// Create an RA instance from a D3D11 device. 
This takes a reference to the +// device, which is released when the RA instance is destroyed. +struct ra *ra_d3d11_create(ID3D11Device *device, struct mp_log *log, + struct spirv_compiler *spirv); + +// Flush the immediate context of the wrapped D3D11 device +void ra_d3d11_flush(struct ra *ra); + +// Create an RA texture from a D3D11 resource. This takes a reference to the +// texture, which is released when the RA texture is destroyed. +struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res); + +// As above, but for a D3D11VA video resource. The fmt parameter selects which +// plane of a planar format will be mapped when the RA texture is used. +// array_slice should be set for texture arrays and is ignored for non-arrays. +struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res, + int w, int h, int array_slice, + const struct ra_format *fmt); + +// Get the underlying D3D11 device from an RA instance. The returned device is +// refcounted and must be released by the caller. +ID3D11Device *ra_d3d11_get_device(struct ra *ra); + +// True if the RA instance was created with ra_d3d11_create() +bool ra_is_d3d11(struct ra *ra); diff --git a/video/out/drm_atomic.c b/video/out/drm_atomic.c new file mode 100644 index 0000000..7a55483 --- /dev/null +++ b/video/out/drm_atomic.c @@ -0,0 +1,245 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. 
If not, see <http://www.gnu.org/licenses/>. + */ + +#include <errno.h> +#include <inttypes.h> + +#include "common/common.h" +#include "common/msg.h" +#include "drm_atomic.h" + +int drm_object_create_properties(struct mp_log *log, int fd, + struct drm_object *object) +{ + object->props = drmModeObjectGetProperties(fd, object->id, object->type); + if (object->props) { + object->props_info = talloc_zero_size(NULL, object->props->count_props + * sizeof(object->props_info)); + if (object->props_info) { + for (int i = 0; i < object->props->count_props; i++) + object->props_info[i] = drmModeGetProperty(fd, object->props->props[i]); + } else { + mp_err(log, "Out of memory\n"); + goto fail; + } + } else { + mp_err(log, "Failed to retrieve properties for object id %d\n", object->id); + goto fail; + } + + return 0; + + fail: + drm_object_free_properties(object); + return -1; +} + +void drm_object_free_properties(struct drm_object *object) +{ + if (object->props) { + for (int i = 0; i < object->props->count_props; i++) { + if (object->props_info[i]) { + drmModeFreeProperty(object->props_info[i]); + object->props_info[i] = NULL; + } + } + + talloc_free(object->props_info); + object->props_info = NULL; + + drmModeFreeObjectProperties(object->props); + object->props = NULL; + } +} + +int drm_object_get_property(struct drm_object *object, char *name, uint64_t *value) +{ + for (int i = 0; i < object->props->count_props; i++) { + if (strcasecmp(name, object->props_info[i]->name) == 0) { + *value = object->props->prop_values[i]; + return 0; + } + } + + return -EINVAL; +} + +int drm_object_set_property(drmModeAtomicReq *request, struct drm_object *object, + char *name, uint64_t value) +{ + for (int i = 0; i < object->props->count_props; i++) { + if (strcasecmp(name, object->props_info[i]->name) == 0) { + return drmModeAtomicAddProperty(request, object->id, + object->props_info[i]->prop_id, value); + } + } + + return -EINVAL; +} + +struct drm_object * drm_object_create(struct mp_log 
*log, int fd, + uint32_t object_id, uint32_t type) +{ + struct drm_object *obj = NULL; + obj = talloc_zero(NULL, struct drm_object); + obj->id = object_id; + obj->type = type; + + if (drm_object_create_properties(log, fd, obj)) { + talloc_free(obj); + return NULL; + } + + return obj; +} + +void drm_object_free(struct drm_object *object) +{ + if (object) { + drm_object_free_properties(object); + talloc_free(object); + } +} + +void drm_object_print_info(struct mp_log *log, struct drm_object *object) +{ + mp_err(log, "Object ID = %d (type = %x) has %d properties\n", + object->id, object->type, object->props->count_props); + + for (int i = 0; i < object->props->count_props; i++) + mp_err(log, " Property '%s' = %lld\n", object->props_info[i]->name, + (long long)object->props->prop_values[i]); +} + +struct drm_atomic_context *drm_atomic_create_context(struct mp_log *log, int fd, + int crtc_id, int overlay_id) +{ + drmModePlane *drmplane = NULL; + drmModePlaneRes *plane_res = NULL; + drmModeRes *res = NULL; + struct drm_object *plane = NULL; + struct drm_atomic_context *ctx; + int crtc_index = -1; + int layercount = 0; + uint64_t value; + + res = drmModeGetResources(fd); + if (!res) { + mp_err(log, "Cannot retrieve DRM resources: %s\n", mp_strerror(errno)); + goto fail; + } + + plane_res = drmModeGetPlaneResources(fd); + if (!plane_res) { + mp_err(log, "Cannot retrieve plane ressources: %s\n", mp_strerror(errno)); + goto fail; + } + + ctx = talloc_zero(NULL, struct drm_atomic_context); + if (!ctx) { + mp_err(log, "Out of memory\n"); + goto fail; + } + + ctx->fd = fd; + ctx->crtc = drm_object_create(log, ctx->fd, crtc_id, DRM_MODE_OBJECT_CRTC); + if (!ctx->crtc) { + mp_err(log, "Failed to create CRTC object\n"); + goto fail; + } + + for (int i = 0; i < res->count_crtcs; i++) { + if (res->crtcs[i] == crtc_id) { + crtc_index = i; + break; + } + } + + for (unsigned int j = 0; j < plane_res->count_planes; j++) { + + drmplane = drmModeGetPlane (ctx->fd, plane_res->planes[j]); + 
if (drmplane->possible_crtcs & (1 << crtc_index)) { + plane = drm_object_create(log, ctx->fd, drmplane->plane_id, + DRM_MODE_OBJECT_PLANE); + + if (plane) { + if (drm_object_get_property(plane, "TYPE", &value) == -EINVAL) { + mp_err(log, "Unable to retrieve type property from plane %d\n", j); + goto fail; + } else { + if ((value == DRM_PLANE_TYPE_OVERLAY) && + (layercount == overlay_id)) { + ctx->overlay_plane = plane; + } + else if (value == DRM_PLANE_TYPE_PRIMARY) { + ctx->primary_plane = plane; + } + else { + drm_object_free(plane); + plane = NULL; + } + + if (value == DRM_PLANE_TYPE_OVERLAY) + layercount++; + } + } else { + mp_err(log, "Failed to create Plane object from plane ID %d\n", + drmplane->plane_id); + goto fail; + } + } + drmModeFreePlane(drmplane); + drmplane = NULL; + } + + if (!ctx->primary_plane) { + mp_err(log, "Failed to find primary plane\n"); + goto fail; + } + + if (!ctx->overlay_plane) { + mp_err(log, "Failed to find overlay plane with id=%d\n", overlay_id); + goto fail; + } + + mp_verbose(log, "Found Primary plane with ID %d, overlay with ID %d\n", + ctx->primary_plane->id, ctx->overlay_plane->id); + + drmModeFreePlaneResources(plane_res); + drmModeFreeResources(res); + return ctx; + + +fail: + if (res) + drmModeFreeResources(res); + if (plane_res) + drmModeFreePlaneResources(plane_res); + if (drmplane) + drmModeFreePlane(drmplane); + if (plane) + drm_object_free(plane); + return NULL; +} + +void drm_atomic_destroy_context(struct drm_atomic_context *ctx) +{ + drm_object_free(ctx->crtc); + drm_object_free(ctx->primary_plane); + drm_object_free(ctx->overlay_plane); + talloc_free(ctx); +} diff --git a/video/out/drm_atomic.h b/video/out/drm_atomic.h new file mode 100644 index 0000000..d0ebdb9 --- /dev/null +++ b/video/out/drm_atomic.h @@ -0,0 +1,55 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MP_DRMATOMIC_H +#define MP_DRMATOMIC_H + +#include <stdlib.h> +#include <xf86drm.h> +#include <xf86drmMode.h> + +#include "common/msg.h" + +struct drm_object { + uint32_t id; + uint32_t type; + drmModeObjectProperties *props; + drmModePropertyRes **props_info; +}; + +struct drm_atomic_context { + int fd; + + struct drm_object *crtc; + struct drm_object *primary_plane; + struct drm_object *overlay_plane; + + drmModeAtomicReq *request; +}; + + +int drm_object_create_properties(struct mp_log *log, int fd, struct drm_object *object); +void drm_object_free_properties(struct drm_object *object); +int drm_object_get_property(struct drm_object *object, char *name, uint64_t *value); +int drm_object_set_property(drmModeAtomicReq *request, struct drm_object *object, char *name, uint64_t value); +struct drm_object * drm_object_create(struct mp_log *log, int fd, uint32_t object_id, uint32_t type); +void drm_object_free(struct drm_object *object); +void drm_object_print_info(struct mp_log *log, struct drm_object *object); +struct drm_atomic_context *drm_atomic_create_context(struct mp_log *log, int fd, int crtc_id, int overlay_id); +void drm_atomic_destroy_context(struct drm_atomic_context *ctx); + +#endif // MP_DRMATOMIC_H diff --git a/video/out/drm_common.c b/video/out/drm_common.c index aea4afa..8402ac7 100644 --- 
a/video/out/drm_common.c +++ b/video/out/drm_common.c @@ -41,6 +41,18 @@ static int vt_switcher_pipe[2]; +#define OPT_BASE_STRUCT struct drm_opts +const struct m_sub_options drm_conf = { + .opts = (const struct m_option[]) { + OPT_STRING_VALIDATE("drm-connector", drm_connector_spec, + 0, drm_validate_connector_opt), + OPT_INT("drm-mode", drm_mode_id, 0), + OPT_INT("drm-overlay", drm_overlay_id, 0), + {0}, + }, + .size = sizeof(struct drm_opts), +}; + static const char *connector_names[] = { "Unknown", // DRM_MODE_CONNECTOR_Unknown "VGA", // DRM_MODE_CONNECTOR_VGA @@ -222,7 +234,7 @@ static void parse_connector_spec(struct mp_log *log, struct kms *kms_create(struct mp_log *log, const char *connector_spec, - int mode_id) + int mode_id, int overlay_id) { int card_no = -1; char *connector_name = NULL; @@ -260,6 +272,23 @@ struct kms *kms_create(struct mp_log *log, const char *connector_spec, if (!setup_mode(kms, mode_id)) goto err; + // Universal planes allows accessing all the planes (including primary) + if (drmSetClientCap(kms->fd, DRM_CLIENT_CAP_UNIVERSAL_PLANES, 1)) { + mp_err(log, "Failed to set Universal planes capability\n"); + } + + if (drmSetClientCap(kms->fd, DRM_CLIENT_CAP_ATOMIC, 1)) { + mp_verbose(log, "No DRM Atomic support found\n"); + } else { + mp_verbose(log, "DRM Atomic support found\n"); + kms->atomic_context = drm_atomic_create_context(kms->log, kms->fd, kms->crtc_id, overlay_id); + if (!kms->atomic_context) { + mp_err(log, "Failed to create DRM atomic context\n"); + goto err; + } + } + + drmModeFreeResources(res); return kms; @@ -284,6 +313,10 @@ void kms_destroy(struct kms *kms) drmModeFreeEncoder(kms->encoder); kms->encoder = NULL; } + if (kms->atomic_context) { + drm_atomic_destroy_context(kms->atomic_context); + } + close(kms->fd); talloc_free(kms); } diff --git a/video/out/drm_common.h b/video/out/drm_common.h index 6796472..ff913ff 100644 --- a/video/out/drm_common.h +++ b/video/out/drm_common.h @@ -22,6 +22,7 @@ #include <xf86drm.h> 
#include <xf86drmMode.h> #include "options/m_option.h" +#include "drm_atomic.h" struct kms { struct mp_log *log; @@ -31,6 +32,7 @@ struct kms { drmModeModeInfo mode; uint32_t crtc_id; int card_no; + struct drm_atomic_context *atomic_context; }; struct vt_switcher { @@ -40,6 +42,12 @@ struct vt_switcher { void *handler_data[2]; }; +struct drm_opts { + char *drm_connector_spec; + int drm_mode_id; + int drm_overlay_id; +}; + bool vt_switcher_init(struct vt_switcher *s, struct mp_log *log); void vt_switcher_destroy(struct vt_switcher *s); void vt_switcher_poll(struct vt_switcher *s, int timeout_ms); @@ -51,7 +59,7 @@ void vt_switcher_release(struct vt_switcher *s, void (*handler)(void*), void *user_data); struct kms *kms_create(struct mp_log *log, const char *connector_spec, - int mode_id); + int mode_id, int overlay_id); void kms_destroy(struct kms *kms); double kms_get_display_fps(const struct kms *kms); diff --git a/video/out/drm_prime.c b/video/out/drm_prime.c new file mode 100644 index 0000000..253fbb6 --- /dev/null +++ b/video/out/drm_prime.c @@ -0,0 +1,85 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <unistd.h> +#include <xf86drm.h> +#include <xf86drmMode.h> + +#include "common/msg.h" +#include "drm_common.h" +#include "drm_prime.h" + +int drm_prime_create_framebuffer(struct mp_log *log, int fd, AVDRMFrameDescriptor *descriptor, int width, int height, + struct drm_prime_framebuffer *framebuffer) +{ + AVDRMLayerDescriptor *layer = NULL; + uint32_t pitches[4], offsets[4], handles[4]; + int ret, layer_fd; + + if (descriptor && descriptor->nb_layers) { + *framebuffer = (struct drm_prime_framebuffer){0}; + + for (int object = 0; object < descriptor->nb_objects; object++) { + ret = drmPrimeFDToHandle(fd, descriptor->objects[object].fd, &framebuffer->gem_handles[object]); + if (ret < 0) { + mp_err(log, "Failed to retrieve the Prime Handle from handle %d (%d).\n", object, descriptor->objects[object].fd); + goto fail; + } + } + + layer = &descriptor->layers[0]; + + for (int plane = 0; plane < AV_DRM_MAX_PLANES; plane++) { + layer_fd = framebuffer->gem_handles[layer->planes[plane].object_index]; + if (layer_fd && layer->planes[plane].pitch) { + pitches[plane] = layer->planes[plane].pitch; + offsets[plane] = layer->planes[plane].offset; + handles[plane] = layer_fd; + } else { + pitches[plane] = 0; + offsets[plane] = 0; + handles[plane] = 0; + } + } + + ret = drmModeAddFB2(fd, width, height, layer->format, + handles, pitches, offsets, &framebuffer->fb_id, 0); + if (ret < 0) { + mp_err(log, "Failed to create framebuffer on layer %d.\n", 0); + goto fail; + } + } + + return 0; + +fail: + memset(framebuffer, 0, sizeof(*framebuffer)); + return -1; +} + +void drm_prime_destroy_framebuffer(struct mp_log *log, int fd, struct drm_prime_framebuffer *framebuffer) +{ + if (framebuffer->fb_id) + drmModeRmFB(fd, framebuffer->fb_id); + + for (int i = 0; i < AV_DRM_MAX_PLANES; i++) { + if (framebuffer->gem_handles[i]) + drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &framebuffer->gem_handles[i]); + } + + memset(framebuffer, 0, sizeof(*framebuffer)); +} diff --git 
a/video/out/win32/exclusive_hack.h b/video/out/drm_prime.h index 883e215..0653fdb 100644 --- a/video/out/win32/exclusive_hack.h +++ b/video/out/drm_prime.h @@ -15,12 +15,19 @@ * License along with mpv. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef MP_WIN32_EXCLUSIVE_HACK_H_ -#define MP_WIN32_EXCLUSIVE_HACK_H_ +#ifndef DRM_PRIME_H +#define DRM_PRIME_H -#include <stdbool.h> +#include <libavutil/hwcontext_drm.h> -// Returns true if any program on the computer is in exclusive fullscreen mode -bool mp_w32_is_in_exclusive_mode(void); +#include "common/msg.h" -#endif +struct drm_prime_framebuffer { + uint32_t fb_id; + uint32_t gem_handles[AV_DRM_MAX_PLANES]; +}; + +int drm_prime_create_framebuffer(struct mp_log *log, int fd, AVDRMFrameDescriptor *descriptor, int width, int height, + struct drm_prime_framebuffer *framebuffers); +void drm_prime_destroy_framebuffer(struct mp_log *log, int fd, struct drm_prime_framebuffer *framebuffers); +#endif // DRM_PRIME_H diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c new file mode 100644 index 0000000..36f9c2d --- /dev/null +++ b/video/out/gpu/context.c @@ -0,0 +1,223 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdbool.h> +#include <math.h> +#include <assert.h> + +#include "config.h" +#include "common/common.h" +#include "common/msg.h" +#include "options/options.h" +#include "options/m_option.h" +#include "video/out/vo.h" + +#include "context.h" +#include "spirv.h" + +/* OpenGL */ +extern const struct ra_ctx_fns ra_ctx_glx; +extern const struct ra_ctx_fns ra_ctx_glx_probe; +extern const struct ra_ctx_fns ra_ctx_x11_egl; +extern const struct ra_ctx_fns ra_ctx_drm_egl; +extern const struct ra_ctx_fns ra_ctx_cocoa; +extern const struct ra_ctx_fns ra_ctx_wayland_egl; +extern const struct ra_ctx_fns ra_ctx_wgl; +extern const struct ra_ctx_fns ra_ctx_angle; +extern const struct ra_ctx_fns ra_ctx_dxgl; +extern const struct ra_ctx_fns ra_ctx_rpi; +extern const struct ra_ctx_fns ra_ctx_android; +extern const struct ra_ctx_fns ra_ctx_mali_fbdev; +extern const struct ra_ctx_fns ra_ctx_vdpauglx; + +/* Vulkan */ +extern const struct ra_ctx_fns ra_ctx_vulkan_wayland; +extern const struct ra_ctx_fns ra_ctx_vulkan_win; +extern const struct ra_ctx_fns ra_ctx_vulkan_xlib; + +/* Direct3D 11 */ +extern const struct ra_ctx_fns ra_ctx_d3d11; + +static const struct ra_ctx_fns *contexts[] = { +#if HAVE_D3D11 + &ra_ctx_d3d11, +#endif + +// OpenGL contexts: +#if HAVE_ANDROID + &ra_ctx_android, +#endif +#if HAVE_RPI + &ra_ctx_rpi, +#endif +#if HAVE_GL_COCOA + &ra_ctx_cocoa, +#endif +#if HAVE_EGL_ANGLE_WIN32 + &ra_ctx_angle, +#endif +#if HAVE_GL_WIN32 + &ra_ctx_wgl, +#endif +#if HAVE_GL_DXINTEROP + &ra_ctx_dxgl, +#endif +#if HAVE_GL_X11 + &ra_ctx_glx_probe, +#endif +#if HAVE_EGL_X11 + &ra_ctx_x11_egl, +#endif +#if HAVE_GL_X11 + &ra_ctx_glx, +#endif +#if HAVE_GL_WAYLAND + &ra_ctx_wayland_egl, +#endif +#if HAVE_EGL_DRM + &ra_ctx_drm_egl, +#endif +#if HAVE_MALI_FBDEV + &ra_ctx_mali_fbdev, +#endif +#if HAVE_VDPAU_GL_X11 + &ra_ctx_vdpauglx, +#endif + +// Vulkan contexts: +#if HAVE_VULKAN + +#if 
HAVE_WIN32_DESKTOP + &ra_ctx_vulkan_win, +#endif +#if HAVE_WAYLAND + &ra_ctx_vulkan_wayland, +#endif +#if HAVE_X11 + &ra_ctx_vulkan_xlib, +#endif + +#endif +}; + +int ra_ctx_validate_api(struct mp_log *log, const struct m_option *opt, + struct bstr name, struct bstr param) +{ + if (bstr_equals0(param, "help")) { + mp_info(log, "GPU APIs (contexts):\n"); + mp_info(log, " auto (autodetect)\n"); + for (int n = 0; n < MP_ARRAY_SIZE(contexts); n++) + mp_info(log, " %s (%s)\n", contexts[n]->type, contexts[n]->name); + return M_OPT_EXIT; + } + if (bstr_equals0(param, "auto")) + return 1; + for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) { + if (bstr_equals0(param, contexts[i]->type)) + return 1; + } + return M_OPT_INVALID; +} + +int ra_ctx_validate_context(struct mp_log *log, const struct m_option *opt, + struct bstr name, struct bstr param) +{ + if (bstr_equals0(param, "help")) { + mp_info(log, "GPU contexts (APIs):\n"); + mp_info(log, " auto (autodetect)\n"); + for (int n = 0; n < MP_ARRAY_SIZE(contexts); n++) + mp_info(log, " %s (%s)\n", contexts[n]->name, contexts[n]->type); + return M_OPT_EXIT; + } + if (bstr_equals0(param, "auto")) + return 1; + for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) { + if (bstr_equals0(param, contexts[i]->name)) + return 1; + } + return M_OPT_INVALID; +} + +// Create a VO window and create a RA context on it. +// vo_flags: passed to the backend's create window function +struct ra_ctx *ra_ctx_create(struct vo *vo, const char *context_type, + const char *context_name, struct ra_ctx_opts opts) +{ + bool api_auto = !context_type || strcmp(context_type, "auto") == 0; + bool ctx_auto = !context_name || strcmp(context_name, "auto") == 0; + + if (ctx_auto) { + MP_VERBOSE(vo, "Probing for best GPU context.\n"); + opts.probing = true; + } + + // Hack to silence backend (X11/Wayland/etc.) errors. 
Kill it once backends + // are separate from `struct vo` + bool old_probing = vo->probing; + vo->probing = opts.probing; + + for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) { + if (!opts.probing && strcmp(contexts[i]->name, context_name) != 0) + continue; + if (!api_auto && strcmp(contexts[i]->type, context_type) != 0) + continue; + + struct ra_ctx *ctx = talloc_ptrtype(NULL, ctx); + *ctx = (struct ra_ctx) { + .vo = vo, + .global = vo->global, + .log = mp_log_new(ctx, vo->log, contexts[i]->type), + .opts = opts, + .fns = contexts[i], + }; + + MP_VERBOSE(ctx, "Initializing GPU context '%s'\n", ctx->fns->name); + if (contexts[i]->init(ctx)) { + vo->probing = old_probing; + return ctx; + } + + talloc_free(ctx); + } + + vo->probing = old_probing; + + // If we've reached this point, then none of the contexts matched the name + // requested, or the backend creation failed for all of them. + if (!vo->probing) + MP_ERR(vo, "Failed initializing any suitable GPU context!\n"); + return NULL; +} + +void ra_ctx_destroy(struct ra_ctx **ctx_ptr) +{ + struct ra_ctx *ctx = *ctx_ptr; + if (!ctx) + return; + + if (ctx->spirv && ctx->spirv->fns->uninit) + ctx->spirv->fns->uninit(ctx); + + ctx->fns->uninit(ctx); + talloc_free(ctx); + + *ctx_ptr = NULL; +} diff --git a/video/out/gpu/context.h b/video/out/gpu/context.h new file mode 100644 index 0000000..78c0441 --- /dev/null +++ b/video/out/gpu/context.h @@ -0,0 +1,101 @@ +#pragma once + +#include "video/out/vo.h" + +#include "config.h" +#include "ra.h" + +struct ra_ctx_opts { + int allow_sw; // allow software renderers + int want_alpha; // create an alpha framebuffer if possible + int debug; // enable debugging layers/callbacks etc. 
+ bool probing; // the backend was auto-probed + int swapchain_depth; // max number of images to render ahead +}; + +struct ra_ctx { + struct vo *vo; + struct ra *ra; + struct mpv_global *global; + struct mp_log *log; + + struct ra_ctx_opts opts; + const struct ra_ctx_fns *fns; + struct ra_swapchain *swapchain; + struct spirv_compiler *spirv; + + void *priv; +}; + +// The functions that make up a ra_ctx. +struct ra_ctx_fns { + const char *type; // API type (for --gpu-api) + const char *name; // name (for --gpu-context) + + // Resize the window, or create a new window if there isn't one yet. + // Currently, there is an unfortunate interaction with ctx->vo, and + // display size etc. are determined by it. + bool (*reconfig)(struct ra_ctx *ctx); + + // This behaves exactly like vo_driver.control(). + int (*control)(struct ra_ctx *ctx, int *events, int request, void *arg); + + // These behave exactly like vo_driver.wakeup/wait_events. They are + // optional. + void (*wakeup)(struct ra_ctx *ctx); + void (*wait_events)(struct ra_ctx *ctx, int64_t until_time_us); + + // Initialize/destroy the 'struct ra' and possibly the underlying VO backend. + // Not normally called by the user of the ra_ctx. + bool (*init)(struct ra_ctx *ctx); + void (*uninit)(struct ra_ctx *ctx); +}; + +// Extra struct for the swapchain-related functions so they can be easily +// inherited from helpers. +struct ra_swapchain { + struct ra_ctx *ctx; + struct priv *priv; + const struct ra_swapchain_fns *fns; +}; + +// Represents a framebuffer / render target +struct ra_fbo { + struct ra_tex *tex; + bool flip; // rendering needs to be inverted +}; + +struct ra_swapchain_fns { + // Gets the current framebuffer depth in bits (0 if unknown). Optional. + int (*color_depth)(struct ra_swapchain *sw); + + // Retrieves a screenshot of the framebuffer. Optional. + struct mp_image *(*screenshot)(struct ra_swapchain *sw); + + // Called when rendering starts. Returns NULL on failure. 
This must be + // followed by submit_frame, to submit the rendered frame. This function + // can also fail sporadically, and such errors should be ignored unless + // they persist. + bool (*start_frame)(struct ra_swapchain *sw, struct ra_fbo *out_fbo); + + // Present the frame. Issued in lockstep with start_frame, with rendering + // commands in between. The `frame` is just there for timing data, for + // swapchains smart enough to do something with it. + bool (*submit_frame)(struct ra_swapchain *sw, const struct vo_frame *frame); + + // Performs a buffer swap. This blocks for as long as necessary to meet + // params.swapchain_depth, or until the next vblank (for vsynced contexts) + void (*swap_buffers)(struct ra_swapchain *sw); +}; + +// Create and destroy a ra_ctx. This also takes care of creating and destroying +// the underlying `struct ra`, and perhaps the underlying VO backend. +struct ra_ctx *ra_ctx_create(struct vo *vo, const char *context_type, + const char *context_name, struct ra_ctx_opts opts); +void ra_ctx_destroy(struct ra_ctx **ctx); + +struct m_option; +int ra_ctx_validate_api(struct mp_log *log, const struct m_option *opt, + struct bstr name, struct bstr param); +int ra_ctx_validate_context(struct mp_log *log, const struct m_option *opt, + struct bstr name, struct bstr param); diff --git a/video/out/opengl/d3d11_helpers.c b/video/out/gpu/d3d11_helpers.c index d9b7fc2..b96b03a 100644 --- a/video/out/opengl/d3d11_helpers.c +++ b/video/out/gpu/d3d11_helpers.c @@ -46,6 +46,8 @@ static int get_feature_levels(int max_fl, int min_fl, const D3D_FEATURE_LEVEL **out) { static const D3D_FEATURE_LEVEL levels[] = { + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_0, D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, D3D_FEATURE_LEVEL_10_1, @@ -70,7 +72,7 @@ static int get_feature_levels(int max_fl, int min_fl, return len; } -static HRESULT create_device(struct mp_log *log, bool warp, bool bgra, +static HRESULT create_device(struct mp_log *log, bool warp, bool 
debug, int max_fl, int min_fl, ID3D11Device **dev) { const D3D_FEATURE_LEVEL *levels; @@ -82,7 +84,7 @@ static HRESULT create_device(struct mp_log *log, bool warp, bool bgra, D3D_DRIVER_TYPE type = warp ? D3D_DRIVER_TYPE_WARP : D3D_DRIVER_TYPE_HARDWARE; - UINT flags = bgra ? D3D11_CREATE_DEVICE_BGRA_SUPPORT : 0; + UINT flags = debug ? D3D11_CREATE_DEVICE_DEBUG : 0; return pD3D11CreateDevice(NULL, type, NULL, flags, levels, levels_len, D3D11_SDK_VERSION, dev, NULL, NULL); } @@ -95,7 +97,6 @@ bool mp_d3d11_create_present_device(struct mp_log *log, ID3D11Device **dev_out) { bool warp = opts->force_warp; - bool bgra = true; int max_fl = opts->max_feature_level; int min_fl = opts->min_feature_level; ID3D11Device *dev = NULL; @@ -116,25 +117,27 @@ bool mp_d3d11_create_present_device(struct mp_log *log, max_fl = max_fl ? max_fl : D3D_FEATURE_LEVEL_11_0; min_fl = min_fl ? min_fl : D3D_FEATURE_LEVEL_9_1; - hr = create_device(log, warp, bgra, max_fl, min_fl, &dev); + hr = create_device(log, warp, opts->debug, max_fl, min_fl, &dev); if (SUCCEEDED(hr)) break; - // BGRA is recommended, but FL 10_0 hardware may not support it - if (bgra) { - mp_dbg(log, "Failed to create D3D device with BGRA support\n"); - bgra = false; + // Trying to create a D3D_FEATURE_LEVEL_12_0 device on Windows 8.1 or + // below will not succeed. Try an 11_1 device. + if (max_fl >= D3D_FEATURE_LEVEL_12_0 && + min_fl <= D3D_FEATURE_LEVEL_11_1) + { + mp_dbg(log, "Failed to create 12_0+ device, trying 11_1\n"); + max_fl = D3D_FEATURE_LEVEL_11_1; continue; } // Trying to create a D3D_FEATURE_LEVEL_11_1 device on Windows 7 - // without the platform update will not succeed. Try a 11_0 device. + // without the platform update will not succeed. Try an 11_0 device. 
if (max_fl >= D3D_FEATURE_LEVEL_11_1 && min_fl <= D3D_FEATURE_LEVEL_11_0) { mp_dbg(log, "Failed to create 11_1+ device, trying 11_0\n"); max_fl = D3D_FEATURE_LEVEL_11_0; - bgra = true; continue; } @@ -144,7 +147,6 @@ bool mp_d3d11_create_present_device(struct mp_log *log, warp = true; max_fl = opts->max_feature_level; min_fl = opts->min_feature_level; - bgra = true; continue; } @@ -179,11 +181,13 @@ bool mp_d3d11_create_present_device(struct mp_log *log, (((unsigned)selected_level) >> 8) & 0xf); char *dev_name = mp_to_utf8(NULL, desc.Description); - mp_verbose(log, "Device: %s\n" - "VendorId: 0x%04d\n" - "DeviceId: 0x%04d\n" + mp_verbose(log, "Device Name: %s\n" + "Device ID: %04x:%04x (rev %02x)\n" + "Subsystem ID: %04x:%04x\n" "LUID: %08lx%08lx\n", - dev_name, desc.VendorId, desc.DeviceId, + dev_name, + desc.VendorId, desc.DeviceId, desc.Revision, + LOWORD(desc.SubSysId), HIWORD(desc.SubSysId), desc.AdapterLuid.HighPart, desc.AdapterLuid.LowPart); talloc_free(dev_name); @@ -381,3 +385,84 @@ done: SAFE_RELEASE(dxgi_dev); return success; } + +struct mp_image *mp_d3d11_screenshot(IDXGISwapChain *swapchain) +{ + ID3D11Device *dev = NULL; + ID3D11DeviceContext *ctx = NULL; + ID3D11Texture2D *frontbuffer = NULL; + ID3D11Texture2D *staging = NULL; + struct mp_image *img = NULL; + HRESULT hr; + + // Validate the swap chain. This screenshot method will only work on DXGI + // 1.2+ flip/sequential swap chains. It's probably not possible at all with + // discard swap chains, since by definition, the backbuffer contents is + // discarded on Present(). + DXGI_SWAP_CHAIN_DESC scd; + hr = IDXGISwapChain_GetDesc(swapchain, &scd); + if (FAILED(hr)) + goto done; + if (scd.SwapEffect != DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL) + goto done; + + // Get the last buffer that was presented with Present(). This should be + // the n-1th buffer for a swap chain of length n. 
+ hr = IDXGISwapChain_GetBuffer(swapchain, scd.BufferCount - 1, + &IID_ID3D11Texture2D, (void**)&frontbuffer); + if (FAILED(hr)) + goto done; + + ID3D11Texture2D_GetDevice(frontbuffer, &dev); + ID3D11Device_GetImmediateContext(dev, &ctx); + + D3D11_TEXTURE2D_DESC td; + ID3D11Texture2D_GetDesc(frontbuffer, &td); + if (td.SampleDesc.Count > 1) + goto done; + + // Validate the backbuffer format and convert to an mpv IMGFMT + enum mp_imgfmt fmt; + switch (td.Format) { + case DXGI_FORMAT_B8G8R8A8_UNORM: fmt = IMGFMT_BGR0; break; + case DXGI_FORMAT_R8G8B8A8_UNORM: fmt = IMGFMT_RGB0; break; + default: + goto done; + } + + // Create a staging texture based on the frontbuffer with CPU access + td.BindFlags = 0; + td.MiscFlags = 0; + td.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + td.Usage = D3D11_USAGE_STAGING; + hr = ID3D11Device_CreateTexture2D(dev, &td, 0, &staging); + if (FAILED(hr)) + goto done; + + ID3D11DeviceContext_CopyResource(ctx, (ID3D11Resource*)staging, + (ID3D11Resource*)frontbuffer); + + // Attempt to map the staging texture to CPU-accessible memory + D3D11_MAPPED_SUBRESOURCE lock; + hr = ID3D11DeviceContext_Map(ctx, (ID3D11Resource*)staging, 0, + D3D11_MAP_READ, 0, &lock); + if (FAILED(hr)) + goto done; + + img = mp_image_alloc(fmt, td.Width, td.Height); + if (!img) + return NULL; + for (int i = 0; i < td.Height; i++) { + memcpy(img->planes[0] + img->stride[0] * i, + (char*)lock.pData + lock.RowPitch * i, td.Width * 4); + } + + ID3D11DeviceContext_Unmap(ctx, (ID3D11Resource*)staging, 0); + +done: + SAFE_RELEASE(frontbuffer); + SAFE_RELEASE(staging); + SAFE_RELEASE(ctx); + SAFE_RELEASE(dev); + return img; +} diff --git a/video/out/opengl/d3d11_helpers.h b/video/out/gpu/d3d11_helpers.h index f34d1d4..481c183 100644 --- a/video/out/opengl/d3d11_helpers.h +++ b/video/out/gpu/d3d11_helpers.h @@ -23,7 +23,15 @@ #include <d3d11.h> #include <dxgi1_2.h> +#include "video/mp_image.h" + +#define D3D_FEATURE_LEVEL_12_0 (0xc000) +#define D3D_FEATURE_LEVEL_12_1 (0xc100) + 
struct d3d11_device_opts { + // Enable the debug layer (D3D11_CREATE_DEVICE_DEBUG) + bool debug; + // Allow a software (WARP) adapter. Note, sometimes a software adapter will // be used even when allow_warp is false. This is because, on Windows 8 and // up, if there are no hardware adapters, Windows will pretend the WARP @@ -70,4 +78,6 @@ bool mp_d3d11_create_swapchain(ID3D11Device *dev, struct mp_log *log, struct d3d11_swapchain_opts *opts, IDXGISwapChain **swapchain_out); +struct mp_image *mp_d3d11_screenshot(IDXGISwapChain *swapchain); + #endif diff --git a/video/out/opengl/hwdec.c b/video/out/gpu/hwdec.c index 5fbc1aa..5284116 100644 --- a/video/out/opengl/hwdec.c +++ b/video/out/gpu/hwdec.c @@ -34,19 +34,16 @@ extern const struct ra_hwdec_driver ra_hwdec_d3d11egl; extern const struct ra_hwdec_driver ra_hwdec_d3d11eglrgb; extern const struct ra_hwdec_driver ra_hwdec_dxva2gldx; extern const struct ra_hwdec_driver ra_hwdec_dxva2; +extern const struct ra_hwdec_driver ra_hwdec_d3d11va; extern const struct ra_hwdec_driver ra_hwdec_cuda; +extern const struct ra_hwdec_driver ra_hwdec_cuda_nvdec; extern const struct ra_hwdec_driver ra_hwdec_rpi_overlay; +extern const struct ra_hwdec_driver ra_hwdec_drmprime_drm; -static const struct ra_hwdec_driver *const mpgl_hwdec_drivers[] = { +const struct ra_hwdec_driver *const ra_hwdec_drivers[] = { #if HAVE_VAAPI_EGL &ra_hwdec_vaegl, #endif -#if HAVE_VAAPI_GLX - &ra_hwdec_vaglx, -#endif -#if HAVE_VDPAU_GL_X11 - &ra_hwdec_vdpau, -#endif #if HAVE_VIDEOTOOLBOX_GL || HAVE_IOS_GL &ra_hwdec_videotoolbox, #endif @@ -56,6 +53,9 @@ static const struct ra_hwdec_driver *const mpgl_hwdec_drivers[] = { #if HAVE_D3D9_HWACCEL &ra_hwdec_dxva2egl, #endif + #if HAVE_D3D11 + &ra_hwdec_d3d11va, + #endif #endif #if HAVE_GL_DXINTEROP_D3D9 &ra_hwdec_dxva2gldx, @@ -63,17 +63,24 @@ static const struct ra_hwdec_driver *const mpgl_hwdec_drivers[] = { #if HAVE_CUDA_HWACCEL &ra_hwdec_cuda, #endif +#if HAVE_VDPAU_GL_X11 + &ra_hwdec_vdpau, +#endif #if 
HAVE_RPI &ra_hwdec_rpi_overlay, #endif +#if HAVE_DRMPRIME && HAVE_DRM + &ra_hwdec_drmprime_drm, +#endif + NULL }; -static struct ra_hwdec *load_hwdec_driver(struct mp_log *log, struct ra *ra, - struct mpv_global *global, - struct mp_hwdec_devices *devs, - const struct ra_hwdec_driver *drv, - bool is_auto) +struct ra_hwdec *ra_hwdec_load_driver(struct ra *ra, struct mp_log *log, + struct mpv_global *global, + struct mp_hwdec_devices *devs, + const struct ra_hwdec_driver *drv, + bool is_auto) { struct ra_hwdec *hwdec = talloc(NULL, struct ra_hwdec); *hwdec = (struct ra_hwdec) { @@ -94,81 +101,31 @@ static struct ra_hwdec *load_hwdec_driver(struct mp_log *log, struct ra *ra, return hwdec; } -struct ra_hwdec *ra_hwdec_load_api(struct mp_log *log, struct ra *ra, - struct mpv_global *g, - struct mp_hwdec_devices *devs, - enum hwdec_type api) -{ - bool is_auto = HWDEC_IS_AUTO(api); - for (int n = 0; mpgl_hwdec_drivers[n]; n++) { - const struct ra_hwdec_driver *drv = mpgl_hwdec_drivers[n]; - if ((is_auto || api == drv->api) && !drv->testing_only) { - struct ra_hwdec *r = load_hwdec_driver(log, ra, g, devs, drv, is_auto); - if (r) - return r; - } - } - return NULL; -} - -// Load by option name. 
-struct ra_hwdec *ra_hwdec_load(struct mp_log *log, struct ra *ra, - struct mpv_global *g, - struct mp_hwdec_devices *devs, - const char *name) -{ - int g_hwdec_api; - mp_read_option_raw(g, "hwdec", &m_option_type_choice, &g_hwdec_api); - if (!name || !name[0]) - name = m_opt_choice_str(mp_hwdec_names, g_hwdec_api); - - int api_id = HWDEC_NONE; - for (int n = 0; mp_hwdec_names[n].name; n++) { - if (name && strcmp(mp_hwdec_names[n].name, name) == 0) - api_id = mp_hwdec_names[n].value; - } - - for (int n = 0; mpgl_hwdec_drivers[n]; n++) { - const struct ra_hwdec_driver *drv = mpgl_hwdec_drivers[n]; - if (name && strcmp(drv->name, name) == 0) { - struct ra_hwdec *r = load_hwdec_driver(log, ra, g, devs, drv, false); - if (r) - return r; - } - } - - return ra_hwdec_load_api(log, ra, g, devs, api_id); -} - int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt, struct bstr name, struct bstr param) { bool help = bstr_equals0(param, "help"); if (help) mp_info(log, "Available hwdecs:\n"); - for (int n = 0; mpgl_hwdec_drivers[n]; n++) { - const struct ra_hwdec_driver *drv = mpgl_hwdec_drivers[n]; - const char *api_name = m_opt_choice_str(mp_hwdec_names, drv->api); + for (int n = 0; ra_hwdec_drivers[n]; n++) { + const struct ra_hwdec_driver *drv = ra_hwdec_drivers[n]; if (help) { - mp_info(log, " %s [%s]\n", drv->name, api_name); - } else if (bstr_equals0(param, drv->name) || - bstr_equals0(param, api_name)) - { + mp_info(log, " %s\n", drv->name); + } else if (bstr_equals0(param, drv->name)) { return 1; } } if (help) { - mp_info(log, " auto (loads best)\n" - " (other --hwdec values)\n" - "Setting an empty string means use --hwdec.\n"); + mp_info(log, " auto (behavior depends on context)\n" + " all (load all hwdecs)\n" + " no (do not load any and block loading on demand)\n"); return M_OPT_EXIT; } if (!param.len) return 1; // "" is treated specially - for (int n = 0; mp_hwdec_names[n].name; n++) { - if (bstr_equals0(param, mp_hwdec_names[n].name)) - return 1; - } + 
if (bstr_equals0(param, "all") || bstr_equals0(param, "auto") || + bstr_equals0(param, "no")) + return 1; mp_fatal(log, "No hwdec backend named '%.*s' found!\n", BSTR_P(param)); return M_OPT_INVALID; } diff --git a/video/out/opengl/hwdec.h b/video/out/gpu/hwdec.h index 20bbaae..258ab88 100644 --- a/video/out/opengl/hwdec.h +++ b/video/out/gpu/hwdec.h @@ -72,17 +72,14 @@ struct ra_hwdec_mapper_driver { }; struct ra_hwdec_driver { - // Name of the interop backend. This is used for informational purposes only. + // Name of the interop backend. This is used for informational purposes and + // for use with debugging options. const char *name; // Used to create ra_hwdec.priv. size_t priv_size; - // Used to explicitly request a specific API. - enum hwdec_type api; // One of the hardware surface IMGFMT_ that must be passed to map_image later. // Terminated with a 0 entry. (Extend the array size as needed.) const int imgfmts[3]; - // Dosn't load this unless requested by name. - bool testing_only; // Create the hwdec device. It must add it to hw->devs, if applicable. 
int (*init)(struct ra_hwdec *hw); @@ -104,15 +101,13 @@ struct ra_hwdec_driver { struct mp_rect *src, struct mp_rect *dst, bool newframe); }; -struct ra_hwdec *ra_hwdec_load_api(struct mp_log *log, struct ra *ra, - struct mpv_global *g, - struct mp_hwdec_devices *devs, - enum hwdec_type api); +extern const struct ra_hwdec_driver *const ra_hwdec_drivers[]; -struct ra_hwdec *ra_hwdec_load(struct mp_log *log, struct ra *ra, - struct mpv_global *g, - struct mp_hwdec_devices *devs, - const char *name); +struct ra_hwdec *ra_hwdec_load_driver(struct ra *ra, struct mp_log *log, + struct mpv_global *global, + struct mp_hwdec_devices *devs, + const struct ra_hwdec_driver *drv, + bool is_auto); int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt, struct bstr name, struct bstr param); diff --git a/video/out/opengl/lcms.c b/video/out/gpu/lcms.c index 8747ae6..3552351 100644 --- a/video/out/opengl/lcms.c +++ b/video/out/gpu/lcms.c @@ -236,7 +236,7 @@ static cmsHPROFILE get_vid_profile(struct gl_lcms *p, cmsContext cms, } // Otherwise, warn the user and generate the profile as usual - MP_WARN(p, "Video contained an invalid ICC profile! Ignoring..\n"); + MP_WARN(p, "Video contained an invalid ICC profile! 
Ignoring...\n"); } // The input profile for the transformation is dependent on the video diff --git a/video/out/opengl/lcms.h b/video/out/gpu/lcms.h index 35bbd61..35bbd61 100644 --- a/video/out/opengl/lcms.h +++ b/video/out/gpu/lcms.h diff --git a/video/out/opengl/osd.c b/video/out/gpu/osd.c index f7c325d..317deb6 100644 --- a/video/out/opengl/osd.c +++ b/video/out/gpu/osd.c @@ -47,7 +47,6 @@ static const struct ra_renderpass_input vertex_vao[] = { {"position", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, position)}, {"texcoord" , RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord)}, {"ass_color", RA_VARTYPE_BYTE_UNORM, 4, 1, offsetof(struct vertex, ass_color)}, - {0} }; struct mpgl_osd_part { @@ -231,8 +230,6 @@ bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index, abort(); } - gl_sc_set_vertex_format(sc, vertex_vao, sizeof(struct vertex)); - return true; } @@ -256,8 +253,8 @@ static void write_quad(struct vertex *va, struct gl_transform t, static void generate_verts(struct mpgl_osd_part *part, struct gl_transform t) { - int num_vertices = part->num_subparts * 6; - MP_TARRAY_GROW(part, part->vertices, part->num_vertices + num_vertices); + MP_TARRAY_GROW(part, part->vertices, + part->num_vertices + part->num_subparts * 6); for (int n = 0; n < part->num_subparts; n++) { struct sub_bitmap *b = &part->subparts[n]; @@ -269,13 +266,13 @@ static void generate_verts(struct mpgl_osd_part *part, struct gl_transform t) uint8_t color[4] = { c >> 24, (c >> 16) & 0xff, (c >> 8) & 0xff, 255 - (c & 0xff) }; - write_quad(&va[n * 6], t, + write_quad(va, t, b->x, b->y, b->x + b->dw, b->y + b->dh, b->src_x, b->src_y, b->src_x + b->w, b->src_y + b->h, part->w, part->h, color); - } - part->num_vertices += num_vertices; + part->num_vertices += 6; + } } // number of screen divisions per axis (x=0, y=1) for the current 3D mode @@ -291,7 +288,7 @@ static void get_3d_side_by_side(int stereo_mode, int div[2]) } void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index, - 
struct gl_shader_cache *sc, struct fbodst target) + struct gl_shader_cache *sc, struct ra_fbo fbo) { struct mpgl_osd_part *part = ctx->parts[index]; @@ -303,7 +300,7 @@ void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index, for (int x = 0; x < div[0]; x++) { for (int y = 0; y < div[1]; y++) { struct gl_transform t; - gl_transform_ortho_fbodst(&t, target); + gl_transform_ortho_fbo(&t, fbo); float a_x = ctx->osd_res.w * x; float a_y = ctx->osd_res.h * y; @@ -317,7 +314,8 @@ void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index, const int *factors = &blend_factors[part->format][0]; gl_sc_blend(sc, factors[0], factors[1], factors[2], factors[3]); - gl_sc_dispatch_draw(sc, target.tex, part->vertices, part->num_vertices); + gl_sc_dispatch_draw(sc, fbo.tex, vertex_vao, MP_ARRAY_SIZE(vertex_vao), + sizeof(struct vertex), part->vertices, part->num_vertices); } static void set_res(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode) diff --git a/video/out/opengl/osd.h b/video/out/gpu/osd.h index 6c2b886..00fbc49 100644 --- a/video/out/opengl/osd.h +++ b/video/out/gpu/osd.h @@ -18,7 +18,7 @@ void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mod bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index, struct gl_shader_cache *sc); void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index, - struct gl_shader_cache *sc, struct fbodst target); + struct gl_shader_cache *sc, struct ra_fbo fbo); bool mpgl_osd_check_change(struct mpgl_osd *ctx, struct mp_osd_res *res, double pts); diff --git a/video/out/opengl/ra.c b/video/out/gpu/ra.c index 208507d..fdb20fe 100644 --- a/video/out/opengl/ra.c +++ b/video/out/gpu/ra.c @@ -71,7 +71,7 @@ static struct ra_renderpass_input *dup_inputs(void *ta_parent, } // Return a newly allocated deep-copy of params. 
-struct ra_renderpass_params *ra_render_pass_params_copy(void *ta_parent, +struct ra_renderpass_params *ra_renderpass_params_copy(void *ta_parent, const struct ra_renderpass_params *params) { struct ra_renderpass_params *res = talloc_ptrtype(ta_parent, res); @@ -86,6 +86,65 @@ struct ra_renderpass_params *ra_render_pass_params_copy(void *ta_parent, return res; }; +struct glsl_fmt { + enum ra_ctype ctype; + int num_components; + int component_depth[4]; + const char *glsl_format; +}; + +// List taken from the GLSL specification, sans snorm and sint formats +static const struct glsl_fmt ra_glsl_fmts[] = { + {RA_CTYPE_FLOAT, 1, {16}, "r16f"}, + {RA_CTYPE_FLOAT, 1, {32}, "r32f"}, + {RA_CTYPE_FLOAT, 2, {16, 16}, "rg16f"}, + {RA_CTYPE_FLOAT, 2, {32, 32}, "rg32f"}, + {RA_CTYPE_FLOAT, 4, {16, 16, 16, 16}, "rgba16f"}, + {RA_CTYPE_FLOAT, 4, {32, 32, 32, 32}, "rgba32f"}, + {RA_CTYPE_FLOAT, 3, {11, 11, 10}, "r11f_g11f_b10f"}, + + {RA_CTYPE_UNORM, 1, {8}, "r8"}, + {RA_CTYPE_UNORM, 1, {16}, "r16"}, + {RA_CTYPE_UNORM, 2, {8, 8}, "rg8"}, + {RA_CTYPE_UNORM, 2, {16, 16}, "rg16"}, + {RA_CTYPE_UNORM, 4, {8, 8, 8, 8}, "rgba8"}, + {RA_CTYPE_UNORM, 4, {16, 16, 16, 16}, "rgba16"}, + {RA_CTYPE_UNORM, 4, {10, 10, 10, 2}, "rgb10_a2"}, + + {RA_CTYPE_UINT, 1, {8}, "r8ui"}, + {RA_CTYPE_UINT, 1, {16}, "r16ui"}, + {RA_CTYPE_UINT, 1, {32}, "r32ui"}, + {RA_CTYPE_UINT, 2, {8, 8}, "rg8ui"}, + {RA_CTYPE_UINT, 2, {16, 16}, "rg16ui"}, + {RA_CTYPE_UINT, 2, {32, 32}, "rg32ui"}, + {RA_CTYPE_UINT, 4, {8, 8, 8, 8}, "rgba8ui"}, + {RA_CTYPE_UINT, 4, {16, 16, 16, 16}, "rgba16ui"}, + {RA_CTYPE_UINT, 4, {32, 32, 32, 32}, "rgba32ui"}, + {RA_CTYPE_UINT, 4, {10, 10, 10, 2}, "rgb10_a2ui"}, +}; + +const char *ra_fmt_glsl_format(const struct ra_format *fmt) +{ + for (int n = 0; n < MP_ARRAY_SIZE(ra_glsl_fmts); n++) { + const struct glsl_fmt *gfmt = &ra_glsl_fmts[n]; + + if (fmt->ctype != gfmt->ctype) + continue; + if (fmt->num_components != gfmt->num_components) + continue; + + for (int i = 0; i < fmt->num_components; 
i++) { + if (fmt->component_depth[i] != gfmt->component_depth[i]) + goto next_fmt; + } + + return gfmt->glsl_format; + +next_fmt: ; // equivalent to `continue` + } + + return NULL; +} // Return whether this is a tightly packed format with no external padding and // with the same bit size/depth in all components, and the shader returns diff --git a/video/out/opengl/ra.h b/video/out/gpu/ra.h index 46a69f2..934e5db 100644 --- a/video/out/opengl/ra.h +++ b/video/out/gpu/ra.h @@ -26,6 +26,9 @@ struct ra { // time. size_t max_shmem; + // Maximum push constant size. Set by the RA backend at init time. + size_t max_pushc_size; + // Set of supported texture formats. Must be added by RA backend at init time. // If there are equivalent formats with different caveats, the preferred // formats should have a lower index. (E.g. GLES3 should put rg8 before la.) @@ -47,8 +50,9 @@ enum { RA_CAP_BUF_RO = 1 << 5, // supports RA_VARTYPE_BUF_RO RA_CAP_BUF_RW = 1 << 6, // supports RA_VARTYPE_BUF_RW RA_CAP_NESTED_ARRAY = 1 << 7, // supports nested arrays - RA_CAP_SHARED_BINDING = 1 << 8, // sampler/image/buffer namespaces are disjoint - RA_CAP_GLOBAL_UNIFORM = 1 << 9, // supports using "naked" uniforms (not UBO) + RA_CAP_GLOBAL_UNIFORM = 1 << 8, // supports using "naked" uniforms (not UBO) + RA_CAP_GATHER = 1 << 9, // supports textureGather in GLSL + RA_CAP_FRAGCOORD = 1 << 10, // supports reading from gl_FragCoord }; enum ra_ctype { @@ -85,6 +89,10 @@ struct ra_format { // shader representation is given by the special_imgfmt_desc pointer. int special_imgfmt; const struct ra_imgfmt_desc *special_imgfmt_desc; + + // This gives the GLSL image format corresponding to the format, if any. + // (e.g. rgba16ui) + const char *glsl_format; }; struct ra_tex_params { @@ -139,13 +147,14 @@ struct ra_tex_upload_params { ptrdiff_t stride; // The size of a horizontal line in bytes (*not* texels!) }; -// Buffer type hint. 
Setting this may result in more or less efficient -// operation, although it shouldn't technically prohibit anything +// Buffer usage type. This restricts what types of operations may be performed +// on a buffer. enum ra_buf_type { RA_BUF_TYPE_INVALID, RA_BUF_TYPE_TEX_UPLOAD, // texture upload buffer (pixel buffer object) RA_BUF_TYPE_SHADER_STORAGE, // shader buffer (SSBO), for RA_VARTYPE_BUF_RW RA_BUF_TYPE_UNIFORM, // uniform buffer (UBO), for RA_VARTYPE_BUF_RO + RA_BUF_TYPE_VERTEX, // not publicly usable (RA-internal usage) }; struct ra_buf_params { @@ -202,8 +211,8 @@ struct ra_renderpass_input { // RA_VARTYPE_IMG_W: image unit // RA_VARTYPE_BUF_* buffer binding point // Other uniforms: unused - // If RA_CAP_SHARED_BINDING is set, these may only be unique per input type. - // Otherwise, these must be unique for all input values. + // Bindings must be unique within each namespace, as specified by + // desc_namespace() int binding; }; @@ -244,6 +253,7 @@ struct ra_renderpass_params { // Uniforms, including texture/sampler inputs. struct ra_renderpass_input *inputs; int num_inputs; + size_t push_constants_size; // must be <= ra.max_pushc_size and a multiple of 4 // Highly implementation-specific byte array storing a compiled version // of the program. Can be used to speed up shader compilation. A backend @@ -281,7 +291,7 @@ struct ra_renderpass_params { const char *compute_shader; }; -struct ra_renderpass_params *ra_render_pass_params_copy(void *ta_parent, +struct ra_renderpass_params *ra_renderpass_params_copy(void *ta_parent, const struct ra_renderpass_params *params); // Conflates the following typical GPU API concepts: @@ -316,6 +326,7 @@ struct ra_renderpass_run_params { // even if they do not change. 
struct ra_renderpass_input_val *values; int num_values; + void *push_constants; // must be set if params.push_constants_size > 0 // --- pass->params.type==RA_RENDERPASS_TYPE_RASTER only @@ -369,10 +380,10 @@ struct ra_fns { void (*buf_destroy)(struct ra *ra, struct ra_buf *buf); - // Update the contents of a buffer, starting at a given offset and up to a - // given size, with the contents of *data. This is an extremely common - // operation. Calling this while the buffer is considered "in use" is an - // error. (See: buf_poll) + // Update the contents of a buffer, starting at a given offset (*must* be a + // multiple of 4) and up to a given size, with the contents of *data. This + // is an extremely common operation. Calling this while the buffer is + // considered "in use" is an error. (See: buf_poll) void (*buf_update)(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, const void *data, size_t size); @@ -386,6 +397,15 @@ struct ra_fns { // but must be implemented if RA_CAP_BUF_RO is supported. struct ra_layout (*uniform_layout)(struct ra_renderpass_input *inp); + // Returns the layout requirements of a push constant element. Optional, + // but must be implemented if ra.max_pushc_size > 0. + struct ra_layout (*push_constant_layout)(struct ra_renderpass_input *inp); + + // Returns an abstract namespace index for a given renderpass input type. + // This will always be a value >= 0 and < RA_VARTYPE_COUNT. This is used to + // figure out which inputs may share the same value of `binding`. + int (*desc_namespace)(enum ra_vartype type); + // Clear the dst with the given color (rgba) and within the given scissor. // dst must have dst->params.render_dst==true. Content outside of the // scissor is preserved. @@ -436,9 +456,6 @@ struct ra_fns { // delayed by a few frames. When no value is available, this returns 0. uint64_t (*timer_stop)(struct ra *ra, ra_timer *timer); - // Hint that possibly queued up commands should be sent to the GPU. Optional. 
- void (*flush)(struct ra *ra); - // Associates a marker with any past error messages, for debugging // purposes. Optional. void (*debug_marker)(struct ra *ra, const char *msg); @@ -483,6 +500,8 @@ struct ra_imgfmt_desc { uint8_t components[4][4]; }; +const char *ra_fmt_glsl_format(const struct ra_format *fmt); + bool ra_get_imgfmt_desc(struct ra *ra, int imgfmt, struct ra_imgfmt_desc *out); void ra_dump_tex_formats(struct ra *ra, int msgl); diff --git a/video/out/opengl/shader_cache.c b/video/out/gpu/shader_cache.c index 90a7576..6d0f370 100644 --- a/video/out/opengl/shader_cache.c +++ b/video/out/gpu/shader_cache.c @@ -14,7 +14,6 @@ #include "options/path.h" #include "stream/stream.h" #include "shader_cache.h" -#include "formats.h" #include "utils.h" // Force cache flush if more than this number of shaders is created. @@ -30,6 +29,7 @@ union uniform_val { enum sc_uniform_type { SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM) SC_UNIFORM_TYPE_UBO = 1, // uniform buffer (RA_CAP_BUF_RO) + SC_UNIFORM_TYPE_PUSHC = 2, // push constant (ra.max_pushc_size) }; struct sc_uniform { @@ -38,7 +38,7 @@ struct sc_uniform { const char *glsl_type; union uniform_val v; char *buffer_format; - // for SC_UNIFORM_TYPE_UBO: + // for SC_UNIFORM_TYPE_UBO/PUSHC: struct ra_layout layout; size_t offset; // byte offset within the buffer }; @@ -57,6 +57,7 @@ struct sc_entry { struct timer_pool *timer; struct ra_buf *ubo; int ubo_index; // for ra_renderpass_input_val.index + void *pushc; }; struct gl_shader_cache { @@ -75,6 +76,7 @@ struct gl_shader_cache { // Next binding point (texture unit, image unit, buffer binding, etc.) 
// In OpenGL these are separate for each input type int next_binding[RA_VARTYPE_COUNT]; + bool next_uniform_dynamic; struct ra_renderpass_params params; @@ -88,6 +90,7 @@ struct gl_shader_cache { int ubo_binding; size_t ubo_size; + size_t pushc_size; struct ra_renderpass_input_val *values; int num_values; @@ -105,8 +108,6 @@ struct gl_shader_cache { struct mpv_global *global; // can be NULL }; -static void gl_sc_reset(struct gl_shader_cache *sc); - struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global, struct mp_log *log) { @@ -121,8 +122,8 @@ struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global, } // Reset the previous pass. This must be called after gl_sc_generate and before -// starting a new shader. -static void gl_sc_reset(struct gl_shader_cache *sc) +// starting a new shader. It may also be called on errors. +void gl_sc_reset(struct gl_shader_cache *sc) { sc->prelude_text.len = 0; sc->header_text.len = 0; @@ -132,8 +133,10 @@ static void gl_sc_reset(struct gl_shader_cache *sc) sc->num_uniforms = 0; sc->ubo_binding = 0; sc->ubo_size = 0; + sc->pushc_size = 0; for (int i = 0; i < RA_VARTYPE_COUNT; i++) sc->next_binding[i] = 0; + sc->next_uniform_dynamic = false; sc->current_shader = NULL; sc->params = (struct ra_renderpass_params){0}; sc->needs_reset = false; @@ -141,7 +144,7 @@ static void gl_sc_reset(struct gl_shader_cache *sc) static void sc_flush_cache(struct gl_shader_cache *sc) { - MP_VERBOSE(sc, "flushing shader cache\n"); + MP_DBG(sc, "flushing shader cache\n"); for (int n = 0; n < sc->num_entries; n++) { struct sc_entry *e = sc->entries[n]; @@ -251,32 +254,59 @@ static struct sc_uniform *find_uniform(struct gl_shader_cache *sc, static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type) { - if (sc->ra->caps & RA_CAP_SHARED_BINDING) { - return sc->next_binding[type]++; - } else { - return sc->next_binding[0]++; - } + return sc->next_binding[sc->ra->fns->desc_namespace(type)]++; } -// 
Updates the UBO metadata for the given sc_uniform. Assumes sc_uniform->input -// is already set. Also updates sc_uniform->type. -static void update_ubo_params(struct gl_shader_cache *sc, struct sc_uniform *u) +void gl_sc_uniform_dynamic(struct gl_shader_cache *sc) { - if (!(sc->ra->caps & RA_CAP_BUF_RO)) - return; + sc->next_uniform_dynamic = true; +} + +// Updates the metadata for the given sc_uniform. Assumes sc_uniform->input +// and glsl_type/buffer_format are already set. +static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform *u) +{ + bool dynamic = sc->next_uniform_dynamic; + sc->next_uniform_dynamic = false; + + // Try not using push constants for "large" values like matrices, since + // this is likely to both exceed the VGPR budget as well as the pushc size + // budget + bool try_pushc = u->input.dim_m == 1 || dynamic; + + // Attempt using push constants first + if (try_pushc && sc->ra->glsl_vulkan && sc->ra->max_pushc_size) { + struct ra_layout layout = sc->ra->fns->push_constant_layout(&u->input); + size_t offset = MP_ALIGN_UP(sc->pushc_size, layout.align); + // Push constants have limited size, so make sure we don't exceed this + size_t new_size = offset + layout.size; + if (new_size <= sc->ra->max_pushc_size) { + u->type = SC_UNIFORM_TYPE_PUSHC; + u->layout = layout; + u->offset = offset; + sc->pushc_size = new_size; + return; + } + } - // Using UBOs with explicit layout(offset) like we do requires GLSL version - // 440 or higher. In theory the UBO code can also use older versions, but - // just try and avoid potential headaches. This also ensures they're only - // used on drivers that are probably modern enough to actually support them - // correctly. - if (sc->ra->glsl_version < 440) + // Attempt using uniform buffer next. The GLSL version 440 check is due + // to explicit offsets on UBO entries. 
In theory we could leave away + // the offsets and support UBOs for older GL as well, but this is a nice + // safety net for driver bugs (and also rules out potentially buggy drivers) + // Also avoid UBOs for highly dynamic stuff since that requires synchronizing + // the UBO writes every frame + bool try_ubo = !(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM) || !dynamic; + if (try_ubo && sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) { + u->type = SC_UNIFORM_TYPE_UBO; + u->layout = sc->ra->fns->uniform_layout(&u->input); + u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align); + sc->ubo_size = u->offset + u->layout.size; return; + } - u->type = SC_UNIFORM_TYPE_UBO; - u->layout = sc->ra->fns->uniform_layout(&u->input); - u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align); - sc->ubo_size = u->offset + u->layout.size; + // If all else fails, use global uniforms + assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM); + u->type = SC_UNIFORM_TYPE_GLOBAL; } void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, @@ -337,7 +367,7 @@ void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f) struct sc_uniform *u = find_uniform(sc, name); u->input.type = RA_VARTYPE_FLOAT; u->glsl_type = "float"; - update_ubo_params(sc, u); + update_uniform_params(sc, u); u->v.f[0] = f; } @@ -346,7 +376,7 @@ void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i) struct sc_uniform *u = find_uniform(sc, name); u->input.type = RA_VARTYPE_INT; u->glsl_type = "int"; - update_ubo_params(sc, u); + update_uniform_params(sc, u); u->v.i[0] = i; } @@ -356,18 +386,18 @@ void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2]) u->input.type = RA_VARTYPE_FLOAT; u->input.dim_v = 2; u->glsl_type = "vec2"; - update_ubo_params(sc, u); + update_uniform_params(sc, u); u->v.f[0] = f[0]; u->v.f[1] = f[1]; } -void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, GLfloat f[3]) +void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float 
f[3]) { struct sc_uniform *u = find_uniform(sc, name); u->input.type = RA_VARTYPE_FLOAT; u->input.dim_v = 3; u->glsl_type = "vec3"; - update_ubo_params(sc, u); + update_uniform_params(sc, u); u->v.f[0] = f[0]; u->v.f[1] = f[1]; u->v.f[2] = f[2]; @@ -379,14 +409,14 @@ static void transpose2x2(float r[2 * 2]) } void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, - bool transpose, GLfloat *v) + bool transpose, float *v) { struct sc_uniform *u = find_uniform(sc, name); u->input.type = RA_VARTYPE_FLOAT; u->input.dim_v = 2; u->input.dim_m = 2; u->glsl_type = "mat2"; - update_ubo_params(sc, u); + update_uniform_params(sc, u); for (int n = 0; n < 4; n++) u->v.f[n] = v[n]; if (transpose) @@ -401,34 +431,20 @@ static void transpose3x3(float r[3 * 3]) } void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, - bool transpose, GLfloat *v) + bool transpose, float *v) { struct sc_uniform *u = find_uniform(sc, name); u->input.type = RA_VARTYPE_FLOAT; u->input.dim_v = 3; u->input.dim_m = 3; u->glsl_type = "mat3"; - update_ubo_params(sc, u); + update_uniform_params(sc, u); for (int n = 0; n < 9; n++) u->v.f[n] = v[n]; if (transpose) transpose3x3(&u->v.f[0]); } -// Tell the shader generator (and later gl_sc_draw_data()) about the vertex -// data layout and attribute names. The entries array is terminated with a {0} -// entry. The array memory must remain valid indefinitely (for now). 
-void gl_sc_set_vertex_format(struct gl_shader_cache *sc, - const struct ra_renderpass_input *entries, - int vertex_stride) -{ - sc->params.vertex_attribs = (struct ra_renderpass_input *)entries; - sc->params.num_vertex_attribs = 0; - while (entries[sc->params.num_vertex_attribs].name) - sc->params.num_vertex_attribs++; - sc->params.vertex_stride = vertex_stride; -} - void gl_sc_blend(struct gl_shader_cache *sc, enum ra_blend blend_src_rgb, enum ra_blend blend_dst_rgb, @@ -468,6 +484,20 @@ static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u) } } +static void update_pushc(struct ra *ra, void *pushc, struct sc_uniform *u) +{ + uintptr_t src = (uintptr_t) &u->v; + uintptr_t dst = (uintptr_t) pushc + (ptrdiff_t) u->offset; + struct ra_layout src_layout = ra_renderpass_input_layout(&u->input); + struct ra_layout dst_layout = u->layout; + + for (int i = 0; i < u->input.dim_m; i++) { + memcpy((void *)dst, (void *)src, src_layout.stride); + src += src_layout.stride; + dst += dst_layout.stride; + } +} + static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e, struct sc_uniform *u, int n) { @@ -479,6 +509,13 @@ static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e, un->v = u->v; un->set = true; + static const char *desc[] = { + [SC_UNIFORM_TYPE_UBO] = "UBO", + [SC_UNIFORM_TYPE_PUSHC] = "PC", + [SC_UNIFORM_TYPE_GLOBAL] = "global", + }; + MP_TRACE(sc, "Updating %s uniform '%s'\n", desc[u->type], u->input.name); + switch (u->type) { case SC_UNIFORM_TYPE_GLOBAL: { struct ra_renderpass_input_val value = { @@ -492,6 +529,10 @@ static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e, assert(e->ubo); update_ubo(sc->ra, e->ubo, u); break; + case SC_UNIFORM_TYPE_PUSHC: + assert(e->pushc); + update_pushc(sc->ra, e->pushc, u); + break; default: abort(); } } @@ -509,25 +550,6 @@ static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry) void *tmp = talloc_new(NULL); struct 
ra_renderpass_params params = sc->params; - MP_VERBOSE(sc, "new shader program:\n"); - if (sc->header_text.len) { - MP_VERBOSE(sc, "header:\n"); - mp_log_source(sc->log, MSGL_V, sc->header_text.start); - MP_VERBOSE(sc, "body:\n"); - } - if (sc->text.len) - mp_log_source(sc->log, MSGL_V, sc->text.start); - - // The vertex shader uses mangled names for the vertex attributes, so that - // the fragment shader can use the "real" names. But the shader is expecting - // the vertex attribute names (at least with older GLSL targets for GL). - params.vertex_attribs = talloc_memdup(tmp, params.vertex_attribs, - params.num_vertex_attribs * sizeof(params.vertex_attribs[0])); - for (int n = 0; n < params.num_vertex_attribs; n++) { - struct ra_renderpass_input *attrib = ¶ms.vertex_attribs[n]; - attrib->name = talloc_asprintf(tmp, "vertex_%s", attrib->name); - } - const char *cache_header = "mpv shader cache v1\n"; char *cache_filename = NULL; char *cache_dir = NULL; @@ -552,7 +574,7 @@ static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry) cache_filename = mp_path_join(tmp, cache_dir, hashstr); if (stat(cache_filename, &(struct stat){0}) == 0) { - MP_VERBOSE(sc, "Trying to load shader from disk...\n"); + MP_DBG(sc, "Trying to load shader from disk...\n"); struct bstr cachedata = stream_read_file(cache_filename, tmp, sc->global, 1000000000); if (bstr_eatstart0(&cachedata, cache_header)) @@ -574,9 +596,10 @@ static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry) MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input); } - entry->pass = sc->ra->fns->renderpass_create(sc->ra, ¶ms); - if (!entry->pass) - goto error; + if (sc->pushc_size) { + params.push_constants_size = MP_ALIGN_UP(sc->pushc_size, 4); + entry->pushc = talloc_zero_size(entry, params.push_constants_size); + } if (sc->ubo_size) { struct ra_buf_params ubo_params = { @@ -592,12 +615,16 @@ static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry) } } + 
entry->pass = sc->ra->fns->renderpass_create(sc->ra, ¶ms); + if (!entry->pass) + goto error; + if (entry->pass && cache_filename) { bstr nc = entry->pass->params.cached_program; if (nc.len && !bstr_equals(params.cached_program, nc)) { mp_mkdirp(cache_dir); - MP_VERBOSE(sc, "Writing shader cache file: %s\n", cache_filename); + MP_DBG(sc, "Writing shader cache file: %s\n", cache_filename); FILE *out = fopen(cache_filename, "wb"); if (out) { fwrite(cache_header, strlen(cache_header), 1, out); @@ -626,8 +653,22 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) struct sc_uniform *u = &sc->uniforms[n]; if (u->type != SC_UNIFORM_TYPE_UBO) continue; - ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, - u->glsl_type, u->input.name); + ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type, + u->input.name); + } + ADD(dst, "};\n"); + } + + // Ditto for push constants + if (sc->pushc_size > 0) { + ADD(dst, "layout(std430, push_constant) uniform PushC {\n"); + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + if (u->type != SC_UNIFORM_TYPE_PUSHC) + continue; + // push constants don't support explicit offsets + ADD(dst, "/*offset=%zu*/ %s %s;\n", u->offset, u->glsl_type, + u->input.name); } ADD(dst, "};\n"); } @@ -642,7 +683,6 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM); // fall through case RA_VARTYPE_TEX: - case RA_VARTYPE_IMG_W: // Vulkan requires explicitly assigning the bindings in the shader // source. 
For OpenGL it's optional, but requires higher GL version // so we don't do it (and instead have ra_gl update the bindings @@ -659,6 +699,22 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) ADD(dst, "layout(std430, binding=%d) buffer %s { %s };\n", u->input.binding, u->input.name, u->buffer_format); break; + case RA_VARTYPE_IMG_W: { + // For better compatibility, we have to explicitly label the + // type of data we will be reading/writing to this image. + const char *fmt = u->v.tex->params.format->glsl_format; + + if (sc->ra->glsl_vulkan) { + if (fmt) { + ADD(dst, "layout(binding=%d, %s) ", u->input.binding, fmt); + } else { + ADD(dst, "layout(binding=%d) ", u->input.binding); + } + } else if (fmt) { + ADD(dst, "layout(%s) ", fmt); + } + ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name); + } } } } @@ -674,7 +730,9 @@ static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) // and fragment operations needed for the next program have to be re-added.) static void gl_sc_generate(struct gl_shader_cache *sc, enum ra_renderpass_type type, - const struct ra_format *target_format) + const struct ra_format *target_format, + const struct ra_renderpass_input *vao, + int vao_len, size_t vertex_stride) { int glsl_version = sc->ra->glsl_version; int glsl_es = sc->ra->glsl_es ? 
glsl_version : 0; @@ -686,9 +744,6 @@ static void gl_sc_generate(struct gl_shader_cache *sc, assert(!sc->needs_reset); sc->needs_reset = true; - // gl_sc_set_vertex_format() must always be called - assert(sc->params.vertex_attribs); - // If using a UBO, pick a binding (needed for shader generation) if (sc->ubo_size) sc->ubo_binding = gl_sc_next_binding(sc, RA_VARTYPE_BUF_RO); @@ -745,8 +800,8 @@ static void gl_sc_generate(struct gl_shader_cache *sc, bstr *vert_body = &sc->tmp[2]; ADD(vert_body, "void main() {\n"); bstr *frag_vaos = &sc->tmp[3]; - for (int n = 0; n < sc->params.num_vertex_attribs; n++) { - const struct ra_renderpass_input *e = &sc->params.vertex_attribs[n]; + for (int n = 0; n < vao_len; n++) { + const struct ra_renderpass_input *e = &vao[n]; const char *glsl_type = vao_glsl_type(e); char loc[32] = {0}; if (sc->ra->glsl_vulkan) @@ -857,6 +912,19 @@ static void gl_sc_generate(struct gl_shader_cache *sc, .total = bstrdup(entry, *hash_total), .timer = timer_pool_create(sc->ra), }; + + // The vertex shader uses mangled names for the vertex attributes, so + // that the fragment shader can use the "real" names. But the shader is + // expecting the vertex attribute names (at least with older GLSL + // targets for GL). 
+ sc->params.vertex_stride = vertex_stride; + for (int n = 0; n < vao_len; n++) { + struct ra_renderpass_input attrib = vao[n]; + attrib.name = talloc_asprintf(entry, "vertex_%s", attrib.name); + MP_TARRAY_APPEND(sc, sc->params.vertex_attribs, + sc->params.num_vertex_attribs, attrib); + } + for (int n = 0; n < sc->num_uniforms; n++) { struct sc_cached_uniform u = {0}; if (sc->uniforms[n].type == SC_UNIFORM_TYPE_GLOBAL) { @@ -872,8 +940,11 @@ static void gl_sc_generate(struct gl_shader_cache *sc, sc->error_state = true; MP_TARRAY_APPEND(sc, sc->entries, sc->num_entries, entry); } - if (sc->error_state) + + if (!entry->pass) { + sc->current_shader = NULL; return; + } assert(sc->num_uniforms == entry->num_cached_uniforms); @@ -895,11 +966,14 @@ static void gl_sc_generate(struct gl_shader_cache *sc, struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, struct ra_tex *target, - void *ptr, size_t num) + const struct ra_renderpass_input *vao, + int vao_len, size_t vertex_stride, + void *vertices, size_t num_vertices) { struct timer_pool *timer = NULL; - gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER, target->params.format); + gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER, target->params.format, + vao, vao_len, vertex_stride); if (!sc->current_shader) goto error; @@ -911,9 +985,10 @@ struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, .pass = sc->current_shader->pass, .values = sc->values, .num_values = sc->num_values, + .push_constants = sc->current_shader->pushc, .target = target, - .vertex_data = ptr, - .vertex_count = num, + .vertex_data = vertices, + .vertex_count = num_vertices, .viewport = full_rc, .scissors = full_rc, }; @@ -932,7 +1007,7 @@ struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc, { struct timer_pool *timer = NULL; - gl_sc_generate(sc, RA_RENDERPASS_TYPE_COMPUTE, NULL); + gl_sc_generate(sc, RA_RENDERPASS_TYPE_COMPUTE, NULL, NULL, 0, 0); if (!sc->current_shader) goto error; @@ -942,6 +1017,7 @@ struct 
mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc, .pass = sc->current_shader->pass, .values = sc->values, .num_values = sc->num_values, + .push_constants = sc->current_shader->pushc, .compute_groups = {w, h, d}, }; diff --git a/video/out/opengl/shader_cache.h b/video/out/gpu/shader_cache.h index 82a0780..2fe7dcf 100644 --- a/video/out/opengl/shader_cache.h +++ b/video/out/gpu/shader_cache.h @@ -25,6 +25,10 @@ void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text); void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) PRINTF_ATTRIBUTE(2, 3); + +// A hint that the next data-type (i.e. non-binding) uniform is expected to +// change frequently. This refers to the _f, _i, _vecN etc. uniform types. +void gl_sc_uniform_dynamic(struct gl_shader_cache *sc); void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, struct ra_tex *tex); void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name, @@ -39,9 +43,6 @@ void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, bool transpose, float *v); void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, bool transpose, float *v); -void gl_sc_set_vertex_format(struct gl_shader_cache *sc, - const struct ra_renderpass_input *vertex_attribs, - int vertex_stride); void gl_sc_blend(struct gl_shader_cache *sc, enum ra_blend blend_src_rgb, enum ra_blend blend_dst_rgb, @@ -50,7 +51,12 @@ void gl_sc_blend(struct gl_shader_cache *sc, void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name); struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, struct ra_tex *target, + const struct ra_renderpass_input *vao, + int vao_len, size_t vertex_stride, void *ptr, size_t num); struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc, int w, int h, int d); +// The application can call this on errors, to reset the current shader. 
This +// is normally done implicitly by gl_sc_dispatch_* +void gl_sc_reset(struct gl_shader_cache *sc); void gl_sc_set_cache_dir(struct gl_shader_cache *sc, const char *dir); diff --git a/video/out/gpu/spirv.c b/video/out/gpu/spirv.c new file mode 100644 index 0000000..e20fbe7 --- /dev/null +++ b/video/out/gpu/spirv.c @@ -0,0 +1,78 @@ +#include "common/msg.h" +#include "options/m_config.h" + +#include "spirv.h" +#include "config.h" + +extern const struct spirv_compiler_fns spirv_shaderc; +extern const struct spirv_compiler_fns spirv_nvidia_builtin; + +// in probe-order +enum { + SPIRV_AUTO = 0, + SPIRV_SHADERC, // generally preferred, but not packaged everywhere + SPIRV_NVIDIA, // can be useful for testing, only available on nvidia +}; + +static const struct spirv_compiler_fns *compilers[] = { +#if HAVE_SHADERC + [SPIRV_SHADERC] = &spirv_shaderc, +#endif +#if HAVE_VULKAN + [SPIRV_NVIDIA] = &spirv_nvidia_builtin, +#endif +}; + +static const struct m_opt_choice_alternatives compiler_choices[] = { + {"auto", SPIRV_AUTO}, +#if HAVE_SHADERC + {"shaderc", SPIRV_SHADERC}, +#endif +#if HAVE_VULKAN + {"nvidia", SPIRV_NVIDIA}, +#endif + {0} +}; + +struct spirv_opts { + int compiler; +}; + +#define OPT_BASE_STRUCT struct spirv_opts +const struct m_sub_options spirv_conf = { + .opts = (const struct m_option[]) { + OPT_CHOICE_C("spirv-compiler", compiler, 0, compiler_choices), + {0} + }, + .size = sizeof(struct spirv_opts), +}; + +bool spirv_compiler_init(struct ra_ctx *ctx) +{ + void *tmp = talloc_new(NULL); + struct spirv_opts *opts = mp_get_config_group(tmp, ctx->global, &spirv_conf); + int compiler = opts->compiler; + talloc_free(tmp); + + for (int i = SPIRV_AUTO+1; i < MP_ARRAY_SIZE(compilers); i++) { + if (compiler != SPIRV_AUTO && i != compiler) + continue; + if (!compilers[i]) + continue; + + ctx->spirv = talloc_zero(ctx, struct spirv_compiler); + ctx->spirv->log = ctx->log, + ctx->spirv->fns = compilers[i]; + + const char *name = m_opt_choice_str(compiler_choices, i); 
+ strncpy(ctx->spirv->name, name, sizeof(ctx->spirv->name)); + MP_VERBOSE(ctx, "Initializing SPIR-V compiler '%s'\n", name); + if (ctx->spirv->fns->init(ctx)) + return true; + talloc_free(ctx->spirv); + ctx->spirv = NULL; + } + + MP_ERR(ctx, "Failed initializing SPIR-V compiler!\n"); + return false; +} diff --git a/video/out/gpu/spirv.h b/video/out/gpu/spirv.h new file mode 100644 index 0000000..e3dbd4f --- /dev/null +++ b/video/out/gpu/spirv.h @@ -0,0 +1,41 @@ +#pragma once + +#include "common/msg.h" +#include "common/common.h" +#include "context.h" + +enum glsl_shader { + GLSL_SHADER_VERTEX, + GLSL_SHADER_FRAGMENT, + GLSL_SHADER_COMPUTE, +}; + +#define SPIRV_NAME_MAX_LEN 32 + +struct spirv_compiler { + char name[SPIRV_NAME_MAX_LEN]; + const struct spirv_compiler_fns *fns; + struct mp_log *log; + void *priv; + + const char *required_ext; // or NULL + int glsl_version; // GLSL version supported + int compiler_version; // for cache invalidation, may be left as 0 + int ra_caps; // RA_CAP_* provided by this implementation, if any +}; + +struct spirv_compiler_fns { + // Compile GLSL to SPIR-V, under GL_KHR_vulkan_glsl semantics. + bool (*compile_glsl)(struct spirv_compiler *spirv, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *out_spirv); + + // Called by spirv_compiler_init / ra_ctx_destroy. These don't need to + // allocate/free ctx->spirv, that is done by the caller + bool (*init)(struct ra_ctx *ctx); + void (*uninit)(struct ra_ctx *ctx); // optional +}; + +// Initializes ctx->spirv to a valid SPIR-V compiler, or returns false on +// failure. Cleanup will be handled by ra_ctx_destroy. 
+bool spirv_compiler_init(struct ra_ctx *ctx); diff --git a/video/out/gpu/spirv_shaderc.c b/video/out/gpu/spirv_shaderc.c new file mode 100644 index 0000000..ee70205 --- /dev/null +++ b/video/out/gpu/spirv_shaderc.c @@ -0,0 +1,125 @@ +#include "common/msg.h" + +#include "context.h" +#include "spirv.h" + +#include <shaderc/shaderc.h> + +struct priv { + shaderc_compiler_t compiler; + shaderc_compile_options_t opts; +}; + +static void shaderc_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->spirv->priv; + if (!p) + return; + + shaderc_compile_options_release(p->opts); + shaderc_compiler_release(p->compiler); +} + +static bool shaderc_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->spirv->priv = talloc_zero(ctx->spirv, struct priv); + + p->compiler = shaderc_compiler_initialize(); + if (!p->compiler) + goto error; + p->opts = shaderc_compile_options_initialize(); + if (!p->opts) + goto error; + + shaderc_compile_options_set_optimization_level(p->opts, + shaderc_optimization_level_size); + if (ctx->opts.debug) + shaderc_compile_options_set_generate_debug_info(p->opts); + + int ver, rev; + shaderc_get_spv_version(&ver, &rev); + ctx->spirv->compiler_version = ver * 100 + rev; // forwards compatibility + ctx->spirv->glsl_version = 450; // impossible to query? 
+ return true; + +error: + shaderc_uninit(ctx); + return false; +} + +static shaderc_compilation_result_t compile(struct priv *p, + enum glsl_shader type, + const char *glsl, bool debug) +{ + static const shaderc_shader_kind kinds[] = { + [GLSL_SHADER_VERTEX] = shaderc_glsl_vertex_shader, + [GLSL_SHADER_FRAGMENT] = shaderc_glsl_fragment_shader, + [GLSL_SHADER_COMPUTE] = shaderc_glsl_compute_shader, + }; + + if (debug) { + return shaderc_compile_into_spv_assembly(p->compiler, glsl, strlen(glsl), + kinds[type], "input", "main", p->opts); + } else { + return shaderc_compile_into_spv(p->compiler, glsl, strlen(glsl), + kinds[type], "input", "main", p->opts); + } +} + +static bool shaderc_compile(struct spirv_compiler *spirv, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *out_spirv) +{ + struct priv *p = spirv->priv; + + shaderc_compilation_result_t res = compile(p, type, glsl, false); + int errs = shaderc_result_get_num_errors(res), + warn = shaderc_result_get_num_warnings(res), + msgl = errs ? MSGL_ERR : warn ? MSGL_WARN : MSGL_V; + + const char *msg = shaderc_result_get_error_message(res); + if (msg[0]) + MP_MSG(spirv, msgl, "shaderc output:\n%s", msg); + + int s = shaderc_result_get_compilation_status(res); + bool success = s == shaderc_compilation_status_success; + + static const char *results[] = { + [shaderc_compilation_status_success] = "success", + [shaderc_compilation_status_invalid_stage] = "invalid stage", + [shaderc_compilation_status_compilation_error] = "error", + [shaderc_compilation_status_internal_error] = "internal error", + [shaderc_compilation_status_null_result_object] = "no result", + [shaderc_compilation_status_invalid_assembly] = "invalid assembly", + }; + + const char *status = s < MP_ARRAY_SIZE(results) ? 
results[s] : "unknown"; + MP_MSG(spirv, msgl, "shaderc compile status '%s' (%d errors, %d warnings)\n", + status, errs, warn); + + if (success) { + void *bytes = (void *) shaderc_result_get_bytes(res); + out_spirv->len = shaderc_result_get_length(res); + out_spirv->start = talloc_memdup(tactx, bytes, out_spirv->len); + } + + // Also print SPIR-V disassembly for debugging purposes. Unfortunately + // there doesn't seem to be a way to get this except compiling the shader + // a second time.. + if (mp_msg_test(spirv->log, MSGL_TRACE)) { + shaderc_compilation_result_t dis = compile(p, type, glsl, true); + MP_TRACE(spirv, "Generated SPIR-V:\n%.*s", + (int)shaderc_result_get_length(dis), + shaderc_result_get_bytes(dis)); + shaderc_result_release(dis); + } + + shaderc_result_release(res); + return success; +} + +const struct spirv_compiler_fns spirv_shaderc = { + .compile_glsl = shaderc_compile, + .init = shaderc_init, + .uninit = shaderc_uninit, +}; diff --git a/video/out/opengl/user_shaders.c b/video/out/gpu/user_shaders.c index 58a1ac9..446941b 100644 --- a/video/out/opengl/user_shaders.c +++ b/video/out/gpu/user_shaders.c @@ -17,9 +17,9 @@ #include <assert.h> +#include "common/msg.h" #include "misc/ctype.h" #include "user_shaders.h" -#include "formats.h" static bool parse_rpn_szexpr(struct bstr line, struct szexp out[MAX_SZEXP_SIZE]) { diff --git a/video/out/opengl/user_shaders.h b/video/out/gpu/user_shaders.h index 94a070c..8d8cc6b 100644 --- a/video/out/opengl/user_shaders.h +++ b/video/out/gpu/user_shaders.h @@ -21,10 +21,8 @@ #include "utils.h" #include "ra.h" -#define SHADER_MAX_PASSES 32 #define SHADER_MAX_HOOKS 16 -#define SHADER_MAX_BINDS 6 -#define SHADER_MAX_SAVED 64 +#define SHADER_MAX_BINDS 16 #define MAX_SZEXP_SIZE 32 enum szexp_op { diff --git a/video/out/gpu/utils.c b/video/out/gpu/utils.c new file mode 100644 index 0000000..078a31c --- /dev/null +++ b/video/out/gpu/utils.c @@ -0,0 +1,332 @@ +#include "common/msg.h" +#include "video/out/vo.h" +#include 
"utils.h" + +// Standard parallel 2D projection, except y1 < y0 means that the coordinate +// system is flipped, not the projection. +void gl_transform_ortho(struct gl_transform *t, float x0, float x1, + float y0, float y1) +{ + if (y1 < y0) { + float tmp = y0; + y0 = tmp - y1; + y1 = tmp; + } + + t->m[0][0] = 2.0f / (x1 - x0); + t->m[0][1] = 0.0f; + t->m[1][0] = 0.0f; + t->m[1][1] = 2.0f / (y1 - y0); + t->t[0] = -(x1 + x0) / (x1 - x0); + t->t[1] = -(y1 + y0) / (y1 - y0); +} + +// Apply the effects of one transformation to another, transforming it in the +// process. In other words: post-composes t onto x +void gl_transform_trans(struct gl_transform t, struct gl_transform *x) +{ + struct gl_transform xt = *x; + x->m[0][0] = t.m[0][0] * xt.m[0][0] + t.m[0][1] * xt.m[1][0]; + x->m[1][0] = t.m[1][0] * xt.m[0][0] + t.m[1][1] * xt.m[1][0]; + x->m[0][1] = t.m[0][0] * xt.m[0][1] + t.m[0][1] * xt.m[1][1]; + x->m[1][1] = t.m[1][0] * xt.m[0][1] + t.m[1][1] * xt.m[1][1]; + gl_transform_vec(t, &x->t[0], &x->t[1]); +} + +void gl_transform_ortho_fbo(struct gl_transform *t, struct ra_fbo fbo) +{ + int y_dir = fbo.flip ? 
-1 : 1; + gl_transform_ortho(t, 0, fbo.tex->params.w, 0, fbo.tex->params.h * y_dir); +} + +void ra_buf_pool_uninit(struct ra *ra, struct ra_buf_pool *pool) +{ + for (int i = 0; i < pool->num_buffers; i++) + ra_buf_free(ra, &pool->buffers[i]); + + talloc_free(pool->buffers); + *pool = (struct ra_buf_pool){0}; +} + +static bool ra_buf_params_compatible(const struct ra_buf_params *new, + const struct ra_buf_params *old) +{ + return new->type == old->type && + new->size <= old->size && + new->host_mapped == old->host_mapped && + new->host_mutable == old->host_mutable; +} + +static bool ra_buf_pool_grow(struct ra *ra, struct ra_buf_pool *pool) +{ + struct ra_buf *buf = ra_buf_create(ra, &pool->current_params); + if (!buf) + return false; + + MP_TARRAY_INSERT_AT(NULL, pool->buffers, pool->num_buffers, pool->index, buf); + MP_VERBOSE(ra, "Resized buffer pool of type %u to size %d\n", + pool->current_params.type, pool->num_buffers); + return true; +} + +struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool, + const struct ra_buf_params *params) +{ + assert(!params->initial_data); + + if (!ra_buf_params_compatible(params, &pool->current_params)) { + ra_buf_pool_uninit(ra, pool); + pool->current_params = *params; + } + + // Make sure we have at least one buffer available + if (!pool->buffers && !ra_buf_pool_grow(ra, pool)) + return NULL; + + // Make sure the next buffer is available for use + if (!ra->fns->buf_poll(ra, pool->buffers[pool->index]) && + !ra_buf_pool_grow(ra, pool)) + { + return NULL; + } + + struct ra_buf *buf = pool->buffers[pool->index++]; + pool->index %= pool->num_buffers; + + return buf; +} + +bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo, + const struct ra_tex_upload_params *params) +{ + if (params->buf) + return ra->fns->tex_upload(ra, params); + + struct ra_tex *tex = params->tex; + size_t row_size = tex->params.dimensions == 2 ? 
params->stride : + tex->params.w * tex->params.format->pixel_size; + + int height = tex->params.h; + if (tex->params.dimensions == 2 && params->rc) + height = mp_rect_h(*params->rc); + + struct ra_buf_params bufparams = { + .type = RA_BUF_TYPE_TEX_UPLOAD, + .size = row_size * height * tex->params.d, + .host_mutable = true, + }; + + struct ra_buf *buf = ra_buf_pool_get(ra, pbo, &bufparams); + if (!buf) + return false; + + ra->fns->buf_update(ra, buf, 0, params->src, bufparams.size); + + struct ra_tex_upload_params newparams = *params; + newparams.buf = buf; + newparams.src = NULL; + + return ra->fns->tex_upload(ra, &newparams); +} + +struct ra_layout std140_layout(struct ra_renderpass_input *inp) +{ + size_t el_size = ra_vartype_size(inp->type); + + // std140 packing rules: + // 1. The alignment of generic values is their size in bytes + // 2. The alignment of vectors is the vector length * the base count, with + // the exception of vec3 which is always aligned like vec4 + // 3. The alignment of arrays is that of the element size rounded up to + // the nearest multiple of vec4 + // 4. Matrices are treated like arrays of vectors + // 5. 
Arrays/matrices are laid out with a stride equal to the alignment + size_t size = el_size * inp->dim_v; + if (inp->dim_v == 3) + size += el_size; + if (inp->dim_m > 1) + size = MP_ALIGN_UP(size, sizeof(float[4])); + + return (struct ra_layout) { + .align = size, + .stride = size, + .size = size * inp->dim_m, + }; +} + +struct ra_layout std430_layout(struct ra_renderpass_input *inp) +{ + size_t el_size = ra_vartype_size(inp->type); + + // std430 packing rules: like std140, except arrays/matrices are always + // "tightly" packed, even arrays/matrices of vec3s + size_t size = el_size * inp->dim_v; + if (inp->dim_v == 3 && inp->dim_m == 1) + size += el_size; + + return (struct ra_layout) { + .align = size, + .stride = size, + .size = size * inp->dim_m, + }; +} + +// Resize a texture to a new desired size and format if necessary +bool ra_tex_resize(struct ra *ra, struct mp_log *log, struct ra_tex **tex, + int w, int h, const struct ra_format *fmt) +{ + if (*tex) { + struct ra_tex_params cur_params = (*tex)->params; + if (cur_params.w == w && cur_params.h == h && cur_params.format == fmt) + return true; + } + + mp_dbg(log, "Resizing texture: %dx%d\n", w, h); + + if (!fmt || !fmt->renderable || !fmt->linear_filter) { + mp_err(log, "Format %s not supported.\n", fmt ? 
fmt->name : "(unset)"); + return false; + } + + ra_tex_free(ra, tex); + struct ra_tex_params params = { + .dimensions = 2, + .w = w, + .h = h, + .d = 1, + .format = fmt, + .src_linear = true, + .render_src = true, + .render_dst = true, + .storage_dst = true, + .blit_src = true, + }; + + *tex = ra_tex_create(ra, ¶ms); + if (!*tex) + mp_err(log, "Error: texture could not be created.\n"); + + return *tex; +} + +struct timer_pool { + struct ra *ra; + ra_timer *timer; + bool running; // detect invalid usage + + uint64_t samples[VO_PERF_SAMPLE_COUNT]; + int sample_idx; + int sample_count; + + uint64_t sum; + uint64_t peak; +}; + +struct timer_pool *timer_pool_create(struct ra *ra) +{ + if (!ra->fns->timer_create) + return NULL; + + ra_timer *timer = ra->fns->timer_create(ra); + if (!timer) + return NULL; + + struct timer_pool *pool = talloc(NULL, struct timer_pool); + if (!pool) { + ra->fns->timer_destroy(ra, timer); + return NULL; + } + + *pool = (struct timer_pool){ .ra = ra, .timer = timer }; + return pool; +} + +void timer_pool_destroy(struct timer_pool *pool) +{ + if (!pool) + return; + + pool->ra->fns->timer_destroy(pool->ra, pool->timer); + talloc_free(pool); +} + +void timer_pool_start(struct timer_pool *pool) +{ + if (!pool) + return; + + assert(!pool->running); + pool->ra->fns->timer_start(pool->ra, pool->timer); + pool->running = true; +} + +void timer_pool_stop(struct timer_pool *pool) +{ + if (!pool) + return; + + assert(pool->running); + uint64_t res = pool->ra->fns->timer_stop(pool->ra, pool->timer); + pool->running = false; + + if (res) { + // Input res into the buffer and grab the previous value + uint64_t old = pool->samples[pool->sample_idx]; + pool->sample_count = MPMIN(pool->sample_count + 1, VO_PERF_SAMPLE_COUNT); + pool->samples[pool->sample_idx++] = res; + pool->sample_idx %= VO_PERF_SAMPLE_COUNT; + pool->sum = pool->sum + res - old; + + // Update peak if necessary + if (res >= pool->peak) { + pool->peak = res; + } else if (pool->peak == old) { + 
// It's possible that the last peak was the value we just removed, + // if so we need to scan for the new peak + uint64_t peak = res; + for (int i = 0; i < VO_PERF_SAMPLE_COUNT; i++) + peak = MPMAX(peak, pool->samples[i]); + pool->peak = peak; + } + } +} + +struct mp_pass_perf timer_pool_measure(struct timer_pool *pool) +{ + if (!pool) + return (struct mp_pass_perf){0}; + + struct mp_pass_perf res = { + .peak = pool->peak, + .count = pool->sample_count, + }; + + int idx = pool->sample_idx - pool->sample_count + VO_PERF_SAMPLE_COUNT; + for (int i = 0; i < res.count; i++) { + idx %= VO_PERF_SAMPLE_COUNT; + res.samples[i] = pool->samples[idx++]; + } + + if (res.count > 0) { + res.last = res.samples[res.count - 1]; + res.avg = pool->sum / res.count; + } + + return res; +} + +void mp_log_source(struct mp_log *log, int lev, const char *src) +{ + int line = 1; + if (!src) + return; + while (*src) { + const char *end = strchr(src, '\n'); + const char *next = end + 1; + if (!end) + next = end = src + strlen(src); + mp_msg(log, lev, "[%3d] %.*s\n", line, (int)(end - src), src); + line++; + src = next; + } +} diff --git a/video/out/gpu/utils.h b/video/out/gpu/utils.h new file mode 100644 index 0000000..ac0cbf2 --- /dev/null +++ b/video/out/gpu/utils.h @@ -0,0 +1,105 @@ +#pragma once + +#include <stdbool.h> +#include <math.h> + +#include "ra.h" +#include "context.h" + +// A 3x2 matrix, with the translation part separate. +struct gl_transform { + // row-major, e.g. in mathematical notation: + // | m[0][0] m[0][1] | + // | m[1][0] m[1][1] | + float m[2][2]; + float t[2]; +}; + +static const struct gl_transform identity_trans = { + .m = {{1.0, 0.0}, {0.0, 1.0}}, + .t = {0.0, 0.0}, +}; + +void gl_transform_ortho(struct gl_transform *t, float x0, float x1, + float y0, float y1); + +// This treats m as an affine transformation, in other words m[2][n] gets +// added to the output. 
+static inline void gl_transform_vec(struct gl_transform t, float *x, float *y) +{ + float vx = *x, vy = *y; + *x = vx * t.m[0][0] + vy * t.m[0][1] + t.t[0]; + *y = vx * t.m[1][0] + vy * t.m[1][1] + t.t[1]; +} + +struct mp_rect_f { + float x0, y0, x1, y1; +}; + +// Semantic equality (fuzzy comparison) +static inline bool mp_rect_f_seq(struct mp_rect_f a, struct mp_rect_f b) +{ + return fabs(a.x0 - b.x0) < 1e-6 && fabs(a.x1 - b.x1) < 1e-6 && + fabs(a.y0 - b.y0) < 1e-6 && fabs(a.y1 - b.y1) < 1e-6; +} + +static inline void gl_transform_rect(struct gl_transform t, struct mp_rect_f *r) +{ + gl_transform_vec(t, &r->x0, &r->y0); + gl_transform_vec(t, &r->x1, &r->y1); +} + +static inline bool gl_transform_eq(struct gl_transform a, struct gl_transform b) +{ + for (int x = 0; x < 2; x++) { + for (int y = 0; y < 2; y++) { + if (a.m[x][y] != b.m[x][y]) + return false; + } + } + + return a.t[0] == b.t[0] && a.t[1] == b.t[1]; +} + +void gl_transform_trans(struct gl_transform t, struct gl_transform *x); + +void gl_transform_ortho_fbo(struct gl_transform *t, struct ra_fbo fbo); + +// A pool of buffers, which can grow as needed +struct ra_buf_pool { + struct ra_buf_params current_params; + struct ra_buf **buffers; + int num_buffers; + int index; +}; + +void ra_buf_pool_uninit(struct ra *ra, struct ra_buf_pool *pool); + +// Note: params->initial_data is *not* supported +struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool, + const struct ra_buf_params *params); + +// Helper that wraps ra_tex_upload using texture upload buffers to ensure that +// params->buf is always set. This is intended for RA-internal usage. 
+bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo, + const struct ra_tex_upload_params *params); + +// Layout rules for GLSL's packing modes +struct ra_layout std140_layout(struct ra_renderpass_input *inp); +struct ra_layout std430_layout(struct ra_renderpass_input *inp); + +bool ra_tex_resize(struct ra *ra, struct mp_log *log, struct ra_tex **tex, + int w, int h, const struct ra_format *fmt); + +// A wrapper around ra_timer that does result pooling, averaging etc. +struct timer_pool; + +struct timer_pool *timer_pool_create(struct ra *ra); +void timer_pool_destroy(struct timer_pool *pool); +void timer_pool_start(struct timer_pool *pool); +void timer_pool_stop(struct timer_pool *pool); +struct mp_pass_perf timer_pool_measure(struct timer_pool *pool); + +// print a multi line string with line numbers (e.g. for shader sources) +// log, lev: module and log level, as in mp_msg() +void mp_log_source(struct mp_log *log, int lev, const char *src); diff --git a/video/out/opengl/video.c b/video/out/gpu/video.c index 3362381..f80d63a 100644 --- a/video/out/opengl/video.c +++ b/video/out/gpu/video.c @@ -60,28 +60,12 @@ static const char *const fixed_tscale_filters[] = { // must be sorted, and terminated with 0 int filter_sizes[] = {2, 4, 6, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 0}; -int tscale_sizes[] = {2, 4, 6, 0}; // limited by TEXUNIT_VIDEO_NUM +int tscale_sizes[] = {2, 4, 6, 8, 0}; struct vertex_pt { float x, y; }; -struct vertex { - struct vertex_pt position; - struct vertex_pt texcoord[TEXUNIT_VIDEO_NUM]; -}; - -static const struct ra_renderpass_input vertex_vao[] = { - {"position", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, position)}, - {"texcoord0", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[0])}, - {"texcoord1", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[1])}, - {"texcoord2", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[2])}, - {"texcoord3", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, 
texcoord[3])}, - {"texcoord4", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[4])}, - {"texcoord5", RA_VARTYPE_FLOAT, 2, 1, offsetof(struct vertex, texcoord[5])}, - {0} -}; - struct texplane { struct ra_tex *tex; int w, h; @@ -115,7 +99,7 @@ static const char *plane_names[] = { // A self-contained description of a source image which can be bound to a // texture unit and sampled from. Contains metadata about how it's to be used -struct img_tex { +struct image { enum plane_type type; // must be set to something non-zero int components; // number of relevant coordinates float multiplier; // multiplier to be used when sampling @@ -124,10 +108,10 @@ struct img_tex { struct gl_transform transform; // rendering transformation }; -// A named img_tex, for user scripting purposes -struct saved_tex { +// A named image, for user scripting purposes +struct saved_img { const char *name; - struct img_tex tex; + struct image img; }; // A texture hook. This is some operation that transforms a named texture as @@ -135,21 +119,21 @@ struct saved_tex { struct tex_hook { const char *save_tex; const char *hook_tex[SHADER_MAX_HOOKS]; - const char *bind_tex[TEXUNIT_VIDEO_NUM]; + const char *bind_tex[SHADER_MAX_BINDS]; int components; // how many components are relevant (0 = same as input) void *priv; // this gets talloc_freed when the tex_hook is removed - void (*hook)(struct gl_video *p, struct img_tex tex, // generates GLSL + void (*hook)(struct gl_video *p, struct image img, // generates GLSL struct gl_transform *trans, void *priv); - bool (*cond)(struct gl_video *p, struct img_tex tex, void *priv); + bool (*cond)(struct gl_video *p, struct image img, void *priv); }; -struct fbosurface { - struct fbotex fbotex; +struct surface { + struct ra_tex *tex; uint64_t id; double pts; }; -#define FBOSURFACES_MAX 10 +#define SURFACES_MAX 10 struct cached_file { char *path; @@ -161,8 +145,6 @@ struct pass_info { struct mp_pass_perf perf; }; -#define PASS_INFO_MAX (SHADER_MAX_PASSES + 32) 
- struct dr_buffer { struct ra_buf *buf; // The mpi reference will keep the data from being recycled (or from other @@ -215,29 +197,40 @@ struct gl_video { bool dumb_mode; bool forced_dumb_mode; + // Cached vertex array, to avoid re-allocation per frame. For simplicity, + // our vertex format is simply a list of `vertex_pt`s, since this greatly + // simplifies offset calculation at the cost of (unneeded) flexibility. + struct vertex_pt *tmp_vertex; + struct ra_renderpass_input *vao; + int vao_len; + const struct ra_format *fbo_format; - struct fbotex merge_fbo[4]; - struct fbotex scale_fbo[4]; - struct fbotex integer_fbo[4]; - struct fbotex indirect_fbo; - struct fbotex blend_subs_fbo; - struct fbotex screen_fbo; - struct fbotex output_fbo; - struct fbosurface surfaces[FBOSURFACES_MAX]; - struct fbotex vdpau_deinterleave_fbo[2]; + struct ra_tex *merge_tex[4]; + struct ra_tex *scale_tex[4]; + struct ra_tex *integer_tex[4]; + struct ra_tex *indirect_tex; + struct ra_tex *blend_subs_tex; + struct ra_tex *screen_tex; + struct ra_tex *output_tex; + struct ra_tex *vdpau_deinterleave_tex[2]; + struct ra_tex **hook_textures; + int num_hook_textures; + int idx_hook_textures; + struct ra_buf *hdr_peak_ssbo; + struct surface surfaces[SURFACES_MAX]; // user pass descriptions and textures - struct tex_hook tex_hooks[SHADER_MAX_PASSES]; - int tex_hook_num; - struct gl_user_shader_tex user_textures[SHADER_MAX_PASSES]; - int user_tex_num; + struct tex_hook *tex_hooks; + int num_tex_hooks; + struct gl_user_shader_tex *user_textures; + int num_user_textures; int surface_idx; int surface_now; int frames_drawn; bool is_interpolated; - bool output_fbo_valid; + bool output_tex_valid; // state for configured scalers struct scaler scaler[SCALER_COUNT]; @@ -249,9 +242,15 @@ struct gl_video { struct mp_osd_res osd_rect; // OSD size/margins // temporary during rendering - struct img_tex pass_tex[TEXUNIT_VIDEO_NUM]; struct compute_info pass_compute; // compute shader metadata for this pass - 
int pass_tex_num; + struct image *pass_imgs; // bound images for this pass + int num_pass_imgs; + struct saved_img *saved_imgs; // saved (named) images for this frame + int num_saved_imgs; + + // effective current texture metadata - this will essentially affect the + // next render pass target, as well as implicitly tracking what needs to + // be done with the image int texture_w, texture_h; struct gl_transform texture_offset; // texture transform without rotation int components; @@ -259,20 +258,14 @@ struct gl_video { float user_gamma; // pass info / metrics - struct pass_info pass_fresh[PASS_INFO_MAX]; - struct pass_info pass_redraw[PASS_INFO_MAX]; + struct pass_info pass_fresh[VO_PASS_PERF_MAX]; + struct pass_info pass_redraw[VO_PASS_PERF_MAX]; struct pass_info *pass; int pass_idx; struct timer_pool *upload_timer; struct timer_pool *blit_timer; struct timer_pool *osd_timer; - // intermediate textures - struct saved_tex saved_tex[SHADER_MAX_SAVED]; - int saved_tex_num; - struct fbotex hook_fbos[SHADER_MAX_SAVED]; - int hook_fbo_num; - int frames_uploaded; int frames_rendered; AVLFG lfg; @@ -284,8 +277,12 @@ struct gl_video { struct cached_file *files; int num_files; - struct ra_hwdec *hwdec; + bool hwdec_interop_loading_done; + struct ra_hwdec **hwdecs; + int num_hwdecs; + struct ra_hwdec_mapper *hwdec_mapper; + struct ra_hwdec *hwdec_overlay; bool hwdec_active; bool dsi_warned; @@ -318,8 +315,9 @@ static const struct gl_video_opts gl_video_opts_def = { .gamma = 1.0f, .tone_mapping = TONE_MAPPING_MOBIUS, .tone_mapping_param = NAN, - .tone_mapping_desat = 2.0, + .tone_mapping_desat = 1.0, .early_flush = -1, + .hwdec_interop = "auto", }; static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt, @@ -347,9 +345,9 @@ static int validate_window_opt(struct mp_log *log, const m_option_t *opt, const struct m_sub_options gl_video_conf = { .opts = (const m_option_t[]) { - OPT_CHOICE("opengl-dumb-mode", dumb_mode, 0, + OPT_CHOICE("gpu-dumb-mode", dumb_mode, 
0, ({"auto", 0}, {"yes", 1}, {"no", -1})), - OPT_FLOATRANGE("opengl-gamma", gamma, 0, 0.1, 2.0), + OPT_FLOATRANGE("gamma-factor", gamma, 0, 0.1, 2.0), OPT_FLAG("gamma-auto", gamma_auto, 0), OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names), OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names), @@ -376,7 +374,7 @@ const struct m_sub_options gl_video_conf = { OPT_FLAG("sigmoid-upscaling", sigmoid_upscaling, 0), OPT_FLOATRANGE("sigmoid-center", sigmoid_center, 0, 0.0, 1.0), OPT_FLOATRANGE("sigmoid-slope", sigmoid_slope, 0, 1.0, 20.0), - OPT_STRING("opengl-fbo-format", fbo_format, 0), + OPT_STRING("fbo-format", fbo_format, 0), OPT_CHOICE_OR_INT("dither-depth", dither_depth, 0, -1, 16, ({"no", -1}, {"auto", 0})), OPT_CHOICE("dither", dither_algo, 0, @@ -399,18 +397,28 @@ const struct m_sub_options gl_video_conf = { ({"no", BLEND_SUBS_NO}, {"yes", BLEND_SUBS_YES}, {"video", BLEND_SUBS_VIDEO})), - OPT_PATHLIST("opengl-shaders", user_shaders, 0), - OPT_CLI_ALIAS("opengl-shader", "opengl-shaders-append"), + OPT_PATHLIST("glsl-shaders", user_shaders, 0), + OPT_CLI_ALIAS("glsl-shader", "glsl-shaders-append"), OPT_FLAG("deband", deband, 0), OPT_SUBSTRUCT("deband", deband_opts, deband_conf, 0), OPT_FLOAT("sharpen", unsharp, 0), - OPT_INTRANGE("opengl-tex-pad-x", tex_pad_x, 0, 0, 4096), - OPT_INTRANGE("opengl-tex-pad-y", tex_pad_y, 0, 0, 4096), + OPT_INTRANGE("gpu-tex-pad-x", tex_pad_x, 0, 0, 4096), + OPT_INTRANGE("gpu-tex-pad-y", tex_pad_y, 0, 0, 4096), OPT_SUBSTRUCT("", icc_opts, mp_icc_conf, 0), - OPT_CHOICE("opengl-early-flush", early_flush, 0, - ({"no", 0}, {"yes", 1}, {"auto", -1})), - OPT_STRING("opengl-shader-cache-dir", shader_cache_dir, 0), + OPT_STRING("gpu-shader-cache-dir", shader_cache_dir, 0), + OPT_STRING_VALIDATE("gpu-hwdec-interop", hwdec_interop, 0, + ra_hwdec_validate_opt), + OPT_REPLACED("opengl-hwdec-interop", "gpu-hwdec-interop"), + OPT_REPLACED("hwdec-preload", "opengl-hwdec-interop"), OPT_REPLACED("hdr-tone-mapping", "tone-mapping"), 
+ OPT_REPLACED("opengl-shaders", "glsl-shaders"), + OPT_REPLACED("opengl-shader", "glsl-shader"), + OPT_REPLACED("opengl-shader-cache-dir", "gpu-shader-cache-dir"), + OPT_REPLACED("opengl-tex-pad-x", "gpu-tex-pad-x"), + OPT_REPLACED("opengl-tex-pad-y", "gpu-tex-pad-y"), + OPT_REPLACED("opengl-fbo-format", "fbo-format"), + OPT_REPLACED("opengl-dumb-mode", "gpu-dumb-mode"), + OPT_REPLACED("opengl-gamma", "gamma-factor"), {0} }, .size = sizeof(struct gl_video_opts), @@ -425,6 +433,7 @@ static const char *handle_scaler_opt(const char *name, bool tscale); static void reinit_from_options(struct gl_video *p); static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]); static void gl_video_setup_hooks(struct gl_video *p); +static void gl_video_update_options(struct gl_video *p); #define GLSL(x) gl_sc_add(p->sc, #x "\n"); #define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__) @@ -460,32 +469,32 @@ static void debug_check_gl(struct gl_video *p, const char *msg) static void gl_video_reset_surfaces(struct gl_video *p) { - for (int i = 0; i < FBOSURFACES_MAX; i++) { + for (int i = 0; i < SURFACES_MAX; i++) { p->surfaces[i].id = 0; p->surfaces[i].pts = MP_NOPTS_VALUE; } p->surface_idx = 0; p->surface_now = 0; p->frames_drawn = 0; - p->output_fbo_valid = false; + p->output_tex_valid = false; } static void gl_video_reset_hooks(struct gl_video *p) { - for (int i = 0; i < p->tex_hook_num; i++) + for (int i = 0; i < p->num_tex_hooks; i++) talloc_free(p->tex_hooks[i].priv); - for (int i = 0; i < p->user_tex_num; i++) + for (int i = 0; i < p->num_user_textures; i++) ra_tex_free(p->ra, &p->user_textures[i].tex); - p->tex_hook_num = 0; - p->user_tex_num = 0; + p->num_tex_hooks = 0; + p->num_user_textures = 0; } -static inline int fbosurface_wrap(int id) +static inline int surface_wrap(int id) { - id = id % FBOSURFACES_MAX; - return id < 0 ? id + FBOSURFACES_MAX : id; + id = id % SURFACES_MAX; + return id < 0 ? 
id + SURFACES_MAX : id; } static void reinit_osd(struct gl_video *p) @@ -504,24 +513,24 @@ static void uninit_rendering(struct gl_video *p) ra_tex_free(p->ra, &p->dither_texture); for (int n = 0; n < 4; n++) { - fbotex_uninit(&p->merge_fbo[n]); - fbotex_uninit(&p->scale_fbo[n]); - fbotex_uninit(&p->integer_fbo[n]); + ra_tex_free(p->ra, &p->merge_tex[n]); + ra_tex_free(p->ra, &p->scale_tex[n]); + ra_tex_free(p->ra, &p->integer_tex[n]); } - fbotex_uninit(&p->indirect_fbo); - fbotex_uninit(&p->blend_subs_fbo); - fbotex_uninit(&p->screen_fbo); - fbotex_uninit(&p->output_fbo); + ra_tex_free(p->ra, &p->indirect_tex); + ra_tex_free(p->ra, &p->blend_subs_tex); + ra_tex_free(p->ra, &p->screen_tex); + ra_tex_free(p->ra, &p->output_tex); - for (int n = 0; n < FBOSURFACES_MAX; n++) - fbotex_uninit(&p->surfaces[n].fbotex); + for (int n = 0; n < SURFACES_MAX; n++) + ra_tex_free(p->ra, &p->surfaces[n].tex); - for (int n = 0; n < SHADER_MAX_SAVED; n++) - fbotex_uninit(&p->hook_fbos[n]); + for (int n = 0; n < p->num_hook_textures; n++) + ra_tex_free(p->ra, &p->hook_textures[n]); for (int n = 0; n < 2; n++) - fbotex_uninit(&p->vdpau_deinterleave_fbo[n]); + ra_tex_free(p->ra, &p->vdpau_deinterleave_tex[n]); gl_video_reset_surfaces(p); gl_video_reset_hooks(p); @@ -607,29 +616,28 @@ static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim, return true; } -// Fill an img_tex struct from an FBO + some metadata -static struct img_tex img_tex_fbo(struct fbotex *fbo, enum plane_type type, - int components) +// Fill an image struct from a ra_tex + some metadata +static struct image image_wrap(struct ra_tex *tex, enum plane_type type, + int components) { assert(type != PLANE_NONE); - return (struct img_tex){ + return (struct image){ .type = type, - .tex = fbo->tex, + .tex = tex, .multiplier = 1.0, - .w = fbo->lw, - .h = fbo->lh, + .w = tex ? tex->params.w : 1, + .h = tex ? 
tex->params.h : 1, .transform = identity_trans, .components = components, }; } -// Bind an img_tex to a free texture unit and return its ID. At most -// TEXUNIT_VIDEO_NUM texture units can be bound at once -static int pass_bind(struct gl_video *p, struct img_tex tex) +// Bind an image to a free texture unit and return its ID. +static int pass_bind(struct gl_video *p, struct image img) { - assert(p->pass_tex_num < TEXUNIT_VIDEO_NUM); - p->pass_tex[p->pass_tex_num] = tex; - return p->pass_tex_num++; + int idx = p->num_pass_imgs; + MP_TARRAY_APPEND(p, p->pass_imgs, p->num_pass_imgs, img); + return idx; } // Rotation by 90° and flipping. @@ -678,11 +686,11 @@ static enum plane_type merge_plane_types(enum plane_type a, enum plane_type b) return a; } -// Places a video_image's image textures + associated metadata into tex[]. The +// Places a video_image's image textures + associated metadata into img[]. The // number of textures is equal to p->plane_count. Any necessary plane offsets // are stored in off. (e.g. 
chroma position) -static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, - struct img_tex tex[4], struct gl_transform off[4]) +static void pass_get_images(struct gl_video *p, struct video_image *vimg, + struct image img[4], struct gl_transform off[4]) { assert(vimg->mpi); @@ -715,7 +723,7 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, msb_valid_bits, p->ra_format.component_bits); - memset(tex, 0, 4 * sizeof(tex[0])); + memset(img, 0, 4 * sizeof(img[0])); for (int n = 0; n < p->plane_count; n++) { struct texplane *t = &vimg->planes[n]; @@ -737,7 +745,7 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, type = merge_plane_types(type, ctype); } - tex[n] = (struct img_tex){ + img[n] = (struct image){ .type = type, .tex = t->tex, .multiplier = tex_mul, @@ -746,12 +754,12 @@ static void pass_get_img_tex(struct gl_video *p, struct video_image *vimg, }; for (int i = 0; i < 4; i++) - tex[n].components += !!p->ra_format.components[n][i]; + img[n].components += !!p->ra_format.components[n][i]; get_transform(t->w, t->h, p->image_params.rotate, t->flipped, - &tex[n].transform); + &img[n].transform); if (p->image_params.rotate % 180 == 90) - MPSWAP(int, tex[n].w, tex[n].h); + MPSWAP(int, img[n].w, img[n].h); off[n] = identity_trans; @@ -804,18 +812,27 @@ static void init_video(struct gl_video *p) { p->use_integer_conversion = false; - if (p->hwdec && ra_hwdec_test_format(p->hwdec, p->image_params.imgfmt)) { - if (p->hwdec->driver->overlay_frame) { + struct ra_hwdec *hwdec = NULL; + for (int n = 0; n < p->num_hwdecs; n++) { + if (ra_hwdec_test_format(p->hwdecs[n], p->image_params.imgfmt)) { + hwdec = p->hwdecs[n]; + break; + } + } + + if (hwdec) { + if (hwdec->driver->overlay_frame) { MP_WARN(p, "Using HW-overlay mode. 
No GL filtering is performed " "on the video!\n"); + p->hwdec_overlay = hwdec; } else { - p->hwdec_mapper = ra_hwdec_mapper_create(p->hwdec, &p->image_params); + p->hwdec_mapper = ra_hwdec_mapper_create(hwdec, &p->image_params); if (!p->hwdec_mapper) MP_ERR(p, "Initializing texture for hardware decoding failed.\n"); } if (p->hwdec_mapper) p->image_params = p->hwdec_mapper->dst_params; - const char **exts = p->hwdec->glsl_extensions; + const char **exts = hwdec->glsl_extensions; for (int n = 0; exts && exts[n]; n++) gl_sc_enable_extension(p->sc, (char *)exts[n]); p->hwdec_active = true; @@ -895,20 +912,6 @@ static void init_video(struct gl_video *p) gl_video_setup_hooks(p); } -// Release any texture mappings associated with the current frame. -static void unmap_current_image(struct gl_video *p) -{ - struct video_image *vimg = &p->image; - - if (vimg->hwdec_mapped) { - assert(p->hwdec_active && p->hwdec_mapper); - ra_hwdec_mapper_unmap(p->hwdec_mapper); - memset(vimg->planes, 0, sizeof(vimg->planes)); - vimg->hwdec_mapped = false; - vimg->id = 0; // needs to be mapped again - } -} - static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr) { for (int i = 0; i < p->num_dr_buffers; i++) { @@ -949,10 +952,18 @@ again:; static void unref_current_image(struct gl_video *p) { - unmap_current_image(p); - p->image.id = 0; + struct video_image *vimg = &p->image; + + if (vimg->hwdec_mapped) { + assert(p->hwdec_active && p->hwdec_mapper); + ra_hwdec_mapper_unmap(p->hwdec_mapper); + memset(vimg->planes, 0, sizeof(vimg->planes)); + vimg->hwdec_mapped = false; + } + + vimg->id = 0; - mp_image_unrefp(&p->image.mpi); + mp_image_unrefp(&vimg->mpi); // While we're at it, also garbage collect pending fences in here to // get it out of the way. @@ -964,8 +975,8 @@ static void unref_current_image(struct gl_video *p) // lead to flickering artifacts. 
static void unmap_overlay(struct gl_video *p) { - if (p->hwdec_active && p->hwdec->driver->overlay_frame) - p->hwdec->driver->overlay_frame(p->hwdec, NULL, NULL, NULL, true); + if (p->hwdec_overlay) + p->hwdec_overlay->driver->overlay_frame(p->hwdec_overlay, NULL, NULL, NULL, true); } static void uninit_video(struct gl_video *p) @@ -988,12 +999,13 @@ static void uninit_video(struct gl_video *p) p->real_image_params = (struct mp_image_params){0}; p->image_params = p->real_image_params; p->hwdec_active = false; + p->hwdec_overlay = NULL; ra_hwdec_mapper_free(&p->hwdec_mapper); } static void pass_record(struct gl_video *p, struct mp_pass_perf perf) { - if (!p->pass || p->pass_idx == PASS_INFO_MAX) + if (!p->pass || p->pass_idx == VO_PASS_PERF_MAX) return; struct pass_info *pass = &p->pass[p->pass_idx]; @@ -1008,7 +1020,7 @@ static void pass_record(struct gl_video *p, struct mp_pass_perf perf) PRINTF_ATTRIBUTE(2, 3) static void pass_describe(struct gl_video *p, const char *textf, ...) { - if (!p->pass || p->pass_idx == PASS_INFO_MAX) + if (!p->pass || p->pass_idx == VO_PASS_PERF_MAX) return; struct pass_info *pass = &p->pass[p->pass_idx]; @@ -1027,7 +1039,7 @@ static void pass_info_reset(struct gl_video *p, bool is_redraw) p->pass = is_redraw ? 
p->pass_redraw : p->pass_fresh; p->pass_idx = 0; - for (int i = 0; i < PASS_INFO_MAX; i++) { + for (int i = 0; i < VO_PASS_PERF_MAX; i++) { p->pass[i].desc.len = 0; p->pass[i].perf = (struct mp_pass_perf){0}; } @@ -1038,14 +1050,14 @@ static void pass_report_performance(struct gl_video *p) if (!p->pass) return; - for (int i = 0; i < PASS_INFO_MAX; i++) { + for (int i = 0; i < VO_PASS_PERF_MAX; i++) { struct pass_info *pass = &p->pass[i]; if (pass->desc.len) { - MP_DBG(p, "pass '%.*s': last %dus avg %dus peak %dus\n", - BSTR_P(pass->desc), - (int)pass->perf.last/1000, - (int)pass->perf.avg/1000, - (int)pass->perf.peak/1000); + MP_TRACE(p, "pass '%.*s': last %dus avg %dus peak %dus\n", + BSTR_P(pass->desc), + (int)pass->perf.last/1000, + (int)pass->perf.avg/1000, + (int)pass->perf.peak/1000); } } } @@ -1054,8 +1066,8 @@ static void pass_prepare_src_tex(struct gl_video *p) { struct gl_shader_cache *sc = p->sc; - for (int n = 0; n < p->pass_tex_num; n++) { - struct img_tex *s = &p->pass_tex[n]; + for (int n = 0; n < p->num_pass_imgs; n++) { + struct image *s = &p->pass_imgs[n]; if (!s->tex) continue; @@ -1079,6 +1091,11 @@ static void pass_prepare_src_tex(struct gl_video *p) } } +static void cleanup_binds(struct gl_video *p) +{ + p->num_pass_imgs = 0; +} + // Sets the appropriate compute shader metadata for an implicit compute pass // bw/bh: block size static void pass_is_compute(struct gl_video *p, int bw, int bh) @@ -1101,7 +1118,6 @@ static void dispatch_compute(struct gl_video *p, int w, int h, info.threads_h > 0 ? 
info.threads_h : info.block_h); pass_prepare_src_tex(p); - gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex)); // Since we don't actually have vertices, we pretend for convenience // reasons that we do and calculate the right texture coordinates based on @@ -1109,25 +1125,21 @@ static void dispatch_compute(struct gl_video *p, int w, int h, gl_sc_uniform_vec2(p->sc, "out_scale", (float[2]){ 1.0 / w, 1.0 / h }); PRELUDE("#define outcoord(id) (out_scale * (vec2(id) + vec2(0.5)))\n"); - for (int n = 0; n < TEXUNIT_VIDEO_NUM; n++) { - struct img_tex *s = &p->pass_tex[n]; + for (int n = 0; n < p->num_pass_imgs; n++) { + struct image *s = &p->pass_imgs[n]; if (!s->tex) continue; // We need to rescale the coordinates to the true texture size - char tex_scale[32]; - snprintf(tex_scale, sizeof(tex_scale), "tex_scale%d", n); + char *tex_scale = mp_tprintf(32, "tex_scale%d", n); gl_sc_uniform_vec2(p->sc, tex_scale, (float[2]){ (float)s->w / s->tex->params.w, (float)s->h / s->tex->params.h, }); - PRELUDE("#define texcoord%d_raw(id) (tex_scale%d * outcoord(id))\n", n, n); - PRELUDE("#define texcoord%d_rot(id) (texture_rot%d * texcoord%d_raw(id) + " + PRELUDE("#define texmap%d_raw(id) (tex_scale%d * outcoord(id))\n", n, n); + PRELUDE("#define texmap%d(id) (texture_rot%d * texmap%d_raw(id) + " "pixel_size%d * texture_off%d)\n", n, n, n, n, n); - // Clamp the texture coordinates to prevent sampling out-of-bounds in - // threads that exceed the requested width/height - PRELUDE("#define texmap%d(id) min(texcoord%d_rot(id), vec2(1.0))\n", n, n); PRELUDE("#define texcoord%d texmap%d(gl_GlobalInvocationID)\n", n, n); } @@ -1137,19 +1149,34 @@ static void dispatch_compute(struct gl_video *p, int w, int h, num_y = info.block_h > 0 ? 
(h + info.block_h - 1) / info.block_h : 1; pass_record(p, gl_sc_dispatch_compute(p->sc, num_x, num_y, 1)); - - memset(&p->pass_tex, 0, sizeof(p->pass_tex)); - p->pass_tex_num = 0; + cleanup_binds(p); } static struct mp_pass_perf render_pass_quad(struct gl_video *p, - struct fbodst target, + struct ra_fbo fbo, const struct mp_rect *dst) { - struct vertex va[6] = {0}; + // The first element is reserved for `vec2 position` + int num_vertex_attribs = 1 + p->num_pass_imgs; + size_t vertex_stride = num_vertex_attribs * sizeof(struct vertex_pt); + + // Expand the VAO if necessary + while (p->vao_len < num_vertex_attribs) { + MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) { + .name = talloc_asprintf(p, "texcoord%d", p->vao_len - 1), + .type = RA_VARTYPE_FLOAT, + .dim_v = 2, + .dim_m = 1, + .offset = p->vao_len * sizeof(struct vertex_pt), + }); + } + + int num_vertices = 6; // quad as triangle list + int num_attribs_total = num_vertices * num_vertex_attribs; + MP_TARRAY_GROW(p, p->tmp_vertex, num_attribs_total); struct gl_transform t; - gl_transform_ortho_fbodst(&t, target); + gl_transform_ortho_fbo(&t, fbo); float x[2] = {dst->x0, dst->x1}; float y[2] = {dst->y0, dst->y1}; @@ -1157,11 +1184,12 @@ static struct mp_pass_perf render_pass_quad(struct gl_video *p, gl_transform_vec(t, &x[1], &y[1]); for (int n = 0; n < 4; n++) { - struct vertex *v = &va[n]; - v->position.x = x[n / 2]; - v->position.y = y[n % 2]; - for (int i = 0; i < p->pass_tex_num; i++) { - struct img_tex *s = &p->pass_tex[i]; + struct vertex_pt *vs = &p->tmp_vertex[num_vertex_attribs * n]; + // vec2 position in idx 0 + vs[0].x = x[n / 2]; + vs[0].y = y[n % 2]; + for (int i = 0; i < p->num_pass_imgs; i++) { + struct image *s = &p->pass_imgs[i]; if (!s->tex) continue; struct gl_transform tr = s->transform; @@ -1169,43 +1197,48 @@ static struct mp_pass_perf render_pass_quad(struct gl_video *p, float ty = (n % 2) * s->h; gl_transform_vec(tr, &tx, &ty); bool rect = 
s->tex->params.non_normalized; - v->texcoord[i].x = tx / (rect ? 1 : s->tex->params.w); - v->texcoord[i].y = ty / (rect ? 1 : s->tex->params.h); + // vec2 texcoordN in idx N+1 + vs[i + 1].x = tx / (rect ? 1 : s->tex->params.w); + vs[i + 1].y = ty / (rect ? 1 : s->tex->params.h); } } - va[4] = va[2]; - va[5] = va[1]; + memmove(&p->tmp_vertex[num_vertex_attribs * 4], + &p->tmp_vertex[num_vertex_attribs * 2], + vertex_stride); + + memmove(&p->tmp_vertex[num_vertex_attribs * 5], + &p->tmp_vertex[num_vertex_attribs * 1], + vertex_stride); - return gl_sc_dispatch_draw(p->sc, target.tex, va, 6); + return gl_sc_dispatch_draw(p->sc, fbo.tex, p->vao, num_vertex_attribs, + vertex_stride, p->tmp_vertex, num_vertices); } -static void finish_pass_direct(struct gl_video *p, struct fbodst target, - const struct mp_rect *dst) +static void finish_pass_fbo(struct gl_video *p, struct ra_fbo fbo, + const struct mp_rect *dst) { pass_prepare_src_tex(p); - gl_sc_set_vertex_format(p->sc, vertex_vao, sizeof(struct vertex)); - pass_record(p, render_pass_quad(p, target, dst)); + pass_record(p, render_pass_quad(p, fbo, dst)); debug_check_gl(p, "after rendering"); - memset(&p->pass_tex, 0, sizeof(p->pass_tex)); - p->pass_tex_num = 0; + cleanup_binds(p); } // dst_fbo: this will be used for rendering; possibly reallocating the whole // FBO, if the required parameters have changed // w, h: required FBO target dimension, and also defines the target rectangle // used for rasterization -// flags: 0 or combination of FBOTEX_FUZZY_W/FBOTEX_FUZZY_H (setting the fuzzy -// flags allows the FBO to be larger than the w/h parameters) -static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo, - int w, int h, int flags) +static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex, + int w, int h) { - fbotex_change(dst_fbo, p->ra, p->log, w, h, p->fbo_format, flags); + if (!ra_tex_resize(p->ra, p->log, dst_tex, w, h, p->fbo_format)) { + cleanup_binds(p); + gl_sc_reset(p->sc); + return; 
+ } if (p->pass_compute.active) { - if (!dst_fbo->tex) - return; - gl_sc_uniform_image2D_wo(p->sc, "out_image", dst_fbo->tex); + gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex); if (!p->pass_compute.directly_writes) GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);) @@ -1214,11 +1247,12 @@ static void finish_pass_fbo(struct gl_video *p, struct fbotex *dst_fbo, debug_check_gl(p, "after dispatching compute shader"); } else { - finish_pass_direct(p, dst_fbo->fbo, &(struct mp_rect){0, 0, w, h}); + struct ra_fbo fbo = { .tex = *dst_tex, }; + finish_pass_fbo(p, fbo, &(struct mp_rect){0, 0, w, h}); } } -static const char *get_tex_swizzle(struct img_tex *img) +static const char *get_tex_swizzle(struct image *img) { if (!img->tex) return "rgba"; @@ -1227,7 +1261,7 @@ static const char *get_tex_swizzle(struct img_tex *img) // Copy a texture to the vec4 color, while increasing offset. Also applies // the texture multiplier to the sampled color -static void copy_img_tex(struct gl_video *p, int *offset, struct img_tex img) +static void copy_image(struct gl_video *p, int *offset, struct image img) { int count = img.components; assert(*offset + count <= 4); @@ -1261,14 +1295,14 @@ static void skip_unused(struct gl_video *p, int num_components) static void uninit_scaler(struct gl_video *p, struct scaler *scaler) { - fbotex_uninit(&scaler->sep_fbo); + ra_tex_free(p->ra, &scaler->sep_fbo); ra_tex_free(p->ra, &scaler->lut); scaler->kernel = NULL; scaler->initialized = false; } static void hook_prelude(struct gl_video *p, const char *name, int id, - struct img_tex tex) + struct image img) { GLSLHF("#define %s_raw texture%d\n", name, id); GLSLHF("#define %s_pos texcoord%d\n", name, id); @@ -1276,15 +1310,15 @@ static void hook_prelude(struct gl_video *p, const char *name, int id, GLSLHF("#define %s_rot texture_rot%d\n", name, id); GLSLHF("#define %s_pt pixel_size%d\n", name, id); GLSLHF("#define %s_map texmap%d\n", name, id); - GLSLHF("#define %s_mul %f\n", name, 
tex.multiplier); + GLSLHF("#define %s_mul %f\n", name, img.multiplier); // Set up the sampling functions GLSLHF("#define %s_tex(pos) (%s_mul * vec4(texture(%s_raw, pos)).%s)\n", - name, name, name, get_tex_swizzle(&tex)); + name, name, name, get_tex_swizzle(&img)); // Since the extra matrix multiplication impacts performance, // skip it unless the texture was actually rotated - if (gl_transform_eq(tex.transform, identity_trans)) { + if (gl_transform_eq(img.transform, identity_trans)) { GLSLHF("#define %s_texOff(off) %s_tex(%s_pos + %s_pt * vec2(off))\n", name, name, name, name); } else { @@ -1294,15 +1328,15 @@ static void hook_prelude(struct gl_video *p, const char *name, int id, } } -static bool saved_tex_find(struct gl_video *p, const char *name, - struct img_tex *out) +static bool saved_img_find(struct gl_video *p, const char *name, + struct image *out) { if (!name || !out) return false; - for (int i = 0; i < p->saved_tex_num; i++) { - if (strcmp(p->saved_tex[i].name, name) == 0) { - *out = p->saved_tex[i].tex; + for (int i = 0; i < p->num_saved_imgs; i++) { + if (strcmp(p->saved_imgs[i].name, name) == 0) { + *out = p->saved_imgs[i].img; return true; } } @@ -1310,29 +1344,28 @@ static bool saved_tex_find(struct gl_video *p, const char *name, return false; } -static void saved_tex_store(struct gl_video *p, const char *name, - struct img_tex tex) +static void saved_img_store(struct gl_video *p, const char *name, + struct image img) { assert(name); - for (int i = 0; i < p->saved_tex_num; i++) { - if (strcmp(p->saved_tex[i].name, name) == 0) { - p->saved_tex[i].tex = tex; + for (int i = 0; i < p->num_saved_imgs; i++) { + if (strcmp(p->saved_imgs[i].name, name) == 0) { + p->saved_imgs[i].img = img; return; } } - assert(p->saved_tex_num < SHADER_MAX_SAVED); - p->saved_tex[p->saved_tex_num++] = (struct saved_tex) { + MP_TARRAY_APPEND(p, p->saved_imgs, p->num_saved_imgs, (struct saved_img) { .name = name, - .tex = tex - }; + .img = img + }); } static bool 
pass_hook_setup_binds(struct gl_video *p, const char *name, - struct img_tex tex, struct tex_hook *hook) + struct image img, struct tex_hook *hook) { - for (int t = 0; t < TEXUNIT_VIDEO_NUM; t++) { + for (int t = 0; t < SHADER_MAX_BINDS; t++) { char *bind_name = (char *)hook->bind_tex[t]; if (!bind_name) @@ -1340,16 +1373,16 @@ static bool pass_hook_setup_binds(struct gl_video *p, const char *name, // This is a special name that means "currently hooked texture" if (strcmp(bind_name, "HOOKED") == 0) { - int id = pass_bind(p, tex); - hook_prelude(p, "HOOKED", id, tex); - hook_prelude(p, name, id, tex); + int id = pass_bind(p, img); + hook_prelude(p, "HOOKED", id, img); + hook_prelude(p, name, id, img); continue; } // BIND can also be used to load user-defined textures, in which // case we will directly load them as a uniform instead of // generating the hook_prelude boilerplate - for (int u = 0; u < p->user_tex_num; u++) { + for (int u = 0; u < p->num_user_textures; u++) { struct gl_user_shader_tex *utex = &p->user_textures[u]; if (bstr_equals0(utex->name, bind_name)) { gl_sc_uniform_texture(p->sc, bind_name, utex->tex); @@ -1357,16 +1390,16 @@ static bool pass_hook_setup_binds(struct gl_video *p, const char *name, } } - struct img_tex bind_tex; - if (!saved_tex_find(p, bind_name, &bind_tex)) { + struct image bind_img; + if (!saved_img_find(p, bind_name, &bind_img)) { // Clean up texture bindings and move on to the next hook - MP_DBG(p, "Skipping hook on %s due to no texture named %s.\n", - name, bind_name); - p->pass_tex_num -= t; + MP_TRACE(p, "Skipping hook on %s due to no texture named %s.\n", + name, bind_name); + p->num_pass_imgs -= t; return false; } - hook_prelude(p, bind_name, pass_bind(p, bind_tex), bind_tex); + hook_prelude(p, bind_name, pass_bind(p, bind_img), bind_img); next_bind: ; } @@ -1374,18 +1407,26 @@ next_bind: ; return true; } -// Process hooks for a plane, saving the result and returning a new img_tex -// If 'trans' is NULL, the shader is 
forbidden from transforming tex -static struct img_tex pass_hook(struct gl_video *p, const char *name, - struct img_tex tex, struct gl_transform *trans) +static struct ra_tex **next_hook_tex(struct gl_video *p) +{ + if (p->idx_hook_textures == p->num_hook_textures) + MP_TARRAY_APPEND(p, p->hook_textures, p->num_hook_textures, NULL); + + return &p->hook_textures[p->idx_hook_textures++]; +} + +// Process hooks for a plane, saving the result and returning a new image +// If 'trans' is NULL, the shader is forbidden from transforming img +static struct image pass_hook(struct gl_video *p, const char *name, + struct image img, struct gl_transform *trans) { if (!name) - return tex; + return img; - saved_tex_store(p, name, tex); + saved_img_store(p, name, img); - MP_DBG(p, "Running hooks for %s\n", name); - for (int i = 0; i < p->tex_hook_num; i++) { + MP_TRACE(p, "Running hooks for %s\n", name); + for (int i = 0; i < p->num_tex_hooks; i++) { struct tex_hook *hook = &p->tex_hooks[i]; // Figure out if this pass hooks this texture @@ -1398,34 +1439,32 @@ static struct img_tex pass_hook(struct gl_video *p, const char *name, found: // Check the hook's condition - if (hook->cond && !hook->cond(p, tex, hook->priv)) { - MP_DBG(p, "Skipping hook on %s due to condition.\n", name); + if (hook->cond && !hook->cond(p, img, hook->priv)) { + MP_TRACE(p, "Skipping hook on %s due to condition.\n", name); continue; } - if (!pass_hook_setup_binds(p, name, tex, hook)) + if (!pass_hook_setup_binds(p, name, img, hook)) continue; // Run the actual hook. This generates a series of GLSL shader // instructions sufficient for drawing the hook's output struct gl_transform hook_off = identity_trans; - hook->hook(p, tex, &hook_off, hook->priv); + hook->hook(p, img, &hook_off, hook->priv); - int comps = hook->components ? hook->components : tex.components; + int comps = hook->components ? 
hook->components : img.components; skip_unused(p, comps); // Compute the updated FBO dimensions and store the result - struct mp_rect_f sz = {0, 0, tex.w, tex.h}; + struct mp_rect_f sz = {0, 0, img.w, img.h}; gl_transform_rect(hook_off, &sz); int w = lroundf(fabs(sz.x1 - sz.x0)); int h = lroundf(fabs(sz.y1 - sz.y0)); - assert(p->hook_fbo_num < SHADER_MAX_SAVED); - struct fbotex *fbo = &p->hook_fbos[p->hook_fbo_num++]; - finish_pass_fbo(p, fbo, w, h, 0); - + struct ra_tex **tex = next_hook_tex(p); + finish_pass_tex(p, tex, w, h); const char *store_name = hook->save_tex ? hook->save_tex : name; - struct img_tex saved_tex = img_tex_fbo(fbo, tex.type, comps); + struct image saved_img = image_wrap(*tex, img.type, comps); // If the texture we're saving overwrites the "current" texture, also // update the tex parameter so that the future loop cycles will use the @@ -1434,18 +1473,18 @@ found: if (!trans && !gl_transform_eq(hook_off, identity_trans)) { MP_ERR(p, "Hook tried changing size of unscalable texture %s!\n", name); - return tex; + return img; } - tex = saved_tex; + img = saved_img; if (trans) gl_transform_trans(hook_off, trans); } - saved_tex_store(p, store_name, saved_tex); + saved_img_store(p, store_name, saved_img); } - return tex; + return img; } // This can be used at any time in the middle of rendering to specify an @@ -1459,7 +1498,7 @@ static void pass_opt_hook_point(struct gl_video *p, const char *name, if (!name) return; - for (int i = 0; i < p->tex_hook_num; i++) { + for (int i = 0; i < p->num_tex_hooks; i++) { struct tex_hook *hook = &p->tex_hooks[i]; for (int h = 0; h < SHADER_MAX_HOOKS; h++) { @@ -1467,7 +1506,7 @@ static void pass_opt_hook_point(struct gl_video *p, const char *name, goto found; } - for (int b = 0; b < TEXUNIT_VIDEO_NUM; b++) { + for (int b = 0; b < SHADER_MAX_BINDS; b++) { if (hook->bind_tex[b] && strcmp(hook->bind_tex[b], name) == 0) goto found; } @@ -1476,14 +1515,12 @@ static void pass_opt_hook_point(struct gl_video *p, const 
char *name, // Nothing uses this texture, don't bother storing it return; -found: - assert(p->hook_fbo_num < SHADER_MAX_SAVED); - struct fbotex *fbo = &p->hook_fbos[p->hook_fbo_num++]; - finish_pass_fbo(p, fbo, p->texture_w, p->texture_h, 0); - - struct img_tex img = img_tex_fbo(fbo, PLANE_RGB, p->components); +found: ; + struct ra_tex **tex = next_hook_tex(p); + finish_pass_tex(p, tex, p->texture_w, p->texture_h); + struct image img = image_wrap(*tex, PLANE_RGB, p->components); img = pass_hook(p, name, img, tex_trans); - copy_img_tex(p, &(int){0}, img); + copy_image(p, &(int){0}, img); p->texture_w = img.w; p->texture_h = img.h; p->components = img.components; @@ -1493,7 +1530,9 @@ found: static void load_shader(struct gl_video *p, struct bstr body) { gl_sc_hadd_bstr(p->sc, body); + gl_sc_uniform_dynamic(p->sc); gl_sc_uniform_f(p->sc, "random", (double)av_lfg_get(&p->lfg) / UINT32_MAX); + gl_sc_uniform_dynamic(p->sc); gl_sc_uniform_i(p->sc, "frame", p->frames_uploaded); gl_sc_uniform_vec2(p->sc, "input_size", (float[]){(p->src_rect.x1 - p->src_rect.x0) * @@ -1631,7 +1670,7 @@ static void reinit_scaler(struct gl_video *p, struct scaler *scaler, } // Special helper for sampling from two separated stages -static void pass_sample_separated(struct gl_video *p, struct img_tex src, +static void pass_sample_separated(struct gl_video *p, struct image src, struct scaler *scaler, int w, int h) { // Separate the transformation into x and y components, per pass @@ -1650,10 +1689,10 @@ static void pass_sample_separated(struct gl_video *p, struct img_tex src, GLSLF("// first pass\n"); pass_sample_separated_gen(p->sc, scaler, 0, 1); GLSLF("color *= %f;\n", src.multiplier); - finish_pass_fbo(p, &scaler->sep_fbo, src.w, h, FBOTEX_FUZZY_H); + finish_pass_tex(p, &scaler->sep_fbo, src.w, h); // Second pass (scale only in the x dir) - src = img_tex_fbo(&scaler->sep_fbo, src.type, src.components); + src = image_wrap(scaler->sep_fbo, src.type, src.components); src.transform = t_x; 
pass_describe(p, "%s second pass", scaler->conf.kernel.name); sampler_prelude(p->sc, pass_bind(p, src)); @@ -1663,9 +1702,9 @@ static void pass_sample_separated(struct gl_video *p, struct img_tex src, // Picks either the compute shader version or the regular sampler version // depending on hardware support static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler, - struct img_tex tex, int w, int h) + struct image img, int w, int h) { - uint64_t reqs = RA_CAP_COMPUTE | RA_CAP_NESTED_ARRAY; + uint64_t reqs = RA_CAP_COMPUTE; if ((p->ra->caps & reqs) != reqs) goto fallback; @@ -1673,8 +1712,8 @@ static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler int offset = bound - 1; // padding top/left int padding = offset + bound; // total padding - float ratiox = (float)w / tex.w, - ratioy = (float)h / tex.h; + float ratiox = (float)w / img.w, + ratioy = (float)h / img.h; // For performance we want to load at least as many pixels // horizontally as there are threads in a warp (32 for nvidia), as @@ -1688,27 +1727,28 @@ static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler int iw = (int)ceil(bw / ratiox) + padding + 1, ih = (int)ceil(bh / ratioy) + padding + 1; - int shmem_req = iw * ih * tex.components * sizeof(float); + int shmem_req = iw * ih * img.components * sizeof(float); if (shmem_req > p->ra->max_shmem) goto fallback; pass_is_compute(p, bw, bh); - pass_compute_polar(p->sc, scaler, tex.components, bw, bh, iw, ih); + pass_compute_polar(p->sc, scaler, img.components, bw, bh, iw, ih); return; fallback: // Fall back to regular polar shader when compute shaders are unsupported // or the kernel is too big for shmem - pass_sample_polar(p->sc, scaler, tex.components, p->ra->glsl_version); + pass_sample_polar(p->sc, scaler, img.components, + p->ra->caps & RA_CAP_GATHER); } -// Sample from img_tex, with the src rectangle given by it. +// Sample from image, with the src rectangle given by it. 
// The dst rectangle is implicit by what the caller will do next, but w and h // must still be what is going to be used (to dimension FBOs correctly). // This will write the scaled contents to the vec4 "color". // The scaler unit is initialized by this function; in order to avoid cache // thrashing, the scaler unit should usually use the same parameters. -static void pass_sample(struct gl_video *p, struct img_tex tex, +static void pass_sample(struct gl_video *p, struct image img, struct scaler *scaler, const struct scaler_config *conf, double scale_factor, int w, int h) { @@ -1723,14 +1763,14 @@ static void pass_sample(struct gl_video *p, struct img_tex tex, }; pass_describe(p, "%s=%s (%s)", scaler_opt[scaler->index], - scaler->conf.kernel.name, plane_names[tex.type]); + scaler->conf.kernel.name, plane_names[img.type]); bool is_separated = scaler->kernel && !scaler->kernel->polar; // Set up the transformation+prelude and bind the texture, for everything // other than separated scaling (which does this in the subfunction) if (!is_separated) - sampler_prelude(p->sc, pass_bind(p, tex)); + sampler_prelude(p->sc, pass_bind(p, img)); // Dispatch the scaler. They're all wildly different. const char *name = scaler->conf.kernel.name; @@ -1741,9 +1781,9 @@ static void pass_sample(struct gl_video *p, struct img_tex tex, } else if (strcmp(name, "oversample") == 0) { pass_sample_oversample(p->sc, scaler, w, h); } else if (scaler->kernel && scaler->kernel->polar) { - pass_dispatch_sample_polar(p, scaler, tex, w, h); + pass_dispatch_sample_polar(p, scaler, img, w, h); } else if (scaler->kernel) { - pass_sample_separated(p, tex, scaler, w, h); + pass_sample_separated(p, img, scaler, w, h); } else { // Should never happen abort(); @@ -1752,14 +1792,14 @@ static void pass_sample(struct gl_video *p, struct img_tex tex, // Apply any required multipliers. 
Separated scaling already does this in // its first stage if (!is_separated) - GLSLF("color *= %f;\n", tex.multiplier); + GLSLF("color *= %f;\n", img.multiplier); // Micro-optimization: Avoid scaling unneeded channels - skip_unused(p, tex.components); + skip_unused(p, img.components); } -// Returns true if two img_texs are semantically equivalent (same metadata) -static bool img_tex_equiv(struct img_tex a, struct img_tex b) +// Returns true if two images are semantically equivalent (same metadata) +static bool image_equiv(struct image a, struct image b) { return a.type == b.type && a.components == b.components && @@ -1772,27 +1812,15 @@ static bool img_tex_equiv(struct img_tex a, struct img_tex b) gl_transform_eq(a.transform, b.transform); } -static bool add_hook(struct gl_video *p, struct tex_hook hook) -{ - if (p->tex_hook_num < SHADER_MAX_PASSES) { - p->tex_hooks[p->tex_hook_num++] = hook; - return true; - } else { - MP_ERR(p, "Too many passes! Limit is %d.\n", SHADER_MAX_PASSES); - talloc_free(hook.priv); - return false; - } -} - -static void deband_hook(struct gl_video *p, struct img_tex tex, +static void deband_hook(struct gl_video *p, struct image img, struct gl_transform *trans, void *priv) { - pass_describe(p, "debanding (%s)", plane_names[tex.type]); + pass_describe(p, "debanding (%s)", plane_names[img.type]); pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg, p->image_params.color.gamma); } -static void unsharp_hook(struct gl_video *p, struct img_tex tex, +static void unsharp_hook(struct gl_video *p, struct image img, struct gl_transform *trans, void *priv) { pass_describe(p, "unsharp masking"); @@ -1801,7 +1829,7 @@ static void unsharp_hook(struct gl_video *p, struct img_tex tex, struct szexp_ctx { struct gl_video *p; - struct img_tex tex; + struct image img; }; static bool szexp_lookup(void *priv, struct bstr var, float size[2]) @@ -1825,15 +1853,15 @@ static bool szexp_lookup(void *priv, struct bstr var, float size[2]) // HOOKED is a special case 
if (bstr_equals0(var, "HOOKED")) { - size[0] = ctx->tex.w; - size[1] = ctx->tex.h; + size[0] = ctx->img.w; + size[1] = ctx->img.h; return true; } - for (int o = 0; o < p->saved_tex_num; o++) { - if (bstr_equals0(var, p->saved_tex[o].name)) { - size[0] = p->saved_tex[o].tex.w; - size[1] = p->saved_tex[o].tex.h; + for (int o = 0; o < p->num_saved_imgs; o++) { + if (bstr_equals0(var, p->saved_imgs[o].name)) { + size[0] = p->saved_imgs[o].img.w; + size[1] = p->saved_imgs[o].img.h; return true; } } @@ -1841,17 +1869,18 @@ static bool szexp_lookup(void *priv, struct bstr var, float size[2]) return false; } -static bool user_hook_cond(struct gl_video *p, struct img_tex tex, void *priv) +static bool user_hook_cond(struct gl_video *p, struct image img, void *priv) { struct gl_user_shader_hook *shader = priv; assert(shader); float res = false; - eval_szexpr(p->log, &(struct szexp_ctx){p, tex}, szexp_lookup, shader->cond, &res); + struct szexp_ctx ctx = {p, img}; + eval_szexpr(p->log, &ctx, szexp_lookup, shader->cond, &res); return res; } -static void user_hook(struct gl_video *p, struct img_tex tex, +static void user_hook(struct gl_video *p, struct image img, struct gl_transform *trans, void *priv) { struct gl_user_shader_hook *shader = priv; @@ -1859,7 +1888,7 @@ static void user_hook(struct gl_video *p, struct img_tex tex, load_shader(p, shader->pass_body); pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->pass_desc), - plane_names[tex.type]); + plane_names[img.type]); if (shader->compute.active) { p->pass_compute = shader->compute; @@ -1872,10 +1901,10 @@ static void user_hook(struct gl_video *p, struct img_tex tex, // to do this and display an error message than just crash OpenGL float w = 1.0, h = 1.0; - eval_szexpr(p->log, &(struct szexp_ctx){p, tex}, szexp_lookup, shader->width, &w); - eval_szexpr(p->log, &(struct szexp_ctx){p, tex}, szexp_lookup, shader->height, &h); + eval_szexpr(p->log, &(struct szexp_ctx){p, img}, szexp_lookup, shader->width, &w); + 
eval_szexpr(p->log, &(struct szexp_ctx){p, img}, szexp_lookup, shader->height, &h); - *trans = (struct gl_transform){{{w / tex.w, 0}, {0, h / tex.h}}}; + *trans = (struct gl_transform){{{w / img.w, 0}, {0, h / img.h}}}; gl_transform_trans(shader->offset, trans); } @@ -1898,27 +1927,22 @@ static bool add_user_hook(void *priv, struct gl_user_shader_hook hook) for (int h = 0; h < SHADER_MAX_BINDS; h++) texhook.bind_tex[h] = bstrdup0(copy, hook.bind_tex[h]); - return add_hook(p, texhook); + MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, texhook); + return true; } static bool add_user_tex(void *priv, struct gl_user_shader_tex tex) { struct gl_video *p = priv; - if (p->user_tex_num == SHADER_MAX_PASSES) { - MP_ERR(p, "Too many textures! Limit is %d.\n", SHADER_MAX_PASSES); - goto err; - } - tex.tex = ra_tex_create(p->ra, &tex.params); TA_FREEP(&tex.params.initial_data); - p->user_textures[p->user_tex_num++] = tex; - return true; + if (!tex.tex) + return false; -err: - talloc_free(tex.params.initial_data); - return false; + MP_TARRAY_APPEND(p, p->user_textures, p->num_user_textures, tex); + return true; } static void load_user_shaders(struct gl_video *p, char **shaders) @@ -1937,7 +1961,7 @@ static void gl_video_setup_hooks(struct gl_video *p) gl_video_reset_hooks(p); if (p->opts.deband) { - add_hook(p, (struct tex_hook) { + MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, (struct tex_hook) { .hook_tex = {"LUMA", "CHROMA", "RGB", "XYZ"}, .bind_tex = {"HOOKED"}, .hook = deband_hook, @@ -1945,7 +1969,7 @@ static void gl_video_setup_hooks(struct gl_video *p) } if (p->opts.unsharp != 0.0) { - add_hook(p, (struct tex_hook) { + MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, (struct tex_hook) { .hook_tex = {"MAIN"}, .bind_tex = {"HOOKED"}, .hook = unsharp_hook, @@ -1958,55 +1982,55 @@ static void gl_video_setup_hooks(struct gl_video *p) // sample from video textures, set "color" variable to yuv value static void pass_read_video(struct gl_video *p) { - struct img_tex 
tex[4]; + struct image img[4]; struct gl_transform offsets[4]; - pass_get_img_tex(p, &p->image, tex, offsets); + pass_get_images(p, &p->image, img, offsets); // To keep the code as simple as possibly, we currently run all shader // stages even if they would be unnecessary (e.g. no hooks for a texture). - // In the future, deferred img_tex should optimize this away. + // In the future, deferred image should optimize this away. // Merge semantically identical textures. This loop is done from back // to front so that merged textures end up in the right order while // simultaneously allowing us to skip unnecessary merges for (int n = 3; n >= 0; n--) { - if (tex[n].type == PLANE_NONE) + if (img[n].type == PLANE_NONE) continue; int first = n; int num = 0; for (int i = 0; i < n; i++) { - if (img_tex_equiv(tex[n], tex[i]) && + if (image_equiv(img[n], img[i]) && gl_transform_eq(offsets[n], offsets[i])) { GLSLF("// merging plane %d ...\n", i); - copy_img_tex(p, &num, tex[i]); + copy_image(p, &num, img[i]); first = MPMIN(first, i); - tex[i] = (struct img_tex){0}; + img[i] = (struct image){0}; } } if (num > 0) { GLSLF("// merging plane %d ... 
into %d\n", n, first); - copy_img_tex(p, &num, tex[n]); + copy_image(p, &num, img[n]); pass_describe(p, "merging planes"); - finish_pass_fbo(p, &p->merge_fbo[n], tex[n].w, tex[n].h, 0); - tex[first] = img_tex_fbo(&p->merge_fbo[n], tex[n].type, num); - tex[n] = (struct img_tex){0}; + finish_pass_tex(p, &p->merge_tex[n], img[n].w, img[n].h); + img[first] = image_wrap(p->merge_tex[n], img[n].type, num); + img[n] = (struct image){0}; } } // If any textures are still in integer format by this point, we need // to introduce an explicit conversion pass to avoid breaking hooks/scaling for (int n = 0; n < 4; n++) { - if (tex[n].tex && tex[n].tex->params.format->ctype == RA_CTYPE_UINT) { + if (img[n].tex && img[n].tex->params.format->ctype == RA_CTYPE_UINT) { GLSLF("// use_integer fix for plane %d\n", n); - copy_img_tex(p, &(int){0}, tex[n]); + copy_image(p, &(int){0}, img[n]); pass_describe(p, "use_integer fix"); - finish_pass_fbo(p, &p->integer_fbo[n], tex[n].w, tex[n].h, 0); - tex[n] = img_tex_fbo(&p->integer_fbo[n], tex[n].type, - tex[n].components); + finish_pass_tex(p, &p->integer_tex[n], img[n].w, img[n].h); + img[n] = image_wrap(p->integer_tex[n], img[n].type, + img[n].components); } } @@ -2014,7 +2038,7 @@ static void pass_read_video(struct gl_video *p) // modifying them in the process for (int n = 0; n < 4; n++) { const char *name; - switch (tex[n].type) { + switch (img[n].type) { case PLANE_RGB: name = "RGB"; break; case PLANE_LUMA: name = "LUMA"; break; case PLANE_CHROMA: name = "CHROMA"; break; @@ -2023,7 +2047,7 @@ static void pass_read_video(struct gl_video *p) default: continue; } - tex[n] = pass_hook(p, name, tex[n], &offsets[n]); + img[n] = pass_hook(p, name, img[n], &offsets[n]); } // At this point all planes are finalized but they may not be at the @@ -2032,15 +2056,15 @@ static void pass_read_video(struct gl_video *p) // the rgb/luma texture is the "reference" and scale everything else // to match. 
for (int n = 0; n < 4; n++) { - switch (tex[n].type) { + switch (img[n].type) { case PLANE_RGB: case PLANE_XYZ: case PLANE_LUMA: break; default: continue; } - p->texture_w = tex[n].w; - p->texture_h = tex[n].h; + p->texture_w = img[n].w; + p->texture_h = img[n].h; p->texture_offset = offsets[n]; break; } @@ -2049,20 +2073,16 @@ static void pass_read_video(struct gl_video *p) struct mp_rect_f src = {0.0, 0.0, p->image_params.w, p->image_params.h}; struct mp_rect_f ref = src; gl_transform_rect(p->texture_offset, &ref); - MP_DBG(p, "ref rect: {%f %f} {%f %f}\n", ref.x0, ref.y0, ref.x1, ref.y1); // Explicitly scale all of the textures that don't match for (int n = 0; n < 4; n++) { - if (tex[n].type == PLANE_NONE) + if (img[n].type == PLANE_NONE) continue; // If the planes are aligned identically, we will end up with the // exact same source rectangle. struct mp_rect_f rect = src; gl_transform_rect(offsets[n], &rect); - MP_DBG(p, "rect[%d]: {%f %f} {%f %f}\n", n, - rect.x0, rect.y0, rect.x1, rect.y1); - if (mp_rect_f_seq(ref, rect)) continue; @@ -2074,23 +2094,19 @@ static void pass_read_video(struct gl_video *p) {0.0, (ref.y1 - ref.y0) / (rect.y1 - rect.y0)}}, .t = {ref.x0, ref.y0}, }; - MP_DBG(p, "-> fix[%d] = {%f %f} + off {%f %f}\n", n, - fix.m[0][0], fix.m[1][1], fix.t[0], fix.t[1]); // Since the scale in texture space is different from the scale in // absolute terms, we have to scale the coefficients down to be // relative to the texture's physical dimensions and local offset struct gl_transform scale = { - .m = {{(float)tex[n].w / p->texture_w, 0.0}, - {0.0, (float)tex[n].h / p->texture_h}}, + .m = {{(float)img[n].w / p->texture_w, 0.0}, + {0.0, (float)img[n].h / p->texture_h}}, .t = {-rect.x0, -rect.y0}, }; if (p->image_params.rotate % 180 == 90) MPSWAP(double, scale.m[0][0], scale.m[1][1]); gl_transform_trans(scale, &fix); - MP_DBG(p, "-> scaled[%d] = {%f %f} + off {%f %f}\n", n, - fix.m[0][0], fix.m[1][1], fix.t[0], fix.t[1]); // Since the texture transform is 
a function of the texture coordinates // to texture space, rather than the other way around, we have to @@ -2100,11 +2116,11 @@ static void pass_read_video(struct gl_video *p) fix.m[1][1] = 1.0 / fix.m[1][1]; fix.t[0] = fix.m[0][0] * -fix.t[0]; fix.t[1] = fix.m[1][1] * -fix.t[1]; - gl_transform_trans(fix, &tex[n].transform); + gl_transform_trans(fix, &img[n].transform); int scaler_id = -1; const char *name = NULL; - switch (tex[n].type) { + switch (img[n].type) { case PLANE_RGB: case PLANE_LUMA: case PLANE_XYZ: @@ -2129,31 +2145,31 @@ static void pass_read_video(struct gl_video *p) // bilinear scaling is a free no-op thanks to GPU sampling if (strcmp(conf->kernel.name, "bilinear") != 0) { GLSLF("// upscaling plane %d\n", n); - pass_sample(p, tex[n], scaler, conf, 1.0, p->texture_w, p->texture_h); - finish_pass_fbo(p, &p->scale_fbo[n], p->texture_w, p->texture_h, 0); - tex[n] = img_tex_fbo(&p->scale_fbo[n], tex[n].type, tex[n].components); + pass_sample(p, img[n], scaler, conf, 1.0, p->texture_w, p->texture_h); + finish_pass_tex(p, &p->scale_tex[n], p->texture_w, p->texture_h); + img[n] = image_wrap(p->scale_tex[n], img[n].type, img[n].components); } // Run any post-scaling hooks - tex[n] = pass_hook(p, name, tex[n], NULL); + img[n] = pass_hook(p, name, img[n], NULL); } // All planes are of the same size and properly aligned at this point - GLSLF("// combining planes\n"); + pass_describe(p, "combining planes"); int coord = 0; for (int i = 0; i < 4; i++) { - if (tex[i].type != PLANE_NONE) - copy_img_tex(p, &coord, tex[i]); + if (img[i].type != PLANE_NONE) + copy_image(p, &coord, img[i]); } p->components = coord; } -// Utility function that simply binds an FBO and reads from it, without any +// Utility function that simply binds a texture and reads from it, without any // transformations. 
-static void pass_read_fbo(struct gl_video *p, struct fbotex *fbo) +static void pass_read_tex(struct gl_video *p, struct ra_tex *tex) { - struct img_tex tex = img_tex_fbo(fbo, PLANE_RGB, p->components); - copy_img_tex(p, &(int){0}, tex); + struct image img = image_wrap(tex, PLANE_RGB, p->components); + copy_image(p, &(int){0}, img); } // yuv conversion, and any other conversions before main up/down-scaling @@ -2335,8 +2351,8 @@ static void pass_scale_main(struct gl_video *p) compute_src_transform(p, &transform); GLSLF("// main scaling\n"); - finish_pass_fbo(p, &p->indirect_fbo, p->texture_w, p->texture_h, 0); - struct img_tex src = img_tex_fbo(&p->indirect_fbo, PLANE_RGB, p->components); + finish_pass_tex(p, &p->indirect_tex, p->texture_w, p->texture_h); + struct image src = image_wrap(p->indirect_tex, PLANE_RGB, p->components); gl_transform_trans(transform, &src.transform); pass_sample(p, src, scaler, &scaler_conf, scale_factor, vp_w, vp_h); @@ -2571,6 +2587,7 @@ static void pass_dither(struct gl_video *p) float matrix[2][2] = {{cos(r), -sin(r) }, {sin(r) * m, cos(r) * m}}; + gl_sc_uniform_dynamic(p->sc); gl_sc_uniform_mat2(p->sc, "dither_trafo", true, &matrix[0][0]); GLSL(dither_pos = dither_trafo * dither_pos;) @@ -2584,7 +2601,7 @@ static void pass_dither(struct gl_video *p) // Draws the OSD, in scene-referred colors.. If cms is true, subtitles are // instead adapted to the display's gamut. 
static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts, - struct mp_osd_res rect, struct fbodst target, bool cms) + struct mp_osd_res rect, struct ra_fbo fbo, bool cms) { mpgl_osd_generate(p->osd, rect, pts, p->image_params.stereo_out, draw_flags); @@ -2604,7 +2621,7 @@ static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts, pass_colormanage(p, csp_srgb, true); } - mpgl_osd_draw_finish(p->osd, n, p->sc, target); + mpgl_osd_draw_finish(p->osd, n, p->sc, fbo); } timer_pool_stop(p->osd_timer); @@ -2620,17 +2637,17 @@ static float chroma_realign(int size, int pixel) // Minimal rendering code path, for GLES or OpenGL 2.1 without proper FBOs. static void pass_render_frame_dumb(struct gl_video *p) { - struct img_tex tex[4]; + struct image img[4]; struct gl_transform off[4]; - pass_get_img_tex(p, &p->image, tex, off); + pass_get_images(p, &p->image, img, off); struct gl_transform transform; compute_src_transform(p, &transform); int index = 0; for (int i = 0; i < p->plane_count; i++) { - int cw = tex[i].type == PLANE_CHROMA ? p->ra_format.chroma_w : 1; - int ch = tex[i].type == PLANE_CHROMA ? p->ra_format.chroma_h : 1; + int cw = img[i].type == PLANE_CHROMA ? p->ra_format.chroma_w : 1; + int ch = img[i].type == PLANE_CHROMA ? 
p->ra_format.chroma_h : 1; if (p->image_params.rotate % 180 == 90) MPSWAP(int, cw, ch); @@ -2644,10 +2661,10 @@ static void pass_render_frame_dumb(struct gl_video *p) t.t[0] += off[i].t[0]; t.t[1] += off[i].t[1]; - gl_transform_trans(tex[i].transform, &t); - tex[i].transform = t; + gl_transform_trans(img[i].transform, &t); + img[i].transform = t; - copy_img_tex(p, &index, tex[i]); + copy_image(p, &index, img[i]); } pass_convert_yuv(p); @@ -2662,8 +2679,8 @@ static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi, uint64_t p->texture_h = p->image_params.h; p->texture_offset = identity_trans; p->components = 0; - p->saved_tex_num = 0; - p->hook_fbo_num = 0; + p->num_saved_imgs = 0; + p->idx_hook_textures = 0; p->use_linear = false; // try uploading the frame @@ -2693,10 +2710,10 @@ static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi, uint64_t .w = p->texture_w, .h = p->texture_h, .display_par = scale[1] / scale[0], // counter compensate scaling }; - finish_pass_fbo(p, &p->blend_subs_fbo, rect.w, rect.h, 0); - pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect, - p->blend_subs_fbo.fbo, false); - pass_read_fbo(p, &p->blend_subs_fbo); + finish_pass_tex(p, &p->blend_subs_tex, rect.w, rect.h); + struct ra_fbo fbo = { p->blend_subs_tex }; + pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect, fbo, false); + pass_read_tex(p, p->blend_subs_tex); pass_describe(p, "blend subs video"); } pass_opt_hook_point(p, "MAIN", &p->texture_offset); @@ -2723,10 +2740,10 @@ static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi, uint64_t pass_delinearize(p->sc, p->image_params.color.gamma); p->use_linear = false; } - finish_pass_fbo(p, &p->blend_subs_fbo, p->texture_w, p->texture_h, 0); - pass_draw_osd(p, OSD_DRAW_SUB_ONLY, vpts, rect, - p->blend_subs_fbo.fbo, false); - pass_read_fbo(p, &p->blend_subs_fbo); + finish_pass_tex(p, &p->blend_subs_tex, p->texture_w, p->texture_h); + struct ra_fbo fbo = { p->blend_subs_tex }; + pass_draw_osd(p, 
OSD_DRAW_SUB_ONLY, vpts, rect, fbo, false); + pass_read_tex(p, p->blend_subs_tex); pass_describe(p, "blend subs"); } @@ -2735,7 +2752,7 @@ static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi, uint64_t return true; } -static void pass_draw_to_screen(struct gl_video *p, struct fbodst fbo) +static void pass_draw_to_screen(struct gl_video *p, struct ra_fbo fbo) { if (p->dumb_mode) pass_render_frame_dumb(p); @@ -2749,15 +2766,15 @@ static void pass_draw_to_screen(struct gl_video *p, struct fbodst fbo) pass_colormanage(p, p->image_params.color, false); - // Since finish_pass_direct doesn't work with compute shaders, and neither + // Since finish_pass_fbo doesn't work with compute shaders, and neither // does the checkerboard/dither code, we may need an indirection via - // p->screen_fbo here. + // p->screen_tex here. if (p->pass_compute.active) { int o_w = p->dst_rect.x1 - p->dst_rect.x0, o_h = p->dst_rect.y1 - p->dst_rect.y0; - finish_pass_fbo(p, &p->screen_fbo, o_w, o_h, FBOTEX_FUZZY); - struct img_tex tmp = img_tex_fbo(&p->screen_fbo, PLANE_RGB, p->components); - copy_img_tex(p, &(int){0}, tmp); + finish_pass_tex(p, &p->screen_tex, o_w, o_h); + struct image tmp = image_wrap(p->screen_tex, PLANE_RGB, p->components); + copy_image(p, &(int){0}, tmp); } if (p->has_alpha){ @@ -2765,14 +2782,16 @@ static void pass_draw_to_screen(struct gl_video *p, struct fbodst fbo) // Draw checkerboard pattern to indicate transparency GLSLF("// transparency checkerboard\n"); GLSL(bvec2 tile = lessThan(fract(gl_FragCoord.xy * 1.0/32.0), vec2(0.5));) - GLSL(vec3 background = vec3(tile.x == tile.y ? 1.0 : 0.75);) - GLSL(color.rgb = mix(background, color.rgb, color.a);) + GLSL(vec3 background = vec3(tile.x == tile.y ? 
0.93 : 0.87);) + GLSL(color.rgb += background.rgb * (1.0 - color.a);) + GLSL(color.a = 1.0;) } else if (p->opts.alpha_mode == ALPHA_BLEND) { // Blend into background color (usually black) struct m_color c = p->opts.background; GLSLF("vec4 background = vec4(%f, %f, %f, %f);\n", c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0); - GLSL(color = mix(background, vec4(color.rgb, 1.0), color.a);) + GLSL(color.rgb += background.rgb * (1.0 - color.a);) + GLSL(color.a = background.a;) } } @@ -2780,11 +2799,11 @@ static void pass_draw_to_screen(struct gl_video *p, struct fbodst fbo) pass_dither(p); pass_describe(p, "output to screen"); - finish_pass_direct(p, fbo, &p->dst_rect); + finish_pass_fbo(p, fbo, &p->dst_rect); } -static bool update_fbosurface(struct gl_video *p, struct mp_image *mpi, - uint64_t id, struct fbosurface *surf) +static bool update_surface(struct gl_video *p, struct mp_image *mpi, + uint64_t id, struct surface *surf) { int vp_w = p->dst_rect.x1 - p->dst_rect.x0, vp_h = p->dst_rect.y1 - p->dst_rect.y0; @@ -2801,7 +2820,7 @@ static bool update_fbosurface(struct gl_video *p, struct mp_image *mpi, pass_linearize(p->sc, p->image_params.color.gamma); } - finish_pass_fbo(p, &surf->fbotex, vp_w, vp_h, FBOTEX_FUZZY); + finish_pass_tex(p, &surf->tex, vp_w, vp_h); surf->id = id; surf->pts = mpi->pts; return true; @@ -2809,7 +2828,7 @@ static bool update_fbosurface(struct gl_video *p, struct mp_image *mpi, // Draws an interpolate frame to fbo, based on the frame timing in t static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, - struct fbodst fbo) + struct ra_fbo fbo) { bool is_new = false; @@ -2822,8 +2841,8 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, // First of all, figure out if we have a frame available at all, and draw // it manually + reset the queue if not if (p->surfaces[p->surface_now].id == 0) { - struct fbosurface *now = &p->surfaces[p->surface_now]; - if (!update_fbosurface(p, t->current, 
t->frame_id, now)) + struct surface *now = &p->surfaces[p->surface_now]; + if (!update_surface(p, t->current, t->frame_id, now)) return; p->surface_idx = p->surface_now; is_new = true; @@ -2831,13 +2850,13 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, // Find the right frame for this instant if (t->current) { - int next = fbosurface_wrap(p->surface_now + 1); + int next = surface_wrap(p->surface_now + 1); while (p->surfaces[next].id && p->surfaces[next].id > p->surfaces[p->surface_now].id && p->surfaces[p->surface_now].id < t->frame_id) { p->surface_now = next; - next = fbosurface_wrap(next + 1); + next = surface_wrap(next + 1); } } @@ -2856,20 +2875,19 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, } else { assert(tscale->kernel && !tscale->kernel->polar); size = ceil(tscale->kernel->size); - assert(size <= TEXUNIT_VIDEO_NUM); } int radius = size/2; int surface_now = p->surface_now; - int surface_bse = fbosurface_wrap(surface_now - (radius-1)); - int surface_end = fbosurface_wrap(surface_now + radius); - assert(fbosurface_wrap(surface_bse + size-1) == surface_end); + int surface_bse = surface_wrap(surface_now - (radius-1)); + int surface_end = surface_wrap(surface_now + radius); + assert(surface_wrap(surface_bse + size-1) == surface_end); // Render new frames while there's room in the queue. Note that technically, // this should be done before the step where we find the right frame, but // it only barely matters at the very beginning of playback, and this way // makes the code much more linear. 
- int surface_dst = fbosurface_wrap(p->surface_idx + 1); + int surface_dst = surface_wrap(p->surface_idx + 1); for (int i = 0; i < t->num_frames; i++) { // Avoid overwriting data we might still need if (surface_dst == surface_bse - 1) @@ -2881,11 +2899,11 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, continue; if (f_id > p->surfaces[p->surface_idx].id) { - struct fbosurface *dst = &p->surfaces[surface_dst]; - if (!update_fbosurface(p, f, f_id, dst)) + struct surface *dst = &p->surfaces[surface_dst]; + if (!update_surface(p, f, f_id, dst)) return; p->surface_idx = surface_dst; - surface_dst = fbosurface_wrap(surface_dst + 1); + surface_dst = surface_wrap(surface_dst + 1); is_new = true; } } @@ -2897,7 +2915,7 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, // end of playback or start of playback. bool valid = true; for (int i = surface_bse, ii; valid && i != surface_end; i = ii) { - ii = fbosurface_wrap(i + 1); + ii = surface_wrap(i + 1); if (p->surfaces[i].id == 0 || p->surfaces[ii].id == 0) { valid = false; } else if (p->surfaces[ii].id < p->surfaces[i].id) { @@ -2915,7 +2933,7 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, pass_describe(p, "interpolation"); if (!valid || t->still) { // surface_now is guaranteed to be valid, so we can safely use it. 
- pass_read_fbo(p, &p->surfaces[surface_now].fbotex); + pass_read_tex(p, p->surfaces[surface_now].tex); p->is_interpolated = false; } else { double mix = t->vsync_offset / t->ideal_frame_duration; @@ -2923,7 +2941,7 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, // so we try to adjust by using the previous set of N frames instead // (which requires some extra checking to make sure it's valid) if (mix < 0.0) { - int prev = fbosurface_wrap(surface_bse - 1); + int prev = surface_wrap(surface_bse - 1); if (p->surfaces[prev].id != 0 && p->surfaces[prev].id < p->surfaces[surface_bse].id) { @@ -2949,20 +2967,22 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, // Blend the frames together if (oversample || linear) { + gl_sc_uniform_dynamic(p->sc); gl_sc_uniform_f(p->sc, "inter_coeff", mix); GLSL(color = mix(texture(texture0, texcoord0), texture(texture1, texcoord1), inter_coeff);) } else { + gl_sc_uniform_dynamic(p->sc); gl_sc_uniform_f(p->sc, "fcoord", mix); pass_sample_separated_gen(p->sc, tscale, 0, 0); } // Load all the required frames for (int i = 0; i < size; i++) { - struct img_tex img = - img_tex_fbo(&p->surfaces[fbosurface_wrap(surface_bse+i)].fbotex, - PLANE_RGB, p->components); + struct image img = + image_wrap(p->surfaces[surface_wrap(surface_bse+i)].tex, + PLANE_RGB, p->components); // Since the code in pass_sample_separated currently assumes // the textures are bound in-order and starting at 0, we just // assert to make sure this is the case (which it should always be) @@ -2970,8 +2990,8 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, assert(id == i); } - MP_DBG(p, "inter frame dur: %f vsync: %f, mix: %f\n", - t->ideal_frame_duration, t->vsync_interval, mix); + MP_TRACE(p, "inter frame dur: %f vsync: %f, mix: %f\n", + t->ideal_frame_duration, t->vsync_interval, mix); p->is_interpolated = true; } pass_draw_to_screen(p, fbo); @@ -2980,9 +3000,11 @@ static void 
gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, } void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, - struct fbodst target) + struct ra_fbo fbo) { - struct mp_rect target_rc = {0, 0, target.tex->params.w, target.tex->params.h}; + gl_video_update_options(p); + + struct mp_rect target_rc = {0, 0, fbo.tex->params.w, fbo.tex->params.h}; p->broken_frame = false; @@ -2991,18 +3013,18 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, if (!has_frame || !mp_rect_equals(&p->dst_rect, &target_rc)) { struct m_color c = p->clear_color; float color[4] = {c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0}; - p->ra->fns->clear(p->ra, target.tex, color, &target_rc); + p->ra->fns->clear(p->ra, fbo.tex, color, &target_rc); } - if (p->hwdec_active && p->hwdec->driver->overlay_frame) { + if (p->hwdec_overlay) { if (has_frame) { - float *color = p->hwdec->overlay_colorkey; - p->ra->fns->clear(p->ra, target.tex, color, &p->dst_rect); + float *color = p->hwdec_overlay->overlay_colorkey; + p->ra->fns->clear(p->ra, fbo.tex, color, &p->dst_rect); } - p->hwdec->driver->overlay_frame(p->hwdec, frame->current, - &p->src_rect, &p->dst_rect, - frame->frame_id != p->image.id); + p->hwdec_overlay->driver->overlay_frame(p->hwdec_overlay, frame->current, + &p->src_rect, &p->dst_rect, + frame->frame_id != p->image.id); if (frame->current) p->osd_pts = frame->current->pts; @@ -3021,7 +3043,7 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, } if (interpolate) { - gl_video_interpolate_frame(p, frame, target); + gl_video_interpolate_frame(p, frame, fbo); } else { bool is_new = frame->frame_id != p->image.id; @@ -3029,41 +3051,42 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, if (frame->still && p->opts.blend_subs) is_new = true; - if (is_new || !p->output_fbo_valid) { - p->output_fbo_valid = false; + if (is_new || !p->output_tex_valid) { + p->output_tex_valid = false; pass_info_reset(p, 
!is_new); if (!pass_render_frame(p, frame->current, frame->frame_id)) goto done; // For the non-interpolation case, we draw to a single "cache" - // FBO to speed up subsequent re-draws (if any exist) - struct fbodst dest_fbo = target; + // texture to speed up subsequent re-draws (if any exist) + struct ra_fbo dest_fbo = fbo; if (frame->num_vsyncs > 1 && frame->display_synced && !p->dumb_mode && (p->ra->caps & RA_CAP_BLIT)) { - fbotex_change(&p->output_fbo, p->ra, p->log, - target.tex->params.w, target.tex->params.h, - p->fbo_format, FBOTEX_FUZZY); - dest_fbo = p->output_fbo.fbo; - p->output_fbo_valid = true; + bool r = ra_tex_resize(p->ra, p->log, &p->output_tex, + fbo.tex->params.w, fbo.tex->params.h, + p->fbo_format); + if (r) { + dest_fbo = (struct ra_fbo) { p->output_tex }; + p->output_tex_valid = true; + } } pass_draw_to_screen(p, dest_fbo); } - // "output fbo valid" and "output fbo needed" are equivalent - if (p->output_fbo_valid) { + // "output tex valid" and "output tex needed" are equivalent + if (p->output_tex_valid) { pass_info_reset(p, true); pass_describe(p, "redraw cached frame"); struct mp_rect src = p->dst_rect; struct mp_rect dst = src; - if (target.flip) { - dst.y0 = target.tex->params.h - src.y0; - dst.y1 = target.tex->params.h - src.y1; + if (fbo.flip) { + dst.y0 = fbo.tex->params.h - src.y0; + dst.y1 = fbo.tex->params.h - src.y1; } timer_pool_start(p->blit_timer); - p->ra->fns->blit(p->ra, target.tex, p->output_fbo.tex, - &dst, &src); + p->ra->fns->blit(p->ra, fbo.tex, p->output_tex, &dst, &src); timer_pool_stop(p->blit_timer); pass_record(p, timer_pool_measure(p->blit_timer)); } @@ -3072,8 +3095,6 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, done: - unmap_current_image(p); - debug_check_gl(p, "after video rendering"); if (p->osd) { @@ -3084,7 +3105,7 @@ done: pass_info_reset(p, true); pass_draw_osd(p, p->opts.blend_subs ? 
OSD_DRAW_OSD_ONLY : 0, - p->osd_pts, p->osd_rect, target, true); + p->osd_pts, p->osd_rect, fbo, true); debug_check_gl(p, "after OSD rendering"); } @@ -3092,17 +3113,7 @@ done: // Make the screen solid blue to make it visually clear that an // error has occurred float color[4] = {0.0, 0.05, 0.5, 1.0}; - p->ra->fns->clear(p->ra, target.tex, color, &target_rc); - } - - // The playloop calls this last before waiting some time until it decides - // to call flip_page(). Tell OpenGL to start execution of the GPU commands - // while we sleep (this happens asynchronously). - if ((p->opts.early_flush == -1 && !frame->display_synced) || - p->opts.early_flush == 1) - { - if (p->ra->fns->flush) - p->ra->fns->flush(p->ra); + p->ra->fns->clear(p->ra, fbo.tex, color, &target_rc); } p->frames_rendered++; @@ -3148,7 +3159,7 @@ void gl_video_resize(struct gl_video *p, static void frame_perf_data(struct pass_info pass[], struct mp_frame_perf *out) { - for (int i = 0; i < PASS_INFO_MAX; i++) { + for (int i = 0; i < VO_PASS_PERF_MAX; i++) { if (!pass[i].desc.len) break; out->perf[out->count] = pass[i].perf; @@ -3169,14 +3180,14 @@ static void reinterleave_vdpau(struct gl_video *p, struct ra_tex *input[4], struct ra_tex *output[2]) { for (int n = 0; n < 2; n++) { - struct fbotex *fbo = &p->vdpau_deinterleave_fbo[n]; + struct ra_tex **tex = &p->vdpau_deinterleave_tex[n]; // This is an array of the 2 to-merge planes. struct ra_tex **src = &input[n * 2]; int w = src[0]->params.w; int h = src[0]->params.h; int ids[2]; for (int t = 0; t < 2; t++) { - ids[t] = pass_bind(p, (struct img_tex){ + ids[t] = pass_bind(p, (struct image){ .tex = src[t], .multiplier = 1.0, .transform = identity_trans, @@ -3185,18 +3196,18 @@ static void reinterleave_vdpau(struct gl_video *p, }); } + pass_describe(p, "vdpau reinterleaving"); GLSLF("color = fract(gl_FragCoord.y * 0.5) < 0.5\n"); GLSLF(" ? 
texture(texture%d, texcoord%d)\n", ids[0], ids[0]); GLSLF(" : texture(texture%d, texcoord%d);", ids[1], ids[1]); - const struct ra_format *fmt = - ra_find_unorm_format(p->ra, 1, n == 0 ? 1 : 2); - fbotex_change(fbo, p->ra, p->log, w, h * 2, fmt, 0); - - pass_describe(p, "vdpau reinterleaving"); - finish_pass_direct(p, fbo->fbo, &(struct mp_rect){0, 0, w, h * 2}); + int comps = n == 0 ? 1 : 2; + const struct ra_format *fmt = ra_find_unorm_format(p->ra, 1, comps); + ra_tex_resize(p->ra, p->log, tex, w, h * 2, fmt); + struct ra_fbo fbo = { *tex }; + finish_pass_fbo(p, fbo, &(struct mp_rect){0, 0, w, h * 2}); - output[n] = fbo->tex; + output[n] = *tex; } } @@ -3262,8 +3273,6 @@ static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t for (int n = 0; n < p->plane_count; n++) { struct texplane *plane = &vimg->planes[n]; - plane->flipped = mpi->stride[0] < 0; - struct ra_tex_upload_params params = { .tex = plane->tex, .src = mpi->planes[n], @@ -3271,6 +3280,13 @@ static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t .stride = mpi->stride[n], }; + plane->flipped = params.stride < 0; + if (plane->flipped) { + int h = mp_image_plane_h(mpi, n); + params.src = (char *)params.src + (h - 1) * params.stride; + params.stride = -params.stride; + } + struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]); if (mapped) { params.buf = mapped->buf; @@ -3310,9 +3326,9 @@ error: static bool test_fbo(struct gl_video *p, const struct ra_format *fmt) { MP_VERBOSE(p, "Testing FBO format %s\n", fmt->name); - struct fbotex fbo = {0}; - bool success = fbotex_change(&fbo, p->ra, p->log, 16, 16, fmt, 0); - fbotex_uninit(&fbo); + struct ra_tex *tex = NULL; + bool success = ra_tex_resize(p->ra, p->log, &tex, 16, 16, fmt); + ra_tex_free(p->ra, &tex); return success; } @@ -3359,7 +3375,8 @@ static void check_gl_features(struct gl_video *p) bool have_compute = ra->caps & RA_CAP_COMPUTE; bool have_ssbo = ra->caps & RA_CAP_BUF_RW; - const char 
*auto_fbo_fmts[] = {"rgba16", "rgba16f", "rgb10_a2", "rgba8", 0}; + const char *auto_fbo_fmts[] = {"rgba16", "rgba16f", "rgba16hf", + "rgb10_a2", "rgba8", 0}; const char *user_fbo_fmts[] = {p->opts.fbo_format, 0}; const char **fbo_fmts = user_fbo_fmts[0] && strcmp(user_fbo_fmts[0], "auto") ? user_fbo_fmts : auto_fbo_fmts; @@ -3388,7 +3405,6 @@ static void check_gl_features(struct gl_video *p) "Most extended features will be disabled.\n"); } p->dumb_mode = true; - p->use_lut_3d = false; // Most things don't work, so whitelist all options that still work. p->opts = (struct gl_video_opts){ .gamma = p->opts.gamma, @@ -3409,9 +3425,13 @@ static void check_gl_features(struct gl_video *p) .tone_mapping_param = p->opts.tone_mapping_param, .tone_mapping_desat = p->opts.tone_mapping_desat, .early_flush = p->opts.early_flush, + .icc_opts = p->opts.icc_opts, + .hwdec_interop = p->opts.hwdec_interop, }; for (int n = 0; n < SCALER_COUNT; n++) p->opts.scaler[n] = gl_video_opts_def.scaler[n]; + if (!have_fbo) + p->use_lut_3d = false; return; } p->dumb_mode = false; @@ -3463,6 +3483,19 @@ static void check_gl_features(struct gl_video *p) p->opts.compute_hdr_peak = 0; MP_WARN(p, "Disabling HDR peak computation (no compute shaders).\n"); } + if (!(ra->caps & RA_CAP_FRAGCOORD) && p->opts.dither_depth >= 0 && + p->opts.dither_algo != DITHER_NONE) + { + p->opts.dither_algo = DITHER_NONE; + MP_WARN(p, "Disabling dithering (no gl_FragCoord).\n"); + } + if (!(ra->caps & RA_CAP_FRAGCOORD) && + p->opts.alpha_mode == ALPHA_BLEND_TILES) + { + p->opts.alpha_mode = ALPHA_BLEND; + // Verbose, since this is the default setting + MP_VERBOSE(p, "Disabling alpha checkerboard (no gl_FragCoord).\n"); + } } static void init_gl(struct gl_video *p) @@ -3486,6 +3519,10 @@ void gl_video_uninit(struct gl_video *p) uninit_video(p); + for (int n = 0; n < p->num_hwdecs; n++) + ra_hwdec_uninit(p->hwdecs[n]); + p->num_hwdecs = 0; + gl_sc_destroy(p->sc); ra_tex_free(p->ra, &p->lut_3d_texture); @@ -3495,7 +3532,7 
@@ void gl_video_uninit(struct gl_video *p) timer_pool_destroy(p->blit_timer); timer_pool_destroy(p->osd_timer); - for (int i = 0; i < PASS_INFO_MAX; i++) { + for (int i = 0; i < VO_PASS_PERF_MAX; i++) { talloc_free(p->pass_fresh[i].desc.start); talloc_free(p->pass_redraw[i].desc.start); } @@ -3540,8 +3577,10 @@ bool gl_video_check_format(struct gl_video *p, int mp_format) if (ra_get_imgfmt_desc(p->ra, mp_format, &desc) && is_imgfmt_desc_supported(p, &desc)) return true; - if (p->hwdec && ra_hwdec_test_format(p->hwdec, mp_format)) - return true; + for (int n = 0; n < p->num_hwdecs; n++) { + if (ra_hwdec_test_format(p->hwdecs[n], mp_format)) + return true; + } return false; } @@ -3588,6 +3627,14 @@ struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log, p->opts = *opts; for (int n = 0; n < SCALER_COUNT; n++) p->scaler[n] = (struct scaler){.index = n}; + // our VAO always has the vec2 position as the first element + MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) { + .name = "position", + .type = RA_VARTYPE_FLOAT, + .dim_v = 2, + .dim_m = 1, + .offset = 0, + }); init_gl(p); reinit_from_options(p); return p; @@ -3612,12 +3659,15 @@ static const char *handle_scaler_opt(const char *name, bool tscale) return NULL; } -void gl_video_update_options(struct gl_video *p) +static void gl_video_update_options(struct gl_video *p) { if (m_config_cache_update(p->opts_cache)) { gl_lcms_update_options(p->cms); reinit_from_options(p); } + + if (mp_csp_equalizer_state_changed(p->video_eq)) + p->output_tex_valid = false; } static void reinit_from_options(struct gl_video *p) @@ -3648,6 +3698,8 @@ static void reinit_from_options(struct gl_video *p) void gl_video_configure_queue(struct gl_video *p, struct vo *vo) { + gl_video_update_options(p); + int queue_size = 1; // Figure out an adequate size for the interpolation queue. 
The larger @@ -3742,19 +3794,12 @@ float gl_video_scale_ambient_lux(float lmin, float lmax, void gl_video_set_ambient_lux(struct gl_video *p, int lux) { if (p->opts.gamma_auto) { - float gamma = gl_video_scale_ambient_lux(16.0, 64.0, 2.40, 1.961, lux); - MP_VERBOSE(p, "ambient light changed: %dlux (gamma: %f)\n", lux, gamma); - p->opts.gamma = MPMIN(1.0, 1.961 / gamma); + p->opts.gamma = gl_video_scale_ambient_lux(16.0, 256.0, 1.0, 1.2, lux); + MP_TRACE(p, "ambient light changed: %d lux (gamma: %f)\n", lux, + p->opts.gamma); } } -void gl_video_set_hwdec(struct gl_video *p, struct ra_hwdec *hwdec) -{ - unref_current_image(p); - ra_hwdec_mapper_free(&p->hwdec_mapper); - p->hwdec = hwdec; -} - static void *gl_video_dr_alloc_buffer(struct gl_video *p, size_t size) { struct ra_buf_params params = { @@ -3811,3 +3856,46 @@ struct mp_image *gl_video_get_image(struct gl_video *p, int imgfmt, int w, int h gl_video_dr_free_buffer(p, ptr); return res; } + +static void load_add_hwdec(struct gl_video *p, struct mp_hwdec_devices *devs, + const struct ra_hwdec_driver *drv, bool is_auto) +{ + struct ra_hwdec *hwdec = + ra_hwdec_load_driver(p->ra, p->log, p->global, devs, drv, is_auto); + if (hwdec) + MP_TARRAY_APPEND(p, p->hwdecs, p->num_hwdecs, hwdec); +} + +void gl_video_load_hwdecs(struct gl_video *p, struct mp_hwdec_devices *devs, + bool load_all_by_default) +{ + char *type = p->opts.hwdec_interop; + if (!type || !type[0] || strcmp(type, "auto") == 0) { + if (!load_all_by_default) + return; + type = "all"; + } + if (strcmp(type, "no") == 0) { + // do nothing, just block further loading + } else if (strcmp(type, "all") == 0) { + gl_video_load_hwdecs_all(p, devs); + } else { + for (int n = 0; ra_hwdec_drivers[n]; n++) { + const struct ra_hwdec_driver *drv = ra_hwdec_drivers[n]; + if (strcmp(type, drv->name) == 0) { + load_add_hwdec(p, devs, drv, false); + break; + } + } + } + p->hwdec_interop_loading_done = true; +} + +void gl_video_load_hwdecs_all(struct gl_video *p, struct 
mp_hwdec_devices *devs) +{ + if (!p->hwdec_interop_loading_done) { + for (int n = 0; ra_hwdec_drivers[n]; n++) + load_add_hwdec(p, devs, ra_hwdec_drivers[n], true); + p->hwdec_interop_loading_done = true; + } +} diff --git a/video/out/opengl/video.h b/video/out/gpu/video.h index d163bc8..78f8828 100644 --- a/video/out/opengl/video.h +++ b/video/out/gpu/video.h @@ -27,11 +27,6 @@ #include "shader_cache.h" #include "video/csputils.h" #include "video/out/filter_kernels.h" -#include "video/out/vo.h" - -// Assume we have this many texture units for sourcing additional passes. -// The actual texture unit assignment is dynamic. -#define TEXUNIT_VIDEO_NUM 6 struct scaler_fun { char *name; @@ -56,7 +51,7 @@ struct scaler { bool initialized; struct filter_kernel *kernel; struct ra_tex *lut; - struct fbotex sep_fbo; + struct ra_tex *sep_fbo; bool insufficient; int lut_size; @@ -144,6 +139,7 @@ struct gl_video_opts { struct mp_icc_opts *icc_opts; int early_flush; char *shader_cache_dir; + char *hwdec_interop; }; extern const struct m_sub_options gl_video_conf; @@ -155,12 +151,11 @@ struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log, struct mpv_global *g); void gl_video_uninit(struct gl_video *p); void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd); -void gl_video_update_options(struct gl_video *p); bool gl_video_check_format(struct gl_video *p, int mp_format); void gl_video_config(struct gl_video *p, struct mp_image_params *params); void gl_video_set_output_depth(struct gl_video *p, int r, int g, int b); void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, - struct fbodst target); + struct ra_fbo fbo); void gl_video_resize(struct gl_video *p, struct mp_rect *src, struct mp_rect *dst, struct mp_osd_res *osd); @@ -182,8 +177,10 @@ struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p); void gl_video_reset(struct gl_video *p); bool gl_video_showing_interpolated_frame(struct gl_video *p); -struct ra_hwdec; -void 
gl_video_set_hwdec(struct gl_video *p, struct ra_hwdec *hwdec); +struct mp_hwdec_devices; +void gl_video_load_hwdecs(struct gl_video *p, struct mp_hwdec_devices *devs, + bool load_all_by_default); +void gl_video_load_hwdecs_all(struct gl_video *p, struct mp_hwdec_devices *devs); struct vo; void gl_video_configure_queue(struct gl_video *p, struct vo *vo); diff --git a/video/out/opengl/video_shaders.c b/video/out/gpu/video_shaders.c index 60c5ce8..3e71c31 100644 --- a/video/out/opengl/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -97,11 +97,11 @@ void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler } // Subroutine for computing and adding an individual texel contribution -// If subtexel < 0 and offset < 0, samples directly. -// If subtexel >= 0, takes the texel from cN[subtexel] -// If offset >= 0, takes the texel from inN[rel.y+y+offset][rel.x+x+offset] +// If planar is false, samples directly +// If planar is true, takes the pixel from inX[idx] where X is the component and +// `idx` must be defined by the caller static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler, - int x, int y, int subtexel, int offset, int components) + int x, int y, int components, bool planar) { double radius = scaler->kernel->f.radius * scaler->kernel->filter_scale; double radius_cutoff = scaler->kernel->radius_cutoff; @@ -130,19 +130,12 @@ static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler, } GLSL(wsum += w;) - if (subtexel < 0 && offset < 0) { - GLSLF("c0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y); - GLSL(color += vec4(w) * c0;) - } else if (subtexel >= 0) { + if (planar) { for (int n = 0; n < components; n++) - GLSLF("color[%d] += w * c%d[%d];\n", n, n, subtexel); - } else if (offset >= 0) { - for (int n = 0; n <components; n++) - GLSLF("color[%d] += w * in%d[rel.y+%d][rel.x+%d];\n", n, n, - y + offset, x + offset); + GLSLF("color[%d] += w * in%d[idx];\n", n, n); } else { - // invalid usage - 
abort(); + GLSLF("in0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y); + GLSL(color += vec4(w) * in0;) } if (maybe_skippable) @@ -150,7 +143,7 @@ static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler, } void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler, - int components, int glsl_version) + int components, bool sup_gather) { GLSL(color = vec4(0.0);) GLSLF("{\n"); @@ -158,7 +151,8 @@ void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler, GLSL(vec2 base = pos - fcoord * pt;) GLSLF("float w, d, wsum = 0.0;\n"); for (int n = 0; n < components; n++) - GLSLF("vec4 c%d;\n", n); + GLSLF("vec4 in%d;\n", n); + GLSL(int idx;) gl_sc_uniform_texture(sc, "lut", scaler->lut); @@ -173,15 +167,14 @@ void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler, // exactly when all four texels are within bounds bool use_gather = sqrt(x*x + y*y) < scaler->kernel->radius_cutoff; - // textureGather is only supported in GLSL 400+ - if (glsl_version < 400) + if (!sup_gather) use_gather = false; if (use_gather) { // Gather the four surrounding texels simultaneously for (int n = 0; n < components; n++) { - GLSLF("c%d = textureGatherOffset(tex, base, ivec2(%d, %d), %d);\n", - n, x, y, n); + GLSLF("in%d = textureGatherOffset(tex, base, " + "ivec2(%d, %d), %d);\n", n, x, y, n); } // Mix in all of the points with their weights @@ -192,13 +185,14 @@ void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler, static const int yo[4] = {1, 1, 0, 0}; if (x+xo[p] > bound || y+yo[p] > bound) continue; - polar_sample(sc, scaler, x+xo[p], y+yo[p], p, -1, components); + GLSLF("idx = %d;\n", p); + polar_sample(sc, scaler, x+xo[p], y+yo[p], components, true); } } else { // switch to direct sampling instead, for efficiency/compatibility for (int yy = y; yy <= bound && yy <= y+1; yy++) { for (int xx = x; xx <= bound && xx <= x+1; xx++) - polar_sample(sc, scaler, xx, yy, -1, -1, components); + polar_sample(sc, 
scaler, xx, yy, components, false); } } } @@ -223,20 +217,20 @@ void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler, GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));) GLSL(vec2 base = pos - pt * fcoord;) GLSL(ivec2 rel = ivec2(round((base - wbase) * size));) + GLSL(int idx;) GLSLF("float w, d, wsum = 0.0;\n"); gl_sc_uniform_texture(sc, "lut", scaler->lut); // Load all relevant texels into shmem - gl_sc_enable_extension(sc, "GL_ARB_arrays_of_arrays"); for (int c = 0; c < components; c++) - GLSLHF("shared float in%d[%d][%d];\n", c, ih, iw); + GLSLHF("shared float in%d[%d];\n", c, ih * iw); GLSL(vec4 c;) GLSLF("for (int y = int(gl_LocalInvocationID.y); y < %d; y += %d) {\n", ih, bh); GLSLF("for (int x = int(gl_LocalInvocationID.x); x < %d; x += %d) {\n", iw, bw); GLSLF("c = texture(tex, wbase + pt * vec2(x - %d, y - %d));\n", offset, offset); for (int c = 0; c < components; c++) - GLSLF("in%d[y][x] = c[%d];\n", c, c); + GLSLF("in%d[%d * y + x] = c[%d];\n", c, iw, c); GLSLF("}}\n"); GLSL(groupMemoryBarrier();) GLSL(barrier();) @@ -244,8 +238,11 @@ void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler, // Dispatch the actual samples GLSLF("// scaler samples\n"); for (int y = 1-bound; y <= bound; y++) { - for (int x = 1-bound; x <= bound; x++) - polar_sample(sc, scaler, x, y, -1, offset, components); + for (int x = 1-bound; x <= bound; x++) { + GLSLF("idx = %d * rel.y + rel.x + %d;\n", iw, + iw * (y + offset) + x + offset); + polar_sample(sc, scaler, x, y, components, true); + } } GLSL(color = color / vec4(wsum);) @@ -567,18 +564,19 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, { GLSLF("// HDR tone mapping\n"); - // Desaturate the color using a coefficient dependent on the luminance - GLSL(float luma = dot(dst_luma, color.rgb);) - if (desat > 0) { - GLSLF("float overbright = max(luma - %f, 1e-6) / max(luma, 1e-6);\n", desat); - GLSL(color.rgb = mix(color.rgb, vec3(luma), overbright);) - } - // To 
prevent discoloration due to out-of-bounds clipping, we need to make // sure to reduce the value range as far as necessary to keep the entire // signal in range, so tone map based on the brightest component. GLSL(float sig = max(max(color.r, color.g), color.b);) - GLSL(float sig_orig = sig;) + + // Desaturate the color using a coefficient dependent on the signal + if (desat > 0) { + GLSL(float luma = dot(dst_luma, color.rgb);) + GLSL(float coeff = max(sig - 0.18, 1e-6) / max(sig, 1e-6);); + GLSLF("coeff = pow(coeff, %f);\n", 10.0 / desat); + GLSL(color.rgb = mix(color.rgb, vec3(luma), coeff);) + GLSL(sig = mix(sig, luma, coeff);) // also make sure to update `sig` + } if (!ref_peak) { // For performance, we want to do as few atomic operations on global @@ -614,6 +612,7 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, GLSLHF("const float sig_peak = %f;\n", ref_peak); } + GLSL(float sig_orig = sig;) switch (algo) { case TONE_MAPPING_CLIP: GLSLF("sig = %f * sig;\n", isnan(param) ? 1.0 : param); @@ -627,7 +626,7 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, GLSLF("float b = (j*j - 2.0*j*sig_peak + sig_peak) / " "max(1e-6, sig_peak - 1.0);\n"); GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n"); - GLSL(sig = mix(sig, scale * (sig + a) / (sig + b), sig > j);) + GLSL(sig = sig > j ? 
scale * (sig + a) / (sig + b) : sig;) break; case TONE_MAPPING_REINHARD: { @@ -770,6 +769,7 @@ static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg) // Initialize the PRNG by hashing the position + a random uniform GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);) GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);) + gl_sc_uniform_dynamic(sc); gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX); } diff --git a/video/out/opengl/video_shaders.h b/video/out/gpu/video_shaders.h index 8345e4c..2ae2ac3 100644 --- a/video/out/opengl/video_shaders.h +++ b/video/out/gpu/video_shaders.h @@ -30,7 +30,7 @@ void sampler_prelude(struct gl_shader_cache *sc, int tex_num); void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler, int d_x, int d_y); void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler, - int components, int glsl_version); + int components, bool sup_gather); void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler, int components, int bw, int bh, int iw, int ih); void pass_sample_bicubic_fast(struct gl_shader_cache *sc); diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c index 3d03c47..fda40da 100644 --- a/video/out/opengl/common.c +++ b/video/out/opengl/common.c @@ -31,6 +31,7 @@ #include "common.h" #include "common/common.h" +#include "utils.h" // This guesses if the current GL context is a suspected software renderer. static bool is_software_gl(GL *gl) @@ -49,14 +50,6 @@ static void GLAPIENTRY dummy_glBindFramebuffer(GLenum target, GLuint framebuffer assert(framebuffer == 0); } -static bool check_ext(GL *gl, const char *name) -{ - const char *exts = gl->extensions; - char *s = strstr(exts, name); - char *e = s ? 
s + strlen(name) : NULL; - return s && (s == exts || s[-1] == ' ') && (e[0] == ' ' || !e[0]); -} - #define FN_OFFS(name) offsetof(GL, name) #define DEF_FN(name) {FN_OFFS(name), "gl" # name} @@ -383,6 +376,15 @@ static const struct gl_functions gl_functions[] = { {0}, }, }, + // This one overrides GLX_SGI_swap_control on platforms using mesa. The + // only difference is that it supports glXSwapInterval(0). + { + .extension = "GLX_MESA_swap_control", + .functions = (const struct gl_function[]) { + DEF_FN_NAME(SwapInterval, "glXSwapIntervalMESA"), + {0}, + }, + }, { .extension = "WGL_EXT_swap_control", .functions = (const struct gl_function[]) { @@ -572,8 +574,8 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), if (ver_core) must_exist = version >= ver_core; - if (section->extension && check_ext(gl, section->extension)) - exists = true; + if (section->extension) + exists = gl_check_extension(gl->extensions, section->extension); exists |= must_exist; if (!exists) @@ -623,7 +625,7 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), if (gl->es >= 300) gl->glsl_version = 300; } else { - gl->glsl_version = 110; + gl->glsl_version = 120; int glsl_major = 0, glsl_minor = 0; if (shader && sscanf(shader, "%d.%d", &glsl_major, &glsl_minor) == 2) gl->glsl_version = glsl_major * 100 + glsl_minor; diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h index 7b2e3ed..b9f582b 100644 --- a/video/out/opengl/common.h +++ b/video/out/opengl/common.h @@ -26,10 +26,10 @@ #include "common/msg.h" #include "misc/bstr.h" -#include "video/out/vo.h" #include "video/csputils.h" - #include "video/mp_image.h" +#include "video/out/vo.h" +#include "video/out/gpu/ra.h" #include "gl_headers.h" diff --git a/video/out/opengl/context.c b/video/out/opengl/context.c index fe454e9..cdaf632 100644 --- a/video/out/opengl/context.c +++ b/video/out/opengl/context.c @@ -1,10 +1,4 @@ /* - * common OpenGL routines - * - * copyleft (C) 2005-2010 
Reimar Döffinger <Reimar.Doeffinger@gmx.de> - * Special thanks go to the xine team and Matthias Hopf, whose video_out_opengl.c - * gave me lots of good ideas. - * * This file is part of mpv. * * mpv is free software; you can redistribute it and/or @@ -21,73 +15,10 @@ * License along with mpv. If not, see <http://www.gnu.org/licenses/>. */ -#include <stddef.h> -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <stdbool.h> -#include <math.h> -#include <assert.h> - +#include "options/m_config.h" #include "context.h" -#include "common/common.h" -#include "options/options.h" -#include "options/m_option.h" - -extern const struct mpgl_driver mpgl_driver_x11; -extern const struct mpgl_driver mpgl_driver_x11egl; -extern const struct mpgl_driver mpgl_driver_x11_probe; -extern const struct mpgl_driver mpgl_driver_drm_egl; -extern const struct mpgl_driver mpgl_driver_drm; -extern const struct mpgl_driver mpgl_driver_cocoa; -extern const struct mpgl_driver mpgl_driver_wayland; -extern const struct mpgl_driver mpgl_driver_w32; -extern const struct mpgl_driver mpgl_driver_angle; -extern const struct mpgl_driver mpgl_driver_angle_es2; -extern const struct mpgl_driver mpgl_driver_dxinterop; -extern const struct mpgl_driver mpgl_driver_rpi; -extern const struct mpgl_driver mpgl_driver_mali; -extern const struct mpgl_driver mpgl_driver_vdpauglx; - -static const struct mpgl_driver *const backends[] = { -#if HAVE_RPI - &mpgl_driver_rpi, -#endif -#if HAVE_GL_COCOA - &mpgl_driver_cocoa, -#endif -#if HAVE_EGL_ANGLE_WIN32 - &mpgl_driver_angle, -#endif -#if HAVE_GL_WIN32 - &mpgl_driver_w32, -#endif -#if HAVE_GL_DXINTEROP - &mpgl_driver_dxinterop, -#endif -#if HAVE_GL_X11 - &mpgl_driver_x11_probe, -#endif -#if HAVE_EGL_X11 - &mpgl_driver_x11egl, -#endif -#if HAVE_GL_X11 - &mpgl_driver_x11, -#endif -#if HAVE_GL_WAYLAND - &mpgl_driver_wayland, -#endif -#if HAVE_EGL_DRM - &mpgl_driver_drm, - &mpgl_driver_drm_egl, -#endif -#if HAVE_MALI_FBDEV - &mpgl_driver_mali, -#endif 
-#if HAVE_VDPAU_GL_X11 - &mpgl_driver_vdpauglx, -#endif -}; +#include "ra_gl.h" +#include "utils.h" // 0-terminated list of desktop GL versions a backend should try to // initialize. The first entry is the most preferred version. @@ -103,140 +34,322 @@ const int mpgl_preferred_gl_versions[] = { 0 }; -int mpgl_find_backend(const char *name) +enum { + FLUSH_NO = 0, + FLUSH_YES, + FLUSH_AUTO, +}; + +enum { + GLES_AUTO = 0, + GLES_YES, + GLES_NO, +}; + +struct opengl_opts { + int use_glfinish; + int waitvsync; + int vsync_pattern[2]; + int swapinterval; + int early_flush; + int restrict_version; + int gles_mode; +}; + +#define OPT_BASE_STRUCT struct opengl_opts +const struct m_sub_options opengl_conf = { + .opts = (const struct m_option[]) { + OPT_FLAG("opengl-glfinish", use_glfinish, 0), + OPT_FLAG("opengl-waitvsync", waitvsync, 0), + OPT_INT("opengl-swapinterval", swapinterval, 0), + OPT_INTPAIR("opengl-check-pattern", vsync_pattern, 0), + OPT_INT("opengl-restrict", restrict_version, 0), + OPT_CHOICE("opengl-es", gles_mode, 0, + ({"auto", GLES_AUTO}, {"yes", GLES_YES}, {"no", GLES_NO})), + OPT_CHOICE("opengl-early-flush", early_flush, 0, + ({"no", FLUSH_NO}, {"yes", FLUSH_YES}, {"auto", FLUSH_AUTO})), + + OPT_REPLACED("opengl-debug", "gpu-debug"), + OPT_REPLACED("opengl-sw", "gpu-sw"), + OPT_REPLACED("opengl-vsync-fences", "swapchain-depth"), + OPT_REPLACED("opengl-backend", "gpu-context"), + {0}, + }, + .defaults = &(const struct opengl_opts) { + .swapinterval = 1, + }, + .size = sizeof(struct opengl_opts), +}; + +struct priv { + GL *gl; + struct mp_log *log; + struct ra_gl_ctx_params params; + struct opengl_opts *opts; + struct ra_swapchain_fns fns; + GLuint main_fb; + struct ra_tex *wrapped_fb; // corresponds to main_fb + // for debugging: + int frames_rendered; + unsigned int prev_sgi_sync_count; + // for gl_vsync_pattern + int last_pattern; + int matches, mismatches; + // for swapchain_depth simulation + GLsync *vsync_fences; + int num_vsync_fences; +}; + +bool 
ra_gl_ctx_test_version(struct ra_ctx *ctx, int version, bool es) { - if (name == NULL || strcmp(name, "auto") == 0) - return -1; - for (int n = 0; n < MP_ARRAY_SIZE(backends); n++) { - if (strcmp(backends[n]->name, name) == 0) - return n; + bool ret; + struct opengl_opts *opts; + void *tmp = talloc_new(NULL); + opts = mp_get_config_group(tmp, ctx->global, &opengl_conf); + + // Version too high + if (opts->restrict_version && version >= opts->restrict_version) { + ret = false; + goto done; } - return -2; -} -int mpgl_validate_backend_opt(struct mp_log *log, const struct m_option *opt, - struct bstr name, struct bstr param) -{ - if (bstr_equals0(param, "help")) { - mp_info(log, "OpenGL windowing backends:\n"); - mp_info(log, " auto (autodetect)\n"); - for (int n = 0; n < MP_ARRAY_SIZE(backends); n++) - mp_info(log, " %s\n", backends[n]->name); - return M_OPT_EXIT; + switch (opts->gles_mode) { + case GLES_YES: ret = es; goto done; + case GLES_NO: ret = !es; goto done; + case GLES_AUTO: ret = true; goto done; + default: abort(); } - char s[20]; - snprintf(s, sizeof(s), "%.*s", BSTR_P(param)); - return mpgl_find_backend(s) >= -1 ? 1 : M_OPT_INVALID; + +done: + talloc_free(tmp); + return ret; } -static void *get_native_display(void *pctx, const char *name) +static void *get_native_display(void *priv, const char *name) { - MPGLContext *ctx = pctx; - if (!ctx->native_display_type || !name) + struct priv *p = priv; + if (!p->params.native_display_type || !name) + return NULL; + if (strcmp(p->params.native_display_type, name) != 0) return NULL; - return strcmp(ctx->native_display_type, name) == 0 ? 
ctx->native_display : NULL; + + return p->params.native_display; } -static MPGLContext *init_backend(struct vo *vo, const struct mpgl_driver *driver, - bool probing, int vo_flags) +void ra_gl_ctx_uninit(struct ra_ctx *ctx) { - MPGLContext *ctx = talloc_ptrtype(NULL, ctx); - *ctx = (MPGLContext) { - .gl = talloc_zero(ctx, GL), - .vo = vo, - .global = vo->global, - .driver = driver, - .log = vo->log, + if (ctx->swapchain) { + struct priv *p = ctx->swapchain->priv; + if (ctx->ra && p->wrapped_fb) + ra_tex_free(ctx->ra, &p->wrapped_fb); + talloc_free(ctx->swapchain); + ctx->swapchain = NULL; + } + + ra_free(&ctx->ra); +} + +static const struct ra_swapchain_fns ra_gl_swapchain_fns; + +bool ra_gl_ctx_init(struct ra_ctx *ctx, GL *gl, struct ra_gl_ctx_params params) +{ + struct ra_swapchain *sw = ctx->swapchain = talloc_ptrtype(NULL, sw); + *sw = (struct ra_swapchain) { + .ctx = ctx, }; - if (probing) - vo_flags |= VOFLAG_PROBING; - bool old_probing = vo->probing; - vo->probing = probing; // hack; kill it once backends are separate - MP_VERBOSE(vo, "Initializing OpenGL backend '%s'\n", ctx->driver->name); - ctx->priv = talloc_zero_size(ctx, ctx->driver->priv_size); - if (ctx->driver->init(ctx, vo_flags) < 0) { - vo->probing = old_probing; - talloc_free(ctx); - return NULL; + + struct priv *p = sw->priv = talloc_ptrtype(sw, p); + *p = (struct priv) { + .gl = gl, + .log = ctx->log, + .params = params, + .opts = mp_get_config_group(p, ctx->global, &opengl_conf), + .fns = ra_gl_swapchain_fns, + }; + + sw->fns = &p->fns; + + const struct ra_swapchain_fns *ext = p->params.external_swapchain; + if (ext) { + if (ext->color_depth) + p->fns.color_depth = ext->color_depth; + if (ext->screenshot) + p->fns.screenshot = ext->screenshot; + if (ext->start_frame) + p->fns.start_frame = ext->start_frame; + if (ext->submit_frame) + p->fns.submit_frame = ext->submit_frame; + if (ext->swap_buffers) + p->fns.swap_buffers = ext->swap_buffers; } - vo->probing = old_probing; - if 
(!ctx->gl->version && !ctx->gl->es) - goto cleanup; + if (!gl->version && !gl->es) + return false; - if (probing && ctx->gl->es && (vo_flags & VOFLAG_NO_GLES)) { - MP_VERBOSE(ctx->vo, "Skipping GLES backend.\n"); - goto cleanup; + if (gl->mpgl_caps & MPGL_CAP_SW) { + MP_WARN(p, "Suspected software renderer or indirect context.\n"); + if (ctx->opts.probing && !ctx->opts.allow_sw) + return false; } - if (ctx->gl->mpgl_caps & MPGL_CAP_SW) { - MP_WARN(ctx->vo, "Suspected software renderer or indirect context.\n"); - if (vo->probing && !(vo_flags & VOFLAG_SW)) - goto cleanup; + gl->debug_context = ctx->opts.debug; + gl->get_native_display_ctx = p; + gl->get_native_display = get_native_display; + + if (gl->SwapInterval) { + gl->SwapInterval(p->opts->swapinterval); + } else { + MP_VERBOSE(p, "GL_*_swap_control extension missing.\n"); } - ctx->gl->debug_context = !!(vo_flags & VOFLAG_GL_DEBUG); + ctx->ra = ra_create_gl(p->gl, ctx->log); + return !!ctx->ra; +} - ctx->gl->get_native_display_ctx = ctx; - ctx->gl->get_native_display = get_native_display; +void ra_gl_ctx_resize(struct ra_swapchain *sw, int w, int h, int fbo) +{ + struct priv *p = sw->priv; + if (p->main_fb == fbo && p->wrapped_fb && p->wrapped_fb->params.w == w + && p->wrapped_fb->params.h == h) + return; - return ctx; + if (p->wrapped_fb) + ra_tex_free(sw->ctx->ra, &p->wrapped_fb); -cleanup: - mpgl_uninit(ctx); - return NULL; + p->main_fb = fbo; + p->wrapped_fb = ra_create_wrapped_fb(sw->ctx->ra, fbo, w, h); } -// Create a VO window and create a GL context on it. 
-// vo_flags: passed to the backend's create window function -MPGLContext *mpgl_init(struct vo *vo, const char *backend_name, int vo_flags) +int ra_gl_ctx_color_depth(struct ra_swapchain *sw) { - MPGLContext *ctx = NULL; - int index = mpgl_find_backend(backend_name); - if (index == -1) { - for (int n = 0; n < MP_ARRAY_SIZE(backends); n++) { - ctx = init_backend(vo, backends[n], true, vo_flags); - if (ctx) - break; - } - // VO forced, but no backend is ok => force the first that works at all - if (!ctx && !vo->probing) { - for (int n = 0; n < MP_ARRAY_SIZE(backends); n++) { - ctx = init_backend(vo, backends[n], false, vo_flags); - if (ctx) - break; - } - } - } else if (index >= 0) { - ctx = init_backend(vo, backends[index], false, vo_flags); - } - return ctx; + struct priv *p = sw->priv; + GL *gl = p->gl; + + if (!p->wrapped_fb) + return 0; + + if ((gl->es < 300 && !gl->version) || !(gl->mpgl_caps & MPGL_CAP_FB)) + return 0; + + gl->BindFramebuffer(GL_FRAMEBUFFER, p->main_fb); + + GLenum obj = gl->version ? GL_BACK_LEFT : GL_BACK; + if (p->main_fb) + obj = GL_COLOR_ATTACHMENT0; + + GLint depth_g = 0; + + gl->GetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, obj, + GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &depth_g); + + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + + return depth_g; } -int mpgl_reconfig_window(struct MPGLContext *ctx) +struct mp_image *ra_gl_ctx_screenshot(struct ra_swapchain *sw) { - return ctx->driver->reconfig(ctx); + struct priv *p = sw->priv; + + assert(p->wrapped_fb); + struct mp_image *screen = gl_read_fbo_contents(p->gl, p->main_fb, + p->wrapped_fb->params.w, + p->wrapped_fb->params.h); + + // OpenGL FB is also read in flipped order, so we need to flip when the + // rendering is *not* flipped, which in our case is whenever + // p->params.flipped is true. 
I hope that made sense + if (screen && p->params.flipped) + mp_image_vflip(screen); + + return screen; } -int mpgl_control(struct MPGLContext *ctx, int *events, int request, void *arg) +bool ra_gl_ctx_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) { - return ctx->driver->control(ctx, events, request, arg); + struct priv *p = sw->priv; + out_fbo->tex = p->wrapped_fb; + out_fbo->flip = !p->params.flipped; // OpenGL FBs are normally flipped + return true; } -void mpgl_start_frame(struct MPGLContext *ctx) +bool ra_gl_ctx_submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame) { - if (ctx->driver->start_frame) - ctx->driver->start_frame(ctx); + struct priv *p = sw->priv; + GL *gl = p->gl; + + if (p->opts->use_glfinish) + gl->Finish(); + + if (gl->FenceSync && !p->params.external_swapchain) { + GLsync fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + if (fence) + MP_TARRAY_APPEND(p, p->vsync_fences, p->num_vsync_fences, fence); + } + + switch (p->opts->early_flush) { + case FLUSH_AUTO: + if (frame->display_synced) + break; + // fall through + case FLUSH_YES: + gl->Flush(); + } + + return true; } -void mpgl_swap_buffers(struct MPGLContext *ctx) +static void check_pattern(struct priv *p, int item) { - ctx->driver->swap_buffers(ctx); + int expected = p->opts->vsync_pattern[p->last_pattern]; + if (item == expected) { + p->last_pattern++; + if (p->last_pattern >= 2) + p->last_pattern = 0; + p->matches++; + } else { + p->mismatches++; + MP_WARN(p, "wrong pattern, expected %d got %d (hit: %d, mis: %d)\n", + expected, item, p->matches, p->mismatches); + } } -void mpgl_uninit(MPGLContext *ctx) +void ra_gl_ctx_swap_buffers(struct ra_swapchain *sw) { - if (ctx) - ctx->driver->uninit(ctx); - talloc_free(ctx); + struct priv *p = sw->priv; + GL *gl = p->gl; + + p->params.swap_buffers(sw->ctx); + p->frames_rendered++; + + if (p->frames_rendered > 5 && !sw->ctx->opts.debug) + ra_gl_set_debug(sw->ctx->ra, false); + + if ((p->opts->waitvsync || 
p->opts->vsync_pattern[0]) + && gl->GetVideoSync) + { + unsigned int n1 = 0, n2 = 0; + gl->GetVideoSync(&n1); + if (p->opts->waitvsync) + gl->WaitVideoSync(2, (n1 + 1) % 2, &n2); + int step = n1 - p->prev_sgi_sync_count; + p->prev_sgi_sync_count = n1; + MP_DBG(p, "Flip counts: %u->%u, step=%d\n", n1, n2, step); + if (p->opts->vsync_pattern[0]) + check_pattern(p, step); + } + + while (p->num_vsync_fences >= sw->ctx->opts.swapchain_depth) { + gl->ClientWaitSync(p->vsync_fences[0], GL_SYNC_FLUSH_COMMANDS_BIT, 1e9); + gl->DeleteSync(p->vsync_fences[0]); + MP_TARRAY_REMOVE_AT(p->vsync_fences, p->num_vsync_fences, 0); + } } + +static const struct ra_swapchain_fns ra_gl_swapchain_fns = { + .color_depth = ra_gl_ctx_color_depth, + .screenshot = ra_gl_ctx_screenshot, + .start_frame = ra_gl_ctx_start_frame, + .submit_frame = ra_gl_ctx_submit_frame, + .swap_buffers = ra_gl_ctx_swap_buffers, +}; diff --git a/video/out/opengl/context.h b/video/out/opengl/context.h index 229c5ef..95ed374 100644 --- a/video/out/opengl/context.h +++ b/video/out/opengl/context.h @@ -1,116 +1,56 @@ -/* - * common OpenGL routines - * - * copyleft (C) 2005-2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de> - * Special thanks go to the xine team and Matthias Hopf, whose video_out_opengl.c - * gave me lots of good ideas. - * - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef MP_GL_CONTEXT_H_ -#define MP_GL_CONTEXT_H_ +#pragma once +#include "common/global.h" +#include "video/out/gpu/context.h" #include "common.h" -enum { - VOFLAG_GLES = 1 << 0, // Hint to create a GLES context - VOFLAG_NO_GLES = 1 << 1, // Hint to create a desktop GL context - VOFLAG_GL_DEBUG = 1 << 2, // Hint to request debug OpenGL context - VOFLAG_ALPHA = 1 << 3, // Hint to request alpha framebuffer - VOFLAG_SW = 1 << 4, // Hint to accept a software GL renderer - VOFLAG_PROBING = 1 << 6, // The backend is being auto-probed. - VOFLAG_GLES2 = 1 << 7, // Hint for GLESv2 (needs VOFLAG_GLES) -}; - extern const int mpgl_preferred_gl_versions[]; -struct MPGLContext; - -// A windowing backend (like X11, win32, ...), which provides OpenGL rendering. -struct mpgl_driver { - const char *name; - - // Size of the struct allocated for MPGLContext.priv - int priv_size; - - // Init the GL context and possibly the underlying VO backend. - // The created context should be compatible to GL 3.2 core profile, but - // some other GL versions are supported as well (e.g. GL 2.1 or GLES 2). - // Return 0 on success, negative value (-1) on error. - int (*init)(struct MPGLContext *ctx, int vo_flags); - - // Resize the window, or create a new window if there isn't one yet. - // Currently, there is an unfortunate interaction with ctx->vo, and - // display size etc. are determined by it. - // Return 0 on success, negative value (-1) on error. - int (*reconfig)(struct MPGLContext *ctx); - - // Called when rendering starts. The backend can map or resize the - // framebuffer, or update GL.main_fb. swap_buffers() ends the frame. - // Optional. - void (*start_frame)(struct MPGLContext *ctx); - - // Present the frame. - void (*swap_buffers)(struct MPGLContext *ctx); - - // This behaves exactly like vo_driver.control(). - int (*control)(struct MPGLContext *ctx, int *events, int request, void *arg); - - // These behave exactly like vo_driver.wakeup/wait_events. They are - // optional. 
- void (*wakeup)(struct MPGLContext *ctx); - void (*wait_events)(struct MPGLContext *ctx, int64_t until_time_us); - - // Destroy the GL context and possibly the underlying VO backend. - void (*uninit)(struct MPGLContext *ctx); -}; - -typedef struct MPGLContext { - GL *gl; - struct vo *vo; - const struct mpgl_driver *driver; - struct mpv_global *global; - struct mp_log *log; - - // For hwdec_vaegl.c. +// Returns whether or not a candidate GL version should be accepted or not +// (based on the --opengl opts). Implementations may call this before +// ra_gl_ctx_init if they wish to probe for multiple possible GL versions. +bool ra_gl_ctx_test_version(struct ra_ctx *ctx, int version, bool es); + +// These are a set of helpers for ra_ctx providers based on ra_gl. +// The init function also initializes ctx->ra and ctx->swapchain, so the user +// doesn't have to do this manually. (Similarly, the uninit function will +// clean them up) + +struct ra_gl_ctx_params { + // Set to the platform-specific function to swap buffers, like + // glXSwapBuffers, eglSwapBuffers etc. This will be called by + // ra_gl_ctx_swap_buffers. Required unless you either never call that + // function or if you override it yourself. + void (*swap_buffers)(struct ra_ctx *ctx); + + // Set to false if the implementation follows normal GL semantics, which is + // upside down. Set to true if it does *not*, i.e. if rendering is right + // side up + bool flipped; + + // If this is set to non-NULL, then the ra_gl_ctx will consider the GL + // implementation to be using an external swapchain, which disables the + // software simulation of --swapchain-depth. Any functions defined by this + // ra_swapchain_fns structs will entirely replace the equivalent ra_gl_ctx + // functions in the resulting ra_swapchain. + const struct ra_swapchain_fns *external_swapchain; + + // For hwdec_vaegl.c: const char *native_display_type; void *native_display; +}; - // Flip the rendered image vertically. 
This is useful for dxinterop. - bool flip_v; - - // framebuffer to render to (normally 0) - GLuint main_fb; - - // For free use by the mpgl_driver. - void *priv; -} MPGLContext; - -MPGLContext *mpgl_init(struct vo *vo, const char *backend_name, int vo_flags); -void mpgl_uninit(MPGLContext *ctx); -int mpgl_reconfig_window(struct MPGLContext *ctx); -int mpgl_control(struct MPGLContext *ctx, int *events, int request, void *arg); -void mpgl_start_frame(struct MPGLContext *ctx); -void mpgl_swap_buffers(struct MPGLContext *ctx); - -int mpgl_find_backend(const char *name); +void ra_gl_ctx_uninit(struct ra_ctx *ctx); +bool ra_gl_ctx_init(struct ra_ctx *ctx, GL *gl, struct ra_gl_ctx_params params); -struct m_option; -int mpgl_validate_backend_opt(struct mp_log *log, const struct m_option *opt, - struct bstr name, struct bstr param); +// Call this any time the window size or main framebuffer changes +void ra_gl_ctx_resize(struct ra_swapchain *sw, int w, int h, int fbo); -#endif +// These functions are normally set in the ra_swapchain->fns, but if an +// implementation has a need to override this fns struct with custom functions +// for whatever reason, these can be used to inherit the original behavior. +int ra_gl_ctx_color_depth(struct ra_swapchain *sw); +struct mp_image *ra_gl_ctx_screenshot(struct ra_swapchain *sw); +bool ra_gl_ctx_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo); +bool ra_gl_ctx_submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame); +void ra_gl_ctx_swap_buffers(struct ra_swapchain *sw); diff --git a/video/out/opengl/context_android.c b/video/out/opengl/context_android.c new file mode 100644 index 0000000..a2acce2 --- /dev/null +++ b/video/out/opengl/context_android.c @@ -0,0 +1,152 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <EGL/egl.h> +#include <EGL/eglext.h> +#include <libavcodec/jni.h> +#include <android/native_window_jni.h> + +#include "egl_helpers.h" + +#include "common/common.h" +#include "options/m_config.h" +#include "context.h" + +struct priv { + struct GL gl; + EGLDisplay egl_display; + EGLConfig egl_config; + EGLContext egl_context; + EGLSurface egl_surface; + ANativeWindow *native_window; +}; + +static void android_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + eglSwapBuffers(p->egl_display, p->egl_surface); +} + +static void android_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + + if (p->egl_surface) { + eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, + EGL_NO_CONTEXT); + eglDestroySurface(p->egl_display, p->egl_surface); + } + if (p->egl_context) + eglDestroyContext(p->egl_display, p->egl_context); + + if (p->native_window) { + ANativeWindow_release(p->native_window); + p->native_window = NULL; + } +} + +static bool android_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + + jobject surface = (jobject)(intptr_t)ctx->vo->opts->WinID; + JavaVM *vm = (JavaVM *)av_jni_get_java_vm(NULL); + JNIEnv *env; + int ret = (*vm)->GetEnv(vm, (void**)&env, JNI_VERSION_1_6); + if (ret == JNI_EDETACHED) { + if 
((*vm)->AttachCurrentThread(vm, &env, NULL) != 0) { + MP_FATAL(ctx, "Could not attach java VM.\n"); + goto fail; + } + } + p->native_window = ANativeWindow_fromSurface(env, surface); + (*vm)->DetachCurrentThread(vm); + + p->egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY); + if (!eglInitialize(p->egl_display, NULL, NULL)) { + MP_FATAL(ctx, "EGL failed to initialize.\n"); + goto fail; + } + + EGLConfig config; + if (!mpegl_create_context(ctx, p->egl_display, &p->egl_context, &config)) + goto fail; + + EGLint format; + eglGetConfigAttrib(p->egl_display, config, EGL_NATIVE_VISUAL_ID, &format); + ANativeWindow_setBuffersGeometry(p->native_window, 0, 0, format); + + p->egl_surface = eglCreateWindowSurface(p->egl_display, config, + (EGLNativeWindowType)p->native_window, NULL); + + if (p->egl_surface == EGL_NO_SURFACE) { + MP_FATAL(ctx, "Could not create EGL surface!\n"); + goto fail; + } + + if (!eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, + p->egl_context)) { + MP_FATAL(ctx, "Failed to set context!\n"); + goto fail; + } + + mpegl_load_functions(&p->gl, ctx->log); + + struct ra_gl_ctx_params params = { + .swap_buffers = android_swap_buffers, + }; + + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + goto fail; + + return true; +fail: + android_uninit(ctx); + return false; +} + +static bool android_reconfig(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + int w, h; + + if (!eglQuerySurface(p->egl_display, p->egl_surface, EGL_WIDTH, &w) || + !eglQuerySurface(p->egl_display, p->egl_surface, EGL_HEIGHT, &h)) { + MP_FATAL(ctx, "Failed to get height and width!\n"); + return false; + } + + ctx->vo->dwidth = w; + ctx->vo->dheight = h; + ra_gl_ctx_resize(ctx->swapchain, w, h, 0); + return true; +} + +static int android_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + return VO_NOTIMPL; +} + +const struct ra_ctx_fns ra_ctx_android = { + .type = "opengl", + .name = "android", + .reconfig = android_reconfig, + .control = android_control, + 
.init = android_init, + .uninit = android_uninit, +}; diff --git a/video/out/opengl/context_angle.c b/video/out/opengl/context_angle.c index f249b74..986a503 100644 --- a/video/out/opengl/context_angle.c +++ b/video/out/opengl/context_angle.c @@ -24,13 +24,14 @@ #include "angle_dynamic.h" #include "egl_helpers.h" -#include "d3d11_helpers.h" +#include "video/out/gpu/d3d11_helpers.h" #include "common/common.h" #include "options/m_config.h" #include "video/out/w32_common.h" #include "osdep/windows_utils.h" #include "context.h" +#include "utils.h" #ifndef EGL_D3D_TEXTURE_ANGLE #define EGL_D3D_TEXTURE_ANGLE 0x33A3 @@ -52,8 +53,6 @@ struct angle_opts { int d3d11_warp; int d3d11_feature_level; int egl_windowing; - int swapchain_length; // Currently only works with DXGI 1.2+ - int max_frame_latency; int flip; }; @@ -77,9 +76,9 @@ const struct m_sub_options angle_conf = { ({"auto", -1}, {"no", 0}, {"yes", 1})), - OPT_INTRANGE("angle-swapchain-length", swapchain_length, 0, 2, 16), - OPT_INTRANGE("angle-max-frame-latency", max_frame_latency, 0, 1, 16), OPT_FLAG("angle-flip", flip, 0), + OPT_REPLACED("angle-max-frame-latency", "swapchain-depth"), + OPT_REMOVED("angle-swapchain-length", "controlled by --swapchain-depth"), {0} }, .defaults = &(const struct angle_opts) { @@ -87,14 +86,14 @@ const struct m_sub_options angle_conf = { .d3d11_warp = -1, .d3d11_feature_level = D3D_FEATURE_LEVEL_11_0, .egl_windowing = -1, - .swapchain_length = 6, - .max_frame_latency = 3, .flip = 1, }, .size = sizeof(struct angle_opts), }; struct priv { + GL gl; + IDXGISwapChain *dxgi_swapchain; ID3D11Device *d3d11_device; @@ -110,20 +109,21 @@ struct priv { int sc_width, sc_height; // Swap chain width and height int swapinterval; + bool flipped; struct angle_opts *opts; }; -static __thread struct MPGLContext *current_ctx; +static __thread struct ra_ctx *current_ctx; -static void update_sizes(MPGLContext *ctx) +static void update_sizes(struct ra_ctx *ctx) { struct priv *p = ctx->priv; p->sc_width = 
ctx->vo->dwidth ? ctx->vo->dwidth : 1; p->sc_height = ctx->vo->dheight ? ctx->vo->dheight : 1; } -static void d3d11_backbuffer_release(MPGLContext *ctx) +static void d3d11_backbuffer_release(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -137,7 +137,7 @@ static void d3d11_backbuffer_release(MPGLContext *ctx) SAFE_RELEASE(p->d3d11_backbuffer); } -static bool d3d11_backbuffer_get(MPGLContext *ctx) +static bool d3d11_backbuffer_get(struct ra_ctx *ctx) { struct priv *p = ctx->priv; struct vo *vo = ctx->vo; @@ -168,7 +168,7 @@ static bool d3d11_backbuffer_get(MPGLContext *ctx) return true; } -static void d3d11_backbuffer_resize(MPGLContext *ctx) +static void d3d11_backbuffer_resize(struct ra_ctx *ctx) { struct priv *p = ctx->priv; struct vo *vo = ctx->vo; @@ -197,7 +197,7 @@ static void d3d11_backbuffer_resize(MPGLContext *ctx) MP_FATAL(vo, "Couldn't get back buffer after resize\n"); } -static void d3d11_device_destroy(MPGLContext *ctx) +static void d3d11_device_destroy(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -215,7 +215,7 @@ static void d3d11_device_destroy(MPGLContext *ctx) SAFE_RELEASE(p->d3d11_device); } -static bool d3d11_device_create(MPGLContext *ctx, int flags) +static bool d3d11_device_create(struct ra_ctx *ctx) { struct priv *p = ctx->priv; struct vo *vo = ctx->vo; @@ -226,7 +226,7 @@ static bool d3d11_device_create(MPGLContext *ctx, int flags) .force_warp = o->d3d11_warp == 1, .max_feature_level = o->d3d11_feature_level, .min_feature_level = D3D_FEATURE_LEVEL_9_3, - .max_frame_latency = o->max_frame_latency, + .max_frame_latency = ctx->opts.swapchain_depth, }; if (!mp_d3d11_create_present_device(vo->log, &device_opts, &p->d3d11_device)) return false; @@ -262,7 +262,7 @@ static bool d3d11_device_create(MPGLContext *ctx, int flags) return true; } -static void d3d11_swapchain_surface_destroy(MPGLContext *ctx) +static void d3d11_swapchain_surface_destroy(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -277,7 +277,7 @@ static void 
d3d11_swapchain_surface_destroy(MPGLContext *ctx) ID3D11DeviceContext_Flush(p->d3d11_context); } -static bool d3d11_swapchain_surface_create(MPGLContext *ctx, int flags) +static bool d3d11_swapchain_surface_create(struct ra_ctx *ctx) { struct priv *p = ctx->priv; struct vo *vo = ctx->vo; @@ -292,7 +292,9 @@ static bool d3d11_swapchain_surface_create(MPGLContext *ctx, int flags) .width = p->sc_width, .height = p->sc_height, .flip = o->flip, - .length = o->swapchain_length, + // Add one frame for the backbuffer and one frame of "slack" to reduce + // contention with the window manager when acquiring the backbuffer + .length = ctx->opts.swapchain_depth + 2, .usage = DXGI_USAGE_RENDER_TARGET_OUTPUT | DXGI_USAGE_SHADER_INPUT, }; if (!mp_d3d11_create_swapchain(p->d3d11_device, vo->log, &swapchain_opts, @@ -301,8 +303,7 @@ static bool d3d11_swapchain_surface_create(MPGLContext *ctx, int flags) if (!d3d11_backbuffer_get(ctx)) goto fail; - // EGL_D3D_TEXTURE_ANGLE pbuffers are always flipped vertically - ctx->flip_v = true; + p->flipped = true; return true; fail: @@ -310,7 +311,7 @@ fail: return false; } -static void d3d9_device_destroy(MPGLContext *ctx) +static void d3d9_device_destroy(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -319,7 +320,7 @@ static void d3d9_device_destroy(MPGLContext *ctx) p->egl_display = EGL_NO_DISPLAY; } -static bool d3d9_device_create(MPGLContext *ctx, int flags) +static bool d3d9_device_create(struct ra_ctx *ctx) { struct priv *p = ctx->priv; struct vo *vo = ctx->vo; @@ -348,7 +349,7 @@ static bool d3d9_device_create(MPGLContext *ctx, int flags) return true; } -static void egl_window_surface_destroy(MPGLContext *ctx) +static void egl_window_surface_destroy(struct ra_ctx *ctx) { struct priv *p = ctx->priv; if (p->egl_window) { @@ -357,7 +358,7 @@ static void egl_window_surface_destroy(MPGLContext *ctx) } } -static bool egl_window_surface_create(MPGLContext *ctx, int flags) +static bool egl_window_surface_create(struct ra_ctx *ctx) { 
struct priv *p = ctx->priv; struct vo *vo = ctx->vo; @@ -374,7 +375,7 @@ static bool egl_window_surface_create(MPGLContext *ctx, int flags) EGL_SURFACE_ORIENTATION_ANGLE); MP_TARRAY_APPEND(NULL, window_attribs, window_attribs_len, EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE); - ctx->flip_v = true; + p->flipped = true; MP_VERBOSE(vo, "Rendering flipped.\n"); } } @@ -396,7 +397,7 @@ fail: return false; } -static void context_destroy(struct MPGLContext *ctx) +static void context_destroy(struct ra_ctx *ctx) { struct priv *p = ctx->priv; if (p->egl_context) { @@ -407,7 +408,7 @@ static void context_destroy(struct MPGLContext *ctx) p->egl_context = EGL_NO_CONTEXT; } -static bool context_init(struct MPGLContext *ctx, int flags) +static bool context_init(struct ra_ctx *ctx) { struct priv *p = ctx->priv; struct vo *vo = ctx->vo; @@ -421,8 +422,8 @@ static bool context_init(struct MPGLContext *ctx, int flags) if (exts) MP_DBG(vo, "EGL extensions: %s\n", exts); - if (!mpegl_create_context(p->egl_display, vo->log, flags | VOFLAG_GLES, - &p->egl_context, &p->egl_config)) + if (!mpegl_create_context(ctx, p->egl_display, &p->egl_context, + &p->egl_config)) { MP_FATAL(vo, "Could not create EGL context!\n"); goto fail; @@ -434,10 +435,12 @@ fail: return false; } -static void angle_uninit(struct MPGLContext *ctx) +static void angle_uninit(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + DwmEnableMMCSS(FALSE); // Uninit the EGL surface implementation that is being used. Note: This may @@ -474,17 +477,88 @@ static int GLAPIENTRY angle_swap_interval(int interval) } } -static int angle_init(struct MPGLContext *ctx, int flags) +static void d3d11_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + // Calling Present() on a flip-sequential swap chain will silently change + // the underlying storage of the back buffer to point to the next buffer in + // the chain. This results in the RTVs for the back buffer becoming + // unbound. 
Since ANGLE doesn't know we called Present(), it will continue + // using the unbound RTVs, so we must save and restore them ourselves. + ID3D11RenderTargetView *rtvs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {0}; + ID3D11DepthStencilView *dsv = NULL; + ID3D11DeviceContext_OMGetRenderTargets(p->d3d11_context, + MP_ARRAY_SIZE(rtvs), rtvs, &dsv); + + HRESULT hr = IDXGISwapChain_Present(p->dxgi_swapchain, p->swapinterval, 0); + if (FAILED(hr)) + MP_FATAL(ctx->vo, "Couldn't present: %s\n", mp_HRESULT_to_str(hr)); + + // Restore the RTVs and release the objects + ID3D11DeviceContext_OMSetRenderTargets(p->d3d11_context, + MP_ARRAY_SIZE(rtvs), rtvs, dsv); + for (int i = 0; i < MP_ARRAY_SIZE(rtvs); i++) + SAFE_RELEASE(rtvs[i]); + SAFE_RELEASE(dsv); +} + +static void egl_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + eglSwapBuffers(p->egl_display, p->egl_window); +} + +static void angle_swap_buffers(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + if (p->dxgi_swapchain) + d3d11_swap_buffers(ctx); + else + egl_swap_buffers(ctx); +} + + +static int angle_color_depth(struct ra_swapchain *sw) +{ + // Only 8-bit output is supported at the moment + return 8; +} + +static struct mp_image *angle_screenshot(struct ra_swapchain *sw) +{ + struct priv *p = sw->ctx->priv; + if (p->dxgi_swapchain) { + struct mp_image *img = mp_d3d11_screenshot(p->dxgi_swapchain); + if (img) + return img; + } + return ra_gl_ctx_screenshot(sw); +} + +static bool angle_submit_frame(struct ra_swapchain *sw, + const struct vo_frame *frame) +{ + struct priv *p = sw->ctx->priv; + bool ret = ra_gl_ctx_submit_frame(sw, frame); + if (p->d3d11_context) { + // DXGI Present doesn't flush the immediate context, which can make + // timers inaccurate, since the end queries might not be sent until the + // next frame. Fix this by flushing the context now. 
+ ID3D11DeviceContext_Flush(p->d3d11_context); + } + return ret; +} + +static bool angle_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); struct vo *vo = ctx->vo; + GL *gl = &p->gl; p->opts = mp_get_config_group(ctx, ctx->global, &angle_conf); struct angle_opts *o = p->opts; - // DWM MMCSS cargo-cult. The dxinterop backend also does this. - DwmEnableMMCSS(TRUE); - if (!angle_load()) { MP_VERBOSE(vo, "Failed to load LIBEGL.DLL\n"); goto fail; @@ -493,19 +567,19 @@ static int angle_init(struct MPGLContext *ctx, int flags) // Create the underlying EGL device implementation bool context_ok = false; if ((!context_ok && !o->renderer) || o->renderer == RENDERER_D3D11) { - context_ok = d3d11_device_create(ctx, flags); + context_ok = d3d11_device_create(ctx); if (context_ok) { - context_ok = context_init(ctx, flags); + context_ok = context_init(ctx); if (!context_ok) d3d11_device_destroy(ctx); } } if ((!context_ok && !o->renderer) || o->renderer == RENDERER_D3D9) { - context_ok = d3d9_device_create(ctx, flags); + context_ok = d3d9_device_create(ctx); if (context_ok) { MP_VERBOSE(vo, "Using Direct3D 9\n"); - context_ok = context_init(ctx, flags); + context_ok = context_init(ctx); if (!context_ok) d3d9_device_destroy(ctx); } @@ -519,181 +593,74 @@ static int angle_init(struct MPGLContext *ctx, int flags) // Create the underlying EGL surface implementation bool surface_ok = false; if ((!surface_ok && o->egl_windowing == -1) || o->egl_windowing == 0) { - surface_ok = d3d11_swapchain_surface_create(ctx, flags); + surface_ok = d3d11_swapchain_surface_create(ctx); } if ((!surface_ok && o->egl_windowing == -1) || o->egl_windowing == 1) { - surface_ok = egl_window_surface_create(ctx, flags); + surface_ok = egl_window_surface_create(ctx); if (surface_ok) MP_VERBOSE(vo, "Using EGL windowing\n"); } if (!surface_ok) goto fail; - mpegl_load_functions(ctx->gl, vo->log); + mpegl_load_functions(gl, vo->log); current_ctx = ctx; - ctx->gl->SwapInterval 
= angle_swap_interval; - - return 0; -fail: - angle_uninit(ctx); - return -1; -} - -static int angle_reconfig(struct MPGLContext *ctx) -{ - vo_w32_config(ctx->vo); - return 0; -} + gl->SwapInterval = angle_swap_interval; -static struct mp_image *d3d11_screenshot(MPGLContext *ctx) -{ - struct priv *p = ctx->priv; - ID3D11Texture2D *frontbuffer = NULL; - ID3D11Texture2D *staging = NULL; - struct mp_image *img = NULL; - HRESULT hr; - - if (!p->dxgi_swapchain) - goto done; - - // Validate the swap chain. This screenshot method will only work on DXGI - // 1.2+ flip/sequential swap chains. It's probably not possible at all with - // discard swap chains, since by definition, the backbuffer contents is - // discarded on Present(). - DXGI_SWAP_CHAIN_DESC scd; - hr = IDXGISwapChain_GetDesc(p->dxgi_swapchain, &scd); - if (FAILED(hr)) - goto done; - if (scd.SwapEffect != DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL) - goto done; - - // Get the last buffer that was presented with Present(). This should be - // the n-1th buffer for a swap chain of length n. 
- hr = IDXGISwapChain_GetBuffer(p->dxgi_swapchain, scd.BufferCount - 1, - &IID_ID3D11Texture2D, (void**)&frontbuffer); - if (FAILED(hr)) - goto done; - - D3D11_TEXTURE2D_DESC td; - ID3D11Texture2D_GetDesc(frontbuffer, &td); - if (td.SampleDesc.Count > 1) - goto done; - - // Validate the backbuffer format and convert to an mpv IMGFMT - enum mp_imgfmt fmt; - switch (td.Format) { - case DXGI_FORMAT_B8G8R8A8_UNORM: fmt = IMGFMT_BGR0; break; - case DXGI_FORMAT_R8G8B8A8_UNORM: fmt = IMGFMT_RGB0; break; - default: - goto done; - } - - // Create a staging texture based on the frontbuffer with CPU access - td.BindFlags = 0; - td.MiscFlags = 0; - td.CPUAccessFlags = D3D11_CPU_ACCESS_READ; - td.Usage = D3D11_USAGE_STAGING; - hr = ID3D11Device_CreateTexture2D(p->d3d11_device, &td, 0, &staging); - if (FAILED(hr)) - goto done; - - ID3D11DeviceContext_CopyResource(p->d3d11_context, - (ID3D11Resource*)staging, (ID3D11Resource*)frontbuffer); - - // Attempt to map the staging texture to CPU-accessible memory - D3D11_MAPPED_SUBRESOURCE lock; - hr = ID3D11DeviceContext_Map(p->d3d11_context, (ID3D11Resource*)staging, - 0, D3D11_MAP_READ, 0, &lock); - if (FAILED(hr)) - goto done; - - img = mp_image_alloc(fmt, td.Width, td.Height); - if (!img) - return NULL; - for (int i = 0; i < td.Height; i++) { - memcpy(img->planes[0] + img->stride[0] * i, - (char*)lock.pData + lock.RowPitch * i, td.Width * 4); - } - - ID3D11DeviceContext_Unmap(p->d3d11_context, (ID3D11Resource*)staging, 0); - -done: - SAFE_RELEASE(frontbuffer); - SAFE_RELEASE(staging); - return img; -} + // Custom swapchain impl for the D3D11 swapchain-based surface + static const struct ra_swapchain_fns dxgi_swapchain_fns = { + .color_depth = angle_color_depth, + .screenshot = angle_screenshot, + .submit_frame = angle_submit_frame, + }; + struct ra_gl_ctx_params params = { + .swap_buffers = angle_swap_buffers, + .flipped = p->flipped, + .external_swapchain = p->dxgi_swapchain ? 
&dxgi_swapchain_fns : NULL, + }; -static int angle_control(MPGLContext *ctx, int *events, int request, void *arg) -{ - struct priv *p = ctx->priv; + if (!ra_gl_ctx_init(ctx, gl, params)) + goto fail; - // Try a D3D11-specific method of taking a window screenshot - if (request == VOCTRL_SCREENSHOT_WIN) { - struct mp_image *img = d3d11_screenshot(ctx); - if (img) { - *(struct mp_image **)arg = img; - return true; - } - } + DwmEnableMMCSS(TRUE); // DWM MMCSS cargo-cult. The dxgl backend also does this. - int r = vo_w32_control(ctx->vo, events, request, arg); - if (*events & VO_EVENT_RESIZE) { - if (p->dxgi_swapchain) - d3d11_backbuffer_resize(ctx); - else - eglWaitClient(); // Should get ANGLE to resize its swapchain - } - return r; + return true; +fail: + angle_uninit(ctx); + return false; } -static void d3d11_swap_buffers(MPGLContext *ctx) +static void resize(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - - // Calling Present() on a flip-sequential swap chain will silently change - // the underlying storage of the back buffer to point to the next buffer in - // the chain. This results in the RTVs for the back buffer becoming - // unbound. Since ANGLE doesn't know we called Present(), it will continue - // using the unbound RTVs, so we must save and restore them ourselves. 
- ID3D11RenderTargetView *rtvs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {0}; - ID3D11DepthStencilView *dsv = NULL; - ID3D11DeviceContext_OMGetRenderTargets(p->d3d11_context, - MP_ARRAY_SIZE(rtvs), rtvs, &dsv); - - HRESULT hr = IDXGISwapChain_Present(p->dxgi_swapchain, p->swapinterval, 0); - if (FAILED(hr)) - MP_FATAL(ctx->vo, "Couldn't present: %s\n", mp_HRESULT_to_str(hr)); - - // Restore the RTVs and release the objects - ID3D11DeviceContext_OMSetRenderTargets(p->d3d11_context, - MP_ARRAY_SIZE(rtvs), rtvs, dsv); - for (int i = 0; i < MP_ARRAY_SIZE(rtvs); i++) - SAFE_RELEASE(rtvs[i]); - SAFE_RELEASE(dsv); + if (p->dxgi_swapchain) + d3d11_backbuffer_resize(ctx); + else + eglWaitClient(); // Should get ANGLE to resize its swapchain + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0); } -static void egl_swap_buffers(MPGLContext *ctx) +static bool angle_reconfig(struct ra_ctx *ctx) { - struct priv *p = ctx->priv; - eglSwapBuffers(p->egl_display, p->egl_window); + vo_w32_config(ctx->vo); + resize(ctx); + return true; } -static void angle_swap_buffers(MPGLContext *ctx) +static int angle_control(struct ra_ctx *ctx, int *events, int request, void *arg) { - struct priv *p = ctx->priv; - if (p->dxgi_swapchain) - d3d11_swap_buffers(ctx); - else - egl_swap_buffers(ctx); + int ret = vo_w32_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) + resize(ctx); + return ret; } -const struct mpgl_driver mpgl_driver_angle = { +const struct ra_ctx_fns ra_ctx_angle = { + .type = "opengl", .name = "angle", - .priv_size = sizeof(struct priv), .init = angle_init, .reconfig = angle_reconfig, - .swap_buffers = angle_swap_buffers, .control = angle_control, .uninit = angle_uninit, }; diff --git a/video/out/opengl/context_cocoa.c b/video/out/opengl/context_cocoa.c index 1d9a10c..2256d31 100644 --- a/video/out/opengl/context_cocoa.c +++ b/video/out/opengl/context_cocoa.c @@ -36,6 +36,7 @@ const struct m_sub_options cocoa_conf = { }; struct priv { + GL 
gl; CGLPixelFormatObj pix; CGLContextObj ctx; @@ -62,7 +63,7 @@ static void *cocoa_glgetaddr(const char *s) return ret; } -static CGLError test_gl_version(struct MPGLContext *ctx, CGLOpenGLProfile ver) +static CGLError test_gl_version(struct ra_ctx *ctx, CGLOpenGLProfile ver) { struct priv *p = ctx->priv; @@ -107,9 +108,10 @@ error_out: return err; } -static bool create_gl_context(struct MPGLContext *ctx, int vo_flags) +static bool create_gl_context(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + GL *gl = &p->gl; CGLError err; CGLOpenGLProfile gl_versions[] = { @@ -132,60 +134,83 @@ static bool create_gl_context(struct MPGLContext *ctx, int vo_flags) vo_cocoa_set_opengl_ctx(ctx->vo, p->ctx); CGLSetCurrentContext(p->ctx); - if (vo_flags & VOFLAG_ALPHA) + if (ctx->opts.want_alpha) CGLSetParameter(p->ctx, kCGLCPSurfaceOpacity, &(GLint){0}); - mpgl_load_functions(ctx->gl, (void *)cocoa_glgetaddr, NULL, ctx->vo->log); + mpgl_load_functions(gl, (void *)cocoa_glgetaddr, NULL, ctx->vo->log); + gl->SwapInterval = set_swap_interval; CGLReleasePixelFormat(p->pix); return true; } -static void cocoa_uninit(MPGLContext *ctx) +static void cocoa_uninit(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); CGLReleaseContext(p->ctx); vo_cocoa_uninit(ctx->vo); } -static int cocoa_init(MPGLContext *ctx, int vo_flags) +static void cocoa_swap_buffers(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + GL *gl = &p->gl; + vo_cocoa_swap_buffers(ctx->vo); + gl->Flush(); +} + +static bool cocoa_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + GL *gl = &p->gl; p->opts = mp_get_config_group(ctx, ctx->global, &cocoa_conf); vo_cocoa_init(ctx->vo); - if (!create_gl_context(ctx, vo_flags)) - return -1; + if (!create_gl_context(ctx)) + goto fail; + + struct ra_gl_ctx_params params = { + .swap_buffers = cocoa_swap_buffers, + }; + + if (!ra_gl_ctx_init(ctx, gl, params)) + goto fail; + + return true; - ctx->gl->SwapInterval = 
set_swap_interval; - return 0; +fail: + cocoa_uninit(ctx); + return false; } -static int cocoa_reconfig(struct MPGLContext *ctx) +static void resize(struct ra_ctx *ctx) { - vo_cocoa_config_window(ctx->vo); - return 0; + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0); } -static int cocoa_control(struct MPGLContext *ctx, int *events, int request, - void *arg) +static bool cocoa_reconfig(struct ra_ctx *ctx) { - return vo_cocoa_control(ctx->vo, events, request, arg); + vo_cocoa_config_window(ctx->vo); + resize(ctx); + return true; } -static void cocoa_swap_buffers(struct MPGLContext *ctx) +static int cocoa_control(struct ra_ctx *ctx, int *events, int request, + void *arg) { - vo_cocoa_swap_buffers(ctx->vo); - ctx->gl->Flush(); + int ret = vo_cocoa_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) + resize(ctx); + return ret; } -const struct mpgl_driver mpgl_driver_cocoa = { +const struct ra_ctx_fns ra_ctx_cocoa = { + .type = "opengl", .name = "cocoa", - .priv_size = sizeof(struct priv), .init = cocoa_init, .reconfig = cocoa_reconfig, - .swap_buffers = cocoa_swap_buffers, .control = cocoa_control, .uninit = cocoa_uninit, -};
\ No newline at end of file +}; diff --git a/video/out/opengl/context_drm_egl.c b/video/out/opengl/context_drm_egl.c index e52fec4..6191309 100644 --- a/video/out/opengl/context_drm_egl.c +++ b/video/out/opengl/context_drm_egl.c @@ -25,22 +25,25 @@ #include <unistd.h> #include <gbm.h> +#include <drm_fourcc.h> #include <EGL/egl.h> #include <EGL/eglext.h> -#include "context.h" -#include "egl_helpers.h" -#include "common/common.h" +#include "libmpv/opengl_cb.h" #include "video/out/drm_common.h" +#include "common/common.h" + +#include "egl_helpers.h" +#include "common.h" +#include "context.h" #define USE_MASTER 0 struct framebuffer { - struct gbm_bo *bo; - int width, height; int fd; - int id; + uint32_t width, height; + uint32_t id; }; struct gbm @@ -59,6 +62,7 @@ struct egl }; struct priv { + GL gl; struct kms *kms; drmEventContext ev; @@ -66,43 +70,46 @@ struct priv { struct egl egl; struct gbm gbm; - struct framebuffer fb; + struct framebuffer *fb; + + uint32_t primary_plane_format; bool active; bool waiting_for_flip; bool vt_switcher_active; struct vt_switcher vt_switcher; + + struct mpv_opengl_cb_drm_params drm_params; }; -static bool init_egl(struct MPGLContext *ctx, int flags) +static bool init_egl(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - MP_VERBOSE(ctx->vo, "Initializing EGL\n"); + MP_VERBOSE(ctx, "Initializing EGL\n"); p->egl.display = eglGetDisplay(p->gbm.device); if (p->egl.display == EGL_NO_DISPLAY) { - MP_ERR(ctx->vo, "Failed to get EGL display.\n"); + MP_ERR(ctx, "Failed to get EGL display.\n"); return false; } if (!eglInitialize(p->egl.display, NULL, NULL)) { - MP_ERR(ctx->vo, "Failed to initialize EGL.\n"); + MP_ERR(ctx, "Failed to initialize EGL.\n"); return false; } EGLConfig config; - if (!mpegl_create_context(p->egl.display, ctx->vo->log, flags, - &p->egl.context, &config)) - return -1; - MP_VERBOSE(ctx->vo, "Initializing EGL surface\n"); + if (!mpegl_create_context(ctx, p->egl.display, &p->egl.context, &config)) + return false; + 
MP_VERBOSE(ctx, "Initializing EGL surface\n"); p->egl.surface = eglCreateWindowSurface(p->egl.display, config, p->gbm.surface, NULL); if (p->egl.surface == EGL_NO_SURFACE) { - MP_ERR(ctx->vo, "Failed to create EGL surface.\n"); + MP_ERR(ctx, "Failed to create EGL surface.\n"); return false; } return true; } -static bool init_gbm(struct MPGLContext *ctx) +static bool init_gbm(struct ra_ctx *ctx) { struct priv *p = ctx->priv; MP_VERBOSE(ctx->vo, "Creating GBM device\n"); @@ -118,7 +125,7 @@ static bool init_gbm(struct MPGLContext *ctx) p->gbm.device, p->kms->mode.hdisplay, p->kms->mode.vdisplay, - GBM_BO_FORMAT_XRGB8888, + p->primary_plane_format, // drm_fourcc.h defs should be gbm-compatible GBM_BO_USE_SCANOUT | GBM_BO_USE_RENDERING); if (!p->gbm.surface) { MP_ERR(ctx->vo, "Failed to create GBM surface.\n"); @@ -135,46 +142,50 @@ static void framebuffer_destroy_callback(struct gbm_bo *bo, void *data) } } -static void update_framebuffer_from_bo( - const struct MPGLContext *ctx, struct gbm_bo *bo) +static void update_framebuffer_from_bo(struct ra_ctx *ctx, struct gbm_bo *bo) { struct priv *p = ctx->priv; - p->fb.bo = bo; - p->fb.fd = p->kms->fd; - p->fb.width = gbm_bo_get_width(bo); - p->fb.height = gbm_bo_get_height(bo); - int stride = gbm_bo_get_stride(bo); - int handle = gbm_bo_get_handle(bo).u32; - - int ret = drmModeAddFB(p->kms->fd, p->fb.width, p->fb.height, - 24, 32, stride, handle, &p->fb.id); + struct framebuffer *fb = gbm_bo_get_user_data(bo); + if (fb) { + p->fb = fb; + return; + } + + fb = talloc_zero(ctx, struct framebuffer); + fb->fd = p->kms->fd; + fb->width = gbm_bo_get_width(bo); + fb->height = gbm_bo_get_height(bo); + uint32_t stride = gbm_bo_get_stride(bo); + uint32_t handle = gbm_bo_get_handle(bo).u32; + + int ret = drmModeAddFB2(fb->fd, fb->width, fb->height, + p->primary_plane_format, + (uint32_t[4]){handle, 0, 0, 0}, + (uint32_t[4]){stride, 0, 0, 0}, + (uint32_t[4]){0, 0, 0, 0}, + &fb->id, 0); + if (ret) { MP_ERR(ctx->vo, "Failed to create 
framebuffer: %s\n", mp_strerror(errno)); } - gbm_bo_set_user_data(bo, &p->fb, framebuffer_destroy_callback); -} - -static void page_flipped(int fd, unsigned int frame, unsigned int sec, - unsigned int usec, void *data) -{ - struct priv *p = data; - p->waiting_for_flip = false; + gbm_bo_set_user_data(bo, fb, framebuffer_destroy_callback); + p->fb = fb; } -static bool crtc_setup(struct MPGLContext *ctx) +static bool crtc_setup(struct ra_ctx *ctx) { struct priv *p = ctx->priv; if (p->active) return true; p->old_crtc = drmModeGetCrtc(p->kms->fd, p->kms->crtc_id); - int ret = drmModeSetCrtc(p->kms->fd, p->kms->crtc_id, p->fb.id, + int ret = drmModeSetCrtc(p->kms->fd, p->kms->crtc_id, p->fb->id, 0, 0, &p->kms->connector->connector_id, 1, &p->kms->mode); p->active = true; return ret == 0; } -static void crtc_release(struct MPGLContext *ctx) +static void crtc_release(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -204,7 +215,7 @@ static void crtc_release(struct MPGLContext *ctx) static void release_vt(void *data) { - struct MPGLContext *ctx = data; + struct ra_ctx *ctx = data; MP_VERBOSE(ctx->vo, "Releasing VT"); crtc_release(ctx); if (USE_MASTER) { @@ -221,7 +232,7 @@ static void release_vt(void *data) static void acquire_vt(void *data) { - struct MPGLContext *ctx = data; + struct ra_ctx *ctx = data; MP_VERBOSE(ctx->vo, "Acquiring VT"); if (USE_MASTER) { struct priv *p = ctx->priv; @@ -234,11 +245,78 @@ static void acquire_vt(void *data) crtc_setup(ctx); } -static void drm_egl_uninit(MPGLContext *ctx) +static bool drm_atomic_egl_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) +{ + struct priv *p = sw->ctx->priv; + if (p->kms->atomic_context) { + p->kms->atomic_context->request = drmModeAtomicAlloc(); + p->drm_params.atomic_request = p->kms->atomic_context->request; + return ra_gl_ctx_start_frame(sw, out_fbo); + } + return false; +} + +static const struct ra_swapchain_fns drm_atomic_swapchain = { + .start_frame = drm_atomic_egl_start_frame, +}; + +static 
void drm_egl_swap_buffers(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - crtc_release(ctx); + struct drm_atomic_context *atomic_ctx = p->kms->atomic_context; + int ret; + + eglSwapBuffers(p->egl.display, p->egl.surface); + p->gbm.next_bo = gbm_surface_lock_front_buffer(p->gbm.surface); + p->waiting_for_flip = true; + update_framebuffer_from_bo(ctx, p->gbm.next_bo); + + if (atomic_ctx) { + drm_object_set_property(atomic_ctx->request, atomic_ctx->primary_plane, "FB_ID", p->fb->id); + drm_object_set_property(atomic_ctx->request, atomic_ctx->primary_plane, "CRTC_ID", atomic_ctx->crtc->id); + drm_object_set_property(atomic_ctx->request, atomic_ctx->primary_plane, "ZPOS", 1); + + ret = drmModeAtomicCommit(p->kms->fd, atomic_ctx->request, + DRM_MODE_ATOMIC_NONBLOCK | DRM_MODE_PAGE_FLIP_EVENT, NULL); + if (ret) + MP_WARN(ctx->vo, "Failed to commit atomic request (%d)\n", ret); + } else { + ret = drmModePageFlip(p->kms->fd, p->kms->crtc_id, p->fb->id, + DRM_MODE_PAGE_FLIP_EVENT, p); + if (ret) { + MP_WARN(ctx->vo, "Failed to queue page flip: %s\n", mp_strerror(errno)); + } + } + + // poll page flip finish event + const int timeout_ms = 3000; + struct pollfd fds[1] = { { .events = POLLIN, .fd = p->kms->fd } }; + poll(fds, 1, timeout_ms); + if (fds[0].revents & POLLIN) { + ret = drmHandleEvent(p->kms->fd, &p->ev); + if (ret != 0) { + MP_ERR(ctx->vo, "drmHandleEvent failed: %i\n", ret); + p->waiting_for_flip = false; + return; + } + } + p->waiting_for_flip = false; + + if (atomic_ctx) { + drmModeAtomicFree(atomic_ctx->request); + p->drm_params.atomic_request = atomic_ctx->request = NULL; + } + + gbm_surface_release_buffer(p->gbm.surface, p->gbm.bo); + p->gbm.bo = p->gbm.next_bo; +} + +static void drm_egl_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + crtc_release(ctx); if (p->vt_switcher_active) vt_switcher_destroy(&p->vt_switcher); @@ -258,100 +336,146 @@ static void drm_egl_uninit(MPGLContext *ctx) } } -static int 
drm_egl_init(struct MPGLContext *ctx, int flags) +// If primary plane supports ARGB8888 we want to use that, but if it doesn't we +// fall back on XRGB8888. If the driver does not support atomic there is no +// particular reason to be using ARGB8888, so we fall back to XRGB8888 (another +// reason is that we do not have the convenient atomic_ctx and its convenient +// primary_plane field). +static bool probe_primary_plane_format(struct ra_ctx *ctx) { - if (ctx->vo->probing) { - MP_VERBOSE(ctx->vo, "DRM EGL backend can be activated only manually.\n"); - return -1; - } struct priv *p = ctx->priv; - p->kms = NULL; - p->old_crtc = NULL; - p->gbm.surface = NULL; - p->gbm.device = NULL; - p->active = false; - p->waiting_for_flip = false; + if (!p->kms->atomic_context) { + p->primary_plane_format = DRM_FORMAT_XRGB8888; + MP_VERBOSE(ctx->vo, "Not using DRM Atomic: Use DRM_FORMAT_XRGB8888 for primary plane.\n"); + return true; + } + + drmModePlane *drmplane = + drmModeGetPlane(p->kms->fd, p->kms->atomic_context->primary_plane->id); + bool have_argb8888 = false; + bool have_xrgb8888 = false; + bool result = false; + for (unsigned int i = 0; i < drmplane->count_formats; ++i) { + if (drmplane->formats[i] == DRM_FORMAT_ARGB8888) { + have_argb8888 = true; + } else if (drmplane->formats[i] == DRM_FORMAT_XRGB8888) { + have_xrgb8888 = true; + } + } + + if (have_argb8888) { + p->primary_plane_format = DRM_FORMAT_ARGB8888; + MP_VERBOSE(ctx->vo, "DRM_FORMAT_ARGB8888 supported by primary plane.\n"); + result = true; + } else if (have_xrgb8888) { + p->primary_plane_format = DRM_FORMAT_XRGB8888; + MP_VERBOSE(ctx->vo, + "DRM_FORMAT_ARGB8888 not supported by primary plane: " + "Falling back to DRM_FORMAT_XRGB8888.\n"); + result = true; + } + + drmModeFreePlane(drmplane); + return result; +} + +static bool drm_egl_init(struct ra_ctx *ctx) +{ + if (ctx->opts.probing) { + MP_VERBOSE(ctx, "DRM EGL backend can be activated only manually.\n"); + return false; + } + + struct priv *p = ctx->priv = 
talloc_zero(ctx, struct priv); p->ev.version = DRM_EVENT_CONTEXT_VERSION; - p->ev.page_flip_handler = page_flipped; p->vt_switcher_active = vt_switcher_init(&p->vt_switcher, ctx->vo->log); if (p->vt_switcher_active) { vt_switcher_acquire(&p->vt_switcher, acquire_vt, ctx); vt_switcher_release(&p->vt_switcher, release_vt, ctx); } else { - MP_WARN(ctx->vo, "Failed to set up VT switcher. Terminal switching will be unavailable.\n"); + MP_WARN(ctx, "Failed to set up VT switcher. Terminal switching will be unavailable.\n"); } - MP_VERBOSE(ctx->vo, "Initializing KMS\n"); - p->kms = kms_create(ctx->vo->log, ctx->vo->opts->drm_connector_spec, - ctx->vo->opts->drm_mode_id); + MP_VERBOSE(ctx, "Initializing KMS\n"); + p->kms = kms_create(ctx->log, ctx->vo->opts->drm_opts->drm_connector_spec, + ctx->vo->opts->drm_opts->drm_mode_id, + ctx->vo->opts->drm_opts->drm_overlay_id); if (!p->kms) { - MP_ERR(ctx->vo, "Failed to create KMS.\n"); - return -1; + MP_ERR(ctx, "Failed to create KMS.\n"); + return false; + } + + if (!probe_primary_plane_format(ctx)) { + MP_ERR(ctx->vo, "No suitable format found on DRM primary plane.\n"); + return false; } if (!init_gbm(ctx)) { MP_ERR(ctx->vo, "Failed to setup GBM.\n"); - return -1; + return false; } - if (!init_egl(ctx, flags)) { + if (!init_egl(ctx)) { MP_ERR(ctx->vo, "Failed to setup EGL.\n"); - return -1; + return false; } if (!eglMakeCurrent(p->egl.display, p->egl.surface, p->egl.surface, p->egl.context)) { MP_ERR(ctx->vo, "Failed to make context current.\n"); - return -1; + return false; } - mpegl_load_functions(ctx->gl, ctx->vo->log); - - ctx->native_display_type = "drm"; - ctx->native_display = (void *)(intptr_t)p->kms->fd; - + mpegl_load_functions(&p->gl, ctx->vo->log); // required by gbm_surface_lock_front_buffer eglSwapBuffers(p->egl.display, p->egl.surface); - MP_VERBOSE(ctx->vo, "Preparing framebuffer\n"); + MP_VERBOSE(ctx, "Preparing framebuffer\n"); p->gbm.bo = gbm_surface_lock_front_buffer(p->gbm.surface); if (!p->gbm.bo) { - 
MP_ERR(ctx->vo, "Failed to lock GBM surface.\n"); - return -1; + MP_ERR(ctx, "Failed to lock GBM surface.\n"); + return false; } update_framebuffer_from_bo(ctx, p->gbm.bo); - if (!p->fb.id) { - MP_ERR(ctx->vo, "Failed to create framebuffer.\n"); - return -1; + if (!p->fb || !p->fb->id) { + MP_ERR(ctx, "Failed to create framebuffer.\n"); + return false; } if (!crtc_setup(ctx)) { - MP_ERR(ctx->vo, "Failed to set CRTC for connector %u: %s\n", + MP_ERR(ctx, "Failed to set CRTC for connector %u: %s\n", p->kms->connector->connector_id, mp_strerror(errno)); - return -1; + return false; } - return 0; -} + p->drm_params.fd = p->kms->fd; + p->drm_params.crtc_id = p->kms->crtc_id; + if (p->kms->atomic_context) + p->drm_params.atomic_request = p->kms->atomic_context->request; + struct ra_gl_ctx_params params = { + .swap_buffers = drm_egl_swap_buffers, + .native_display_type = "opengl-cb-drm-params", + .native_display = &p->drm_params, + .external_swapchain = p->kms->atomic_context ? &drm_atomic_swapchain : + NULL, + }; + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + return false; -static int drm_egl_init_deprecated(struct MPGLContext *ctx, int flags) -{ - if (ctx->vo->probing) - return -1; - MP_WARN(ctx->vo, "'drm-egl' is deprecated, use 'drm' instead.\n"); - return drm_egl_init(ctx, flags); + return true; } -static int drm_egl_reconfig(struct MPGLContext *ctx) +static bool drm_egl_reconfig(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - ctx->vo->dwidth = p->fb.width; - ctx->vo->dheight = p->fb.height; - return 0; + ctx->vo->dwidth = p->fb->width; + ctx->vo->dheight = p->fb->height; + ra_gl_ctx_resize(ctx->swapchain, p->fb->width, p->fb->height, 0); + return true; } -static int drm_egl_control(struct MPGLContext *ctx, int *events, int request, +static int drm_egl_control(struct ra_ctx *ctx, int *events, int request, void *arg) { struct priv *p = ctx->priv; @@ -367,51 +491,11 @@ static int drm_egl_control(struct MPGLContext *ctx, int *events, int request, return VO_NOTIMPL; } 
-static void drm_egl_swap_buffers(MPGLContext *ctx) -{ - struct priv *p = ctx->priv; - eglSwapBuffers(p->egl.display, p->egl.surface); - p->gbm.next_bo = gbm_surface_lock_front_buffer(p->gbm.surface); - p->waiting_for_flip = true; - update_framebuffer_from_bo(ctx, p->gbm.next_bo); - int ret = drmModePageFlip(p->kms->fd, p->kms->crtc_id, p->fb.id, - DRM_MODE_PAGE_FLIP_EVENT, p); - if (ret) { - MP_WARN(ctx->vo, "Failed to queue page flip: %s\n", mp_strerror(errno)); - } - - // poll page flip finish event - const int timeout_ms = 3000; - struct pollfd fds[1] = { { .events = POLLIN, .fd = p->kms->fd } }; - poll(fds, 1, timeout_ms); - if (fds[0].revents & POLLIN) { - ret = drmHandleEvent(p->kms->fd, &p->ev); - if (ret != 0) { - MP_ERR(ctx->vo, "drmHandleEvent failed: %i\n", ret); - return; - } - } - - gbm_surface_release_buffer(p->gbm.surface, p->gbm.bo); - p->gbm.bo = p->gbm.next_bo; -} - -const struct mpgl_driver mpgl_driver_drm = { +const struct ra_ctx_fns ra_ctx_drm_egl = { + .type = "opengl", .name = "drm", - .priv_size = sizeof(struct priv), - .init = drm_egl_init, - .reconfig = drm_egl_reconfig, - .swap_buffers = drm_egl_swap_buffers, - .control = drm_egl_control, - .uninit = drm_egl_uninit, -}; - -const struct mpgl_driver mpgl_driver_drm_egl = { - .name = "drm-egl", - .priv_size = sizeof(struct priv), - .init = drm_egl_init_deprecated, .reconfig = drm_egl_reconfig, - .swap_buffers = drm_egl_swap_buffers, .control = drm_egl_control, + .init = drm_egl_init, .uninit = drm_egl_uninit, }; diff --git a/video/out/opengl/context_dxinterop.c b/video/out/opengl/context_dxinterop.c index 507c150..85d84bf 100644 --- a/video/out/opengl/context_dxinterop.c +++ b/video/out/opengl/context_dxinterop.c @@ -22,6 +22,7 @@ #include "osdep/windows_utils.h" #include "video/out/w32_common.h" #include "context.h" +#include "utils.h" // For WGL_ACCESS_WRITE_DISCARD_NV, etc. 
#include <GL/wglext.h> @@ -35,6 +36,8 @@ EXTERN_C IMAGE_DOS_HEADER __ImageBase; #endif struct priv { + GL gl; + HMODULE d3d9_dll; HRESULT (WINAPI *Direct3DCreate9Ex)(UINT SDKVersion, IDirect3D9Ex **ppD3D); @@ -54,6 +57,7 @@ struct priv { // OpenGL resources GLuint texture; + GLuint main_fb; // Did we lose the device? bool lost_device; @@ -63,7 +67,7 @@ struct priv { int width, height, swapinterval; }; -static __thread struct MPGLContext *current_ctx; +static __thread struct ra_ctx *current_ctx; static void pump_message_loop(void) { @@ -84,10 +88,11 @@ static void *w32gpa(const GLubyte *procName) return GetProcAddress(oglmod, procName); } -static int os_ctx_create(struct MPGLContext *ctx) +static int os_ctx_create(struct ra_ctx *ctx) { static const wchar_t os_wnd_class[] = L"mpv offscreen gl"; struct priv *p = ctx->priv; + GL *gl = &p->gl; HGLRC legacy_context = NULL; RegisterClassExW(&(WNDCLASSEXW) { @@ -190,8 +195,8 @@ static int os_ctx_create(struct MPGLContext *ctx) goto fail; } - mpgl_load_functions(ctx->gl, w32gpa, wgl_exts, ctx->vo->log); - if (!(ctx->gl->mpgl_caps & MPGL_CAP_DXINTEROP)) { + mpgl_load_functions(gl, w32gpa, wgl_exts, ctx->vo->log); + if (!(gl->mpgl_caps & MPGL_CAP_DXINTEROP)) { MP_FATAL(ctx->vo, "WGL_NV_DX_interop is not supported\n"); goto fail; } @@ -205,7 +210,7 @@ fail: return -1; } -static void os_ctx_destroy(MPGLContext *ctx) +static void os_ctx_destroy(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -219,10 +224,10 @@ static void os_ctx_destroy(MPGLContext *ctx) DestroyWindow(p->os_wnd); } -static int d3d_size_dependent_create(MPGLContext *ctx) +static int d3d_size_dependent_create(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - struct GL *gl = ctx->gl; + GL *gl = &p->gl; HRESULT hr; IDirect3DSwapChain9 *sw9; @@ -294,7 +299,7 @@ static int d3d_size_dependent_create(MPGLContext *ctx) return -1; } - gl->BindFramebuffer(GL_FRAMEBUFFER, ctx->main_fb); + gl->BindFramebuffer(GL_FRAMEBUFFER, p->main_fb); 
gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, p->texture, 0); gl->BindFramebuffer(GL_FRAMEBUFFER, 0); @@ -302,10 +307,10 @@ static int d3d_size_dependent_create(MPGLContext *ctx) return 0; } -static void d3d_size_dependent_destroy(MPGLContext *ctx) +static void d3d_size_dependent_destroy(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - struct GL *gl = ctx->gl; + GL *gl = &p->gl; if (p->rtarget_h) { gl->DXUnlockObjectsNV(p->device_h, 1, &p->rtarget_h); @@ -321,7 +326,8 @@ static void d3d_size_dependent_destroy(MPGLContext *ctx) SAFE_RELEASE(p->swapchain); } -static void fill_presentparams(MPGLContext *ctx, D3DPRESENT_PARAMETERS *pparams) +static void fill_presentparams(struct ra_ctx *ctx, + D3DPRESENT_PARAMETERS *pparams) { struct priv *p = ctx->priv; @@ -338,13 +344,9 @@ static void fill_presentparams(MPGLContext *ctx, D3DPRESENT_PARAMETERS *pparams) .Windowed = TRUE, .BackBufferWidth = ctx->vo->dwidth ? ctx->vo->dwidth : 1, .BackBufferHeight = ctx->vo->dheight ? ctx->vo->dheight : 1, - // The length of the backbuffer queue shouldn't affect latency because - // swap_buffers() always uses the backbuffer at the head of the queue - // and presents it immediately. MSDN says there is a performance - // penalty for having a short backbuffer queue and this seems to be - // true, at least on Nvidia, where less than four backbuffers causes - // very high CPU usage. Use six to be safe. - .BackBufferCount = 6, + // Add one frame for the backbuffer and one frame of "slack" to reduce + // contention with the window manager when acquiring the backbuffer + .BackBufferCount = ctx->opts.swapchain_depth + 2, .SwapEffect = IsWindows7OrGreater() ? 
D3DSWAPEFFECT_FLIPEX : D3DSWAPEFFECT_FLIP, // Automatically get the backbuffer format from the display format .BackBufferFormat = D3DFMT_UNKNOWN, @@ -353,10 +355,10 @@ static void fill_presentparams(MPGLContext *ctx, D3DPRESENT_PARAMETERS *pparams) }; } -static int d3d_create(MPGLContext *ctx) +static int d3d_create(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - struct GL *gl = ctx->gl; + GL *gl = &p->gl; HRESULT hr; p->d3d9_dll = LoadLibraryW(L"d3d9.dll"); @@ -396,8 +398,7 @@ static int d3d_create(MPGLContext *ctx) return -1; } - // mpv expects frames to be presented right after swap_buffers() returns - IDirect3DDevice9Ex_SetMaximumFrameLatency(p->device, 1); + IDirect3DDevice9Ex_SetMaximumFrameLatency(p->device, ctx->opts.swapchain_depth); // Register the Direct3D device with WGL_NV_dx_interop p->device_h = gl->DXOpenDeviceNV(p->device); @@ -410,10 +411,10 @@ static int d3d_create(MPGLContext *ctx) return 0; } -static void d3d_destroy(MPGLContext *ctx) +static void d3d_destroy(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - struct GL *gl = ctx->gl; + GL *gl = &p->gl; if (p->device_h) gl->DXCloseDeviceNV(p->device_h); @@ -423,8 +424,9 @@ static void d3d_destroy(MPGLContext *ctx) FreeLibrary(p->d3d9_dll); } -static void dxinterop_uninit(MPGLContext *ctx) +static void dxgl_uninit(struct ra_ctx *ctx) { + ra_gl_ctx_uninit(ctx); d3d_size_dependent_destroy(ctx); d3d_destroy(ctx); os_ctx_destroy(ctx); @@ -433,7 +435,7 @@ static void dxinterop_uninit(MPGLContext *ctx) pump_message_loop(); } -static void dxinterop_reset(struct MPGLContext *ctx) +static void dxgl_reset(struct ra_ctx *ctx) { struct priv *p = ctx->priv; HRESULT hr; @@ -468,18 +470,18 @@ static void dxinterop_reset(struct MPGLContext *ctx) p->lost_device = false; } -static int GLAPIENTRY dxinterop_swap_interval(int interval) +static int GLAPIENTRY dxgl_swap_interval(int interval) { if (!current_ctx) return 0; struct priv *p = current_ctx->priv; p->requested_swapinterval = interval; - 
dxinterop_reset(current_ctx); + dxgl_reset(current_ctx); return 1; } -static void * GLAPIENTRY dxinterop_get_native_display(const char *name) +static void * GLAPIENTRY dxgl_get_native_display(const char *name) { if (!current_ctx || !name) return NULL; @@ -493,60 +495,17 @@ static void * GLAPIENTRY dxinterop_get_native_display(const char *name) return NULL; } -static int dxinterop_init(struct MPGLContext *ctx, int flags) -{ - struct priv *p = ctx->priv; - struct GL *gl = ctx->gl; - - p->requested_swapinterval = 1; - - if (!vo_w32_init(ctx->vo)) - goto fail; - if (os_ctx_create(ctx) < 0) - goto fail; - - // Create the shared framebuffer - gl->GenFramebuffers(1, &ctx->main_fb); - - current_ctx = ctx; - gl->SwapInterval = dxinterop_swap_interval; - gl->MPGetNativeDisplay = dxinterop_get_native_display; - - if (d3d_create(ctx) < 0) - goto fail; - if (d3d_size_dependent_create(ctx) < 0) - goto fail; - - // The OpenGL and Direct3D coordinate systems are flipped vertically - // relative to each other. Flip the video during rendering so it can be - // copied to the Direct3D backbuffer with a simple (and fast) StretchRect. - ctx->flip_v = true; - - DwmEnableMMCSS(TRUE); - - return 0; -fail: - dxinterop_uninit(ctx); - return -1; -} - -static int dxinterop_reconfig(struct MPGLContext *ctx) -{ - vo_w32_config(ctx->vo); - return 0; -} - -static void dxinterop_swap_buffers(MPGLContext *ctx) +static void dxgl_swap_buffers(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - struct GL *gl = ctx->gl; + GL *gl = &p->gl; HRESULT hr; pump_message_loop(); // If the device is still lost, try to reset it again if (p->lost_device) - dxinterop_reset(ctx); + dxgl_reset(ctx); if (p->lost_device) return; @@ -571,7 +530,7 @@ static void dxinterop_swap_buffers(MPGLContext *ctx) case D3DERR_DEVICEHUNG: MP_VERBOSE(ctx->vo, "Direct3D device lost! 
Resetting.\n"); p->lost_device = true; - dxinterop_reset(ctx); + dxgl_reset(ctx); return; default: if (FAILED(hr)) @@ -584,21 +543,75 @@ static void dxinterop_swap_buffers(MPGLContext *ctx) } } -static int dxinterop_control(MPGLContext *ctx, int *events, int request, +static bool dxgl_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + GL *gl = &p->gl; + + p->requested_swapinterval = 1; + + if (!vo_w32_init(ctx->vo)) + goto fail; + if (os_ctx_create(ctx) < 0) + goto fail; + + // Create the shared framebuffer + gl->GenFramebuffers(1, &p->main_fb); + + current_ctx = ctx; + gl->SwapInterval = dxgl_swap_interval; + gl->MPGetNativeDisplay = dxgl_get_native_display; + + if (d3d_create(ctx) < 0) + goto fail; + if (d3d_size_dependent_create(ctx) < 0) + goto fail; + + static const struct ra_swapchain_fns empty_swapchain_fns = {0}; + struct ra_gl_ctx_params params = { + .swap_buffers = dxgl_swap_buffers, + .flipped = true, + .external_swapchain = &empty_swapchain_fns, + }; + + if (!ra_gl_ctx_init(ctx, gl, params)) + goto fail; + + DwmEnableMMCSS(TRUE); + return true; +fail: + dxgl_uninit(ctx); + return false; +} + +static void resize(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + dxgl_reset(ctx); + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, p->main_fb); +} + +static bool dxgl_reconfig(struct ra_ctx *ctx) +{ + vo_w32_config(ctx->vo); + resize(ctx); + return true; +} + +static int dxgl_control(struct ra_ctx *ctx, int *events, int request, void *arg) { - int r = vo_w32_control(ctx->vo, events, request, arg); + int ret = vo_w32_control(ctx->vo, events, request, arg); if (*events & VO_EVENT_RESIZE) - dxinterop_reset(ctx); - return r; + resize(ctx); + return ret; } -const struct mpgl_driver mpgl_driver_dxinterop = { +const struct ra_ctx_fns ra_ctx_dxgl = { + .type = "opengl", .name = "dxinterop", - .priv_size = sizeof(struct priv), - .init = dxinterop_init, - .reconfig = dxinterop_reconfig, - .swap_buffers = 
dxinterop_swap_buffers, - .control = dxinterop_control, - .uninit = dxinterop_uninit, + .init = dxgl_init, + .reconfig = dxgl_reconfig, + .control = dxgl_control, + .uninit = dxgl_uninit, }; diff --git a/video/out/opengl/context_x11.c b/video/out/opengl/context_glx.c index 4d8dac1..462f2cf 100644 --- a/video/out/opengl/context_x11.c +++ b/video/out/opengl/context_glx.c @@ -39,43 +39,46 @@ #include "video/out/x11_common.h" #include "context.h" +#include "utils.h" -struct glx_context { +struct priv { + GL gl; XVisualInfo *vinfo; GLXContext context; GLXFBConfig fbc; }; -static void glx_uninit(MPGLContext *ctx) +static void glx_uninit(struct ra_ctx *ctx) { - struct glx_context *glx_ctx = ctx->priv; - if (glx_ctx->vinfo) - XFree(glx_ctx->vinfo); - if (glx_ctx->context) { + struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + + if (p->vinfo) + XFree(p->vinfo); + if (p->context) { Display *display = ctx->vo->x11->display; glXMakeCurrent(display, None, NULL); - glXDestroyContext(display, glx_ctx->context); + glXDestroyContext(display, p->context); } + vo_x11_uninit(ctx->vo); } -static bool create_context_x11_old(struct MPGLContext *ctx) +static bool create_context_x11_old(struct ra_ctx *ctx, GL *gl) { - struct glx_context *glx_ctx = ctx->priv; + struct priv *p = ctx->priv; Display *display = ctx->vo->x11->display; struct vo *vo = ctx->vo; - GL *gl = ctx->gl; - if (glx_ctx->context) + if (p->context) return true; - if (!glx_ctx->vinfo) { + if (!p->vinfo) { MP_FATAL(vo, "Can't create a legacy GLX context without X visual\n"); return false; } - GLXContext new_context = glXCreateContext(display, glx_ctx->vinfo, NULL, - True); + GLXContext new_context = glXCreateContext(display, p->vinfo, NULL, True); if (!new_context) { MP_FATAL(vo, "Could not create GLX context!\n"); return false; @@ -91,7 +94,7 @@ static bool create_context_x11_old(struct MPGLContext *ctx) mpgl_load_functions(gl, (void *)glXGetProcAddressARB, glxstr, vo->log); - glx_ctx->context = new_context; + p->context 
= new_context; return true; } @@ -99,15 +102,18 @@ static bool create_context_x11_old(struct MPGLContext *ctx) typedef GLXContext (*glXCreateContextAttribsARBProc) (Display*, GLXFBConfig, GLXContext, Bool, const int*); -static bool create_context_x11_gl3(struct MPGLContext *ctx, int vo_flags, - int gl_version, bool es) +static bool create_context_x11_gl3(struct ra_ctx *ctx, GL *gl, int gl_version, + bool es) { - struct glx_context *glx_ctx = ctx->priv; + struct priv *p = ctx->priv; struct vo *vo = ctx->vo; - if (glx_ctx->context) + if (p->context) return true; + if (!ra_gl_ctx_test_version(ctx, gl_version, es)) + return false; + glXCreateContextAttribsARBProc glXCreateContextAttribsARB = (glXCreateContextAttribsARBProc) glXGetProcAddressARB((const GLubyte *)"glXCreateContextAttribsARB"); @@ -120,7 +126,7 @@ static bool create_context_x11_gl3(struct MPGLContext *ctx, int vo_flags, return false; } - int ctx_flags = vo_flags & VOFLAG_GL_DEBUG ? GLX_CONTEXT_DEBUG_BIT_ARB : 0; + int ctx_flags = ctx->opts.debug ? GLX_CONTEXT_DEBUG_BIT_ARB : 0; int profile_mask = GLX_CONTEXT_CORE_PROFILE_BIT_ARB; if (es) { @@ -138,7 +144,7 @@ static bool create_context_x11_gl3(struct MPGLContext *ctx, int vo_flags, }; vo_x11_silence_xlib(1); GLXContext context = glXCreateContextAttribsARB(vo->x11->display, - glx_ctx->fbc, 0, True, + p->fbc, 0, True, context_attribs); vo_x11_silence_xlib(-1); if (!context) @@ -151,9 +157,9 @@ static bool create_context_x11_gl3(struct MPGLContext *ctx, int vo_flags, return false; } - glx_ctx->context = context; + p->context = context; - mpgl_load_functions(ctx->gl, (void *)glXGetProcAddressARB, glxstr, vo->log); + mpgl_load_functions(gl, (void *)glXGetProcAddressARB, glxstr, vo->log); return true; } @@ -162,7 +168,7 @@ static bool create_context_x11_gl3(struct MPGLContext *ctx, int vo_flags, // http://www.opengl.org/wiki/Tutorial:_OpenGL_3.0_Context_Creation_(GLX) // but also uses some of the old code. 
-static GLXFBConfig select_fb_config(struct vo *vo, const int *attribs, int flags) +static GLXFBConfig select_fb_config(struct vo *vo, const int *attribs, bool alpha) { int fbcount; GLXFBConfig *fbc = glXChooseFBConfig(vo->x11->display, vo->x11->screen, @@ -173,7 +179,7 @@ static GLXFBConfig select_fb_config(struct vo *vo, const int *attribs, int flags // The list in fbc is sorted (so that the first element is the best). GLXFBConfig fbconfig = fbcount > 0 ? fbc[0] : NULL; - if (flags & VOFLAG_ALPHA) { + if (alpha) { for (int n = 0; n < fbcount; n++) { XVisualInfo *v = glXGetVisualFromFBConfig(vo->x11->display, fbc[n]); if (v) { @@ -202,10 +208,16 @@ static void set_glx_attrib(int *attribs, int name, int value) } } -static int glx_init(struct MPGLContext *ctx, int flags) +static void glx_swap_buffers(struct ra_ctx *ctx) +{ + glXSwapBuffers(ctx->vo->x11->display, ctx->vo->x11->window); +} + +static bool glx_init(struct ra_ctx *ctx) { + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); struct vo *vo = ctx->vo; - struct glx_context *glx_ctx = ctx->priv; + GL *gl = &p->gl; if (!vo_x11_init(ctx->vo)) goto uninit; @@ -213,12 +225,12 @@ static int glx_init(struct MPGLContext *ctx, int flags) int glx_major, glx_minor; if (!glXQueryVersion(vo->x11->display, &glx_major, &glx_minor)) { - MP_ERR(vo, "GLX not found.\n"); + MP_ERR(ctx, "GLX not found.\n"); goto uninit; } // FBConfigs were added in GLX version 1.3. 
if (MPGL_VER(glx_major, glx_minor) < MPGL_VER(1, 3)) { - MP_ERR(vo, "GLX version older than 1.3.\n"); + MP_ERR(ctx, "GLX version older than 1.3.\n"); goto uninit; } @@ -233,126 +245,132 @@ static int glx_init(struct MPGLContext *ctx, int flags) None }; GLXFBConfig fbc = NULL; - if (flags & VOFLAG_ALPHA) { + if (ctx->opts.want_alpha) { set_glx_attrib(glx_attribs, GLX_ALPHA_SIZE, 1); - fbc = select_fb_config(vo, glx_attribs, flags); - if (!fbc) { + fbc = select_fb_config(vo, glx_attribs, true); + if (!fbc) set_glx_attrib(glx_attribs, GLX_ALPHA_SIZE, 0); - flags &= ~VOFLAG_ALPHA; - } } if (!fbc) - fbc = select_fb_config(vo, glx_attribs, flags); + fbc = select_fb_config(vo, glx_attribs, false); if (!fbc) { - MP_ERR(vo, "no GLX support present\n"); + MP_ERR(ctx, "no GLX support present\n"); goto uninit; } int fbid = -1; if (!glXGetFBConfigAttrib(vo->x11->display, fbc, GLX_FBCONFIG_ID, &fbid)) - MP_VERBOSE(vo, "GLX chose FB config with ID 0x%x\n", fbid); + MP_VERBOSE(ctx, "GLX chose FB config with ID 0x%x\n", fbid); - glx_ctx->fbc = fbc; - glx_ctx->vinfo = glXGetVisualFromFBConfig(vo->x11->display, fbc); - if (glx_ctx->vinfo) { - MP_VERBOSE(vo, "GLX chose visual with ID 0x%x\n", - (int)glx_ctx->vinfo->visualid); + p->fbc = fbc; + p->vinfo = glXGetVisualFromFBConfig(vo->x11->display, fbc); + if (p->vinfo) { + MP_VERBOSE(ctx, "GLX chose visual with ID 0x%x\n", + (int)p->vinfo->visualid); } else { - MP_WARN(vo, "Selected GLX FB config has no associated X visual\n"); + MP_WARN(ctx, "Selected GLX FB config has no associated X visual\n"); } - if (!vo_x11_create_vo_window(vo, glx_ctx->vinfo, "gl")) + if (!vo_x11_create_vo_window(vo, p->vinfo, "gl")) goto uninit; bool success = false; - if (!(flags & VOFLAG_GLES)) { - for (int n = 0; mpgl_preferred_gl_versions[n]; n++) { - int version = mpgl_preferred_gl_versions[n]; - MP_VERBOSE(vo, "Creating OpenGL %d.%d context...\n", - MPGL_VER_P(version)); - if (version >= 300) { - success = create_context_x11_gl3(ctx, flags, version, 
false); - } else { - success = create_context_x11_old(ctx); - } - if (success) - break; + for (int n = 0; mpgl_preferred_gl_versions[n]; n++) { + int version = mpgl_preferred_gl_versions[n]; + MP_VERBOSE(ctx, "Creating OpenGL %d.%d context...\n", + MPGL_VER_P(version)); + if (version >= 300) { + success = create_context_x11_gl3(ctx, gl, version, false); + } else { + success = create_context_x11_old(ctx, gl); } + if (success) + break; } - if (!success) // try ES - success = create_context_x11_gl3(ctx, flags, 200, true); - if (success && !glXIsDirect(vo->x11->display, glx_ctx->context)) - ctx->gl->mpgl_caps |= MPGL_CAP_SW; + if (!success) // try again for GLES + success = create_context_x11_gl3(ctx, gl, 200, true); + if (success && !glXIsDirect(vo->x11->display, p->context)) + gl->mpgl_caps |= MPGL_CAP_SW; if (!success) goto uninit; - return 0; + struct ra_gl_ctx_params params = { + .swap_buffers = glx_swap_buffers, + }; + + if (!ra_gl_ctx_init(ctx, gl, params)) + goto uninit; + + return true; uninit: glx_uninit(ctx); - return -1; + return false; } -static int glx_init_probe(struct MPGLContext *ctx, int flags) +static bool glx_init_probe(struct ra_ctx *ctx) { - int r = glx_init(ctx, flags); - if (r >= 0) { - if (!(ctx->gl->mpgl_caps & MPGL_CAP_VDPAU)) { - MP_VERBOSE(ctx->vo, "No vdpau support found - probing more things.\n"); - glx_uninit(ctx); - r = -1; - } + if (!glx_init(ctx)) + return false; + + struct priv *p = ctx->priv; + if (!(p->gl.mpgl_caps & MPGL_CAP_VDPAU)) { + MP_VERBOSE(ctx, "No vdpau support found - probing more things.\n"); + glx_uninit(ctx); + return false; } - return r; + + return true; } -static int glx_reconfig(struct MPGLContext *ctx) +static void resize(struct ra_ctx *ctx) { - vo_x11_config_vo_window(ctx->vo); - return 0; + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0); } -static int glx_control(struct MPGLContext *ctx, int *events, int request, - void *arg) +static bool glx_reconfig(struct ra_ctx *ctx) { - return 
vo_x11_control(ctx->vo, events, request, arg); + vo_x11_config_vo_window(ctx->vo); + resize(ctx); + return true; } -static void glx_swap_buffers(struct MPGLContext *ctx) +static int glx_control(struct ra_ctx *ctx, int *events, int request, void *arg) { - glXSwapBuffers(ctx->vo->x11->display, ctx->vo->x11->window); + int ret = vo_x11_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) + resize(ctx); + return ret; } -static void glx_wakeup(struct MPGLContext *ctx) +static void glx_wakeup(struct ra_ctx *ctx) { vo_x11_wakeup(ctx->vo); } -static void glx_wait_events(struct MPGLContext *ctx, int64_t until_time_us) +static void glx_wait_events(struct ra_ctx *ctx, int64_t until_time_us) { vo_x11_wait_events(ctx->vo, until_time_us); } -const struct mpgl_driver mpgl_driver_x11 = { +const struct ra_ctx_fns ra_ctx_glx = { + .type = "opengl", .name = "x11", - .priv_size = sizeof(struct glx_context), - .init = glx_init, .reconfig = glx_reconfig, - .swap_buffers = glx_swap_buffers, .control = glx_control, .wakeup = glx_wakeup, .wait_events = glx_wait_events, + .init = glx_init, .uninit = glx_uninit, }; -const struct mpgl_driver mpgl_driver_x11_probe = { +const struct ra_ctx_fns ra_ctx_glx_probe = { + .type = "opengl", .name = "x11probe", - .priv_size = sizeof(struct glx_context), - .init = glx_init_probe, .reconfig = glx_reconfig, - .swap_buffers = glx_swap_buffers, .control = glx_control, .wakeup = glx_wakeup, .wait_events = glx_wait_events, + .init = glx_init_probe, .uninit = glx_uninit, }; diff --git a/video/out/opengl/context_mali_fbdev.c b/video/out/opengl/context_mali_fbdev.c index 66daa7f..8576e53 100644 --- a/video/out/opengl/context_mali_fbdev.c +++ b/video/out/opengl/context_mali_fbdev.c @@ -50,8 +50,7 @@ static bool get_fbdev_size(int *w, int *h) } struct priv { - struct mp_log *log; - struct GL *gl; + struct GL gl; EGLDisplay egl_display; EGLConfig egl_config; EGLContext egl_context; @@ -60,9 +59,10 @@ struct priv { int w, h; }; -static void 
mali_uninit(struct MPGLContext *ctx) +static void mali_uninit(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); if (p->egl_surface) { eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, @@ -74,25 +74,29 @@ static void mali_uninit(struct MPGLContext *ctx) eglReleaseThread(); } -static int mali_init(struct MPGLContext *ctx, int flags) +static void mali_swap_buffers(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - p->log = ctx->vo->log; + eglSwapBuffers(p->egl_display, p->egl_surface); +} + +static bool mali_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); if (!get_fbdev_size(&p->w, &p->h)) { - MP_FATAL(p, "Could not get fbdev size.\n"); + MP_FATAL(ctx, "Could not get fbdev size.\n"); goto fail; } p->egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY); if (!eglInitialize(p->egl_display, NULL, NULL)) { - MP_FATAL(p, "EGL failed to initialize.\n"); + MP_FATAL(ctx, "EGL failed to initialize.\n"); goto fail; } EGLConfig config; - if (!mpegl_create_context(p->egl_display, p->log, flags, &p->egl_context, - &config)) + if (!mpegl_create_context(ctx, p->egl_display, &p->egl_context, &config)) goto fail; p->egl_window = (struct fbdev_window){ @@ -104,53 +108,51 @@ static int mali_init(struct MPGLContext *ctx, int flags) (EGLNativeWindowType)&p->egl_window, NULL); if (p->egl_surface == EGL_NO_SURFACE) { - MP_FATAL(p, "Could not create EGL surface!\n"); + MP_FATAL(ctx, "Could not create EGL surface!\n"); goto fail; } if (!eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, p->egl_context)) { - MP_FATAL(p, "Failed to set context!\n"); + MP_FATAL(ctx, "Failed to set context!\n"); goto fail; } - ctx->gl = talloc_zero(ctx, GL); + mpegl_load_functions(&p->gl, ctx->log); - mpegl_load_functions(ctx->gl, p->log); + struct ra_gl_ctx_params params = { + .swap_buffers = mali_swap_buffers, + }; + + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + goto fail; - return 0; + return true; fail: mali_uninit(ctx); - 
return -1; + return false; } -static int mali_reconfig(struct MPGLContext *ctx) +static bool mali_reconfig(struct ra_ctx *ctx) { struct priv *p = ctx->priv; ctx->vo->dwidth = p->w; ctx->vo->dheight = p->h; - return 0; + ra_gl_ctx_resize(ctx->swapchain, p->w, p->h, 0); } -static void mali_swap_buffers(MPGLContext *ctx) -{ - struct priv *p = ctx->priv; - eglSwapBuffers(p->egl_display, p->egl_surface); -} - -static int mali_control(MPGLContext *ctx, int *events, int request, void *arg) +static int mali_control(struct ra_ctx *ctx, int *events, int request, void *arg) { return VO_NOTIMPL; } -const struct mpgl_driver mpgl_driver_mali = { +const struct ra_ctx_fns ra_ctx_mali_fbdev = { + .type = "opengl", .name = "mali-fbdev", - .priv_size = sizeof(struct priv), - .init = mali_init, .reconfig = mali_reconfig, - .swap_buffers = mali_swap_buffers, .control = mali_control, + .init = mali_init, .uninit = mali_uninit, }; diff --git a/video/out/opengl/context_rpi.c b/video/out/opengl/context_rpi.c index e79622b..8b447d0 100644 --- a/video/out/opengl/context_rpi.c +++ b/video/out/opengl/context_rpi.c @@ -30,7 +30,7 @@ #include "egl_helpers.h" struct priv { - struct mp_log *log; + struct GL gl; DISPMANX_DISPLAY_HANDLE_T display; DISPMANX_ELEMENT_HANDLE_T window; DISPMANX_UPDATE_HANDLE_T update; @@ -49,13 +49,13 @@ struct priv { static void tv_callback(void *callback_data, uint32_t reason, uint32_t param1, uint32_t param2) { - struct MPGLContext *ctx = callback_data; + struct ra_ctx *ctx = callback_data; struct priv *p = ctx->priv; atomic_store(&p->reload_display, true); vo_wakeup(ctx->vo); } -static void destroy_dispmanx(struct MPGLContext *ctx) +static void destroy_dispmanx(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -77,9 +77,10 @@ static void destroy_dispmanx(struct MPGLContext *ctx) p->update = 0; } -static void rpi_uninit(MPGLContext *ctx) +static void rpi_uninit(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); 
vc_tv_unregister_callback_full(tv_callback, ctx); @@ -92,26 +93,26 @@ static void rpi_uninit(MPGLContext *ctx) p->egl_display = EGL_NO_DISPLAY; } -static int recreate_dispmanx(struct MPGLContext *ctx) +static bool recreate_dispmanx(struct ra_ctx *ctx) { struct priv *p = ctx->priv; int display_nr = 0; int layer = 0; - MP_VERBOSE(ctx->vo, "Recreating DISPMANX state...\n"); + MP_VERBOSE(ctx, "Recreating DISPMANX state...\n"); destroy_dispmanx(ctx); p->display = vc_dispmanx_display_open(display_nr); p->update = vc_dispmanx_update_start(0); if (!p->display || !p->update) { - MP_FATAL(ctx->vo, "Could not get DISPMANX objects.\n"); + MP_FATAL(ctx, "Could not get DISPMANX objects.\n"); goto fail; } uint32_t dispw, disph; if (graphics_get_display_size(0, &dispw, &disph) < 0) { - MP_FATAL(ctx->vo, "Could not get display size.\n"); + MP_FATAL(ctx, "Could not get display size.\n"); goto fail; } p->w = dispw; @@ -145,7 +146,7 @@ static int recreate_dispmanx(struct MPGLContext *ctx) &src, DISPMANX_PROTECTION_NONE, &alpha, 0, 0); if (!p->window) { - MP_FATAL(ctx->vo, "Could not add DISPMANX element.\n"); + MP_FATAL(ctx, "Could not add DISPMANX element.\n"); goto fail; } @@ -161,14 +162,14 @@ static int recreate_dispmanx(struct MPGLContext *ctx) &p->egl_window, NULL); if (p->egl_surface == EGL_NO_SURFACE) { - MP_FATAL(p, "Could not create EGL surface!\n"); + MP_FATAL(ctx, "Could not create EGL surface!\n"); goto fail; } if (!eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, p->egl_context)) { - MP_FATAL(p, "Failed to set context!\n"); + MP_FATAL(ctx, "Failed to set context!\n"); goto fail; } @@ -197,21 +198,27 @@ static int recreate_dispmanx(struct MPGLContext *ctx) ctx->vo->dwidth = p->w; ctx->vo->dheight = p->h; + ra_gl_ctx_resize(ctx->swapchain, p->w, p->h, 0); ctx->vo->want_redraw = true; vo_event(ctx->vo, VO_EVENT_WIN_STATE); - return 0; + return true; fail: destroy_dispmanx(ctx); - return -1; + return false; } -static int rpi_init(struct MPGLContext *ctx, int 
flags) +static void rpi_swap_buffers(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - p->log = ctx->vo->log; + eglSwapBuffers(p->egl_display, p->egl_surface); +} + +static bool rpi_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); bcm_host_init(); @@ -219,43 +226,40 @@ static int rpi_init(struct MPGLContext *ctx, int flags) p->egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY); if (!eglInitialize(p->egl_display, NULL, NULL)) { - MP_FATAL(p, "EGL failed to initialize.\n"); + MP_FATAL(ctx, "EGL failed to initialize.\n"); goto fail; } - if (!mpegl_create_context(p->egl_display, p->log, 0, &p->egl_context, - &p->egl_config)) + if (!mpegl_create_context(ctx, p->egl_display, &p->egl_context, &p->egl_config)) goto fail; if (recreate_dispmanx(ctx) < 0) goto fail; - ctx->gl = talloc_zero(ctx, GL); + mpegl_load_functions(&p->gl, ctx->log); - mpegl_load_functions(ctx->gl, p->log); + struct ra_gl_ctx_params params = { + .swap_buffers = rpi_swap_buffers, + .native_display_type = "MPV_RPI_WINDOW", + .native_display = p->win_params, + }; - ctx->native_display_type = "MPV_RPI_WINDOW"; - ctx->native_display = p->win_params; + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + goto fail; - return 0; + return true; fail: rpi_uninit(ctx); - return -1; + return false; } -static int rpi_reconfig(struct MPGLContext *ctx) +static bool rpi_reconfig(struct ra_ctx *ctx) { return recreate_dispmanx(ctx); } -static void rpi_swap_buffers(MPGLContext *ctx) -{ - struct priv *p = ctx->priv; - eglSwapBuffers(p->egl_display, p->egl_surface); -} - -static struct mp_image *take_screenshot(struct MPGLContext *ctx) +static struct mp_image *take_screenshot(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -289,21 +293,20 @@ fail: return NULL; } - -static int rpi_control(MPGLContext *ctx, int *events, int request, void *arg) +static int rpi_control(struct ra_ctx *ctx, int *events, int request, void *arg) { struct priv *p = ctx->priv; switch (request) { case 
VOCTRL_SCREENSHOT_WIN: *(struct mp_image **)arg = take_screenshot(ctx); - return true; + return VO_TRUE; case VOCTRL_FULLSCREEN: recreate_dispmanx(ctx); return VO_TRUE; case VOCTRL_CHECK_EVENTS: if (atomic_fetch_and(&p->reload_display, 0)) { - MP_WARN(ctx->vo, "Recovering from display mode switch...\n"); + MP_WARN(ctx, "Recovering from display mode switch...\n"); recreate_dispmanx(ctx); } return VO_TRUE; @@ -315,12 +318,11 @@ static int rpi_control(MPGLContext *ctx, int *events, int request, void *arg) return VO_NOTIMPL; } -const struct mpgl_driver mpgl_driver_rpi = { +const struct ra_ctx_fns ra_ctx_rpi = { + .type = "opengl", .name = "rpi", - .priv_size = sizeof(struct priv), - .init = rpi_init, .reconfig = rpi_reconfig, - .swap_buffers = rpi_swap_buffers, .control = rpi_control, + .init = rpi_init, .uninit = rpi_uninit, -};
\ No newline at end of file +}; diff --git a/video/out/opengl/context_vdpau.c b/video/out/opengl/context_vdpau.c index 40d21ab..e989414 100644 --- a/video/out/opengl/context_vdpau.c +++ b/video/out/opengl/context_vdpau.c @@ -26,8 +26,6 @@ // follow it. I'm not sure about the original nvidia headers. #define BRAINDEATH(x) ((void *)(uintptr_t)(x)) -#define NUM_SURFACES 4 - struct surface { int w, h; VdpOutputSurface surface; @@ -39,21 +37,22 @@ struct surface { }; struct priv { + GL gl; GLXContext context; struct mp_vdpau_ctx *vdp; VdpPresentationQueueTarget vdp_target; VdpPresentationQueue vdp_queue; + struct surface *surfaces; int num_surfaces; - struct surface surfaces[NUM_SURFACES]; - int current_surface; + int idx_surfaces; }; typedef GLXContext (*glXCreateContextAttribsARBProc) (Display*, GLXFBConfig, GLXContext, Bool, const int*); -static bool create_context_x11(struct MPGLContext *ctx, int vo_flags) +static bool create_context_x11(struct ra_ctx *ctx) { - struct priv *glx_ctx = ctx->priv; + struct priv *p = ctx->priv; struct vo *vo = ctx->vo; int glx_major, glx_minor; @@ -62,6 +61,9 @@ static bool create_context_x11(struct MPGLContext *ctx, int vo_flags) return false; } + if (!ra_gl_ctx_test_version(ctx, MPGL_VER(glx_major, glx_minor), false)) + return false; + int glx_attribs[] = { GLX_X_RENDERABLE, True, GLX_X_VISUAL_TYPE, GLX_TRUE_COLOR, @@ -96,7 +98,7 @@ static bool create_context_x11(struct MPGLContext *ctx, int vo_flags) return false; } - int ctx_flags = vo_flags & VOFLAG_GL_DEBUG ? GLX_CONTEXT_DEBUG_BIT_ARB : 0; + int ctx_flags = ctx->opts.debug ? 
GLX_CONTEXT_DEBUG_BIT_ARB : 0; int context_attribs[] = { GLX_CONTEXT_MAJOR_VERSION_ARB, 4, GLX_CONTEXT_MINOR_VERSION_ARB, 0, @@ -117,19 +119,20 @@ static bool create_context_x11(struct MPGLContext *ctx, int vo_flags) return false; } - glx_ctx->context = context; - mpgl_load_functions(ctx->gl, (void *)glXGetProcAddressARB, glxstr, vo->log); + p->context = context; + mpgl_load_functions(&p->gl, (void *)glXGetProcAddressARB, glxstr, vo->log); return true; } -static int create_vdpau_objects(struct MPGLContext *ctx) +static int create_vdpau_objects(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + struct GL *gl = &p->gl; VdpDevice dev = p->vdp->vdp_device; struct vdp_functions *vdp = &p->vdp->vdp; VdpStatus vdp_st; - ctx->gl->VDPAUInitNV(BRAINDEATH(dev), p->vdp->get_proc_address); + gl->VDPAUInitNV(BRAINDEATH(dev), p->vdp->get_proc_address); vdp_st = vdp->presentation_queue_target_create_x11(dev, ctx->vo->x11->window, &p->vdp_target); @@ -141,13 +144,13 @@ static int create_vdpau_objects(struct MPGLContext *ctx) return 0; } -static void destroy_vdpau_surface(struct MPGLContext *ctx, +static void destroy_vdpau_surface(struct ra_ctx *ctx, struct surface *surface) { struct priv *p = ctx->priv; struct vdp_functions *vdp = &p->vdp->vdp; VdpStatus vdp_st; - GL *gl = ctx->gl; + GL *gl = &p->gl; if (surface->mapped) gl->VDPAUUnmapSurfacesNV(1, &surface->registered); @@ -168,14 +171,14 @@ static void destroy_vdpau_surface(struct MPGLContext *ctx, }; } -static int recreate_vdpau_surface(struct MPGLContext *ctx, - struct surface *surface) +static bool recreate_vdpau_surface(struct ra_ctx *ctx, + struct surface *surface) { struct priv *p = ctx->priv; VdpDevice dev = p->vdp->vdp_device; struct vdp_functions *vdp = &p->vdp->vdp; VdpStatus vdp_st; - GL *gl = ctx->gl; + GL *gl = &p->gl; destroy_vdpau_surface(ctx, surface); @@ -219,16 +222,37 @@ static int recreate_vdpau_surface(struct MPGLContext *ctx, gl->VDPAUUnmapSurfacesNV(1, &surface->registered); surface->mapped = false; - 
return 0; + return true; error: destroy_vdpau_surface(ctx, surface); - return -1; + return false; +} + +static void vdpau_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vdp_functions *vdp = &p->vdp->vdp; + VdpStatus vdp_st; + + // This is the *next* surface we will be rendering to. By delaying the + // block_until_idle, we're essentially allowing p->num_surfaces - 1 + // in-flight surfaces, plus the one currently visible surface. + struct surface *surf = &p->surfaces[p->idx_surfaces]; + if (surf->surface == VDP_INVALID_HANDLE) + return; + + VdpTime prev_vsync_time; + vdp_st = vdp->presentation_queue_block_until_surface_idle(p->vdp_queue, + surf->surface, + &prev_vsync_time); + CHECK_VDP_WARNING(ctx, "waiting for surface failed"); } -static void glx_uninit(MPGLContext *ctx) +static void vdpau_uninit(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); if (p->vdp) { struct vdp_functions *vdp = &p->vdp->vdp; @@ -259,10 +283,12 @@ static void glx_uninit(MPGLContext *ctx) vo_x11_uninit(ctx->vo); } -static int glx_init(struct MPGLContext *ctx, int flags) +static const struct ra_swapchain_fns vdpau_swapchain; + +static bool vdpau_init(struct ra_ctx *ctx) { struct vo *vo = ctx->vo; - struct priv *p = ctx->priv; + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); p->vdp_queue = VDP_INVALID_HANDLE; p->vdp_target = VDP_INVALID_HANDLE; @@ -280,110 +306,112 @@ static int glx_init(struct MPGLContext *ctx, int flags) if (!vo_x11_create_vo_window(vo, NULL, "vdpauglx")) goto uninit; - if (!create_context_x11(ctx, flags)) + if (!create_context_x11(ctx)) goto uninit; - if (!(ctx->gl->mpgl_caps & MPGL_CAP_VDPAU)) + if (!(p->gl.mpgl_caps & MPGL_CAP_VDPAU)) goto uninit; if (create_vdpau_objects(ctx) < 0) goto uninit; - p->num_surfaces = NUM_SURFACES; + p->num_surfaces = ctx->opts.swapchain_depth + 1; // +1 for the visible image + p->surfaces = talloc_zero_array(p, struct surface, p->num_surfaces); for (int n = 0; n < 
p->num_surfaces; n++) p->surfaces[n].surface = VDP_INVALID_HANDLE; - ctx->flip_v = true; + struct ra_gl_ctx_params params = { + .swap_buffers = vdpau_swap_buffers, + .external_swapchain = &vdpau_swapchain, + .flipped = true, + }; - return 0; + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + goto uninit; + + return true; uninit: - glx_uninit(ctx); - return -1; + vdpau_uninit(ctx); + return false; } -static int glx_reconfig(struct MPGLContext *ctx) +static bool vdpau_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) { - vo_x11_config_vo_window(ctx->vo); - return 0; -} + struct priv *p = sw->ctx->priv; + struct vo *vo = sw->ctx->vo; + GL *gl = &p->gl; + + struct surface *surf = &p->surfaces[p->idx_surfaces]; + if (surf->w != vo->dwidth || surf->h != vo->dheight || + surf->surface == VDP_INVALID_HANDLE) + { + if (!recreate_vdpau_surface(sw->ctx, surf)) + return NULL; + } -static int glx_control(struct MPGLContext *ctx, int *events, int request, - void *arg) -{ - return vo_x11_control(ctx->vo, events, request, arg); + assert(!surf->mapped); + gl->VDPAUMapSurfacesNV(1, &surf->registered); + surf->mapped = true; + + ra_gl_ctx_resize(sw, surf->w, surf->h, surf->fbo); + return ra_gl_ctx_start_frame(sw, out_fbo); } -static void glx_start_frame(struct MPGLContext *ctx) +static bool vdpau_submit_frame(struct ra_swapchain *sw, + const struct vo_frame *frame) { - struct priv *p = ctx->priv; + struct priv *p = sw->ctx->priv; + GL *gl = &p->gl; struct vdp_functions *vdp = &p->vdp->vdp; VdpStatus vdp_st; - GL *gl = ctx->gl; - - struct surface *surface = &p->surfaces[p->current_surface]; - - if (surface->surface != VDP_INVALID_HANDLE) { - VdpTime prev_vsync_time; - vdp_st = vdp->presentation_queue_block_until_surface_idle(p->vdp_queue, - surface->surface, - &prev_vsync_time); - CHECK_VDP_WARNING(ctx, "waiting for surface failed"); - } - if (surface->w != ctx->vo->dwidth || surface->h != ctx->vo->dheight) - recreate_vdpau_surface(ctx, surface); + struct surface *surf = 
&p->surfaces[p->idx_surfaces]; + assert(surf->surface != VDP_INVALID_HANDLE); + assert(surf->mapped); + gl->VDPAUUnmapSurfacesNV(1, &surf->registered); + surf->mapped = false; + vdp_st = vdp->presentation_queue_display(p->vdp_queue, surf->surface, 0, 0, 0); + CHECK_VDP_WARNING(sw->ctx, "trying to present vdp surface"); - ctx->main_fb = surface->fbo; // 0 if creating the surface failed - - if (surface->surface != VDP_INVALID_HANDLE) { - gl->VDPAUMapSurfacesNV(1, &surface->registered); - surface->mapped = true; - } + p->idx_surfaces = (p->idx_surfaces + 1) % p->num_surfaces; + return ra_gl_ctx_submit_frame(sw, frame) && vdp_st == VDP_STATUS_OK; } -static void glx_swap_buffers(struct MPGLContext *ctx) +static bool vdpau_reconfig(struct ra_ctx *ctx) { - struct priv *p = ctx->priv; - struct vdp_functions *vdp = &p->vdp->vdp; - VdpStatus vdp_st; - GL *gl = ctx->gl; - - struct surface *surface = &p->surfaces[p->current_surface]; - if (surface->surface == VDP_INVALID_HANDLE) - return; // surface alloc probably failed before - - if (surface->mapped) - gl->VDPAUUnmapSurfacesNV(1, &surface->registered); - surface->mapped = false; - - vdp_st = vdp->presentation_queue_display(p->vdp_queue, surface->surface, - 0, 0, 0); - CHECK_VDP_WARNING(ctx, "trying to present vdp surface"); + vo_x11_config_vo_window(ctx->vo); + return true; +} - p->current_surface = (p->current_surface + 1) % p->num_surfaces; +static int vdpau_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + return vo_x11_control(ctx->vo, events, request, arg); } -static void glx_wakeup(struct MPGLContext *ctx) +static void vdpau_wakeup(struct ra_ctx *ctx) { vo_x11_wakeup(ctx->vo); } -static void glx_wait_events(struct MPGLContext *ctx, int64_t until_time_us) +static void vdpau_wait_events(struct ra_ctx *ctx, int64_t until_time_us) { vo_x11_wait_events(ctx->vo, until_time_us); } -const struct mpgl_driver mpgl_driver_vdpauglx = { +static const struct ra_swapchain_fns vdpau_swapchain = { + .start_frame = 
vdpau_start_frame, + .submit_frame = vdpau_submit_frame, +}; + +const struct ra_ctx_fns ra_ctx_vdpauglx = { + .type = "opengl", .name = "vdpauglx", - .priv_size = sizeof(struct priv), - .init = glx_init, - .reconfig = glx_reconfig, - .start_frame = glx_start_frame, - .swap_buffers = glx_swap_buffers, - .control = glx_control, - .wakeup = glx_wakeup, - .wait_events = glx_wait_events, - .uninit = glx_uninit, + .reconfig = vdpau_reconfig, + .control = vdpau_control, + .wakeup = vdpau_wakeup, + .wait_events = vdpau_wait_events, + .init = vdpau_init, + .uninit = vdpau_uninit, }; diff --git a/video/out/opengl/context_wayland.c b/video/out/opengl/context_wayland.c index 87e98cd..f686fcc 100644 --- a/video/out/opengl/context_wayland.c +++ b/video/out/opengl/context_wayland.c @@ -16,189 +16,166 @@ * License along with mpv. If not, see <http://www.gnu.org/licenses/>. */ +#include <wayland-egl.h> +#include <EGL/egl.h> +#include <EGL/eglext.h> + #include "video/out/wayland_common.h" #include "context.h" #include "egl_helpers.h" +#include "utils.h" + +struct priv { + GL gl; + EGLDisplay egl_display; + EGLContext egl_context; + EGLSurface egl_surface; + EGLConfig egl_config; + struct wl_egl_window *egl_window; +}; -static void egl_resize(struct vo_wayland_state *wl) +static void resize(struct ra_ctx *ctx) { - int32_t x = wl->window.sh_x; - int32_t y = wl->window.sh_y; - int32_t width = wl->window.sh_width; - int32_t height = wl->window.sh_height; - int32_t scale = 1; - - if (!wl->egl_context.egl_window) - return; - - if (wl->display.current_output) - scale = wl->display.current_output->scale; - - // get the real size of the window - // this improves moving the window while resizing it - wl_egl_window_get_attached_size(wl->egl_context.egl_window, - &wl->window.width, - &wl->window.height); + struct priv *p = ctx->priv; + struct vo_wayland_state *wl = ctx->vo->wl; - MP_VERBOSE(wl, "resizing %dx%d -> %dx%d\n", wl->window.width, - wl->window.height, - width, - height); + 
MP_VERBOSE(wl, "Handling resize on the egl side\n"); - if (x != 0) - x = wl->window.width - width; + const int32_t width = wl->scaling*mp_rect_w(wl->geometry); + const int32_t height = wl->scaling*mp_rect_h(wl->geometry); - if (y != 0) - y = wl->window.height - height; + wl_surface_set_buffer_scale(wl->surface, wl->scaling); + wl_egl_window_resize(p->egl_window, width, height, 0, 0); - wl_surface_set_buffer_scale(wl->window.video_surface, scale); - wl_egl_window_resize(wl->egl_context.egl_window, scale*width, scale*height, x, y); - - wl->window.width = width; - wl->window.height = height; + wl->vo->dwidth = width; + wl->vo->dheight = height; +} - /* set size for mplayer */ - wl->vo->dwidth = scale*wl->window.width; - wl->vo->dheight = scale*wl->window.height; - wl->vo->want_redraw = true; +static void wayland_egl_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + eglSwapBuffers(p->egl_display, p->egl_surface); } -static int egl_create_context(struct vo_wayland_state *wl, MPGLContext *ctx, - int flags) +static bool egl_create_context(struct ra_ctx *ctx) { - GL *gl = ctx->gl; + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct vo_wayland_state *wl = ctx->vo->wl; - if (!(wl->egl_context.egl.dpy = eglGetDisplay(wl->display.display))) - return -1; + if (!(p->egl_display = eglGetDisplay(wl->display))) + return false; - if (eglInitialize(wl->egl_context.egl.dpy, NULL, NULL) != EGL_TRUE) - return -1; + if (eglInitialize(p->egl_display, NULL, NULL) != EGL_TRUE) + return false; - if (!mpegl_create_context(wl->egl_context.egl.dpy, wl->log, flags, - &wl->egl_context.egl.ctx, - &wl->egl_context.egl.conf)) - return -1; + if (!mpegl_create_context(ctx, p->egl_display, &p->egl_context, + &p->egl_config)) + return false; - eglMakeCurrent(wl->egl_context.egl.dpy, NULL, NULL, wl->egl_context.egl.ctx); + eglMakeCurrent(p->egl_display, NULL, NULL, p->egl_context); - mpegl_load_functions(gl, wl->log); + mpegl_load_functions(&p->gl, wl->log); - 
ctx->native_display_type = "wl"; - ctx->native_display = wl->display.display; + struct ra_gl_ctx_params params = { + .swap_buffers = wayland_egl_swap_buffers, + .native_display_type = "wl", + .native_display = wl->display, + }; - return 0; -} + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + return false; -static void egl_create_window(struct vo_wayland_state *wl) -{ - wl->egl_context.egl_window = wl_egl_window_create(wl->window.video_surface, - wl->window.width, - wl->window.height); - - wl->egl_context.egl_surface = eglCreateWindowSurface(wl->egl_context.egl.dpy, - wl->egl_context.egl.conf, - wl->egl_context.egl_window, - NULL); - - eglMakeCurrent(wl->egl_context.egl.dpy, - wl->egl_context.egl_surface, - wl->egl_context.egl_surface, - wl->egl_context.egl.ctx); - - wl_display_dispatch_pending(wl->display.display); - - /** - * <http://lists.freedesktop.org/archives/wayland-devel/2013-November/012019.html> - * - * The main change is that if the swap interval is 0 then Mesa won't install a - * frame callback so that eglSwapBuffers can be executed as often as necessary. - * Instead it will do a sync request after the swap buffers. It will block for - * sync complete event in get_back_bo instead of the frame callback. The - * compositor is likely to send a release event while processing the new buffer - * attach and this makes sure we will receive that before deciding whether to - * allocate a new buffer. 
- */ - - eglSwapInterval(wl->egl_context.egl.dpy, 0); + return true; } -static int waylandgl_reconfig(struct MPGLContext *ctx) +static void egl_create_window(struct ra_ctx *ctx) { - struct vo_wayland_state * wl = ctx->vo->wayland; + struct priv *p = ctx->priv; + struct vo_wayland_state *wl = ctx->vo->wl; - if (!vo_wayland_config(ctx->vo)) - return -1; + p->egl_window = wl_egl_window_create(wl->surface, mp_rect_w(wl->geometry), + mp_rect_h(wl->geometry)); - if (!wl->egl_context.egl_window) - egl_create_window(wl); + p->egl_surface = eglCreateWindowSurface(p->egl_display, p->egl_config, + p->egl_window, NULL); - return 0; + eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, p->egl_context); + + eglSwapInterval(p->egl_display, 0); } -static void waylandgl_uninit(MPGLContext *ctx) +static bool wayland_egl_reconfig(struct ra_ctx *ctx) { - struct vo_wayland_state *wl = ctx->vo->wayland; + struct priv *p = ctx->priv; - if (wl->egl_context.egl.ctx) { - eglReleaseThread(); - if (wl->egl_context.egl_window) - wl_egl_window_destroy(wl->egl_context.egl_window); - eglDestroySurface(wl->egl_context.egl.dpy, wl->egl_context.egl_surface); - eglMakeCurrent(wl->egl_context.egl.dpy, NULL, NULL, EGL_NO_CONTEXT); - eglDestroyContext(wl->egl_context.egl.dpy, wl->egl_context.egl.ctx); - } - eglTerminate(wl->egl_context.egl.dpy); - wl->egl_context.egl.ctx = NULL; + if (!vo_wayland_reconfig(ctx->vo)) + return false; - vo_wayland_uninit(ctx->vo); + if (!p->egl_window) + egl_create_window(ctx); + + return true; } -static void waylandgl_swap_buffers(MPGLContext *ctx) +static void wayland_egl_uninit(struct ra_ctx *ctx) { - struct vo_wayland_state *wl = ctx->vo->wayland; + struct priv *p = ctx->priv; - vo_wayland_wait_events(ctx->vo, 0); + ra_gl_ctx_uninit(ctx); - eglSwapBuffers(wl->egl_context.egl.dpy, wl->egl_context.egl_surface); + if (p->egl_context) { + eglReleaseThread(); + if (p->egl_window) + wl_egl_window_destroy(p->egl_window); + eglDestroySurface(p->egl_display, 
p->egl_surface); + eglMakeCurrent(p->egl_display, NULL, NULL, EGL_NO_CONTEXT); + eglDestroyContext(p->egl_display, p->egl_context); + p->egl_context = NULL; + } + eglTerminate(p->egl_display); + + vo_wayland_uninit(ctx->vo); } -static int waylandgl_control(MPGLContext *ctx, int *events, int request, +static int wayland_egl_control(struct ra_ctx *ctx, int *events, int request, void *data) { - struct vo_wayland_state *wl = ctx->vo->wayland; + struct vo_wayland_state *wl = ctx->vo->wl; int r = vo_wayland_control(ctx->vo, events, request, data); - if (*events & VO_EVENT_RESIZE) - egl_resize(wl); + if (*events & VO_EVENT_RESIZE) { + resize(ctx); + ra_gl_ctx_resize(ctx->swapchain, wl->vo->dwidth, wl->vo->dheight, 0); + } return r; } -static void wayland_wakeup(struct MPGLContext *ctx) +static void wayland_egl_wakeup(struct ra_ctx *ctx) { vo_wayland_wakeup(ctx->vo); } -static void wayland_wait_events(struct MPGLContext *ctx, int64_t until_time_us) +static void wayland_egl_wait_events(struct ra_ctx *ctx, int64_t until_time_us) { vo_wayland_wait_events(ctx->vo, until_time_us); } -static int waylandgl_init(struct MPGLContext *ctx, int flags) +static bool wayland_egl_init(struct ra_ctx *ctx) { if (!vo_wayland_init(ctx->vo)) - return -1; + return false; - return egl_create_context(ctx->vo->wayland, ctx, flags); + return egl_create_context(ctx); } -const struct mpgl_driver mpgl_driver_wayland = { +const struct ra_ctx_fns ra_ctx_wayland_egl = { + .type = "opengl", .name = "wayland", - .init = waylandgl_init, - .reconfig = waylandgl_reconfig, - .swap_buffers = waylandgl_swap_buffers, - .control = waylandgl_control, - .wakeup = wayland_wakeup, - .wait_events = wayland_wait_events, - .uninit = waylandgl_uninit, + .reconfig = wayland_egl_reconfig, + .control = wayland_egl_control, + .wakeup = wayland_egl_wakeup, + .wait_events = wayland_egl_wait_events, + .init = wayland_egl_init, + .uninit = wayland_egl_uninit, }; diff --git a/video/out/opengl/context_w32.c 
b/video/out/opengl/context_win.c index eb61239..5a0042b 100644 --- a/video/out/opengl/context_w32.c +++ b/video/out/opengl/context_win.c @@ -21,8 +21,8 @@ #include "options/m_config.h" #include "video/out/w32_common.h" -#include "video/out/win32/exclusive_hack.h" #include "context.h" +#include "utils.h" #if !defined(WGL_CONTEXT_MAJOR_VERSION_ARB) /* these are supposed to be defined in wingdi.h but mingw's is too old */ @@ -37,7 +37,9 @@ #define WGL_CONTEXT_CORE_PROFILE_BIT_ARB 0x00000001 #endif -struct w32_context { +struct priv { + GL gl; + int opt_swapinterval; int current_swapinterval; @@ -45,26 +47,25 @@ struct w32_context { HGLRC context; HDC hdc; - int flags; }; -static void w32_uninit(MPGLContext *ctx); +static void wgl_uninit(struct ra_ctx *ctx); -static __thread struct w32_context *current_w32_context; +static __thread struct priv *current_wgl_context; -static int GLAPIENTRY w32_swap_interval(int interval) +static int GLAPIENTRY wgl_swap_interval(int interval) { - if (current_w32_context) - current_w32_context->opt_swapinterval = interval; + if (current_wgl_context) + current_wgl_context->opt_swapinterval = interval; return 0; } -static bool create_dc(struct MPGLContext *ctx, int flags) +static bool create_dc(struct ra_ctx *ctx) { - struct w32_context *w32_ctx = ctx->priv; + struct priv *p = ctx->priv; HWND win = vo_w32_hwnd(ctx->vo); - if (w32_ctx->hdc) + if (p->hdc) return true; HDC hdc = GetDC(win); @@ -90,11 +91,11 @@ static bool create_dc(struct MPGLContext *ctx, int flags) SetPixelFormat(hdc, pf, &pfd); - w32_ctx->hdc = hdc; + p->hdc = hdc; return true; } -static void *w32gpa(const GLubyte *procName) +static void *wglgpa(const GLubyte *procName) { HMODULE oglmod; void *res = wglGetProcAddress(procName); @@ -104,11 +105,11 @@ static void *w32gpa(const GLubyte *procName) return GetProcAddress(oglmod, procName); } -static bool create_context_w32_old(struct MPGLContext *ctx) +static bool create_context_wgl_old(struct ra_ctx *ctx) { - struct w32_context 
*w32_ctx = ctx->priv; + struct priv *p = ctx->priv; - HDC windc = w32_ctx->hdc; + HDC windc = p->hdc; bool res = false; HGLRC context = wglCreateContext(windc); @@ -123,17 +124,15 @@ static bool create_context_w32_old(struct MPGLContext *ctx) return res; } - w32_ctx->context = context; - - mpgl_load_functions(ctx->gl, w32gpa, NULL, ctx->vo->log); + p->context = context; return true; } -static bool create_context_w32_gl3(struct MPGLContext *ctx) +static bool create_context_wgl_gl3(struct ra_ctx *ctx) { - struct w32_context *w32_ctx = ctx->priv; + struct priv *p = ctx->priv; - HDC windc = w32_ctx->hdc; + HDC windc = p->hdc; HGLRC context = 0; // A legacy context is needed to get access to the new functions. @@ -150,7 +149,7 @@ static bool create_context_w32_gl3(struct MPGLContext *ctx) } const char *(GLAPIENTRY *wglGetExtensionsStringARB)(HDC hdc) - = w32gpa((const GLubyte*)"wglGetExtensionsStringARB"); + = wglgpa((const GLubyte*)"wglGetExtensionsStringARB"); if (!wglGetExtensionsStringARB) goto unsupported; @@ -161,7 +160,7 @@ static bool create_context_w32_gl3(struct MPGLContext *ctx) HGLRC (GLAPIENTRY *wglCreateContextAttribsARB)(HDC hDC, HGLRC hShareContext, const int *attribList) - = w32gpa((const GLubyte*)"wglCreateContextAttribsARB"); + = wglgpa((const GLubyte*)"wglCreateContextAttribsARB"); if (!wglCreateContextAttribsARB) goto unsupported; @@ -197,11 +196,7 @@ static bool create_context_w32_gl3(struct MPGLContext *ctx) return false; } - w32_ctx->context = context; - - /* update function pointers */ - mpgl_load_functions(ctx->gl, w32gpa, NULL, ctx->vo->log); - + p->context = context; return true; unsupported: @@ -214,79 +209,20 @@ out: static void create_ctx(void *ptr) { - struct MPGLContext *ctx = ptr; - struct w32_context *w32_ctx = ctx->priv; + struct ra_ctx *ctx = ptr; + struct priv *p = ctx->priv; - if (!create_dc(ctx, w32_ctx->flags)) + if (!create_dc(ctx)) return; - create_context_w32_gl3(ctx); - if (!w32_ctx->context) - create_context_w32_old(ctx); - 
- wglMakeCurrent(w32_ctx->hdc, NULL); -} - -static int w32_init(struct MPGLContext *ctx, int flags) -{ - if (!vo_w32_init(ctx->vo)) - goto fail; - - struct w32_context *w32_ctx = ctx->priv; + create_context_wgl_gl3(ctx); + if (!p->context) + create_context_wgl_old(ctx); - w32_ctx->flags = flags; - vo_w32_run_on_thread(ctx->vo, create_ctx, ctx); - - if (!w32_ctx->context) - goto fail; - - if (!ctx->gl->SwapInterval) - MP_VERBOSE(ctx->vo, "WGL_EXT_swap_control missing.\n"); - w32_ctx->real_wglSwapInterval = ctx->gl->SwapInterval; - ctx->gl->SwapInterval = w32_swap_interval; - w32_ctx->current_swapinterval = -1; - - current_w32_context = w32_ctx; - wglMakeCurrent(w32_ctx->hdc, w32_ctx->context); - DwmEnableMMCSS(TRUE); - return 0; - -fail: - w32_uninit(ctx); - return -1; + wglMakeCurrent(p->hdc, NULL); } -static int w32_reconfig(struct MPGLContext *ctx) -{ - vo_w32_config(ctx->vo); - return 0; -} - -static void destroy_gl(void *ptr) -{ - struct MPGLContext *ctx = ptr; - struct w32_context *w32_ctx = ctx->priv; - if (w32_ctx->context) - wglDeleteContext(w32_ctx->context); - w32_ctx->context = 0; - if (w32_ctx->hdc) - ReleaseDC(vo_w32_hwnd(ctx->vo), w32_ctx->hdc); - w32_ctx->hdc = NULL; - current_w32_context = NULL; -} - -static void w32_uninit(MPGLContext *ctx) -{ - struct w32_context *w32_ctx = ctx->priv; - if (w32_ctx->context) - wglMakeCurrent(w32_ctx->hdc, 0); - vo_w32_run_on_thread(ctx->vo, destroy_gl, ctx); - - DwmEnableMMCSS(FALSE); - vo_w32_uninit(ctx->vo); -} - -static bool compositor_active(MPGLContext *ctx) +static bool compositor_active(struct ra_ctx *ctx) { // For Windows 7. BOOL enabled = 0; @@ -300,21 +236,16 @@ static bool compositor_active(MPGLContext *ctx) if (FAILED(DwmGetCompositionTimingInfo(0, &info))) return false; - // Test if a program is running in exclusive fullscreen mode. If so, it's - // probably this one, so it's not getting redirected by the compositor. 
- if (mp_w32_is_in_exclusive_mode()) - return false; - return true; } -static void w32_swap_buffers(MPGLContext *ctx) +static void wgl_swap_buffers(struct ra_ctx *ctx) { - struct w32_context *w32_ctx = ctx->priv; - SwapBuffers(w32_ctx->hdc); + struct priv *p = ctx->priv; + SwapBuffers(p->hdc); // default if we don't DwmFLush - int new_swapinterval = w32_ctx->opt_swapinterval; + int new_swapinterval = p->opt_swapinterval; int dwm_flush_opt; mp_read_option_raw(ctx->global, "opengl-dwmflush", &m_option_type_choice, @@ -330,26 +261,103 @@ static void w32_swap_buffers(MPGLContext *ctx) } } - if (new_swapinterval != w32_ctx->current_swapinterval && - w32_ctx->real_wglSwapInterval) + if (new_swapinterval != p->current_swapinterval && + p->real_wglSwapInterval) { - w32_ctx->real_wglSwapInterval(new_swapinterval); + p->real_wglSwapInterval(new_swapinterval); MP_VERBOSE(ctx->vo, "set SwapInterval(%d)\n", new_swapinterval); } - w32_ctx->current_swapinterval = new_swapinterval; + p->current_swapinterval = new_swapinterval; +} + +static bool wgl_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + GL *gl = &p->gl; + + if (!vo_w32_init(ctx->vo)) + goto fail; + + vo_w32_run_on_thread(ctx->vo, create_ctx, ctx); + if (!p->context) + goto fail; + + current_wgl_context = p; + wglMakeCurrent(p->hdc, p->context); + + mpgl_load_functions(gl, wglgpa, NULL, ctx->vo->log); + + if (!gl->SwapInterval) + MP_VERBOSE(ctx->vo, "WGL_EXT_swap_control missing.\n"); + p->real_wglSwapInterval = gl->SwapInterval; + gl->SwapInterval = wgl_swap_interval; + p->current_swapinterval = -1; + + struct ra_gl_ctx_params params = { + .swap_buffers = wgl_swap_buffers, + }; + + if (!ra_gl_ctx_init(ctx, gl, params)) + goto fail; + + DwmEnableMMCSS(TRUE); + return true; + +fail: + wgl_uninit(ctx); + return false; +} + +static void resize(struct ra_ctx *ctx) +{ + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0); +} + +static bool wgl_reconfig(struct ra_ctx 
*ctx) +{ + vo_w32_config(ctx->vo); + resize(ctx); + return true; +} + +static void destroy_gl(void *ptr) +{ + struct ra_ctx *ctx = ptr; + struct priv *p = ctx->priv; + if (p->context) + wglDeleteContext(p->context); + p->context = 0; + if (p->hdc) + ReleaseDC(vo_w32_hwnd(ctx->vo), p->hdc); + p->hdc = NULL; + current_wgl_context = NULL; +} + +static void wgl_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + if (p->context) + wglMakeCurrent(p->hdc, 0); + vo_w32_run_on_thread(ctx->vo, destroy_gl, ctx); + + DwmEnableMMCSS(FALSE); + vo_w32_uninit(ctx->vo); } -static int w32_control(MPGLContext *ctx, int *events, int request, void *arg) +static int wgl_control(struct ra_ctx *ctx, int *events, int request, void *arg) { - return vo_w32_control(ctx->vo, events, request, arg); + int ret = vo_w32_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) + resize(ctx); + return ret; } -const struct mpgl_driver mpgl_driver_w32 = { +const struct ra_ctx_fns ra_ctx_wgl = { + .type = "opengl", .name = "win", - .priv_size = sizeof(struct w32_context), - .init = w32_init, - .reconfig = w32_reconfig, - .swap_buffers = w32_swap_buffers, - .control = w32_control, - .uninit = w32_uninit, + .init = wgl_init, + .reconfig = wgl_reconfig, + .control = wgl_control, + .uninit = wgl_uninit, }; diff --git a/video/out/opengl/context_x11egl.c b/video/out/opengl/context_x11egl.c index 2b68007..7ab4fe0 100644 --- a/video/out/opengl/context_x11egl.c +++ b/video/out/opengl/context_x11egl.c @@ -32,14 +32,17 @@ #include "egl_helpers.h" struct priv { + GL gl; EGLDisplay egl_display; EGLContext egl_context; EGLSurface egl_surface; }; -static void mpegl_uninit(MPGLContext *ctx) +static void mpegl_uninit(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + if (p->egl_context) { eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); @@ -51,7 +54,7 @@ static void mpegl_uninit(MPGLContext *ctx) static int 
pick_xrgba_config(void *user_data, EGLConfig *configs, int num_configs) { - struct MPGLContext *ctx = user_data; + struct ra_ctx *ctx = user_data; struct priv *p = ctx->priv; struct vo *vo = ctx->vo; @@ -72,40 +75,44 @@ static int pick_xrgba_config(void *user_data, EGLConfig *configs, int num_config return 0; } -static int mpegl_init(struct MPGLContext *ctx, int flags) +static void mpegl_swap_buffers(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + eglSwapBuffers(p->egl_display, p->egl_surface); +} + +static bool mpegl_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); struct vo *vo = ctx->vo; - int msgl = vo->probing ? MSGL_V : MSGL_FATAL; + int msgl = ctx->opts.probing ? MSGL_V : MSGL_FATAL; if (!vo_x11_init(vo)) goto uninit; p->egl_display = eglGetDisplay(vo->x11->display); if (!eglInitialize(p->egl_display, NULL, NULL)) { - mp_msg(vo->log, msgl, "Could not initialize EGL.\n"); + MP_MSG(ctx, msgl, "Could not initialize EGL.\n"); goto uninit; } - struct mpegl_opts opts = { - .vo_flags = flags, + struct mpegl_cb cb = { .user_data = ctx, - .refine_config = (flags & VOFLAG_ALPHA) ? pick_xrgba_config : NULL, + .refine_config = ctx->opts.want_alpha ? 
pick_xrgba_config : NULL, }; EGLConfig config; - if (!mpegl_create_context_opts(p->egl_display, vo->log, &opts, - &p->egl_context, &config)) + if (!mpegl_create_context_cb(ctx, p->egl_display, cb, &p->egl_context, &config)) goto uninit; int vID, n; eglGetConfigAttrib(p->egl_display, config, EGL_NATIVE_VISUAL_ID, &vID); - MP_VERBOSE(vo, "chose visual 0x%x\n", vID); + MP_VERBOSE(ctx, "chose visual 0x%x\n", vID); XVisualInfo template = {.visualid = vID}; XVisualInfo *vi = XGetVisualInfo(vo->x11->display, VisualIDMask, &template, &n); if (!vi) { - MP_FATAL(vo, "Getting X visual failed!\n"); + MP_FATAL(ctx, "Getting X visual failed!\n"); goto uninit; } @@ -120,64 +127,73 @@ static int mpegl_init(struct MPGLContext *ctx, int flags) (EGLNativeWindowType)vo->x11->window, NULL); if (p->egl_surface == EGL_NO_SURFACE) { - MP_FATAL(ctx->vo, "Could not create EGL surface!\n"); + MP_FATAL(ctx, "Could not create EGL surface!\n"); goto uninit; } if (!eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, p->egl_context)) { - MP_FATAL(ctx->vo, "Could not make context current!\n"); + MP_FATAL(ctx, "Could not make context current!\n"); goto uninit; } - mpegl_load_functions(ctx->gl, vo->log); + mpegl_load_functions(&p->gl, ctx->log); - ctx->native_display_type = "x11"; - ctx->native_display = vo->x11->display; - return 0; + struct ra_gl_ctx_params params = { + .swap_buffers = mpegl_swap_buffers, + .native_display_type = "x11", + .native_display = vo->x11->display, + }; + + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + goto uninit; + + return true; uninit: mpegl_uninit(ctx); - return -1; + return false; } -static int mpegl_reconfig(struct MPGLContext *ctx) +static void resize(struct ra_ctx *ctx) { - vo_x11_config_vo_window(ctx->vo); - return 0; + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0); } -static int mpegl_control(struct MPGLContext *ctx, int *events, int request, - void *arg) +static bool mpegl_reconfig(struct ra_ctx *ctx) { - return 
vo_x11_control(ctx->vo, events, request, arg); + vo_x11_config_vo_window(ctx->vo); + resize(ctx); + return true; } -static void mpegl_swap_buffers(MPGLContext *ctx) +static int mpegl_control(struct ra_ctx *ctx, int *events, int request, + void *arg) { - struct priv *p = ctx->priv; - eglSwapBuffers(p->egl_display, p->egl_surface); + int ret = vo_x11_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) + resize(ctx); + return ret; } -static void mpegl_wakeup(struct MPGLContext *ctx) +static void mpegl_wakeup(struct ra_ctx *ctx) { vo_x11_wakeup(ctx->vo); } -static void mpegl_wait_events(struct MPGLContext *ctx, int64_t until_time_us) +static void mpegl_wait_events(struct ra_ctx *ctx, int64_t until_time_us) { vo_x11_wait_events(ctx->vo, until_time_us); } -const struct mpgl_driver mpgl_driver_x11egl = { +const struct ra_ctx_fns ra_ctx_x11_egl = { + .type = "opengl", .name = "x11egl", - .priv_size = sizeof(struct priv), - .init = mpegl_init, .reconfig = mpegl_reconfig, - .swap_buffers = mpegl_swap_buffers, .control = mpegl_control, .wakeup = mpegl_wakeup, .wait_events = mpegl_wait_events, + .init = mpegl_init, .uninit = mpegl_uninit, }; diff --git a/video/out/opengl/egl_helpers.c b/video/out/opengl/egl_helpers.c index ac152df..0033bf1 100644 --- a/video/out/opengl/egl_helpers.c +++ b/video/out/opengl/egl_helpers.c @@ -25,6 +25,7 @@ #include "egl_helpers.h" #include "common.h" +#include "utils.h" #include "context.h" #if HAVE_EGL_ANGLE @@ -43,41 +44,49 @@ #define EGL_OPENGL_ES3_BIT 0x00000040 #endif -// es_version = 0 (desktop), 2/3 (ES major version) -static bool create_context(EGLDisplay display, struct mp_log *log, bool probing, - int es_version, struct mpegl_opts *opts, +// es_version: 0 (core), 2 or 3 +static bool create_context(struct ra_ctx *ctx, EGLDisplay display, + int es_version, struct mpegl_cb cb, EGLContext *out_context, EGLConfig *out_config) { - int msgl = probing ? 
MSGL_V : MSGL_FATAL; - - EGLenum api = EGL_OPENGL_API; - EGLint rend = EGL_OPENGL_BIT; - const char *name = "Desktop OpenGL"; - if (es_version == 2) { + int msgl = ctx->opts.probing ? MSGL_V : MSGL_FATAL; + + EGLenum api; + EGLint rend; + const char *name; + + switch (es_version) { + case 0: + api = EGL_OPENGL_API; + rend = EGL_OPENGL_BIT; + name = "Desktop OpenGL"; + break; + case 2: api = EGL_OPENGL_ES_API; rend = EGL_OPENGL_ES2_BIT; - name = "GLES 2.0"; - } - if (es_version == 3) { + name = "GLES 2.x"; + break; + case 3: api = EGL_OPENGL_ES_API; rend = EGL_OPENGL_ES3_BIT; name = "GLES 3.x"; + break; + default: abort(); } - mp_msg(log, MSGL_V, "Trying to create %s context.\n", name); + MP_VERBOSE(ctx, "Trying to create %s context.\n", name); if (!eglBindAPI(api)) { - mp_msg(log, MSGL_V, "Could not bind API!\n"); + MP_VERBOSE(ctx, "Could not bind API!\n"); return false; } - EGLint attributes[] = { EGL_SURFACE_TYPE, EGL_WINDOW_BIT, EGL_RED_SIZE, 1, EGL_GREEN_SIZE, 1, EGL_BLUE_SIZE, 1, - EGL_ALPHA_SIZE, (opts->vo_flags & VOFLAG_ALPHA ) ? 1 : 0, + EGL_ALPHA_SIZE, ctx->opts.want_alpha ? 
1 : 0, EGL_RENDERABLE_TYPE, rend, EGL_NONE }; @@ -92,29 +101,34 @@ static bool create_context(EGLDisplay display, struct mp_log *log, bool probing, if (!num_configs) { talloc_free(configs); - mp_msg(log, msgl, "Could not choose EGLConfig!\n"); + MP_MSG(ctx, msgl, "Could not choose EGLConfig!\n"); return false; } int chosen = 0; - if (opts->refine_config) - chosen = opts->refine_config(opts->user_data, configs, num_configs); + if (cb.refine_config) + chosen = cb.refine_config(cb.user_data, configs, num_configs); EGLConfig config = configs[chosen]; talloc_free(configs); - EGLContext *ctx = NULL; + EGLContext *egl_ctx = NULL; if (es_version) { + if (!ra_gl_ctx_test_version(ctx, MPGL_VER(es_version, 0), true)) + return false; + EGLint attrs[] = { EGL_CONTEXT_CLIENT_VERSION, es_version, EGL_NONE }; - ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); + egl_ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); } else { for (int n = 0; mpgl_preferred_gl_versions[n]; n++) { int ver = mpgl_preferred_gl_versions[n]; + if (!ra_gl_ctx_test_version(ctx, ver, false)) + continue; EGLint attrs[] = { EGL_CONTEXT_MAJOR_VERSION, MPGL_VER_GET_MAJOR(ver), @@ -124,25 +138,25 @@ static bool create_context(EGLDisplay display, struct mp_log *log, bool probing, EGL_NONE }; - ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); - if (ctx) + egl_ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); + if (egl_ctx) break; } - if (!ctx) { + if (!egl_ctx && ra_gl_ctx_test_version(ctx, 140, false)) { // Fallback for EGL 1.4 without EGL_KHR_create_context. 
EGLint attrs[] = { EGL_NONE }; - ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); + egl_ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); } } - if (!ctx) { - mp_msg(log, msgl, "Could not create EGL context!\n"); + if (!egl_ctx) { + MP_MSG(ctx, msgl, "Could not create EGL context!\n"); return false; } - *out_context = ctx; + *out_context = egl_ctx; *out_config = config; return true; } @@ -152,56 +166,36 @@ static bool create_context(EGLDisplay display, struct mp_log *log, bool probing, // Create a context and return it and the config it was created with. If it // returns false, the out_* pointers are set to NULL. // vo_flags is a combination of VOFLAG_* values. -bool mpegl_create_context(EGLDisplay display, struct mp_log *log, int vo_flags, +bool mpegl_create_context(struct ra_ctx *ctx, EGLDisplay display, EGLContext *out_context, EGLConfig *out_config) { - return mpegl_create_context_opts(display, log, - &(struct mpegl_opts){.vo_flags = vo_flags}, out_context, out_config); + return mpegl_create_context_cb(ctx, display, (struct mpegl_cb){0}, + out_context, out_config); } // Create a context and return it and the config it was created with. If it // returns false, the out_* pointers are set to NULL. 
-bool mpegl_create_context_opts(EGLDisplay display, struct mp_log *log, - struct mpegl_opts *opts, - EGLContext *out_context, EGLConfig *out_config) +bool mpegl_create_context_cb(struct ra_ctx *ctx, EGLDisplay display, + struct mpegl_cb cb, EGLContext *out_context, + EGLConfig *out_config) { - assert(opts); - *out_context = NULL; *out_config = NULL; const char *version = eglQueryString(display, EGL_VERSION); const char *vendor = eglQueryString(display, EGL_VENDOR); const char *apis = eglQueryString(display, EGL_CLIENT_APIS); - mp_verbose(log, "EGL_VERSION=%s\nEGL_VENDOR=%s\nEGL_CLIENT_APIS=%s\n", + MP_VERBOSE(ctx, "EGL_VERSION=%s\nEGL_VENDOR=%s\nEGL_CLIENT_APIS=%s\n", STR_OR_ERR(version), STR_OR_ERR(vendor), STR_OR_ERR(apis)); - bool probing = opts->vo_flags & VOFLAG_PROBING; - int msgl = probing ? MSGL_V : MSGL_FATAL; - bool try_gles = !(opts->vo_flags & VOFLAG_NO_GLES); - - if (!(opts->vo_flags & VOFLAG_GLES)) { - // Desktop OpenGL - if (create_context(display, log, try_gles | probing, 0, opts, - out_context, out_config)) - return true; - } - - if (try_gles && !(opts->vo_flags & VOFLAG_GLES2)) { - // ES 3.x - if (create_context(display, log, true, 3, opts, - out_context, out_config)) - return true; - } - - if (try_gles) { - // ES 2.0 - if (create_context(display, log, probing, 2, opts, - out_context, out_config)) + int es[] = {0, 3, 2}; // preference order + for (int i = 0; i < MP_ARRAY_SIZE(es); i++) { + if (create_context(ctx, display, es[i], cb, out_context, out_config)) return true; } - mp_msg(log, msgl, "Could not create a GL context.\n"); + int msgl = ctx->opts.probing ? 
MSGL_V : MSGL_ERR; + MP_MSG(ctx, msgl, "Could not create a GL context.\n"); return false; } diff --git a/video/out/opengl/egl_helpers.h b/video/out/opengl/egl_helpers.h index 05f9dcc..eaaf9d7 100644 --- a/video/out/opengl/egl_helpers.h +++ b/video/out/opengl/egl_helpers.h @@ -6,26 +6,23 @@ #include <EGL/egl.h> #include <EGL/eglext.h> +#include "video/out/gpu/context.h" + struct mp_log; -bool mpegl_create_context(EGLDisplay display, struct mp_log *log, int vo_flags, +bool mpegl_create_context(struct ra_ctx *ctx, EGLDisplay display, EGLContext *out_context, EGLConfig *out_config); -struct mpegl_opts { - // combination of VOFLAG_* values. - int vo_flags; - - // for callbacks - void *user_data; - +struct mpegl_cb { // if set, pick the desired config from the given list and return its index // defaults to 0 (they are sorted by eglChooseConfig) int (*refine_config)(void *user_data, EGLConfig *configs, int num_configs); + void *user_data; }; -bool mpegl_create_context_opts(EGLDisplay display, struct mp_log *log, - struct mpegl_opts *opts, - EGLContext *out_context, EGLConfig *out_config); +bool mpegl_create_context_cb(struct ra_ctx *ctx, EGLDisplay display, + struct mpegl_cb cb, EGLContext *out_context, + EGLConfig *out_config); struct GL; void mpegl_load_functions(struct GL *gl, struct mp_log *log); diff --git a/video/out/opengl/formats.h b/video/out/opengl/formats.h index 3da6ede..f727a3b 100644 --- a/video/out/opengl/formats.h +++ b/video/out/opengl/formats.h @@ -2,7 +2,6 @@ #define MPGL_FORMATS_H_ #include "common.h" -#include "ra.h" struct gl_format { const char *name; // symbolic name for user interaction/debugging diff --git a/video/out/opengl/gl_utils.c b/video/out/opengl/gl_utils.c deleted file mode 100644 index bce2dab..0000000 --- a/video/out/opengl/gl_utils.c +++ /dev/null @@ -1,291 +0,0 @@ -/* - * This file is part of mpv. - * Parts based on MPlayer code by Reimar Döffinger. 
- * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <stddef.h> -#include <stdint.h> -#include <stdlib.h> -#include <string.h> -#include <stdarg.h> -#include <assert.h> - -#include <libavutil/sha.h> -#include <libavutil/intreadwrite.h> -#include <libavutil/mem.h> - -#include "osdep/io.h" - -#include "common/common.h" -#include "options/path.h" -#include "stream/stream.h" -#include "formats.h" -#include "ra_gl.h" -#include "gl_utils.h" - -// GLU has this as gluErrorString (we don't use GLU, as it is legacy-OpenGL) -static const char *gl_error_to_string(GLenum error) -{ - switch (error) { - case GL_INVALID_ENUM: return "INVALID_ENUM"; - case GL_INVALID_VALUE: return "INVALID_VALUE"; - case GL_INVALID_OPERATION: return "INVALID_OPERATION"; - case GL_INVALID_FRAMEBUFFER_OPERATION: return "INVALID_FRAMEBUFFER_OPERATION"; - case GL_OUT_OF_MEMORY: return "OUT_OF_MEMORY"; - default: return "unknown"; - } -} - -void gl_check_error(GL *gl, struct mp_log *log, const char *info) -{ - for (;;) { - GLenum error = gl->GetError(); - if (error == GL_NO_ERROR) - break; - mp_msg(log, MSGL_ERR, "%s: OpenGL error %s.\n", info, - gl_error_to_string(error)); - } -} - -static int get_alignment(int stride) -{ - if (stride % 8 == 0) - return 8; - if (stride % 4 == 0) - return 4; - if (stride % 2 == 0) - return 2; - return 1; -} - -// upload a texture, handling things like stride and 
slices -// target: texture target, usually GL_TEXTURE_2D -// format, type: texture parameters -// dataptr, stride: image data -// x, y, width, height: part of the image to upload -void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, - const void *dataptr, int stride, - int x, int y, int w, int h) -{ - int bpp = gl_bytes_per_pixel(format, type); - const uint8_t *data = dataptr; - int y_max = y + h; - if (w <= 0 || h <= 0 || !bpp) - return; - if (stride < 0) { - data += (h - 1) * stride; - stride = -stride; - } - gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(stride)); - int slice = h; - if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) { - // this is not always correct, but should work for MPlayer - gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride / bpp); - } else { - if (stride != bpp * w) - slice = 1; // very inefficient, but at least it works - } - for (; y + slice <= y_max; y += slice) { - gl->TexSubImage2D(target, 0, x, y, w, slice, format, type, data); - data += stride * slice; - } - if (y < y_max) - gl->TexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data); - if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) - gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); - gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); -} - -mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h) -{ - if (gl->es) - return NULL; // ES can't read from front buffer - mp_image_t *image = mp_image_alloc(IMGFMT_RGB24, w, h); - if (!image) - return NULL; - gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); - GLenum obj = fbo ? 
GL_COLOR_ATTACHMENT0 : GL_FRONT; - gl->PixelStorei(GL_PACK_ALIGNMENT, 1); - gl->ReadBuffer(obj); - //flip image while reading (and also avoid stride-related trouble) - for (int y = 0; y < h; y++) { - gl->ReadPixels(0, h - y - 1, w, 1, GL_RGB, GL_UNSIGNED_BYTE, - image->planes[0] + y * image->stride[0]); - } - gl->PixelStorei(GL_PACK_ALIGNMENT, 4); - gl->BindFramebuffer(GL_FRAMEBUFFER, 0); - return image; -} - -static void gl_vao_enable_attribs(struct gl_vao *vao) -{ - GL *gl = vao->gl; - - for (int n = 0; n < vao->num_entries; n++) { - const struct ra_renderpass_input *e = &vao->entries[n]; - GLenum type = 0; - bool normalized = false; - switch (e->type) { - case RA_VARTYPE_INT: - type = GL_INT; - break; - case RA_VARTYPE_FLOAT: - type = GL_FLOAT; - break; - case RA_VARTYPE_BYTE_UNORM: - type = GL_UNSIGNED_BYTE; - normalized = true; - break; - default: - abort(); - } - assert(e->dim_m == 1); - - gl->EnableVertexAttribArray(n); - gl->VertexAttribPointer(n, e->dim_v, type, normalized, - vao->stride, (void *)(intptr_t)e->offset); - } -} - -void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, - const struct ra_renderpass_input *entries, - int num_entries) -{ - assert(!vao->vao); - assert(!vao->buffer); - - *vao = (struct gl_vao){ - .gl = gl, - .stride = stride, - .entries = entries, - .num_entries = num_entries, - }; - - gl->GenBuffers(1, &vao->buffer); - - if (gl->BindVertexArray) { - gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); - - gl->GenVertexArrays(1, &vao->vao); - gl->BindVertexArray(vao->vao); - gl_vao_enable_attribs(vao); - gl->BindVertexArray(0); - - gl->BindBuffer(GL_ARRAY_BUFFER, 0); - } -} - -void gl_vao_uninit(struct gl_vao *vao) -{ - GL *gl = vao->gl; - if (!gl) - return; - - if (gl->DeleteVertexArrays) - gl->DeleteVertexArrays(1, &vao->vao); - gl->DeleteBuffers(1, &vao->buffer); - - *vao = (struct gl_vao){0}; -} - -static void gl_vao_bind(struct gl_vao *vao) -{ - GL *gl = vao->gl; - - if (gl->BindVertexArray) { - gl->BindVertexArray(vao->vao); - } 
else { - gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); - gl_vao_enable_attribs(vao); - gl->BindBuffer(GL_ARRAY_BUFFER, 0); - } -} - -static void gl_vao_unbind(struct gl_vao *vao) -{ - GL *gl = vao->gl; - - if (gl->BindVertexArray) { - gl->BindVertexArray(0); - } else { - for (int n = 0; n < vao->num_entries; n++) - gl->DisableVertexAttribArray(n); - } -} - -// Draw the vertex data (as described by the gl_vao_entry entries) in ptr -// to the screen. num is the number of vertexes. prim is usually GL_TRIANGLES. -// If ptr is NULL, then skip the upload, and use the data uploaded with the -// previous call. -void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num) -{ - GL *gl = vao->gl; - - if (ptr) { - gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); - gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_STREAM_DRAW); - gl->BindBuffer(GL_ARRAY_BUFFER, 0); - } - - gl_vao_bind(vao); - - gl->DrawArrays(prim, 0, num); - - gl_vao_unbind(vao); -} - -static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id, - GLenum severity, GLsizei length, - const GLchar *message, const void *userParam) -{ - // keep in mind that the debug callback can be asynchronous - struct mp_log *log = (void *)userParam; - int level = MSGL_ERR; - switch (severity) { - case GL_DEBUG_SEVERITY_NOTIFICATION:level = MSGL_V; break; - case GL_DEBUG_SEVERITY_LOW: level = MSGL_INFO; break; - case GL_DEBUG_SEVERITY_MEDIUM: level = MSGL_WARN; break; - case GL_DEBUG_SEVERITY_HIGH: level = MSGL_ERR; break; - } - mp_msg(log, level, "GL: %s\n", message); -} - -void gl_set_debug_logger(GL *gl, struct mp_log *log) -{ - if (gl->DebugMessageCallback) - gl->DebugMessageCallback(log ? gl_debug_cb : NULL, log); -} - -int gl_get_fb_depth(GL *gl, int fbo) -{ - if ((gl->es < 300 && !gl->version) || !(gl->mpgl_caps & MPGL_CAP_FB)) - return -1; - - gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); - - GLenum obj = gl->version ? 
GL_BACK_LEFT : GL_BACK; - if (fbo) - obj = GL_COLOR_ATTACHMENT0; - - GLint depth_g = -1; - - gl->GetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, obj, - GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &depth_g); - - gl->BindFramebuffer(GL_FRAMEBUFFER, 0); - - return depth_g > 0 ? depth_g : -1; -} diff --git a/video/out/opengl/gl_utils.h b/video/out/opengl/gl_utils.h deleted file mode 100644 index 306ee23..0000000 --- a/video/out/opengl/gl_utils.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * This file is part of mpv. - * Parts based on MPlayer code by Reimar Döffinger. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef MP_GL_UTILS_ -#define MP_GL_UTILS_ - -#include <math.h> - -#include "common.h" -#include "ra.h" - -struct mp_log; - -void gl_check_error(GL *gl, struct mp_log *log, const char *info); - -void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, - const void *dataptr, int stride, - int x, int y, int w, int h); - -mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h); - -struct gl_vao { - GL *gl; - GLuint vao; // the VAO object, or 0 if unsupported by driver - GLuint buffer; // GL_ARRAY_BUFFER used for the data - int stride; // size of each element (interleaved elements are assumed) - const struct ra_renderpass_input *entries; - int num_entries; -}; - -void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, - const struct ra_renderpass_input *entries, - int num_entries); -void gl_vao_uninit(struct gl_vao *vao); -void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num); - -void gl_set_debug_logger(GL *gl, struct mp_log *log); - -int gl_get_fb_depth(GL *gl, int fbo); - -#endif diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c index d40bafe..1a7df20 100644 --- a/video/out/opengl/hwdec_cuda.c +++ b/video/out/opengl/hwdec_cuda.c @@ -32,11 +32,10 @@ #include <libavutil/hwcontext.h> #include <libavutil/hwcontext_cuda.h> +#include "video/out/gpu/hwdec.h" #include "formats.h" -#include "hwdec.h" #include "options/m_config.h" #include "ra_gl.h" -#include "video.h" struct priv_owner { struct mp_hwdec_ctx hwctx; @@ -161,11 +160,9 @@ static int cuda_init(struct ra_hwdec *hw) goto error; p->hwctx = (struct mp_hwdec_ctx) { - .type = HWDEC_CUDA, - .ctx = p->decode_ctx, + .driver_name = hw->driver->name, .av_device_ref = hw_device_ctx, }; - p->hwctx.driver_name = hw->driver->name; hwdec_devices_add(hw->devs, &p->hwctx); return 0; @@ -180,8 +177,7 @@ static void cuda_uninit(struct ra_hwdec *hw) { struct priv_owner *p = hw->priv; - if (p->hwctx.ctx) - hwdec_devices_remove(hw->devs, &p->hwctx); + 
hwdec_devices_remove(hw->devs, &p->hwctx); av_buffer_unref(&p->hwctx.av_device_ref); if (p->decode_ctx && p->decode_ctx != p->display_ctx) @@ -327,8 +323,7 @@ static int mapper_map(struct ra_hwdec_mapper *mapper) } const struct ra_hwdec_driver ra_hwdec_cuda = { - .name = "cuda", - .api = HWDEC_CUDA, + .name = "cuda-nvdec", .imgfmts = {IMGFMT_CUDA, 0}, .priv_size = sizeof(struct priv_owner), .init = cuda_init, diff --git a/video/out/opengl/hwdec_d3d11egl.c b/video/out/opengl/hwdec_d3d11egl.c index 3988f83..e741633 100644 --- a/video/out/opengl/hwdec_d3d11egl.c +++ b/video/out/opengl/hwdec_d3d11egl.c @@ -27,10 +27,10 @@ #include "common/common.h" #include "osdep/timer.h" #include "osdep/windows_utils.h" -#include "hwdec.h" +#include "video/out/gpu/hwdec.h" #include "ra_gl.h" #include "video/hwdec.h" -#include "video/decode/d3d.h" +#include "video/d3d.h" #ifndef EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE #define EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE 0x33AB @@ -75,8 +75,7 @@ static void uninit(struct ra_hwdec *hw) { struct priv_owner *p = hw->priv; - if (p->hwctx.ctx) - hwdec_devices_remove(hw->devs, &p->hwctx); + hwdec_devices_remove(hw->devs, &p->hwctx); if (p->d3d11_device) ID3D11Device_Release(p->d3d11_device); @@ -180,10 +179,7 @@ static int init(struct ra_hwdec *hw) ID3D10Multithread_Release(multithread); p->hwctx = (struct mp_hwdec_ctx){ - .type = HWDEC_D3D11VA, .driver_name = hw->driver->name, - .ctx = p->d3d11_device, - .download_image = d3d11_download_image, .av_device_ref = d3d11_wrap_device_ref(p->d3d11_device), }; hwdec_devices_add(hw->devs, &p->hwctx); @@ -336,7 +332,6 @@ static void mapper_unmap(struct ra_hwdec_mapper *mapper) const struct ra_hwdec_driver ra_hwdec_d3d11egl = { .name = "d3d11-egl", .priv_size = sizeof(struct priv_owner), - .api = HWDEC_D3D11VA, .imgfmts = {IMGFMT_D3D11NV12, 0}, .init = init, .uninit = uninit, diff --git a/video/out/opengl/hwdec_d3d11eglrgb.c b/video/out/opengl/hwdec_d3d11eglrgb.c index fa3976f..c8f6580 100644 --- 
a/video/out/opengl/hwdec_d3d11eglrgb.c +++ b/video/out/opengl/hwdec_d3d11eglrgb.c @@ -27,10 +27,10 @@ #include "common/common.h" #include "osdep/timer.h" #include "osdep/windows_utils.h" -#include "hwdec.h" +#include "video/out/gpu/hwdec.h" #include "ra_gl.h" #include "video/hwdec.h" -#include "video/decode/d3d.h" +#include "video/d3d.h" #ifndef EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE #define EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE 0x3AAB @@ -54,8 +54,7 @@ static void uninit(struct ra_hwdec *hw) { struct priv_owner *p = hw->priv; - if (p->hwctx.ctx) - hwdec_devices_remove(hw->devs, &p->hwctx); + hwdec_devices_remove(hw->devs, &p->hwctx); if (p->d3d11_device) ID3D11Device_Release(p->d3d11_device); @@ -137,9 +136,7 @@ static int init(struct ra_hwdec *hw) } p->hwctx = (struct mp_hwdec_ctx){ - .type = HWDEC_D3D11VA, .driver_name = hw->driver->name, - .ctx = p->d3d11_device, .av_device_ref = d3d11_wrap_device_ref(p->d3d11_device), }; hwdec_devices_add(hw->devs, &p->hwctx); @@ -261,7 +258,6 @@ static int mapper_map(struct ra_hwdec_mapper *mapper) const struct ra_hwdec_driver ra_hwdec_d3d11eglrgb = { .name = "d3d11-egl-rgb", .priv_size = sizeof(struct priv_owner), - .api = HWDEC_D3D11VA, .imgfmts = {IMGFMT_D3D11RGB, 0}, .init = init, .uninit = uninit, diff --git a/video/out/opengl/hwdec_drmprime_drm.c b/video/out/opengl/hwdec_drmprime_drm.c new file mode 100644 index 0000000..faa099a --- /dev/null +++ b/video/out/opengl/hwdec_drmprime_drm.c @@ -0,0 +1,268 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <stdbool.h> + +#include <libavutil/hwcontext_drm.h> + +#include "common.h" +#include "video/hwdec.h" +#include "common/msg.h" +#include "options/m_config.h" +#include "libmpv/opengl_cb.h" +#include "video/out/drm_common.h" +#include "video/out/drm_prime.h" +#include "video/out/gpu/hwdec.h" +#include "video/mp_image.h" + +#include "ra_gl.h" + +extern const struct m_sub_options drm_conf; + +struct drm_frame { + struct drm_prime_framebuffer fb; + struct mp_image *image; // associated mpv image +}; + +struct priv { + struct mp_log *log; + + struct mp_image_params params; + + struct drm_atomic_context *ctx; + struct drm_frame current_frame, old_frame; + + struct mp_rect src, dst; + + int display_w, display_h; +}; + +static void set_current_frame(struct ra_hwdec *hw, struct drm_frame *frame) +{ + struct priv *p = hw->priv; + + // frame will be on screen after next vsync + // current_frame is currently the displayed frame and will be replaced + // by frame after next vsync. + // We used old frame as triple buffering to make sure that the drm framebuffer + // is not being displayed when we release it. 
+ + if (p->ctx) { + drm_prime_destroy_framebuffer(p->log, p->ctx->fd, &p->old_frame.fb); + } + + mp_image_setrefp(&p->old_frame.image, p->current_frame.image); + p->old_frame.fb = p->current_frame.fb; + + if (frame) { + p->current_frame.fb = frame->fb; + mp_image_setrefp(&p->current_frame.image, frame->image); + } else { + memset(&p->current_frame.fb, 0, sizeof(p->current_frame.fb)); + mp_image_setrefp(&p->current_frame.image, NULL); + } +} + +static void scale_dst_rect(struct ra_hwdec *hw, int source_w, int source_h ,struct mp_rect *src, struct mp_rect *dst) +{ + struct priv *p = hw->priv; + double hratio, vratio, ratio; + + // drm can allow to have a layer that has a different size from framebuffer + // we scale here the destination size to video mode + hratio = vratio = ratio = 1.0; + + hratio = (double)p->display_w / (double)source_w; + vratio = (double)p->display_h / (double)source_h; + ratio = hratio <= vratio ? hratio : vratio; + + dst->x0 = src->x0 * ratio; + dst->x1 = src->x1 * ratio; + dst->y0 = src->y0 * ratio; + dst->y1 = src->y1 * ratio; + + int offset_x = (p->display_w - ratio * source_w) / 2; + int offset_y = (p->display_h - ratio * source_h) / 2; + + dst->x0 += offset_x; + dst->x1 += offset_x; + dst->y0 += offset_y; + dst->y1 += offset_y; +} + +static int overlay_frame(struct ra_hwdec *hw, struct mp_image *hw_image, + struct mp_rect *src, struct mp_rect *dst, bool newframe) +{ + struct priv *p = hw->priv; + GL *gl = ra_gl_get(hw->ra); + AVDRMFrameDescriptor *desc = NULL; + drmModeAtomicReq *request = NULL; + struct drm_frame next_frame = {0}; + int ret; + + if (hw_image) { + + // grab opengl-cb windowing info to eventually upscale the overlay + // as egl windows could be upscaled to primary plane. + struct mpv_opengl_cb_window_pos *glparams = + gl ? 
(struct mpv_opengl_cb_window_pos *) + mpgl_get_native_display(gl, "opengl-cb-window-pos") : NULL; + if (glparams) { + scale_dst_rect(hw, glparams->width, glparams->height, dst, &p->dst); + } else { + p->dst = *dst; + } + p->src = *src; + + // grab drm interop info + struct mpv_opengl_cb_drm_params *drmparams = + gl ? (struct mpv_opengl_cb_drm_params *) + mpgl_get_native_display(gl, "opengl-cb-drm-params") : NULL; + if (drmparams) + request = (drmModeAtomicReq *)drmparams->atomic_request; + + next_frame.image = hw_image; + desc = (AVDRMFrameDescriptor *)hw_image->planes[0]; + + if (desc) { + int srcw = p->src.x1 - p->src.x0; + int srch = p->src.y1 - p->src.y0; + int dstw = MP_ALIGN_UP(p->dst.x1 - p->dst.x0, 2); + int dsth = MP_ALIGN_UP(p->dst.y1 - p->dst.y0, 2); + + if (drm_prime_create_framebuffer(p->log, p->ctx->fd, desc, srcw, srch, &next_frame.fb)) { + ret = -1; + goto fail; + } + + if (request) { + drm_object_set_property(request, p->ctx->overlay_plane, "FB_ID", next_frame.fb.fb_id); + drm_object_set_property(request, p->ctx->overlay_plane, "CRTC_ID", p->ctx->crtc->id); + drm_object_set_property(request, p->ctx->overlay_plane, "SRC_X", p->src.x0 << 16); + drm_object_set_property(request, p->ctx->overlay_plane, "SRC_Y", p->src.y0 << 16); + drm_object_set_property(request, p->ctx->overlay_plane, "SRC_W", srcw << 16); + drm_object_set_property(request, p->ctx->overlay_plane, "SRC_H", srch << 16); + drm_object_set_property(request, p->ctx->overlay_plane, "CRTC_X", MP_ALIGN_DOWN(p->dst.x0, 2)); + drm_object_set_property(request, p->ctx->overlay_plane, "CRTC_Y", MP_ALIGN_DOWN(p->dst.y0, 2)); + drm_object_set_property(request, p->ctx->overlay_plane, "CRTC_W", dstw); + drm_object_set_property(request, p->ctx->overlay_plane, "CRTC_H", dsth); + drm_object_set_property(request, p->ctx->overlay_plane, "ZPOS", 0); + } else { + ret = drmModeSetPlane(p->ctx->fd, p->ctx->overlay_plane->id, p->ctx->crtc->id, next_frame.fb.fb_id, 0, + MP_ALIGN_DOWN(p->dst.x0, 2), 
MP_ALIGN_DOWN(p->dst.y0, 2), dstw, dsth, + p->src.x0 << 16, p->src.y0 << 16 , srcw << 16, srch << 16); + if (ret < 0) { + MP_ERR(hw, "Failed to set the plane %d (buffer %d).\n", p->ctx->overlay_plane->id, + next_frame.fb.fb_id); + goto fail; + } + } + } + } + + set_current_frame(hw, &next_frame); + return 0; + + fail: + drm_prime_destroy_framebuffer(p->log, p->ctx->fd, &next_frame.fb); + return ret; +} + +static void uninit(struct ra_hwdec *hw) +{ + struct priv *p = hw->priv; + + set_current_frame(hw, NULL); + + if (p->ctx) { + drm_atomic_destroy_context(p->ctx); + p->ctx = NULL; + } +} + +static int init(struct ra_hwdec *hw) +{ + struct priv *p = hw->priv; + int drm_overlay; + + if (!ra_is_gl(hw->ra)) + return -1; + + p->log = hw->log; + + void *tmp = talloc_new(NULL); + struct drm_opts *opts = mp_get_config_group(tmp, hw->global, &drm_conf); + drm_overlay = opts->drm_overlay_id; + talloc_free(tmp); + + GL *gl = ra_gl_get(hw->ra); + struct mpv_opengl_cb_drm_params *params = + gl ? (struct mpv_opengl_cb_drm_params *) + mpgl_get_native_display(gl, "opengl-cb-drm-params") : NULL; + if (!params) { + MP_VERBOSE(hw, "Could not get drm interop info.\n"); + goto err; + } + + if (params->fd) { + p->ctx = drm_atomic_create_context(p->log, params->fd, params->crtc_id, + drm_overlay); + if (!p->ctx) { + mp_err(p->log, "Failed to retrieve DRM atomic context.\n"); + goto err; + } + } else { + mp_err(p->log, "Failed to retrieve DRM fd from native display.\n"); + goto err; + } + + drmModeCrtcPtr crtc; + crtc = drmModeGetCrtc(p->ctx->fd, p->ctx->crtc->id); + if (crtc) { + p->display_w = crtc->mode.hdisplay; + p->display_h = crtc->mode.vdisplay; + drmModeFreeCrtc(crtc); + } + + + uint64_t has_prime; + if (drmGetCap(p->ctx->fd, DRM_CAP_PRIME, &has_prime) < 0) { + MP_ERR(hw, "Card does not support prime handles.\n"); + goto err; + } + + return 0; + +err: + uninit(hw); + return -1; +} + +const struct ra_hwdec_driver ra_hwdec_drmprime_drm = { + .name = "drmprime-drm", + .priv_size = 
sizeof(struct priv), + .imgfmts = {IMGFMT_DRMPRIME, 0}, + .init = init, + .overlay_frame = overlay_frame, + .uninit = uninit, +}; diff --git a/video/out/opengl/hwdec_dxva2egl.c b/video/out/opengl/hwdec_dxva2egl.c index 01fb482..0f8a4ad 100644 --- a/video/out/opengl/hwdec_dxva2egl.c +++ b/video/out/opengl/hwdec_dxva2egl.c @@ -27,10 +27,10 @@ #include "common/common.h" #include "osdep/timer.h" #include "osdep/windows_utils.h" -#include "hwdec.h" +#include "video/out/gpu/hwdec.h" #include "ra_gl.h" #include "video/hwdec.h" -#include "video/decode/d3d.h" +#include "video/d3d.h" struct priv_owner { struct mp_hwdec_ctx hwctx; @@ -58,8 +58,8 @@ static void uninit(struct ra_hwdec *hw) { struct priv_owner *p = hw->priv; - if (p->hwctx.ctx) - hwdec_devices_remove(hw->devs, &p->hwctx); + hwdec_devices_remove(hw->devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); if (p->device9ex) IDirect3DDevice9Ex_Release(p->device9ex); @@ -180,9 +180,7 @@ static int init(struct ra_hwdec *hw) ra_hwdec_mapper_free(&mapper); p->hwctx = (struct mp_hwdec_ctx){ - .type = HWDEC_DXVA2, .driver_name = hw->driver->name, - .ctx = (IDirect3DDevice9 *)p->device9ex, .av_device_ref = d3d9_wrap_device_ref((IDirect3DDevice9 *)p->device9ex), }; hwdec_devices_add(hw->devs, &p->hwctx); @@ -368,7 +366,6 @@ static int mapper_map(struct ra_hwdec_mapper *mapper) const struct ra_hwdec_driver ra_hwdec_dxva2egl = { .name = "dxva2-egl", .priv_size = sizeof(struct priv_owner), - .api = HWDEC_DXVA2, .imgfmts = {IMGFMT_DXVA2, 0}, .init = init, .uninit = uninit, diff --git a/video/out/opengl/hwdec_dxva2gldx.c b/video/out/opengl/hwdec_dxva2gldx.c index fd9c80b..984fd7f 100644 --- a/video/out/opengl/hwdec_dxva2gldx.c +++ b/video/out/opengl/hwdec_dxva2gldx.c @@ -20,10 +20,10 @@ #include "common/common.h" #include "osdep/windows_utils.h" -#include "hwdec.h" +#include "video/out/gpu/hwdec.h" #include "ra_gl.h" #include "video/hwdec.h" -#include "video/decode/d3d.h" +#include "video/d3d.h" // for 
WGL_ACCESS_READ_ONLY_NV #include <GL/wglext.h> @@ -48,8 +48,8 @@ static void uninit(struct ra_hwdec *hw) { struct priv_owner *p = hw->priv; - if (p->hwctx.ctx) - hwdec_devices_remove(hw->devs, &p->hwctx); + hwdec_devices_remove(hw->devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); if (p->device) IDirect3DDevice9Ex_Release(p->device); @@ -78,9 +78,7 @@ static int init(struct ra_hwdec *hw) IDirect3DDevice9Ex_AddRef(p->device); p->hwctx = (struct mp_hwdec_ctx){ - .type = HWDEC_DXVA2, .driver_name = hw->driver->name, - .ctx = (IDirect3DDevice9 *)p->device, .av_device_ref = d3d9_wrap_device_ref((IDirect3DDevice9 *)p->device), }; hwdec_devices_add(hw->devs, &p->hwctx); @@ -229,7 +227,6 @@ static int mapper_map(struct ra_hwdec_mapper *mapper) const struct ra_hwdec_driver ra_hwdec_dxva2gldx = { .name = "dxva2-dxinterop", .priv_size = sizeof(struct priv_owner), - .api = HWDEC_DXVA2, .imgfmts = {IMGFMT_DXVA2, 0}, .init = init, .uninit = uninit, diff --git a/video/out/opengl/hwdec_ios.m b/video/out/opengl/hwdec_ios.m index 8e020de..b8d4876 100644 --- a/video/out/opengl/hwdec_ios.m +++ b/video/out/opengl/hwdec_ios.m @@ -27,10 +27,9 @@ #include "config.h" +#include "video/out/gpu/hwdec.h" #include "video/mp_image_pool.h" -#include "video/vt.h" #include "ra_gl.h" -#include "hwdec.h" struct priv_owner { struct mp_hwdec_ctx hwctx; @@ -70,15 +69,11 @@ static int init(struct ra_hwdec *hw) return -1; p->hwctx = (struct mp_hwdec_ctx){ - .type = HWDEC_VIDEOTOOLBOX, - .download_image = mp_vt_download_image, - .ctx = &p->hwctx, + .driver_name = hw->driver->name, }; -#if HAVE_VIDEOTOOLBOX_HWACCEL_NEW av_hwdevice_ctx_create(&p->hwctx.av_device_ref, AV_HWDEVICE_TYPE_VIDEOTOOLBOX, NULL, NULL, 0); -#endif hwdec_devices_add(hw->devs, &p->hwctx); @@ -89,8 +84,7 @@ static void uninit(struct ra_hwdec *hw) { struct priv_owner *p = hw->priv; - if (p->hwctx.ctx) - hwdec_devices_remove(hw->devs, &p->hwctx); + hwdec_devices_remove(hw->devs, &p->hwctx); 
av_buffer_unref(&p->hwctx.av_device_ref); } @@ -132,7 +126,6 @@ static const struct ra_format *find_la_variant(struct ra *ra, static int mapper_init(struct ra_hwdec_mapper *mapper) { struct priv *p = mapper->priv; - GL *gl = ra_gl_get(mapper->ra); mapper->dst_params = mapper->src_params; mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt; @@ -243,8 +236,11 @@ static int mapper_map(struct ra_hwdec_mapper *mapper) .src_linear = true, }; - mapper->tex[i] = ra_create_wrapped_tex(mapper->ra, ¶ms, - p->gl_planes[i]); + mapper->tex[i] = ra_create_wrapped_tex( + mapper->ra, + ¶ms, + CVOpenGLESTextureGetName(p->gl_planes[i]) + ); if (!mapper->tex[i]) return -1; } @@ -264,7 +260,6 @@ static void mapper_uninit(struct ra_hwdec_mapper *mapper) const struct ra_hwdec_driver ra_hwdec_videotoolbox = { .name = "videotoolbox", .priv_size = sizeof(struct priv_owner), - .api = HWDEC_VIDEOTOOLBOX, .imgfmts = {IMGFMT_VIDEOTOOLBOX, 0}, .init = init, .uninit = uninit, diff --git a/video/out/opengl/hwdec_osx.c b/video/out/opengl/hwdec_osx.c index 348a5e1..ca7a004 100644 --- a/video/out/opengl/hwdec_osx.c +++ b/video/out/opengl/hwdec_osx.c @@ -29,9 +29,8 @@ #include "config.h" #include "video/mp_image_pool.h" -#include "video/vt.h" +#include "video/out/gpu/hwdec.h" #include "ra_gl.h" -#include "hwdec.h" struct priv_owner { struct mp_hwdec_ctx hwctx; @@ -71,15 +70,11 @@ static int init(struct ra_hwdec *hw) return -1; p->hwctx = (struct mp_hwdec_ctx){ - .type = HWDEC_VIDEOTOOLBOX, - .download_image = mp_vt_download_image, - .ctx = &p->hwctx, + .driver_name = hw->driver->name, }; -#if HAVE_VIDEOTOOLBOX_HWACCEL_NEW av_hwdevice_ctx_create(&p->hwctx.av_device_ref, AV_HWDEVICE_TYPE_VIDEOTOOLBOX, NULL, NULL, 0); -#endif hwdec_devices_add(hw->devs, &p->hwctx); @@ -90,8 +85,7 @@ static void uninit(struct ra_hwdec *hw) { struct priv_owner *p = hw->priv; - if (p->hwctx.ctx) - hwdec_devices_remove(hw->devs, &p->hwctx); + hwdec_devices_remove(hw->devs, &p->hwctx); 
av_buffer_unref(&p->hwctx.av_device_ref); } @@ -214,7 +208,6 @@ static void mapper_uninit(struct ra_hwdec_mapper *mapper) const struct ra_hwdec_driver ra_hwdec_videotoolbox = { .name = "videotoolbox", .priv_size = sizeof(struct priv_owner), - .api = HWDEC_VIDEOTOOLBOX, .imgfmts = {IMGFMT_VIDEOTOOLBOX, 0}, .init = init, .uninit = uninit, diff --git a/video/out/opengl/hwdec_rpi.c b/video/out/opengl/hwdec_rpi.c index 6f39c3e..6c080f1 100644 --- a/video/out/opengl/hwdec_rpi.c +++ b/video/out/opengl/hwdec_rpi.c @@ -33,8 +33,8 @@ #include "common/common.h" #include "common/msg.h" #include "video/mp_image.h" +#include "video/out/gpu/hwdec.h" -#include "hwdec.h" #include "common.h" #include "ra_gl.h" @@ -378,7 +378,6 @@ static int create(struct ra_hwdec *hw) const struct ra_hwdec_driver ra_hwdec_rpi_overlay = { .name = "rpi-overlay", - .api = HWDEC_RPI, .priv_size = sizeof(struct priv), .imgfmts = {IMGFMT_MMAL, IMGFMT_420P, 0}, .init = create, diff --git a/video/out/opengl/hwdec_vaegl.c b/video/out/opengl/hwdec_vaegl.c index a0e3222..b4587c5 100644 --- a/video/out/opengl/hwdec_vaegl.c +++ b/video/out/opengl/hwdec_vaegl.c @@ -18,6 +18,7 @@ #include <stddef.h> #include <string.h> #include <assert.h> +#include <unistd.h> #include <EGL/egl.h> #include <EGL/eglext.h> @@ -30,9 +31,9 @@ #include "config.h" -#include "hwdec.h" -#include "video/vaapi.h" +#include "video/out/gpu/hwdec.h" #include "video/mp_image_pool.h" +#include "video/vaapi.h" #include "common.h" #include "ra_gl.h" @@ -127,6 +128,11 @@ struct priv { EGLImageKHR images[4]; VAImage current_image; bool buffer_acquired; +#if VA_CHECK_VERSION(1, 1, 0) + bool esh_not_implemented; + VADRMPRIMESurfaceDescriptor desc; + bool surface_acquired; +#endif EGLImageKHR (EGLAPIENTRY *CreateImageKHR)(EGLDisplay, EGLContext, EGLenum, EGLClientBuffer, @@ -209,6 +215,14 @@ static void mapper_unmap(struct ra_hwdec_mapper *mapper) p->images[n] = 0; } +#if VA_CHECK_VERSION(1, 1, 0) + if (p->surface_acquired) { + for (int n = 0; n < 
p->desc.num_objects; n++) + close(p->desc.objects[n].fd); + p->surface_acquired = false; + } +#endif + if (p->buffer_acquired) { status = vaReleaseBufferHandle(display, p->current_image.buf); CHECK_VA_STATUS(mapper, "vaReleaseBufferHandle()"); @@ -330,6 +344,72 @@ static int mapper_map(struct ra_hwdec_mapper *mapper) VAImage *va_image = &p->current_image; VADisplay *display = p_owner->display; +#if VA_CHECK_VERSION(1, 1, 0) + if (p->esh_not_implemented) + goto esh_failed; + + status = vaExportSurfaceHandle(display, va_surface_id(mapper->src), + VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, + VA_EXPORT_SURFACE_READ_ONLY | + VA_EXPORT_SURFACE_SEPARATE_LAYERS, + &p->desc); + if (!CHECK_VA_STATUS(mapper, "vaAcquireSurfaceHandle()")) { + if (status == VA_STATUS_ERROR_UNIMPLEMENTED) + p->esh_not_implemented = true; + goto esh_failed; + } + p->surface_acquired = true; + + for (int n = 0; n < p->num_planes; n++) { + int attribs[20] = {EGL_NONE}; + int num_attribs = 0; + + ADD_ATTRIB(EGL_LINUX_DRM_FOURCC_EXT, p->desc.layers[n].drm_format); + ADD_ATTRIB(EGL_WIDTH, p->tex[n]->params.w); + ADD_ATTRIB(EGL_HEIGHT, p->tex[n]->params.h); + +#define ADD_PLANE_ATTRIBS(plane) do { \ + ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _FD_EXT, \ + p->desc.objects[p->desc.layers[n].object_index[plane]].fd); \ + ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _OFFSET_EXT, \ + p->desc.layers[n].offset[plane]); \ + ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _PITCH_EXT, \ + p->desc.layers[n].pitch[plane]); \ + } while (0) + + ADD_PLANE_ATTRIBS(0); + if (p->desc.layers[n].num_planes > 1) + ADD_PLANE_ATTRIBS(1); + if (p->desc.layers[n].num_planes > 2) + ADD_PLANE_ATTRIBS(2); + if (p->desc.layers[n].num_planes > 3) + ADD_PLANE_ATTRIBS(3); + + p->images[n] = p->CreateImageKHR(eglGetCurrentDisplay(), + EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, attribs); + if (!p->images[n]) + goto esh_failed; + + gl->BindTexture(GL_TEXTURE_2D, p->gl_textures[n]); + p->EGLImageTargetTexture2DOES(GL_TEXTURE_2D, p->images[n]); + + 
mapper->tex[n] = p->tex[n]; + } + gl->BindTexture(GL_TEXTURE_2D, 0); + + if (p->desc.fourcc == VA_FOURCC_YV12) + MPSWAP(struct ra_tex*, mapper->tex[1], mapper->tex[2]); + + return 0; + +esh_failed: + if (p->surface_acquired) { + for (int n = 0; n < p->desc.num_objects; n++) + close(p->desc.objects[n].fd); + p->surface_acquired = false; + } +#endif + status = vaDeriveImage(display, va_surface_id(mapper->src), va_image); if (!CHECK_VA_STATUS(mapper, "vaDeriveImage()")) goto err; @@ -417,7 +497,7 @@ static void determine_working_formats(struct ra_hwdec *hw) AVHWFramesConstraints *fc = av_hwdevice_get_hwframe_constraints(p->ctx->av_device_ref, NULL); if (!fc) { - MP_WARN(hw, "failed to retrieve libavutil frame constaints\n"); + MP_WARN(hw, "failed to retrieve libavutil frame constraints\n"); goto done; } for (int n = 0; fc->valid_sw_formats[n] != AV_PIX_FMT_NONE; n++) { @@ -464,7 +544,6 @@ done: const struct ra_hwdec_driver ra_hwdec_vaegl = { .name = "vaapi-egl", .priv_size = sizeof(struct priv_owner), - .api = HWDEC_VAAPI, .imgfmts = {IMGFMT_VAAPI, 0}, .init = init, .uninit = uninit, diff --git a/video/out/opengl/hwdec_vaglx.c b/video/out/opengl/hwdec_vaglx.c deleted file mode 100644 index 8db15c4..0000000 --- a/video/out/opengl/hwdec_vaglx.c +++ /dev/null @@ -1,226 +0,0 @@ -/* - * This file is part of mpv. - * - * Parts based on the MPlayer VA-API patch (see vo_vaapi.c). - * - * mpv is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with mpv. 
If not, see <http://www.gnu.org/licenses/>. - */ - -#include <stddef.h> -#include <string.h> -#include <assert.h> - -#include <GL/glx.h> -#include <va/va_x11.h> - -#include "video/out/x11_common.h" -#include "ra_gl.h" -#include "hwdec.h" -#include "video/vaapi.h" - -struct priv_owner { - struct mp_vaapi_ctx *ctx; - VADisplay *display; - Display *xdisplay; - GLXFBConfig fbc; -}; - -struct priv { - GLuint gl_texture; - Pixmap pixmap; - GLXPixmap glxpixmap; - void (*glXBindTexImage)(Display *dpy, GLXDrawable draw, int buffer, int *a); - void (*glXReleaseTexImage)(Display *dpy, GLXDrawable draw, int buffer); -}; - -static void uninit(struct ra_hwdec *hw) -{ - struct priv_owner *p = hw->priv; - if (p->ctx) - hwdec_devices_remove(hw->devs, &p->ctx->hwctx); - va_destroy(p->ctx); -} - -static int init(struct ra_hwdec *hw) -{ - Display *x11disp = glXGetCurrentDisplay(); - if (!x11disp || !ra_is_gl(hw->ra)) - return -1; - int x11scr = DefaultScreen(x11disp); - struct priv_owner *p = hw->priv; - p->xdisplay = x11disp; - const char *glxext = glXQueryExtensionsString(x11disp, x11scr); - if (!glxext || !strstr(glxext, "GLX_EXT_texture_from_pixmap")) - return -1; - p->display = vaGetDisplay(x11disp); - if (!p->display) - return -1; - p->ctx = va_initialize(p->display, hw->log, true); - if (!p->ctx) { - vaTerminate(p->display); - return -1; - } - - int attribs[] = { - GLX_BIND_TO_TEXTURE_RGBA_EXT, True, - GLX_DRAWABLE_TYPE, GLX_PIXMAP_BIT, - GLX_BIND_TO_TEXTURE_TARGETS_EXT, GLX_TEXTURE_2D_BIT_EXT, - GLX_Y_INVERTED_EXT, True, - GLX_DOUBLEBUFFER, False, - GLX_RED_SIZE, 8, - GLX_GREEN_SIZE, 8, - GLX_BLUE_SIZE, 8, - GLX_ALPHA_SIZE, 0, - None - }; - - int fbcount; - GLXFBConfig *fbc = glXChooseFBConfig(x11disp, x11scr, attribs, &fbcount); - if (fbcount) - p->fbc = fbc[0]; - if (fbc) - XFree(fbc); - if (!fbcount) { - MP_VERBOSE(hw, "No texture-from-pixmap support.\n"); - return -1; - } - - p->ctx->hwctx.driver_name = hw->driver->name; - hwdec_devices_add(hw->devs, &p->ctx->hwctx); - 
return 0; -} - -static int mapper_init(struct ra_hwdec_mapper *mapper) -{ - struct priv_owner *p_owner = mapper->owner->priv; - struct priv *p = mapper->priv; - GL *gl = ra_gl_get(mapper->ra); - Display *xdisplay = p_owner->xdisplay; - - p->glXBindTexImage = - (void*)glXGetProcAddressARB((void*)"glXBindTexImageEXT"); - p->glXReleaseTexImage = - (void*)glXGetProcAddressARB((void*)"glXReleaseTexImageEXT"); - if (!p->glXBindTexImage || !p->glXReleaseTexImage) - return -1; - - gl->GenTextures(1, &p->gl_texture); - gl->BindTexture(GL_TEXTURE_2D, p->gl_texture); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - gl->BindTexture(GL_TEXTURE_2D, 0); - - p->pixmap = XCreatePixmap(xdisplay, - RootWindow(xdisplay, DefaultScreen(xdisplay)), - mapper->src_params.w, mapper->src_params.h, 24); - if (!p->pixmap) { - MP_FATAL(mapper, "could not create pixmap\n"); - return -1; - } - - int attribs[] = { - GLX_TEXTURE_TARGET_EXT, GLX_TEXTURE_2D_EXT, - GLX_TEXTURE_FORMAT_EXT, GLX_TEXTURE_FORMAT_RGB_EXT, - GLX_MIPMAP_TEXTURE_EXT, False, - None, - }; - p->glxpixmap = glXCreatePixmap(xdisplay, p_owner->fbc, p->pixmap, attribs); - - gl->BindTexture(GL_TEXTURE_2D, p->gl_texture); - p->glXBindTexImage(xdisplay, p->glxpixmap, GLX_FRONT_EXT, NULL); - gl->BindTexture(GL_TEXTURE_2D, 0); - - struct ra_tex_params params = { - .dimensions = 2, - .w = mapper->src_params.w, - .h = mapper->src_params.h, - .d = 1, - .format = ra_find_unorm_format(mapper->ra, 1, 4), // unsure - .render_src = true, - .src_linear = true, - }; - if (!params.format) - return -1; - - mapper->tex[0] = ra_create_wrapped_tex(mapper->ra, ¶ms, p->gl_texture); - if (!mapper->tex[0]) - return -1; - - mapper->dst_params = mapper->src_params; - mapper->dst_params.imgfmt = IMGFMT_RGB0; - 
mapper->dst_params.hw_subfmt = 0; - - return 0; -} - -static void mapper_uninit(struct ra_hwdec_mapper *mapper) -{ - struct priv_owner *p_owner = mapper->owner->priv; - struct priv *p = mapper->priv; - GL *gl = ra_gl_get(mapper->ra); - Display *xdisplay = p_owner->xdisplay; - - if (p->glxpixmap) { - p->glXReleaseTexImage(xdisplay, p->glxpixmap, GLX_FRONT_EXT); - glXDestroyPixmap(xdisplay, p->glxpixmap); - } - p->glxpixmap = 0; - - if (p->pixmap) - XFreePixmap(xdisplay, p->pixmap); - p->pixmap = 0; - - ra_tex_free(mapper->ra, &mapper->tex[0]); - gl->DeleteTextures(1, &p->gl_texture); - p->gl_texture = 0; -} - -static int mapper_map(struct ra_hwdec_mapper *mapper) -{ - struct priv_owner *p_owner = mapper->owner->priv; - struct priv *p = mapper->priv; - VAStatus status; - - struct mp_image *hw_image = mapper->src; - - if (!p->pixmap) - return -1; - - status = vaPutSurface(p_owner->display, va_surface_id(hw_image), p->pixmap, - 0, 0, hw_image->w, hw_image->h, - 0, 0, hw_image->w, hw_image->h, - NULL, 0, - va_get_colorspace_flag(hw_image->params.color.space)); - CHECK_VA_STATUS(mapper, "vaPutSurface()"); - - return 0; -} - -const struct ra_hwdec_driver ra_hwdec_vaglx = { - .name = "vaapi-glx", - .priv_size = sizeof(struct priv_owner), - .api = HWDEC_VAAPI, - .imgfmts = {IMGFMT_VAAPI, 0}, - .testing_only = true, - .init = init, - .uninit = uninit, - .mapper = &(const struct ra_hwdec_mapper_driver){ - .priv_size = sizeof(struct priv), - .init = mapper_init, - .uninit = mapper_uninit, - .map = mapper_map, - }, -}; diff --git a/video/out/opengl/hwdec_vdpau.c b/video/out/opengl/hwdec_vdpau.c index d733650..603a70e 100644 --- a/video/out/opengl/hwdec_vdpau.c +++ b/video/out/opengl/hwdec_vdpau.c @@ -20,7 +20,7 @@ #include <GL/glx.h> -#include "hwdec.h" +#include "video/out/gpu/hwdec.h" #include "ra_gl.h" #include "video/vdpau.h" #include "video/vdpau_mixer.h" @@ -304,7 +304,6 @@ static int mapper_map(struct ra_hwdec_mapper *mapper) const struct ra_hwdec_driver ra_hwdec_vdpau = 
{ .name = "vdpau-glx", .priv_size = sizeof(struct priv_owner), - .api = HWDEC_VDPAU, .imgfmts = {IMGFMT_VDPAU, 0}, .init = init, .uninit = uninit, diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c index ab5c132..5b03368 100644 --- a/video/out/opengl/ra_gl.c +++ b/video/out/opengl/ra_gl.c @@ -96,14 +96,12 @@ static int ra_init_gl(struct ra *ra, GL *gl) static const int caps_map[][2] = { {RA_CAP_DIRECT_UPLOAD, 0}, - {RA_CAP_SHARED_BINDING, 0}, {RA_CAP_GLOBAL_UNIFORM, 0}, + {RA_CAP_FRAGCOORD, 0}, {RA_CAP_TEX_1D, MPGL_CAP_1D_TEX}, {RA_CAP_TEX_3D, MPGL_CAP_3D_TEX}, {RA_CAP_COMPUTE, MPGL_CAP_COMPUTE_SHADER}, {RA_CAP_NESTED_ARRAY, MPGL_CAP_NESTED_ARRAY}, - {RA_CAP_BUF_RO, MPGL_CAP_UBO}, - {RA_CAP_BUF_RW, MPGL_CAP_SSBO}, }; for (int i = 0; i < MP_ARRAY_SIZE(caps_map); i++) { @@ -111,6 +109,17 @@ static int ra_init_gl(struct ra *ra, GL *gl) ra->caps |= caps_map[i][0]; } + if (gl->BindBufferBase) { + if (gl->mpgl_caps & MPGL_CAP_UBO) + ra->caps |= RA_CAP_BUF_RO; + if (gl->mpgl_caps & MPGL_CAP_SSBO) + ra->caps |= RA_CAP_BUF_RW; + } + + // textureGather is only supported in GLSL 400+ + if (ra->glsl_version >= 400) + ra->caps |= RA_CAP_GATHER; + if (gl->BlitFramebuffer) ra->caps |= RA_CAP_BLIT; @@ -175,6 +184,8 @@ static int ra_init_gl(struct ra *ra, GL *gl) desc->chroma_w = desc->chroma_h = 1; } + fmt->glsl_format = ra_fmt_glsl_format(fmt); + MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt); } @@ -648,6 +659,11 @@ static void gl_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); } +static int gl_desc_namespace(enum ra_vartype type) +{ + return type; +} + static void gl_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass) { GL *gl = ra_gl_get(ra); @@ -773,7 +789,7 @@ static GLuint load_program(struct ra *ra, const struct ra_renderpass_params *p, GLint status = 0; gl->GetProgramiv(prog, GL_LINK_STATUS, &status); if (status) { - MP_VERBOSE(ra, "Loading binary program succeeded.\n"); + MP_DBG(ra, 
"Loading binary program succeeded.\n"); } else { gl->DeleteProgram(prog); prog = 0; @@ -811,7 +827,7 @@ static struct ra_renderpass *gl_renderpass_create(struct ra *ra, GL *gl = ra_gl_get(ra); struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass); - pass->params = *ra_render_pass_params_copy(pass, params); + pass->params = *ra_renderpass_params_copy(pass, params); pass->params.cached_program = (bstr){0}; struct ra_renderpass_gl *pass_gl = pass->priv = talloc_zero(NULL, struct ra_renderpass_gl); @@ -1097,12 +1113,6 @@ static uint64_t gl_timer_stop(struct ra *ra, ra_timer *ratimer) return timer->result; } -static void gl_flush(struct ra *ra) -{ - GL *gl = ra_gl_get(ra); - gl->Flush(); -} - static void gl_debug_marker(struct ra *ra, const char *msg) { struct ra_gl *p = ra->priv; @@ -1123,6 +1133,7 @@ static struct ra_fns ra_fns_gl = { .clear = gl_clear, .blit = gl_blit, .uniform_layout = std140_layout, + .desc_namespace = gl_desc_namespace, .renderpass_create = gl_renderpass_create, .renderpass_destroy = gl_renderpass_destroy, .renderpass_run = gl_renderpass_run, @@ -1130,6 +1141,5 @@ static struct ra_fns ra_fns_gl = { .timer_destroy = gl_timer_destroy, .timer_start = gl_timer_start, .timer_stop = gl_timer_stop, - .flush = gl_flush, .debug_marker = gl_debug_marker, }; diff --git a/video/out/opengl/ra_gl.h b/video/out/opengl/ra_gl.h index e5e09a0..9844977 100644 --- a/video/out/opengl/ra_gl.h +++ b/video/out/opengl/ra_gl.h @@ -1,8 +1,7 @@ #pragma once #include "common.h" -#include "ra.h" -#include "gl_utils.h" +#include "utils.h" struct ra *ra_create_gl(GL *gl, struct mp_log *log); struct ra_tex *ra_create_wrapped_tex(struct ra *ra, diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index b8fc24a..34f4736 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -1,371 +1,284 @@ -#include "common/msg.h" -#include "video/out/vo.h" +/* + * This file is part of mpv. + * Parts based on MPlayer code by Reimar Döffinger. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <assert.h> + +#include <libavutil/sha.h> +#include <libavutil/intreadwrite.h> +#include <libavutil/mem.h> + +#include "osdep/io.h" + +#include "common/common.h" +#include "options/path.h" +#include "stream/stream.h" +#include "formats.h" #include "utils.h" -// Standard parallel 2D projection, except y1 < y0 means that the coordinate -// system is flipped, not the projection. 
-void gl_transform_ortho(struct gl_transform *t, float x0, float x1, - float y0, float y1) +// GLU has this as gluErrorString (we don't use GLU, as it is legacy-OpenGL) +static const char *gl_error_to_string(GLenum error) { - if (y1 < y0) { - float tmp = y0; - y0 = tmp - y1; - y1 = tmp; + switch (error) { + case GL_INVALID_ENUM: return "INVALID_ENUM"; + case GL_INVALID_VALUE: return "INVALID_VALUE"; + case GL_INVALID_OPERATION: return "INVALID_OPERATION"; + case GL_INVALID_FRAMEBUFFER_OPERATION: return "INVALID_FRAMEBUFFER_OPERATION"; + case GL_OUT_OF_MEMORY: return "OUT_OF_MEMORY"; + default: return "unknown"; } - - t->m[0][0] = 2.0f / (x1 - x0); - t->m[0][1] = 0.0f; - t->m[1][0] = 0.0f; - t->m[1][1] = 2.0f / (y1 - y0); - t->t[0] = -(x1 + x0) / (x1 - x0); - t->t[1] = -(y1 + y0) / (y1 - y0); -} - -// Apply the effects of one transformation to another, transforming it in the -// process. In other words: post-composes t onto x -void gl_transform_trans(struct gl_transform t, struct gl_transform *x) -{ - struct gl_transform xt = *x; - x->m[0][0] = t.m[0][0] * xt.m[0][0] + t.m[0][1] * xt.m[1][0]; - x->m[1][0] = t.m[1][0] * xt.m[0][0] + t.m[1][1] * xt.m[1][0]; - x->m[0][1] = t.m[0][0] * xt.m[0][1] + t.m[0][1] * xt.m[1][1]; - x->m[1][1] = t.m[1][0] * xt.m[0][1] + t.m[1][1] * xt.m[1][1]; - gl_transform_vec(t, &x->t[0], &x->t[1]); -} - -void gl_transform_ortho_fbodst(struct gl_transform *t, struct fbodst fbo) -{ - int y_dir = fbo.flip ? 
-1 : 1; - gl_transform_ortho(t, 0, fbo.tex->params.w, 0, fbo.tex->params.h * y_dir); } -void ra_buf_pool_uninit(struct ra *ra, struct ra_buf_pool *pool) +void gl_check_error(GL *gl, struct mp_log *log, const char *info) { - for (int i = 0; i < pool->num_buffers; i++) - ra_buf_free(ra, &pool->buffers[i]); - - talloc_free(pool->buffers); - *pool = (struct ra_buf_pool){0}; + for (;;) { + GLenum error = gl->GetError(); + if (error == GL_NO_ERROR) + break; + mp_msg(log, MSGL_ERR, "%s: OpenGL error %s.\n", info, + gl_error_to_string(error)); + } } -static bool ra_buf_params_compatible(const struct ra_buf_params *new, - const struct ra_buf_params *old) +static int get_alignment(int stride) { - return new->type == old->type && - new->size <= old->size && - new->host_mapped == old->host_mapped && - new->host_mutable == old->host_mutable; + if (stride % 8 == 0) + return 8; + if (stride % 4 == 0) + return 4; + if (stride % 2 == 0) + return 2; + return 1; } -static bool ra_buf_pool_grow(struct ra *ra, struct ra_buf_pool *pool) +// upload a texture, handling things like stride and slices +// target: texture target, usually GL_TEXTURE_2D +// format, type: texture parameters +// dataptr, stride: image data +// x, y, width, height: part of the image to upload +void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, + const void *dataptr, int stride, + int x, int y, int w, int h) { - struct ra_buf *buf = ra_buf_create(ra, &pool->current_params); - if (!buf) - return false; - - MP_TARRAY_INSERT_AT(NULL, pool->buffers, pool->num_buffers, pool->index, buf); - MP_VERBOSE(ra, "Resized buffer pool to size %d\n", pool->num_buffers); - return true; + int bpp = gl_bytes_per_pixel(format, type); + const uint8_t *data = dataptr; + int y_max = y + h; + if (w <= 0 || h <= 0 || !bpp) + return; + assert(stride > 0); + gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(stride)); + int slice = h; + if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) { + // this is not always correct, but should 
work for MPlayer + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride / bpp); + } else { + if (stride != bpp * w) + slice = 1; // very inefficient, but at least it works + } + for (; y + slice <= y_max; y += slice) { + gl->TexSubImage2D(target, 0, x, y, w, slice, format, type, data); + data += stride * slice; + } + if (y < y_max) + gl->TexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data); + if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); } -struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool, - const struct ra_buf_params *params) +mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h) { - assert(!params->initial_data); - - if (!ra_buf_params_compatible(params, &pool->current_params)) { - ra_buf_pool_uninit(ra, pool); - pool->current_params = *params; - } - - // Make sure we have at least one buffer available - if (!pool->buffers && !ra_buf_pool_grow(ra, pool)) - return NULL; - - // Make sure the next buffer is available for use - if (!ra->fns->buf_poll(ra, pool->buffers[pool->index]) && - !ra_buf_pool_grow(ra, pool)) - { + if (gl->es) + return NULL; // ES can't read from front buffer + mp_image_t *image = mp_image_alloc(IMGFMT_RGB24, w, h); + if (!image) return NULL; + gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); + GLenum obj = fbo ? 
GL_COLOR_ATTACHMENT0 : GL_FRONT; + gl->PixelStorei(GL_PACK_ALIGNMENT, 1); + gl->ReadBuffer(obj); + //flip image while reading (and also avoid stride-related trouble) + for (int y = 0; y < h; y++) { + gl->ReadPixels(0, h - y - 1, w, 1, GL_RGB, GL_UNSIGNED_BYTE, + image->planes[0] + y * image->stride[0]); } - - struct ra_buf *buf = pool->buffers[pool->index++]; - pool->index %= pool->num_buffers; - - return buf; + gl->PixelStorei(GL_PACK_ALIGNMENT, 4); + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + return image; } -bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo, - const struct ra_tex_upload_params *params) +static void gl_vao_enable_attribs(struct gl_vao *vao) { - if (params->buf) - return ra->fns->tex_upload(ra, params); - - struct ra_tex *tex = params->tex; - size_t row_size = tex->params.dimensions == 2 ? params->stride : - tex->params.w * tex->params.format->pixel_size; - - struct ra_buf_params bufparams = { - .type = RA_BUF_TYPE_TEX_UPLOAD, - .size = row_size * tex->params.h * tex->params.d, - .host_mutable = true, - }; - - struct ra_buf *buf = ra_buf_pool_get(ra, pbo, &bufparams); - if (!buf) - return false; - - ra->fns->buf_update(ra, buf, 0, params->src, bufparams.size); - - struct ra_tex_upload_params newparams = *params; - newparams.buf = buf; - newparams.src = NULL; - - return ra->fns->tex_upload(ra, &newparams); -} + GL *gl = vao->gl; + + for (int n = 0; n < vao->num_entries; n++) { + const struct ra_renderpass_input *e = &vao->entries[n]; + GLenum type = 0; + bool normalized = false; + switch (e->type) { + case RA_VARTYPE_INT: + type = GL_INT; + break; + case RA_VARTYPE_FLOAT: + type = GL_FLOAT; + break; + case RA_VARTYPE_BYTE_UNORM: + type = GL_UNSIGNED_BYTE; + normalized = true; + break; + default: + abort(); + } + assert(e->dim_m == 1); -struct ra_layout std140_layout(struct ra_renderpass_input *inp) -{ - size_t el_size = ra_vartype_size(inp->type); - - // std140 packing rules: - // 1. 
The alignment of generic values is their size in bytes - // 2. The alignment of vectors is the vector length * the base count, with - // the exception of vec3 which is always aligned like vec4 - // 3. The alignment of arrays is that of the element size rounded up to - // the nearest multiple of vec4 - // 4. Matrices are treated like arrays of vectors - // 5. Arrays/matrices are laid out with a stride equal to the alignment - size_t size = el_size * inp->dim_v; - if (inp->dim_v == 3) - size += el_size; - if (inp->dim_m > 1) - size = MP_ALIGN_UP(size, sizeof(float[4])); - - return (struct ra_layout) { - .align = size, - .stride = size, - .size = size * inp->dim_m, - }; + gl->EnableVertexAttribArray(n); + gl->VertexAttribPointer(n, e->dim_v, type, normalized, + vao->stride, (void *)(intptr_t)e->offset); + } } -struct ra_layout std430_layout(struct ra_renderpass_input *inp) +void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, + const struct ra_renderpass_input *entries, + int num_entries) { - size_t el_size = ra_vartype_size(inp->type); - - // std430 packing rules: like std140, except arrays/matrices are always - // "tightly" packed, even arrays/matrices of vec3s - size_t align = el_size * inp->dim_v; - if (inp->dim_v == 3 && inp->dim_m == 1) - align += el_size; - - return (struct ra_layout) { - .align = align, - .stride = align, - .size = align * inp->dim_m, + assert(!vao->vao); + assert(!vao->buffer); + + *vao = (struct gl_vao){ + .gl = gl, + .stride = stride, + .entries = entries, + .num_entries = num_entries, }; -} - -// Create a texture and a FBO using the texture as color attachments. -// fmt: texture internal format -// If the parameters are the same as the previous call, do not touch it. -// flags can be 0, or a combination of FBOTEX_FUZZY_W and FBOTEX_FUZZY_H. -// Enabling FUZZY for W or H means the w or h does not need to be exact. 
-bool fbotex_change(struct fbotex *fbo, struct ra *ra, struct mp_log *log, - int w, int h, const struct ra_format *fmt, int flags) -{ - int lw = w, lh = h; - - if (fbo->tex) { - int cw = w, ch = h; - int rw = fbo->tex->params.w, rh = fbo->tex->params.h; - - if ((flags & FBOTEX_FUZZY_W) && cw < rw) - cw = rw; - if ((flags & FBOTEX_FUZZY_H) && ch < rh) - ch = rh; - - if (rw == cw && rh == ch && fbo->tex->params.format == fmt) - goto done; - } - - if (flags & FBOTEX_FUZZY_W) - w = MP_ALIGN_UP(w, 256); - if (flags & FBOTEX_FUZZY_H) - h = MP_ALIGN_UP(h, 256); - - mp_verbose(log, "Create FBO: %dx%d (%dx%d)\n", lw, lh, w, h); - - if (!fmt || !fmt->renderable || !fmt->linear_filter) { - mp_err(log, "Format %s not supported.\n", fmt ? fmt->name : "(unset)"); - return false; - } - fbotex_uninit(fbo); + gl->GenBuffers(1, &vao->buffer); - *fbo = (struct fbotex) { - .ra = ra, - }; - - struct ra_tex_params params = { - .dimensions = 2, - .w = w, - .h = h, - .d = 1, - .format = fmt, - .src_linear = true, - .render_src = true, - .render_dst = true, - .storage_dst = true, - .blit_src = true, - }; + if (gl->BindVertexArray) { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); - fbo->tex = ra_tex_create(fbo->ra, ¶ms); + gl->GenVertexArrays(1, &vao->vao); + gl->BindVertexArray(vao->vao); + gl_vao_enable_attribs(vao); + gl->BindVertexArray(0); - if (!fbo->tex) { - mp_err(log, "Error: framebuffer could not be created.\n"); - fbotex_uninit(fbo); - return false; + gl->BindBuffer(GL_ARRAY_BUFFER, 0); } +} -done: - - fbo->lw = lw; - fbo->lh = lh; +void gl_vao_uninit(struct gl_vao *vao) +{ + GL *gl = vao->gl; + if (!gl) + return; - fbo->fbo = (struct fbodst){ - .tex = fbo->tex, - }; + if (gl->DeleteVertexArrays) + gl->DeleteVertexArrays(1, &vao->vao); + gl->DeleteBuffers(1, &vao->buffer); - return true; + *vao = (struct gl_vao){0}; } -void fbotex_uninit(struct fbotex *fbo) +static void gl_vao_bind(struct gl_vao *vao) { - if (fbo->ra) { - ra_tex_free(fbo->ra, &fbo->tex); - *fbo = (struct fbotex) 
{0}; + GL *gl = vao->gl; + + if (gl->BindVertexArray) { + gl->BindVertexArray(vao->vao); + } else { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); + gl_vao_enable_attribs(vao); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); } } -struct timer_pool { - struct ra *ra; - ra_timer *timer; - bool running; // detect invalid usage - - uint64_t samples[VO_PERF_SAMPLE_COUNT]; - int sample_idx; - int sample_count; - - uint64_t sum; - uint64_t peak; -}; - -struct timer_pool *timer_pool_create(struct ra *ra) +static void gl_vao_unbind(struct gl_vao *vao) { - if (!ra->fns->timer_create) - return NULL; - - ra_timer *timer = ra->fns->timer_create(ra); - if (!timer) - return NULL; + GL *gl = vao->gl; - struct timer_pool *pool = talloc(NULL, struct timer_pool); - if (!pool) { - ra->fns->timer_destroy(ra, timer); - return NULL; + if (gl->BindVertexArray) { + gl->BindVertexArray(0); + } else { + for (int n = 0; n < vao->num_entries; n++) + gl->DisableVertexAttribArray(n); } - - *pool = (struct timer_pool){ .ra = ra, .timer = timer }; - return pool; } -void timer_pool_destroy(struct timer_pool *pool) +// Draw the vertex data (as described by the gl_vao_entry entries) in ptr +// to the screen. num is the number of vertexes. prim is usually GL_TRIANGLES. +// If ptr is NULL, then skip the upload, and use the data uploaded with the +// previous call. 
+void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num) { - if (!pool) - return; + GL *gl = vao->gl; - pool->ra->fns->timer_destroy(pool->ra, pool->timer); - talloc_free(pool); -} + if (ptr) { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); + gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_STREAM_DRAW); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + } -void timer_pool_start(struct timer_pool *pool) -{ - if (!pool) - return; + gl_vao_bind(vao); + + gl->DrawArrays(prim, 0, num); - assert(!pool->running); - pool->ra->fns->timer_start(pool->ra, pool->timer); - pool->running = true; + gl_vao_unbind(vao); } -void timer_pool_stop(struct timer_pool *pool) +static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id, + GLenum severity, GLsizei length, + const GLchar *message, const void *userParam) { - if (!pool) - return; - - assert(pool->running); - uint64_t res = pool->ra->fns->timer_stop(pool->ra, pool->timer); - pool->running = false; - - if (res) { - // Input res into the buffer and grab the previous value - uint64_t old = pool->samples[pool->sample_idx]; - pool->sample_count = MPMIN(pool->sample_count + 1, VO_PERF_SAMPLE_COUNT); - pool->samples[pool->sample_idx++] = res; - pool->sample_idx %= VO_PERF_SAMPLE_COUNT; - pool->sum = pool->sum + res - old; - - // Update peak if necessary - if (res >= pool->peak) { - pool->peak = res; - } else if (pool->peak == old) { - // It's possible that the last peak was the value we just removed, - // if so we need to scan for the new peak - uint64_t peak = res; - for (int i = 0; i < VO_PERF_SAMPLE_COUNT; i++) - peak = MPMAX(peak, pool->samples[i]); - pool->peak = peak; - } + // keep in mind that the debug callback can be asynchronous + struct mp_log *log = (void *)userParam; + int level = MSGL_ERR; + switch (severity) { + case GL_DEBUG_SEVERITY_NOTIFICATION:level = MSGL_V; break; + case GL_DEBUG_SEVERITY_LOW: level = MSGL_INFO; break; + case GL_DEBUG_SEVERITY_MEDIUM: level = MSGL_WARN; break; + 
case GL_DEBUG_SEVERITY_HIGH: level = MSGL_ERR; break; } + mp_msg(log, level, "GL: %s\n", message); } -struct mp_pass_perf timer_pool_measure(struct timer_pool *pool) +void gl_set_debug_logger(GL *gl, struct mp_log *log) { - if (!pool) - return (struct mp_pass_perf){0}; - - struct mp_pass_perf res = { - .peak = pool->peak, - .count = pool->sample_count, - }; - - int idx = pool->sample_idx - pool->sample_count + VO_PERF_SAMPLE_COUNT; - for (int i = 0; i < res.count; i++) { - idx %= VO_PERF_SAMPLE_COUNT; - res.samples[i] = pool->samples[idx++]; - } - - if (res.count > 0) { - res.last = res.samples[res.count - 1]; - res.avg = pool->sum / res.count; - } - - return res; + if (gl->DebugMessageCallback) + gl->DebugMessageCallback(log ? gl_debug_cb : NULL, log); } -void mp_log_source(struct mp_log *log, int lev, const char *src) +// Given a GL combined extension string in extensions, find out whether ext +// is included in it. Basically, a word search. +bool gl_check_extension(const char *extensions, const char *ext) { - int line = 1; - if (!src) - return; - while (*src) { - const char *end = strchr(src, '\n'); - const char *next = end + 1; - if (!end) - next = end = src + strlen(src); - mp_msg(log, lev, "[%3d] %.*s\n", line, (int)(end - src), src); - line++; - src = next; + int len = strlen(ext); + const char *cur = extensions; + while (cur) { + cur = strstr(cur, ext); + if (!cur) + break; + if ((cur == extensions || cur[-1] == ' ') && + (cur[len] == '\0' || cur[len] == ' ')) + return true; + cur += len; } + return false; } diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h index 7d00d26..53127e4 100644 --- a/video/out/opengl/utils.h +++ b/video/out/opengl/utils.h @@ -1,121 +1,56 @@ -#pragma once +/* + * This file is part of mpv. + * Parts based on MPlayer code by Reimar Döffinger. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MP_GL_UTILS_ +#define MP_GL_UTILS_ -#include <stdbool.h> #include <math.h> -#include "video/out/vo.h" -#include "ra.h" +#include "video/out/gpu/utils.h" +#include "common.h" -// A 3x2 matrix, with the translation part separate. -struct gl_transform { - // row-major, e.g. in mathematical notation: - // | m[0][0] m[0][1] | - // | m[1][0] m[1][1] | - float m[2][2]; - float t[2]; -}; - -static const struct gl_transform identity_trans = { - .m = {{1.0, 0.0}, {0.0, 1.0}}, - .t = {0.0, 0.0}, -}; - -void gl_transform_ortho(struct gl_transform *t, float x0, float x1, - float y0, float y1); - -// This treats m as an affine transformation, in other words m[2][n] gets -// added to the output. 
-static inline void gl_transform_vec(struct gl_transform t, float *x, float *y) -{ - float vx = *x, vy = *y; - *x = vx * t.m[0][0] + vy * t.m[0][1] + t.t[0]; - *y = vx * t.m[1][0] + vy * t.m[1][1] + t.t[1]; -} - -struct mp_rect_f { - float x0, y0, x1, y1; -}; - -// Semantic equality (fuzzy comparison) -static inline bool mp_rect_f_seq(struct mp_rect_f a, struct mp_rect_f b) -{ - return fabs(a.x0 - b.x0) < 1e-6 && fabs(a.x1 - b.x1) < 1e-6 && - fabs(a.y0 - b.y0) < 1e-6 && fabs(a.y1 - b.y1) < 1e-6; -} - -static inline void gl_transform_rect(struct gl_transform t, struct mp_rect_f *r) -{ - gl_transform_vec(t, &r->x0, &r->y0); - gl_transform_vec(t, &r->x1, &r->y1); -} - -static inline bool gl_transform_eq(struct gl_transform a, struct gl_transform b) -{ - for (int x = 0; x < 2; x++) { - for (int y = 0; y < 2; y++) { - if (a.m[x][y] != b.m[x][y]) - return false; - } - } - - return a.t[0] == b.t[0] && a.t[1] == b.t[1]; -} - -void gl_transform_trans(struct gl_transform t, struct gl_transform *x); - -struct fbodst { - struct ra_tex *tex; - bool flip; // mirror vertically -}; - -void gl_transform_ortho_fbodst(struct gl_transform *t, struct fbodst fbo); - -// A pool of buffers, which can grow as needed -struct ra_buf_pool { - struct ra_buf_params current_params; - struct ra_buf **buffers; - int num_buffers; - int index; -}; - -void ra_buf_pool_uninit(struct ra *ra, struct ra_buf_pool *pool); +struct mp_log; -// Note: params->initial_data is *not* supported -struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool, - const struct ra_buf_params *params); +void gl_check_error(GL *gl, struct mp_log *log, const char *info); -// Helper that wraps ra_tex_upload using texture upload buffers to ensure that -// params->buf is always set. This is intended for RA-internal usage. 
-bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo, - const struct ra_tex_upload_params *params); +void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, + const void *dataptr, int stride, + int x, int y, int w, int h); -// Layout rules for GLSL's packing modes -struct ra_layout std140_layout(struct ra_renderpass_input *inp); -struct ra_layout std430_layout(struct ra_renderpass_input *inp); +mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h); -struct fbotex { - struct ra *ra; - struct ra_tex *tex; - int lw, lh; // logical (configured) size, <= than texture size - struct fbodst fbo; +struct gl_vao { + GL *gl; + GLuint vao; // the VAO object, or 0 if unsupported by driver + GLuint buffer; // GL_ARRAY_BUFFER used for the data + int stride; // size of each element (interleaved elements are assumed) + const struct ra_renderpass_input *entries; + int num_entries; }; -void fbotex_uninit(struct fbotex *fbo); -bool fbotex_change(struct fbotex *fbo, struct ra *ra, struct mp_log *log, - int w, int h, const struct ra_format *fmt, int flags); -#define FBOTEX_FUZZY_W 1 -#define FBOTEX_FUZZY_H 2 -#define FBOTEX_FUZZY (FBOTEX_FUZZY_W | FBOTEX_FUZZY_H) +void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, + const struct ra_renderpass_input *entries, + int num_entries); +void gl_vao_uninit(struct gl_vao *vao); +void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num); -// A wrapper around ra_timer that does result pooling, averaging etc. -struct timer_pool; +void gl_set_debug_logger(GL *gl, struct mp_log *log); -struct timer_pool *timer_pool_create(struct ra *ra); -void timer_pool_destroy(struct timer_pool *pool); -void timer_pool_start(struct timer_pool *pool); -void timer_pool_stop(struct timer_pool *pool); -struct mp_pass_perf timer_pool_measure(struct timer_pool *pool); +bool gl_check_extension(const char *extensions, const char *ext); -// print a multi line string with line numbers (e.g. 
for shader sources) -// log, lev: module and log level, as in mp_msg() -void mp_log_source(struct mp_log *log, int lev, const char *src); +#endif diff --git a/video/out/vo.c b/video/out/vo.c index f9c5d04..63f5b34 100644 --- a/video/out/vo.c +++ b/video/out/vo.c @@ -47,10 +47,11 @@ #include "osdep/io.h" #include "osdep/threads.h" +extern const struct vo_driver video_out_mediacodec_embed; extern const struct vo_driver video_out_x11; extern const struct vo_driver video_out_vdpau; extern const struct vo_driver video_out_xv; -extern const struct vo_driver video_out_opengl; +extern const struct vo_driver video_out_gpu; extern const struct vo_driver video_out_opengl_cb; extern const struct vo_driver video_out_null; extern const struct vo_driver video_out_image; @@ -60,34 +61,31 @@ extern const struct vo_driver video_out_drm; extern const struct vo_driver video_out_direct3d; extern const struct vo_driver video_out_sdl; extern const struct vo_driver video_out_vaapi; -extern const struct vo_driver video_out_wayland; extern const struct vo_driver video_out_rpi; extern const struct vo_driver video_out_tct; const struct vo_driver *const video_out_drivers[] = { +#if HAVE_ANDROID + &video_out_mediacodec_embed, +#endif #if HAVE_RPI &video_out_rpi, #endif -#if HAVE_GL - &video_out_opengl, -#endif + &video_out_gpu, #if HAVE_VDPAU &video_out_vdpau, #endif #if HAVE_DIRECT3D &video_out_direct3d, #endif -#if HAVE_WAYLAND - &video_out_wayland, -#endif #if HAVE_XV &video_out_xv, #endif #if HAVE_SDL2 &video_out_sdl, #endif -#if HAVE_VAAPI_X11 +#if HAVE_VAAPI_X11 && HAVE_GPL &video_out_vaapi, #endif #if HAVE_X11 @@ -136,6 +134,8 @@ struct vo_internal { int64_t nominal_vsync_interval; + bool external_renderloop_drive; + int64_t vsync_interval; int64_t *vsync_samples; int num_vsync_samples; @@ -196,8 +196,9 @@ const struct m_obj_list vo_obj_list = { .get_desc = get_desc, .description = "video outputs", .aliases = { - {"gl", "opengl"}, + {"gl", "gpu"}, {"direct3d_shaders", "direct3d"}, + 
{"opengl", "gpu"}, {0} }, .allow_unknown_entries = true, @@ -789,11 +790,12 @@ static void wait_until(struct vo *vo, int64_t target) pthread_mutex_unlock(&in->lock); } -static bool render_frame(struct vo *vo) +bool vo_render_frame_external(struct vo *vo) { struct vo_internal *in = vo->in; struct vo_frame *frame = NULL; bool got_frame = false; + bool flipped = false; update_display_fps(vo); @@ -855,6 +857,7 @@ static bool render_frame(struct vo *vo) if (in->dropped_frame) { in->drop_count += 1; } else { + flipped = true; in->rendering = true; in->hasframe_rendered = true; int64_t prev_drop_count = vo->in->drop_count; @@ -886,6 +889,11 @@ static bool render_frame(struct vo *vo) update_vsync_timing_after_swap(vo); } + if (vo->driver->caps & VO_CAP_NOREDRAW) { + talloc_free(in->current_frame); + in->current_frame = NULL; + } + if (in->dropped_frame) { MP_STATS(vo, "drop-vo"); } else { @@ -900,6 +908,8 @@ static bool render_frame(struct vo *vo) done: talloc_free(frame); pthread_mutex_unlock(&in->lock); + if (in->external_renderloop_drive) + return flipped; return got_frame || (in->frame_queued && in->frame_queued->display_synced); } @@ -907,7 +917,7 @@ static void do_redraw(struct vo *vo) { struct vo_internal *in = vo->in; - if (!vo->config_ok) + if (!vo->config_ok || (vo->driver->caps & VO_CAP_NOREDRAW)) return; pthread_mutex_lock(&in->lock); @@ -942,6 +952,44 @@ static void do_redraw(struct vo *vo) talloc_free(frame); } +static void drop_unrendered_frame(struct vo *vo) +{ + struct vo_internal *in = vo->in; + + pthread_mutex_lock(&in->lock); + + if (!in->frame_queued) + goto end; + + if ((in->frame_queued->pts + in->frame_queued->duration) > mp_time_us()) + goto end; + + MP_VERBOSE(vo, "Dropping unrendered frame (pts %"PRId64")\n", in->frame_queued->pts); + + talloc_free(in->frame_queued); + in->frame_queued = NULL; + in->hasframe = false; + pthread_cond_broadcast(&in->wakeup); + wakeup_core(vo); + +end: + pthread_mutex_unlock(&in->lock); +} + +void 
vo_enable_external_renderloop(struct vo *vo) +{ + struct vo_internal *in = vo->in; + MP_VERBOSE(vo, "Enabling event driven renderloop!\n"); + in->external_renderloop_drive = true; +} + +void vo_disable_external_renderloop(struct vo *vo) +{ + struct vo_internal *in = vo->in; + MP_VERBOSE(vo, "Disabling event driven renderloop!\n"); + in->external_renderloop_drive = false; +} + static void *vo_thread(void *ptr) { struct vo *vo = ptr; @@ -963,7 +1011,11 @@ static void *vo_thread(void *ptr) if (in->terminate) break; vo->driver->control(vo, VOCTRL_CHECK_EVENTS, NULL); - bool working = render_frame(vo); + bool working = false; + if (!in->external_renderloop_drive || !in->hasframe_rendered) + working = vo_render_frame_external(vo); + else + drop_unrendered_frame(vo); int64_t now = mp_time_us(); int64_t wait_until = now + (working ? 0 : (int64_t)1e9); @@ -976,7 +1028,7 @@ static void *vo_thread(void *ptr) wakeup_core(vo); } } - if (vo->want_redraw && !in->want_redraw) { + if (vo->want_redraw) { vo->want_redraw = false; in->want_redraw = true; wakeup_core(vo); diff --git a/video/out/vo.h b/video/out/vo.h index 2a0c3ef..995d6b9 100644 --- a/video/out/vo.h +++ b/video/out/vo.h @@ -172,6 +172,8 @@ enum { VO_CAP_ROTATE90 = 1 << 0, // VO does framedrop itself (vo_vdpau). Untimed/encoding VOs never drop. VO_CAP_FRAMEDROP = 1 << 1, + // VO does not support redraws (vo_mediacodec_embed). 
+ VO_CAP_NOREDRAW = 1 << 2, }; #define VO_MAX_REQ_FRAMES 10 @@ -374,7 +376,7 @@ struct vo { struct vo_x11_state *x11; struct vo_w32_state *w32; struct vo_cocoa_state *cocoa; - struct vo_wayland_state *wayland; + struct vo_wayland_state *wl; struct mp_hwdec_devices *hwdec_devs; struct input_ctx *input_ctx; struct osd_state *osd; @@ -431,6 +433,9 @@ void vo_query_formats(struct vo *vo, uint8_t *list); void vo_event(struct vo *vo, int event); int vo_query_and_reset_events(struct vo *vo, int events); struct mp_image *vo_get_current_frame(struct vo *vo); +void vo_enable_external_renderloop(struct vo *vo); +void vo_disable_external_renderloop(struct vo *vo); +bool vo_render_frame_external(struct vo *vo); void vo_set_queue_params(struct vo *vo, int64_t offset_us, int num_req_frames); int vo_get_num_req_frames(struct vo *vo); int64_t vo_get_vsync_interval(struct vo *vo); diff --git a/video/out/vo_caca.c b/video/out/vo_caca.c index 46090af..e63bd69 100644 --- a/video/out/vo_caca.c +++ b/video/out/vo_caca.c @@ -42,6 +42,11 @@ #include "common/msg.h" #include "input/input.h" +#include "config.h" +#if !HAVE_GPL +#error GPL only +#endif + struct priv { caca_canvas_t *canvas; caca_display_t *display; diff --git a/video/out/vo_direct3d.c b/video/out/vo_direct3d.c index 952dca8..a131d21 100644 --- a/video/out/vo_direct3d.c +++ b/video/out/vo_direct3d.c @@ -40,6 +40,11 @@ #include "w32_common.h" #include "sub/osd.h" +#include "config.h" +#if !HAVE_GPL +#error GPL only +#endif + // shaders generated by fxc.exe from d3d_shader_yuv.hlsl #include "d3d_shader_420p.h" diff --git a/video/out/vo_drm.c b/video/out/vo_drm.c index 2fdd840..24189d5 100644 --- a/video/out/vo_drm.c +++ b/video/out/vo_drm.c @@ -412,7 +412,9 @@ static int preinit(struct vo *vo) } p->kms = kms_create( - vo->log, vo->opts->drm_connector_spec, vo->opts->drm_mode_id); + vo->log, vo->opts->drm_opts->drm_connector_spec, + vo->opts->drm_opts->drm_mode_id, + vo->opts->drm_opts->drm_overlay_id); if (!p->kms) { MP_ERR(vo, 
"Failed to create KMS.\n"); goto err; diff --git a/video/out/vo_gpu.c b/video/out/vo_gpu.c new file mode 100644 index 0000000..95318d3 --- /dev/null +++ b/video/out/vo_gpu.c @@ -0,0 +1,336 @@ +/* + * Based on vo_gl.c by Reimar Doeffinger. + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <stdbool.h> +#include <assert.h> + +#include <libavutil/common.h> + +#include "config.h" + +#include "mpv_talloc.h" +#include "common/common.h" +#include "misc/bstr.h" +#include "common/msg.h" +#include "common/global.h" +#include "options/m_config.h" +#include "vo.h" +#include "video/mp_image.h" +#include "sub/osd.h" + +#include "gpu/context.h" +#include "gpu/hwdec.h" +#include "gpu/video.h" + +struct gpu_priv { + struct mp_log *log; + struct ra_ctx *ctx; + + char *context_name; + char *context_type; + struct ra_ctx_opts opts; + struct gl_video *renderer; + + int events; +}; + +static void resize(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + struct ra_swapchain *sw = p->ctx->swapchain; + + MP_VERBOSE(vo, "Resize: %dx%d\n", vo->dwidth, vo->dheight); + + struct mp_rect src, dst; + struct mp_osd_res osd; + vo_get_src_dst_rects(vo, &src, &dst, &osd); + + gl_video_resize(p->renderer, &src, &dst, &osd); + + int fb_depth = sw->fns->color_depth ? 
sw->fns->color_depth(sw) : 0; + if (fb_depth) + MP_VERBOSE(p, "Reported display depth: %d\n", fb_depth); + gl_video_set_fb_depth(p->renderer, fb_depth); + + vo->want_redraw = true; +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct gpu_priv *p = vo->priv; + struct ra_swapchain *sw = p->ctx->swapchain; + + struct ra_fbo fbo; + if (!sw->fns->start_frame(sw, &fbo)) + return; + + gl_video_render_frame(p->renderer, frame, fbo); + if (!sw->fns->submit_frame(sw, frame)) { + MP_ERR(vo, "Failed presenting frame!\n"); + return; + } +} + +static void flip_page(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + struct ra_swapchain *sw = p->ctx->swapchain; + sw->fns->swap_buffers(sw); +} + +static int query_format(struct vo *vo, int format) +{ + struct gpu_priv *p = vo->priv; + if (!gl_video_check_format(p->renderer, format)) + return 0; + return 1; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct gpu_priv *p = vo->priv; + + if (!p->ctx->fns->reconfig(p->ctx)) + return -1; + + resize(vo); + gl_video_config(p->renderer, params); + + return 0; +} + +static void request_hwdec_api(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + + gl_video_load_hwdecs_all(p->renderer, vo->hwdec_devs); +} + +static void call_request_hwdec_api(void *ctx) +{ + // Roundabout way to run hwdec loading on the VO thread. + // Redirects to request_hwdec_api(). 
+ vo_control(ctx, VOCTRL_LOAD_HWDEC_API, NULL); +} + +static void get_and_update_icc_profile(struct gpu_priv *p) +{ + if (gl_video_icc_auto_enabled(p->renderer)) { + MP_VERBOSE(p, "Querying ICC profile...\n"); + bstr icc = bstr0(NULL); + int r = p->ctx->fns->control(p->ctx, &p->events, VOCTRL_GET_ICC_PROFILE, &icc); + + if (r != VO_NOTAVAIL) { + if (r == VO_FALSE) { + MP_WARN(p, "Could not retrieve an ICC profile.\n"); + } else if (r == VO_NOTIMPL) { + MP_ERR(p, "icc-profile-auto not implemented on this platform.\n"); + } + + gl_video_set_icc_profile(p->renderer, icc); + } + } +} + +static void get_and_update_ambient_lighting(struct gpu_priv *p) +{ + int lux; + int r = p->ctx->fns->control(p->ctx, &p->events, VOCTRL_GET_AMBIENT_LUX, &lux); + if (r == VO_TRUE) { + gl_video_set_ambient_lux(p->renderer, lux); + } + if (r != VO_TRUE && gl_video_gamma_auto_enabled(p->renderer)) { + MP_ERR(p, "gamma_auto option provided, but querying for ambient" + " lighting is not supported on this platform\n"); + } +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + struct gpu_priv *p = vo->priv; + struct ra_swapchain *sw = p->ctx->swapchain; + + switch (request) { + case VOCTRL_SET_PANSCAN: + resize(vo); + return VO_TRUE; + case VOCTRL_SET_EQUALIZER: + vo->want_redraw = true; + return VO_TRUE; + case VOCTRL_SCREENSHOT_WIN: { + struct mp_image *screen = NULL; + if (sw->fns->screenshot) + screen = sw->fns->screenshot(sw); + if (!screen) + break; // redirect to backend + // set image parameters according to the display, if possible + screen->params.color = gl_video_get_output_colorspace(p->renderer); + *(struct mp_image **)data = screen; + return true; + } + case VOCTRL_LOAD_HWDEC_API: + request_hwdec_api(vo); + return true; + case VOCTRL_UPDATE_RENDER_OPTS: { + gl_video_configure_queue(p->renderer, vo); + get_and_update_icc_profile(p); + vo->want_redraw = true; + return true; + } + case VOCTRL_RESET: + gl_video_reset(p->renderer); + return true; + case 
VOCTRL_PAUSE: + if (gl_video_showing_interpolated_frame(p->renderer)) + vo->want_redraw = true; + break; + case VOCTRL_PERFORMANCE_DATA: + gl_video_perfdata(p->renderer, (struct voctrl_performance_data *)data); + return true; + } + + int events = 0; + int r = p->ctx->fns->control(p->ctx, &events, request, data); + if (events & VO_EVENT_ICC_PROFILE_CHANGED) { + get_and_update_icc_profile(p); + vo->want_redraw = true; + } + if (events & VO_EVENT_AMBIENT_LIGHTING_CHANGED) { + get_and_update_ambient_lighting(p); + vo->want_redraw = true; + } + events |= p->events; + p->events = 0; + if (events & VO_EVENT_RESIZE) + resize(vo); + if (events & VO_EVENT_EXPOSE) + vo->want_redraw = true; + vo_event(vo, events); + + return r; +} + +static void wakeup(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + if (p->ctx && p->ctx->fns->wakeup) + p->ctx->fns->wakeup(p->ctx); +} + +static void wait_events(struct vo *vo, int64_t until_time_us) +{ + struct gpu_priv *p = vo->priv; + if (p->ctx && p->ctx->fns->wait_events) { + p->ctx->fns->wait_events(p->ctx, until_time_us); + } else { + vo_wait_default(vo, until_time_us); + } +} + +static struct mp_image *get_image(struct vo *vo, int imgfmt, int w, int h, + int stride_align) +{ + struct gpu_priv *p = vo->priv; + + return gl_video_get_image(p->renderer, imgfmt, w, h, stride_align); +} + +static void uninit(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + + gl_video_uninit(p->renderer); + if (vo->hwdec_devs) { + hwdec_devices_set_loader(vo->hwdec_devs, NULL, NULL); + hwdec_devices_destroy(vo->hwdec_devs); + } + ra_ctx_destroy(&p->ctx); +} + +static int preinit(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + p->log = vo->log; + + int alpha_mode; + mp_read_option_raw(vo->global, "alpha", &m_option_type_choice, &alpha_mode); + + struct ra_ctx_opts opts = p->opts; + opts.want_alpha = alpha_mode == 1; + + p->ctx = ra_ctx_create(vo, p->context_type, p->context_name, opts); + if (!p->ctx) + goto err_out; + assert(p->ctx->ra); + 
assert(p->ctx->swapchain); + + p->renderer = gl_video_init(p->ctx->ra, vo->log, vo->global); + gl_video_set_osd_source(p->renderer, vo->osd); + gl_video_configure_queue(p->renderer, vo); + + get_and_update_icc_profile(p); + + vo->hwdec_devs = hwdec_devices_create(); + hwdec_devices_set_loader(vo->hwdec_devs, call_request_hwdec_api, vo); + + gl_video_load_hwdecs(p->renderer, vo->hwdec_devs, false); + + return 0; + +err_out: + uninit(vo); + return -1; +} + +#define OPT_BASE_STRUCT struct gpu_priv +static const m_option_t options[] = { + OPT_STRING_VALIDATE("gpu-context", context_name, 0, ra_ctx_validate_context), + OPT_STRING_VALIDATE("gpu-api", context_type, 0, ra_ctx_validate_api), + OPT_FLAG("gpu-debug", opts.debug, 0), + OPT_FLAG("gpu-sw", opts.allow_sw, 0), + OPT_INTRANGE("swapchain-depth", opts.swapchain_depth, 0, 1, 8), + {0} +}; + +static const struct gpu_priv defaults = { .opts = { + .swapchain_depth = 3, +}}; + +const struct vo_driver video_out_gpu = { + .description = "Shader-based GPU Renderer", + .name = "gpu", + .caps = VO_CAP_ROTATE90, + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .get_image = get_image, + .draw_frame = draw_frame, + .flip_page = flip_page, + .wait_events = wait_events, + .wakeup = wakeup, + .uninit = uninit, + .priv_size = sizeof(struct gpu_priv), + .priv_defaults = &defaults, + .options = options, +}; diff --git a/video/out/vo_lavc.c b/video/out/vo_lavc.c index be7de12..4b69231 100644 --- a/video/out/vo_lavc.c +++ b/video/out/vo_lavc.c @@ -49,7 +49,6 @@ struct priv { int64_t mindeltapts; double expected_next_pts; mp_image_t *lastimg; - int lastimg_wants_osd; int lastdisplaycount; AVRational worst_time_base; @@ -287,6 +286,14 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi) double pts = mpi ? 
mpi->pts : MP_NOPTS_VALUE; + if (mpi) { + assert(vo->params); + + struct mp_osd_res dim = osd_res_from_image_params(vo->params); + + osd_draw_on_image(vo->osd, dim, mpi->pts, OSD_DRAW_SUB_ONLY, mpi); + } + if (!vc || vc->shutdown) goto done; if (!encode_lavc_start(ectx)) { @@ -451,7 +458,6 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi) talloc_free(vc->lastimg); vc->lastimg = mpi; mpi = NULL; - vc->lastimg_wants_osd = true; vc->lastframeipts = vc->lastipts = frameipts; if (ectx->options->rawts && vc->lastipts < 0) { @@ -462,17 +468,9 @@ static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi) } else { MP_INFO(vo, "Frame at pts %d got dropped " "entirely because pts went backwards\n", (int) frameipts); - vc->lastimg_wants_osd = false; } } - if (vc->lastimg && vc->lastimg_wants_osd && vo->params) { - struct mp_osd_res dim = osd_res_from_image_params(vo->params); - - osd_draw_on_image(vo->osd, dim, vc->lastimg->pts, OSD_DRAW_SUB_ONLY, - vc->lastimg); - } - done: talloc_free(mpi); } diff --git a/video/out/vo_mediacodec_embed.c b/video/out/vo_mediacodec_embed.c new file mode 100644 index 0000000..63975e9 --- /dev/null +++ b/video/out/vo_mediacodec_embed.c @@ -0,0 +1,119 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <libavcodec/mediacodec.h> +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_mediacodec.h> + +#include "common/common.h" +#include "vo.h" +#include "video/mp_image.h" +#include "video/hwdec.h" + +struct priv { + struct mp_image *next_image; + struct mp_hwdec_ctx hwctx; +}; + +static AVBufferRef *create_mediacodec_device_ref(struct vo *vo) +{ + AVBufferRef *device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_MEDIACODEC); + if (!device_ref) + return NULL; + + AVHWDeviceContext *ctx = (void *)device_ref->data; + AVMediaCodecDeviceContext *hwctx = ctx->hwctx; + hwctx->surface = (void *)(intptr_t)(vo->opts->WinID); + + if (av_hwdevice_ctx_init(device_ref) < 0) + av_buffer_unref(&device_ref); + + return device_ref; +} + +static int preinit(struct vo *vo) +{ + struct priv *p = vo->priv; + vo->hwdec_devs = hwdec_devices_create(); + p->hwctx = (struct mp_hwdec_ctx){ + .driver_name = "mediacodec_embed", + .av_device_ref = create_mediacodec_device_ref(vo), + }; + hwdec_devices_add(vo->hwdec_devs, &p->hwctx); + return 0; +} + +static void flip_page(struct vo *vo) +{ + struct priv *p = vo->priv; + if (!p->next_image) + return; + + AVMediaCodecBuffer *buffer = (AVMediaCodecBuffer *)p->next_image->planes[3]; + av_mediacodec_release_buffer(buffer, 1); + mp_image_unrefp(&p->next_image); +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct priv *p = vo->priv; + + mp_image_t *mpi = NULL; + if (!frame->redraw && !frame->repeat) + mpi = mp_image_new_ref(frame->current); + + talloc_free(p->next_image); + p->next_image = mpi; +} + +static int query_format(struct vo *vo, int format) +{ + return format == IMGFMT_MEDIACODEC; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + return VO_NOTIMPL; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + return 0; +} + +static void uninit(struct vo *vo) +{ + struct priv *p = vo->priv; + mp_image_unrefp(&p->next_image); + + 
hwdec_devices_remove(vo->hwdec_devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); +} + +const struct vo_driver video_out_mediacodec_embed = { + .description = "Android (Embedded MediaCodec Surface)", + .name = "mediacodec_embed", + .caps = VO_CAP_NOREDRAW, + .preinit = preinit, + .query_format = query_format, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .reconfig = reconfig, + .uninit = uninit, + .priv_size = sizeof(struct priv), +}; diff --git a/video/out/vo_opengl.c b/video/out/vo_opengl.c deleted file mode 100644 index 72691e5..0000000 --- a/video/out/vo_opengl.c +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Based on vo_gl.c by Reimar Doeffinger. - * - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <math.h> -#include <stdbool.h> -#include <assert.h> - -#include <libavutil/common.h> - -#include "config.h" - -#include "mpv_talloc.h" -#include "common/common.h" -#include "misc/bstr.h" -#include "common/msg.h" -#include "common/global.h" -#include "options/m_config.h" -#include "vo.h" -#include "video/mp_image.h" -#include "sub/osd.h" - -#include "opengl/context.h" -#include "opengl/utils.h" -#include "opengl/hwdec.h" -#include "opengl/osd.h" -#include "filter_kernels.h" -#include "video/hwdec.h" -#include "opengl/video.h" -#include "opengl/ra_gl.h" - -#define NUM_VSYNC_FENCES 10 - -struct vo_opengl_opts { - int use_glFinish; - int waitvsync; - int use_gl_debug; - int allow_sw; - int swap_interval; - int vsync_fences; - char *backend; - int es; - int pattern[2]; -}; - -struct gl_priv { - struct vo *vo; - struct mp_log *log; - MPGLContext *glctx; - GL *gl; - struct ra *ra; - - struct vo_opengl_opts opts; - - struct gl_video *renderer; - - struct ra_hwdec *hwdec; - - int events; - - int frames_rendered; - unsigned int prev_sgi_sync_count; - - // check-pattern sub-option; for testing/debugging - int last_pattern; - int matches, mismatches; - - GLsync vsync_fences[NUM_VSYNC_FENCES]; - int num_vsync_fences; -}; - -static void resize(struct gl_priv *p) -{ - struct vo *vo = p->vo; - - MP_VERBOSE(vo, "Resize: %dx%d\n", vo->dwidth, vo->dheight); - - struct mp_rect src, dst; - struct mp_osd_res osd; - vo_get_src_dst_rects(vo, &src, &dst, &osd); - - gl_video_resize(p->renderer, &src, &dst, &osd); - - vo->want_redraw = true; -} - -static void check_pattern(struct vo *vo, int item) -{ - struct gl_priv *p = vo->priv; - int expected = p->opts.pattern[p->last_pattern]; - if (item == expected) { - p->last_pattern++; - if (p->last_pattern >= 2) - p->last_pattern = 0; - p->matches++; - } else { - p->mismatches++; - MP_WARN(vo, "wrong pattern, expected %d got %d (hit: %d, mis: %d)\n", - expected, item, 
p->matches, p->mismatches); - } -} - -static void draw_frame(struct vo *vo, struct vo_frame *frame) -{ - struct gl_priv *p = vo->priv; - GL *gl = p->gl; - - mpgl_start_frame(p->glctx); - - if (gl->FenceSync && p->num_vsync_fences < p->opts.vsync_fences) { - GLsync fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);; - if (fence) - p->vsync_fences[p->num_vsync_fences++] = fence; - } - - struct fbodst target = { - .tex = ra_create_wrapped_fb(p->ra, p->glctx->main_fb, - vo->dwidth, vo->dheight), - .flip = !p->glctx->flip_v, - }; - gl_video_render_frame(p->renderer, frame, target); - ra_tex_free(p->ra, &target.tex); - - if (p->opts.use_glFinish) - gl->Finish(); -} - -static void flip_page(struct vo *vo) -{ - struct gl_priv *p = vo->priv; - GL *gl = p->gl; - - mpgl_swap_buffers(p->glctx); - - p->frames_rendered++; - if (p->frames_rendered > 5 && !p->opts.use_gl_debug) - ra_gl_set_debug(p->ra, false); - - if (p->opts.use_glFinish) - gl->Finish(); - - if (p->opts.waitvsync || p->opts.pattern[0]) { - if (gl->GetVideoSync) { - unsigned int n1 = 0, n2 = 0; - gl->GetVideoSync(&n1); - if (p->opts.waitvsync) - gl->WaitVideoSync(2, (n1 + 1) % 2, &n2); - int step = n1 - p->prev_sgi_sync_count; - p->prev_sgi_sync_count = n1; - MP_DBG(vo, "Flip counts: %u->%u, step=%d\n", n1, n2, step); - if (p->opts.pattern[0]) - check_pattern(vo, step); - } else { - MP_WARN(vo, "GLX_SGI_video_sync not available, disabling.\n"); - p->opts.waitvsync = 0; - p->opts.pattern[0] = 0; - } - } - while (p->opts.vsync_fences > 0 && p->num_vsync_fences >= p->opts.vsync_fences) { - gl->ClientWaitSync(p->vsync_fences[0], GL_SYNC_FLUSH_COMMANDS_BIT, 1e9); - gl->DeleteSync(p->vsync_fences[0]); - MP_TARRAY_REMOVE_AT(p->vsync_fences, p->num_vsync_fences, 0); - } -} - -static int query_format(struct vo *vo, int format) -{ - struct gl_priv *p = vo->priv; - if (!gl_video_check_format(p->renderer, format)) - return 0; - return 1; -} - -static int reconfig(struct vo *vo, struct mp_image_params *params) -{ - 
struct gl_priv *p = vo->priv; - - if (mpgl_reconfig_window(p->glctx) < 0) - return -1; - - resize(p); - - gl_video_config(p->renderer, params); - - return 0; -} - -static void request_hwdec_api(struct vo *vo, void *api) -{ - struct gl_priv *p = vo->priv; - - if (p->hwdec) - return; - - p->hwdec = ra_hwdec_load_api(p->vo->log, p->ra, p->vo->global, - vo->hwdec_devs, (intptr_t)api); - gl_video_set_hwdec(p->renderer, p->hwdec); -} - -static void call_request_hwdec_api(void *ctx, enum hwdec_type type) -{ - // Roundabout way to run hwdec loading on the VO thread. - // Redirects to request_hwdec_api(). - vo_control(ctx, VOCTRL_LOAD_HWDEC_API, (void *)(intptr_t)type); -} - -static void get_and_update_icc_profile(struct gl_priv *p) -{ - if (gl_video_icc_auto_enabled(p->renderer)) { - MP_VERBOSE(p, "Querying ICC profile...\n"); - bstr icc = bstr0(NULL); - int r = mpgl_control(p->glctx, &p->events, VOCTRL_GET_ICC_PROFILE, &icc); - - if (r != VO_NOTAVAIL) { - if (r == VO_FALSE) { - MP_WARN(p, "Could not retrieve an ICC profile.\n"); - } else if (r == VO_NOTIMPL) { - MP_ERR(p, "icc-profile-auto not implemented on this platform.\n"); - } - - gl_video_set_icc_profile(p->renderer, icc); - } - } -} - -static void get_and_update_ambient_lighting(struct gl_priv *p) -{ - int lux; - int r = mpgl_control(p->glctx, &p->events, VOCTRL_GET_AMBIENT_LUX, &lux); - if (r == VO_TRUE) { - gl_video_set_ambient_lux(p->renderer, lux); - } - if (r != VO_TRUE && gl_video_gamma_auto_enabled(p->renderer)) { - MP_ERR(p, "gamma_auto option provided, but querying for ambient" - " lighting is not supported on this platform\n"); - } -} - -static int control(struct vo *vo, uint32_t request, void *data) -{ - struct gl_priv *p = vo->priv; - - switch (request) { - case VOCTRL_SET_PANSCAN: - resize(p); - return VO_TRUE; - case VOCTRL_SET_EQUALIZER: - vo->want_redraw = true; - return VO_TRUE; - case VOCTRL_SCREENSHOT_WIN: { - struct mp_image *screen = gl_read_fbo_contents(p->gl, p->glctx->main_fb, - vo->dwidth, 
vo->dheight); - if (!screen) - break; // redirect to backend - // set image parameters according to the display, if possible - screen->params.color = gl_video_get_output_colorspace(p->renderer); - if (p->glctx->flip_v) - mp_image_vflip(screen); - *(struct mp_image **)data = screen; - return true; - } - case VOCTRL_LOAD_HWDEC_API: - request_hwdec_api(vo, data); - return true; - case VOCTRL_UPDATE_RENDER_OPTS: { - gl_video_update_options(p->renderer); - get_and_update_icc_profile(p); - gl_video_configure_queue(p->renderer, p->vo); - p->vo->want_redraw = true; - return true; - } - case VOCTRL_RESET: - gl_video_reset(p->renderer); - return true; - case VOCTRL_PAUSE: - if (gl_video_showing_interpolated_frame(p->renderer)) - vo->want_redraw = true; - return true; - case VOCTRL_PERFORMANCE_DATA: - gl_video_perfdata(p->renderer, (struct voctrl_performance_data *)data); - return true; - } - - int events = 0; - int r = mpgl_control(p->glctx, &events, request, data); - if (events & VO_EVENT_ICC_PROFILE_CHANGED) { - get_and_update_icc_profile(p); - vo->want_redraw = true; - } - if (events & VO_EVENT_AMBIENT_LIGHTING_CHANGED) { - get_and_update_ambient_lighting(p); - vo->want_redraw = true; - } - events |= p->events; - p->events = 0; - if (events & VO_EVENT_RESIZE) - resize(p); - if (events & VO_EVENT_EXPOSE) - vo->want_redraw = true; - vo_event(vo, events); - - return r; -} - -static void wakeup(struct vo *vo) -{ - struct gl_priv *p = vo->priv; - if (p->glctx && p->glctx->driver->wakeup) - p->glctx->driver->wakeup(p->glctx); -} - -static void wait_events(struct vo *vo, int64_t until_time_us) -{ - struct gl_priv *p = vo->priv; - if (p->glctx->driver->wait_events) { - p->glctx->driver->wait_events(p->glctx, until_time_us); - } else { - vo_wait_default(vo, until_time_us); - } -} - -static struct mp_image *get_image(struct vo *vo, int imgfmt, int w, int h, - int stride_align) -{ - struct gl_priv *p = vo->priv; - - return gl_video_get_image(p->renderer, imgfmt, w, h, stride_align); 
-} - -static void uninit(struct vo *vo) -{ - struct gl_priv *p = vo->priv; - - gl_video_uninit(p->renderer); - ra_hwdec_uninit(p->hwdec); - if (vo->hwdec_devs) { - hwdec_devices_set_loader(vo->hwdec_devs, NULL, NULL); - hwdec_devices_destroy(vo->hwdec_devs); - } - ra_free(&p->ra); - mpgl_uninit(p->glctx); -} - -static int preinit(struct vo *vo) -{ - struct gl_priv *p = vo->priv; - p->vo = vo; - p->log = vo->log; - - int vo_flags = 0; - - int alpha_mode; - mp_read_option_raw(vo->global, "alpha", &m_option_type_choice, &alpha_mode); - - if (alpha_mode == 1) - vo_flags |= VOFLAG_ALPHA; - - if (p->opts.use_gl_debug) - vo_flags |= VOFLAG_GL_DEBUG; - - if (p->opts.es == 1) - vo_flags |= VOFLAG_GLES; - if (p->opts.es == 2) - vo_flags |= VOFLAG_GLES | VOFLAG_GLES2; - if (p->opts.es == -1) - vo_flags |= VOFLAG_NO_GLES; - - if (p->opts.allow_sw) - vo_flags |= VOFLAG_SW; - - p->glctx = mpgl_init(vo, p->opts.backend, vo_flags); - if (!p->glctx) - goto err_out; - p->gl = p->glctx->gl; - - if (p->gl->SwapInterval) { - p->gl->SwapInterval(p->opts.swap_interval); - } else { - MP_VERBOSE(vo, "swap_control extension missing.\n"); - } - - p->ra = ra_create_gl(p->gl, vo->log); - if (!p->ra) - goto err_out; - - p->renderer = gl_video_init(p->ra, vo->log, vo->global); - gl_video_set_osd_source(p->renderer, vo->osd); - gl_video_configure_queue(p->renderer, vo); - - get_and_update_icc_profile(p); - - vo->hwdec_devs = hwdec_devices_create(); - - hwdec_devices_set_loader(vo->hwdec_devs, call_request_hwdec_api, vo); - - p->hwdec = ra_hwdec_load(p->vo->log, p->ra, vo->global, - vo->hwdec_devs, vo->opts->gl_hwdec_interop); - gl_video_set_hwdec(p->renderer, p->hwdec); - - gl_check_error(p->gl, p->log, "before retrieving framebuffer depth"); - int fb_depth = gl_get_fb_depth(p->gl, p->glctx->main_fb); - gl_check_error(p->gl, p->log, "retrieving framebuffer depth"); - if (fb_depth) - MP_VERBOSE(p, "Reported display depth: %d\n", fb_depth); - gl_video_set_fb_depth(p->renderer, fb_depth); - - return 
0; - -err_out: - uninit(vo); - return -1; -} - -#define OPT_BASE_STRUCT struct gl_priv - -const struct vo_driver video_out_opengl = { - .description = "Extended OpenGL Renderer", - .name = "opengl", - .caps = VO_CAP_ROTATE90, - .preinit = preinit, - .query_format = query_format, - .reconfig = reconfig, - .control = control, - .get_image = get_image, - .draw_frame = draw_frame, - .flip_page = flip_page, - .wait_events = wait_events, - .wakeup = wakeup, - .uninit = uninit, - .priv_size = sizeof(struct gl_priv), - .options = (const m_option_t[]) { - OPT_FLAG("opengl-glfinish", opts.use_glFinish, 0), - OPT_FLAG("opengl-waitvsync", opts.waitvsync, 0), - OPT_INT("opengl-swapinterval", opts.swap_interval, 0), - OPT_FLAG("opengl-debug", opts.use_gl_debug, 0), - OPT_STRING_VALIDATE("opengl-backend", opts.backend, 0, - mpgl_validate_backend_opt), - OPT_FLAG("opengl-sw", opts.allow_sw, 0), - OPT_CHOICE("opengl-es", opts.es, 0, ({"no", -1}, {"auto", 0}, - {"yes", 1}, {"force2", 2})), - OPT_INTPAIR("opengl-check-pattern", opts.pattern, 0), - OPT_INTRANGE("opengl-vsync-fences", opts.vsync_fences, 0, - 0, NUM_VSYNC_FENCES), - - {0} - }, - .priv_defaults = &(const struct gl_priv){ - .opts = { - .swap_interval = 1, - }, - }, -}; diff --git a/video/out/vo_opengl_cb.c b/video/out/vo_opengl_cb.c index ea6aaa9..c8dab15 100644 --- a/video/out/vo_opengl_cb.c +++ b/video/out/vo_opengl_cb.c @@ -24,9 +24,10 @@ #include "common/global.h" #include "player/client.h" +#include "gpu/video.h" +#include "gpu/hwdec.h" #include "opengl/common.h" -#include "opengl/video.h" -#include "opengl/hwdec.h" +#include "opengl/context.h" #include "opengl/ra_gl.h" #include "libmpv/opengl_cb.h" @@ -86,9 +87,8 @@ struct mpv_opengl_cb_context { // application's OpenGL context is current - i.e. only while the // host application is calling certain mpv_opengl_cb_* APIs. 
GL *gl; - struct ra *ra; + struct ra_ctx *ra_ctx; struct gl_video *renderer; - struct ra_hwdec *hwdec; struct m_config_cache *vo_opts_cache; struct mp_vo_opts *vo_opts; }; @@ -171,18 +171,34 @@ int mpv_opengl_cb_init_gl(struct mpv_opengl_cb_context *ctx, const char *exts, return MPV_ERROR_UNSUPPORTED; } - ctx->ra = ra_create_gl(ctx->gl, ctx->log); - if (!ctx->ra) - return MPV_ERROR_UNSUPPORTED; + // initialize a blank ra_ctx to reuse ra_gl_ctx + ctx->ra_ctx = talloc_zero(ctx, struct ra_ctx); + ctx->ra_ctx->log = ctx->log; + ctx->ra_ctx->global = ctx->global; + ctx->ra_ctx->opts = (struct ra_ctx_opts) { + .probing = false, + .allow_sw = true, + }; + + static const struct ra_swapchain_fns empty_swapchain_fns = {0}; + struct ra_gl_ctx_params gl_params = { + // vo_opengl_cb is essentially like a gigantic external swapchain where + // the user is in charge of presentation / swapping etc. But we don't + // actually need to provide any of these functions, since we can just + // not call them to begin with - so just set it to an empty object to + // signal to ra_gl_ctx that we don't care about its latency emulation + // functionality + .external_swapchain = &empty_swapchain_fns + }; - ctx->renderer = gl_video_init(ctx->ra, ctx->log, ctx->global); + ctx->gl->SwapInterval = NULL; // we shouldn't randomly change this, so lock it + if (!ra_gl_ctx_init(ctx->ra_ctx, ctx->gl, gl_params)) + return MPV_ERROR_UNSUPPORTED; - m_config_cache_update(ctx->vo_opts_cache); + ctx->renderer = gl_video_init(ctx->ra_ctx->ra, ctx->log, ctx->global); ctx->hwdec_devs = hwdec_devices_create(); - ctx->hwdec = ra_hwdec_load(ctx->log, ctx->ra, ctx->global, - ctx->hwdec_devs, ctx->vo_opts->gl_hwdec_interop); - gl_video_set_hwdec(ctx->renderer, ctx->hwdec); + gl_video_load_hwdecs(ctx->renderer, ctx->hwdec_devs, true); pthread_mutex_lock(&ctx->lock); for (int n = IMGFMT_START; n < IMGFMT_END; n++) { @@ -217,12 +233,12 @@ int mpv_opengl_cb_uninit_gl(struct mpv_opengl_cb_context *ctx) 
gl_video_uninit(ctx->renderer); ctx->renderer = NULL; - ra_hwdec_uninit(ctx->hwdec); - ctx->hwdec = NULL; hwdec_devices_destroy(ctx->hwdec_devs); ctx->hwdec_devs = NULL; - ra_free(&ctx->ra); + ra_gl_ctx_uninit(ctx->ra_ctx); + talloc_free(ctx->ra_ctx); talloc_free(ctx->gl); + ctx->ra_ctx = NULL; ctx->gl = NULL; return 0; } @@ -236,11 +252,6 @@ int mpv_opengl_cb_draw(mpv_opengl_cb_context *ctx, int fbo, int vp_w, int vp_h) return MPV_ERROR_UNSUPPORTED; } - struct fbodst target = { - .tex = ra_create_wrapped_fb(ctx->ra, fbo, vp_w, abs(vp_h)), - .flip = vp_h < 0, - }; - reset_gl_state(ctx->gl); pthread_mutex_lock(&ctx->lock); @@ -273,14 +284,13 @@ int mpv_opengl_cb_draw(mpv_opengl_cb_context *ctx, int fbo, int vp_w, int vp_h) gl_video_config(ctx->renderer, &ctx->img_params); } if (ctx->update_new_opts) { - gl_video_update_options(ctx->renderer); if (vo) gl_video_configure_queue(ctx->renderer, vo); int debug; - mp_read_option_raw(ctx->global, "opengl-debug", &m_option_type_flag, + mp_read_option_raw(ctx->global, "gpu-debug", &m_option_type_flag, &debug); ctx->gl->debug_context = debug; - ra_gl_set_debug(ctx->ra, debug); + ra_gl_set_debug(ctx->ra_ctx->ra, debug); if (gl_video_icc_auto_enabled(ctx->renderer)) MP_ERR(ctx, "icc-profile-auto is not available with opengl-cb\n"); } @@ -316,7 +326,13 @@ int mpv_opengl_cb_draw(mpv_opengl_cb_context *ctx, int fbo, int vp_w, int vp_h) pthread_mutex_unlock(&ctx->lock); MP_STATS(ctx, "glcb-render"); + struct ra_swapchain *sw = ctx->ra_ctx->swapchain; + struct ra_fbo target; + ra_gl_ctx_resize(sw, vp_w, abs(vp_h), fbo); + ra_gl_ctx_start_frame(sw, &target); + target.flip = vp_h < 0; gl_video_render_frame(ctx->renderer, frame, target); + ra_gl_ctx_submit_frame(sw, frame); reset_gl_state(ctx->gl); @@ -328,8 +344,6 @@ int mpv_opengl_cb_draw(mpv_opengl_cb_context *ctx, int fbo, int vp_w, int vp_h) pthread_cond_wait(&ctx->wakeup, &ctx->lock); pthread_mutex_unlock(&ctx->lock); - ra_tex_free(ctx->ra, &target.tex); - return 0; } diff --git 
a/video/out/vo_rpi.c b/video/out/vo_rpi.c index 5b5d62c..4322a3f 100644 --- a/video/out/vo_rpi.c +++ b/video/out/vo_rpi.c @@ -44,7 +44,7 @@ #include "sub/osd.h" #include "opengl/ra_gl.h" -#include "opengl/video.h" +#include "gpu/video.h" struct mp_egl_rpi { struct mp_log *log; @@ -261,7 +261,7 @@ static void update_osd(struct vo *vo) MP_STATS(vo, "start rpi_osd"); struct vo_frame frame = {0}; - struct fbodst target = { + struct ra_fbo target = { .tex = ra_create_wrapped_fb(p->egl.ra, 0, p->osd_res.w, p->osd_res.h), .flip = true, }; diff --git a/video/out/vo_vaapi.c b/video/out/vo_vaapi.c index 3468ac6..a3f7015 100644 --- a/video/out/vo_vaapi.c +++ b/video/out/vo_vaapi.c @@ -96,6 +96,8 @@ struct priv { VADisplayAttribute *va_display_attrs; int *mp_display_attr; int va_num_display_attrs; + + struct va_image_formats *image_formats; }; #define OSD_VA_FORMAT VA_FOURCC_BGRA @@ -108,6 +110,306 @@ static const bool osd_formats[SUBBITMAP_COUNT] = { static void draw_osd(struct vo *vo); + +struct fmtentry { + uint32_t va; + enum mp_imgfmt mp; +}; + +static const struct fmtentry va_to_imgfmt[] = { + {VA_FOURCC_NV12, IMGFMT_NV12}, + {VA_FOURCC_YV12, IMGFMT_420P}, + {VA_FOURCC_IYUV, IMGFMT_420P}, + {VA_FOURCC_UYVY, IMGFMT_UYVY}, + // Note: not sure about endian issues (the mp formats are byte-addressed) + {VA_FOURCC_RGBA, IMGFMT_RGBA}, + {VA_FOURCC_RGBX, IMGFMT_RGBA}, + {VA_FOURCC_BGRA, IMGFMT_BGRA}, + {VA_FOURCC_BGRX, IMGFMT_BGRA}, + {0 , IMGFMT_NONE} +}; + +static enum mp_imgfmt va_fourcc_to_imgfmt(uint32_t fourcc) +{ + for (const struct fmtentry *entry = va_to_imgfmt; entry->va; ++entry) { + if (entry->va == fourcc) + return entry->mp; + } + return IMGFMT_NONE; +} + +static uint32_t va_fourcc_from_imgfmt(int imgfmt) +{ + for (const struct fmtentry *entry = va_to_imgfmt; entry->va; ++entry) { + if (entry->mp == imgfmt) + return entry->va; + } + return 0; +} + +struct va_image_formats { + VAImageFormat *entries; + int num; +}; + +static void va_get_formats(struct priv *ctx) +{ 
+ struct va_image_formats *formats = talloc_ptrtype(ctx, formats); + formats->num = vaMaxNumImageFormats(ctx->display); + formats->entries = talloc_array(formats, VAImageFormat, formats->num); + VAStatus status = vaQueryImageFormats(ctx->display, formats->entries, + &formats->num); + if (!CHECK_VA_STATUS(ctx, "vaQueryImageFormats()")) + return; + MP_VERBOSE(ctx, "%d image formats available:\n", formats->num); + for (int i = 0; i < formats->num; i++) + MP_VERBOSE(ctx, " %s\n", mp_tag_str(formats->entries[i].fourcc)); + ctx->image_formats = formats; +} + +static VAImageFormat *va_image_format_from_imgfmt(struct priv *ctx, + int imgfmt) +{ + struct va_image_formats *formats = ctx->image_formats; + const int fourcc = va_fourcc_from_imgfmt(imgfmt); + if (!formats || !formats->num || !fourcc) + return NULL; + for (int i = 0; i < formats->num; i++) { + if (formats->entries[i].fourcc == fourcc) + return &formats->entries[i]; + } + return NULL; +} + +struct va_surface { + struct mp_vaapi_ctx *ctx; + VADisplay display; + + VASurfaceID id; + int rt_format; + + // The actually allocated surface size (needed for cropping). + // mp_images can have a smaller size than this, which means they are + // cropped down to a smaller size by removing right/bottom pixels. + int w, h; + + VAImage image; // used for software decoding case + bool is_derived; // is image derived by vaDeriveImage()? +}; + +static struct va_surface *va_surface_in_mp_image(struct mp_image *mpi) +{ + return mpi && mpi->imgfmt == IMGFMT_VAAPI ? 
+ (struct va_surface*)mpi->planes[0] : NULL; +} + +static void release_va_surface(void *arg) +{ + struct va_surface *surface = arg; + + if (surface->id != VA_INVALID_ID) { + if (surface->image.image_id != VA_INVALID_ID) + vaDestroyImage(surface->display, surface->image.image_id); + vaDestroySurfaces(surface->display, &surface->id, 1); + } + + talloc_free(surface); +} + +static struct mp_image *alloc_surface(struct mp_vaapi_ctx *ctx, int rt_format, + int w, int h) +{ + VASurfaceID id = VA_INVALID_ID; + VAStatus status; + status = vaCreateSurfaces(ctx->display, rt_format, w, h, &id, 1, NULL, 0); + if (!CHECK_VA_STATUS(ctx, "vaCreateSurfaces()")) + return NULL; + + struct va_surface *surface = talloc_ptrtype(NULL, surface); + if (!surface) + return NULL; + + *surface = (struct va_surface){ + .ctx = ctx, + .id = id, + .rt_format = rt_format, + .w = w, + .h = h, + .display = ctx->display, + .image = { .image_id = VA_INVALID_ID, .buf = VA_INVALID_ID }, + }; + + struct mp_image img = {0}; + mp_image_setfmt(&img, IMGFMT_VAAPI); + mp_image_set_size(&img, w, h); + img.planes[0] = (uint8_t*)surface; + img.planes[3] = (uint8_t*)(uintptr_t)surface->id; + return mp_image_new_custom_ref(&img, surface, release_va_surface); +} + +static void va_surface_image_destroy(struct va_surface *surface) +{ + if (!surface || surface->image.image_id == VA_INVALID_ID) + return; + vaDestroyImage(surface->display, surface->image.image_id); + surface->image.image_id = VA_INVALID_ID; + surface->is_derived = false; +} + +static int va_surface_image_alloc(struct va_surface *p, VAImageFormat *format) +{ + VADisplay *display = p->display; + + if (p->image.image_id != VA_INVALID_ID && + p->image.format.fourcc == format->fourcc) + return 0; + + int r = 0; + + va_surface_image_destroy(p); + + VAStatus status = vaDeriveImage(display, p->id, &p->image); + if (status == VA_STATUS_SUCCESS) { + /* vaDeriveImage() is supported, check format */ + if (p->image.format.fourcc == format->fourcc && + p->image.width 
== p->w && p->image.height == p->h) + { + p->is_derived = true; + MP_TRACE(p->ctx, "Using vaDeriveImage()\n"); + } else { + vaDestroyImage(p->display, p->image.image_id); + status = VA_STATUS_ERROR_OPERATION_FAILED; + } + } + if (status != VA_STATUS_SUCCESS) { + p->image.image_id = VA_INVALID_ID; + status = vaCreateImage(p->display, format, p->w, p->h, &p->image); + if (!CHECK_VA_STATUS(p->ctx, "vaCreateImage()")) { + p->image.image_id = VA_INVALID_ID; + r = -1; + } + } + + return r; +} + +// img must be a VAAPI surface; make sure its internal VAImage is allocated +// to a format corresponding to imgfmt (or return an error). +static int va_surface_alloc_imgfmt(struct priv *priv, struct mp_image *img, + int imgfmt) +{ + struct va_surface *p = va_surface_in_mp_image(img); + if (!p) + return -1; + // Multiple FourCCs can refer to the same imgfmt, so check by doing the + // surjective conversion first. + if (p->image.image_id != VA_INVALID_ID && + va_fourcc_to_imgfmt(p->image.format.fourcc) == imgfmt) + return 0; + VAImageFormat *format = va_image_format_from_imgfmt(priv, imgfmt); + if (!format) + return -1; + if (va_surface_image_alloc(p, format) < 0) + return -1; + return 0; +} + +static bool va_image_map(struct mp_vaapi_ctx *ctx, VAImage *image, + struct mp_image *mpi) +{ + int imgfmt = va_fourcc_to_imgfmt(image->format.fourcc); + if (imgfmt == IMGFMT_NONE) + return false; + void *data = NULL; + const VAStatus status = vaMapBuffer(ctx->display, image->buf, &data); + if (!CHECK_VA_STATUS(ctx, "vaMapBuffer()")) + return false; + + *mpi = (struct mp_image) {0}; + mp_image_setfmt(mpi, imgfmt); + mp_image_set_size(mpi, image->width, image->height); + + for (int p = 0; p < image->num_planes; p++) { + mpi->stride[p] = image->pitches[p]; + mpi->planes[p] = (uint8_t *)data + image->offsets[p]; + } + + if (image->format.fourcc == VA_FOURCC_YV12) { + MPSWAP(int, mpi->stride[1], mpi->stride[2]); + MPSWAP(uint8_t *, mpi->planes[1], mpi->planes[2]); + } + + return true; +} + 
+static bool va_image_unmap(struct mp_vaapi_ctx *ctx, VAImage *image) +{ + const VAStatus status = vaUnmapBuffer(ctx->display, image->buf); + return CHECK_VA_STATUS(ctx, "vaUnmapBuffer()"); +} + +// va_dst: copy destination, must be IMGFMT_VAAPI +// sw_src: copy source, must be a software pixel format +static int va_surface_upload(struct priv *priv, struct mp_image *va_dst, + struct mp_image *sw_src) +{ + struct va_surface *p = va_surface_in_mp_image(va_dst); + if (!p) + return -1; + + if (va_surface_alloc_imgfmt(priv, va_dst, sw_src->imgfmt) < 0) + return -1; + + struct mp_image img; + if (!va_image_map(p->ctx, &p->image, &img)) + return -1; + assert(sw_src->w <= img.w && sw_src->h <= img.h); + mp_image_set_size(&img, sw_src->w, sw_src->h); // copy only visible part + mp_image_copy(&img, sw_src); + va_image_unmap(p->ctx, &p->image); + + if (!p->is_derived) { + VAStatus status = vaPutImage(p->display, p->id, + p->image.image_id, + 0, 0, sw_src->w, sw_src->h, + 0, 0, sw_src->w, sw_src->h); + if (!CHECK_VA_STATUS(p->ctx, "vaPutImage()")) + return -1; + } + + if (p->is_derived) + va_surface_image_destroy(p); + return 0; +} + +struct pool_alloc_ctx { + struct mp_vaapi_ctx *vaapi; + int rt_format; +}; + +static struct mp_image *alloc_pool(void *pctx, int fmt, int w, int h) +{ + struct pool_alloc_ctx *alloc_ctx = pctx; + if (fmt != IMGFMT_VAAPI) + return NULL; + + return alloc_surface(alloc_ctx->vaapi, alloc_ctx->rt_format, w, h); +} + +// The allocator of the given image pool to allocate VAAPI surfaces, using +// the given rt_format. 
+static void va_pool_set_allocator(struct mp_image_pool *pool, + struct mp_vaapi_ctx *ctx, int rt_format) +{ + struct pool_alloc_ctx *alloc_ctx = talloc_ptrtype(pool, alloc_ctx); + *alloc_ctx = (struct pool_alloc_ctx){ + .vaapi = ctx, + .rt_format = rt_format, + }; + mp_image_pool_set_allocator(pool, alloc_pool, alloc_ctx); + mp_image_pool_set_lru(pool); +} + static void flush_output_surfaces(struct priv *p) { for (int n = 0; n < MAX_OUTPUT_SURFACES; n++) @@ -135,7 +437,7 @@ static bool alloc_swdec_surfaces(struct priv *p, int w, int h, int imgfmt) free_video_specific(p); for (int i = 0; i < MAX_OUTPUT_SURFACES; i++) { p->swdec_surfaces[i] = mp_image_pool_get(p->pool, IMGFMT_VAAPI, w, h); - if (va_surface_alloc_imgfmt(p->swdec_surfaces[i], imgfmt) < 0) + if (va_surface_alloc_imgfmt(p, p->swdec_surfaces[i], imgfmt) < 0) return false; } return true; @@ -172,7 +474,7 @@ static int reconfig(struct vo *vo, struct mp_image_params *params) static int query_format(struct vo *vo, int imgfmt) { struct priv *p = vo->priv; - if (imgfmt == IMGFMT_VAAPI || va_image_format_from_imgfmt(p->mpvaapi, imgfmt)) + if (imgfmt == IMGFMT_VAAPI || va_image_format_from_imgfmt(p, imgfmt)) return 1; return 0; @@ -193,7 +495,7 @@ static bool render_to_screen(struct priv *p, struct mp_image *mpi) struct mp_image *img = mp_image_alloc(fmt, w, h); if (img) { mp_image_clear(img, 0, 0, w, h); - if (va_surface_upload(p->black_surface, img) < 0) + if (va_surface_upload(p, p->black_surface, img) < 0) mp_image_unrefp(&p->black_surface); talloc_free(img); } @@ -268,7 +570,7 @@ static void draw_image(struct vo *vo, struct mp_image *mpi) if (mpi->imgfmt != IMGFMT_VAAPI) { struct mp_image *dst = p->swdec_surfaces[p->output_surface]; - if (!dst || va_surface_upload(dst, mpi) < 0) { + if (!dst || va_surface_upload(p, dst, mpi) < 0) { MP_WARN(vo, "Could not upload surface.\n"); talloc_free(mpi); return; @@ -510,6 +812,10 @@ static int preinit(struct vo *vo) "It's better to use VDPAU directly with: 
--vo=vdpau\n"); } + va_get_formats(p); + if (!p->image_formats) + goto fail; + p->pool = mp_image_pool_new(MAX_OUTPUT_SURFACES + 3); va_pool_set_allocator(p->pool, p->mpvaapi, VA_RT_FORMAT_YUV420); diff --git a/video/out/vo_wayland.c b/video/out/vo_wayland.c deleted file mode 100644 index 37ab4c7..0000000 --- a/video/out/vo_wayland.c +++ /dev/null @@ -1,682 +0,0 @@ -/* - * This file is part of mpv video player. - * Copyright © 2013 Alexander Preisinger <alexander.preisinger@gmail.com> - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <stdio.h> -#include <stdbool.h> -#include <assert.h> - -#include <libavutil/common.h> - -#include "config.h" - -#include "vo.h" -#include "video/mp_image.h" -#include "video/sws_utils.h" -#include "sub/osd.h" -#include "sub/img_convert.h" -#include "common/msg.h" -#include "input/input.h" -#include "osdep/endian.h" -#include "osdep/timer.h" - -#include "wayland_common.h" - -#include "video/out/wayland/buffer.h" - -static void draw_image(struct vo *vo, mp_image_t *mpi); -static void draw_osd(struct vo *vo); - -static const struct wl_buffer_listener buffer_listener; - -// TODO: pay attention to the reported subpixel order -static const format_t format_table[] = { - {WL_SHM_FORMAT_ARGB8888, IMGFMT_BGRA}, // 8b 8g 8r 8a - {WL_SHM_FORMAT_XRGB8888, IMGFMT_BGR0}, -#if BYTE_ORDER == LITTLE_ENDIAN - {WL_SHM_FORMAT_RGB565, IMGFMT_RGB565}, // 5b 6g 5r -#endif - {WL_SHM_FORMAT_RGB888, IMGFMT_BGR24}, // 8b 8g 8r - {WL_SHM_FORMAT_BGR888, IMGFMT_RGB24}, // 8r 8g 8b - {WL_SHM_FORMAT_XBGR8888, IMGFMT_RGB0}, - {WL_SHM_FORMAT_RGBX8888, IMGFMT_0BGR}, - {WL_SHM_FORMAT_BGRX8888, IMGFMT_0RGB}, - {WL_SHM_FORMAT_ABGR8888, IMGFMT_RGBA}, - {WL_SHM_FORMAT_RGBA8888, IMGFMT_ABGR}, - {WL_SHM_FORMAT_BGRA8888, IMGFMT_ARGB}, -}; - -#define MAX_FORMAT_ENTRIES (sizeof(format_table) / sizeof(format_table[0])) -#define DEFAULT_FORMAT_ENTRY 1 -#define DEFAULT_ALPHA_FORMAT_ENTRY 0 - -struct priv; - -// We only use double buffering but the creation and usage is still open to -// triple buffering. Triple buffering is now removed, because double buffering -// is now pixel-perfect. 
-struct buffer_pool { - shm_buffer_t **buffers; - shm_buffer_t *front_buffer; // just pointers to any of the buffers - shm_buffer_t *back_buffer; - uint32_t buffer_no; -}; - -struct supported_format { - format_t format; - bool is_alpha; - struct wl_list link; -}; - -struct priv { - struct vo *vo; - struct vo_wayland_state *wl; - - struct wl_list format_list; - const format_t *video_format; // pointer to element in supported_format list - - struct mp_rect src; - struct mp_rect dst; - int src_w, src_h; - int dst_w, dst_h; - struct mp_osd_res osd; - - struct mp_sws_context *sws; - struct mp_image_params in_format; - - struct buffer_pool video_bufpool; - - struct mp_image *original_image; - int width; // width of the original image - int height; - - int x, y; // coords for resizing - - struct wl_surface *osd_surfaces[MAX_OSD_PARTS]; - struct wl_subsurface *osd_subsurfaces[MAX_OSD_PARTS]; - shm_buffer_t *osd_buffers[MAX_OSD_PARTS]; - // this id tells us if the subtitle part has changed or not - int change_id[MAX_OSD_PARTS]; - - // options - int enable_alpha; - int use_rgb565; -}; - -static bool is_alpha_format(const format_t *fmt) -{ - return !!(mp_imgfmt_get_desc(fmt->mp_format).flags & MP_IMGFLAG_ALPHA); -} - -static const format_t* is_wayland_format_supported(struct priv *p, - enum wl_shm_format fmt) -{ - struct supported_format *sf; - - // find the matching format first - wl_list_for_each(sf, &p->format_list, link) { - if (sf->format.wl_format == fmt) { - return &sf->format; - } - } - - return NULL; -} - -// additional buffer functions - -static void buffer_finalise_front(shm_buffer_t *buf) -{ - SHM_BUFFER_SET_BUSY(buf); - SHM_BUFFER_CLEAR_DIRTY(buf); -} - -static void buffer_finalise_back(shm_buffer_t *buf) -{ - SHM_BUFFER_SET_DIRTY(buf); -} - -static struct mp_image buffer_get_mp_image(struct priv *p, - shm_buffer_t *buf) -{ - struct mp_image img = {0}; - mp_image_set_params(&img, &p->sws->dst); - - img.w = buf->stride / buf->bytes; - img.h = buf->height; - 
img.planes[0] = buf->data; - img.stride[0] = buf->stride; - - return img; -} - -// buffer pool functions - -static void buffer_pool_reinit(struct priv *p, - struct buffer_pool *pool, - uint32_t buffer_no, - uint32_t width, uint32_t height, - format_t fmt, - struct wl_shm *shm) -{ - if (!pool->buffers) - pool->buffers = calloc(buffer_no, sizeof(shm_buffer_t*)); - - pool->buffer_no = buffer_no; - - for (uint32_t i = 0; i < buffer_no; ++i) { - if (pool->buffers[i] == NULL) - pool->buffers[i] = shm_buffer_create(width, height, fmt, - shm, &buffer_listener); - else - shm_buffer_resize(pool->buffers[i], width, height); - } - - pool->back_buffer = pool->buffers[0]; - pool->front_buffer = pool->buffers[1]; -} - -static bool buffer_pool_resize(struct buffer_pool *pool, - int width, - int height) -{ - bool ret = true; - - for (uint32_t i = 0; ret && i < pool->buffer_no; ++i) - shm_buffer_resize(pool->buffers[i], width, height); - - return ret; -} - -static void buffer_pool_destroy(struct buffer_pool *pool) -{ - for (uint32_t i = 0; i < pool->buffer_no; ++i) - shm_buffer_destroy(pool->buffers[i]); - - free(pool->buffers); - pool->front_buffer = NULL; - pool->back_buffer = NULL; - pool->buffers = NULL; -} - -static void buffer_pool_swap(struct buffer_pool *pool) -{ - if (SHM_BUFFER_IS_DIRTY(pool->back_buffer)) { - shm_buffer_t *tmp = pool->back_buffer; - pool->back_buffer = pool->front_buffer; - pool->front_buffer = tmp; - } -} - -// returns NULL if the back buffer is busy -static shm_buffer_t * buffer_pool_get_back(struct buffer_pool *pool) -{ - if (!pool->back_buffer || SHM_BUFFER_IS_BUSY(pool->back_buffer)) - return NULL; - - return pool->back_buffer; -} - -static shm_buffer_t * buffer_pool_get_front(struct buffer_pool *pool) -{ - return pool->front_buffer; -} - -static bool redraw_frame(struct priv *p) -{ - draw_image(p->vo, NULL); - return true; -} - -static bool resize(struct priv *p) -{ - struct vo_wayland_state *wl = p->wl; - - if (!p->video_bufpool.back_buffer || 
SHM_BUFFER_IS_BUSY(p->video_bufpool.back_buffer)) - return false; // skip resizing if we can't guarantee pixel perfectness! - - int32_t scale = 1; - int32_t x = wl->window.sh_x; - int32_t y = wl->window.sh_y; - - if (wl->display.current_output) - scale = wl->display.current_output->scale; - - wl->vo->dwidth = scale*wl->window.sh_width; - wl->vo->dheight = scale*wl->window.sh_height; - - vo_get_src_dst_rects(p->vo, &p->src, &p->dst, &p->osd); - p->src_w = p->src.x1 - p->src.x0; - p->src_h = p->src.y1 - p->src.y0; - p->dst_w = p->dst.x1 - p->dst.x0; - p->dst_h = p->dst.y1 - p->dst.y0; - - mp_input_set_mouse_transform(p->vo->input_ctx, &p->dst, NULL); - - MP_DBG(wl, "resizing %dx%d -> %dx%d\n", wl->window.width, - wl->window.height, - p->dst_w, - p->dst_h); - - if (x != 0) - x = wl->window.width - p->dst_w; - - if (y != 0) - y = wl->window.height - p->dst_h; - - wl_surface_set_buffer_scale(wl->window.video_surface, scale); - mp_sws_set_from_cmdline(p->sws, p->vo->opts->sws_opts); - p->sws->src = p->in_format; - p->sws->dst = (struct mp_image_params) { - .imgfmt = p->video_format->mp_format, - .w = p->dst_w, - .h = p->dst_h, - .p_w = 1, - .p_h = 1, - }; - - mp_image_params_guess_csp(&p->sws->dst); - - if (mp_sws_reinit(p->sws) < 0) - return false; - - if (!buffer_pool_resize(&p->video_bufpool, p->dst_w, p->dst_h)) { - MP_ERR(wl, "failed to resize video buffers\n"); - return false; - } - - wl->window.width = p->dst_w; - wl->window.height = p->dst_h; - - // if no alpha enabled format is used then create an opaque region to allow - // the compositor to optimize the drawing of the window - if (!p->enable_alpha) { - struct wl_region *opaque = - wl_compositor_create_region(wl->display.compositor); - wl_region_add(opaque, 0, 0, p->dst_w/scale, p->dst_h/scale); - wl_surface_set_opaque_region(wl->window.video_surface, opaque); - wl_region_destroy(opaque); - } - - p->x = x; - p->y = y; - p->vo->want_redraw = true; - return true; -} - - -/* wayland listeners */ - -static void 
buffer_handle_release(void *data, struct wl_buffer *buffer) -{ - shm_buffer_t *buf = data; - - if (SHM_BUFFER_IS_ONESHOT(buf)) { - shm_buffer_destroy(buf); - return; - } - - SHM_BUFFER_CLEAR_BUSY(buf); - // does nothing and returns 0 if no pending resize flag was set - shm_buffer_pending_resize(buf); -} - -static const struct wl_buffer_listener buffer_listener = { - buffer_handle_release -}; - -static void shm_handle_format(void *data, - struct wl_shm *wl_shm, - uint32_t format) -{ - struct priv *p = data; - for (uint32_t i = 0; i < MAX_FORMAT_ENTRIES; ++i) { - if (format_table[i].wl_format == format) { - MP_INFO(p->wl, "format %s supported by hw\n", - mp_imgfmt_to_name(format_table[i].mp_format)); - struct supported_format *sf = talloc(p, struct supported_format); - sf->format = format_table[i]; - sf->is_alpha = is_alpha_format(&sf->format); - wl_list_insert(&p->format_list, &sf->link); - } - } -} - -static const struct wl_shm_listener shm_listener = { - shm_handle_format -}; - - -/* mpv interface */ - -static void draw_image(struct vo *vo, mp_image_t *mpi) -{ - struct priv *p = vo->priv; - - if (mpi) { - talloc_free(p->original_image); - p->original_image = mpi; - } - - vo_wayland_wait_events(vo, 0); - - shm_buffer_t *buf = buffer_pool_get_back(&p->video_bufpool); - - if (!buf) { - MP_VERBOSE(p->wl, "can't draw, back buffer is busy\n"); - return; - } - - struct mp_image img = buffer_get_mp_image(p, buf); - - if (p->original_image) { - struct mp_image src = *p->original_image; - struct mp_rect src_rc = p->src; - src_rc.x0 = MP_ALIGN_DOWN(src_rc.x0, src.fmt.align_x); - src_rc.y0 = MP_ALIGN_DOWN(src_rc.y0, src.fmt.align_y); - mp_image_crop_rc(&src, src_rc); - - mp_sws_scale(p->sws, &img, &src); - } else { - mp_image_clear(&img, 0, 0, img.w, img.h); - } - - buffer_finalise_back(buf); - - draw_osd(vo); -} - -static void draw_osd_cb(void *ctx, struct sub_bitmaps *imgs) -{ - struct priv *p = ctx; - int id = imgs->render_index; - - struct wl_surface *s = 
p->osd_surfaces[id]; - - if (imgs->change_id != p->change_id[id]) { - p->change_id[id] = imgs->change_id; - - struct mp_rect bb; - if (!mp_sub_bitmaps_bb(imgs, &bb)) - return; - - int width = mp_rect_w(bb); - int height = mp_rect_h(bb); - - if (!p->osd_buffers[id]) { - p->osd_buffers[id] = shm_buffer_create(width, - height, - format_table[DEFAULT_ALPHA_FORMAT_ENTRY], - p->wl->display.shm, - &buffer_listener); - } - else if (SHM_BUFFER_IS_BUSY(p->osd_buffers[id])) { - // freed on release in buffer_listener - // guarantees pixel perfect resizing of subtitles and osd - SHM_BUFFER_SET_ONESHOT(p->osd_buffers[id]); - p->osd_buffers[id] = shm_buffer_create(width, - height, - format_table[DEFAULT_ALPHA_FORMAT_ENTRY], - p->wl->display.shm, - &buffer_listener); - } - else { - shm_buffer_resize(p->osd_buffers[id], width, height); - } - - shm_buffer_t *buf = p->osd_buffers[id]; - SHM_BUFFER_SET_BUSY(buf); - - struct mp_image wlimg = buffer_get_mp_image(p, buf); - - for (int n = 0; n < imgs->num_parts; n++) { - struct sub_bitmap *sub = &imgs->parts[n]; - memcpy_pic(wlimg.planes[0], sub->bitmap, sub->w * 4, sub->h, - wlimg.stride[0], sub->stride); - } - - wl_subsurface_set_position(p->osd_subsurfaces[id], 0, 0); - wl_surface_attach(s, buf->buffer, bb.x0, bb.y0); - wl_surface_damage(s, 0, 0, width, height); - wl_surface_commit(s); - } - else { - // p->osd_buffer, guaranteed to exist here - assert(p->osd_buffers[id]); - wl_surface_attach(s, p->osd_buffers[id]->buffer, 0, 0); - wl_surface_commit(s); - } -} - -static const bool osd_formats[SUBBITMAP_COUNT] = { - [SUBBITMAP_RGBA] = true, -}; - -static void draw_osd(struct vo *vo) -{ - int32_t scale = 1; - struct priv *p = vo->priv; - - if (p->wl && p->wl->display.current_output) - scale = p->wl->display.current_output->scale; - - // detach all buffers and attach all needed buffers in osd_draw - // only the most recent attach & commit is applied once the parent surface - // is committed - for (int i = 0; i < MAX_OSD_PARTS; ++i) { - 
struct wl_surface *s = p->osd_surfaces[i]; - wl_surface_attach(s, NULL, 0, 0); - wl_surface_set_buffer_scale(s, scale); - wl_surface_damage(s, 0, 0, p->dst_w, p->dst_h); - wl_surface_commit(s); - } - - double pts = p->original_image ? p->original_image->pts : 0; - osd_draw(vo->osd, p->osd, pts, 0, osd_formats, draw_osd_cb, p); -} - -static void redraw(void *data, uint32_t time) -{ - struct priv *p = data; - - shm_buffer_t *buf = buffer_pool_get_front(&p->video_bufpool); - wl_surface_attach(p->wl->window.video_surface, buf->buffer, p->x, p->y); - wl_surface_damage(p->wl->window.video_surface, 0, 0, p->dst_w, p->dst_h); - buffer_finalise_front(buf); - - p->x = 0; - p->y = 0; -} - -static void flip_page(struct vo *vo) -{ - struct priv *p = vo->priv; - - buffer_pool_swap(&p->video_bufpool); - - if (!p->wl->frame.callback) - vo_wayland_request_frame(vo, p, redraw); - - vo_wayland_wait_events(vo, 0); -} - -static int query_format(struct vo *vo, int format) -{ - struct priv *p = vo->priv; - struct supported_format *sf; - wl_list_for_each_reverse(sf, &p->format_list, link) { - if (sf->format.mp_format == format) - return 1; - } - - if (mp_sws_supported_format(format)) - return 1; - - return 0; -} - -static int reconfig(struct vo *vo, struct mp_image_params *fmt) -{ - struct priv *p = vo->priv; - mp_image_unrefp(&p->original_image); - - p->width = fmt->w; - p->height = fmt->h; - p->in_format = *fmt; - - struct supported_format *sf; - - // find the matching format first - wl_list_for_each(sf, &p->format_list, link) { - if (sf->format.mp_format == fmt->imgfmt && - (p->enable_alpha == sf->is_alpha)) - { - p->video_format = &sf->format; - break; - } - } - - if (!p->video_format) { - // if use default is enable overwrite the auto selected one - if (p->enable_alpha) - p->video_format = &format_table[DEFAULT_ALPHA_FORMAT_ENTRY]; - else - p->video_format = &format_table[DEFAULT_FORMAT_ENTRY]; - } - - // overrides alpha - // use rgb565 if performance is your main concern - if 
(p->use_rgb565) { - MP_INFO(p->wl, "using rgb565\n"); - const format_t *entry = - is_wayland_format_supported(p, WL_SHM_FORMAT_RGB565); - if (entry) - p->video_format = entry; - } - - buffer_pool_reinit(p, &p->video_bufpool, 2, p->width, p->height, - *p->video_format, p->wl->display.shm); - - vo_wayland_config(vo); - - resize(p); - - return 0; -} - -static void uninit(struct vo *vo) -{ - struct priv *p = vo->priv; - buffer_pool_destroy(&p->video_bufpool); - - talloc_free(p->original_image); - - for (int i = 0; i < MAX_OSD_PARTS; ++i) { - shm_buffer_destroy(p->osd_buffers[i]); - wl_subsurface_destroy(p->osd_subsurfaces[i]); - wl_surface_destroy(p->osd_surfaces[i]); - } - - vo_wayland_uninit(vo); -} - -static int preinit(struct vo *vo) -{ - struct priv *p = vo->priv; - struct vo_wayland_state *wl = NULL; - - if (!vo_wayland_init(vo)) - return -1; - - wl = vo->wayland; - - p->vo = vo; - p->wl = wl; - p->sws = mp_sws_alloc(vo); - - wl_list_init(&p->format_list); - - wl_shm_add_listener(wl->display.shm, &shm_listener, p); - wl_display_dispatch(wl->display.display); - - // Commits on surfaces bound to a subsurface are cached until the parent - // surface is committed, in this case the video surface. - // Which means we can call commit anywhere. 
- struct wl_region *input = - wl_compositor_create_region(wl->display.compositor); - for (int i = 0; i < MAX_OSD_PARTS; ++i) { - p->osd_surfaces[i] = - wl_compositor_create_surface(wl->display.compositor); - wl_surface_attach(p->osd_surfaces[i], NULL, 0, 0); - wl_surface_set_input_region(p->osd_surfaces[i], input); - p->osd_subsurfaces[i] = - wl_subcompositor_get_subsurface(wl->display.subcomp, - p->osd_surfaces[i], - wl->window.video_surface); // parent - wl_surface_commit(p->osd_surfaces[i]); - wl_subsurface_set_sync(p->osd_subsurfaces[i]); - } - wl_region_destroy(input); - - return 0; -} - -static int control(struct vo *vo, uint32_t request, void *data) -{ - struct priv *p = vo->priv; - switch (request) { - case VOCTRL_SET_PANSCAN: { - resize(p); - return VO_TRUE; - } - case VOCTRL_REDRAW_FRAME: - return redraw_frame(p); - } - int events = 0; - int r = vo_wayland_control(vo, &events, request, data); - - // NOTE: VO_EVENT_EXPOSE is never returned by the wayland backend - if (events & VO_EVENT_RESIZE) - resize(p); - - vo_event(vo, events); - - return r; -} - -#define OPT_BASE_STRUCT struct priv -const struct vo_driver video_out_wayland = { - .description = "Wayland SHM video output", - .name = "wayland", - .priv_size = sizeof(struct priv), - .preinit = preinit, - .query_format = query_format, - .reconfig = reconfig, - .control = control, - .draw_image = draw_image, - .flip_page = flip_page, - .wakeup = vo_wayland_wakeup, - .wait_events = vo_wayland_wait_events, - .uninit = uninit, - .options = (const struct m_option[]) { - OPT_FLAG("alpha", enable_alpha, 0), - OPT_FLAG("rgb565", use_rgb565, 0), - {0} - }, - .options_prefix = "vo-wayland", -}; - diff --git a/video/out/vo_x11.c b/video/out/vo_x11.c index dd2d942..f29d06a 100644 --- a/video/out/vo_x11.c +++ b/video/out/vo_x11.c @@ -37,11 +37,9 @@ #include "x11_common.h" -#if HAVE_SHM #include <sys/ipc.h> #include <sys/shm.h> #include <X11/extensions/XShm.h> -#endif #include "sub/osd.h" #include "sub/draw_bmp.h" @@ 
-79,11 +77,9 @@ struct priv { int current_buf; bool reset_view; -#if HAVE_SHM int Shmem_Flag; XShmSegmentInfo Shminfo[2]; int Shm_Warned_Slow; -#endif }; static bool resize(struct vo *vo); @@ -91,7 +87,6 @@ static bool resize(struct vo *vo); static bool getMyXImage(struct priv *p, int foo) { struct vo *vo = p->vo; -#if HAVE_SHM if (vo->x11->display_is_local && XShmQueryExtension(vo->x11->display)) { p->Shmem_Flag = 1; vo->x11->ShmCompletionEvent = XShmGetEventBase(vo->x11->display) @@ -136,34 +131,29 @@ static bool getMyXImage(struct priv *p, int foo) } else { shmemerror: p->Shmem_Flag = 0; -#endif - MP_VERBOSE(vo, "Not using SHM.\n"); - p->myximage[foo] = - XCreateImage(vo->x11->display, p->vinfo.visual, p->depth, ZPixmap, - 0, NULL, p->image_width, p->image_height, 8, 0); - if (!p->myximage[foo]) { - MP_WARN(vo, "could not allocate image"); - return false; + + MP_VERBOSE(vo, "Not using SHM.\n"); + p->myximage[foo] = + XCreateImage(vo->x11->display, p->vinfo.visual, p->depth, ZPixmap, + 0, NULL, p->image_width, p->image_height, 8, 0); + if (!p->myximage[foo]) { + MP_WARN(vo, "could not allocate image"); + return false; + } + p->myximage[foo]->data = + calloc(1, p->myximage[foo]->bytes_per_line * p->image_height + 32); } - p->myximage[foo]->data = - calloc(1, p->myximage[foo]->bytes_per_line * p->image_height + 32); -#if HAVE_SHM -} -#endif return true; } static void freeMyXImage(struct priv *p, int foo) { -#if HAVE_SHM struct vo *vo = p->vo; if (p->Shmem_Flag) { XShmDetach(vo->x11->display, &p->Shminfo[foo]); XDestroyImage(p->myximage[foo]); shmdt(p->Shminfo[foo].shmaddr); - } else -#endif - { + } else { if (p->myximage[foo]) XDestroyImage(p->myximage[foo]); } @@ -284,15 +274,12 @@ static void Display_Image(struct priv *p, XImage *myximage) p->reset_view = false; } -#if HAVE_SHM if (p->Shmem_Flag) { XShmPutImage(vo->x11->display, vo->x11->window, p->gc, x_image, 0, 0, p->dst.x0, p->dst.y0, p->dst_w, p->dst_h, True); vo->x11->ShmCompletionWaitCount++; - } else 
-#endif - { + } else { XPutImage(vo->x11->display, vo->x11->window, p->gc, x_image, 0, 0, p->dst.x0, p->dst.y0, p->dst_w, p->dst_h); } @@ -312,7 +299,6 @@ static struct mp_image get_x_buffer(struct priv *p, int buf_index) static void wait_for_completion(struct vo *vo, int max_outstanding) { -#if HAVE_SHM struct priv *ctx = vo->priv; struct vo_x11_state *x11 = vo->x11; if (ctx->Shmem_Flag) { @@ -326,7 +312,6 @@ static void wait_for_completion(struct vo *vo, int max_outstanding) vo_x11_check_events(vo); } } -#endif } static void flip_page(struct vo *vo) diff --git a/video/out/vo_xv.c b/video/out/vo_xv.c index 7c710f2..e75a653 100644 --- a/video/out/vo_xv.c +++ b/video/out/vo_xv.c @@ -30,12 +30,10 @@ #include "config.h" -#if HAVE_SHM #include <sys/types.h> #include <sys/ipc.h> #include <sys/shm.h> #include <X11/extensions/XShm.h> -#endif // Note: depends on the inclusion of X11/extensions/XShm.h #include <X11/extensions/Xv.h> @@ -92,10 +90,8 @@ struct xvctx { GC f_gc; // used to paint background GC vo_gc; // used to paint video int Shmem_Flag; -#if HAVE_SHM XShmSegmentInfo Shminfo[MAX_BUFFERS]; int Shm_Warned_Slow; -#endif }; #define MP_FOURCC(a,b,c,d) ((a) | ((b)<<8) | ((c)<<16) | ((unsigned)(d)<<24)) @@ -542,7 +538,6 @@ static bool allocate_xvimage(struct vo *vo, int foo) int aligned_w = FFALIGN(ctx->image_width, 32); // round up the height to next chroma boundary too int aligned_h = FFALIGN(ctx->image_height, 2); -#if HAVE_SHM if (x11->display_is_local && XShmQueryExtension(x11->display)) { ctx->Shmem_Flag = 1; x11->ShmCompletionEvent = XShmGetEventBase(x11->display) @@ -572,9 +567,7 @@ static bool allocate_xvimage(struct vo *vo, int foo) XShmAttach(x11->display, &ctx->Shminfo[foo]); XSync(x11->display, False); shmctl(ctx->Shminfo[foo].shmid, IPC_RMID, 0); - } else -#endif - { + } else { ctx->xvimage[foo] = (XvImage *) XvCreateImage(x11->display, ctx->xv_port, ctx->xv_format, NULL, aligned_w, @@ -604,22 +597,17 @@ static bool allocate_xvimage(struct vo *vo, int 
foo) static void deallocate_xvimage(struct vo *vo, int foo) { struct xvctx *ctx = vo->priv; -#if HAVE_SHM if (ctx->Shmem_Flag) { XShmDetach(vo->x11->display, &ctx->Shminfo[foo]); shmdt(ctx->Shminfo[foo].shmaddr); - } else -#endif - { + } else { av_free(ctx->xvimage[foo]->data); } if (ctx->xvimage[foo]) XFree(ctx->xvimage[foo]); ctx->xvimage[foo] = NULL; -#if HAVE_SHM ctx->Shminfo[foo] = (XShmSegmentInfo){0}; -#endif XSync(vo->x11->display, False); return; @@ -633,16 +621,14 @@ static inline void put_xvimage(struct vo *vo, XvImage *xvi) struct mp_rect *dst = &ctx->dst_rect; int dw = dst->x1 - dst->x0, dh = dst->y1 - dst->y0; int sw = src->x1 - src->x0, sh = src->y1 - src->y0; -#if HAVE_SHM + if (ctx->Shmem_Flag) { XvShmPutImage(x11->display, ctx->xv_port, x11->window, ctx->vo_gc, xvi, src->x0, src->y0, sw, sh, dst->x0, dst->y0, dw, dh, True); x11->ShmCompletionWaitCount++; - } else -#endif - { + } else { XvPutImage(x11->display, ctx->xv_port, x11->window, ctx->vo_gc, xvi, src->x0, src->y0, sw, sh, dst->x0, dst->y0, dw, dh); @@ -677,7 +663,6 @@ static struct mp_image get_xv_buffer(struct vo *vo, int buf_index) static void wait_for_completion(struct vo *vo, int max_outstanding) { -#if HAVE_SHM struct xvctx *ctx = vo->priv; struct vo_x11_state *x11 = vo->x11; if (ctx->Shmem_Flag) { @@ -691,7 +676,6 @@ static void wait_for_completion(struct vo *vo, int max_outstanding) vo_x11_check_events(vo); } } -#endif } static void flip_page(struct vo *vo) diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h new file mode 100644 index 0000000..6e82bfa --- /dev/null +++ b/video/out/vulkan/common.h @@ -0,0 +1,58 @@ +#pragma once + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <assert.h> + +#include "config.h" + +#include "common/common.h" +#include "common/msg.h" + +// We need to define all platforms we want to support. 
Since we have +// our own mechanism for checking this, we re-define the right symbols +#if HAVE_WAYLAND +#define VK_USE_PLATFORM_WAYLAND_KHR +#endif +#if HAVE_X11 +#define VK_USE_PLATFORM_XLIB_KHR +#endif +#if HAVE_WIN32_DESKTOP +#define VK_USE_PLATFORM_WIN32_KHR +#endif + +#include <vulkan/vulkan.h> + +// Vulkan allows the optional use of a custom allocator. We don't need one but +// mark this parameter with a better name in case we ever decide to change this +// in the future. (And to make the code more readable) +#define MPVK_ALLOCATOR NULL + +// A lot of things depend on streaming resources across frames. Depending on +// how many frames we render ahead of time, we need to pick enough to avoid +// any conflicts, so make all of these tunable relative to this constant in +// order to centralize them. +#define MPVK_MAX_STREAMING_DEPTH 8 + +// Shared struct used to hold vulkan context information +struct mpvk_ctx { + struct mp_log *log; + VkInstance inst; + VkPhysicalDevice physd; + VkDebugReportCallbackEXT dbg; + VkDevice dev; + + // Surface, must be initialized fter the context itself + VkSurfaceKHR surf; + VkSurfaceFormatKHR surf_format; // picked at surface initialization time + + struct vk_malloc *alloc; // memory allocator for this device + struct vk_cmdpool *pool; // primary command pool for this device + struct vk_cmd *last_cmd; // most recently submitted command + struct spirv_compiler *spirv; // GLSL -> SPIR-V compiler + + // Cached capabilities + VkPhysicalDeviceLimits limits; +}; diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c new file mode 100644 index 0000000..0bca198 --- /dev/null +++ b/video/out/vulkan/context.c @@ -0,0 +1,518 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "options/m_config.h" +#include "video/out/gpu/spirv.h" + +#include "context.h" +#include "ra_vk.h" +#include "utils.h" + +enum { + SWAP_AUTO = 0, + SWAP_FIFO, + SWAP_FIFO_RELAXED, + SWAP_MAILBOX, + SWAP_IMMEDIATE, + SWAP_COUNT, +}; + +struct vulkan_opts { + struct mpvk_device_opts dev_opts; // logical device options + char *device; // force a specific GPU + int swap_mode; +}; + +static int vk_validate_dev(struct mp_log *log, const struct m_option *opt, + struct bstr name, struct bstr param) +{ + int ret = M_OPT_INVALID; + VkResult res; + + // Create a dummy instance to validate/list the devices + VkInstanceCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + }; + + VkInstance inst; + VkPhysicalDevice *devices = NULL; + uint32_t num = 0; + + res = vkCreateInstance(&info, MPVK_ALLOCATOR, &inst); + if (res != VK_SUCCESS) + goto error; + + res = vkEnumeratePhysicalDevices(inst, &num, NULL); + if (res != VK_SUCCESS) + goto error; + + devices = talloc_array(NULL, VkPhysicalDevice, num); + vkEnumeratePhysicalDevices(inst, &num, devices); + if (res != VK_SUCCESS) + goto error; + + bool help = bstr_equals0(param, "help"); + if (help) { + mp_info(log, "Available vulkan devices:\n"); + ret = M_OPT_EXIT; + } + + for (int i = 0; i < num; i++) { + VkPhysicalDeviceProperties prop; + vkGetPhysicalDeviceProperties(devices[i], &prop); + + if (help) { + mp_info(log, " '%s' (GPU %d, ID %x:%x)\n", prop.deviceName, i, + (unsigned)prop.vendorID, (unsigned)prop.deviceID); + } else if (bstr_equals0(param, prop.deviceName)) { + ret = 0; + break; + } + } + + if 
(!help) + mp_err(log, "No device with name '%.*s'!\n", BSTR_P(param)); + +error: + talloc_free(devices); + return ret; +} + +#define OPT_BASE_STRUCT struct vulkan_opts +const struct m_sub_options vulkan_conf = { + .opts = (const struct m_option[]) { + OPT_STRING_VALIDATE("vulkan-device", device, 0, vk_validate_dev), + OPT_CHOICE("vulkan-swap-mode", swap_mode, 0, + ({"auto", SWAP_AUTO}, + {"fifo", SWAP_FIFO}, + {"fifo-relaxed", SWAP_FIFO_RELAXED}, + {"mailbox", SWAP_MAILBOX}, + {"immediate", SWAP_IMMEDIATE})), + OPT_INTRANGE("vulkan-queue-count", dev_opts.queue_count, 0, 1, + MPVK_MAX_QUEUES, OPTDEF_INT(1)), + {0} + }, + .size = sizeof(struct vulkan_opts) +}; + +struct priv { + struct mpvk_ctx *vk; + struct vulkan_opts *opts; + // Swapchain metadata: + int w, h; // current size + VkSwapchainCreateInfoKHR protoInfo; // partially filled-in prototype + VkSwapchainKHR swapchain; + VkSwapchainKHR old_swapchain; + int frames_in_flight; + // state of the images: + struct ra_tex **images; // ra_tex wrappers for the vkimages + int num_images; // size of images + VkSemaphore *acquired; // pool of semaphores used to synchronize images + int num_acquired; // size of this pool + int idx_acquired; // index of next free semaphore within this pool + int last_imgidx; // the image index last acquired (for submit) +}; + +static const struct ra_swapchain_fns vulkan_swapchain; + +struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx) +{ + if (ctx->swapchain->fns != &vulkan_swapchain) + return NULL; + + struct priv *p = ctx->swapchain->priv; + return p->vk; +} + +static bool update_swapchain_info(struct priv *p, + VkSwapchainCreateInfoKHR *info) +{ + struct mpvk_ctx *vk = p->vk; + + // Query the supported capabilities and update this struct as needed + VkSurfaceCapabilitiesKHR caps; + VK(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk->physd, vk->surf, &caps)); + + // Sorted by preference + static const VkCompositeAlphaFlagsKHR alphaModes[] = { + VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR, + 
VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR, + }; + + for (int i = 0; i < MP_ARRAY_SIZE(alphaModes); i++) { + if (caps.supportedCompositeAlpha & alphaModes[i]) { + info->compositeAlpha = alphaModes[i]; + break; + } + } + + if (!info->compositeAlpha) { + MP_ERR(vk, "Failed picking alpha compositing mode (caps: 0x%x)\n", + caps.supportedCompositeAlpha); + goto error; + } + + static const VkSurfaceTransformFlagsKHR rotModes[] = { + VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR, + VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR, + }; + + for (int i = 0; i < MP_ARRAY_SIZE(rotModes); i++) { + if (caps.supportedTransforms & rotModes[i]) { + info->preTransform = rotModes[i]; + break; + } + } + + if (!info->preTransform) { + MP_ERR(vk, "Failed picking surface transform mode (caps: 0x%x)\n", + caps.supportedTransforms); + goto error; + } + + // Image count as required + MP_VERBOSE(vk, "Requested image count: %d (min %d max %d)\n", + (int)info->minImageCount, (int)caps.minImageCount, + (int)caps.maxImageCount); + + info->minImageCount = MPMAX(info->minImageCount, caps.minImageCount); + if (caps.maxImageCount) + info->minImageCount = MPMIN(info->minImageCount, caps.maxImageCount); + + // Check the extent against the allowed parameters + if (caps.currentExtent.width != info->imageExtent.width && + caps.currentExtent.width != 0xFFFFFFFF) + { + MP_WARN(vk, "Requested width %d does not match current width %d\n", + (int)info->imageExtent.width, (int)caps.currentExtent.width); + info->imageExtent.width = caps.currentExtent.width; + } + + if (caps.currentExtent.height != info->imageExtent.height && + caps.currentExtent.height != 0xFFFFFFFF) + { + MP_WARN(vk, "Requested height %d does not match current height %d\n", + (int)info->imageExtent.height, (int)caps.currentExtent.height); + info->imageExtent.height = caps.currentExtent.height; + } + + if (caps.minImageExtent.width > info->imageExtent.width || + caps.minImageExtent.height > info->imageExtent.height) + { + MP_ERR(vk, "Requested size %dx%d smaller than 
device minimum %d%d\n", + (int)info->imageExtent.width, (int)info->imageExtent.height, + (int)caps.minImageExtent.width, (int)caps.minImageExtent.height); + goto error; + } + + if (caps.maxImageExtent.width < info->imageExtent.width || + caps.maxImageExtent.height < info->imageExtent.height) + { + MP_ERR(vk, "Requested size %dx%d larger than device maximum %d%d\n", + (int)info->imageExtent.width, (int)info->imageExtent.height, + (int)caps.maxImageExtent.width, (int)caps.maxImageExtent.height); + goto error; + } + + // We just request whatever usage we can, and let the ra_vk decide what + // ra_tex_params that translates to. This makes the images as flexible + // as possible. + info->imageUsage = caps.supportedUsageFlags; + return true; + +error: + return false; +} + +void ra_vk_ctx_uninit(struct ra_ctx *ctx) +{ + if (ctx->ra) { + struct priv *p = ctx->swapchain->priv; + struct mpvk_ctx *vk = p->vk; + + mpvk_pool_wait_idle(vk, vk->pool); + + for (int i = 0; i < p->num_images; i++) + ra_tex_free(ctx->ra, &p->images[i]); + for (int i = 0; i < p->num_acquired; i++) + vkDestroySemaphore(vk->dev, p->acquired[i], MPVK_ALLOCATOR); + + vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR); + + talloc_free(p->images); + talloc_free(p->acquired); + ctx->ra->fns->destroy(ctx->ra); + ctx->ra = NULL; + } + + talloc_free(ctx->swapchain); + ctx->swapchain = NULL; +} + +static const struct ra_swapchain_fns vulkan_swapchain; + +bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk, + VkPresentModeKHR preferred_mode) +{ + struct ra_swapchain *sw = ctx->swapchain = talloc_zero(NULL, struct ra_swapchain); + sw->ctx = ctx; + sw->fns = &vulkan_swapchain; + + struct priv *p = sw->priv = talloc_zero(sw, struct priv); + p->vk = vk; + p->opts = mp_get_config_group(p, ctx->global, &vulkan_conf); + + if (!mpvk_find_phys_device(vk, p->opts->device, ctx->opts.allow_sw)) + goto error; + if (!spirv_compiler_init(ctx)) + goto error; + vk->spirv = ctx->spirv; + if 
(!mpvk_pick_surface_format(vk)) + goto error; + if (!mpvk_device_init(vk, p->opts->dev_opts)) + goto error; + + ctx->ra = ra_create_vk(vk, ctx->log); + if (!ctx->ra) + goto error; + + static const VkPresentModeKHR present_modes[SWAP_COUNT] = { + [SWAP_FIFO] = VK_PRESENT_MODE_FIFO_KHR, + [SWAP_FIFO_RELAXED] = VK_PRESENT_MODE_FIFO_RELAXED_KHR, + [SWAP_MAILBOX] = VK_PRESENT_MODE_MAILBOX_KHR, + [SWAP_IMMEDIATE] = VK_PRESENT_MODE_IMMEDIATE_KHR, + }; + + p->protoInfo = (VkSwapchainCreateInfoKHR) { + .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR, + .surface = vk->surf, + .imageFormat = vk->surf_format.format, + .imageColorSpace = vk->surf_format.colorSpace, + .imageArrayLayers = 1, // non-stereoscopic + .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, + .minImageCount = ctx->opts.swapchain_depth + 1, // +1 for FB + .presentMode = p->opts->swap_mode ? present_modes[p->opts->swap_mode] + : preferred_mode, + .clipped = true, + }; + + // Make sure the swapchain present mode is supported + int num_modes; + VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf, + &num_modes, NULL)); + VkPresentModeKHR *modes = talloc_array(NULL, VkPresentModeKHR, num_modes); + VK(vkGetPhysicalDeviceSurfacePresentModesKHR(vk->physd, vk->surf, + &num_modes, modes)); + bool supported = false; + for (int i = 0; i < num_modes; i++) + supported |= (modes[i] == p->protoInfo.presentMode); + talloc_free(modes); + + if (!supported) { + MP_ERR(ctx, "Requested swap mode unsupported by this device!\n"); + goto error; + } + + return true; + +error: + ra_vk_ctx_uninit(ctx); + return false; +} + +static void destroy_swapchain(struct mpvk_ctx *vk, struct priv *p) +{ + assert(p->old_swapchain); + vkDestroySwapchainKHR(vk->dev, p->old_swapchain, MPVK_ALLOCATOR); + p->old_swapchain = NULL; +} + +bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h) +{ + struct priv *p = sw->priv; + if (w == p->w && h == p->h) + return true; + + struct ra *ra = sw->ctx->ra; + struct mpvk_ctx *vk = p->vk; + 
VkImage *vkimages = NULL; + + // It's invalid to trigger another swapchain recreation while there's + // more than one swapchain already active, so we need to flush any pending + // asynchronous swapchain release operations that may be ongoing. + while (p->old_swapchain) + mpvk_dev_poll_cmds(vk, 100000); // 100μs + + VkSwapchainCreateInfoKHR sinfo = p->protoInfo; + sinfo.imageExtent = (VkExtent2D){ w, h }; + sinfo.oldSwapchain = p->swapchain; + + if (!update_swapchain_info(p, &sinfo)) + goto error; + + VK(vkCreateSwapchainKHR(vk->dev, &sinfo, MPVK_ALLOCATOR, &p->swapchain)); + p->w = w; + p->h = h; + + // Freeing the old swapchain while it's still in use is an error, so do + // it asynchronously once the device is idle. + if (sinfo.oldSwapchain) { + p->old_swapchain = sinfo.oldSwapchain; + vk_dev_callback(vk, (vk_cb) destroy_swapchain, vk, p); + } + + // Get the new swapchain images + int num; + VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, NULL)); + vkimages = talloc_array(NULL, VkImage, num); + VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, vkimages)); + + // If needed, allocate some more semaphores + while (num > p->num_acquired) { + VkSemaphore sem; + static const VkSemaphoreCreateInfo seminfo = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + }; + VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem)); + MP_TARRAY_APPEND(NULL, p->acquired, p->num_acquired, sem); + } + + // Recreate the ra_tex wrappers + for (int i = 0; i < p->num_images; i++) + ra_tex_free(ra, &p->images[i]); + + p->num_images = num; + MP_TARRAY_GROW(NULL, p->images, p->num_images); + for (int i = 0; i < num; i++) { + p->images[i] = ra_vk_wrap_swapchain_img(ra, vkimages[i], sinfo); + if (!p->images[i]) + goto error; + } + + talloc_free(vkimages); + return true; + +error: + talloc_free(vkimages); + vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR); + p->swapchain = NULL; + return false; +} + +static int color_depth(struct ra_swapchain *sw) +{ + 
struct priv *p = sw->priv; + int bits = 0; + + if (!p->num_images) + return bits; + + // The channel with the most bits is probably the most authoritative about + // the actual color information (consider e.g. a2bgr10). Slight downside + // in that it results in rounding r/b for e.g. rgb565, but we don't pick + // surfaces with fewer than 8 bits anyway. + const struct ra_format *fmt = p->images[0]->params.format; + for (int i = 0; i < fmt->num_components; i++) { + int depth = fmt->component_depth[i]; + bits = MPMAX(bits, depth ? depth : fmt->component_size[i]); + } + + return bits; +} + +static bool start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) +{ + struct priv *p = sw->priv; + struct mpvk_ctx *vk = p->vk; + if (!p->swapchain) + goto error; + + uint32_t imgidx = 0; + MP_TRACE(vk, "vkAcquireNextImageKHR\n"); + VkResult res = vkAcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX, + p->acquired[p->idx_acquired], NULL, + &imgidx); + if (res == VK_ERROR_OUT_OF_DATE_KHR) + goto error; // just return in this case + VK_ASSERT(res, "Failed acquiring swapchain image"); + + p->last_imgidx = imgidx; + *out_fbo = (struct ra_fbo) { + .tex = p->images[imgidx], + .flip = false, + }; + return true; + +error: + return false; +} + +static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame) +{ + struct priv *p = sw->priv; + struct ra *ra = sw->ctx->ra; + struct mpvk_ctx *vk = p->vk; + if (!p->swapchain) + goto error; + + VkSemaphore acquired = p->acquired[p->idx_acquired++]; + p->idx_acquired %= p->num_acquired; + + VkSemaphore done; + if (!ra_vk_submit(ra, p->images[p->last_imgidx], acquired, &done, + &p->frames_in_flight)) + goto error; + + // Older nvidia drivers can spontaneously combust when submitting to the + // same queue as we're rendering from, in a multi-queue scenario. Safest + // option is to cycle the queues first and then submit to the next queue. + // We can drop this hack in the future, I suppose. 
+ vk_cmd_cycle_queues(vk); + struct vk_cmdpool *pool = vk->pool; + VkQueue queue = pool->queues[pool->qindex]; + + VkPresentInfoKHR pinfo = { + .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &done, + .swapchainCount = 1, + .pSwapchains = &p->swapchain, + .pImageIndices = &p->last_imgidx, + }; + + VK(vkQueuePresentKHR(queue, &pinfo)); + return true; + +error: + return false; +} + +static void swap_buffers(struct ra_swapchain *sw) +{ + struct priv *p = sw->priv; + + while (p->frames_in_flight >= sw->ctx->opts.swapchain_depth) + mpvk_dev_poll_cmds(p->vk, 100000); // 100μs +} + +static const struct ra_swapchain_fns vulkan_swapchain = { + // .screenshot is not currently supported + .color_depth = color_depth, + .start_frame = start_frame, + .submit_frame = submit_frame, + .swap_buffers = swap_buffers, +}; diff --git a/video/out/vulkan/context.h b/video/out/vulkan/context.h new file mode 100644 index 0000000..a64d39f --- /dev/null +++ b/video/out/vulkan/context.h @@ -0,0 +1,13 @@ +#pragma once + +#include "video/out/gpu/context.h" +#include "common.h" + +// Helpers for ra_ctx based on ra_vk. These initialize ctx->ra and ctx->swchain. +void ra_vk_ctx_uninit(struct ra_ctx *ctx); +bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk, + VkPresentModeKHR preferred_mode); +bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h); + +// May be called on a ra_ctx of any type. +struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx); diff --git a/video/out/vulkan/context_wayland.c b/video/out/vulkan/context_wayland.c new file mode 100644 index 0000000..7276775 --- /dev/null +++ b/video/out/vulkan/context_wayland.c @@ -0,0 +1,133 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "video/out/gpu/context.h" +#include "video/out/wayland_common.h" + +#include "common.h" +#include "context.h" +#include "utils.h" + +struct priv { + struct mpvk_ctx vk; +}; + +static void wayland_vk_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + ra_vk_ctx_uninit(ctx); + mpvk_uninit(&p->vk); + vo_wayland_uninit(ctx->vo); +} + +static bool wayland_vk_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct mpvk_ctx *vk = &p->vk; + int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR; + + if (!mpvk_instance_init(vk, ctx->log, VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, + ctx->opts.debug)) + goto error; + + if (!vo_wayland_init(ctx->vo)) + goto error; + + VkWaylandSurfaceCreateInfoKHR wlinfo = { + .sType = VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, + .display = ctx->vo->wl->display, + .surface = ctx->vo->wl->surface, + }; + + VkResult res = vkCreateWaylandSurfaceKHR(vk->inst, &wlinfo, MPVK_ALLOCATOR, + &vk->surf); + if (res != VK_SUCCESS) { + MP_MSG(ctx, msgl, "Failed creating Wayland surface: %s\n", vk_err(res)); + goto error; + } + + /* Because in Wayland clients render whenever they receive a callback from + * the compositor, and the fact that the compositor usually stops sending + * callbacks once the surface is no longer visible, using FIFO here would + * mean the entire player would block on acquiring swapchain images. 
Hence,
+     * use MAILBOX to guarantee that there'll always be a swapchain image and
+     * the player won't block waiting on those */
+    if (!ra_vk_ctx_init(ctx, vk, VK_PRESENT_MODE_MAILBOX_KHR))
+        goto error;
+
+    return true;
+
+error:
+    wayland_vk_uninit(ctx);
+    return false;
+}
+
+static void resize(struct ra_ctx *ctx)
+{
+    struct vo_wayland_state *wl = ctx->vo->wl;
+
+    MP_VERBOSE(wl, "Handling resize on the vk side\n");
+
+    const int32_t width = wl->scaling*mp_rect_w(wl->geometry);
+    const int32_t height = wl->scaling*mp_rect_h(wl->geometry);
+
+    wl_surface_set_buffer_scale(wl->surface, wl->scaling);
+
+    wl->vo->dwidth = width;
+    wl->vo->dheight = height;
+}
+
+static bool wayland_vk_reconfig(struct ra_ctx *ctx)
+{
+    if (!vo_wayland_reconfig(ctx->vo))
+        return false;
+
+    return true;
+}
+
+static int wayland_vk_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+    int ret = vo_wayland_control(ctx->vo, events, request, arg);
+    if (*events & VO_EVENT_RESIZE) {
+        resize(ctx);
+        if (!ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight))
+            return VO_ERROR;
+    }
+    return ret;
+}
+
+static void wayland_vk_wakeup(struct ra_ctx *ctx)
+{
+    vo_wayland_wakeup(ctx->vo);
+}
+
+static void wayland_vk_wait_events(struct ra_ctx *ctx, int64_t until_time_us)
+{
+    vo_wayland_wait_events(ctx->vo, until_time_us);
+}
+
+const struct ra_ctx_fns ra_ctx_vulkan_wayland = {
+    .type = "vulkan",
+    .name = "waylandvk",
+    .reconfig = wayland_vk_reconfig,
+    .control = wayland_vk_control,
+    .wakeup = wayland_vk_wakeup,
+    .wait_events = wayland_vk_wait_events,
+    .init = wayland_vk_init,
+    .uninit = wayland_vk_uninit,
+};
diff --git a/video/out/vulkan/context_win.c b/video/out/vulkan/context_win.c
new file mode 100644
index 0000000..cf31586
--- /dev/null
+++ b/video/out/vulkan/context_win.c
@@ -0,0 +1,105 @@
+/*
+ * This file is part of mpv.
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "video/out/gpu/context.h" +#include "video/out/w32_common.h" + +#include "common.h" +#include "context.h" +#include "utils.h" + +EXTERN_C IMAGE_DOS_HEADER __ImageBase; +#define HINST_THISCOMPONENT ((HINSTANCE)&__ImageBase) + +struct priv { + struct mpvk_ctx vk; +}; + +static void win_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + ra_vk_ctx_uninit(ctx); + mpvk_uninit(&p->vk); + vo_w32_uninit(ctx->vo); +} + +static bool win_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct mpvk_ctx *vk = &p->vk; + int msgl = ctx->opts.probing ? 
MSGL_V : MSGL_ERR; + + if (!mpvk_instance_init(vk, ctx->log, VK_KHR_WIN32_SURFACE_EXTENSION_NAME, + ctx->opts.debug)) + goto error; + + if (!vo_w32_init(ctx->vo)) + goto error; + + VkWin32SurfaceCreateInfoKHR wininfo = { + .sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, + .hinstance = HINST_THISCOMPONENT, + .hwnd = vo_w32_hwnd(ctx->vo), + }; + + VkResult res = vkCreateWin32SurfaceKHR(vk->inst, &wininfo, MPVK_ALLOCATOR, + &vk->surf); + if (res != VK_SUCCESS) { + MP_MSG(ctx, msgl, "Failed creating Windows surface: %s\n", vk_err(res)); + goto error; + } + + if (!ra_vk_ctx_init(ctx, vk, VK_PRESENT_MODE_FIFO_KHR)) + goto error; + + return true; + +error: + win_uninit(ctx); + return false; +} + +static bool resize(struct ra_ctx *ctx) +{ + return ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight); +} + +static bool win_reconfig(struct ra_ctx *ctx) +{ + vo_w32_config(ctx->vo); + return resize(ctx); +} + +static int win_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + int ret = vo_w32_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) { + if (!resize(ctx)) + return VO_ERROR; + } + return ret; +} + +const struct ra_ctx_fns ra_ctx_vulkan_win = { + .type = "vulkan", + .name = "winvk", + .reconfig = win_reconfig, + .control = win_control, + .init = win_init, + .uninit = win_uninit, +}; diff --git a/video/out/vulkan/context_xlib.c b/video/out/vulkan/context_xlib.c new file mode 100644 index 0000000..c3bd49f --- /dev/null +++ b/video/out/vulkan/context_xlib.c @@ -0,0 +1,117 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "video/out/gpu/context.h" +#include "video/out/x11_common.h" + +#include "common.h" +#include "context.h" +#include "utils.h" + +struct priv { + struct mpvk_ctx vk; +}; + +static void xlib_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + ra_vk_ctx_uninit(ctx); + mpvk_uninit(&p->vk); + vo_x11_uninit(ctx->vo); +} + +static bool xlib_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct mpvk_ctx *vk = &p->vk; + int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR; + + if (!mpvk_instance_init(vk, ctx->log, VK_KHR_XLIB_SURFACE_EXTENSION_NAME, + ctx->opts.debug)) + goto error; + + if (!vo_x11_init(ctx->vo)) + goto error; + + if (!vo_x11_create_vo_window(ctx->vo, NULL, "mpvk")) + goto error; + + VkXlibSurfaceCreateInfoKHR xinfo = { + .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, + .dpy = ctx->vo->x11->display, + .window = ctx->vo->x11->window, + }; + + VkResult res = vkCreateXlibSurfaceKHR(vk->inst, &xinfo, MPVK_ALLOCATOR, + &vk->surf); + if (res != VK_SUCCESS) { + MP_MSG(ctx, msgl, "Failed creating Xlib surface: %s\n", vk_err(res)); + goto error; + } + + if (!ra_vk_ctx_init(ctx, vk, VK_PRESENT_MODE_FIFO_KHR)) + goto error; + + return true; + +error: + xlib_uninit(ctx); + return false; +} + +static bool resize(struct ra_ctx *ctx) +{ + return ra_vk_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight); +} + +static bool xlib_reconfig(struct ra_ctx *ctx) +{ + vo_x11_config_vo_window(ctx->vo); + return resize(ctx); +} + +static int xlib_control(struct ra_ctx *ctx, int *events, int request, void 
*arg) +{ + int ret = vo_x11_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) { + if (!resize(ctx)) + return VO_ERROR; + } + return ret; +} + +static void xlib_wakeup(struct ra_ctx *ctx) +{ + vo_x11_wakeup(ctx->vo); +} + +static void xlib_wait_events(struct ra_ctx *ctx, int64_t until_time_us) +{ + vo_x11_wait_events(ctx->vo, until_time_us); +} + +const struct ra_ctx_fns ra_ctx_vulkan_xlib = { + .type = "vulkan", + .name = "x11vk", + .reconfig = xlib_reconfig, + .control = xlib_control, + .wakeup = xlib_wakeup, + .wait_events = xlib_wait_events, + .init = xlib_init, + .uninit = xlib_uninit, +}; diff --git a/video/out/vulkan/formats.c b/video/out/vulkan/formats.c new file mode 100644 index 0000000..b44bead --- /dev/null +++ b/video/out/vulkan/formats.c @@ -0,0 +1,55 @@ +#include "formats.h" + +const struct vk_format vk_formats[] = { + // Regular, byte-aligned integer formats + {"r8", VK_FORMAT_R8_UNORM, 1, 1, {8 }, RA_CTYPE_UNORM }, + {"rg8", VK_FORMAT_R8G8_UNORM, 2, 2, {8, 8 }, RA_CTYPE_UNORM }, + {"rgb8", VK_FORMAT_R8G8B8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM }, + {"rgba8", VK_FORMAT_R8G8B8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM }, + {"r16", VK_FORMAT_R16_UNORM, 1, 2, {16 }, RA_CTYPE_UNORM }, + {"rg16", VK_FORMAT_R16G16_UNORM, 2, 4, {16, 16 }, RA_CTYPE_UNORM }, + {"rgb16", VK_FORMAT_R16G16B16_UNORM, 3, 6, {16, 16, 16 }, RA_CTYPE_UNORM }, + {"rgba16", VK_FORMAT_R16G16B16A16_UNORM, 4, 8, {16, 16, 16, 16}, RA_CTYPE_UNORM }, + + // Special, integer-only formats + {"r32ui", VK_FORMAT_R32_UINT, 1, 4, {32 }, RA_CTYPE_UINT }, + {"rg32ui", VK_FORMAT_R32G32_UINT, 2, 8, {32, 32 }, RA_CTYPE_UINT }, + {"rgb32ui", VK_FORMAT_R32G32B32_UINT, 3, 12, {32, 32, 32 }, RA_CTYPE_UINT }, + {"rgba32ui", VK_FORMAT_R32G32B32A32_UINT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_UINT }, + {"r64ui", VK_FORMAT_R64_UINT, 1, 8, {64 }, RA_CTYPE_UINT }, + {"rg64ui", VK_FORMAT_R64G64_UINT, 2, 16, {64, 64 }, RA_CTYPE_UINT }, + {"rgb64ui", VK_FORMAT_R64G64B64_UINT, 3, 24, {64, 64, 
64 }, RA_CTYPE_UINT }, + {"rgba64ui", VK_FORMAT_R64G64B64A64_UINT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_UINT }, + + // Packed integer formats + {"rg4", VK_FORMAT_R4G4_UNORM_PACK8, 2, 1, {4, 4 }, RA_CTYPE_UNORM }, + {"rgba4", VK_FORMAT_R4G4B4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM }, + {"rgb565", VK_FORMAT_R5G6B5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM }, + {"rgb565a1", VK_FORMAT_R5G5B5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM }, + + // Float formats (native formats, hf = half float, df = double float) + {"r16hf", VK_FORMAT_R16_SFLOAT, 1, 2, {16 }, RA_CTYPE_FLOAT }, + {"rg16hf", VK_FORMAT_R16G16_SFLOAT, 2, 4, {16, 16 }, RA_CTYPE_FLOAT }, + {"rgb16hf", VK_FORMAT_R16G16B16_SFLOAT, 3, 6, {16, 16, 16 }, RA_CTYPE_FLOAT }, + {"rgba16hf", VK_FORMAT_R16G16B16A16_SFLOAT, 4, 8, {16, 16, 16, 16}, RA_CTYPE_FLOAT }, + {"r32f", VK_FORMAT_R32_SFLOAT, 1, 4, {32 }, RA_CTYPE_FLOAT }, + {"rg32f", VK_FORMAT_R32G32_SFLOAT, 2, 8, {32, 32 }, RA_CTYPE_FLOAT }, + {"rgb32f", VK_FORMAT_R32G32B32_SFLOAT, 3, 12, {32, 32, 32 }, RA_CTYPE_FLOAT }, + {"rgba32f", VK_FORMAT_R32G32B32A32_SFLOAT, 4, 16, {32, 32, 32, 32}, RA_CTYPE_FLOAT }, + {"r64df", VK_FORMAT_R64_SFLOAT, 1, 8, {64 }, RA_CTYPE_FLOAT }, + {"rg64df", VK_FORMAT_R64G64_SFLOAT, 2, 16, {64, 64 }, RA_CTYPE_FLOAT }, + {"rgb64df", VK_FORMAT_R64G64B64_SFLOAT, 3, 24, {64, 64, 64 }, RA_CTYPE_FLOAT }, + {"rgba64df", VK_FORMAT_R64G64B64A64_SFLOAT, 4, 32, {64, 64, 64, 64}, RA_CTYPE_FLOAT }, + + // "Swapped" component order images + {"bgr8", VK_FORMAT_B8G8R8_UNORM, 3, 3, {8, 8, 8 }, RA_CTYPE_UNORM, true }, + {"bgra8", VK_FORMAT_B8G8R8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true }, + {"bgra4", VK_FORMAT_B4G4R4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM, true }, + {"bgr565", VK_FORMAT_B5G6R5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM, true }, + {"bgr565a1", VK_FORMAT_B5G5R5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM, true }, + {"a1rgb5", VK_FORMAT_A1R5G5B5_UNORM_PACK16, 4, 2, {1, 5, 5, 5 }, 
RA_CTYPE_UNORM, true }, + {"a2rgb10", VK_FORMAT_A2R10G10B10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true }, + {"a2bgr10", VK_FORMAT_A2B10G10R10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true }, + {"abgr8", VK_FORMAT_A8B8G8R8_UNORM_PACK32, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true }, + {0} +}; diff --git a/video/out/vulkan/formats.h b/video/out/vulkan/formats.h new file mode 100644 index 0000000..22782a6 --- /dev/null +++ b/video/out/vulkan/formats.h @@ -0,0 +1,16 @@ +#pragma once + +#include "video/out/gpu/ra.h" +#include "common.h" + +struct vk_format { + const char *name; + VkFormat iformat; // vulkan format enum + int components; // how many components are there + int bytes; // how many bytes is a texel + int bits[4]; // how many bits per component + enum ra_ctype ctype; // format representation type + bool fucked_order; // used for formats which are not simply rgba +}; + +extern const struct vk_format vk_formats[]; diff --git a/video/out/vulkan/malloc.c b/video/out/vulkan/malloc.c new file mode 100644 index 0000000..f6cb114 --- /dev/null +++ b/video/out/vulkan/malloc.c @@ -0,0 +1,423 @@ +#include "malloc.h" +#include "utils.h" +#include "osdep/timer.h" + +// Controls the multiplication factor for new slab allocations. The new slab +// will always be allocated such that the size of the slab is this factor times +// the previous slab. Higher values make it grow faster. +#define MPVK_HEAP_SLAB_GROWTH_RATE 4 + +// Controls the minimum slab size, to reduce the frequency at which very small +// slabs would need to get allocated when allocating the first few buffers. +// (Default: 1 MB) +#define MPVK_HEAP_MINIMUM_SLAB_SIZE (1 << 20) + +// Controls the maximum slab size, to reduce the effect of unbounded slab +// growth exhausting memory. If the application needs a single allocation +// that's bigger than this value, it will be allocated directly from the +// device. 
(Default: 512 MB) +#define MPVK_HEAP_MAXIMUM_SLAB_SIZE (1 << 29) + +// Controls the minimum free region size, to reduce thrashing the free space +// map with lots of small buffers during uninit. (Default: 1 KB) +#define MPVK_HEAP_MINIMUM_REGION_SIZE (1 << 10) + +// Represents a region of available memory +struct vk_region { + size_t start; // first offset in region + size_t end; // first offset *not* in region +}; + +static inline size_t region_len(struct vk_region r) +{ + return r.end - r.start; +} + +// A single slab represents a contiguous region of allocated memory. Actual +// allocations are served as slices of this. Slabs are organized into linked +// lists, which represent individual heaps. +struct vk_slab { + VkDeviceMemory mem; // underlying device allocation + size_t size; // total size of `slab` + size_t used; // number of bytes actually in use (for GC accounting) + bool dedicated; // slab is allocated specifically for one object + // free space map: a sorted list of memory regions that are available + struct vk_region *regions; + int num_regions; + // optional, depends on the memory type: + VkBuffer buffer; // buffer spanning the entire slab + void *data; // mapped memory corresponding to `mem` +}; + +// Represents a single memory heap. We keep track of a vk_heap for each +// combination of buffer type and memory selection parameters. This shouldn't +// actually be that many in practice, because some combinations simply never +// occur, and others will generally be the same for the same objects. +struct vk_heap { + VkBufferUsageFlags usage; // the buffer usage type (or 0) + VkMemoryPropertyFlags flags; // the memory type flags (or 0) + uint32_t typeBits; // the memory type index requirements (or 0) + struct vk_slab **slabs; // array of slabs sorted by size + int num_slabs; +}; + +// The overall state of the allocator, which keeps track of a vk_heap for each +// memory type. 
+struct vk_malloc { + VkPhysicalDeviceMemoryProperties props; + struct vk_heap *heaps; + int num_heaps; +}; + +static void slab_free(struct mpvk_ctx *vk, struct vk_slab *slab) +{ + if (!slab) + return; + + assert(slab->used == 0); + + int64_t start = mp_time_us(); + vkDestroyBuffer(vk->dev, slab->buffer, MPVK_ALLOCATOR); + // also implicitly unmaps the memory if needed + vkFreeMemory(vk->dev, slab->mem, MPVK_ALLOCATOR); + int64_t stop = mp_time_us(); + + MP_VERBOSE(vk, "Freeing slab of size %zu took %lld μs.\n", + slab->size, (long long)(stop - start)); + + talloc_free(slab); +} + +static bool find_best_memtype(struct mpvk_ctx *vk, uint32_t typeBits, + VkMemoryPropertyFlags flags, + VkMemoryType *out_type, int *out_index) +{ + struct vk_malloc *ma = vk->alloc; + + // The vulkan spec requires memory types to be sorted in the "optimal" + // order, so the first matching type we find will be the best/fastest one. + for (int i = 0; i < ma->props.memoryTypeCount; i++) { + // The memory type flags must include our properties + if ((ma->props.memoryTypes[i].propertyFlags & flags) != flags) + continue; + // The memory type must be supported by the requirements (bitfield) + if (typeBits && !(typeBits & (1 << i))) + continue; + *out_type = ma->props.memoryTypes[i]; + *out_index = i; + return true; + } + + MP_ERR(vk, "Found no memory type matching property flags 0x%x and type " + "bits 0x%x!\n", (unsigned)flags, (unsigned)typeBits); + return false; +} + +static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap, + size_t size) +{ + struct vk_slab *slab = talloc_ptrtype(NULL, slab); + *slab = (struct vk_slab) { + .size = size, + }; + + MP_TARRAY_APPEND(slab, slab->regions, slab->num_regions, (struct vk_region) { + .start = 0, + .end = slab->size, + }); + + VkMemoryAllocateInfo minfo = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = slab->size, + }; + + uint32_t typeBits = heap->typeBits ? 
heap->typeBits : UINT32_MAX; + if (heap->usage) { + VkBufferCreateInfo binfo = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = slab->size, + .usage = heap->usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + VK(vkCreateBuffer(vk->dev, &binfo, MPVK_ALLOCATOR, &slab->buffer)); + + VkMemoryRequirements reqs; + vkGetBufferMemoryRequirements(vk->dev, slab->buffer, &reqs); + minfo.allocationSize = reqs.size; // this can be larger than slab->size + typeBits &= reqs.memoryTypeBits; // this can restrict the types + } + + VkMemoryType type; + int index; + if (!find_best_memtype(vk, typeBits, heap->flags, &type, &index)) + goto error; + + MP_VERBOSE(vk, "Allocating %zu memory of type 0x%x (id %d) in heap %d.\n", + slab->size, (unsigned)type.propertyFlags, index, (int)type.heapIndex); + + minfo.memoryTypeIndex = index; + VK(vkAllocateMemory(vk->dev, &minfo, MPVK_ALLOCATOR, &slab->mem)); + + if (heap->flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) + VK(vkMapMemory(vk->dev, slab->mem, 0, VK_WHOLE_SIZE, 0, &slab->data)); + + if (slab->buffer) + VK(vkBindBufferMemory(vk->dev, slab->buffer, slab->mem, 0)); + + return slab; + +error: + slab_free(vk, slab); + return NULL; +} + +static void insert_region(struct vk_slab *slab, struct vk_region region) +{ + if (region.start == region.end) + return; + + bool big_enough = region_len(region) >= MPVK_HEAP_MINIMUM_REGION_SIZE; + + // Find the index of the first region that comes after this + for (int i = 0; i < slab->num_regions; i++) { + struct vk_region *r = &slab->regions[i]; + + // Check for a few special cases which can be coalesced + if (r->end == region.start) { + // The new region is at the tail of this region. 
In addition to + // modifying this region, we also need to coalesce all the following + // regions for as long as possible + r->end = region.end; + + struct vk_region *next = &slab->regions[i+1]; + while (i+1 < slab->num_regions && r->end == next->start) { + r->end = next->end; + MP_TARRAY_REMOVE_AT(slab->regions, slab->num_regions, i+1); + } + return; + } + + if (r->start == region.end) { + // The new region is at the head of this region. We don't need to + // do anything special here - because if this could be further + // coalesced backwards, the previous loop iteration would already + // have caught it. + r->start = region.start; + return; + } + + if (r->start > region.start) { + // The new region comes somewhere before this region, so insert + // it into this index in the array. + if (big_enough) { + MP_TARRAY_INSERT_AT(slab, slab->regions, slab->num_regions, + i, region); + } + return; + } + } + + // If we've reached the end of this loop, then all of the regions + // come before the new region, and are disconnected - so append it + if (big_enough) + MP_TARRAY_APPEND(slab, slab->regions, slab->num_regions, region); +} + +static void heap_uninit(struct mpvk_ctx *vk, struct vk_heap *heap) +{ + for (int i = 0; i < heap->num_slabs; i++) + slab_free(vk, heap->slabs[i]); + + talloc_free(heap->slabs); + *heap = (struct vk_heap){0}; +} + +void vk_malloc_init(struct mpvk_ctx *vk) +{ + assert(vk->physd); + vk->alloc = talloc_zero(NULL, struct vk_malloc); + vkGetPhysicalDeviceMemoryProperties(vk->physd, &vk->alloc->props); +} + +void vk_malloc_uninit(struct mpvk_ctx *vk) +{ + struct vk_malloc *ma = vk->alloc; + if (!ma) + return; + + for (int i = 0; i < ma->num_heaps; i++) + heap_uninit(vk, &ma->heaps[i]); + + talloc_free(ma); + vk->alloc = NULL; +} + +void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice) +{ + struct vk_slab *slab = slice.priv; + if (!slab) + return; + + assert(slab->used >= slice.size); + slab->used -= slice.size; + + MP_DBG(vk, "Freeing 
slice %zu + %zu from slab with size %zu\n", + slice.offset, slice.size, slab->size); + + if (slab->dedicated) { + // If the slab was purpose-allocated for this memslice, we can just + // free it here + slab_free(vk, slab); + } else { + // Return the allocation to the free space map + insert_region(slab, (struct vk_region) { + .start = slice.offset, + .end = slice.offset + slice.size, + }); + } +} + +// reqs: can be NULL +static struct vk_heap *find_heap(struct mpvk_ctx *vk, VkBufferUsageFlags usage, + VkMemoryPropertyFlags flags, + VkMemoryRequirements *reqs) +{ + struct vk_malloc *ma = vk->alloc; + int typeBits = reqs ? reqs->memoryTypeBits : 0; + + for (int i = 0; i < ma->num_heaps; i++) { + if (ma->heaps[i].usage != usage) + continue; + if (ma->heaps[i].flags != flags) + continue; + if (ma->heaps[i].typeBits != typeBits) + continue; + return &ma->heaps[i]; + } + + // Not found => add it + MP_TARRAY_GROW(ma, ma->heaps, ma->num_heaps + 1); + struct vk_heap *heap = &ma->heaps[ma->num_heaps++]; + *heap = (struct vk_heap) { + .usage = usage, + .flags = flags, + .typeBits = typeBits, + }; + return heap; +} + +static inline bool region_fits(struct vk_region r, size_t size, size_t align) +{ + return MP_ALIGN_UP(r.start, align) + size <= r.end; +} + +// Finds the best-fitting region in a heap. If the heap is too small or too +// fragmented, a new slab will be allocated under the hood. 
+static bool heap_get_region(struct mpvk_ctx *vk, struct vk_heap *heap, + size_t size, size_t align, + struct vk_slab **out_slab, int *out_index) +{ + struct vk_slab *slab = NULL; + + // If the allocation is very big, serve it directly instead of bothering + // with the heap + if (size > MPVK_HEAP_MAXIMUM_SLAB_SIZE) { + slab = slab_alloc(vk, heap, size); + *out_slab = slab; + *out_index = 0; + return !!slab; + } + + for (int i = 0; i < heap->num_slabs; i++) { + slab = heap->slabs[i]; + if (slab->size < size) + continue; + + // Attempt a best fit search + int best = -1; + for (int n = 0; n < slab->num_regions; n++) { + struct vk_region r = slab->regions[n]; + if (!region_fits(r, size, align)) + continue; + if (best >= 0 && region_len(r) > region_len(slab->regions[best])) + continue; + best = n; + } + + if (best >= 0) { + *out_slab = slab; + *out_index = best; + return true; + } + } + + // Otherwise, allocate a new vk_slab and append it to the list. + size_t cur_size = MPMAX(size, slab ? slab->size : 0); + size_t slab_size = MPVK_HEAP_SLAB_GROWTH_RATE * cur_size; + slab_size = MPMAX(MPVK_HEAP_MINIMUM_SLAB_SIZE, slab_size); + slab_size = MPMIN(MPVK_HEAP_MAXIMUM_SLAB_SIZE, slab_size); + assert(slab_size >= size); + slab = slab_alloc(vk, heap, slab_size); + if (!slab) + return false; + MP_TARRAY_APPEND(NULL, heap->slabs, heap->num_slabs, slab); + + // Return the only region there is in a newly allocated slab + assert(slab->num_regions == 1); + *out_slab = slab; + *out_index = 0; + return true; +} + +static bool slice_heap(struct mpvk_ctx *vk, struct vk_heap *heap, size_t size, + size_t alignment, struct vk_memslice *out) +{ + struct vk_slab *slab; + int index; + alignment = MP_ALIGN_UP(alignment, vk->limits.bufferImageGranularity); + if (!heap_get_region(vk, heap, size, alignment, &slab, &index)) + return false; + + struct vk_region reg = slab->regions[index]; + MP_TARRAY_REMOVE_AT(slab->regions, slab->num_regions, index); + *out = (struct vk_memslice) { + .vkmem = 
slab->mem, + .offset = MP_ALIGN_UP(reg.start, alignment), + .size = size, + .priv = slab, + }; + + MP_DBG(vk, "Sub-allocating slice %zu + %zu from slab with size %zu\n", + out->offset, out->size, slab->size); + + size_t out_end = out->offset + out->size; + insert_region(slab, (struct vk_region) { reg.start, out->offset }); + insert_region(slab, (struct vk_region) { out_end, reg.end }); + + slab->used += size; + return true; +} + +bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs, + VkMemoryPropertyFlags flags, struct vk_memslice *out) +{ + struct vk_heap *heap = find_heap(vk, 0, flags, &reqs); + return slice_heap(vk, heap, reqs.size, reqs.alignment, out); +} + +bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlags bufFlags, + VkMemoryPropertyFlags memFlags, VkDeviceSize size, + VkDeviceSize alignment, struct vk_bufslice *out) +{ + struct vk_heap *heap = find_heap(vk, bufFlags, memFlags, NULL); + if (!slice_heap(vk, heap, size, alignment, &out->mem)) + return false; + + struct vk_slab *slab = out->mem.priv; + out->buf = slab->buffer; + if (slab->data) + out->data = (void *)((uintptr_t)slab->data + (ptrdiff_t)out->mem.offset); + + return true; +} diff --git a/video/out/vulkan/malloc.h b/video/out/vulkan/malloc.h new file mode 100644 index 0000000..466c8d8 --- /dev/null +++ b/video/out/vulkan/malloc.h @@ -0,0 +1,35 @@ +#pragma once + +#include "common.h" + +void vk_malloc_init(struct mpvk_ctx *vk); +void vk_malloc_uninit(struct mpvk_ctx *vk); + +// Represents a single "slice" of generic (non-buffer) memory, plus some +// metadata for accounting. This struct is essentially read-only. 
+struct vk_memslice { + VkDeviceMemory vkmem; + size_t offset; + size_t size; + void *priv; +}; + +void vk_free_memslice(struct mpvk_ctx *vk, struct vk_memslice slice); +bool vk_malloc_generic(struct mpvk_ctx *vk, VkMemoryRequirements reqs, + VkMemoryPropertyFlags flags, struct vk_memslice *out); + +// Represents a single "slice" of a larger buffer +struct vk_bufslice { + struct vk_memslice mem; // must be freed by the user when done + VkBuffer buf; // the buffer this memory was sliced from + // For persistently mapped buffers, this points to the first usable byte of + // this slice. + void *data; +}; + +// Allocate a buffer slice. This is more efficient than vk_malloc_generic for +// when the user needs lots of buffers, since it doesn't require +// creating/destroying lots of (little) VkBuffers. +bool vk_malloc_buffer(struct mpvk_ctx *vk, VkBufferUsageFlags bufFlags, + VkMemoryPropertyFlags memFlags, VkDeviceSize size, + VkDeviceSize alignment, struct vk_bufslice *out); diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c new file mode 100644 index 0000000..f85e30e --- /dev/null +++ b/video/out/vulkan/ra_vk.c @@ -0,0 +1,1747 @@ +#include "video/out/gpu/utils.h" +#include "video/out/gpu/spirv.h" + +#include "ra_vk.h" +#include "malloc.h" + +static struct ra_fns ra_fns_vk; + +// For ra.priv +struct ra_vk { + struct mpvk_ctx *vk; + struct ra_tex *clear_tex; // stupid hack for clear() + struct vk_cmd *cmd; // currently recording cmd +}; + +struct mpvk_ctx *ra_vk_get(struct ra *ra) +{ + if (ra->fns != &ra_fns_vk) + return NULL; + + struct ra_vk *p = ra->priv; + return p->vk; +} + +// Returns a command buffer, or NULL on error +static struct vk_cmd *vk_require_cmd(struct ra *ra) +{ + struct ra_vk *p = ra->priv; + struct mpvk_ctx *vk = ra_vk_get(ra); + + if (!p->cmd) + p->cmd = vk_cmd_begin(vk, vk->pool); + + return p->cmd; +} + +// Note: This technically follows the flush() API, but we don't need +// to expose that (and in fact, it's a bad idea) since we 
control flushing +// behavior with ra_vk_present_frame already. +static bool vk_flush(struct ra *ra, VkSemaphore *done) +{ + struct ra_vk *p = ra->priv; + struct mpvk_ctx *vk = ra_vk_get(ra); + + if (p->cmd) { + if (!vk_cmd_submit(vk, p->cmd, done)) + return false; + p->cmd = NULL; + } + + return true; +} + +// The callback's *priv will always be set to `ra` +static void vk_callback(struct ra *ra, vk_cb callback, void *arg) +{ + struct ra_vk *p = ra->priv; + struct mpvk_ctx *vk = ra_vk_get(ra); + + if (p->cmd) { + vk_cmd_callback(p->cmd, callback, ra, arg); + } else { + vk_dev_callback(vk, callback, ra, arg); + } +} + +#define MAKE_LAZY_DESTRUCTOR(fun, argtype) \ + static void fun##_lazy(struct ra *ra, argtype *arg) { \ + vk_callback(ra, (vk_cb) fun, arg); \ + } + +static void vk_destroy_ra(struct ra *ra) +{ + struct ra_vk *p = ra->priv; + struct mpvk_ctx *vk = ra_vk_get(ra); + + vk_flush(ra, NULL); + mpvk_dev_wait_idle(vk); + ra_tex_free(ra, &p->clear_tex); + + talloc_free(ra); +} + +static bool vk_setup_formats(struct ra *ra) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + + for (const struct vk_format *vk_fmt = vk_formats; vk_fmt->name; vk_fmt++) { + VkFormatProperties prop; + vkGetPhysicalDeviceFormatProperties(vk->physd, vk_fmt->iformat, &prop); + + // As a bare minimum, we need to sample from an allocated image + VkFormatFeatureFlags flags = prop.optimalTilingFeatures; + if (!(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) + continue; + + VkFormatFeatureFlags linear_bits, render_bits; + linear_bits = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + render_bits = VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; + + struct ra_format *fmt = talloc_zero(ra, struct ra_format); + *fmt = (struct ra_format) { + .name = vk_fmt->name, + .priv = (void *)vk_fmt, + .ctype = vk_fmt->ctype, + .ordered = !vk_fmt->fucked_order, + .num_components = vk_fmt->components, + .pixel_size = vk_fmt->bytes, + .linear_filter = !!(flags & 
linear_bits), + .renderable = !!(flags & render_bits), + }; + + for (int i = 0; i < 4; i++) + fmt->component_size[i] = fmt->component_depth[i] = vk_fmt->bits[i]; + + fmt->glsl_format = ra_fmt_glsl_format(fmt); + + MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt); + } + + // Populate some other capabilities related to formats while we're at it + VkImageType imgType[3] = { + VK_IMAGE_TYPE_1D, + VK_IMAGE_TYPE_2D, + VK_IMAGE_TYPE_3D + }; + + // R8_UNORM is supported on literally every single vulkan implementation + const VkFormat testfmt = VK_FORMAT_R8_UNORM; + + for (int d = 0; d < 3; d++) { + VkImageFormatProperties iprop; + VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd, + testfmt, imgType[d], VK_IMAGE_TILING_OPTIMAL, + VK_IMAGE_USAGE_SAMPLED_BIT, 0, &iprop); + + switch (imgType[d]) { + case VK_IMAGE_TYPE_1D: + if (res == VK_SUCCESS) + ra->caps |= RA_CAP_TEX_1D; + break; + case VK_IMAGE_TYPE_2D: + // 2D formats must be supported by RA, so ensure this is the case + VK_ASSERT(res, "Querying 2D format limits"); + ra->max_texture_wh = MPMIN(iprop.maxExtent.width, iprop.maxExtent.height); + break; + case VK_IMAGE_TYPE_3D: + if (res == VK_SUCCESS) + ra->caps |= RA_CAP_TEX_3D; + break; + } + } + + // RA_CAP_BLIT implies both blitting between images as well as blitting + // directly to the swapchain image, so check for all three operations + bool blittable = true; + VkFormatProperties prop; + vkGetPhysicalDeviceFormatProperties(vk->physd, testfmt, &prop); + if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_SRC_BIT)) + blittable = false; + if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT)) + blittable = false; + + vkGetPhysicalDeviceFormatProperties(vk->physd, vk->surf_format.format, &prop); + if (!(prop.optimalTilingFeatures & VK_FORMAT_FEATURE_BLIT_DST_BIT)) + blittable = false; + + if (blittable) + ra->caps |= RA_CAP_BLIT; + + return true; + +error: + return false; +} + +static struct ra_fns ra_fns_vk; + +struct ra 
*ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log) +{ + assert(vk->dev); + assert(vk->alloc); + + struct ra *ra = talloc_zero(NULL, struct ra); + ra->log = log; + ra->fns = &ra_fns_vk; + + struct ra_vk *p = ra->priv = talloc_zero(ra, struct ra_vk); + p->vk = vk; + + ra->caps |= vk->spirv->ra_caps; + ra->glsl_version = vk->spirv->glsl_version; + ra->glsl_vulkan = true; + ra->max_shmem = vk->limits.maxComputeSharedMemorySize; + ra->max_pushc_size = vk->limits.maxPushConstantsSize; + + if (vk->pool->props.queueFlags & VK_QUEUE_COMPUTE_BIT) + ra->caps |= RA_CAP_COMPUTE; + + if (!vk_setup_formats(ra)) + goto error; + + // UBO support is required + ra->caps |= RA_CAP_BUF_RO | RA_CAP_FRAGCOORD; + + // textureGather is only supported in GLSL 400+ + if (ra->glsl_version >= 400) + ra->caps |= RA_CAP_GATHER; + + // Try creating a shader storage buffer + struct ra_buf_params ssbo_params = { + .type = RA_BUF_TYPE_SHADER_STORAGE, + .size = 16, + }; + + struct ra_buf *ssbo = ra_buf_create(ra, &ssbo_params); + if (ssbo) { + ra->caps |= RA_CAP_BUF_RW; + ra_buf_free(ra, &ssbo); + } + + // To support clear() by region, we need to allocate a dummy 1x1 image that + // will be used as the source of blit operations + struct ra_tex_params clear_params = { + .dimensions = 1, // no point in using a 2D image if height = 1 + .w = 1, + .h = 1, + .d = 1, + .format = ra_find_float16_format(ra, 4), + .blit_src = 1, + .host_mutable = 1, + }; + + p->clear_tex = ra_tex_create(ra, &clear_params); + if (!p->clear_tex) { + MP_ERR(ra, "Failed creating 1x1 dummy texture for clear()!\n"); + goto error; + } + + return ra; + +error: + vk_destroy_ra(ra); + return NULL; +} + +// Boilerplate wrapper around vkCreateRenderPass to ensure passes remain +// compatible +static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt, + bool load_fbo, VkRenderPass *out) +{ + struct vk_format *vk_fmt = fmt->priv; + assert(fmt->renderable); + + VkRenderPassCreateInfo rinfo = { + .sType = 
VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkAttachmentDescription) { + .format = vk_fmt->iformat, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = load_fbo ? VK_ATTACHMENT_LOAD_OP_LOAD + : VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = load_fbo ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + : VK_IMAGE_LAYOUT_UNDEFINED, + .finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + }, + .subpassCount = 1, + .pSubpasses = &(VkSubpassDescription) { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .colorAttachmentCount = 1, + .pColorAttachments = &(VkAttachmentReference) { + .attachment = 0, + .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + }, + }, + }; + + return vkCreateRenderPass(dev, &rinfo, MPVK_ALLOCATOR, out); +} + +// For ra_tex.priv +struct ra_tex_vk { + bool external_img; + VkImageType type; + VkImage img; + struct vk_memslice mem; + // for sampling + VkImageView view; + VkSampler sampler; + // for rendering + VkFramebuffer framebuffer; + VkRenderPass dummyPass; + // for uploading + struct ra_buf_pool pbo; + // "current" metadata, can change during the course of execution + VkImageLayout current_layout; + VkPipelineStageFlags current_stage; + VkAccessFlags current_access; +}; + +// Small helper to ease image barrier creation. 
if `discard` is set, the contents +// of the image will be undefined after the barrier +static void tex_barrier(struct vk_cmd *cmd, struct ra_tex_vk *tex_vk, + VkPipelineStageFlags newStage, VkAccessFlags newAccess, + VkImageLayout newLayout, bool discard) +{ + VkImageMemoryBarrier imgBarrier = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .oldLayout = tex_vk->current_layout, + .newLayout = newLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .srcAccessMask = tex_vk->current_access, + .dstAccessMask = newAccess, + .image = tex_vk->img, + .subresourceRange = vk_range, + }; + + if (discard) { + imgBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imgBarrier.srcAccessMask = 0; + } + + if (imgBarrier.oldLayout != imgBarrier.newLayout || + imgBarrier.srcAccessMask != imgBarrier.dstAccessMask) + { + vkCmdPipelineBarrier(cmd->buf, tex_vk->current_stage, newStage, 0, + 0, NULL, 0, NULL, 1, &imgBarrier); + } + + tex_vk->current_stage = newStage; + tex_vk->current_layout = newLayout; + tex_vk->current_access = newAccess; +} + +static void vk_tex_destroy(struct ra *ra, struct ra_tex *tex) +{ + if (!tex) + return; + + struct mpvk_ctx *vk = ra_vk_get(ra); + struct ra_tex_vk *tex_vk = tex->priv; + + ra_buf_pool_uninit(ra, &tex_vk->pbo); + vkDestroyFramebuffer(vk->dev, tex_vk->framebuffer, MPVK_ALLOCATOR); + vkDestroyRenderPass(vk->dev, tex_vk->dummyPass, MPVK_ALLOCATOR); + vkDestroySampler(vk->dev, tex_vk->sampler, MPVK_ALLOCATOR); + vkDestroyImageView(vk->dev, tex_vk->view, MPVK_ALLOCATOR); + if (!tex_vk->external_img) { + vkDestroyImage(vk->dev, tex_vk->img, MPVK_ALLOCATOR); + vk_free_memslice(vk, tex_vk->mem); + } + + talloc_free(tex); +} + +MAKE_LAZY_DESTRUCTOR(vk_tex_destroy, struct ra_tex); + +// Initializes non-VkImage values like the image view, samplers, etc. 
+static bool vk_init_image(struct ra *ra, struct ra_tex *tex) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + + struct ra_tex_params *params = &tex->params; + struct ra_tex_vk *tex_vk = tex->priv; + assert(tex_vk->img); + + tex_vk->current_layout = VK_IMAGE_LAYOUT_UNDEFINED; + tex_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + tex_vk->current_access = 0; + + if (params->render_src || params->render_dst) { + static const VkImageViewType viewType[] = { + [VK_IMAGE_TYPE_1D] = VK_IMAGE_VIEW_TYPE_1D, + [VK_IMAGE_TYPE_2D] = VK_IMAGE_VIEW_TYPE_2D, + [VK_IMAGE_TYPE_3D] = VK_IMAGE_VIEW_TYPE_3D, + }; + + const struct vk_format *fmt = params->format->priv; + VkImageViewCreateInfo vinfo = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = tex_vk->img, + .viewType = viewType[tex_vk->type], + .format = fmt->iformat, + .subresourceRange = vk_range, + }; + + VK(vkCreateImageView(vk->dev, &vinfo, MPVK_ALLOCATOR, &tex_vk->view)); + } + + if (params->render_src) { + assert(params->format->linear_filter || !params->src_linear); + VkFilter filter = params->src_linear + ? VK_FILTER_LINEAR + : VK_FILTER_NEAREST; + VkSamplerAddressMode wrap = params->src_repeat + ? 
VK_SAMPLER_ADDRESS_MODE_REPEAT + : VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + VkSamplerCreateInfo sinfo = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = filter, + .minFilter = filter, + .addressModeU = wrap, + .addressModeV = wrap, + .addressModeW = wrap, + .maxAnisotropy = 1.0, + }; + + VK(vkCreateSampler(vk->dev, &sinfo, MPVK_ALLOCATOR, &tex_vk->sampler)); + } + + if (params->render_dst) { + // Framebuffers need to be created against a specific render pass + // layout, so we need to temporarily create a skeleton/dummy render + // pass for vulkan to figure out the compatibility + VK(vk_create_render_pass(vk->dev, params->format, false, &tex_vk->dummyPass)); + + VkFramebufferCreateInfo finfo = { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .renderPass = tex_vk->dummyPass, + .attachmentCount = 1, + .pAttachments = &tex_vk->view, + .width = tex->params.w, + .height = tex->params.h, + .layers = 1, + }; + + VK(vkCreateFramebuffer(vk->dev, &finfo, MPVK_ALLOCATOR, + &tex_vk->framebuffer)); + + // NOTE: Normally we would free the dummyPass again here, but a bug + // in the nvidia vulkan driver causes a segfault if you do. 
+ } + + return true; + +error: + return false; +} + +static struct ra_tex *vk_tex_create(struct ra *ra, + const struct ra_tex_params *params) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + + struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); + tex->params = *params; + tex->params.initial_data = NULL; + + struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk); + + const struct vk_format *fmt = params->format->priv; + switch (params->dimensions) { + case 1: tex_vk->type = VK_IMAGE_TYPE_1D; break; + case 2: tex_vk->type = VK_IMAGE_TYPE_2D; break; + case 3: tex_vk->type = VK_IMAGE_TYPE_3D; break; + default: abort(); + } + + VkImageUsageFlags usage = 0; + if (params->render_src) + usage |= VK_IMAGE_USAGE_SAMPLED_BIT; + if (params->render_dst) + usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + if (params->storage_dst) + usage |= VK_IMAGE_USAGE_STORAGE_BIT; + if (params->blit_src) + usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + if (params->host_mutable || params->blit_dst || params->initial_data) + usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + + // Double-check image usage support and fail immediately if invalid + VkImageFormatProperties iprop; + VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd, + fmt->iformat, tex_vk->type, VK_IMAGE_TILING_OPTIMAL, usage, 0, + &iprop); + if (res == VK_ERROR_FORMAT_NOT_SUPPORTED) { + return NULL; + } else { + VK_ASSERT(res, "Querying image format properties"); + } + + VkFormatProperties prop; + vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop); + VkFormatFeatureFlags flags = prop.optimalTilingFeatures; + + bool has_blit_src = flags & VK_FORMAT_FEATURE_BLIT_SRC_BIT, + has_src_linear = flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + + if (params->w > iprop.maxExtent.width || + params->h > iprop.maxExtent.height || + params->d > iprop.maxExtent.depth || + (params->blit_src && !has_blit_src) || + (params->src_linear && !has_src_linear)) + { + return NULL; + } + + VkImageCreateInfo 
iinfo = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = tex_vk->type, + .format = fmt->iformat, + .extent = (VkExtent3D) { params->w, params->h, params->d }, + .mipLevels = 1, + .arrayLayers = 1, + .samples = VK_SAMPLE_COUNT_1_BIT, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = usage, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 1, + .pQueueFamilyIndices = &vk->pool->qf, + }; + + VK(vkCreateImage(vk->dev, &iinfo, MPVK_ALLOCATOR, &tex_vk->img)); + + VkMemoryPropertyFlags memFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + VkMemoryRequirements reqs; + vkGetImageMemoryRequirements(vk->dev, tex_vk->img, &reqs); + + struct vk_memslice *mem = &tex_vk->mem; + if (!vk_malloc_generic(vk, reqs, memFlags, mem)) + goto error; + + VK(vkBindImageMemory(vk->dev, tex_vk->img, mem->vkmem, mem->offset)); + + if (!vk_init_image(ra, tex)) + goto error; + + if (params->initial_data) { + struct ra_tex_upload_params ul_params = { + .tex = tex, + .invalidate = true, + .src = params->initial_data, + .stride = params->w * fmt->bytes, + }; + if (!ra->fns->tex_upload(ra, &ul_params)) + goto error; + } + + return tex; + +error: + vk_tex_destroy(ra, tex); + return NULL; +} + +struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg, + VkSwapchainCreateInfoKHR info) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + struct ra_tex *tex = NULL; + + const struct ra_format *format = NULL; + for (int i = 0; i < ra->num_formats; i++) { + const struct vk_format *fmt = ra->formats[i]->priv; + if (fmt->iformat == vk->surf_format.format) { + format = ra->formats[i]; + break; + } + } + + if (!format) { + MP_ERR(ra, "Could not find ra_format suitable for wrapped swchain image " + "with surface format 0x%x\n", vk->surf_format.format); + goto error; + } + + tex = talloc_zero(NULL, struct ra_tex); + tex->params = (struct ra_tex_params) { + .format = format, + .dimensions = 2, + .w = info.imageExtent.width, + .h = 
info.imageExtent.height, + .d = 1, + .blit_src = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT), + .blit_dst = !!(info.imageUsage & VK_IMAGE_USAGE_TRANSFER_DST_BIT), + .render_src = !!(info.imageUsage & VK_IMAGE_USAGE_SAMPLED_BIT), + .render_dst = !!(info.imageUsage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT), + .storage_dst = !!(info.imageUsage & VK_IMAGE_USAGE_STORAGE_BIT), + }; + + struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk); + tex_vk->type = VK_IMAGE_TYPE_2D; + tex_vk->external_img = true; + tex_vk->img = vkimg; + + if (!vk_init_image(ra, tex)) + goto error; + + return tex; + +error: + vk_tex_destroy(ra, tex); + return NULL; +} + +// For ra_buf.priv +struct ra_buf_vk { + struct vk_bufslice slice; + int refcount; // 1 = object allocated but not in use, > 1 = in use + bool needsflush; + // "current" metadata, can change during course of execution + VkPipelineStageFlags current_stage; + VkAccessFlags current_access; +}; + +static void vk_buf_deref(struct ra *ra, struct ra_buf *buf) +{ + if (!buf) + return; + + struct mpvk_ctx *vk = ra_vk_get(ra); + struct ra_buf_vk *buf_vk = buf->priv; + + if (--buf_vk->refcount == 0) { + vk_free_memslice(vk, buf_vk->slice.mem); + talloc_free(buf); + } +} + +static void buf_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_buf *buf, + VkPipelineStageFlags newStage, + VkAccessFlags newAccess, int offset, size_t size) +{ + struct ra_buf_vk *buf_vk = buf->priv; + + VkBufferMemoryBarrier buffBarrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = buf_vk->current_access, + .dstAccessMask = newAccess, + .buffer = buf_vk->slice.buf, + .offset = offset, + .size = size, + }; + + if (buf_vk->needsflush || buf->params.host_mapped) { + buffBarrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; + buf_vk->current_stage = VK_PIPELINE_STAGE_HOST_BIT; + buf_vk->needsflush = false; + } + + if (buffBarrier.srcAccessMask != buffBarrier.dstAccessMask) { + vkCmdPipelineBarrier(cmd->buf, 
buf_vk->current_stage, newStage, 0, + 0, NULL, 1, &buffBarrier, 0, NULL); + } + + buf_vk->current_stage = newStage; + buf_vk->current_access = newAccess; + buf_vk->refcount++; + vk_cmd_callback(cmd, (vk_cb) vk_buf_deref, ra, buf); +} + +#define vk_buf_destroy vk_buf_deref +MAKE_LAZY_DESTRUCTOR(vk_buf_destroy, struct ra_buf); + +static void vk_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, + const void *data, size_t size) +{ + assert(buf->params.host_mutable || buf->params.initial_data); + struct ra_buf_vk *buf_vk = buf->priv; + + // For host-mapped buffers, we can just directly memcpy the buffer contents. + // Otherwise, we can update the buffer from the GPU using a command buffer. + if (buf_vk->slice.data) { + assert(offset + size <= buf->params.size); + uintptr_t addr = (uintptr_t)buf_vk->slice.data + offset; + memcpy((void *)addr, data, size); + buf_vk->needsflush = true; + } else { + struct vk_cmd *cmd = vk_require_cmd(ra); + if (!cmd) { + MP_ERR(ra, "Failed updating buffer!\n"); + return; + } + + buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, offset, size); + + VkDeviceSize bufOffset = buf_vk->slice.mem.offset + offset; + assert(bufOffset == MP_ALIGN_UP(bufOffset, 4)); + vkCmdUpdateBuffer(cmd->buf, buf_vk->slice.buf, bufOffset, size, data); + } +} + +static struct ra_buf *vk_buf_create(struct ra *ra, + const struct ra_buf_params *params) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + + struct ra_buf *buf = talloc_zero(NULL, struct ra_buf); + buf->params = *params; + + struct ra_buf_vk *buf_vk = buf->priv = talloc_zero(buf, struct ra_buf_vk); + buf_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + buf_vk->current_access = 0; + buf_vk->refcount = 1; + + VkBufferUsageFlags bufFlags = 0; + VkMemoryPropertyFlags memFlags = 0; + VkDeviceSize align = 4; // alignment 4 is needed for buf_update + + switch (params->type) { + case RA_BUF_TYPE_TEX_UPLOAD: + bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; + 
memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + break; + case RA_BUF_TYPE_UNIFORM: + bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; + memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + align = MP_ALIGN_UP(align, vk->limits.minUniformBufferOffsetAlignment); + break; + case RA_BUF_TYPE_SHADER_STORAGE: + bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; + memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + align = MP_ALIGN_UP(align, vk->limits.minStorageBufferOffsetAlignment); + break; + case RA_BUF_TYPE_VERTEX: + bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; + memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + break; + default: abort(); + } + + if (params->host_mutable || params->initial_data) { + bufFlags |= VK_BUFFER_USAGE_TRANSFER_DST_BIT; + align = MP_ALIGN_UP(align, vk->limits.optimalBufferCopyOffsetAlignment); + } + + if (params->host_mapped) { + memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + } + + if (!vk_malloc_buffer(vk, bufFlags, memFlags, params->size, align, + &buf_vk->slice)) + { + goto error; + } + + if (params->host_mapped) + buf->data = buf_vk->slice.data; + + if (params->initial_data) + vk_buf_update(ra, buf, 0, params->initial_data, params->size); + + buf->params.initial_data = NULL; // do this after vk_buf_update + return buf; + +error: + vk_buf_destroy(ra, buf); + return NULL; +} + +static bool vk_buf_poll(struct ra *ra, struct ra_buf *buf) +{ + struct ra_buf_vk *buf_vk = buf->priv; + return buf_vk->refcount == 1; +} + +static bool vk_tex_upload(struct ra *ra, + const struct ra_tex_upload_params *params) +{ + struct ra_tex *tex = params->tex; + struct ra_tex_vk *tex_vk = tex->priv; + + if (!params->buf) + return ra_tex_upload_pbo(ra, &tex_vk->pbo, params); + + assert(!params->src); + assert(params->buf); + struct ra_buf *buf = params->buf; + struct ra_buf_vk *buf_vk = buf->priv; + + VkBufferImageCopy region = { + .bufferOffset = buf_vk->slice.mem.offset + 
params->buf_offset, + .bufferRowLength = tex->params.w, + .bufferImageHeight = tex->params.h, + .imageSubresource = vk_layers, + .imageExtent = (VkExtent3D){tex->params.w, tex->params.h, tex->params.d}, + }; + + if (tex->params.dimensions == 2) { + int pix_size = tex->params.format->pixel_size; + region.bufferRowLength = params->stride / pix_size; + if (region.bufferRowLength * pix_size != params->stride) { + MP_ERR(ra, "Texture upload strides must be a multiple of the texel " + "size!\n"); + goto error; + } + + if (params->rc) { + struct mp_rect *rc = params->rc; + region.imageOffset = (VkOffset3D){rc->x0, rc->y0, 0}; + region.imageExtent = (VkExtent3D){mp_rect_w(*rc), mp_rect_h(*rc), 1}; + } + } + + uint64_t size = region.bufferRowLength * region.bufferImageHeight * + region.imageExtent.depth; + + struct vk_cmd *cmd = vk_require_cmd(ra); + if (!cmd) + goto error; + + buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_READ_BIT, region.bufferOffset, size); + + tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + params->invalidate); + + vkCmdCopyBufferToImage(cmd->buf, buf_vk->slice.buf, tex_vk->img, + tex_vk->current_layout, 1, ®ion); + + return true; + +error: + return false; +} + +#define MPVK_NUM_DS MPVK_MAX_STREAMING_DEPTH + +// For ra_renderpass.priv +struct ra_renderpass_vk { + // Pipeline / render pass + VkPipeline pipe; + VkPipelineLayout pipeLayout; + VkRenderPass renderPass; + // Descriptor set (bindings) + VkDescriptorSetLayout dsLayout; + VkDescriptorPool dsPool; + VkDescriptorSet dss[MPVK_NUM_DS]; + int dindex; + // Vertex buffers (vertices) + struct ra_buf_pool vbo; + + // For updating + VkWriteDescriptorSet *dswrite; + VkDescriptorImageInfo *dsiinfo; + VkDescriptorBufferInfo *dsbinfo; +}; + +static void vk_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass) +{ + if (!pass) + return; + + struct mpvk_ctx *vk = ra_vk_get(ra); + struct 
ra_renderpass_vk *pass_vk = pass->priv; + + ra_buf_pool_uninit(ra, &pass_vk->vbo); + vkDestroyPipeline(vk->dev, pass_vk->pipe, MPVK_ALLOCATOR); + vkDestroyRenderPass(vk->dev, pass_vk->renderPass, MPVK_ALLOCATOR); + vkDestroyPipelineLayout(vk->dev, pass_vk->pipeLayout, MPVK_ALLOCATOR); + vkDestroyDescriptorPool(vk->dev, pass_vk->dsPool, MPVK_ALLOCATOR); + vkDestroyDescriptorSetLayout(vk->dev, pass_vk->dsLayout, MPVK_ALLOCATOR); + + talloc_free(pass); +} + +MAKE_LAZY_DESTRUCTOR(vk_renderpass_destroy, struct ra_renderpass); + +static const VkDescriptorType dsType[] = { + [RA_VARTYPE_TEX] = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + [RA_VARTYPE_IMG_W] = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + [RA_VARTYPE_BUF_RO] = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + [RA_VARTYPE_BUF_RW] = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, +}; + +static bool vk_get_input_format(struct ra *ra, struct ra_renderpass_input *inp, + VkFormat *out_fmt) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + + enum ra_ctype ctype; + switch (inp->type) { + case RA_VARTYPE_FLOAT: ctype = RA_CTYPE_FLOAT; break; + case RA_VARTYPE_BYTE_UNORM: ctype = RA_CTYPE_UNORM; break; + default: abort(); + } + + assert(inp->dim_m == 1); + for (const struct vk_format *fmt = vk_formats; fmt->name; fmt++) { + if (fmt->ctype != ctype) + continue; + if (fmt->components != inp->dim_v) + continue; + if (fmt->bytes != ra_renderpass_input_layout(inp).size) + continue; + + // Ensure this format is valid for vertex attributes + VkFormatProperties prop; + vkGetPhysicalDeviceFormatProperties(vk->physd, fmt->iformat, &prop); + if (!(prop.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT)) + continue; + + *out_fmt = fmt->iformat; + return true; + } + + return false; +} + +static const char vk_cache_magic[4] = {'R','A','V','K'}; +static const int vk_cache_version = 2; + +struct vk_cache_header { + char magic[sizeof(vk_cache_magic)]; + int cache_version; + char compiler[SPIRV_NAME_MAX_LEN]; + int compiler_version; + size_t vert_spirv_len; + size_t 
frag_spirv_len; + size_t comp_spirv_len; + size_t pipecache_len; +}; + +static bool vk_use_cached_program(const struct ra_renderpass_params *params, + const struct spirv_compiler *spirv, + struct bstr *vert_spirv, + struct bstr *frag_spirv, + struct bstr *comp_spirv, + struct bstr *pipecache) +{ + struct bstr cache = params->cached_program; + if (cache.len < sizeof(struct vk_cache_header)) + return false; + + struct vk_cache_header *header = (struct vk_cache_header *)cache.start; + cache = bstr_cut(cache, sizeof(*header)); + + if (strncmp(header->magic, vk_cache_magic, sizeof(vk_cache_magic)) != 0) + return false; + if (header->cache_version != vk_cache_version) + return false; + if (strncmp(header->compiler, spirv->name, sizeof(header->compiler)) != 0) + return false; + if (header->compiler_version != spirv->compiler_version) + return false; + +#define GET(ptr) \ + if (cache.len < header->ptr##_len) \ + return false; \ + *ptr = bstr_splice(cache, 0, header->ptr##_len); \ + cache = bstr_cut(cache, ptr->len); + + GET(vert_spirv); + GET(frag_spirv); + GET(comp_spirv); + GET(pipecache); + return true; +} + +static VkResult vk_compile_glsl(struct ra *ra, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *spirv) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + VkResult ret = VK_SUCCESS; + int msgl = MSGL_DEBUG; + + if (!vk->spirv->fns->compile_glsl(vk->spirv, tactx, type, glsl, spirv)) { + ret = VK_ERROR_INVALID_SHADER_NV; + msgl = MSGL_ERR; + } + + static const char *shader_names[] = { + [GLSL_SHADER_VERTEX] = "vertex", + [GLSL_SHADER_FRAGMENT] = "fragment", + [GLSL_SHADER_COMPUTE] = "compute", + }; + + if (mp_msg_test(ra->log, msgl)) { + MP_MSG(ra, msgl, "%s shader source:\n", shader_names[type]); + mp_log_source(ra->log, msgl, glsl); + } + return ret; +} + +static const VkShaderStageFlags stageFlags[] = { + [RA_RENDERPASS_TYPE_RASTER] = VK_SHADER_STAGE_FRAGMENT_BIT, + [RA_RENDERPASS_TYPE_COMPUTE] = VK_SHADER_STAGE_COMPUTE_BIT, +}; + +static struct 
ra_renderpass *vk_renderpass_create(struct ra *ra, + const struct ra_renderpass_params *params) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + bool success = false; + assert(vk->spirv); + + struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass); + pass->params = *ra_renderpass_params_copy(pass, params); + pass->params.cached_program = (bstr){0}; + struct ra_renderpass_vk *pass_vk = pass->priv = + talloc_zero(pass, struct ra_renderpass_vk); + + // temporary allocations/objects + void *tmp = talloc_new(NULL); + VkPipelineCache pipeCache = NULL; + VkShaderModule vert_shader = NULL; + VkShaderModule frag_shader = NULL; + VkShaderModule comp_shader = NULL; + + static int dsCount[RA_VARTYPE_COUNT] = {0}; + VkDescriptorSetLayoutBinding *bindings = NULL; + int num_bindings = 0; + + for (int i = 0; i < params->num_inputs; i++) { + struct ra_renderpass_input *inp = ¶ms->inputs[i]; + switch (inp->type) { + case RA_VARTYPE_TEX: + case RA_VARTYPE_IMG_W: + case RA_VARTYPE_BUF_RO: + case RA_VARTYPE_BUF_RW: { + VkDescriptorSetLayoutBinding desc = { + .binding = inp->binding, + .descriptorType = dsType[inp->type], + .descriptorCount = 1, + .stageFlags = stageFlags[params->type], + }; + + MP_TARRAY_APPEND(tmp, bindings, num_bindings, desc); + dsCount[inp->type]++; + break; + } + default: abort(); + } + } + + VkDescriptorPoolSize *dsPoolSizes = NULL; + int poolSizeCount = 0; + + for (enum ra_vartype t = 0; t < RA_VARTYPE_COUNT; t++) { + if (dsCount[t] > 0) { + VkDescriptorPoolSize dssize = { + .type = dsType[t], + .descriptorCount = dsCount[t] * MPVK_NUM_DS, + }; + + MP_TARRAY_APPEND(tmp, dsPoolSizes, poolSizeCount, dssize); + } + } + + VkDescriptorPoolCreateInfo pinfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .maxSets = MPVK_NUM_DS, + .pPoolSizes = dsPoolSizes, + .poolSizeCount = poolSizeCount, + }; + + VK(vkCreateDescriptorPool(vk->dev, &pinfo, MPVK_ALLOCATOR, &pass_vk->dsPool)); + + pass_vk->dswrite = talloc_array(pass, VkWriteDescriptorSet, 
num_bindings); + pass_vk->dsiinfo = talloc_array(pass, VkDescriptorImageInfo, num_bindings); + pass_vk->dsbinfo = talloc_array(pass, VkDescriptorBufferInfo, num_bindings); + + VkDescriptorSetLayoutCreateInfo dinfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .pBindings = bindings, + .bindingCount = num_bindings, + }; + + VK(vkCreateDescriptorSetLayout(vk->dev, &dinfo, MPVK_ALLOCATOR, + &pass_vk->dsLayout)); + + VkDescriptorSetLayout layouts[MPVK_NUM_DS]; + for (int i = 0; i < MPVK_NUM_DS; i++) + layouts[i] = pass_vk->dsLayout; + + VkDescriptorSetAllocateInfo ainfo = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = pass_vk->dsPool, + .descriptorSetCount = MPVK_NUM_DS, + .pSetLayouts = layouts, + }; + + VK(vkAllocateDescriptorSets(vk->dev, &ainfo, pass_vk->dss)); + + VkPipelineLayoutCreateInfo linfo = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = &pass_vk->dsLayout, + .pushConstantRangeCount = params->push_constants_size ? 
1 : 0, + .pPushConstantRanges = &(VkPushConstantRange){ + .stageFlags = stageFlags[params->type], + .offset = 0, + .size = params->push_constants_size, + }, + }; + + VK(vkCreatePipelineLayout(vk->dev, &linfo, MPVK_ALLOCATOR, + &pass_vk->pipeLayout)); + + struct bstr vert = {0}, frag = {0}, comp = {0}, pipecache = {0}; + if (vk_use_cached_program(params, vk->spirv, &vert, &frag, &comp, &pipecache)) { + MP_VERBOSE(ra, "Using cached SPIR-V and VkPipeline.\n"); + } else { + pipecache.len = 0; + switch (params->type) { + case RA_RENDERPASS_TYPE_RASTER: + VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_VERTEX, + params->vertex_shader, &vert)); + VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_FRAGMENT, + params->frag_shader, &frag)); + comp.len = 0; + break; + case RA_RENDERPASS_TYPE_COMPUTE: + VK(vk_compile_glsl(ra, tmp, GLSL_SHADER_COMPUTE, + params->compute_shader, &comp)); + frag.len = 0; + vert.len = 0; + break; + } + } + + VkPipelineCacheCreateInfo pcinfo = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, + .pInitialData = pipecache.start, + .initialDataSize = pipecache.len, + }; + + VK(vkCreatePipelineCache(vk->dev, &pcinfo, MPVK_ALLOCATOR, &pipeCache)); + + VkShaderModuleCreateInfo sinfo = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + }; + + switch (params->type) { + case RA_RENDERPASS_TYPE_RASTER: { + sinfo.pCode = (uint32_t *)vert.start; + sinfo.codeSize = vert.len; + VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &vert_shader)); + + sinfo.pCode = (uint32_t *)frag.start; + sinfo.codeSize = frag.len; + VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &frag_shader)); + + VkVertexInputAttributeDescription *attrs = talloc_array(tmp, + VkVertexInputAttributeDescription, params->num_vertex_attribs); + + for (int i = 0; i < params->num_vertex_attribs; i++) { + struct ra_renderpass_input *inp = ¶ms->vertex_attribs[i]; + attrs[i] = (VkVertexInputAttributeDescription) { + .location = i, + .binding = 0, + .offset = inp->offset, + }; + + if 
(!vk_get_input_format(ra, inp, &attrs[i].format)) { + MP_ERR(ra, "No suitable VkFormat for vertex attrib '%s'!\n", + inp->name); + goto error; + } + } + VK(vk_create_render_pass(vk->dev, params->target_format, + params->enable_blend, &pass_vk->renderPass)); + + static const VkBlendFactor blendFactors[] = { + [RA_BLEND_ZERO] = VK_BLEND_FACTOR_ZERO, + [RA_BLEND_ONE] = VK_BLEND_FACTOR_ONE, + [RA_BLEND_SRC_ALPHA] = VK_BLEND_FACTOR_SRC_ALPHA, + [RA_BLEND_ONE_MINUS_SRC_ALPHA] = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + }; + + VkGraphicsPipelineCreateInfo cinfo = { + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .stageCount = 2, + .pStages = (VkPipelineShaderStageCreateInfo[]) { + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_VERTEX_BIT, + .module = vert_shader, + .pName = "main", + }, { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_FRAGMENT_BIT, + .module = frag_shader, + .pName = "main", + } + }, + .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &(VkVertexInputBindingDescription) { + .binding = 0, + .stride = params->vertex_stride, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }, + .vertexAttributeDescriptionCount = params->num_vertex_attribs, + .pVertexAttributeDescriptions = attrs, + }, + .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, + }, + .pViewportState = &(VkPipelineViewportStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .viewportCount = 1, + .scissorCount = 1, + }, + .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .polygonMode = 
VK_POLYGON_MODE_FILL, + .cullMode = VK_CULL_MODE_NONE, + .lineWidth = 1.0f, + }, + .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, + }, + .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, + .attachmentCount = 1, + .pAttachments = &(VkPipelineColorBlendAttachmentState) { + .blendEnable = params->enable_blend, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcColorBlendFactor = blendFactors[params->blend_src_rgb], + .dstColorBlendFactor = blendFactors[params->blend_dst_rgb], + .alphaBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = blendFactors[params->blend_src_alpha], + .dstAlphaBlendFactor = blendFactors[params->blend_dst_alpha], + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | + VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | + VK_COLOR_COMPONENT_A_BIT, + }, + }, + .pDynamicState = &(VkPipelineDynamicStateCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .dynamicStateCount = 2, + .pDynamicStates = (VkDynamicState[]){ + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + }, + }, + .layout = pass_vk->pipeLayout, + .renderPass = pass_vk->renderPass, + }; + + VK(vkCreateGraphicsPipelines(vk->dev, pipeCache, 1, &cinfo, + MPVK_ALLOCATOR, &pass_vk->pipe)); + break; + } + case RA_RENDERPASS_TYPE_COMPUTE: { + sinfo.pCode = (uint32_t *)comp.start; + sinfo.codeSize = comp.len; + VK(vkCreateShaderModule(vk->dev, &sinfo, MPVK_ALLOCATOR, &comp_shader)); + + VkComputePipelineCreateInfo cinfo = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = comp_shader, + .pName = "main", + }, + .layout = pass_vk->pipeLayout, + }; + + VK(vkCreateComputePipelines(vk->dev, pipeCache, 1, &cinfo, + MPVK_ALLOCATOR, 
&pass_vk->pipe)); + break; + } + } + + // Update params->cached_program + struct bstr cache = {0}; + VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, NULL)); + cache.start = talloc_size(tmp, cache.len); + VK(vkGetPipelineCacheData(vk->dev, pipeCache, &cache.len, cache.start)); + + struct vk_cache_header header = { + .cache_version = vk_cache_version, + .compiler_version = vk->spirv->compiler_version, + .vert_spirv_len = vert.len, + .frag_spirv_len = frag.len, + .comp_spirv_len = comp.len, + .pipecache_len = cache.len, + }; + + for (int i = 0; i < MP_ARRAY_SIZE(header.magic); i++) + header.magic[i] = vk_cache_magic[i]; + for (int i = 0; i < sizeof(vk->spirv->name); i++) + header.compiler[i] = vk->spirv->name[i]; + + struct bstr *prog = &pass->params.cached_program; + bstr_xappend(pass, prog, (struct bstr){ (char *) &header, sizeof(header) }); + bstr_xappend(pass, prog, vert); + bstr_xappend(pass, prog, frag); + bstr_xappend(pass, prog, comp); + bstr_xappend(pass, prog, cache); + + success = true; + +error: + if (!success) { + vk_renderpass_destroy(ra, pass); + pass = NULL; + } + + vkDestroyShaderModule(vk->dev, vert_shader, MPVK_ALLOCATOR); + vkDestroyShaderModule(vk->dev, frag_shader, MPVK_ALLOCATOR); + vkDestroyShaderModule(vk->dev, comp_shader, MPVK_ALLOCATOR); + vkDestroyPipelineCache(vk->dev, pipeCache, MPVK_ALLOCATOR); + talloc_free(tmp); + return pass; +} + +static void vk_update_descriptor(struct ra *ra, struct vk_cmd *cmd, + struct ra_renderpass *pass, + struct ra_renderpass_input_val val, + VkDescriptorSet ds, int idx) +{ + struct ra_renderpass_vk *pass_vk = pass->priv; + struct ra_renderpass_input *inp = &pass->params.inputs[val.index]; + + VkWriteDescriptorSet *wds = &pass_vk->dswrite[idx]; + *wds = (VkWriteDescriptorSet) { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = ds, + .dstBinding = inp->binding, + .descriptorCount = 1, + .descriptorType = dsType[inp->type], + }; + + static const VkPipelineStageFlags passStages[] = { + 
[RA_RENDERPASS_TYPE_RASTER] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + }; + + switch (inp->type) { + case RA_VARTYPE_TEX: { + struct ra_tex *tex = *(struct ra_tex **)val.data; + struct ra_tex_vk *tex_vk = tex->priv; + + assert(tex->params.render_src); + tex_barrier(cmd, tex_vk, passStages[pass->params.type], + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, false); + + VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx]; + *iinfo = (VkDescriptorImageInfo) { + .sampler = tex_vk->sampler, + .imageView = tex_vk->view, + .imageLayout = tex_vk->current_layout, + }; + + wds->pImageInfo = iinfo; + break; + } + case RA_VARTYPE_IMG_W: { + struct ra_tex *tex = *(struct ra_tex **)val.data; + struct ra_tex_vk *tex_vk = tex->priv; + + assert(tex->params.storage_dst); + tex_barrier(cmd, tex_vk, passStages[pass->params.type], + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, false); + + VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx]; + *iinfo = (VkDescriptorImageInfo) { + .imageView = tex_vk->view, + .imageLayout = tex_vk->current_layout, + }; + + wds->pImageInfo = iinfo; + break; + } + case RA_VARTYPE_BUF_RO: + case RA_VARTYPE_BUF_RW: { + struct ra_buf *buf = *(struct ra_buf **)val.data; + struct ra_buf_vk *buf_vk = buf->priv; + + VkBufferUsageFlags access = VK_ACCESS_SHADER_READ_BIT; + if (inp->type == RA_VARTYPE_BUF_RW) + access |= VK_ACCESS_SHADER_WRITE_BIT; + + buf_barrier(ra, cmd, buf, passStages[pass->params.type], + access, buf_vk->slice.mem.offset, buf->params.size); + + VkDescriptorBufferInfo *binfo = &pass_vk->dsbinfo[idx]; + *binfo = (VkDescriptorBufferInfo) { + .buffer = buf_vk->slice.buf, + .offset = buf_vk->slice.mem.offset, + .range = buf->params.size, + }; + + wds->pBufferInfo = binfo; + break; + } + } +} + +static void vk_renderpass_run(struct ra *ra, + const struct ra_renderpass_run_params *params) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + struct 
ra_renderpass *pass = params->pass; + struct ra_renderpass_vk *pass_vk = pass->priv; + + struct vk_cmd *cmd = vk_require_cmd(ra); + if (!cmd) + goto error; + + static const VkPipelineBindPoint bindPoint[] = { + [RA_RENDERPASS_TYPE_RASTER] = VK_PIPELINE_BIND_POINT_GRAPHICS, + [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_BIND_POINT_COMPUTE, + }; + + vkCmdBindPipeline(cmd->buf, bindPoint[pass->params.type], pass_vk->pipe); + + VkDescriptorSet ds = pass_vk->dss[pass_vk->dindex++]; + pass_vk->dindex %= MPVK_NUM_DS; + + for (int i = 0; i < params->num_values; i++) + vk_update_descriptor(ra, cmd, pass, params->values[i], ds, i); + + if (params->num_values > 0) { + vkUpdateDescriptorSets(vk->dev, params->num_values, pass_vk->dswrite, + 0, NULL); + } + + vkCmdBindDescriptorSets(cmd->buf, bindPoint[pass->params.type], + pass_vk->pipeLayout, 0, 1, &ds, 0, NULL); + + if (pass->params.push_constants_size) { + vkCmdPushConstants(cmd->buf, pass_vk->pipeLayout, + stageFlags[pass->params.type], 0, + pass->params.push_constants_size, + params->push_constants); + } + + switch (pass->params.type) { + case RA_RENDERPASS_TYPE_COMPUTE: + vkCmdDispatch(cmd->buf, params->compute_groups[0], + params->compute_groups[1], + params->compute_groups[2]); + break; + case RA_RENDERPASS_TYPE_RASTER: { + struct ra_tex *tex = params->target; + struct ra_tex_vk *tex_vk = tex->priv; + assert(tex->params.render_dst); + + struct ra_buf_params buf_params = { + .type = RA_BUF_TYPE_VERTEX, + .size = params->vertex_count * pass->params.vertex_stride, + .host_mutable = true, + }; + + struct ra_buf *buf = ra_buf_pool_get(ra, &pass_vk->vbo, &buf_params); + if (!buf) { + MP_ERR(ra, "Failed allocating vertex buffer!\n"); + goto error; + } + struct ra_buf_vk *buf_vk = buf->priv; + + vk_buf_update(ra, buf, 0, params->vertex_data, buf_params.size); + + buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, + VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + buf_vk->slice.mem.offset, buf_params.size); + + 
vkCmdBindVertexBuffers(cmd->buf, 0, 1, &buf_vk->slice.buf, + &buf_vk->slice.mem.offset); + + if (pass->params.enable_blend) { + // Normally this transition is handled implicitly by the renderpass, + // but if we need to preserve the FBO we have to do it manually. + tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, false); + } + + VkViewport viewport = { + .x = params->viewport.x0, + .y = params->viewport.y0, + .width = mp_rect_w(params->viewport), + .height = mp_rect_h(params->viewport), + }; + + VkRect2D scissor = { + .offset = {params->scissors.x0, params->scissors.y0}, + .extent = {mp_rect_w(params->scissors), mp_rect_h(params->scissors)}, + }; + + vkCmdSetViewport(cmd->buf, 0, 1, &viewport); + vkCmdSetScissor(cmd->buf, 0, 1, &scissor); + + VkRenderPassBeginInfo binfo = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = pass_vk->renderPass, + .framebuffer = tex_vk->framebuffer, + .renderArea = (VkRect2D){{0, 0}, {tex->params.w, tex->params.h}}, + }; + + vkCmdBeginRenderPass(cmd->buf, &binfo, VK_SUBPASS_CONTENTS_INLINE); + vkCmdDraw(cmd->buf, params->vertex_count, 1, 0, 0); + vkCmdEndRenderPass(cmd->buf); + + // The renderPass implicitly transitions the texture to this layout + tex_vk->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + tex_vk->current_access = VK_ACCESS_SHADER_READ_BIT; + tex_vk->current_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + break; + } + default: abort(); + }; + +error: + return; +} + +static void vk_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, + struct mp_rect *dst_rc, struct mp_rect *src_rc) +{ + assert(src->params.blit_src); + assert(dst->params.blit_dst); + + struct ra_tex_vk *src_vk = src->priv; + struct ra_tex_vk *dst_vk = dst->priv; + + struct vk_cmd *cmd = vk_require_cmd(ra); + if (!cmd) + return; + + tex_barrier(cmd, src_vk, VK_PIPELINE_STAGE_TRANSFER_BIT, + 
VK_ACCESS_TRANSFER_READ_BIT, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + false); + + bool discard = dst_rc->x0 == 0 && + dst_rc->y0 == 0 && + dst_rc->x1 == dst->params.w && + dst_rc->y1 == dst->params.h; + + tex_barrier(cmd, dst_vk, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + discard); + + VkImageBlit region = { + .srcSubresource = vk_layers, + .srcOffsets = {{src_rc->x0, src_rc->y0, 0}, {src_rc->x1, src_rc->y1, 1}}, + .dstSubresource = vk_layers, + .dstOffsets = {{dst_rc->x0, dst_rc->y0, 0}, {dst_rc->x1, dst_rc->y1, 1}}, + }; + + vkCmdBlitImage(cmd->buf, src_vk->img, src_vk->current_layout, dst_vk->img, + dst_vk->current_layout, 1, ®ion, VK_FILTER_NEAREST); +} + +static void vk_clear(struct ra *ra, struct ra_tex *tex, float color[4], + struct mp_rect *rc) +{ + struct ra_vk *p = ra->priv; + struct ra_tex_vk *tex_vk = tex->priv; + assert(tex->params.blit_dst); + + struct vk_cmd *cmd = vk_require_cmd(ra); + if (!cmd) + return; + + struct mp_rect full = {0, 0, tex->params.w, tex->params.h}; + if (!rc || mp_rect_equals(rc, &full)) { + // To clear the entire image, we can use the efficient clear command + tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, true); + + VkClearColorValue clearColor = {0}; + for (int c = 0; c < 4; c++) + clearColor.float32[c] = color[c]; + + vkCmdClearColorImage(cmd->buf, tex_vk->img, tex_vk->current_layout, + &clearColor, 1, &vk_range); + } else { + // To simulate per-region clearing, we blit from a 1x1 texture instead + struct ra_tex_upload_params ul_params = { + .tex = p->clear_tex, + .invalidate = true, + .src = &color[0], + }; + vk_tex_upload(ra, &ul_params); + vk_blit(ra, tex, p->clear_tex, rc, &(struct mp_rect){0, 0, 1, 1}); + } +} + +static int vk_desc_namespace(enum ra_vartype type) +{ + return 0; +} + +#define VK_QUERY_POOL_SIZE (MPVK_MAX_STREAMING_DEPTH * 4) + +struct vk_timer { + VkQueryPool 
pool; + int index; + uint64_t result; +}; + +static void vk_timer_destroy(struct ra *ra, ra_timer *ratimer) +{ + if (!ratimer) + return; + + struct mpvk_ctx *vk = ra_vk_get(ra); + struct vk_timer *timer = ratimer; + + vkDestroyQueryPool(vk->dev, timer->pool, MPVK_ALLOCATOR); + + talloc_free(timer); +} + +MAKE_LAZY_DESTRUCTOR(vk_timer_destroy, ra_timer); + +static ra_timer *vk_timer_create(struct ra *ra) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + + struct vk_timer *timer = talloc_zero(NULL, struct vk_timer); + + struct VkQueryPoolCreateInfo qinfo = { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .queryType = VK_QUERY_TYPE_TIMESTAMP, + .queryCount = VK_QUERY_POOL_SIZE, + }; + + VK(vkCreateQueryPool(vk->dev, &qinfo, MPVK_ALLOCATOR, &timer->pool)); + + return (ra_timer *)timer; + +error: + vk_timer_destroy(ra, timer); + return NULL; +} + +static void vk_timer_record(struct ra *ra, VkQueryPool pool, int index, + VkPipelineStageFlags stage) +{ + struct vk_cmd *cmd = vk_require_cmd(ra); + if (!cmd) + return; + + vkCmdWriteTimestamp(cmd->buf, stage, pool, index); +} + +static void vk_timer_start(struct ra *ra, ra_timer *ratimer) +{ + struct mpvk_ctx *vk = ra_vk_get(ra); + struct vk_timer *timer = ratimer; + + timer->index = (timer->index + 2) % VK_QUERY_POOL_SIZE; + + uint64_t out[2]; + VkResult res = vkGetQueryPoolResults(vk->dev, timer->pool, timer->index, 2, + sizeof(out), &out[0], sizeof(uint64_t), + VK_QUERY_RESULT_64_BIT); + switch (res) { + case VK_SUCCESS: + timer->result = (out[1] - out[0]) * vk->limits.timestampPeriod; + break; + case VK_NOT_READY: + timer->result = 0; + break; + default: + MP_WARN(vk, "Failed reading timer query result: %s\n", vk_err(res)); + return; + }; + + vk_timer_record(ra, timer->pool, timer->index, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); +} + +static uint64_t vk_timer_stop(struct ra *ra, ra_timer *ratimer) +{ + struct vk_timer *timer = ratimer; + vk_timer_record(ra, timer->pool, timer->index + 1, + 
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); + + return timer->result; +} + +static struct ra_fns ra_fns_vk = { + .destroy = vk_destroy_ra, + .tex_create = vk_tex_create, + .tex_destroy = vk_tex_destroy_lazy, + .tex_upload = vk_tex_upload, + .buf_create = vk_buf_create, + .buf_destroy = vk_buf_destroy_lazy, + .buf_update = vk_buf_update, + .buf_poll = vk_buf_poll, + .clear = vk_clear, + .blit = vk_blit, + .uniform_layout = std140_layout, + .push_constant_layout = std430_layout, + .desc_namespace = vk_desc_namespace, + .renderpass_create = vk_renderpass_create, + .renderpass_destroy = vk_renderpass_destroy_lazy, + .renderpass_run = vk_renderpass_run, + .timer_create = vk_timer_create, + .timer_destroy = vk_timer_destroy_lazy, + .timer_start = vk_timer_start, + .timer_stop = vk_timer_stop, +}; + +static void present_cb(void *priv, int *inflight) +{ + *inflight -= 1; +} + +bool ra_vk_submit(struct ra *ra, struct ra_tex *tex, VkSemaphore acquired, + VkSemaphore *done, int *inflight) +{ + struct vk_cmd *cmd = vk_require_cmd(ra); + if (!cmd) + goto error; + + if (inflight) { + *inflight += 1; + vk_cmd_callback(cmd, (vk_cb)present_cb, NULL, inflight); + } + + struct ra_tex_vk *tex_vk = tex->priv; + assert(tex_vk->external_img); + tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, false); + + // These are the only two stages that we use/support for actually + // outputting to swapchain imagechain images, so just add a dependency + // on both of them. In theory, we could maybe come up with some more + // advanced mechanism of tracking dynamic dependencies, but that seems + // like overkill. 
+ vk_cmd_dep(cmd, acquired, + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_TRANSFER_BIT); + + return vk_flush(ra, done); + +error: + return false; +} diff --git a/video/out/vulkan/ra_vk.h b/video/out/vulkan/ra_vk.h new file mode 100644 index 0000000..893421b --- /dev/null +++ b/video/out/vulkan/ra_vk.h @@ -0,0 +1,31 @@ +#pragma once + +#include "video/out/gpu/ra.h" + +#include "common.h" +#include "utils.h" + +struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log); + +// Access to the VkDevice is needed for swapchain creation +VkDevice ra_vk_get_dev(struct ra *ra); + +// Allocates a ra_tex that wraps a swapchain image. The contents of the image +// will be invalidated, and access to it will only be internally synchronized. +// So the calling could should not do anything else with the VkImage. +struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg, + VkSwapchainCreateInfoKHR info); + +// This function flushes the command buffers, transitions `tex` (which must be +// a wrapped swapchain image) into a format suitable for presentation, and +// submits the current rendering commands. The indicated semaphore must fire +// before the submitted command can run. If `done` is non-NULL, it will be +// set to a semaphore that fires once the command completes. If `inflight` +// is non-NULL, it will be incremented when the command starts and decremented +// when it completes. +bool ra_vk_submit(struct ra *ra, struct ra_tex *tex, VkSemaphore acquired, + VkSemaphore *done, int *inflight); + +// May be called on a struct ra of any type. Returns NULL if the ra is not +// a vulkan ra. 
+struct mpvk_ctx *ra_vk_get(struct ra *ra); diff --git a/video/out/vulkan/spirv_nvidia.c b/video/out/vulkan/spirv_nvidia.c new file mode 100644 index 0000000..6cc43a5 --- /dev/null +++ b/video/out/vulkan/spirv_nvidia.c @@ -0,0 +1,54 @@ +#include "video/out/gpu/spirv.h" + +#include "common.h" +#include "context.h" +#include "utils.h" + +static bool nv_glsl_compile(struct spirv_compiler *spirv, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *out_spirv) +{ + // The nvidia extension literally assumes your SPIRV is in fact valid GLSL + *out_spirv = bstr0(glsl); + return true; +} + +static bool nv_glsl_init(struct ra_ctx *ctx) +{ + struct mpvk_ctx *vk = ra_vk_ctx_get(ctx); + if (!vk) + return false; + + struct spirv_compiler *spv = ctx->spirv; + spv->required_ext = VK_NV_GLSL_SHADER_EXTENSION_NAME; + spv->glsl_version = 450; // impossible to query, so hard-code it.. + spv->ra_caps = RA_CAP_NESTED_ARRAY; + + // Make sure the extension is actually available, and fail gracefully + // if it isn't + VkExtensionProperties *props = NULL; + uint32_t extnum = 0; + VK(vkEnumerateDeviceExtensionProperties(vk->physd, NULL, &extnum, NULL)); + props = talloc_array(NULL, VkExtensionProperties, extnum); + VK(vkEnumerateDeviceExtensionProperties(vk->physd, NULL, &extnum, props)); + + bool ret = true; + for (int e = 0; e < extnum; e++) { + if (strncmp(props[e].extensionName, spv->required_ext, + VK_MAX_EXTENSION_NAME_SIZE) == 0) + goto done; + } + +error: + MP_VERBOSE(ctx, "Device doesn't support VK_NV_glsl_shader, skipping..\n"); + ret = false; + +done: + talloc_free(props); + return ret; +} + +const struct spirv_compiler_fns spirv_nvidia_builtin = { + .compile_glsl = nv_glsl_compile, + .init = nv_glsl_init, +}; diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c new file mode 100644 index 0000000..baf0ebc --- /dev/null +++ b/video/out/vulkan/utils.c @@ -0,0 +1,729 @@ +#include <libavutil/macros.h> + +#include "video/out/gpu/spirv.h" +#include 
"utils.h" +#include "malloc.h" + +const char* vk_err(VkResult res) +{ + switch (res) { + // These are technically success codes, but include them nonetheless + case VK_SUCCESS: return "VK_SUCCESS"; + case VK_NOT_READY: return "VK_NOT_READY"; + case VK_TIMEOUT: return "VK_TIMEOUT"; + case VK_EVENT_SET: return "VK_EVENT_SET"; + case VK_EVENT_RESET: return "VK_EVENT_RESET"; + case VK_INCOMPLETE: return "VK_INCOMPLETE"; + case VK_SUBOPTIMAL_KHR: return "VK_SUBOPTIMAL_KHR"; + + // Actual error codes + case VK_ERROR_OUT_OF_HOST_MEMORY: return "VK_ERROR_OUT_OF_HOST_MEMORY"; + case VK_ERROR_OUT_OF_DEVICE_MEMORY: return "VK_ERROR_OUT_OF_DEVICE_MEMORY"; + case VK_ERROR_INITIALIZATION_FAILED: return "VK_ERROR_INITIALIZATION_FAILED"; + case VK_ERROR_DEVICE_LOST: return "VK_ERROR_DEVICE_LOST"; + case VK_ERROR_MEMORY_MAP_FAILED: return "VK_ERROR_MEMORY_MAP_FAILED"; + case VK_ERROR_LAYER_NOT_PRESENT: return "VK_ERROR_LAYER_NOT_PRESENT"; + case VK_ERROR_EXTENSION_NOT_PRESENT: return "VK_ERROR_EXTENSION_NOT_PRESENT"; + case VK_ERROR_FEATURE_NOT_PRESENT: return "VK_ERROR_FEATURE_NOT_PRESENT"; + case VK_ERROR_INCOMPATIBLE_DRIVER: return "VK_ERROR_INCOMPATIBLE_DRIVER"; + case VK_ERROR_TOO_MANY_OBJECTS: return "VK_ERROR_TOO_MANY_OBJECTS"; + case VK_ERROR_FORMAT_NOT_SUPPORTED: return "VK_ERROR_FORMAT_NOT_SUPPORTED"; + case VK_ERROR_FRAGMENTED_POOL: return "VK_ERROR_FRAGMENTED_POOL"; + case VK_ERROR_INVALID_SHADER_NV: return "VK_ERROR_INVALID_SHADER_NV"; + case VK_ERROR_OUT_OF_DATE_KHR: return "VK_ERROR_OUT_OF_DATE_KHR"; + case VK_ERROR_SURFACE_LOST_KHR: return "VK_ERROR_SURFACE_LOST_KHR"; + } + + return "Unknown error!"; +} + +static const char* vk_dbg_type(VkDebugReportObjectTypeEXT type) +{ + switch (type) { + case VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT: + return "VkInstance"; + case VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT: + return "VkPhysicalDevice"; + case VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT: + return "VkDevice"; + case VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT: + return 
"VkQueue"; + case VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT: + return "VkSemaphore"; + case VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT: + return "VkCommandBuffer"; + case VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT: + return "VkFence"; + case VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT: + return "VkDeviceMemory"; + case VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT: + return "VkBuffer"; + case VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT: + return "VkImage"; + case VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT: + return "VkEvent"; + case VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT: + return "VkQueryPool"; + case VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT: + return "VkBufferView"; + case VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT: + return "VkImageView"; + case VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT: + return "VkShaderModule"; + case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT: + return "VkPipelineCache"; + case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT: + return "VkPipelineLayout"; + case VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT: + return "VkRenderPass"; + case VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT: + return "VkPipeline"; + case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT: + return "VkDescriptorSetLayout"; + case VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT: + return "VkSampler"; + case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT: + return "VkDescriptorPool"; + case VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT: + return "VkDescriptorSet"; + case VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT: + return "VkFramebuffer"; + case VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT: + return "VkCommandPool"; + case VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT: + return "VkSurfaceKHR"; + case VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT: + return "VkSwapchainKHR"; + case VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT: + return "VkDebugReportCallbackEXT"; + case VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT: + default: + return "unknown object"; + } +} + +static VkBool32 
vk_dbg_callback(VkDebugReportFlagsEXT flags, + VkDebugReportObjectTypeEXT objType, + uint64_t obj, size_t loc, int32_t msgCode, + const char *layer, const char *msg, void *priv) +{ + struct mpvk_ctx *vk = priv; + int lev = MSGL_V; + + switch (flags) { + case VK_DEBUG_REPORT_ERROR_BIT_EXT: lev = MSGL_ERR; break; + case VK_DEBUG_REPORT_WARNING_BIT_EXT: lev = MSGL_WARN; break; + case VK_DEBUG_REPORT_INFORMATION_BIT_EXT: lev = MSGL_TRACE; break; + case VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT: lev = MSGL_WARN; break; + case VK_DEBUG_REPORT_DEBUG_BIT_EXT: lev = MSGL_DEBUG; break; + }; + + MP_MSG(vk, lev, "vk [%s] %d: %s (obj 0x%llx (%s), loc 0x%zx)\n", + layer, (int)msgCode, msg, (unsigned long long)obj, + vk_dbg_type(objType), loc); + + // The return value of this function determines whether the call will + // be explicitly aborted (to prevent GPU errors) or not. In this case, + // we generally want this to be on for the errors. + return (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT); +} + +static void vk_cmdpool_uninit(struct mpvk_ctx *vk, struct vk_cmdpool *pool) +{ + if (!pool) + return; + + // also frees associated command buffers + vkDestroyCommandPool(vk->dev, pool->pool, MPVK_ALLOCATOR); + for (int n = 0; n < MPVK_MAX_CMDS; n++) { + vkDestroyFence(vk->dev, pool->cmds[n].fence, MPVK_ALLOCATOR); + vkDestroySemaphore(vk->dev, pool->cmds[n].done, MPVK_ALLOCATOR); + talloc_free(pool->cmds[n].callbacks); + } + talloc_free(pool); +} + +void mpvk_uninit(struct mpvk_ctx *vk) +{ + if (!vk->inst) + return; + + if (vk->dev) { + vk_cmdpool_uninit(vk, vk->pool); + vk_malloc_uninit(vk); + vkDestroyDevice(vk->dev, MPVK_ALLOCATOR); + } + + if (vk->dbg) { + // Same deal as creating the debug callback, we need to load this + // first. 
+ VK_LOAD_PFN(vkDestroyDebugReportCallbackEXT) + pfn_vkDestroyDebugReportCallbackEXT(vk->inst, vk->dbg, MPVK_ALLOCATOR); + } + + vkDestroySurfaceKHR(vk->inst, vk->surf, MPVK_ALLOCATOR); + vkDestroyInstance(vk->inst, MPVK_ALLOCATOR); + + *vk = (struct mpvk_ctx){0}; +} + +bool mpvk_instance_init(struct mpvk_ctx *vk, struct mp_log *log, + const char *surf_ext_name, bool debug) +{ + *vk = (struct mpvk_ctx) { + .log = log, + }; + + VkInstanceCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + }; + + if (debug) { + // Enables the LunarG standard validation layer, which + // is a meta-layer that loads lots of other validators + static const char* layers[] = { + "VK_LAYER_LUNARG_standard_validation", + }; + + info.ppEnabledLayerNames = layers; + info.enabledLayerCount = MP_ARRAY_SIZE(layers); + } + + // Enable whatever extensions were compiled in. + const char *extensions[] = { + VK_KHR_SURFACE_EXTENSION_NAME, + surf_ext_name, + + // Extra extensions only used for debugging. These are toggled by + // decreasing the enabledExtensionCount, so the number needs to be + // synchronized with the code below. 
+ VK_EXT_DEBUG_REPORT_EXTENSION_NAME, + }; + + const int debugExtensionCount = 1; + + info.ppEnabledExtensionNames = extensions; + info.enabledExtensionCount = MP_ARRAY_SIZE(extensions); + + if (!debug) + info.enabledExtensionCount -= debugExtensionCount; + + MP_VERBOSE(vk, "Creating instance with extensions:\n"); + for (int i = 0; i < info.enabledExtensionCount; i++) + MP_VERBOSE(vk, " %s\n", info.ppEnabledExtensionNames[i]); + + VkResult res = vkCreateInstance(&info, MPVK_ALLOCATOR, &vk->inst); + if (res != VK_SUCCESS) { + MP_VERBOSE(vk, "Failed creating instance: %s\n", vk_err(res)); + return false; + } + + if (debug) { + // Set up a debug callback to catch validation messages + VkDebugReportCallbackCreateInfoEXT dinfo = { + .sType = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, + .flags = VK_DEBUG_REPORT_INFORMATION_BIT_EXT | + VK_DEBUG_REPORT_WARNING_BIT_EXT | + VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT | + VK_DEBUG_REPORT_ERROR_BIT_EXT | + VK_DEBUG_REPORT_DEBUG_BIT_EXT, + .pfnCallback = vk_dbg_callback, + .pUserData = vk, + }; + + // Since this is not part of the core spec, we need to load it. This + // can't fail because we've already successfully created an instance + // with this extension enabled. 
+ VK_LOAD_PFN(vkCreateDebugReportCallbackEXT) + pfn_vkCreateDebugReportCallbackEXT(vk->inst, &dinfo, MPVK_ALLOCATOR, + &vk->dbg); + } + + return true; +} + +#define MPVK_MAX_DEVICES 16 + +static bool physd_supports_surface(struct mpvk_ctx *vk, VkPhysicalDevice physd) +{ + uint32_t qfnum; + vkGetPhysicalDeviceQueueFamilyProperties(physd, &qfnum, NULL); + + for (int i = 0; i < qfnum; i++) { + VkBool32 sup; + VK(vkGetPhysicalDeviceSurfaceSupportKHR(physd, i, vk->surf, &sup)); + if (sup) + return true; + } + +error: + return false; +} + +bool mpvk_find_phys_device(struct mpvk_ctx *vk, const char *name, bool sw) +{ + assert(vk->surf); + + MP_VERBOSE(vk, "Probing for vulkan devices:\n"); + + VkPhysicalDevice *devices = NULL; + uint32_t num = 0; + VK(vkEnumeratePhysicalDevices(vk->inst, &num, NULL)); + devices = talloc_array(NULL, VkPhysicalDevice, num); + VK(vkEnumeratePhysicalDevices(vk->inst, &num, devices)); + + // Sorted by "priority". Reuses some m_opt code for convenience + static const struct m_opt_choice_alternatives types[] = { + {"discrete", VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU}, + {"integrated", VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU}, + {"virtual", VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU}, + {"software", VK_PHYSICAL_DEVICE_TYPE_CPU}, + {"unknown", VK_PHYSICAL_DEVICE_TYPE_OTHER}, + {0} + }; + + VkPhysicalDeviceProperties props[MPVK_MAX_DEVICES]; + for (int i = 0; i < num; i++) { + vkGetPhysicalDeviceProperties(devices[i], &props[i]); + MP_VERBOSE(vk, " GPU %d: %s (%s)\n", i, props[i].deviceName, + m_opt_choice_str(types, props[i].deviceType)); + } + + // Iterate through each type in order of decreasing preference + for (int t = 0; types[t].name; t++) { + // Disallow SW rendering unless explicitly enabled + if (types[t].value == VK_PHYSICAL_DEVICE_TYPE_CPU && !sw) + continue; + + for (int i = 0; i < num; i++) { + VkPhysicalDeviceProperties prop = props[i]; + if (prop.deviceType != types[t].value) + continue; + if (name && strcmp(name, prop.deviceName) != 0) + 
continue; + if (!physd_supports_surface(vk, devices[i])) + continue; + + MP_VERBOSE(vk, "Chose device:\n"); + MP_VERBOSE(vk, " Device Name: %s\n", prop.deviceName); + MP_VERBOSE(vk, " Device ID: %x:%x\n", + (unsigned)prop.vendorID, (unsigned)prop.deviceID); + MP_VERBOSE(vk, " Driver version: %d\n", (int)prop.driverVersion); + MP_VERBOSE(vk, " API version: %d.%d.%d\n", + (int)VK_VERSION_MAJOR(prop.apiVersion), + (int)VK_VERSION_MINOR(prop.apiVersion), + (int)VK_VERSION_PATCH(prop.apiVersion)); + vk->physd = devices[i]; + vk->limits = prop.limits; + talloc_free(devices); + return true; + } + } + +error: + MP_VERBOSE(vk, "Found no suitable device, giving up.\n"); + talloc_free(devices); + return false; +} + +bool mpvk_pick_surface_format(struct mpvk_ctx *vk) +{ + assert(vk->physd); + + VkSurfaceFormatKHR *formats = NULL; + int num; + + // Enumerate through the surface formats and find one that we can map to + // a ra_format + VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, vk->surf, &num, NULL)); + formats = talloc_array(NULL, VkSurfaceFormatKHR, num); + VK(vkGetPhysicalDeviceSurfaceFormatsKHR(vk->physd, vk->surf, &num, formats)); + + for (int i = 0; i < num; i++) { + // A value of VK_FORMAT_UNDEFINED means we can pick anything we want + if (formats[i].format == VK_FORMAT_UNDEFINED) { + vk->surf_format = (VkSurfaceFormatKHR) { + .colorSpace = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR, + .format = VK_FORMAT_R16G16B16A16_UNORM, + }; + break; + } + + if (formats[i].colorSpace != VK_COLOR_SPACE_SRGB_NONLINEAR_KHR) + continue; + + // Format whitelist, since we want only >= 8 bit _UNORM formats + switch (formats[i].format) { + case VK_FORMAT_R8G8B8_UNORM: + case VK_FORMAT_B8G8R8_UNORM: + case VK_FORMAT_R8G8B8A8_UNORM: + case VK_FORMAT_B8G8R8A8_UNORM: + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: + case VK_FORMAT_A2R10G10B10_UNORM_PACK32: + case VK_FORMAT_A2B10G10R10_UNORM_PACK32: + case VK_FORMAT_R16G16B16_UNORM: + case VK_FORMAT_R16G16B16A16_UNORM: + break; // accept + default: 
continue; + } + + vk->surf_format = formats[i]; + break; + } + + talloc_free(formats); + + if (!vk->surf_format.format) + goto error; + + return true; + +error: + MP_ERR(vk, "Failed picking surface format!\n"); + talloc_free(formats); + return false; +} + +static bool vk_cmdpool_init(struct mpvk_ctx *vk, VkDeviceQueueCreateInfo qinfo, + VkQueueFamilyProperties props, + struct vk_cmdpool **out) +{ + struct vk_cmdpool *pool = *out = talloc_ptrtype(NULL, pool); + *pool = (struct vk_cmdpool) { + .qf = qinfo.queueFamilyIndex, + .props = props, + .qcount = qinfo.queueCount, + }; + + for (int n = 0; n < pool->qcount; n++) + vkGetDeviceQueue(vk->dev, pool->qf, n, &pool->queues[n]); + + VkCommandPoolCreateInfo cinfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = pool->qf, + }; + + VK(vkCreateCommandPool(vk->dev, &cinfo, MPVK_ALLOCATOR, &pool->pool)); + + VkCommandBufferAllocateInfo ainfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = pool->pool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = MPVK_MAX_CMDS, + }; + + VkCommandBuffer cmdbufs[MPVK_MAX_CMDS]; + VK(vkAllocateCommandBuffers(vk->dev, &ainfo, cmdbufs)); + + for (int n = 0; n < MPVK_MAX_CMDS; n++) { + struct vk_cmd *cmd = &pool->cmds[n]; + cmd->pool = pool; + cmd->buf = cmdbufs[n]; + + VkFenceCreateInfo finfo = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = VK_FENCE_CREATE_SIGNALED_BIT, + }; + + VK(vkCreateFence(vk->dev, &finfo, MPVK_ALLOCATOR, &cmd->fence)); + + VkSemaphoreCreateInfo sinfo = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + }; + + VK(vkCreateSemaphore(vk->dev, &sinfo, MPVK_ALLOCATOR, &cmd->done)); + } + + return true; + +error: + return false; +} + +bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts) +{ + assert(vk->physd); + void *tmp = talloc_new(NULL); + + // Enumerate 
the queue families and find suitable families for each task + int qfnum; + vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, NULL); + VkQueueFamilyProperties *qfs = talloc_array(tmp, VkQueueFamilyProperties, qfnum); + vkGetPhysicalDeviceQueueFamilyProperties(vk->physd, &qfnum, qfs); + + MP_VERBOSE(vk, "Queue families supported by device:\n"); + + for (int i = 0; i < qfnum; i++) { + MP_VERBOSE(vk, " QF %d: flags 0x%x num %d\n", i, + (unsigned)qfs[i].queueFlags, (int)qfs[i].queueCount); + } + + // For most of our rendering operations, we want to use one "primary" pool, + // so just pick the queue family with the most features. + int idx = -1; + for (int i = 0; i < qfnum; i++) { + if (!(qfs[i].queueFlags & VK_QUEUE_GRAPHICS_BIT)) + continue; + + // QF supports more features + if (idx < 0 || qfs[i].queueFlags > qfs[idx].queueFlags) + idx = i; + + // QF supports more queues (at the same specialization level) + if (qfs[i].queueFlags == qfs[idx].queueFlags && + qfs[i].queueCount > qfs[idx].queueCount) + { + idx = i; + } + } + + // Vulkan requires at least one GRAPHICS queue, so if this fails something + // is horribly wrong. 
+ assert(idx >= 0); + + // Ensure we can actually present to the surface using this queue + VkBool32 sup; + VK(vkGetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx, vk->surf, &sup)); + if (!sup) { + MP_ERR(vk, "Queue family does not support surface presentation!\n"); + goto error; + } + + // Now that we know which queue families we want, we can create the logical + // device + assert(opts.queue_count <= MPVK_MAX_QUEUES); + static const float priorities[MPVK_MAX_QUEUES] = {0}; + VkDeviceQueueCreateInfo qinfo = { + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .queueFamilyIndex = idx, + .queueCount = MPMIN(qfs[idx].queueCount, opts.queue_count), + .pQueuePriorities = priorities, + }; + + const char **exts = NULL; + int num_exts = 0; + MP_TARRAY_APPEND(tmp, exts, num_exts, VK_KHR_SWAPCHAIN_EXTENSION_NAME); + if (vk->spirv->required_ext) + MP_TARRAY_APPEND(tmp, exts, num_exts, vk->spirv->required_ext); + + VkDeviceCreateInfo dinfo = { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .queueCreateInfoCount = 1, + .pQueueCreateInfos = &qinfo, + .ppEnabledExtensionNames = exts, + .enabledExtensionCount = num_exts, + }; + + MP_VERBOSE(vk, "Creating vulkan device with extensions:\n"); + for (int i = 0; i < num_exts; i++) + MP_VERBOSE(vk, " %s\n", exts[i]); + + VK(vkCreateDevice(vk->physd, &dinfo, MPVK_ALLOCATOR, &vk->dev)); + + vk_malloc_init(vk); + + // Create the vk_cmdpools and all required queues / synchronization objects + if (!vk_cmdpool_init(vk, qinfo, qfs[idx], &vk->pool)) + goto error; + + talloc_free(tmp); + return true; + +error: + MP_ERR(vk, "Failed creating logical device!\n"); + talloc_free(tmp); + return false; +} + +static void run_callbacks(struct mpvk_ctx *vk, struct vk_cmd *cmd) +{ + for (int i = 0; i < cmd->num_callbacks; i++) { + struct vk_callback *cb = &cmd->callbacks[i]; + cb->run(cb->priv, cb->arg); + *cb = (struct vk_callback){0}; + } + + cmd->num_callbacks = 0; + + // Also reset vk->last_cmd in case this was the last command to run + if 
(vk->last_cmd == cmd) + vk->last_cmd = NULL; +} + +static void wait_for_cmds(struct mpvk_ctx *vk, struct vk_cmd cmds[], int num) +{ + if (!num) + return; + + VkFence fences[MPVK_MAX_CMDS]; + for (int i = 0; i < num; i++) + fences[i] = cmds[i].fence; + + vkWaitForFences(vk->dev, num, fences, true, UINT64_MAX); + + for (int i = 0; i < num; i++) + run_callbacks(vk, &cmds[i]); +} + +void mpvk_pool_wait_idle(struct mpvk_ctx *vk, struct vk_cmdpool *pool) +{ + if (!pool) + return; + + int idx = pool->cindex, pidx = pool->cindex_pending; + if (pidx < idx) { // range doesn't wrap + wait_for_cmds(vk, &pool->cmds[pidx], idx - pidx); + } else if (pidx > idx) { // range wraps + wait_for_cmds(vk, &pool->cmds[pidx], MPVK_MAX_CMDS - pidx); + wait_for_cmds(vk, &pool->cmds[0], idx); + } + pool->cindex_pending = pool->cindex; +} + +void mpvk_dev_wait_idle(struct mpvk_ctx *vk) +{ + mpvk_pool_wait_idle(vk, vk->pool); +} + +void mpvk_pool_poll_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool, + uint64_t timeout) +{ + if (!pool) + return; + + // If requested, hard block until at least one command completes + if (timeout > 0 && pool->cindex_pending != pool->cindex) { + vkWaitForFences(vk->dev, 1, &pool->cmds[pool->cindex_pending].fence, + true, timeout); + } + + // Lazily garbage collect the commands based on their status + while (pool->cindex_pending != pool->cindex) { + struct vk_cmd *cmd = &pool->cmds[pool->cindex_pending]; + VkResult res = vkGetFenceStatus(vk->dev, cmd->fence); + if (res != VK_SUCCESS) + break; + run_callbacks(vk, cmd); + pool->cindex_pending++; + pool->cindex_pending %= MPVK_MAX_CMDS; + } +} + +void mpvk_dev_poll_cmds(struct mpvk_ctx *vk, uint32_t timeout) +{ + mpvk_pool_poll_cmds(vk, vk->pool, timeout); +} + +void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg) +{ + if (vk->last_cmd) { + vk_cmd_callback(vk->last_cmd, callback, p, arg); + } else { + // The device was already idle, so we can just immediately call it + callback(p, arg); + } +} 
+ +void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg) +{ + MP_TARRAY_GROW(NULL, cmd->callbacks, cmd->num_callbacks); + cmd->callbacks[cmd->num_callbacks++] = (struct vk_callback) { + .run = callback, + .priv = p, + .arg = arg, + }; +} + +void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, + VkPipelineStageFlags depstage) +{ + assert(cmd->num_deps < MPVK_MAX_CMD_DEPS); + cmd->deps[cmd->num_deps] = dep; + cmd->depstages[cmd->num_deps++] = depstage; +} + +struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool) +{ + // Garbage collect the cmdpool first + mpvk_pool_poll_cmds(vk, pool, 0); + + int next = (pool->cindex + 1) % MPVK_MAX_CMDS; + if (next == pool->cindex_pending) { + MP_ERR(vk, "No free command buffers!\n"); + goto error; + } + + struct vk_cmd *cmd = &pool->cmds[pool->cindex]; + pool->cindex = next; + + VK(vkResetCommandBuffer(cmd->buf, 0)); + + VkCommandBufferBeginInfo binfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + + VK(vkBeginCommandBuffer(cmd->buf, &binfo)); + + return cmd; + +error: + return NULL; +} + +bool vk_cmd_submit(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore *done) +{ + VK(vkEndCommandBuffer(cmd->buf)); + + struct vk_cmdpool *pool = cmd->pool; + VkQueue queue = pool->queues[pool->qindex]; + + VkSubmitInfo sinfo = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &cmd->buf, + .waitSemaphoreCount = cmd->num_deps, + .pWaitSemaphores = cmd->deps, + .pWaitDstStageMask = cmd->depstages, + }; + + if (done) { + sinfo.signalSemaphoreCount = 1; + sinfo.pSignalSemaphores = &cmd->done; + *done = cmd->done; + } + + VK(vkResetFences(vk->dev, 1, &cmd->fence)); + VK(vkQueueSubmit(queue, 1, &sinfo, cmd->fence)); + MP_TRACE(vk, "Submitted command on queue %p (QF %d)\n", (void *)queue, + pool->qf); + + for (int i = 0; i < cmd->num_deps; i++) + cmd->deps[i] = NULL; + cmd->num_deps = 0; + + 
vk->last_cmd = cmd; + return true; + +error: + return false; +} + +void vk_cmd_cycle_queues(struct mpvk_ctx *vk) +{ + struct vk_cmdpool *pool = vk->pool; + pool->qindex = (pool->qindex + 1) % pool->qcount; +} + +const VkImageSubresourceRange vk_range = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .levelCount = 1, + .layerCount = 1, +}; + +const VkImageSubresourceLayers vk_layers = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .layerCount = 1, +}; diff --git a/video/out/vulkan/utils.h b/video/out/vulkan/utils.h new file mode 100644 index 0000000..0cc8a29 --- /dev/null +++ b/video/out/vulkan/utils.h @@ -0,0 +1,154 @@ +#pragma once + +#include "video/out/vo.h" +#include "video/out/gpu/context.h" +#include "video/mp_image.h" + +#include "common.h" +#include "formats.h" + +#define VK_LOAD_PFN(name) PFN_##name pfn_##name = (PFN_##name) \ + vkGetInstanceProcAddr(vk->inst, #name); + +// Return a human-readable name for various struct mpvk_ctx enums +const char* vk_err(VkResult res); + +// Convenience macros to simplify a lot of common boilerplate +#define VK_ASSERT(res, str) \ + do { \ + if (res != VK_SUCCESS) { \ + MP_ERR(vk, str ": %s\n", vk_err(res)); \ + goto error; \ + } \ + } while (0) + +#define VK(cmd) \ + do { \ + MP_TRACE(vk, #cmd "\n"); \ + VkResult res ## __LINE__ = (cmd); \ + VK_ASSERT(res ## __LINE__, #cmd); \ + } while (0) + +// Uninits everything in the correct order +void mpvk_uninit(struct mpvk_ctx *vk); + +// Initialization functions: As a rule of thumb, these need to be called in +// this order, followed by vk_malloc_init, followed by RA initialization, and +// finally followed by vk_swchain initialization. + +// Create a vulkan instance. Returns VK_NULL_HANDLE on failure +bool mpvk_instance_init(struct mpvk_ctx *vk, struct mp_log *log, + const char *surf_ext_name, bool debug); + +// Generate a VkSurfaceKHR usable for video output. Returns VK_NULL_HANDLE on +// failure. Must be called after mpvk_instance_init. 
+bool mpvk_surface_init(struct vo *vo, struct mpvk_ctx *vk); + +// Find a suitable physical device for use with rendering and which supports +// the surface. +// name: only match a device with this name +// sw: also allow software/virtual devices +bool mpvk_find_phys_device(struct mpvk_ctx *vk, const char *name, bool sw); + +// Pick a suitable surface format that's supported by this physical device. +bool mpvk_pick_surface_format(struct mpvk_ctx *vk); + +struct mpvk_device_opts { + int queue_count; // number of queues to use +}; + +// Create a logical device and initialize the vk_cmdpools +bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts); + +// Wait until all commands submitted to all queues have completed +void mpvk_pool_wait_idle(struct mpvk_ctx *vk, struct vk_cmdpool *pool); +void mpvk_dev_wait_idle(struct mpvk_ctx *vk); + +// Wait until at least one command submitted to any queue has completed, and +// process the callbacks. Good for event loops that need to delay until a +// command completes. Will block at most `timeout` nanoseconds. If used with +// 0, it only garbage collects completed commands without blocking. +void mpvk_pool_poll_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool, + uint64_t timeout); +void mpvk_dev_poll_cmds(struct mpvk_ctx *vk, uint32_t timeout); + +// Since lots of vulkan operations need to be done lazily once the affected +// resources are no longer in use, provide an abstraction for tracking these. +// In practice, these are only checked and run when submitting new commands, so +// the actual execution may be delayed by a frame. +typedef void (*vk_cb)(void *priv, void *arg); + +struct vk_callback { + vk_cb run; + void *priv; + void *arg; // as a convenience, you also get to pass an arg for "free" +}; + +// Associate a callback with the completion of all currently pending commands. +// This will essentially run once the device is completely idle. 
+void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg); + +#define MPVK_MAX_CMD_DEPS 8 + +// Helper wrapper around command buffers that also track dependencies, +// callbacks and synchronization primitives +struct vk_cmd { + struct vk_cmdpool *pool; // pool it was allocated from + VkCommandBuffer buf; + VkFence fence; // the fence guards cmd buffer reuse + VkSemaphore done; // the semaphore signals when execution is done + // The semaphores represent dependencies that need to complete before + // this command can be executed. These are *not* owned by the vk_cmd + VkSemaphore deps[MPVK_MAX_CMD_DEPS]; + VkPipelineStageFlags depstages[MPVK_MAX_CMD_DEPS]; + int num_deps; + // Since VkFences are useless, we have to manually track "callbacks" + // to fire once the VkFence completes. These are used for multiple purposes, + // ranging from garbage collection (resource deallocation) to fencing. + struct vk_callback *callbacks; + int num_callbacks; +}; + +// Associate a callback with the completion of the current command. This +// bool will be set to `true` once the command completes, or shortly thereafter. +void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg); + +// Associate a dependency for the current command. This semaphore must signal +// by the corresponding stage before the command may execute. +void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, + VkPipelineStageFlags depstage); + +#define MPVK_MAX_QUEUES 8 +#define MPVK_MAX_CMDS 64 + +// Command pool / queue family hybrid abstraction +struct vk_cmdpool { + VkQueueFamilyProperties props; + uint32_t qf; // queue family index + VkCommandPool pool; + VkQueue queues[MPVK_MAX_QUEUES]; + int qcount; + int qindex; + // Command buffers associated with this queue + struct vk_cmd cmds[MPVK_MAX_CMDS]; + int cindex; + int cindex_pending; +}; + +// Fetch the next command buffer from a command pool and begin recording to it. +// Returns NULL on failure. 
+struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool); + +// Finish the currently recording command buffer and submit it for execution. +// If `done` is not NULL, it will be set to a semaphore that will signal once +// the command completes. (And MUST have a corresponding semaphore wait) +// Returns whether successful. +bool vk_cmd_submit(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore *done); + +// Rotate the queues for each vk_cmdpool. Call this once per frame to ensure +// good parallelism between frames when using multiple queues +void vk_cmd_cycle_queues(struct mpvk_ctx *vk); + +// Predefined structs for a simple non-layered, non-mipped image +extern const VkImageSubresourceRange vk_range; +extern const VkImageSubresourceLayers vk_layers; diff --git a/video/out/w32_common.c b/video/out/w32_common.c index b93a4fd..feeae81 100644 --- a/video/out/w32_common.c +++ b/video/out/w32_common.c @@ -62,8 +62,12 @@ typedef enum MONITOR_DPI_TYPE { } MONITOR_DPI_TYPE; #endif +#define rect_w(r) ((r).right - (r).left) +#define rect_h(r) ((r).bottom - (r).top) + struct w32_api { HRESULT (WINAPI *pGetDpiForMonitor)(HMONITOR, MONITOR_DPI_TYPE, UINT*, UINT*); + BOOL (WINAPI *pImmDisableIME)(DWORD); }; struct vo_w32_state { @@ -84,15 +88,8 @@ struct vo_w32_state { HWINEVENTHOOK parent_evt_hook; HMONITOR monitor; // Handle of the current screen - struct mp_rect screenrc; // Size and virtual position of the current screen char *color_profile; // Path of the current screen's color profile - // last non-fullscreen extends (updated only on fullscreen or on initialization) - int prev_width; - int prev_height; - int prev_x; - int prev_y; - // Has the window seen a WM_DESTROY? If so, don't call DestroyWindow again. 
bool destroyed; @@ -102,11 +99,10 @@ struct vo_w32_state { bool current_fs; bool toggle_fs; // whether the current fullscreen state needs to be switched - // currently known window state - int window_x; - int window_y; - int dw; - int dh; + RECT windowrc; // currently known window rect + RECT screenrc; // current screen rect + // last non-fullscreen rect, updated only on fullscreen or on initialization + RECT prev_windowrc; // video size uint32_t o_dwidth; @@ -130,6 +126,9 @@ struct vo_w32_state { // UTF-16 decoding state for WM_CHAR and VK_PACKET int high_surrogate; + // Whether to fit the window on screen on next window state updating + bool fit_on_screen; + ITaskbarList2 *taskbar_list; ITaskbarList3 *taskbar_list3; UINT tbtnCreatedMsg; @@ -140,6 +139,7 @@ struct vo_w32_state { // updates on move/resize/displaychange double display_fps; + bool moving; bool snapped; int snap_dx; int snap_dy; @@ -182,16 +182,16 @@ static LRESULT borderless_nchittest(struct vo_w32_state *w32, int x, int y) if (mouse.y < frame_size) { if (mouse.x < diagonal_width) return HTTOPLEFT; - if (mouse.x >= w32->dw - diagonal_width) + if (mouse.x >= rect_w(w32->windowrc) - diagonal_width) return HTTOPRIGHT; return HTTOP; } // Hit-test bottom border - if (mouse.y >= w32->dh - frame_size) { + if (mouse.y >= rect_h(w32->windowrc) - frame_size) { if (mouse.x < diagonal_width) return HTBOTTOMLEFT; - if (mouse.x >= w32->dw - diagonal_width) + if (mouse.x >= rect_w(w32->windowrc) - diagonal_width) return HTBOTTOMRIGHT; return HTBOTTOM; } @@ -199,7 +199,7 @@ static LRESULT borderless_nchittest(struct vo_w32_state *w32, int x, int y) // Hit-test side borders if (mouse.x < frame_size) return HTLEFT; - if (mouse.x >= w32->dw - frame_size) + if (mouse.x >= rect_w(w32->windowrc) - frame_size) return HTRIGHT; return HTCLIENT; } @@ -607,6 +607,9 @@ static void update_playback_state(struct vo_w32_state *w32) static bool snap_to_screen_edges(struct vo_w32_state *w32, RECT *rc) { + if (w32->parent || 
w32->current_fs || IsMaximized(w32->window)) + return false; + if (!w32->opts->snap_window) { w32->snapped = false; return false; @@ -616,16 +619,24 @@ static bool snap_to_screen_edges(struct vo_w32_state *w32, RECT *rc) POINT cursor; if (!GetWindowRect(w32->window, &rect) || !GetCursorPos(&cursor)) return false; - // Check for aero snapping - if ((rc->right - rc->left != rect.right - rect.left) || - (rc->bottom - rc->top != rect.bottom - rect.top)) + // Check if window is going to be aero-snapped + if (rect_w(*rc) != rect_w(rect) || rect_h(*rc) != rect_h(rect)) + return false; + + // Check if window has already been aero-snapped + WINDOWPLACEMENT wp = {0}; + wp.length = sizeof(wp); + if (!GetWindowPlacement(w32->window, &wp)) + return false; + RECT wr = wp.rcNormalPosition; + if (rect_w(*rc) != rect_w(wr) || rect_h(*rc) != rect_h(wr)) return false; MONITORINFO mi = { .cbSize = sizeof(mi) }; if (!GetMonitorInfoW(w32->monitor, &mi)) return false; // Get the work area to let the window snap to taskbar - RECT wr = mi.rcWork; + wr = mi.rcWork; // Check for invisible borders and adjust the work area size RECT frame = {0}; @@ -706,15 +717,10 @@ static void update_screen_rect(struct vo_w32_state *w32) // Handle --fs-screen=all if (w32->current_fs && screen == -2) { - struct mp_rect rc = { - GetSystemMetrics(SM_XVIRTUALSCREEN), - GetSystemMetrics(SM_YVIRTUALSCREEN), - GetSystemMetrics(SM_CXVIRTUALSCREEN), - GetSystemMetrics(SM_CYVIRTUALSCREEN), - }; - rc.x1 += rc.x0; - rc.y1 += rc.y0; - w32->screenrc = rc; + const int x = GetSystemMetrics(SM_XVIRTUALSCREEN); + const int y = GetSystemMetrics(SM_YVIRTUALSCREEN); + SetRect(&w32->screenrc, x, y, x + GetSystemMetrics(SM_CXVIRTUALSCREEN), + y + GetSystemMetrics(SM_CYVIRTUALSCREEN)); return; } @@ -734,10 +740,7 @@ static void update_screen_rect(struct vo_w32_state *w32) MONITORINFO mi = { .cbSize = sizeof(mi) }; GetMonitorInfoW(mon, &mi); - w32->screenrc = (struct mp_rect){ - mi.rcMonitor.left, mi.rcMonitor.top, - 
mi.rcMonitor.right, mi.rcMonitor.bottom, - }; + w32->screenrc = mi.rcMonitor; } static DWORD update_style(struct vo_w32_state *w32, DWORD style) @@ -754,139 +757,148 @@ static DWORD update_style(struct vo_w32_state *w32, DWORD style) return style; } -// Update the window title, position, size, and border style. -static void reinit_window_state(struct vo_w32_state *w32) +static void update_window_style(struct vo_w32_state *w32) { - HWND layer = HWND_NOTOPMOST; - RECT r; - if (w32->parent) return; - bool new_fs = w32->toggle_fs ? !w32->current_fs : w32->opts->fullscreen; - bool toggle_fs = w32->current_fs != new_fs; - w32->current_fs = new_fs; - w32->toggle_fs = false; + // SetWindowLongPtr can trigger a WM_SIZE event, so window rect + // has to be saved now and restored after setting the new style. + const RECT wr = w32->windowrc; + const DWORD style = GetWindowLongPtrW(w32->window, GWL_STYLE); + SetWindowLongPtrW(w32->window, GWL_STYLE, update_style(w32, style)); + w32->windowrc = wr; +} - if (w32->taskbar_list) { - ITaskbarList2_MarkFullscreenWindow(w32->taskbar_list, - w32->window, w32->current_fs); +// Adjust rc size and position if its size is larger than rc2. +// returns true if the rectangle was modified. 
+static bool fit_rect(RECT *rc, RECT *rc2) +{ + // Calculate old size and maximum new size + int o_w = rect_w(*rc), o_h = rect_h(*rc); + int n_w = rect_w(*rc2), n_h = rect_h(*rc2); + if (o_w <= n_w && o_h <= n_h) + return false; + + // Apply letterboxing + const float o_asp = o_w / (float)MPMAX(o_h, 1); + const float n_asp = n_w / (float)MPMAX(n_h, 1); + if (o_asp > n_asp) { + n_h = n_w / o_asp; + } else { + n_w = n_h * o_asp; } - DWORD style = update_style(w32, GetWindowLongPtrW(w32->window, GWL_STYLE)); + // Calculate new position and save the rect + const int x = rc->left + o_w / 2 - n_w / 2; + const int y = rc->top + o_h / 2 - n_h / 2; + SetRect(rc, x, y, x + n_w, y + n_h); + return true; +} - if (w32->opts->ontop) - layer = HWND_TOPMOST; +// Adjust window size and position if its size is larger than the screen size. +static void fit_window_on_screen(struct vo_w32_state *w32) +{ + if (w32->parent || w32->current_fs || IsMaximized(w32->window)) + return; - // xxx not sure if this can trigger any unwanted messages (WM_MOVE/WM_SIZE) - update_screen_rect(w32); + RECT screen = w32->screenrc; + if (w32->opts->border && w32->opts->fit_border) + subtract_window_borders(w32->window, &screen); - int screen_w = w32->screenrc.x1 - w32->screenrc.x0; - int screen_h = w32->screenrc.y1 - w32->screenrc.y0; + if (fit_rect(&w32->windowrc, &screen)) { + MP_VERBOSE(w32, "adjusted window bounds: %d:%d:%d:%d\n", + (int)w32->windowrc.left, (int)w32->windowrc.top, + (int)rect_w(w32->windowrc), (int)rect_h(w32->windowrc)); + } +} - if (w32->current_fs) { - // Save window position and size when switching to fullscreen. - if (toggle_fs) { - w32->prev_width = w32->dw; - w32->prev_height = w32->dh; - w32->prev_x = w32->window_x; - w32->prev_y = w32->window_y; - MP_VERBOSE(w32, "save window bounds: %d:%d:%d:%d\n", - w32->prev_x, w32->prev_y, w32->prev_width, w32->prev_height); - } +// Calculate new fullscreen state and change window size and position. 
+// returns true if fullscreen state was changed. +static bool update_fullscreen_state(struct vo_w32_state *w32) +{ + if (w32->parent) + return false; - w32->window_x = w32->screenrc.x0; - w32->window_y = w32->screenrc.y0; - w32->dw = screen_w; - w32->dh = screen_h; - } else { - if (toggle_fs) { - // Restore window position and size when switching from fullscreen. - MP_VERBOSE(w32, "restore window bounds: %d:%d:%d:%d\n", - w32->prev_x, w32->prev_y, w32->prev_width, w32->prev_height); - w32->dw = w32->prev_width; - w32->dh = w32->prev_height; - w32->window_x = w32->prev_x; - w32->window_y = w32->prev_y; - } + bool new_fs = w32->opts->fullscreen; + if (w32->toggle_fs) { + new_fs = !w32->current_fs; + w32->toggle_fs = false; } - r.left = w32->window_x; - r.right = r.left + w32->dw; - r.top = w32->window_y; - r.bottom = r.top + w32->dh; - - SetWindowLongPtrW(w32->window, GWL_STYLE, style); - - RECT cr = r; - add_window_borders(w32->window, &r); - // Check on client area size instead of window size on --fit-border=no - long o_w; - long o_h; - if( w32->opts->fit_border ) { - o_w = r.right - r.left; - o_h = r.bottom - r.top; - } else { - o_w = cr.right - cr.left; - o_h = cr.bottom - cr.top; - } + bool toggle_fs = w32->current_fs != new_fs; + w32->current_fs = new_fs; - if ( !w32->current_fs && ( o_w > screen_w || o_h > screen_h ) ) - { - MP_VERBOSE(w32, "requested window size larger than the screen\n"); - // Use the aspect of the client area, not the full window size. - // Basically, try to compute the maximum window size. 
- long n_w; - long n_h; - if( w32->opts->fit_border ) { - n_w = screen_w - (r.right - cr.right) - (cr.left - r.left); - n_h = screen_h - (r.bottom - cr.bottom) - (cr.top - r.top); - } else { - n_w = screen_w; - n_h = screen_h; - } - // Letterbox - double asp = (cr.right - cr.left) / (double)(cr.bottom - cr.top); - double s_asp = n_w / (double)n_h; - if (asp > s_asp) { - n_h = n_w / asp; + update_screen_rect(w32); + + if (toggle_fs) { + RECT rc; + char msg[50]; + if (w32->current_fs) { + // Save window rect when switching to fullscreen. + rc = w32->prev_windowrc = w32->windowrc; + sprintf(msg, "save window bounds"); } else { - n_w = n_h * asp; + // Restore window rect when switching from fullscreen. + rc = w32->windowrc = w32->prev_windowrc; + sprintf(msg, "restore window bounds"); } - // Save new size - w32->dw = n_w; - w32->dh = n_h; - // Get old window center - long o_cx = r.left + (r.right - r.left) / 2; - long o_cy = r.top + (r.bottom - r.top) / 2; - // Add window borders to the new window size - r = (RECT){.right = n_w, .bottom = n_h}; - add_window_borders(w32->window, &r); - // Get top and left border size for client area position calculation - long b_top = -r.top; - long b_left = -r.left; - // Center the final window around the old window center - n_w = r.right - r.left; - n_h = r.bottom - r.top; - r.left = o_cx - n_w / 2; - r.top = o_cy - n_h / 2; - r.right = r.left + n_w; - r.bottom = r.top + n_h; - // Save new client area position - w32->window_x = r.left + b_left; - w32->window_y = r.top + b_top; + MP_VERBOSE(w32, "%s: %d:%d:%d:%d\n", msg, + (int)rc.left, (int)rc.top, (int)rect_w(rc), (int)rect_h(rc)); } + if (w32->current_fs) + w32->windowrc = w32->screenrc; + MP_VERBOSE(w32, "reset window bounds: %d:%d:%d:%d\n", - (int) r.left, (int) r.top, (int)(r.right - r.left), - (int)(r.bottom - r.top)); + (int)w32->windowrc.left, (int)w32->windowrc.top, + (int)rect_w(w32->windowrc), (int)rect_h(w32->windowrc)); + return toggle_fs; +} + +static void 
update_window_state(struct vo_w32_state *w32) +{ + if (w32->parent) + return; + + RECT wr = w32->windowrc; + add_window_borders(w32->window, &wr); - SetWindowPos(w32->window, layer, r.left, r.top, r.right - r.left, - r.bottom - r.top, SWP_FRAMECHANGED | SWP_SHOWWINDOW); + SetWindowPos(w32->window, w32->opts->ontop ? HWND_TOPMOST : HWND_NOTOPMOST, + wr.left, wr.top, rect_w(wr), rect_h(wr), + SWP_FRAMECHANGED | SWP_SHOWWINDOW); + + // Notify the taskbar about the fullscreen state only after the window + // is visible, to make sure the taskbar item has already been created + if (w32->taskbar_list) { + ITaskbarList2_MarkFullscreenWindow(w32->taskbar_list, + w32->window, w32->current_fs); + } signal_events(w32, VO_EVENT_RESIZE); } +static void reinit_window_state(struct vo_w32_state *w32) +{ + if (w32->parent) + return; + + // The order matters: fs state should be updated prior to changing styles + bool toggle_fs = update_fullscreen_state(w32); + update_window_style(w32); + + // Assume that the window has already been fit on screen before switching fs + if (!toggle_fs || w32->fit_on_screen) { + fit_window_on_screen(w32); + // The fullscreen state might still be active, so set the flag + // to fit on screen next time the window leaves the fullscreen. 
+ w32->fit_on_screen = w32->current_fs; + } + + // Show and activate the window after all window state parameters were set + update_window_state(w32); +} + static LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam) { @@ -917,25 +929,26 @@ static LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, signal_events(w32, VO_EVENT_EXPOSE); break; case WM_MOVE: { - POINT p = {0}; - ClientToScreen(w32->window, &p); - w32->window_x = p.x; - w32->window_y = p.y; + const int x = GET_X_LPARAM(lParam), y = GET_Y_LPARAM(lParam); + OffsetRect(&w32->windowrc, x - w32->windowrc.left, + y - w32->windowrc.top); // Window may intersect with new monitors (see VOCTRL_GET_DISPLAY_NAMES) signal_events(w32, VO_EVENT_WIN_STATE); update_display_info(w32); // if we moved between monitors - MP_DBG(w32, "move window: %d:%d\n", w32->window_x, w32->window_y); + MP_DBG(w32, "move window: %d:%d\n", x, y); break; } case WM_MOVING: { + w32->moving = true; RECT *rc = (RECT*)lParam; if (snap_to_screen_edges(w32, rc)) return TRUE; break; } case WM_ENTERSIZEMOVE: + w32->moving = true; if (w32->snapped) { // Save the cursor offset from the window borders, // so the player window can be unsnapped later @@ -947,13 +960,19 @@ static LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, } } break; + case WM_EXITSIZEMOVE: + w32->moving = false; + break; case WM_SIZE: { - RECT r; - if (GetClientRect(w32->window, &r) && r.right > 0 && r.bottom > 0) { - w32->dw = r.right; - w32->dh = r.bottom; + if (w32->moving) + w32->snapped = false; + + const int w = LOWORD(lParam), h = HIWORD(lParam); + if (w > 0 && h > 0) { + w32->windowrc.right = w32->windowrc.left + w; + w32->windowrc.bottom = w32->windowrc.top + h; signal_events(w32, VO_EVENT_RESIZE); - MP_VERBOSE(w32, "resize window: %d:%d\n", w32->dw, w32->dh); + MP_VERBOSE(w32, "resize window: %d:%d\n", w, h); } // Window may have been minimized or restored @@ -971,7 +990,7 @@ static LRESULT CALLBACK WndProc(HWND hWnd, 
UINT message, WPARAM wParam, // (subtracting the window borders) RECT r = *rc; subtract_window_borders(w32->window, &r); - int c_w = r.right - r.left, c_h = r.bottom - r.top; + int c_w = rect_w(r), c_h = rect_h(r); float aspect = w32->o_dwidth / (float) MPMAX(w32->o_dheight, 1); int d_w = c_h * aspect - c_w; int d_h = c_w / aspect - c_h; @@ -988,8 +1007,7 @@ static LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, update_display_info(w32); break; case WM_CLOSE: - // Don't actually allow it to destroy the window, or whatever else it - // is that will make us lose WM_USER wakeups. + // Don't destroy the window yet to not lose wakeup events. mp_input_put_key(w32->input_ctx, MP_KEY_CLOSE_WIN); return 0; case WM_NCDESTROY: // Sometimes only WM_NCDESTROY is received in --wid mode @@ -1260,56 +1278,51 @@ static void run_message_loop(struct vo_w32_state *w32) static void gui_thread_reconfig(void *ptr) { struct vo_w32_state *w32 = ptr; - struct vo *vo = w32->vo; struct vo_win_geometry geo; - vo_calc_window_geometry(vo, &w32->screenrc, &geo); + struct mp_rect screen = { w32->screenrc.left, w32->screenrc.top, + w32->screenrc.right, w32->screenrc.bottom }; + vo_calc_window_geometry(vo, &screen, &geo); vo_apply_window_geometry(vo, &geo); - bool reset_size = w32->o_dwidth != vo->dwidth || w32->o_dheight != vo->dheight; - bool pos_init = false; + bool reset_size = w32->o_dwidth != vo->dwidth || + w32->o_dheight != vo->dheight; w32->o_dwidth = vo->dwidth; w32->o_dheight = vo->dheight; - // the desired size is ignored in wid mode, it always matches the window size. 
- if (!w32->parent) { - if (w32->window_bounds_initialized) { - // restore vo_dwidth/vo_dheight, which are reset against our will - // in vo_config() - RECT r; - GetClientRect(w32->window, &r); - vo->dwidth = r.right; - vo->dheight = r.bottom; - } else { - w32->window_bounds_initialized = true; - reset_size = true; - pos_init = true; - w32->window_x = w32->prev_x = geo.win.x0; - w32->window_y = w32->prev_y = geo.win.y0; - } + if (!w32->parent && !w32->window_bounds_initialized) { + SetRect(&w32->windowrc, geo.win.x0, geo.win.y0, + geo.win.x0 + vo->dwidth, geo.win.y0 + vo->dheight); + w32->prev_windowrc = w32->windowrc; + w32->window_bounds_initialized = true; + w32->fit_on_screen = true; + goto finish; + } - if (reset_size) { - w32->prev_width = vo->dwidth = w32->o_dwidth; - w32->prev_height = vo->dheight = w32->o_dheight; - } - } else { + // The rect which size is going to be modified. + RECT *rc = &w32->windowrc; + + // The desired size always matches the window size in wid mode. + if (!reset_size || w32->parent) { RECT r; GetClientRect(w32->window, &r); + // Restore vo_dwidth and vo_dheight, which were reset in vo_config() vo->dwidth = r.right; vo->dheight = r.bottom; + } else { + if (w32->current_fs) + rc = &w32->prev_windowrc; + w32->fit_on_screen = true; } - // Recenter window around old position on new video size - // excluding the case when initial position handled by win_state. - if (!pos_init) { - w32->window_x += w32->dw / 2 - vo->dwidth / 2; - w32->window_y += w32->dh / 2 - vo->dheight / 2; - } - w32->dw = vo->dwidth; - w32->dh = vo->dheight; + // Save new window size and position. 
+ const int x = rc->left + rect_w(*rc) / 2 - vo->dwidth / 2; + const int y = rc->top + rect_h(*rc) / 2 - vo->dheight / 2; + SetRect(rc, x, y, x + vo->dwidth, y + vo->dheight); +finish: reinit_window_state(w32); } @@ -1320,25 +1333,18 @@ void vo_w32_config(struct vo *vo) mp_dispatch_run(w32->dispatch, gui_thread_reconfig, w32); } -static void thread_disable_ime(void) -{ - // Disables the IME for windows on this thread. imm32.dll must be loaded - // dynamically to account for machines without East Asian language support. - HMODULE imm32 = LoadLibraryW(L"imm32.dll"); - if (!imm32) - return; - BOOL (WINAPI *pImmDisableIME)(DWORD) = (BOOL (WINAPI*)(DWORD)) - GetProcAddress(imm32, "ImmDisableIME"); - if (pImmDisableIME) - pImmDisableIME(0); - FreeLibrary(imm32); -} - static void w32_api_load(struct vo_w32_state *w32) { HMODULE shcore_dll = LoadLibraryW(L"shcore.dll"); + // Available since Win8.1 w32->api.pGetDpiForMonitor = !shcore_dll ? NULL : (void *)GetProcAddress(shcore_dll, "GetDpiForMonitor"); + + // imm32.dll must be loaded dynamically + // to account for machines without East Asian language support + HMODULE imm32_dll = LoadLibraryW(L"imm32.dll"); + w32->api.pImmDisableIME = !imm32_dll ? 
NULL : + (void *)GetProcAddress(imm32_dll, "ImmDisableIME"); } static void *gui_thread(void *ptr) @@ -1350,7 +1356,10 @@ static void *gui_thread(void *ptr) mpthread_set_name("win32 window"); w32_api_load(w32); - thread_disable_ime(); + + // Disables the IME for windows on this thread + if (w32->api.pImmDisableIME) + w32->api.pImmDisableIME(0); if (w32->opts->WinID >= 0) w32->parent = (HWND)(intptr_t)(w32->opts->WinID); @@ -1423,6 +1432,9 @@ static void *gui_thread(void *ptr) EnableWindow(w32->window, 0); w32->cursor_visible = true; + w32->moving = false; + w32->snapped = false; + w32->snap_dx = w32->snap_dy = 0; update_screen_rect(w32); @@ -1544,10 +1556,11 @@ static int gui_thread_control(struct vo_w32_state *w32, int request, void *arg) reinit_window_state(w32); return VO_TRUE; case VOCTRL_ONTOP: - reinit_window_state(w32); + update_window_state(w32); return VO_TRUE; case VOCTRL_BORDER: - reinit_window_state(w32); + update_window_style(w32); + update_window_state(w32); return VO_TRUE; case VOCTRL_GET_FULLSCREEN: *(bool *)arg = w32->current_fs; @@ -1558,8 +1571,9 @@ static int gui_thread_control(struct vo_w32_state *w32, int request, void *arg) if (!w32->window_bounds_initialized) return VO_FALSE; - s[0] = w32->current_fs ? w32->prev_width : w32->dw; - s[1] = w32->current_fs ? w32->prev_height : w32->dh; + RECT *rc = w32->current_fs ? &w32->prev_windowrc : &w32->windowrc; + s[0] = rect_w(*rc); + s[1] = rect_h(*rc); return VO_TRUE; } case VOCTRL_SET_UNFS_WINDOW_SIZE: { @@ -1567,18 +1581,13 @@ static int gui_thread_control(struct vo_w32_state *w32, int request, void *arg) if (!w32->window_bounds_initialized) return VO_FALSE; - if (w32->current_fs) { - w32->prev_x += w32->prev_width / 2 - s[0] / 2; - w32->prev_y += w32->prev_height / 2 - s[1] / 2; - w32->prev_width = s[0]; - w32->prev_height = s[1]; - } else { - w32->window_x += w32->dw / 2 - s[0] / 2; - w32->window_y += w32->dh / 2 - s[1] / 2; - w32->dw = s[0]; - w32->dh = s[1]; - } + RECT *rc = w32->current_fs ? 
&w32->prev_windowrc : &w32->windowrc; + const int x = rc->left + rect_w(*rc) / 2 - s[0] / 2; + const int y = rc->top + rect_h(*rc) / 2 - s[1] / 2; + SetRect(rc, x, y, x + s[0], y + s[1]); + + w32->fit_on_screen = true; reinit_window_state(w32); return VO_TRUE; } @@ -1648,8 +1657,8 @@ static void do_control(void *ptr) *events |= atomic_fetch_and(&w32->event_flags, 0); // Safe access, since caller (owner of vo) is blocked. if (*events & VO_EVENT_RESIZE) { - w32->vo->dwidth = w32->dw; - w32->vo->dheight = w32->dh; + w32->vo->dwidth = rect_w(w32->windowrc); + w32->vo->dheight = rect_h(w32->windowrc); } } @@ -1660,8 +1669,8 @@ int vo_w32_control(struct vo *vo, int *events, int request, void *arg) *events |= atomic_fetch_and(&w32->event_flags, 0); if (*events & VO_EVENT_RESIZE) { mp_dispatch_lock(w32->dispatch); - vo->dwidth = w32->dw; - vo->dheight = w32->dh; + vo->dwidth = rect_w(w32->windowrc); + vo->dheight = rect_h(w32->windowrc); mp_dispatch_unlock(w32->dispatch); } return VO_TRUE; diff --git a/video/out/wayland/buffer.c b/video/out/wayland/buffer.c deleted file mode 100644 index dce3ca4..0000000 --- a/video/out/wayland/buffer.c +++ /dev/null @@ -1,140 +0,0 @@ -/* - * This file is part of mpv video player. - * Copyright © 2014 Alexander Preisinger <alexander.preisinger@gmail.com> - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include "buffer.h" -#include "memfile.h" - -#include <unistd.h> -#include <sys/mman.h> - -int8_t format_get_bytes(const format_t *fmt) -{ - return mp_imgfmt_get_desc(fmt->mp_format).bytes[0]; -} - -shm_buffer_t* shm_buffer_create(uint32_t width, - uint32_t height, - format_t fmt, - struct wl_shm *shm, - const struct wl_buffer_listener *listener) -{ - int8_t bytes = format_get_bytes(&fmt); - uint32_t stride = SHM_BUFFER_STRIDE(width, bytes); - uint32_t size = stride * height; - - shm_buffer_t *buffer = calloc(1, sizeof(shm_buffer_t)); - int fd = memfile_create(size); - - if (fd < 0) { - free(buffer); - return NULL; - } - - buffer->data = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); - - if (buffer->data == MAP_FAILED) { - close(fd); - free(buffer); - return NULL; - } - - buffer->shm_pool = wl_shm_create_pool(shm, fd, size); - buffer->buffer = wl_shm_pool_create_buffer(buffer->shm_pool, - 0, width, height, stride, - fmt.wl_format); - - wl_buffer_add_listener(buffer->buffer, listener, buffer); - - buffer->fd = fd; - buffer->height = height; - buffer->stride = stride; - buffer->format = fmt; - buffer->bytes = bytes; - buffer->pool_size = size; - buffer->pending_height = 0; - buffer->pending_width = 0; - - return buffer; -} - -int shm_buffer_resize(shm_buffer_t *buffer, uint32_t width, uint32_t height) -{ - uint32_t new_stride = SHM_BUFFER_STRIDE(width, buffer->bytes); - uint32_t new_size = new_stride * height; - - if (SHM_BUFFER_IS_BUSY(buffer)) { - SHM_BUFFER_SET_PNDNG_RSZ(buffer); - buffer->pending_width = width; - buffer->pending_height = height; - return SHM_BUFFER_BUSY; - } - - SHM_BUFFER_CLEAR_PNDNG_RSZ(buffer); - - if (new_size > buffer->pool_size) { - munmap(buffer->data, buffer->pool_size); - ftruncate(buffer->fd, new_size); - - buffer->data = mmap(NULL, new_size, PROT_READ | PROT_WRITE, - MAP_SHARED, buffer->fd, 0); - - // TODO: the buffer should be destroyed when -1 is return - if (buffer->data == MAP_FAILED) - return -1; - - 
wl_shm_pool_resize(buffer->shm_pool, new_size); - buffer->pool_size = new_size; - } - - const void *listener = wl_proxy_get_listener((struct wl_proxy*)buffer->buffer); - - wl_buffer_destroy(buffer->buffer); - buffer->buffer = wl_shm_pool_create_buffer(buffer->shm_pool, - 0, width, height, new_stride, - buffer->format.wl_format); - - wl_buffer_add_listener(buffer->buffer, listener, buffer); - - buffer->height = height; - buffer->stride = new_stride; - - return 0; -} - -int shm_buffer_pending_resize(shm_buffer_t *buffer) -{ - if (SHM_BUFFER_PENDING_RESIZE(buffer)) { - SHM_BUFFER_CLEAR_PNDNG_RSZ(buffer); - return shm_buffer_resize(buffer, buffer->pending_width, buffer->pending_height); - } - else { - return 0; - } -} - -void shm_buffer_destroy(shm_buffer_t *buffer) -{ - if (!buffer) - return; - - wl_buffer_destroy(buffer->buffer); - wl_shm_pool_destroy(buffer->shm_pool); - munmap(buffer->data, buffer->pool_size); - close(buffer->fd); - free(buffer); -} diff --git a/video/out/wayland/buffer.h b/video/out/wayland/buffer.h deleted file mode 100644 index 783cd10..0000000 --- a/video/out/wayland/buffer.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * This file is part of mpv video player. - * Copyright © 2014 Alexander Preisinger <alexander.preisinger@gmail.com> - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#ifndef MPLAYER_WAYLAND_BUFFER_H -#define MPLAYER_WAYLAND_BUFFER_H - -#include <libavutil/common.h> -#include "video/sws_utils.h" -#include "video/img_format.h" -#include "video/out/wayland_common.h" - -#define SHM_BUFFER_STRIDE(width, bytes) \ - FFALIGN((width) * (bytes), SWS_MIN_BYTE_ALIGN) - -typedef struct format { - enum wl_shm_format wl_format; - enum mp_imgfmt mp_format; -} format_t; - -int8_t format_get_bytes(const format_t *fmt); - -typedef enum shm_buffer_flags { - SHM_BUFFER_BUSY = 1 << 0, // in use by the compositor - SHM_BUFFER_DIRTY = 1 << 1, // buffer contains new content - SHM_BUFFER_ONESHOT = 1 << 2, // free after release - SHM_BUFFER_RESIZE_LATER = 1 << 3, // free after release -} shm_buffer_flags_t; - -#define SHM_BUFFER_IS_BUSY(b) (!!((b)->flags & SHM_BUFFER_BUSY)) -#define SHM_BUFFER_IS_DIRTY(b) (!!((b)->flags & SHM_BUFFER_DIRTY)) -#define SHM_BUFFER_IS_ONESHOT(b) (!!((b)->flags & SHM_BUFFER_ONESHOT)) -#define SHM_BUFFER_PENDING_RESIZE(b) (!!((b)->flags & SHM_BUFFER_RESIZE_LATER)) - -#define SHM_BUFFER_SET_BUSY(b) (b)->flags |= SHM_BUFFER_BUSY -#define SHM_BUFFER_SET_DIRTY(b) (b)->flags |= SHM_BUFFER_DIRTY -#define SHM_BUFFER_SET_ONESHOT(b) (b)->flags |= SHM_BUFFER_ONESHOT -#define SHM_BUFFER_SET_PNDNG_RSZ(b) (b)->flags |= SHM_BUFFER_RESIZE_LATER - -#define SHM_BUFFER_CLEAR_BUSY(b) (b)->flags &= ~SHM_BUFFER_BUSY -#define SHM_BUFFER_CLEAR_DIRTY(b) (b)->flags &= ~SHM_BUFFER_DIRTY -#define SHM_BUFFER_CLEAR_ONESHOT(b) (b)->flags &= ~SHM_BUFFER_ONESHOT -#define SHM_BUFFER_CLEAR_PNDNG_RSZ(b) (b)->flags &= ~SHM_BUFFER_RESIZE_LATER - -typedef struct buffer { - struct wl_buffer *buffer; - - int flags; - - uint32_t height; - uint32_t stride; - uint32_t bytes; // bytes per pixel - // width = stride / bytes per pixel - // size = stride * height - - struct wl_shm_pool *shm_pool; // for growing buffers; - - int fd; - void *data; - uint32_t pool_size; // size of pool and data XXX - // pool_size can be far bigger than the buffer size - - format_t 
format; - - uint32_t pending_height; - uint32_t pending_width; -} shm_buffer_t; - -shm_buffer_t* shm_buffer_create(uint32_t width, - uint32_t height, - format_t fmt, - struct wl_shm *shm, - const struct wl_buffer_listener *listener); - -// shm pool is only able to grow and won't shrink -// returns 0 on success or buffer flags indicating the buffer status which -// prevent it from resizing -int shm_buffer_resize(shm_buffer_t *buffer, uint32_t width, uint32_t height); - -// if shm_buffer_resize returns SHM_BUFFER_BUSY this function can be called -// after the buffer is released to resize it afterwards -// returns 0 if no pending resize flag was set and -1 on errors -int shm_buffer_pending_resize(shm_buffer_t *buffer); - -// buffer is freed, don't use the buffer after calling this function on it -void shm_buffer_destroy(shm_buffer_t *buffer); - -#endif /* MPLAYER_WAYLAND_BUFFER_H */ diff --git a/video/out/wayland/memfile.c b/video/out/wayland/memfile.c deleted file mode 100644 index f28216d..0000000 --- a/video/out/wayland/memfile.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * This file is part of mpv video player. - * Copyright © 2014 Alexander Preisinger <alexander.preisinger@gmail.com> - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> -#include <errno.h> -#include <string.h> - -#include "video/out/wayland/memfile.h" - -/* copied from weston clients */ -static int set_cloexec_or_close(int fd) -{ - long flags; - - if (fd == -1) - return -1; - - if ((flags = fcntl(fd, F_GETFD)) == -1) - goto err; - - if (fcntl(fd, F_SETFD, flags | FD_CLOEXEC) == -1) - goto err; - - return fd; - -err: - close(fd); - return -1; -} - -static int create_tmpfile_cloexec(char *tmpname) -{ - int fd; - -#ifdef HAVE_MKOSTEMP - fd = mkostemp(tmpname, O_CLOEXEC); - if (fd >= 0) - unlink(tmpname); -#else - fd = mkstemp(tmpname); - if (fd >= 0) { - fd = set_cloexec_or_close(fd); - unlink(tmpname); - } -#endif - - return fd; -} - -static int os_create_anonymous_file(off_t size) -{ - static const char template[] = "/mpv-temp-XXXXXX"; - const char *path; - char *name; - int fd; - - path = getenv("XDG_RUNTIME_DIR"); - if (!path) { - errno = ENOENT; - return -1; - } - - name = malloc(strlen(path) + sizeof(template)); - if (!name) - return -1; - - strcpy(name, path); - strcat(name, template); - - fd = create_tmpfile_cloexec(name); - - free(name); - - if (fd < 0) - return -1; - - if (ftruncate(fd, size) < 0) { - close(fd); - return -1; - } - - return fd; -} - -int memfile_create(off_t size) -{ - return os_create_anonymous_file(size); -} diff --git a/video/out/wayland/memfile.h b/video/out/wayland/memfile.h deleted file mode 100644 index 67cdb1b..0000000 --- a/video/out/wayland/memfile.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * This file is part of mpv video player. - * Copyright © 2014 Alexander Preisinger <alexander.preisinger@gmail.com> - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef MPLAYER_WAYLAND_MEMFILE_H -#define MPLAYER_WAYLAND_MEMFILE_H - -// create file decsriptor to memory space without filesystem representation -// truncates to size immediately -int memfile_create(off_t size); - -#endif /* MPLAYER_WAYLAND_MEMFILE_H */ diff --git a/video/out/wayland/server-decoration.xml b/video/out/wayland/server-decoration.xml new file mode 100644 index 0000000..8bc106c --- /dev/null +++ b/video/out/wayland/server-decoration.xml @@ -0,0 +1,94 @@ +<?xml version="1.0" encoding="UTF-8"?> +<protocol name="server_decoration"> + <copyright><![CDATA[ + Copyright (C) 2015 Martin Gräßlin + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 2.1 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ ]]></copyright> + <interface name="org_kde_kwin_server_decoration_manager" version="1"> + <description summary="Server side window decoration manager"> + This interface allows to coordinate whether the server should create + a server-side window decoration around a wl_surface representing a + shell surface (wl_shell_surface or similar). By announcing support + for this interface the server indicates that it supports server + side decorations. + </description> + <request name="create"> + <description summary="Create a server-side decoration object for a given surface"> + When a client creates a server-side decoration object it indicates + that it supports the protocol. The client is supposed to tell the + server whether it wants server-side decorations or will provide + client-side decorations. + + If the client does not create a server-side decoration object for + a surface the server interprets this as lack of support for this + protocol and considers it as client-side decorated. Nevertheless a + client-side decorated surface should use this protocol to indicate + to the server that it does not want a server-side deco. + </description> + <arg name="id" type="new_id" interface="org_kde_kwin_server_decoration"/> + <arg name="surface" type="object" interface="wl_surface"/> + </request> + <enum name="mode"> + <description summary="Possible values to use in request_mode and the event mode."/> + <entry name="None" value="0" summary="Undecorated: The surface is not decorated at all, neither server nor client-side. 
An example is a popup surface which should not be decorated."/> + <entry name="Client" value="1" summary="Client-side decoration: The decoration is part of the surface and the client."/> + <entry name="Server" value="2" summary="Server-side decoration: The server embeds the surface into a decoration frame."/> + </enum> + <event name="default_mode"> + <description summary="The default mode used on the server"> + This event is emitted directly after binding the interface. It contains + the default mode for the decoration. When a new server decoration object + is created this new object will be in the default mode until the first + request_mode is requested. + + The server may change the default mode at any time. + </description> + <arg name="mode" type="uint" summary="The default decoration mode applied to newly created server decorations."/> + </event> + </interface> + <interface name="org_kde_kwin_server_decoration" version="1"> + <request name="release" type="destructor"> + <description summary="release the server decoration object"/> + </request> + <enum name="mode"> + <description summary="Possible values to use in request_mode and the event mode."/> + <entry name="None" value="0" summary="Undecorated: The surface is not decorated at all, neither server nor client-side. 
An example is a popup surface which should not be decorated."/> + <entry name="Client" value="1" summary="Client-side decoration: The decoration is part of the surface and the client."/> + <entry name="Server" value="2" summary="Server-side decoration: The server embeds the surface into a decoration frame."/> + </enum> + <request name="request_mode"> + <description summary="The decoration mode the surface wants to use."/> + <arg name="mode" type="uint" summary="The mode this surface wants to use."/> + </request> + <event name="mode"> + <description summary="The new decoration mode applied by the server"> + This event is emitted directly after the decoration is created and + represents the base decoration policy by the server. E.g. a server + which wants all surfaces to be client-side decorated will send Client, + a server which wants server-side decoration will send Server. + + The client can request a different mode through the decoration request. + The server will acknowledge this by another event with the same mode. So + even if a server prefers server-side decoration it's possible to force a + client-side decoration. + + The server may emit this event at any time. In this case the client can + again request a different mode. It's the responsibility of the server to + prevent a feedback loop. + </description> + <arg name="mode" type="uint" summary="The decoration mode applied to the surface by the server."/> + </event> + </interface> +</protocol> diff --git a/video/out/wayland_common.c b/video/out/wayland_common.c index 181723a..19adf01 100644 --- a/video/out/wayland_common.c +++ b/video/out/wayland_common.c @@ -1,8 +1,5 @@ /* * This file is part of mpv video player. - * Copyright © 2008 Kristian Høgsberg - * Copyright © 2012-2013 Collabora, Ltd. 
- * Copyright © 2013 Alexander Preisinger <alexander.preisinger@gmail.com> * * mpv is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -18,243 +15,331 @@ * License along with mpv. If not, see <http://www.gnu.org/licenses/>. */ -#include <stdio.h> -#include <stdlib.h> -#include <math.h> -#include <inttypes.h> -#include <limits.h> -#include <assert.h> #include <poll.h> #include <unistd.h> - -#include <sys/mman.h> #include <linux/input.h> - -#include "config.h" -#include "misc/bstr.h" -#include "options/options.h" #include "common/msg.h" -#include "mpv_talloc.h" - -#include "wayland_common.h" - -#include "vo.h" -#include "win_state.h" +#include "input/input.h" +#include "input/keycodes.h" #include "osdep/io.h" #include "osdep/timer.h" +#include "win_state.h" +#include "wayland_common.h" -#include "input/input.h" -#include "input/event.h" -#include "input/keycodes.h" +// Generated from xdg-shell-unstable-v6.xml +#include "video/out/wayland/xdg-shell-v6.h" -static int lookupkey(int key); +// Generated from idle-inhibit-unstable-v1.xml +#include "video/out/wayland/idle-inhibit-v1.h" -static void hide_cursor(struct vo_wayland_state * wl); -static void show_cursor(struct vo_wayland_state * wl); -static void window_move(struct vo_wayland_state * wl, uint32_t serial); -static void window_set_title(struct vo_wayland_state * wl, const char *title); -static void schedule_resize(struct vo_wayland_state *wl, - uint32_t edges, - int32_t width, - int32_t height); +// Generated from server-decoration.xml +#include "video/out/wayland/srv-decor.h" -static void vo_wayland_fullscreen(struct vo *vo); +static void xdg_shell_ping(void *data, struct zxdg_shell_v6 *shell, uint32_t serial) +{ + zxdg_shell_v6_pong(shell, serial); +} -static const struct wl_callback_listener frame_listener; +static const struct zxdg_shell_v6_listener xdg_shell_listener = { + xdg_shell_ping, +}; -static const struct mp_keymap keymap[] = { - // special 
keys - {XKB_KEY_Pause, MP_KEY_PAUSE}, {XKB_KEY_Escape, MP_KEY_ESC}, - {XKB_KEY_BackSpace, MP_KEY_BS}, {XKB_KEY_Tab, MP_KEY_TAB}, - {XKB_KEY_Return, MP_KEY_ENTER}, {XKB_KEY_Menu, MP_KEY_MENU}, - {XKB_KEY_Print, MP_KEY_PRINT}, +static int spawn_cursor(struct vo_wayland_state *wl) +{ + if (wl->allocated_cursor_scale == wl->scaling) /* Reuse if size is identical */ + return 0; + else if (wl->cursor_theme) + wl_cursor_theme_destroy(wl->cursor_theme); + + wl->cursor_theme = wl_cursor_theme_load(NULL, 32*wl->scaling, wl->shm); + if (!wl->cursor_theme) { + MP_ERR(wl, "Unable to load cursor theme!\n"); + return 1; + } - // cursor keys - {XKB_KEY_Left, MP_KEY_LEFT}, {XKB_KEY_Right, MP_KEY_RIGHT}, - {XKB_KEY_Up, MP_KEY_UP}, {XKB_KEY_Down, MP_KEY_DOWN}, + wl->default_cursor = wl_cursor_theme_get_cursor(wl->cursor_theme, "left_ptr"); + if (!wl->default_cursor) { + MP_ERR(wl, "Unable to load cursor theme!\n"); + return 1; + } - // navigation block - {XKB_KEY_Insert, MP_KEY_INSERT}, {XKB_KEY_Delete, MP_KEY_DELETE}, - {XKB_KEY_Home, MP_KEY_HOME}, {XKB_KEY_End, MP_KEY_END}, - {XKB_KEY_Page_Up, MP_KEY_PAGE_UP}, {XKB_KEY_Page_Down, MP_KEY_PAGE_DOWN}, + wl->allocated_cursor_scale = wl->scaling; - // F-keys - {XKB_KEY_F1, MP_KEY_F+1}, {XKB_KEY_F2, MP_KEY_F+2}, - {XKB_KEY_F3, MP_KEY_F+3}, {XKB_KEY_F4, MP_KEY_F+4}, - {XKB_KEY_F5, MP_KEY_F+5}, {XKB_KEY_F6, MP_KEY_F+6}, - {XKB_KEY_F7, MP_KEY_F+7}, {XKB_KEY_F8, MP_KEY_F+8}, - {XKB_KEY_F9, MP_KEY_F+9}, {XKB_KEY_F10, MP_KEY_F+10}, - {XKB_KEY_F11, MP_KEY_F+11}, {XKB_KEY_F12, MP_KEY_F+12}, + return 0; +} - // numpad independent of numlock - {XKB_KEY_KP_Subtract, '-'}, {XKB_KEY_KP_Add, '+'}, - {XKB_KEY_KP_Multiply, '*'}, {XKB_KEY_KP_Divide, '/'}, - {XKB_KEY_KP_Enter, MP_KEY_KPENTER}, +static int set_cursor_visibility(struct vo_wayland_state *wl, bool on) +{ + if (!wl->pointer) + return VO_NOTAVAIL; + if (on) { + if (spawn_cursor(wl)) + return VO_FALSE; + struct wl_cursor_image *img = wl->default_cursor->images[0]; + struct wl_buffer *buffer = 
wl_cursor_image_get_buffer(img); + if (!buffer) + return VO_FALSE; + wl_pointer_set_cursor(wl->pointer, wl->pointer_id, wl->cursor_surface, + img->hotspot_x/wl->scaling, img->hotspot_y/wl->scaling); + wl_surface_set_buffer_scale(wl->cursor_surface, wl->scaling); + wl_surface_attach(wl->cursor_surface, buffer, 0, 0); + wl_surface_damage(wl->cursor_surface, 0, 0, img->width, img->height); + wl_surface_commit(wl->cursor_surface); + } else { + wl_pointer_set_cursor(wl->pointer, wl->pointer_id, NULL, 0, 0); + } + return VO_TRUE; +} - // numpad with numlock - {XKB_KEY_KP_0, MP_KEY_KP0}, {XKB_KEY_KP_1, MP_KEY_KP1}, - {XKB_KEY_KP_2, MP_KEY_KP2}, {XKB_KEY_KP_3, MP_KEY_KP3}, - {XKB_KEY_KP_4, MP_KEY_KP4}, {XKB_KEY_KP_5, MP_KEY_KP5}, - {XKB_KEY_KP_6, MP_KEY_KP6}, {XKB_KEY_KP_7, MP_KEY_KP7}, - {XKB_KEY_KP_8, MP_KEY_KP8}, {XKB_KEY_KP_9, MP_KEY_KP9}, - {XKB_KEY_KP_Decimal, MP_KEY_KPDEC}, {XKB_KEY_KP_Separator, MP_KEY_KPDEC}, +static void pointer_handle_enter(void *data, struct wl_pointer *pointer, + uint32_t serial, struct wl_surface *surface, + wl_fixed_t sx, wl_fixed_t sy) +{ + struct vo_wayland_state *wl = data; - // numpad without numlock - {XKB_KEY_KP_Insert, MP_KEY_KPINS}, {XKB_KEY_KP_End, MP_KEY_KP1}, - {XKB_KEY_KP_Down, MP_KEY_KP2}, {XKB_KEY_KP_Page_Down, MP_KEY_KP3}, - {XKB_KEY_KP_Left, MP_KEY_KP4}, {XKB_KEY_KP_Begin, MP_KEY_KP5}, - {XKB_KEY_KP_Right, MP_KEY_KP6}, {XKB_KEY_KP_Home, MP_KEY_KP7}, - {XKB_KEY_KP_Up, MP_KEY_KP8}, {XKB_KEY_KP_Page_Up, MP_KEY_KP9}, - {XKB_KEY_KP_Delete, MP_KEY_KPDEL}, + wl->pointer = pointer; + wl->pointer_id = serial; - // "Multimedia keyboard" keys - {XKB_KEY_XF86MenuKB, MP_KEY_MENU}, - {XKB_KEY_XF86AudioPlay, MP_KEY_PLAY}, {XKB_KEY_XF86AudioPause, MP_KEY_PAUSE}, - {XKB_KEY_XF86AudioStop, MP_KEY_STOP}, - {XKB_KEY_XF86AudioPrev, MP_KEY_PREV}, {XKB_KEY_XF86AudioNext, MP_KEY_NEXT}, - {XKB_KEY_XF86AudioRewind, MP_KEY_REWIND}, - {XKB_KEY_XF86AudioForward, MP_KEY_FORWARD}, - {XKB_KEY_XF86AudioMute, MP_KEY_MUTE}, - {XKB_KEY_XF86AudioLowerVolume, 
MP_KEY_VOLUME_DOWN}, - {XKB_KEY_XF86AudioRaiseVolume, MP_KEY_VOLUME_UP}, - {XKB_KEY_XF86HomePage, MP_KEY_HOMEPAGE}, {XKB_KEY_XF86WWW, MP_KEY_WWW}, - {XKB_KEY_XF86Mail, MP_KEY_MAIL}, {XKB_KEY_XF86Favorites, MP_KEY_FAVORITES}, - {XKB_KEY_XF86Search, MP_KEY_SEARCH}, {XKB_KEY_XF86Sleep, MP_KEY_SLEEP}, + set_cursor_visibility(wl, true); + mp_input_put_key(wl->vo->input_ctx, MP_KEY_MOUSE_ENTER); +} - {0, 0} -}; +static void pointer_handle_leave(void *data, struct wl_pointer *pointer, + uint32_t serial, struct wl_surface *surface) +{ + struct vo_wayland_state *wl = data; + mp_input_put_key(wl->vo->input_ctx, MP_KEY_MOUSE_LEAVE); +} +static void pointer_handle_motion(void *data, struct wl_pointer *pointer, + uint32_t time, wl_fixed_t sx, wl_fixed_t sy) +{ + struct vo_wayland_state *wl = data; -/** Wayland listeners **/ + wl->mouse_x = wl_fixed_to_int(sx) * wl->scaling; + wl->mouse_y = wl_fixed_to_int(sy) * wl->scaling; -static void ssurface_handle_ping(void *data, - struct wl_shell_surface *shell_surface, - uint32_t serial) + mp_input_set_mouse_pos(wl->vo->input_ctx, wl->mouse_x, wl->mouse_y); +} + +static void window_move(struct vo_wayland_state *wl, uint32_t serial) { - wl_shell_surface_pong(shell_surface, serial); + if (wl->xdg_toplevel) + zxdg_toplevel_v6_move(wl->xdg_toplevel, wl->seat, serial); } -static void ssurface_handle_configure(void *data, - struct wl_shell_surface *shell_surface, - uint32_t edges, - int32_t width, - int32_t height) +static void pointer_handle_button(void *data, struct wl_pointer *wl_pointer, + uint32_t serial, uint32_t time, uint32_t button, + uint32_t state) { struct vo_wayland_state *wl = data; - float win_aspect = wl->window.aspect; - if (!width || !height) - return; - if (!wl->window.is_fullscreen) - width = win_aspect * height; - schedule_resize(wl, edges, width, height); + + state = state == WL_POINTER_BUTTON_STATE_PRESSED ? MP_KEY_STATE_DOWN + : MP_KEY_STATE_UP; + + button = button == BTN_LEFT ? MP_MBTN_LEFT : + button == BTN_MIDDLE ? 
MP_MBTN_MID : MP_MBTN_RIGHT; + + mp_input_put_key(wl->vo->input_ctx, button | state); + + if (!mp_input_test_dragging(wl->vo->input_ctx, wl->mouse_x, wl->mouse_y) && + (button == MP_MBTN_LEFT) && (state == MP_KEY_STATE_DOWN)) + window_move(wl, serial); } -static void ssurface_handle_popup_done(void *data, - struct wl_shell_surface *shell_surface) +static void pointer_handle_axis(void *data, struct wl_pointer *wl_pointer, + uint32_t time, uint32_t axis, wl_fixed_t value) { + struct vo_wayland_state *wl = data; + double val = wl_fixed_to_double(value)*0.1; + switch (axis) { + case WL_POINTER_AXIS_VERTICAL_SCROLL: + if (value > 0) + mp_input_put_wheel(wl->vo->input_ctx, MP_WHEEL_DOWN, +val); + if (value < 0) + mp_input_put_wheel(wl->vo->input_ctx, MP_WHEEL_UP, -val); + break; + case WL_POINTER_AXIS_HORIZONTAL_SCROLL: + if (value > 0) + mp_input_put_wheel(wl->vo->input_ctx, MP_WHEEL_RIGHT, +val); + if (value < 0) + mp_input_put_wheel(wl->vo->input_ctx, MP_WHEEL_LEFT, -val); + break; + } } -static const struct wl_shell_surface_listener shell_surface_listener = { - ssurface_handle_ping, - ssurface_handle_configure, - ssurface_handle_popup_done +static const struct wl_pointer_listener pointer_listener = { + pointer_handle_enter, + pointer_handle_leave, + pointer_handle_motion, + pointer_handle_button, + pointer_handle_axis, }; -static void output_handle_geometry(void *data, - struct wl_output *wl_output, - int32_t x, - int32_t y, - int32_t physical_width, - int32_t physical_height, - int32_t subpixel, - const char *make, - const char *model, - int32_t transform) +static int check_for_resize(struct vo_wayland_state *wl, wl_fixed_t x_w, wl_fixed_t y_w, + enum zxdg_toplevel_v6_resize_edge *edge) { - struct vo_wayland_output *output = data; - output->make = make; - output->model = model; + if (wl->touch_entries || wl->fullscreen) + return 0; + + const int edge_pixels = 64; + int pos[2] = { wl_fixed_to_double(x_w), wl_fixed_to_double(y_w) }; + int left_edge = pos[0] < 
edge_pixels; + int top_edge = pos[1] < edge_pixels; + int right_edge = pos[0] > (mp_rect_w(wl->geometry) - edge_pixels); + int bottom_edge = pos[1] > (mp_rect_h(wl->geometry) - edge_pixels); + + if (left_edge) { + *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_LEFT; + if (top_edge) + *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_TOP_LEFT; + else if (bottom_edge) + *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_BOTTOM_LEFT; + } else if (right_edge) { + *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_RIGHT; + if (top_edge) + *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_TOP_RIGHT; + else if (bottom_edge) + *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_BOTTOM_RIGHT; + } else if (top_edge) { + *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_TOP; + } else if (bottom_edge) { + *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_BOTTOM; + } else { + *edge = 0; + return 0; + } + + return 1; } -static void output_handle_mode(void *data, - struct wl_output *wl_output, - uint32_t flags, - int32_t width, - int32_t height, - int32_t refresh) +static void touch_handle_down(void *data, struct wl_touch *wl_touch, + uint32_t serial, uint32_t time, struct wl_surface *surface, + int32_t id, wl_fixed_t x_w, wl_fixed_t y_w) { - struct vo_wayland_output *output = data; + struct vo_wayland_state *wl = data; - // only save current mode - if (!output || !(flags & WL_OUTPUT_MODE_CURRENT)) + enum zxdg_toplevel_v6_resize_edge edge; + if (check_for_resize(wl, x_w, y_w, &edge)) { + wl->touch_entries = 0; + zxdg_toplevel_v6_resize(wl->xdg_toplevel, wl->seat, serial, edge); + return; + } else if (wl->touch_entries) { + wl->touch_entries = 0; + zxdg_toplevel_v6_move(wl->xdg_toplevel, wl->seat, serial); return; + } - output->width = width; - output->height = height; - output->flags = flags; - output->refresh_rate = refresh; -} + wl->touch_entries = 1; -static void output_handle_done(void* data, struct wl_output *wl_output) -{ -} + wl->mouse_x = wl_fixed_to_int(x_w) * wl->scaling; + wl->mouse_y = wl_fixed_to_int(y_w) * wl->scaling; -static void output_handle_scale(void* data, 
struct wl_output *wl_output, - int32_t factor) -{ - struct vo_wayland_output *output = data; - output->scale = factor; + mp_input_set_mouse_pos(wl->vo->input_ctx, wl->mouse_x, wl->mouse_y); + mp_input_put_key(wl->vo->input_ctx, MP_MBTN_LEFT | MP_KEY_STATE_DOWN); } -static const struct wl_output_listener output_listener = { - output_handle_geometry, - output_handle_mode, - output_handle_done, - output_handle_scale -}; +static void touch_handle_up(void *data, struct wl_touch *wl_touch, + uint32_t serial, uint32_t time, int32_t id) +{ + struct vo_wayland_state *wl = data; + wl->touch_entries = 0; -/* SURFACE LISTENER */ + mp_input_put_key(wl->vo->input_ctx, MP_MBTN_LEFT | MP_KEY_STATE_UP); +} -static void surface_handle_enter(void *data, - struct wl_surface *wl_surface, - struct wl_output *output) +static void touch_handle_motion(void *data, struct wl_touch *wl_touch, + uint32_t time, int32_t id, wl_fixed_t x_w, wl_fixed_t y_w) { struct vo_wayland_state *wl = data; - wl->display.current_output = NULL; - struct vo_wayland_output *o; - wl_list_for_each(o, &wl->display.output_list, link) { - if (o->output == output) { - wl->display.current_output = o; - break; - } - } + wl->mouse_x = wl_fixed_to_int(x_w) * wl->scaling; + wl->mouse_y = wl_fixed_to_int(y_w) * wl->scaling; - wl->window.events |= VO_EVENT_WIN_STATE | VO_EVENT_RESIZE; + mp_input_set_mouse_pos(wl->vo->input_ctx, wl->mouse_x, wl->mouse_y); } -static void surface_handle_leave(void *data, - struct wl_surface *wl_surface, - struct wl_output *output) +static void touch_handle_frame(void *data, struct wl_touch *wl_touch) { - // window can be displayed at 2 output, but we only use the most recently - // entered and discard the previous one even if a part of the window is - // still visible on the previous entered output. 
- // Don't bother with a "leave" logic } -static const struct wl_surface_listener surface_listener = { - surface_handle_enter, - surface_handle_leave +static void touch_handle_cancel(void *data, struct wl_touch *wl_touch) +{ +} + +static const struct wl_touch_listener touch_listener = { + touch_handle_down, + touch_handle_up, + touch_handle_motion, + touch_handle_frame, + touch_handle_cancel, }; -/* KEYBOARD LISTENER */ -static void keyboard_handle_keymap(void *data, - struct wl_keyboard *wl_keyboard, - uint32_t format, - int32_t fd, - uint32_t size) +static const struct mp_keymap keymap[] = { + /* Special keys */ + {XKB_KEY_Pause, MP_KEY_PAUSE}, {XKB_KEY_Escape, MP_KEY_ESC}, + {XKB_KEY_BackSpace, MP_KEY_BS}, {XKB_KEY_Tab, MP_KEY_TAB}, + {XKB_KEY_Return, MP_KEY_ENTER}, {XKB_KEY_Menu, MP_KEY_MENU}, + {XKB_KEY_Print, MP_KEY_PRINT}, + + /* Cursor keys */ + {XKB_KEY_Left, MP_KEY_LEFT}, {XKB_KEY_Right, MP_KEY_RIGHT}, + {XKB_KEY_Up, MP_KEY_UP}, {XKB_KEY_Down, MP_KEY_DOWN}, + + /* Navigation keys */ + {XKB_KEY_Insert, MP_KEY_INSERT}, {XKB_KEY_Delete, MP_KEY_DELETE}, + {XKB_KEY_Home, MP_KEY_HOME}, {XKB_KEY_End, MP_KEY_END}, + {XKB_KEY_Page_Up, MP_KEY_PAGE_UP}, {XKB_KEY_Page_Down, MP_KEY_PAGE_DOWN}, + + /* F-keys */ + {XKB_KEY_F1, MP_KEY_F + 1}, {XKB_KEY_F2, MP_KEY_F + 2}, + {XKB_KEY_F3, MP_KEY_F + 3}, {XKB_KEY_F4, MP_KEY_F + 4}, + {XKB_KEY_F5, MP_KEY_F + 5}, {XKB_KEY_F6, MP_KEY_F + 6}, + {XKB_KEY_F7, MP_KEY_F + 7}, {XKB_KEY_F8, MP_KEY_F + 8}, + {XKB_KEY_F9, MP_KEY_F + 9}, {XKB_KEY_F10, MP_KEY_F +10}, + {XKB_KEY_F11, MP_KEY_F +11}, {XKB_KEY_F12, MP_KEY_F +12}, + + /* Numpad independent of numlock */ + {XKB_KEY_KP_Subtract, '-'}, {XKB_KEY_KP_Add, '+'}, + {XKB_KEY_KP_Multiply, '*'}, {XKB_KEY_KP_Divide, '/'}, + {XKB_KEY_KP_Enter, MP_KEY_KPENTER}, + + /* Numpad with numlock */ + {XKB_KEY_KP_0, MP_KEY_KP0}, {XKB_KEY_KP_1, MP_KEY_KP1}, + {XKB_KEY_KP_2, MP_KEY_KP2}, {XKB_KEY_KP_3, MP_KEY_KP3}, + {XKB_KEY_KP_4, MP_KEY_KP4}, {XKB_KEY_KP_5, MP_KEY_KP5}, + {XKB_KEY_KP_6, MP_KEY_KP6}, 
{XKB_KEY_KP_7, MP_KEY_KP7}, + {XKB_KEY_KP_8, MP_KEY_KP8}, {XKB_KEY_KP_9, MP_KEY_KP9}, + {XKB_KEY_KP_Decimal, MP_KEY_KPDEC}, {XKB_KEY_KP_Separator, MP_KEY_KPDEC}, + + /* Numpad without numlock */ + {XKB_KEY_KP_Insert, MP_KEY_KPINS}, {XKB_KEY_KP_End, MP_KEY_KP1}, + {XKB_KEY_KP_Down, MP_KEY_KP2}, {XKB_KEY_KP_Page_Down, MP_KEY_KP3}, + {XKB_KEY_KP_Left, MP_KEY_KP4}, {XKB_KEY_KP_Begin, MP_KEY_KP5}, + {XKB_KEY_KP_Right, MP_KEY_KP6}, {XKB_KEY_KP_Home, MP_KEY_KP7}, + {XKB_KEY_KP_Up, MP_KEY_KP8}, {XKB_KEY_KP_Page_Up, MP_KEY_KP9}, + {XKB_KEY_KP_Delete, MP_KEY_KPDEL}, + + /* Multimedia keys */ + {XKB_KEY_XF86MenuKB, MP_KEY_MENU}, + {XKB_KEY_XF86AudioPlay, MP_KEY_PLAY}, {XKB_KEY_XF86AudioPause, MP_KEY_PAUSE}, + {XKB_KEY_XF86AudioStop, MP_KEY_STOP}, + {XKB_KEY_XF86AudioPrev, MP_KEY_PREV}, {XKB_KEY_XF86AudioNext, MP_KEY_NEXT}, + {XKB_KEY_XF86AudioRewind, MP_KEY_REWIND}, + {XKB_KEY_XF86AudioForward, MP_KEY_FORWARD}, + {XKB_KEY_XF86AudioMute, MP_KEY_MUTE}, + {XKB_KEY_XF86AudioLowerVolume, MP_KEY_VOLUME_DOWN}, + {XKB_KEY_XF86AudioRaiseVolume, MP_KEY_VOLUME_UP}, + {XKB_KEY_XF86HomePage, MP_KEY_HOMEPAGE}, {XKB_KEY_XF86WWW, MP_KEY_WWW}, + {XKB_KEY_XF86Mail, MP_KEY_MAIL}, {XKB_KEY_XF86Favorites, MP_KEY_FAVORITES}, + {XKB_KEY_XF86Search, MP_KEY_SEARCH}, {XKB_KEY_XF86Sleep, MP_KEY_SLEEP}, + + {0, 0} +}; + +static void keyboard_handle_keymap(void *data, struct wl_keyboard *wl_keyboard, + uint32_t format, int32_t fd, uint32_t size) { struct vo_wayland_state *wl = data; char *map_str; @@ -270,68 +355,97 @@ static void keyboard_handle_keymap(void *data, return; } - wl->input.xkb.keymap = xkb_keymap_new_from_string(wl->input.xkb.context, - map_str, - XKB_KEYMAP_FORMAT_TEXT_V1, - 0); + wl->xkb_keymap = xkb_keymap_new_from_string(wl->xkb_context, map_str, + XKB_KEYMAP_FORMAT_TEXT_V1, 0); munmap(map_str, size); close(fd); - if (!wl->input.xkb.keymap) { + if (!wl->xkb_keymap) { MP_ERR(wl, "failed to compile keymap\n"); return; } - wl->input.xkb.state = xkb_state_new(wl->input.xkb.keymap); - if 
(!wl->input.xkb.state) { + wl->xkb_state = xkb_state_new(wl->xkb_keymap); + if (!wl->xkb_state) { MP_ERR(wl, "failed to create XKB state\n"); - xkb_keymap_unref(wl->input.xkb.keymap); - wl->input.xkb.keymap = NULL; + xkb_keymap_unref(wl->xkb_keymap); + wl->xkb_keymap = NULL; return; } } -static void keyboard_handle_enter(void *data, - struct wl_keyboard *wl_keyboard, - uint32_t serial, - struct wl_surface *surface, +static void keyboard_handle_enter(void *data, struct wl_keyboard *wl_keyboard, + uint32_t serial, struct wl_surface *surface, struct wl_array *keys) { } -static void keyboard_handle_leave(void *data, - struct wl_keyboard *wl_keyboard, - uint32_t serial, - struct wl_surface *surface) +static void keyboard_handle_leave(void *data, struct wl_keyboard *wl_keyboard, + uint32_t serial, struct wl_surface *surface) +{ +} + +static bool create_input(struct vo_wayland_state *wl) { + wl->xkb_context = xkb_context_new(XKB_CONTEXT_NO_FLAGS); + + if (!wl->xkb_context) { + MP_ERR(wl, "failed to initialize input: check xkbcommon\n"); + return 1; + } + + return 0; } -static void keyboard_handle_key(void *data, - struct wl_keyboard *wl_keyboard, - uint32_t serial, - uint32_t time, - uint32_t key, +static int lookupkey(int key) +{ + const char *passthrough_keys = " -+*/<>`~!@#$%^&()_{}:;\"\',.?\\|=[]"; + + int mpkey = 0; + if ((key >= 'a' && key <= 'z') || (key >= 'A' && key <= 'Z') || + (key >= '0' && key <= '9') || + (key > 0 && key < 256 && strchr(passthrough_keys, key))) + mpkey = key; + + if (!mpkey) + mpkey = lookup_keymap_table(keymap, key); + + return mpkey; +} + +static void keyboard_handle_key(void *data, struct wl_keyboard *wl_keyboard, + uint32_t serial, uint32_t time, uint32_t key, uint32_t state) { struct vo_wayland_state *wl = data; uint32_t code = code = key + 8; - xkb_keysym_t sym = xkb_state_key_get_one_sym(wl->input.xkb.state, code); + xkb_keysym_t sym = xkb_state_key_get_one_sym(wl->xkb_state, code); int mpmod = state == WL_KEYBOARD_KEY_STATE_PRESSED ? 
MP_KEY_STATE_DOWN : MP_KEY_STATE_UP; - static const char *mod_names[] = {XKB_MOD_NAME_SHIFT, XKB_MOD_NAME_CTRL, - XKB_MOD_NAME_ALT, XKB_MOD_NAME_LOGO, 0}; - static int mods[] = {MP_KEY_MODIFIER_SHIFT, MP_KEY_MODIFIER_CTRL, - MP_KEY_MODIFIER_ALT, MP_KEY_MODIFIER_META, 0}; + static const char *mod_names[] = { + XKB_MOD_NAME_SHIFT, + XKB_MOD_NAME_CTRL, + XKB_MOD_NAME_ALT, + XKB_MOD_NAME_LOGO, + 0, + }; + + static int mods[] = { + MP_KEY_MODIFIER_SHIFT, + MP_KEY_MODIFIER_CTRL, + MP_KEY_MODIFIER_ALT, + MP_KEY_MODIFIER_META, + 0, + }; for (int n = 0; mods[n]; n++) { - xkb_mod_index_t index = - xkb_keymap_mod_get_index(wl->input.xkb.keymap, mod_names[n]); - if (!xkb_state_mod_index_is_consumed(wl->input.xkb.state, code, index) - && xkb_state_mod_index_is_active(wl->input.xkb.state, index, + xkb_mod_index_t index = xkb_keymap_mod_get_index(wl->xkb_keymap, mod_names[n]); + if (!xkb_state_mod_index_is_consumed(wl->xkb_state, code, index) + && xkb_state_mod_index_is_active(wl->xkb_state, index, XKB_STATE_MODS_DEPRESSED)) mpmod |= mods[n]; } @@ -340,42 +454,29 @@ static void keyboard_handle_key(void *data, if (mpkey) { mp_input_put_key(wl->vo->input_ctx, mpkey | mpmod); } else { - char s[80]; + char s[128]; if (xkb_keysym_to_utf8(sym, s, sizeof(s)) > 0) mp_input_put_key_utf8(wl->vo->input_ctx, mpmod, bstr0(s)); } } -static void keyboard_handle_modifiers(void *data, - struct wl_keyboard *wl_keyboard, - uint32_t serial, - uint32_t mods_depressed, - uint32_t mods_latched, - uint32_t mods_locked, +static void keyboard_handle_modifiers(void *data, struct wl_keyboard *wl_keyboard, + uint32_t serial, uint32_t mods_depressed, + uint32_t mods_latched, uint32_t mods_locked, uint32_t group) { struct vo_wayland_state *wl = data; - xkb_state_update_mask(wl->input.xkb.state, - mods_depressed, - mods_latched, - mods_locked, - 0, 0, group); + xkb_state_update_mask(wl->xkb_state, mods_depressed, mods_latched, + mods_locked, 0, 0, group); } -static void keyboard_handle_repeat_info(void *data, - 
struct wl_keyboard *wl_keyboard, - int32_t rate, - int32_t delay) +static void keyboard_handle_repeat_info(void *data, struct wl_keyboard *wl_keyboard, + int32_t rate, int32_t delay) { struct vo_wayland_state *wl = data; - if (wl->vo->opts->native_keyrepeat) { - if (rate < 0 || delay < 0) { - MP_WARN(wl, "Invalid rate or delay values sent by compositor\n"); - return; - } + if (wl->vo->opts->native_keyrepeat) mp_input_set_repeat_info(wl->vo->input_ctx, rate, delay); - } } static const struct wl_keyboard_listener keyboard_listener = { @@ -384,562 +485,566 @@ static const struct wl_keyboard_listener keyboard_listener = { keyboard_handle_leave, keyboard_handle_key, keyboard_handle_modifiers, - keyboard_handle_repeat_info + keyboard_handle_repeat_info, }; -/* POINTER LISTENER */ -static void pointer_handle_enter(void *data, - struct wl_pointer *pointer, - uint32_t serial, - struct wl_surface *surface, - wl_fixed_t sx_w, - wl_fixed_t sy_w) +static void seat_handle_caps(void *data, struct wl_seat *seat, + enum wl_seat_capability caps) { struct vo_wayland_state *wl = data; - wl->cursor.serial = serial; - wl->cursor.pointer = pointer; + if ((caps & WL_SEAT_CAPABILITY_POINTER) && !wl->pointer) { + wl->pointer = wl_seat_get_pointer(seat); + wl_pointer_add_listener(wl->pointer, &pointer_listener, wl); + } else if (!(caps & WL_SEAT_CAPABILITY_POINTER) && wl->pointer) { + wl_pointer_destroy(wl->pointer); + wl->pointer = NULL; + } - /* Release the left button on pointer enter again - * because after moving the shell surface no release event is sent */ - mp_input_put_key(wl->vo->input_ctx, MP_KEY_MOUSE_ENTER); - mp_input_put_key(wl->vo->input_ctx, MP_MBTN_LEFT | MP_KEY_STATE_UP); - show_cursor(wl); + if ((caps & WL_SEAT_CAPABILITY_KEYBOARD) && !wl->keyboard) { + wl->keyboard = wl_seat_get_keyboard(seat); + wl_keyboard_add_listener(wl->keyboard, &keyboard_listener, wl); + } else if (!(caps & WL_SEAT_CAPABILITY_KEYBOARD) && wl->keyboard) { + wl_keyboard_destroy(wl->keyboard); + 
wl->keyboard = NULL; + } + + if ((caps & WL_SEAT_CAPABILITY_TOUCH) && !wl->touch) { + wl->touch = wl_seat_get_touch(seat); + wl_touch_set_user_data(wl->touch, wl); + wl_touch_add_listener(wl->touch, &touch_listener, wl); + } else if (!(caps & WL_SEAT_CAPABILITY_TOUCH) && wl->touch) { + wl_touch_destroy(wl->touch); + wl->touch = NULL; + } } -static void pointer_handle_leave(void *data, - struct wl_pointer *pointer, - uint32_t serial, - struct wl_surface *surface) +static const struct wl_seat_listener seat_listener = { + seat_handle_caps, +}; + +static void output_handle_geometry(void *data, struct wl_output *wl_output, + int32_t x, int32_t y, int32_t phys_width, + int32_t phys_height, int32_t subpixel, + const char *make, const char *model, + int32_t transform) { - struct vo_wayland_state *wl = data; - mp_input_put_key(wl->vo->input_ctx, MP_KEY_MOUSE_LEAVE); + struct vo_wayland_output *output = data; + output->make = talloc_strdup(output->wl, make); + output->model = talloc_strdup(output->wl, model); + output->geometry.x0 = x; + output->geometry.y0 = y; + output->phys_width = phys_width; + output->phys_height = phys_height; } -static void pointer_handle_motion(void *data, - struct wl_pointer *pointer, - uint32_t time, - wl_fixed_t sx_w, - wl_fixed_t sy_w) +static void output_handle_mode(void *data, struct wl_output *wl_output, + uint32_t flags, int32_t width, + int32_t height, int32_t refresh) { - int32_t scale = 1; - struct vo_wayland_state *wl = data; - - if (wl->display.current_output) - scale = wl->display.current_output->scale; + struct vo_wayland_output *output = data; - wl->cursor.pointer = pointer; - wl->window.mouse_x = scale*wl_fixed_to_int(sx_w); - wl->window.mouse_y = scale*wl_fixed_to_int(sy_w); + /* Only save current mode */ + if (!(flags & WL_OUTPUT_MODE_CURRENT)) + return; - mp_input_set_mouse_pos(wl->vo->input_ctx, wl->window.mouse_x, - wl->window.mouse_y); + output->geometry.x1 = width; + output->geometry.y1 = height; + output->flags = flags; + 
output->refresh_rate = (double)refresh * 0.001; } -static void pointer_handle_button(void *data, - struct wl_pointer *pointer, - uint32_t serial, - uint32_t time, - uint32_t button, - uint32_t state) +static void output_handle_done(void* data, struct wl_output *wl_output) { - struct vo_wayland_state *wl = data; - - state = state == WL_POINTER_BUTTON_STATE_PRESSED ? MP_KEY_STATE_DOWN - : MP_KEY_STATE_UP; - - button = button == BTN_LEFT ? MP_MBTN_LEFT : - button == BTN_MIDDLE ? MP_MBTN_MID : MP_MBTN_RIGHT; - - mp_input_put_key(wl->vo->input_ctx, button | state); - - if (!mp_input_test_dragging(wl->vo->input_ctx, wl->window.mouse_x, wl->window.mouse_y) && - (button == MP_MBTN_LEFT) && (state == MP_KEY_STATE_DOWN)) - window_move(wl, serial); + struct vo_wayland_output *o = data; + + o->geometry.x1 += o->geometry.x0; + o->geometry.y1 += o->geometry.y0; + + MP_VERBOSE(o->wl, "Registered output %s %s (0x%x):\n" + "\tx: %dpx, y: %dpx\n" + "\tw: %dpx (%dmm), h: %dpx (%dmm)\n" + "\tscale: %d\n" + "\tHz: %f\n", o->make, o->model, o->id, o->geometry.x0, + o->geometry.y0, mp_rect_w(o->geometry), o->phys_width, + mp_rect_h(o->geometry), o->phys_height, o->scale, o->refresh_rate); } -static void pointer_handle_axis(void *data, - struct wl_pointer *pointer, - uint32_t time, - uint32_t axis, - wl_fixed_t value) +static void output_handle_scale(void* data, struct wl_output *wl_output, + int32_t factor) { - struct vo_wayland_state *wl = data; - - // value is 10.00 on a normal mouse wheel - // scale it down to 1.00 for multipliying it with the commands - if (axis == WL_POINTER_AXIS_VERTICAL_SCROLL) { - if (value > 0) - mp_input_put_wheel(wl->vo->input_ctx, MP_WHEEL_DOWN, - wl_fixed_to_double(value)*0.1); - if (value < 0) - mp_input_put_wheel(wl->vo->input_ctx, MP_WHEEL_UP, - wl_fixed_to_double(value)*-0.1); - } - else if (axis == WL_POINTER_AXIS_HORIZONTAL_SCROLL) { - if (value > 0) - mp_input_put_wheel(wl->vo->input_ctx, MP_WHEEL_RIGHT, - wl_fixed_to_double(value)*0.1); - if (value < 
0) - mp_input_put_wheel(wl->vo->input_ctx, MP_WHEEL_LEFT, - wl_fixed_to_double(value)*-0.1); + struct vo_wayland_output *output = data; + if (!factor) { + MP_ERR(output->wl, "Invalid output scale given by the compositor!\n"); + return; } + output->scale = factor; } -static const struct wl_pointer_listener pointer_listener = { - pointer_handle_enter, - pointer_handle_leave, - pointer_handle_motion, - pointer_handle_button, - pointer_handle_axis, +static const struct wl_output_listener output_listener = { + output_handle_geometry, + output_handle_mode, + output_handle_done, + output_handle_scale, }; -static void seat_handle_capabilities(void *data, - struct wl_seat *seat, - enum wl_seat_capability caps) +static void data_offer_handle_offer(void *data, struct wl_data_offer *offer, + const char *mime_type) { struct vo_wayland_state *wl = data; - - if ((caps & WL_SEAT_CAPABILITY_KEYBOARD) && !wl->input.keyboard) { - wl->input.keyboard = wl_seat_get_keyboard(seat); - wl_keyboard_add_listener(wl->input.keyboard, &keyboard_listener, wl); - } - else if (!(caps & WL_SEAT_CAPABILITY_KEYBOARD) && wl->input.keyboard) { - wl_keyboard_destroy(wl->input.keyboard); - wl->input.keyboard = NULL; - } - if ((caps & WL_SEAT_CAPABILITY_POINTER) && !wl->input.pointer) { - wl->input.pointer = wl_seat_get_pointer(seat); - wl_pointer_add_listener(wl->input.pointer, &pointer_listener, wl); - } - else if (!(caps & WL_SEAT_CAPABILITY_POINTER) && wl->input.pointer) { - wl_pointer_destroy(wl->input.pointer); - wl->input.pointer = NULL; + int score = mp_event_get_mime_type_score(wl->vo->input_ctx, mime_type); + if (score > wl->dnd_mime_score) { + wl->dnd_mime_score = score; + talloc_free(wl->dnd_mime_type); + wl->dnd_mime_type = talloc_strdup(wl, mime_type); + MP_VERBOSE(wl, "Given DND offer with mime type %s\n", wl->dnd_mime_type); } } -static void seat_handle_name(void *data, - struct wl_seat *seat, - const char *name) +static void data_offer_source_actions(void *data, struct wl_data_offer 
*offer, uint32_t source_actions) +{ + +} + +static void data_offer_action(void *data, struct wl_data_offer *wl_data_offer, uint32_t dnd_action) { struct vo_wayland_state *wl = data; - MP_VERBOSE(wl, "Seat \"%s\" connected\n", name); + wl->dnd_action = dnd_action & WL_DATA_DEVICE_MANAGER_DND_ACTION_COPY ? + DND_REPLACE : DND_APPEND; + MP_VERBOSE(wl, "DND action is %s\n", + wl->dnd_action == DND_REPLACE ? "DND_REPLACE" : "DND_APPEND"); } -static const struct wl_seat_listener seat_listener = { - seat_handle_capabilities, - seat_handle_name, +static const struct wl_data_offer_listener data_offer_listener = { + data_offer_handle_offer, + data_offer_source_actions, + data_offer_action, }; -static void registry_handle_global(void *data, struct wl_registry *reg, - uint32_t id, const char *interface, - uint32_t version) +static void data_device_handle_data_offer(void *data, struct wl_data_device *wl_ddev, + struct wl_data_offer *id) { struct vo_wayland_state *wl = data; + if (wl->dnd_offer) + wl_data_offer_destroy(wl->dnd_offer); - if (strcmp(interface, "wl_compositor") == 0) { - - wl->display.compositor = wl_registry_bind(reg, id, - &wl_compositor_interface, - MPMIN(3, version)); - } - - else if (strcmp(interface, "wl_shell") == 0) { - - wl->display.shell = wl_registry_bind(reg, id, &wl_shell_interface, 1); - } - - else if (strcmp(interface, "wl_shm") == 0) { - - wl->display.shm = wl_registry_bind(reg, id, &wl_shm_interface, 1); - } - - else if (strcmp(interface, "wl_output") == 0) { - - struct vo_wayland_output *output = - talloc_zero(wl, struct vo_wayland_output); - - output->id = id; - output->scale = 1; - output->output = wl_registry_bind(reg, id, &wl_output_interface, - MPMIN(2, version)); + wl->dnd_offer = id; + wl_data_offer_add_listener(id, &data_offer_listener, wl); +} - wl_output_add_listener(output->output, &output_listener, output); - wl_list_insert(&wl->display.output_list, &output->link); +static void data_device_handle_enter(void *data, struct wl_data_device 
*wl_ddev, + uint32_t serial, struct wl_surface *surface, + wl_fixed_t x, wl_fixed_t y, + struct wl_data_offer *id) +{ + struct vo_wayland_state *wl = data; + if (wl->dnd_offer != id) { + MP_FATAL(wl, "DND offer ID mismatch!\n"); + return; } - else if (strcmp(interface, "wl_seat") == 0) { - - wl->input.seat = wl_registry_bind(reg, id, &wl_seat_interface, 4); - wl_seat_add_listener(wl->input.seat, &seat_listener, wl); - - } + wl_data_offer_set_actions(id, WL_DATA_DEVICE_MANAGER_DND_ACTION_COPY | + WL_DATA_DEVICE_MANAGER_DND_ACTION_MOVE, + WL_DATA_DEVICE_MANAGER_DND_ACTION_COPY); - else if (strcmp(interface, "wl_subcompositor") == 0) { + wl_data_offer_accept(id, serial, wl->dnd_mime_type); - wl->display.subcomp = wl_registry_bind(reg, id, - &wl_subcompositor_interface, 1); - } + MP_VERBOSE(wl, "Accepting DND offer with mime type %s\n", wl->dnd_mime_type); } -static void registry_handle_global_remove(void *data, - struct wl_registry *registry, - uint32_t id) +static void data_device_handle_leave(void *data, struct wl_data_device *wl_ddev) { -} + struct vo_wayland_state *wl = data; -static const struct wl_registry_listener registry_listener = { - registry_handle_global, - registry_handle_global_remove -}; + if (wl->dnd_offer) { + if (wl->dnd_fd != -1) + return; + wl_data_offer_destroy(wl->dnd_offer); + wl->dnd_offer = NULL; + } + MP_VERBOSE(wl, "Releasing DND offer with mime type %s\n", wl->dnd_mime_type); -/*** internal functions ***/ + talloc_free(wl->dnd_mime_type); + wl->dnd_mime_type = NULL; + wl->dnd_mime_score = 0; +} -static int lookupkey(int key) +static void data_device_handle_motion(void *data, struct wl_data_device *wl_ddev, + uint32_t time, wl_fixed_t x, wl_fixed_t y) { - const char *passthrough_keys = " -+*/<>`~!@#$%^&()_{}:;\"\',.?\\|=[]"; - - int mpkey = 0; - if ((key >= 'a' && key <= 'z') || - (key >= 'A' && key <= 'Z') || - (key >= '0' && key <= '9') || - (key > 0 && key < 256 && strchr(passthrough_keys, key))) - mpkey = key; - - if (!mpkey) - mpkey = 
lookup_keymap_table(keymap, key); + struct vo_wayland_state *wl = data; - return mpkey; + wl_data_offer_accept(wl->dnd_offer, time, wl->dnd_mime_type); } -static void hide_cursor (struct vo_wayland_state *wl) +static void data_device_handle_drop(void *data, struct wl_data_device *wl_ddev) { - if (!wl->cursor.pointer) - return; + struct vo_wayland_state *wl = data; - wl_pointer_set_cursor(wl->cursor.pointer, wl->cursor.serial, NULL, 0, 0); -} + int pipefd[2]; -static void show_cursor (struct vo_wayland_state *wl) -{ - if (!wl->cursor.pointer) + if (pipe2(pipefd, O_CLOEXEC) == -1) { + MP_ERR(wl, "Failed to create dnd pipe!\n"); return; + } - struct wl_cursor_image *image = wl->cursor.default_cursor->images[0]; - struct wl_buffer *buffer = wl_cursor_image_get_buffer(image); - - wl_pointer_set_cursor(wl->cursor.pointer, - wl->cursor.serial, - wl->cursor.surface, - image->hotspot_x, - image->hotspot_y); + MP_VERBOSE(wl, "Receiving DND offer with mime %s\n", wl->dnd_mime_type); - wl_surface_attach(wl->cursor.surface, buffer, 0, 0); - wl_surface_damage(wl->cursor.surface, 0, 0, image->width, image->height); - wl_surface_commit(wl->cursor.surface); -} + wl_data_offer_receive(wl->dnd_offer, wl->dnd_mime_type, pipefd[1]); + close(pipefd[1]); -static void window_move(struct vo_wayland_state *wl, uint32_t serial) -{ - if (wl->display.shell) - wl_shell_surface_move(wl->window.shell_surface, wl->input.seat, serial); + wl->dnd_fd = pipefd[0]; } -static void window_set_toplevel(struct vo_wayland_state *wl) +static void data_device_handle_selection(void *data, struct wl_data_device *wl_ddev, + struct wl_data_offer *id) { - if (wl->display.shell) - wl_shell_surface_set_toplevel(wl->window.shell_surface); } -static void window_set_title(struct vo_wayland_state *wl, const char *title) -{ - if (wl->display.shell) - wl_shell_surface_set_title(wl->window.shell_surface, title); -} +static const struct wl_data_device_listener data_device_listener = { + data_device_handle_data_offer, + 
data_device_handle_enter, + data_device_handle_leave, + data_device_handle_motion, + data_device_handle_drop, + data_device_handle_selection, +}; -static void schedule_resize(struct vo_wayland_state *wl, - uint32_t edges, - int32_t width, - int32_t height) +static void surface_handle_enter(void *data, struct wl_surface *wl_surface, + struct wl_output *output) { - int32_t minimum_size = 150; - int32_t x, y; - float win_aspect = wl->window.aspect; - if (win_aspect <= 0) - win_aspect = 1; - - MP_DBG(wl, "schedule resize: %dx%d\n", width, height); - - width = MPMAX(minimum_size, width); - height = MPMAX(minimum_size, height); - if (wl->display.current_output) { - int scale = wl->display.current_output->scale; - width = MPMIN(width, wl->display.current_output->width /scale); - height = MPMIN(height, wl->display.current_output->height/scale); - } + struct vo_wayland_state *wl = data; + wl->current_output = NULL; - // don't keep the aspect ratio in fullscreen mode because the compositor - // shows the desktop in the border regions if the video does not have the same - // aspect ratio as the screen - /* if only the height is changed we have to calculate the width - * in any other case we calculate the height */ - switch (edges) { - case WL_SHELL_SURFACE_RESIZE_TOP: - case WL_SHELL_SURFACE_RESIZE_BOTTOM: - width = win_aspect * height; - break; - case WL_SHELL_SURFACE_RESIZE_LEFT: - case WL_SHELL_SURFACE_RESIZE_RIGHT: - case WL_SHELL_SURFACE_RESIZE_TOP_LEFT: // just a preference - case WL_SHELL_SURFACE_RESIZE_TOP_RIGHT: - case WL_SHELL_SURFACE_RESIZE_BOTTOM_LEFT: - case WL_SHELL_SURFACE_RESIZE_BOTTOM_RIGHT: - height = (1 / win_aspect) * width; + struct vo_wayland_output *o; + wl_list_for_each(o, &wl->output_list, link) { + if (o->output == output) { + wl->current_output = o; break; + } } - if (edges & WL_SHELL_SURFACE_RESIZE_LEFT) - x = wl->window.width - width; - else - x = 0; + wl->current_output->has_surface = true; + if (wl->scaling != wl->current_output->scale) + 
wl->pending_vo_events |= VO_EVENT_RESIZE; + wl->scaling = wl->current_output->scale; - if (edges & WL_SHELL_SURFACE_RESIZE_TOP) - y = wl->window.height - height; - else - y = 0; + MP_VERBOSE(wl, "Surface entered output %s %s (0x%x), scale = %i\n", o->make, + o->model, o->id, wl->scaling); - wl->window.sh_width = width; - wl->window.sh_height = height; - wl->window.sh_x = x; - wl->window.sh_y = y; - wl->window.events |= VO_EVENT_RESIZE; + wl->pending_vo_events |= VO_EVENT_WIN_STATE; } -static void frame_callback(void *data, - struct wl_callback *callback, - uint32_t time) +static void surface_handle_leave(void *data, struct wl_surface *wl_surface, + struct wl_output *output) { struct vo_wayland_state *wl = data; - if (wl->frame.function) - wl->frame.function(wl->frame.data, time); + struct vo_wayland_output *o; + wl_list_for_each(o, &wl->output_list, link) { + if (o->output == output) { + o->has_surface = false; + wl->pending_vo_events |= VO_EVENT_WIN_STATE; + return; + } + } +} + +static const struct wl_surface_listener surface_listener = { + surface_handle_enter, + surface_handle_leave, +}; + +static const struct wl_callback_listener frame_listener; + +static void frame_callback(void *data, struct wl_callback *callback, uint32_t time) +{ + struct vo_wayland_state *wl = data; if (callback) wl_callback_destroy(callback); - wl->frame.callback = wl_surface_frame(wl->window.video_surface); - - if (!wl->frame.callback) { - MP_ERR(wl, "wl_surface_frame failed\n"); - return; - } + wl->frame_callback = wl_surface_frame(wl->surface); + wl_callback_add_listener(wl->frame_callback, &frame_listener, wl); - wl_callback_add_listener(wl->frame.callback, &frame_listener, wl); - wl_surface_commit(wl->window.video_surface); + if (!vo_render_frame_external(wl->vo)) + wl_surface_commit(wl->surface); } static const struct wl_callback_listener frame_listener = { - frame_callback + frame_callback, }; -static bool create_display(struct vo_wayland_state *wl) +static void 
registry_handle_add(void *data, struct wl_registry *reg, uint32_t id, + const char *interface, uint32_t ver) { - if (wl->vo->probing && !getenv("XDG_RUNTIME_DIR")) - return false; - - wl->display.display = wl_display_connect(NULL); - - if (!wl->display.display) { - MP_MSG(wl, wl->vo->probing ? MSGL_V : MSGL_ERR, - "failed to connect to a wayland server: " - "check if a wayland compositor is running\n"); + int found = 1; + struct vo_wayland_state *wl = data; - return false; + if (!strcmp(interface, wl_compositor_interface.name) && found++) { + ver = MPMIN(ver, 4); /* Cap the version */ + wl->compositor = wl_registry_bind(reg, id, &wl_compositor_interface, ver); + wl->surface = wl_compositor_create_surface(wl->compositor); + wl->cursor_surface = wl_compositor_create_surface(wl->compositor); + wl_surface_add_listener(wl->surface, &surface_listener, wl); + vo_enable_external_renderloop(wl->vo); + wl->frame_callback = wl_surface_frame(wl->surface); + wl_callback_add_listener(wl->frame_callback, &frame_listener, wl); } - wl->display.registry = wl_display_get_registry(wl->display.display); - wl_registry_add_listener(wl->display.registry, ®istry_listener, wl); - - wl_display_roundtrip(wl->display.display); + if (!strcmp(interface, wl_output_interface.name) && (ver >= 2) && found++) { + struct vo_wayland_output *output = talloc_zero(wl, struct vo_wayland_output); - wl->display.display_fd = wl_display_get_fd(wl->display.display); + output->wl = wl; + output->id = id; + output->scale = 1; + output->output = wl_registry_bind(reg, id, &wl_output_interface, 2); - return true; -} - -static void destroy_display(struct vo_wayland_state *wl) -{ - struct vo_wayland_output *output = NULL; - struct vo_wayland_output *tmp = NULL; - - wl_list_for_each_safe(output, tmp, &wl->display.output_list, link) { - if (output && output->output) { - wl_output_destroy(output->output); - output->output = NULL; - wl_list_remove(&output->link); - } + wl_output_add_listener(output->output, 
&output_listener, output); + wl_list_insert(&wl->output_list, &output->link); } - if (wl->display.shm) - wl_shm_destroy(wl->display.shm); + if (!strcmp(interface, zxdg_shell_v6_interface.name) && found++) { + wl->shell = wl_registry_bind(reg, id, &zxdg_shell_v6_interface, 1); + zxdg_shell_v6_add_listener(wl->shell, &xdg_shell_listener, wl); + } - if (wl->display.shell) - wl_shell_destroy(wl->display.shell); + if (!strcmp(interface, wl_seat_interface.name) && found++) { + wl->seat = wl_registry_bind(reg, id, &wl_seat_interface, 1); + wl_seat_add_listener(wl->seat, &seat_listener, wl); + } - if (wl->display.subcomp) - wl_subcompositor_destroy(wl->display.subcomp); + if (!strcmp(interface, wl_shm_interface.name) && found++) { + wl->shm = wl_registry_bind(reg, id, &wl_shm_interface, 1); + } - if (wl->display.compositor) - wl_compositor_destroy(wl->display.compositor); + if (!strcmp(interface, wl_data_device_manager_interface.name) && (ver >= 3) && found++) { + wl->dnd_devman = wl_registry_bind(reg, id, &wl_data_device_manager_interface, 3); + } - if (wl->display.registry) - wl_registry_destroy(wl->display.registry); + if (!strcmp(interface, org_kde_kwin_server_decoration_manager_interface.name) && found++) { + wl->server_decoration_manager = wl_registry_bind(reg, id, &org_kde_kwin_server_decoration_manager_interface, 1); + } - if (wl->display.display) { - wl_display_flush(wl->display.display); - wl_display_disconnect(wl->display.display); + if (!strcmp(interface, zwp_idle_inhibit_manager_v1_interface.name) && found++) { + wl->idle_inhibit_manager = wl_registry_bind(reg, id, &zwp_idle_inhibit_manager_v1_interface, 1); } + + if (found > 1) + MP_VERBOSE(wl, "Registered for protocol %s\n", interface); } -static bool create_window(struct vo_wayland_state *wl) +static void remove_output(struct vo_wayland_output *out) { - wl->window.video_surface = - wl_compositor_create_surface(wl->display.compositor); - - wl_surface_add_listener(wl->window.video_surface, - 
&surface_listener, wl); + if (!out) + return; - if (wl->display.shell) { - wl->window.shell_surface = wl_shell_get_shell_surface(wl->display.shell, - wl->window.video_surface); + MP_VERBOSE(out->wl, "Deregistering output %s %s (0x%x)\n", out->make, + out->model, out->id); + wl_list_remove(&out->link); + talloc_free(out->make); + talloc_free(out->model); + talloc_free(out); + return; +} - if (!wl->window.shell_surface) { - MP_ERR(wl, "creating shell surface failed\n"); - return false; +static void registry_handle_remove(void *data, struct wl_registry *reg, uint32_t id) +{ + struct vo_wayland_state *wl = data; + struct vo_wayland_output *output, *tmp; + wl_list_for_each_safe(output, tmp, &wl->output_list, link) { + if (output->id == id) { + remove_output(output); + return; } - - wl_shell_surface_add_listener(wl->window.shell_surface, - &shell_surface_listener, wl); - - wl_shell_surface_set_toplevel(wl->window.shell_surface); - wl_shell_surface_set_class(wl->window.shell_surface, "mpv"); } - - return true; } -static void destroy_window(struct vo_wayland_state *wl) -{ - if (wl->window.shell_surface) - wl_shell_surface_destroy(wl->window.shell_surface); - - if (wl->window.video_surface) - wl_surface_destroy(wl->window.video_surface); +static const struct wl_registry_listener registry_listener = { + registry_handle_add, + registry_handle_remove, +}; - if (wl->frame.callback) - wl_callback_destroy(wl->frame.callback); +static void handle_surface_config(void *data, struct zxdg_surface_v6 *surface, + uint32_t serial) +{ + zxdg_surface_v6_ack_configure(surface, serial); } -static bool create_cursor(struct vo_wayland_state *wl) +static const struct zxdg_surface_v6_listener xdg_surface_listener = { + handle_surface_config, +}; + +static void handle_toplevel_config(void *data, struct zxdg_toplevel_v6 *toplevel, + int32_t width, int32_t height, struct wl_array *states) { - if (!wl->display.shm) { - MP_ERR(wl->vo, "no shm interface available\n"); - return false; + struct 
vo_wayland_state *wl = data; + struct mp_rect old_geometry = wl->geometry; + + int prev_fs_state = wl->fullscreen; + bool maximized = false; + wl->fullscreen = false; + enum zxdg_toplevel_v6_state *state; + wl_array_for_each(state, states) { + switch (*state) { + case ZXDG_TOPLEVEL_V6_STATE_FULLSCREEN: + wl->fullscreen = true; + break; + case ZXDG_TOPLEVEL_V6_STATE_RESIZING: + wl->pending_vo_events |= VO_EVENT_LIVE_RESIZING; + break; + case ZXDG_TOPLEVEL_V6_STATE_MAXIMIZED: + maximized = true; + break; + case ZXDG_TOPLEVEL_V6_STATE_ACTIVATED: + break; + } } - wl->cursor.surface = - wl_compositor_create_surface(wl->display.compositor); + if (prev_fs_state != wl->fullscreen) + wl->pending_vo_events |= VO_EVENT_FULLSCREEN_STATE; + if (!(wl->pending_vo_events & VO_EVENT_LIVE_RESIZING)) + vo_query_and_reset_events(wl->vo, VO_EVENT_LIVE_RESIZING); + + if (width > 0 && height > 0) { + if (!wl->fullscreen) { + if (wl->vo->opts->keepaspect && wl->vo->opts->keepaspect_window && + !maximized) { + if (width > height) + width = height * wl->aspect_ratio; + else + height = width / wl->aspect_ratio; + } + wl->window_size.x0 = 0; + wl->window_size.y0 = 0; + wl->window_size.x1 = width; + wl->window_size.y1 = height; + } + wl->geometry.x0 = 0; + wl->geometry.y0 = 0; + wl->geometry.x1 = width; + wl->geometry.y1 = height; + } else { + wl->geometry = wl->window_size; + } - if (!wl->cursor.surface) - return false; + if (mp_rect_equals(&old_geometry, &wl->geometry)) + return; - wl->cursor.theme = wl_cursor_theme_load(NULL, 32, wl->display.shm); - wl->cursor.default_cursor = wl_cursor_theme_get_cursor(wl->cursor.theme, - "left_ptr"); + MP_VERBOSE(wl, "Resizing due to xdg from %ix%i to %ix%i\n", + mp_rect_w(old_geometry)*wl->scaling, mp_rect_h(old_geometry)*wl->scaling, + mp_rect_w(wl->geometry)*wl->scaling, mp_rect_h(wl->geometry)*wl->scaling); - return true; + wl->pending_vo_events |= VO_EVENT_RESIZE; } -static void destroy_cursor(struct vo_wayland_state *wl) +static void 
handle_toplevel_close(void *data, struct zxdg_toplevel_v6 *xdg_toplevel) { - if (wl->cursor.theme) - wl_cursor_theme_destroy(wl->cursor.theme); - - if (wl->cursor.surface) - wl_surface_destroy(wl->cursor.surface); + struct vo_wayland_state *wl = data; + mp_input_put_key(wl->vo->input_ctx, MP_KEY_CLOSE_WIN); } -static bool create_input(struct vo_wayland_state *wl) +static const struct zxdg_toplevel_v6_listener xdg_toplevel_listener = { + handle_toplevel_config, + handle_toplevel_close, +}; + +static int create_xdg_surface(struct vo_wayland_state *wl) { - wl->input.xkb.context = xkb_context_new(0); + wl->xdg_surface = zxdg_shell_v6_get_xdg_surface(wl->shell, wl->surface); + zxdg_surface_v6_add_listener(wl->xdg_surface, &xdg_surface_listener, wl); - if (!wl->input.xkb.context) { - MP_ERR(wl, "failed to initialize input: check xkbcommon\n"); - return false; - } + wl->xdg_toplevel = zxdg_surface_v6_get_toplevel(wl->xdg_surface); + zxdg_toplevel_v6_add_listener(wl->xdg_toplevel, &xdg_toplevel_listener, wl); - return true; + zxdg_toplevel_v6_set_title (wl->xdg_toplevel, "mpv"); + zxdg_toplevel_v6_set_app_id(wl->xdg_toplevel, "mpv"); + + return 0; } -static void destroy_input(struct vo_wayland_state *wl) +static int set_border_decorations(struct vo_wayland_state *wl, int state) { - if (wl->input.keyboard) { - wl_keyboard_destroy(wl->input.keyboard); - xkb_keymap_unref(wl->input.xkb.keymap); - xkb_state_unref(wl->input.xkb.state); + if (!wl->server_decoration) + return VO_NOTIMPL; + enum org_kde_kwin_server_decoration_mode mode; + if (state) { + MP_VERBOSE(wl, "Enabling server decorations\n"); + mode = ORG_KDE_KWIN_SERVER_DECORATION_MODE_SERVER; + } else { + MP_VERBOSE(wl, "Disabling server decorations\n"); + mode = ORG_KDE_KWIN_SERVER_DECORATION_MODE_NONE; } - - if (wl->input.xkb.context) - xkb_context_unref(wl->input.xkb.context); - - if (wl->input.pointer) - wl_pointer_destroy(wl->input.pointer); - - if (wl->input.seat) - wl_seat_destroy(wl->input.seat); + 
org_kde_kwin_server_decoration_request_mode(wl->server_decoration, mode); + return VO_TRUE; } -/*** mplayer2 interface ***/ - int vo_wayland_init(struct vo *vo) { - vo->wayland = talloc_zero(NULL, struct vo_wayland_state); - struct vo_wayland_state *wl = vo->wayland; - *wl = (struct vo_wayland_state){ + vo->wl = talloc_zero(NULL, struct vo_wayland_state); + struct vo_wayland_state *wl = vo->wl; + + *wl = (struct vo_wayland_state) { + .display = wl_display_connect(NULL), .vo = vo, .log = mp_log_new(wl, vo->log, "wayland"), + .scaling = 1, .wakeup_pipe = {-1, -1}, + .dnd_fd = -1, }; - wl_list_init(&wl->display.output_list); + wl_list_init(&wl->output_list); + + if (!wl->display) + return false; + + if (create_input(wl)) + return false; + + wl->registry = wl_display_get_registry(wl->display); + wl_registry_add_listener(wl->registry, ®istry_listener, wl); + + /* Do a roundtrip to run the registry */ + wl_display_roundtrip(wl->display); + + if (!wl->shell) { + MP_FATAL(wl, "Compositor doesn't support the required %s protocol!\n", + zxdg_shell_v6_interface.name); + return false; + } + + if (!wl_list_length(&wl->output_list)) { + MP_FATAL(wl, "No outputs found or compositor doesn't support %s (ver. 2)\n", + wl_output_interface.name); + return false; + } - if (!create_input(wl) - || !create_display(wl) - || !create_window(wl) - || !create_cursor(wl)) - { - vo_wayland_uninit(vo); + /* Can't be initialized during registry due to multi-protocol dependence */ + if (create_xdg_surface(wl)) return false; + + if (wl->dnd_devman) { + wl->dnd_ddev = wl_data_device_manager_get_data_device(wl->dnd_devman, wl->seat); + wl_data_device_add_listener(wl->dnd_ddev, &data_device_listener, wl); + } else { + MP_VERBOSE(wl, "Compositor doesn't support the %s (ver. 3) protocol!\n", + wl_data_device_manager_interface.name); } - // create_display's roundtrip only adds the interfaces - // the second roundtrip receives output modes, geometry and more ... 
- wl_display_roundtrip(wl->display.display); - - struct vo_wayland_output *o = NULL; - wl_list_for_each(o, &wl->display.output_list, link) { - MP_VERBOSE(wl, "output received:\n" - "\tvendor: %s\n" - "\tmodel: %s\n" - "\tw: %d, h: %d\n" - "\tscale: %d\n" - "\tHz: %f\n", - o->make, o->model, - o->width, o->height, o->scale, - o->refresh_rate / 1000.0f); + if (wl->server_decoration_manager) { + wl->server_decoration = org_kde_kwin_server_decoration_manager_create(wl->server_decoration_manager, wl->surface); + set_border_decorations(wl, vo->opts->border); + } else { + MP_VERBOSE(wl, "Compositor doesn't support the %s protocol!\n", + org_kde_kwin_server_decoration_manager_interface.name); } + if (!wl->idle_inhibit_manager) + MP_VERBOSE(wl, "Compositor doesn't support the %s protocol!\n", + zwp_idle_inhibit_manager_v1_interface.name); + + wl->display_fd = wl_display_get_fd(wl->display); mp_make_wakeup_pipe(wl->wakeup_pipe); return true; @@ -947,220 +1052,340 @@ int vo_wayland_init(struct vo *vo) void vo_wayland_uninit(struct vo *vo) { - struct vo_wayland_state *wl = vo->wayland; - destroy_cursor(wl); - destroy_window(wl); - destroy_input(wl); - destroy_display(wl); + struct vo_wayland_state *wl = vo->wl; + if (!wl) + return; + + mp_input_put_key(wl->vo->input_ctx, MP_INPUT_RELEASE_ALL); + + if (wl->cursor_theme) + wl_cursor_theme_destroy(wl->cursor_theme); + + if (wl->cursor_surface) + wl_surface_destroy(wl->cursor_surface); + + if (wl->xkb_context) + xkb_context_unref(wl->xkb_context); + + if (wl->idle_inhibitor) + zwp_idle_inhibitor_v1_destroy(wl->idle_inhibitor); + + if (wl->idle_inhibit_manager) + zwp_idle_inhibit_manager_v1_destroy(wl->idle_inhibit_manager); + + if (wl->shell) + zxdg_shell_v6_destroy(wl->shell); + + if (wl->shm) + wl_shm_destroy(wl->shm); + + if (wl->dnd_devman) + wl_data_device_manager_destroy(wl->dnd_devman); + + if (wl->server_decoration) + org_kde_kwin_server_decoration_destroy(wl->server_decoration); + + if (wl->server_decoration_manager) + 
org_kde_kwin_server_decoration_manager_destroy(wl->server_decoration_manager); + + if (wl->surface) + wl_surface_destroy(wl->surface); + + if (wl->frame_callback) + wl_callback_destroy(wl->frame_callback); + + if (wl->display) { + close(wl_display_get_fd(wl->display)); + wl_display_disconnect(wl->display); + } + + struct vo_wayland_output *output, *tmp; + wl_list_for_each_safe(output, tmp, &wl->output_list, link) + remove_output(output); + + talloc_free(wl->dnd_mime_type); + for (int n = 0; n < 2; n++) close(wl->wakeup_pipe[n]); talloc_free(wl); - vo->wayland = NULL; + vo->wl = NULL; } -static void vo_wayland_ontop(struct vo *vo) +static struct vo_wayland_output *find_output(struct vo_wayland_state *wl, int index) { - struct vo_wayland_state *wl = vo->wayland; - if (!vo->opts->ontop) - return; - MP_DBG(wl, "going ontop\n"); - window_set_toplevel(wl); - schedule_resize(wl, 0, wl->window.width, wl->window.height); + int screen_id = 0; + struct vo_wayland_output *output; + wl_list_for_each(output, &wl->output_list, link) { + if (index == screen_id++) + return output; + } + return NULL; } -static void vo_wayland_fullscreen(struct vo *vo) +int vo_wayland_reconfig(struct vo *vo) { - struct vo_wayland_state *wl = vo->wayland; - if (!wl->display.shell) - return; + struct wl_output *wl_out = NULL; + struct mp_rect screenrc = { 0 }; + struct vo_wayland_state *wl = vo->wl; + + MP_VERBOSE(wl, "Reconfiguring!\n"); + + /* Surface enter events happen later but we already know the outputs and we'd + * like to know the output the surface would be on (for scaling or fullscreen), + * so if fsscreen_id is set or there's only one possible output, use it. 
*/ + if (((!wl->current_output) && (wl_list_length(&wl->output_list) == 1)) || + (vo->opts->fullscreen && (vo->opts->fsscreen_id >= 0))) { + int idx = 0; + if (vo->opts->fullscreen && (vo->opts->fsscreen_id >= 0)) + idx = vo->opts->fsscreen_id; + struct vo_wayland_output *out = find_output(wl, idx); + if (!out) { + MP_ERR(wl, "Screen index %i not found/unavailable!\n", idx); + } else { + wl_out = out->output; + wl->current_output = out; + wl->scaling = out->scale; + screenrc = wl->current_output->geometry; + } + } + + struct vo_win_geometry geo; + vo_calc_window_geometry(vo, &screenrc, &geo); + vo_apply_window_geometry(vo, &geo); - struct wl_output *fs_output = wl->display.fs_output; + wl->geometry.x0 = 0; + wl->geometry.y0 = 0; + wl->geometry.x1 = vo->dwidth / wl->scaling; + wl->geometry.y1 = vo->dheight / wl->scaling; + wl->window_size = wl->geometry; + wl->aspect_ratio = vo->dwidth / (float)vo->dheight; if (vo->opts->fullscreen) { - MP_DBG(wl, "going fullscreen\n"); - wl->window.is_fullscreen = true; - wl->window.p_width = wl->window.width; - wl->window.p_height = wl->window.height; - if (wl->display.current_output) - schedule_resize(wl, 0, wl->display.current_output->width, - wl->display.current_output->height); - wl_shell_surface_set_fullscreen(wl->window.shell_surface, - WL_SHELL_SURFACE_FULLSCREEN_METHOD_DEFAULT, - 0, fs_output); + /* If already fullscreen, fix resolution for the frame size change */ + if (wl->fullscreen && wl->current_output) { + wl->geometry.x0 = 0; + wl->geometry.y0 = 0; + wl->geometry.x1 = mp_rect_w(wl->current_output->geometry)/wl->scaling; + wl->geometry.y1 = mp_rect_h(wl->current_output->geometry)/wl->scaling; + } else { + zxdg_toplevel_v6_set_fullscreen(wl->xdg_toplevel, wl_out); + } } - else { - MP_DBG(wl, "leaving fullscreen\n"); - wl->window.is_fullscreen = false; - window_set_toplevel(wl); - schedule_resize(wl, 0, wl->window.p_width, wl->window.p_height); + wl_surface_set_buffer_scale(wl->surface, wl->scaling); + 
wl_surface_commit(wl->surface); + wl->pending_vo_events |= VO_EVENT_RESIZE; + if (!wl->configured) { + if (spawn_cursor(wl)) + return false; + wl_display_roundtrip(wl->display); + wl->configured = true; } + + return true; } -static void vo_wayland_update_screeninfo(struct vo *vo, struct mp_rect *screenrc) +static int set_screensaver_inhibitor(struct vo_wayland_state *wl, int state) { - struct vo_wayland_state *wl = vo->wayland; - struct mp_vo_opts *opts = vo->opts; - - *screenrc = (struct mp_rect){0}; + if (!wl->idle_inhibit_manager) + return VO_NOTIMPL; + if (state == (!!wl->idle_inhibitor)) + return VO_TRUE; + if (state) { + MP_VERBOSE(wl, "Enabling idle inhibitor\n"); + struct zwp_idle_inhibit_manager_v1 *mgr = wl->idle_inhibit_manager; + wl->idle_inhibitor = zwp_idle_inhibit_manager_v1_create_inhibitor(mgr, wl->surface); + } else { + MP_VERBOSE(wl, "Disabling the idle inhibitor\n"); + zwp_idle_inhibitor_v1_destroy(wl->idle_inhibitor); + } + return VO_TRUE; +} - int screen_id = 0; +static int toggle_fullscreen(struct vo_wayland_state *wl) +{ + if (!wl->xdg_toplevel) + return VO_NOTAVAIL; + if (wl->fullscreen) + zxdg_toplevel_v6_unset_fullscreen(wl->xdg_toplevel); + else + zxdg_toplevel_v6_set_fullscreen(wl->xdg_toplevel, NULL); + return VO_TRUE; +} - struct vo_wayland_output *output; - struct vo_wayland_output *first_output = NULL; - struct vo_wayland_output *fsscreen_output = NULL; +static int update_window_title(struct vo_wayland_state *wl, char *title) +{ + if (!wl->xdg_toplevel) + return VO_NOTAVAIL; + zxdg_toplevel_v6_set_title(wl->xdg_toplevel, title); + return VO_TRUE; +} - if (opts->fsscreen_id >= 0) { - wl_list_for_each_reverse(output, &wl->display.output_list, link) { - if (!output || !output->width) - continue; +static void check_dnd_fd(struct vo_wayland_state *wl) +{ + if (wl->dnd_fd == -1) + return; - if (opts->fsscreen_id == screen_id) - fsscreen_output = output; + struct pollfd fdp = { wl->dnd_fd, POLLIN | POLLERR | POLLHUP, 0 }; + if (poll(&fdp, 
1, 0) <= 0) + return; - screen_id++; + if (fdp.revents & POLLIN) { + ptrdiff_t offset = 0; + size_t data_read = 0; + const size_t chunk_size = 1; + uint8_t *buffer = ta_zalloc_size(wl, chunk_size); + if (!buffer) + goto end; + + while ((data_read = read(wl->dnd_fd, buffer + offset, chunk_size)) > 0) { + offset += data_read; + buffer = ta_realloc_size(wl, buffer, offset + chunk_size); + memset(buffer + offset, 0, chunk_size); + if (!buffer) + goto end; } - } - if (fsscreen_output) { - wl->display.fs_output = fsscreen_output->output; - screenrc->x1 = fsscreen_output->width; - screenrc->y1 = fsscreen_output->height; + MP_VERBOSE(wl, "Read %td bytes from the DND fd\n", offset); + + struct bstr file_list = bstr0(buffer); + mp_event_drop_mime_data(wl->vo->input_ctx, wl->dnd_mime_type, + file_list, wl->dnd_action); + talloc_free(buffer); +end: + wl_data_offer_finish(wl->dnd_offer); + talloc_free(wl->dnd_mime_type); + wl->dnd_mime_type = NULL; + wl->dnd_mime_score = 0; } - else { - wl->display.fs_output = NULL; /* current output is always 0 */ - if (first_output) { - screenrc->x1 = wl->display.current_output->width; - screenrc->y1 = wl->display.current_output->height; - } + if (fdp.revents & (POLLIN | POLLERR | POLLHUP)) { + close(wl->dnd_fd); + wl->dnd_fd = -1; } +} - wl->window.fs_width = screenrc->x1; - wl->window.fs_height = screenrc->y1; +static char **get_displays_spanned(struct vo_wayland_state *wl) +{ + char **names = NULL; + int displays_spanned = 0; + struct vo_wayland_output *output; + wl_list_for_each(output, &wl->output_list, link) { + if (output->has_surface) + MP_TARRAY_APPEND(NULL, names, displays_spanned, + talloc_strdup(NULL, output->model)); + } + MP_TARRAY_APPEND(NULL, names, displays_spanned, NULL); + return names; } int vo_wayland_control(struct vo *vo, int *events, int request, void *arg) { - struct vo_wayland_state *wl = vo->wayland; - wl_display_dispatch_pending(wl->display.display); + struct vo_wayland_state *wl = vo->wl; + 
wl_display_dispatch_pending(wl->display); switch (request) { - case VOCTRL_CHECK_EVENTS: - *events |= wl->window.events; - wl->window.events = 0; + case VOCTRL_CHECK_EVENTS: { + check_dnd_fd(wl); + *events |= wl->pending_vo_events; + wl->pending_vo_events = 0; return VO_TRUE; - case VOCTRL_FULLSCREEN: - vo_wayland_fullscreen(vo); + } + case VOCTRL_GET_FULLSCREEN: { + *(int *)arg = wl->fullscreen; return VO_TRUE; - case VOCTRL_ONTOP: - vo_wayland_ontop(vo); + } + case VOCTRL_GET_DISPLAY_NAMES: { + *(char ***)arg = get_displays_spanned(wl); return VO_TRUE; - case VOCTRL_GET_UNFS_WINDOW_SIZE: { - int *s = arg, scale = 1; - if (wl->display.current_output) - scale = wl->display.current_output->scale; - s[0] = scale*wl->window.width; - s[1] = scale*wl->window.height; + } + case VOCTRL_PAUSE: { + wl_callback_destroy(wl->frame_callback); + wl->frame_callback = NULL; + vo_disable_external_renderloop(wl->vo); return VO_TRUE; } - case VOCTRL_SET_UNFS_WINDOW_SIZE: { + case VOCTRL_RESUME: { + vo_enable_external_renderloop(wl->vo); + frame_callback(wl, NULL, 0); + return VO_TRUE; + } + case VOCTRL_GET_UNFS_WINDOW_SIZE: { int *s = arg; - if (!wl->window.is_fullscreen) - schedule_resize(wl, 0, s[0], s[1]); + s[0] = mp_rect_w(wl->geometry)*wl->scaling; + s[1] = mp_rect_h(wl->geometry)*wl->scaling; return VO_TRUE; } - case VOCTRL_SET_CURSOR_VISIBILITY: - if (*(bool *)arg) { - if (!wl->cursor.visible) - show_cursor(wl); - } - else { - if (wl->cursor.visible) - hide_cursor(wl); + case VOCTRL_SET_UNFS_WINDOW_SIZE: { + int *s = arg; + if (!wl->fullscreen) { + wl->geometry.x0 = 0; + wl->geometry.y0 = 0; + wl->geometry.x1 = s[0]/wl->scaling; + wl->geometry.y1 = s[1]/wl->scaling; + wl->pending_vo_events |= VO_EVENT_RESIZE; } - wl->cursor.visible = *(bool *)arg; - return VO_TRUE; - case VOCTRL_UPDATE_WINDOW_TITLE: - window_set_title(wl, (char*) arg); return VO_TRUE; + } case VOCTRL_GET_DISPLAY_FPS: { - if (!wl->display.current_output) - break; - - // refresh rate is stored in milli-Hertz 
(mHz) - double fps = wl->display.current_output->refresh_rate / 1000.0f; - *(double*) arg = fps; + if (!wl->current_output) + return VO_NOTAVAIL; + *(double *)arg = wl->current_output->refresh_rate; return VO_TRUE; } + case VOCTRL_UPDATE_WINDOW_TITLE: + return update_window_title(wl, (char *)arg); + case VOCTRL_FULLSCREEN: + return toggle_fullscreen(wl); + case VOCTRL_SET_CURSOR_VISIBILITY: + return set_cursor_visibility(wl, *(bool *)arg); + case VOCTRL_BORDER: + return set_border_decorations(wl, vo->opts->border); + case VOCTRL_KILL_SCREENSAVER: + return set_screensaver_inhibitor(wl, true); + case VOCTRL_RESTORE_SCREENSAVER: + return set_screensaver_inhibitor(wl, false); } - return VO_NOTIMPL; -} -bool vo_wayland_config(struct vo *vo) -{ - struct vo_wayland_state *wl = vo->wayland; - - struct mp_rect screenrc; - vo_wayland_update_screeninfo(vo, &screenrc); - - struct vo_win_geometry geo; - vo_calc_window_geometry(vo, &screenrc, &geo); - vo_apply_window_geometry(vo, &geo); - - wl->window.p_width = vo->dwidth; - wl->window.p_height = vo->dheight; - wl->window.aspect = vo->dwidth / (float) MPMAX(vo->dheight, 1); - - wl->window.width = vo->dwidth; - wl->window.height = vo->dheight; - vo_wayland_fullscreen(vo); - - return true; -} - -void vo_wayland_request_frame(struct vo *vo, void *data, vo_wayland_frame_cb cb) -{ - struct vo_wayland_state *wl = vo->wayland; - wl->frame.data = data; - wl->frame.function = cb; - MP_DBG(wl, "restart frame callback\n"); - frame_callback(wl, NULL, 0); + return VO_NOTIMPL; } void vo_wayland_wakeup(struct vo *vo) { - struct vo_wayland_state *wl = vo->wayland; + struct vo_wayland_state *wl = vo->wl; (void)write(wl->wakeup_pipe[1], &(char){0}, 1); } void vo_wayland_wait_events(struct vo *vo, int64_t until_time_us) { - struct vo_wayland_state *wl = vo->wayland; - struct wl_display *dp = wl->display.display; + struct vo_wayland_state *wl = vo->wl; + struct wl_display *display = wl->display; + + if (wl->display_fd == -1) + return; struct pollfd 
fds[2] = { - {.fd = wl->display.display_fd, .events = POLLIN }, - {.fd = wl->wakeup_pipe[0], .events = POLLIN }, + {.fd = wl->display_fd, .events = POLLIN }, + {.fd = wl->wakeup_pipe[0], .events = POLLIN }, }; int64_t wait_us = until_time_us - mp_time_us(); int timeout_ms = MPCLAMP((wait_us + 999) / 1000, 0, 10000); - wl_display_dispatch_pending(dp); - wl_display_flush(dp); + wl_display_dispatch_pending(display); + wl_display_flush(display); poll(fds, 2, timeout_ms); if (fds[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { - MP_FATAL(wl, "error occurred on the display fd: " - "closing file descriptor\n"); - close(wl->display.display_fd); + MP_FATAL(wl, "Error occurred on the display fd, closing\n"); + close(wl->display_fd); + wl->display_fd = -1; mp_input_put_key(vo->input_ctx, MP_KEY_CLOSE_WIN); } if (fds[0].revents & POLLIN) - wl_display_dispatch(dp); + wl_display_dispatch(display); if (fds[1].revents & POLLIN) mp_flush_wakeup_pipe(wl->wakeup_pipe[0]); diff --git a/video/out/wayland_common.h b/video/out/wayland_common.h index 4bb90d6..4911009 100644 --- a/video/out/wayland_common.h +++ b/video/out/wayland_common.h @@ -1,6 +1,5 @@ /* * This file is part of mpv video player. 
- * Copyright © 2013 Alexander Preisinger <alexander.preisinger@gmail.com> * * mpv is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -19,133 +18,96 @@ #ifndef MPLAYER_WAYLAND_COMMON_H #define MPLAYER_WAYLAND_COMMON_H -#include <stdint.h> -#include <stdbool.h> #include <wayland-client.h> #include <wayland-cursor.h> #include <xkbcommon/xkbcommon.h> -#include "config.h" - -#if HAVE_GL_WAYLAND -#include <wayland-egl.h> -#include <EGL/egl.h> -#include <EGL/eglext.h> -#endif - -struct vo; +#include "vo.h" +#include "input/event.h" struct vo_wayland_output { - uint32_t id; /* unique name */ + struct vo_wayland_state *wl; + uint32_t id; struct wl_output *output; + struct mp_rect geometry; + int phys_width; + int phys_height; + int scale; uint32_t flags; - int32_t width; - int32_t height; - int32_t scale; - int32_t refresh_rate; // fps (mHz) - const char *make; - const char *model; + double refresh_rate; + char *make; + char *model; + bool has_surface; struct wl_list link; }; -typedef void (*vo_wayland_frame_cb)(void *data, uint32_t time); - struct vo_wayland_state { - struct vo *vo; - struct mp_log* log; + struct mp_log *log; + struct vo *vo; + struct wl_display *display; + struct wl_shm *shm; + struct wl_compositor *compositor; + struct wl_registry *registry; + + /* State */ + struct mp_rect geometry; + struct mp_rect window_size; + float aspect_ratio; + bool fullscreen; + bool configured; int wakeup_pipe[2]; - - struct { - void *data; - vo_wayland_frame_cb function; - struct wl_callback *callback; - } frame; - -#if HAVE_GL_WAYLAND - struct { - EGLSurface egl_surface; - - struct wl_egl_window *egl_window; - - struct { - EGLDisplay dpy; - EGLContext ctx; - EGLConfig conf; - } egl; - } egl_context; -#endif - - struct { - int fd; - struct wl_display *display; - struct wl_registry *registry; - struct wl_compositor *compositor; - struct wl_shell *shell; - - struct wl_list output_list; - struct wl_output *fs_output; 
/* fullscreen output */ - struct vo_wayland_output *current_output; - - int display_fd; - - struct wl_shm *shm; - - struct wl_subcompositor *subcomp; - } display; - - struct { - int32_t width; // current size of the window - int32_t height; - int32_t p_width; // previous sizes for leaving fullscreen - int32_t p_height; - int32_t sh_width; // sheduled width for resizing - int32_t sh_height; - int32_t sh_x; // x, y calculated with the drag edges for moving - int32_t sh_y; - float aspect; - - bool is_fullscreen; // don't keep aspect ratio in fullscreen mode - int32_t fs_width; // fullscreen sizes - int32_t fs_height; - - struct wl_surface *video_surface; - int32_t mouse_x; // mouse position inside the surface - int32_t mouse_y; - struct wl_shell_surface *shell_surface; - int events; /* mplayer events (VO_EVENT_RESIZE) */ - } window; - - struct { - struct wl_cursor *default_cursor; - struct wl_cursor_theme *theme; - struct wl_surface *surface; - - /* pointer for fading out */ - bool visible; - struct wl_pointer *pointer; - uint32_t serial; - } cursor; - - struct { - struct wl_seat *seat; - struct wl_keyboard *keyboard; - struct wl_pointer *pointer; - - struct { - struct xkb_context *context; - struct xkb_keymap *keymap; - struct xkb_state *state; - } xkb; - } input; + int pending_vo_events; + int mouse_x; + int mouse_y; + int scaling; + int touch_entries; + uint32_t pointer_id; + int display_fd; + struct wl_callback *frame_callback; + struct wl_list output_list; + struct vo_wayland_output *current_output; + + /* Shell */ + struct wl_surface *surface; + struct zxdg_shell_v6 *shell; + struct zxdg_toplevel_v6 *xdg_toplevel; + struct zxdg_surface_v6 *xdg_surface; + struct org_kde_kwin_server_decoration_manager *server_decoration_manager; + struct org_kde_kwin_server_decoration *server_decoration; + struct zwp_idle_inhibit_manager_v1 *idle_inhibit_manager; + struct zwp_idle_inhibitor_v1 *idle_inhibitor; + + /* Input */ + struct wl_seat *seat; + struct wl_pointer *pointer; + 
struct wl_touch *touch; + struct wl_keyboard *keyboard; + struct xkb_context *xkb_context; + struct xkb_keymap *xkb_keymap; + struct xkb_state *xkb_state; + + /* DND */ + struct wl_data_device_manager *dnd_devman; + struct wl_data_device *dnd_ddev; + struct wl_data_offer *dnd_offer; + enum mp_dnd_action dnd_action; + char *dnd_mime_type; + int dnd_mime_score; + int dnd_fd; + + /* Cursor */ + struct wl_cursor_theme *cursor_theme; + struct wl_cursor *default_cursor; + struct wl_surface *cursor_surface; + int allocated_cursor_scale; }; int vo_wayland_init(struct vo *vo); -void vo_wayland_uninit(struct vo *vo); -bool vo_wayland_config(struct vo *vo); +int vo_wayland_reconfig(struct vo *vo); int vo_wayland_control(struct vo *vo, int *events, int request, void *arg); +void vo_wayland_check_events(struct vo *vo); +void vo_wayland_uninit(struct vo *vo); void vo_wayland_wakeup(struct vo *vo); void vo_wayland_wait_events(struct vo *vo, int64_t until_time_us); -void vo_wayland_request_frame(struct vo *vo, void *data, vo_wayland_frame_cb cb); #endif /* MPLAYER_WAYLAND_COMMON_H */ - diff --git a/video/out/win32/exclusive_hack.c b/video/out/win32/exclusive_hack.c deleted file mode 100644 index 668dfd5..0000000 --- a/video/out/win32/exclusive_hack.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include <windows.h> -#include <winternl.h> -#include <pthread.h> - -#include "exclusive_hack.h" - -// Missing NT API definitions -typedef enum _MP_MUTANT_INFORMATION_CLASS { - MpMutantBasicInformation -} MP_MUTANT_INFORMATION_CLASS; -#define MUTANT_INFORMATION_CLASS MP_MUTANT_INFORMATION_CLASS -#define MutantBasicInformation MpMutantBasicInformation - -typedef struct _MP_MUTANT_BASIC_INFORMATION { - LONG CurrentCount; - BOOLEAN OwnedByCaller; - BOOLEAN AbandonedState; -} MP_MUTANT_BASIC_INFORMATION; -#define MUTANT_BASIC_INFORMATION MP_MUTANT_BASIC_INFORMATION - -static pthread_once_t internal_api_load_ran = PTHREAD_ONCE_INIT; -static bool internal_api_loaded = false; - -static HANDLE excl_mode_mutex; -static NTSTATUS (NTAPI *pNtQueryMutant)(HANDLE MutantHandle, - MUTANT_INFORMATION_CLASS MutantInformationClass, PVOID MutantInformation, - ULONG MutantInformationLength, PULONG ReturnLength); - -static void internal_api_load(void) -{ - HMODULE ntdll = GetModuleHandleW(L"ntdll.dll"); - if (!ntdll) - return; - pNtQueryMutant = (void*)GetProcAddress(ntdll, "NtQueryMutant"); - if (!pNtQueryMutant) - return; - excl_mode_mutex = OpenMutexW(MUTANT_QUERY_STATE, FALSE, - L"Local\\__DDrawExclMode__"); - if (!excl_mode_mutex) - return; - - internal_api_loaded = true; -} - -bool mp_w32_is_in_exclusive_mode(void) -{ - pthread_once(&internal_api_load_ran, internal_api_load); - if (!internal_api_loaded) - return false; - - // As far as we can tell, there is no way to know if a specific OpenGL - // program is being redirected by the DWM. It is possible, however, to - // know if some program on the computer is unredirected by the DWM, that - // is, whether some program is in exclusive fullscreen mode. Exclusive - // fullscreen programs acquire an undocumented mutex: __DDrawExclMode__. If - // this is acquired, it's probably by mpv. 
Even if it isn't, the penalty - // for incorrectly guessing true (dropped frames) is better than the - // penalty for incorrectly guessing false (tearing.) - - // Testing this mutex is another problem. There is no public function for - // testing a mutex without attempting to acquire it, but that method won't - // work because if mpv is in fullscreen, the mutex will already be acquired - // by this thread (in ddraw.dll) and Windows will happily let it be - // acquired again. Instead, use the undocumented NtQueryMutant function to - // test the mutex. - - // Note: SHQueryUserNotificationState uses this mutex internally, but it is - // probably not suitable because it sends a message to the shell instead of - // testing the mutex directly. mpv will check for exclusive mode once per - // frame, so if the shell is not running or not responding, it may cause - // performance issues. - - MUTANT_BASIC_INFORMATION mbi; - NTSTATUS s = pNtQueryMutant(excl_mode_mutex, MutantBasicInformation, &mbi, - sizeof mbi, NULL); - if (!NT_SUCCESS(s)) - return false; - - return !mbi.CurrentCount; -} diff --git a/video/out/x11_common.h b/video/out/x11_common.h index e69640c..1c00963 100644 --- a/video/out/x11_common.h +++ b/video/out/x11_common.h @@ -29,6 +29,11 @@ #include "common/common.h" +#include "config.h" +#if !HAVE_GPL +#error GPL only +#endif + struct vo; struct mp_log; |