diff options
author | James Cowgill <jcowgill@debian.org> | 2018-07-27 14:24:34 +0800 |
---|---|---|
committer | James Cowgill <jcowgill@debian.org> | 2018-07-27 14:24:34 +0800 |
commit | f4faf74f8747c113bd8c1f99e6b6fb1983f11e0d (patch) | |
tree | a9888a5b34d33fa31cc656c856d81333aa0e3ab3 /video/out | |
parent | d96cb5fac5258f82733a6e26aa212939f2ce991d (diff) |
New upstream version 0.29.0
Diffstat (limited to 'video/out')
87 files changed, 6317 insertions, 2472 deletions
diff --git a/video/out/cocoa-cb/events_view.swift b/video/out/cocoa-cb/events_view.swift new file mode 100644 index 0000000..7cc295f --- /dev/null +++ b/video/out/cocoa-cb/events_view.swift @@ -0,0 +1,267 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +import Cocoa + +class EventsView: NSView { + + weak var cocoaCB: CocoaCB! + var mpv: MPVHelper! { + get { return cocoaCB == nil ? nil : cocoaCB.mpv } + } + + var tracker: NSTrackingArea? + var hasMouseDown: Bool = false + + override var isFlipped: Bool { return true } + override var acceptsFirstResponder: Bool { return true } + + + init(cocoaCB ccb: CocoaCB) { + cocoaCB = ccb + super.init(frame: NSMakeRect(0, 0, 960, 480)) + autoresizingMask = [.viewWidthSizable, .viewHeightSizable] + wantsBestResolutionOpenGLSurface = true + register(forDraggedTypes: [NSFilenamesPboardType, NSURLPboardType]) + } + + required init?(coder: NSCoder) { + fatalError("init(coder:) has not been implemented") + } + + override func updateTrackingAreas() { + if tracker != nil { + removeTrackingArea(tracker!) + } + + tracker = NSTrackingArea(rect: bounds, + options: [.activeAlways, .mouseEnteredAndExited, .mouseMoved, .enabledDuringMouseDrag], + owner: self, userInfo: nil) + addTrackingArea(tracker!) 
+ + if containsMouseLocation() { + cocoa_put_key_with_modifiers(SWIFT_KEY_MOUSE_LEAVE, 0) + } + } + + override func draggingEntered(_ sender: NSDraggingInfo) -> NSDragOperation { + guard let types = sender.draggingPasteboard().types else { return [] } + if types.contains(NSFilenamesPboardType) || types.contains(NSURLPboardType) { + return .copy + } + return [] + } + + override func performDragOperation(_ sender: NSDraggingInfo) -> Bool { + let pb = sender.draggingPasteboard() + guard let types = sender.draggingPasteboard().types else { return false } + if types.contains(NSFilenamesPboardType) { + if let files = pb.propertyList(forType: NSFilenamesPboardType) as? [Any] { + EventsResponder.sharedInstance().handleFilesArray(files) + return true + } + } else if types.contains(NSURLPboardType) { + if let url = pb.propertyList(forType: NSURLPboardType) as? [Any] { + EventsResponder.sharedInstance().handleFilesArray(url) + return true + } + } + return false + } + + override func acceptsFirstMouse(for event: NSEvent?) 
-> Bool { + return true + } + + override func becomeFirstResponder() -> Bool { + return true + } + + override func resignFirstResponder() -> Bool { + return true + } + + override func mouseEntered(with event: NSEvent) { + if mpv.getBoolProperty("input-cursor") { + cocoa_put_key_with_modifiers(SWIFT_KEY_MOUSE_ENTER, 0) + } + } + + override func mouseExited(with event: NSEvent) { + if mpv.getBoolProperty("input-cursor") { + cocoa_put_key_with_modifiers(SWIFT_KEY_MOUSE_LEAVE, 0) + } + cocoaCB.window.hideTitleBar() + } + + override func mouseMoved(with event: NSEvent) { + if mpv != nil && mpv.getBoolProperty("input-cursor") { + signalMouseMovement(event) + } + cocoaCB.window.showTitleBar() + } + + override func mouseDragged(with event: NSEvent) { + if mpv.getBoolProperty("input-cursor") { + signalMouseMovement(event) + } + } + + override func mouseDown(with event: NSEvent) { + if mpv.getBoolProperty("input-cursor") { + signalMouseDown(event) + } + } + + override func mouseUp(with event: NSEvent) { + if mpv.getBoolProperty("input-cursor") { + signalMouseUp(event) + } + cocoaCB.window.isMoving = false + } + + override func rightMouseDown(with event: NSEvent) { + if mpv.getBoolProperty("input-cursor") { + signalMouseDown(event) + } + } + + override func rightMouseUp(with event: NSEvent) { + if mpv.getBoolProperty("input-cursor") { + signalMouseUp(event) + } + } + + override func otherMouseDown(with event: NSEvent) { + if mpv.getBoolProperty("input-cursor") { + signalMouseDown(event) + } + } + + override func otherMouseUp(with event: NSEvent) { + if mpv.getBoolProperty("input-cursor") { + signalMouseUp(event) + } + } + + func signalMouseDown(_ event: NSEvent) { + signalMouseEvent(event, SWIFT_KEY_STATE_DOWN) + if event.clickCount > 1 { + signalMouseEvent(event, SWIFT_KEY_STATE_UP) + } + } + + func signalMouseUp(_ event: NSEvent) { + signalMouseEvent(event, SWIFT_KEY_STATE_UP) + } + + func signalMouseEvent(_ event: NSEvent, _ state: Int32) { + hasMouseDown = state == 
SWIFT_KEY_STATE_DOWN + let mpkey = getMpvButton(event) + cocoa_put_key_with_modifiers((mpkey | state), Int32(event.modifierFlags.rawValue)); + } + + func signalMouseMovement(_ event: NSEvent) { + var point = convert(event.locationInWindow, from: nil) + point = convertToBacking(point) + point.y = -point.y + + cocoaCB.window.updateMovableBackground(point) + if !cocoaCB.window.isMoving { + mpv.setMousePosition(point) + } + } + + func preciseScroll(_ event: NSEvent) { + var delta: Double + var cmd: Int32 + + if fabs(event.deltaY) >= fabs(event.deltaX) { + delta = Double(event.deltaY) * 0.1; + cmd = delta > 0 ? SWIFT_WHEEL_UP : SWIFT_WHEEL_DOWN; + } else { + delta = Double(event.deltaX) * 0.1; + cmd = delta > 0 ? SWIFT_WHEEL_RIGHT : SWIFT_WHEEL_LEFT; + } + + mpv.putAxis(cmd, delta: fabs(delta)) + } + + override func scrollWheel(with event: NSEvent) { + if !mpv.getBoolProperty("input-cursor") { + return + } + + if event.hasPreciseScrollingDeltas { + preciseScroll(event) + } else { + let modifiers = event.modifierFlags + let deltaX = modifiers.contains(.shift) ? event.scrollingDeltaY : event.scrollingDeltaX + let deltaY = modifiers.contains(.shift) ? event.scrollingDeltaX : event.scrollingDeltaY + var mpkey: Int32 + + if fabs(deltaY) >= fabs(deltaX) { + mpkey = deltaY > 0 ? SWIFT_WHEEL_UP : SWIFT_WHEEL_DOWN; + } else { + mpkey = deltaX > 0 ? 
SWIFT_WHEEL_RIGHT : SWIFT_WHEEL_LEFT; + } + + cocoa_put_key_with_modifiers(mpkey, Int32(modifiers.rawValue)) + } + } + + func containsMouseLocation() -> Bool { + if cocoaCB == nil { return false } + var topMargin: CGFloat = 0.0 + let menuBarHeight = NSApp.mainMenu!.menuBarHeight + + if cocoaCB.window.isInFullscreen && (menuBarHeight > 0) { + topMargin = cocoaCB.window.titleBarHeight + 1 + menuBarHeight + } + + var vF = window!.screen!.frame + vF.size.height -= topMargin + + let vFW = window!.convertFromScreen(vF) + let vFV = convert(vFW, from: nil) + let pt = convert(window!.mouseLocationOutsideOfEventStream, from: nil) + + var clippedBounds = bounds.intersection(vFV) + if !cocoaCB.window.isInFullscreen { + clippedBounds.origin.y += cocoaCB.window.titleBarHeight + clippedBounds.size.height -= cocoaCB.window.titleBarHeight + } + return clippedBounds.contains(pt) + } + + func canHideCursor() -> Bool { + if cocoaCB.window == nil { return false } + return !hasMouseDown && containsMouseLocation() && window!.isKeyWindow + } + + func getMpvButton(_ event: NSEvent) -> Int32 { + let buttonNumber = event.buttonNumber + switch (buttonNumber) { + case 0: return SWIFT_MBTN_LEFT; + case 1: return SWIFT_MBTN_RIGHT; + case 2: return SWIFT_MBTN_MID; + case 3: return SWIFT_MBTN_BACK; + case 4: return SWIFT_MBTN_FORWARD; + default: return SWIFT_MBTN9 + Int32(buttonNumber - 5); + } + } +} diff --git a/video/out/cocoa-cb/video_layer.swift b/video/out/cocoa-cb/video_layer.swift new file mode 100644 index 0000000..b389327 --- /dev/null +++ b/video/out/cocoa-cb/video_layer.swift @@ -0,0 +1,233 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +import Cocoa +import OpenGL.GL +import OpenGL.GL3 + +class VideoLayer: CAOpenGLLayer { + + weak var cocoaCB: CocoaCB! + var mpv: MPVHelper! { + get { return cocoaCB == nil ? nil : cocoaCB.mpv } + } + + let videoLock = NSLock() + let displayLock = NSLock() + var hasVideo: Bool = false + var needsFlip: Bool = false + var canDrawOffScreen: Bool = false + var cglContext: CGLContextObj? = nil + var surfaceSize: NSSize? + + enum Draw: Int { case normal = 1, atomic, atomicEnd } + var draw: Draw = .normal + + let queue: DispatchQueue = DispatchQueue(label: "io.mpv.queue.draw") + + var needsICCUpdate: Bool = false { + didSet { + if needsICCUpdate == true { + update() + } + } + } + + var inLiveResize: Bool = false { + didSet { + if inLiveResize { + isAsynchronous = true + } + update() + } + } + + init(cocoaCB ccb: CocoaCB) { + cocoaCB = ccb + super.init() + autoresizingMask = [.layerWidthSizable, .layerHeightSizable] + backgroundColor = NSColor.black.cgColor + + CGLCreateContext(copyCGLPixelFormat(forDisplayMask: 0), nil, &cglContext) + var i: GLint = 1 + CGLSetParameter(cglContext!, kCGLCPSwapInterval, &i) + CGLSetCurrentContext(cglContext!) + + mpv.initRender() + mpv.setRenderUpdateCallback(updateCallback, context: self) + mpv.setRenderControlCallback(cocoaCB.controlCallback, context: cocoaCB) + } + + override init(layer: Any) { + let oldLayer = layer as! 
VideoLayer + cocoaCB = oldLayer.cocoaCB + super.init() + } + + required init?(coder: NSCoder) { + fatalError("init(coder:) has not been implemented") + } + + override func canDraw(inCGLContext ctx: CGLContextObj, + pixelFormat pf: CGLPixelFormatObj, + forLayerTime t: CFTimeInterval, + displayTime ts: UnsafePointer<CVTimeStamp>?) -> Bool { + if inLiveResize == false { + isAsynchronous = false + } + return mpv != nil && cocoaCB.backendState == .initialized + } + + override func draw(inCGLContext ctx: CGLContextObj, + pixelFormat pf: CGLPixelFormatObj, + forLayerTime t: CFTimeInterval, + displayTime ts: UnsafePointer<CVTimeStamp>?) { + needsFlip = false + canDrawOffScreen = true + draw(ctx) + } + + func draw(_ ctx: CGLContextObj) { + if draw.rawValue >= Draw.atomic.rawValue { + if draw == .atomic { + draw = .atomicEnd + } else { + atomicDrawingEnd() + } + } + + updateSurfaceSize() + mpv.drawRender(surfaceSize!) + CGLFlushDrawable(ctx) + + if needsICCUpdate { + needsICCUpdate = false + cocoaCB.updateICCProfile() + } + } + + func updateSurfaceSize() { + var dims: [GLint] = [0, 0, 0, 0] + glGetIntegerv(GLenum(GL_VIEWPORT), &dims) + surfaceSize = NSMakeSize(CGFloat(dims[2]), CGFloat(dims[3])) + + if NSEqualSizes(surfaceSize!, NSZeroSize) { + surfaceSize = bounds.size + surfaceSize!.width *= contentsScale + surfaceSize!.height *= contentsScale + } + } + + func atomicDrawingStart() { + if draw == .normal && hasVideo { + NSDisableScreenUpdates() + draw = .atomic + } + } + + func atomicDrawingEnd() { + if draw.rawValue >= Draw.atomic.rawValue { + NSEnableScreenUpdates() + draw = .normal + } + } + + override func copyCGLPixelFormat(forDisplayMask mask: UInt32) -> CGLPixelFormatObj { + let glVersions: [CGLOpenGLProfile] = [ + kCGLOGLPVersion_3_2_Core, + kCGLOGLPVersion_Legacy + ] + + var pix: CGLPixelFormatObj? 
+ var err: CGLError = CGLError(rawValue: 0) + var npix: GLint = 0 + + verLoop : for ver in glVersions { + var glAttributes: [CGLPixelFormatAttribute] = [ + kCGLPFAOpenGLProfile, CGLPixelFormatAttribute(ver.rawValue), + kCGLPFAAccelerated, + kCGLPFADoubleBuffer, + kCGLPFABackingStore, + kCGLPFAAllowOfflineRenderers, + kCGLPFASupportsAutomaticGraphicsSwitching, + _CGLPixelFormatAttribute(rawValue: 0) + ] + + for index in stride(from: glAttributes.count-2, through: 4, by: -1) { + err = CGLChoosePixelFormat(glAttributes, &pix, &npix) + if err == kCGLBadAttribute || err == kCGLBadPixelFormat || pix == nil { + glAttributes.remove(at: index) + } else { + break verLoop + } + } + } + + if err != kCGLNoError || pix == nil { + let errS = String(cString: CGLErrorString(err)) + mpv.sendError("Couldn't create CGL pixel format: \(errS) (\(err.rawValue))") + exit(1) + } + return pix! + } + + override func copyCGLContext(forPixelFormat pf: CGLPixelFormatObj) -> CGLContextObj { + contentsScale = cocoaCB.window.backingScaleFactor + return cglContext! + } + + let updateCallback: mpv_render_update_fn = { (ctx) in + let layer: VideoLayer = MPVHelper.bridge(ptr: ctx!) + layer.update() + } + + override func display() { + displayLock.lock() + let isUpdate = needsFlip + super.display() + CATransaction.flush() + if isUpdate { + if !cocoaCB.window.occlusionState.contains(.visible) && + needsFlip && canDrawOffScreen + { + CGLSetCurrentContext(cglContext!) + draw(cglContext!) 
+ } else if needsFlip { + update() + } + } + displayLock.unlock() + } + + func setVideo(_ state: Bool) { + videoLock.lock() + hasVideo = state + videoLock.unlock() + } + + func update() { + queue.async { + self.videoLock.lock() + if !self.inLiveResize && self.hasVideo { + self.needsFlip = true + self.display() + } + self.videoLock.unlock() + } + } + +} diff --git a/video/out/cocoa-cb/window.swift b/video/out/cocoa-cb/window.swift new file mode 100644 index 0000000..907476f --- /dev/null +++ b/video/out/cocoa-cb/window.swift @@ -0,0 +1,591 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +import Cocoa + +class Window: NSWindow, NSWindowDelegate { + + weak var cocoaCB: CocoaCB! = nil + var mpv: MPVHelper! { + get { return cocoaCB == nil ? nil : cocoaCB.mpv } + } + + var targetScreen: NSScreen? + var previousScreen: NSScreen? + var currentScreen: NSScreen? + var unfScreen: NSScreen? + + var unfsContentFrame: NSRect? 
+ var isInFullscreen: Bool = false + var isAnimating: Bool = false + var isMoving: Bool = false + var forceTargetScreen: Bool = false + + var keepAspect: Bool = true { + didSet { + if !isInFullscreen { + unfsContentFrame = convertToScreen(contentView!.frame) + } + + if keepAspect { + contentAspectRatio = unfsContentFrame!.size + } else { + resizeIncrements = NSSize(width: 1.0, height: 1.0) + } + } + } + + var border: Bool = true { + didSet { if !border { hideTitleBar() } } + } + + var titleBarEffect: NSVisualEffectView? + var titleBar: NSView { + get { return (standardWindowButton(.closeButton)?.superview)! } + } + var titleBarHeight: CGFloat { + get { return NSWindow.frameRect(forContentRect: CGRect.zero, styleMask: .titled).size.height } + } + var titleButtons: [NSButton] { + get { return ([.closeButton, .miniaturizeButton, .zoomButton] as [NSWindowButton]).flatMap { standardWindowButton($0) } } + } + + override var canBecomeKey: Bool { return true } + override var canBecomeMain: Bool { return true } + + override var styleMask: NSWindowStyleMask { + get { return super.styleMask } + set { + let responder = firstResponder + let windowTitle = title + super.styleMask = newValue + makeFirstResponder(responder) + title = windowTitle + } + } + + convenience init(contentRect: NSRect, screen: NSScreen?, view: NSView, cocoaCB ccb: CocoaCB) { + self.init(contentRect: contentRect, + styleMask: [.titled, .closable, .miniaturizable, .resizable], + backing: .buffered, defer: false, screen: screen) + cocoaCB = ccb + title = cocoaCB.title + minSize = NSMakeSize(160, 90) + collectionBehavior = .fullScreenPrimary + delegate = self + contentView!.addSubview(view) + view.frame = contentView!.frame + + unfsContentFrame = convertToScreen(contentView!.frame) + targetScreen = screen! + currentScreen = screen! + unfScreen = screen! + initTitleBar() + + if let app = NSApp as? 
Application { + app.menuBar.register(#selector(setHalfWindowSize), for: MPM_H_SIZE) + app.menuBar.register(#selector(setNormalWindowSize), for: MPM_N_SIZE) + app.menuBar.register(#selector(setDoubleWindowSize), for: MPM_D_SIZE) + app.menuBar.register(#selector(performMiniaturize(_:)), for: MPM_MINIMIZE) + app.menuBar.register(#selector(performZoom(_:)), for: MPM_ZOOM) + } + } + + func initTitleBar() { + var f = contentView!.bounds + f.origin.y = f.size.height - titleBarHeight + f.size.height = titleBarHeight + + styleMask.insert(.fullSizeContentView) + titleBar.alphaValue = 0 + titlebarAppearsTransparent = true + titleBarEffect = NSVisualEffectView(frame: f) + titleBarEffect!.alphaValue = 0 + titleBarEffect!.blendingMode = .withinWindow + titleBarEffect!.autoresizingMask = [.viewWidthSizable, .viewMinYMargin] + + setTitleBarStyle(Int(mpv.macOpts!.macos_title_bar_style)) + contentView!.addSubview(titleBarEffect!, positioned: .above, relativeTo: nil) + } + + func setTitleBarStyle(_ style: Any) { + var effect: String + + if style is Int { + switch style as! Int { + case 4: + effect = "auto" + case 3: + effect = "mediumlight" + case 2: + effect = "light" + case 1: + effect = "ultradark" + case 0: fallthrough + default: + effect = "dark" + } + } else { + effect = style as! String + } + + if effect == "auto" { + let systemStyle = UserDefaults.standard.string(forKey: "AppleInterfaceStyle") + effect = systemStyle == nil ? 
"mediumlight" : "ultradark" + } + + switch effect { + case "mediumlight": + appearance = NSAppearance(named: NSAppearanceNameVibrantLight) + titleBarEffect!.material = .titlebar + titleBarEffect!.state = .followsWindowActiveState + case "light": + appearance = NSAppearance(named: NSAppearanceNameVibrantLight) + titleBarEffect!.material = .light + titleBarEffect!.state = .active + case "ultradark": + appearance = NSAppearance(named: NSAppearanceNameVibrantDark) + titleBarEffect!.material = .titlebar + titleBarEffect!.state = .followsWindowActiveState + case "dark": fallthrough + default: + appearance = NSAppearance(named: NSAppearanceNameVibrantDark) + titleBarEffect!.material = .dark + titleBarEffect!.state = .active + } + } + + func showTitleBar() { + if titleBarEffect == nil || (!border && !isInFullscreen) { return } + let loc = cocoaCB.view.convert(mouseLocationOutsideOfEventStream, from: nil) + + titleButtons.forEach { $0.isHidden = false } + NSAnimationContext.runAnimationGroup({ (context) -> Void in + context.duration = 0.20 + titleBar.animator().alphaValue = 1 + if !isInFullscreen && !isAnimating { + titleBarEffect!.animator().alphaValue = 1 + } + }, completionHandler: nil ) + + if loc.y > titleBarHeight { + hideTitleBarDelayed() + } else { + NSObject.cancelPreviousPerformRequests(withTarget: self, selector: #selector(hideTitleBar), object: nil) + } + } + + func hideTitleBar() { + if titleBarEffect == nil { return } + if isInFullscreen && !isAnimating { + titleBarEffect!.alphaValue = 0 + return + } + NSAnimationContext.runAnimationGroup({ (context) -> Void in + context.duration = 0.20 + titleBar.animator().alphaValue = 0 + titleBarEffect!.animator().alphaValue = 0 + }, completionHandler: { + self.titleButtons.forEach { $0.isHidden = true } + }) + } + + func hideTitleBarDelayed() { + NSObject.cancelPreviousPerformRequests(withTarget: self, + selector: #selector(hideTitleBar), + object: nil) + perform(#selector(hideTitleBar), with: nil, afterDelay: 0.5) + } + 
+ override func toggleFullScreen(_ sender: Any?) { + if isAnimating { + return + } + + isAnimating = true + + targetScreen = cocoaCB.getTargetScreen(forFullscreen: !isInFullscreen) + if targetScreen == nil && previousScreen == nil { + targetScreen = screen + } else if targetScreen == nil { + targetScreen = previousScreen + previousScreen = nil + } else { + previousScreen = screen + } + + if !isInFullscreen { + unfsContentFrame = convertToScreen(contentView!.frame) + unfScreen = screen + } + // move window to target screen when going to fullscreen + if !isInFullscreen && (targetScreen != screen) { + let frame = calculateWindowPosition(for: targetScreen!, withoutBounds: false) + setFrame(frame, display: true) + } + + if mpv.getBoolProperty("native-fs") { + super.toggleFullScreen(sender) + } else { + if !isInFullscreen { + setToFullScreen() + } + else { + setToWindow() + } + } + } + + func customWindowsToEnterFullScreen(for window: NSWindow) -> [NSWindow]? { + return [window] + } + + func customWindowsToExitFullScreen(for window: NSWindow) -> [NSWindow]? 
{ + return [window] + } + + func window(_ window: NSWindow, startCustomAnimationToEnterFullScreenWithDuration duration: TimeInterval) { + cocoaCB.view.layerContentsPlacement = .scaleProportionallyToFit + hideTitleBar() + NSAnimationContext.runAnimationGroup({ (context) -> Void in + context.duration = getFsAnimationDuration(duration - 0.05) + window.animator().setFrame(targetScreen!.frame, display: true) + }, completionHandler: { }) + } + + func window(_ window: NSWindow, startCustomAnimationToExitFullScreenWithDuration duration: TimeInterval) { + let newFrame = calculateWindowPosition(for: targetScreen!, withoutBounds: targetScreen == screen) + let intermediateFrame = aspectFit(rect: newFrame, in: screen!.frame) + cocoaCB.view.layerContentsPlacement = .scaleProportionallyToFill + hideTitleBar() + setFrame(intermediateFrame, display: true) + + NSAnimationContext.runAnimationGroup({ (context) -> Void in + context.duration = getFsAnimationDuration(duration - 0.05) + window.animator().setFrame(newFrame, display: true) + }, completionHandler: { }) + } + + func windowDidEnterFullScreen(_ notification: Notification) { + isInFullscreen = true + cocoaCB.flagEvents(VO_EVENT_FULLSCREEN_STATE) + cocoaCB.updateCusorVisibility() + endAnimation(frame) + showTitleBar() + } + + func windowDidExitFullScreen(_ notification: Notification) { + isInFullscreen = false + cocoaCB.flagEvents(VO_EVENT_FULLSCREEN_STATE) + endAnimation(calculateWindowPosition(for: targetScreen!, withoutBounds: targetScreen == screen)) + cocoaCB.view.layerContentsPlacement = .scaleProportionallyToFit + } + + func windowDidFailToEnterFullScreen(_ window: NSWindow) { + let newFrame = calculateWindowPosition(for: targetScreen!, withoutBounds: targetScreen == screen) + setFrame(newFrame, display: true) + endAnimation() + } + + func windowDidFailToExitFullScreen(_ window: NSWindow) { + let newFrame = targetScreen!.frame + setFrame(newFrame, display: true) + endAnimation() + cocoaCB.view.layerContentsPlacement = 
.scaleProportionallyToFit + } + + func endAnimation(_ newFrame: NSRect = NSZeroRect) { + if !NSEqualRects(newFrame, NSZeroRect) { + NSAnimationContext.runAnimationGroup({ (context) -> Void in + context.duration = 0.01 + self.animator().setFrame(newFrame, display: true) + }, completionHandler: nil ) + } + + isAnimating = false + cocoaCB.layer.update() + cocoaCB.checkShutdown() + } + + func setToFullScreen() { + styleMask.insert(.fullScreen) + NSApp.presentationOptions = [.autoHideMenuBar, .autoHideDock] + setFrame(targetScreen!.frame, display: true) + endAnimation() + isInFullscreen = true + cocoaCB.flagEvents(VO_EVENT_FULLSCREEN_STATE) + cocoaCB.layer.update() + } + + func setToWindow() { + let newFrame = calculateWindowPosition(for: targetScreen!, withoutBounds: targetScreen == screen) + NSApp.presentationOptions = [] + setFrame(newFrame, display: true) + styleMask.remove(.fullScreen) + endAnimation() + isInFullscreen = false + cocoaCB.flagEvents(VO_EVENT_FULLSCREEN_STATE) + cocoaCB.layer.update() + } + + func getFsAnimationDuration(_ def: Double) -> Double{ + let duration = mpv.getStringProperty("macos-fs-animation-duration") ?? "default" + if duration == "default" { + return def + } else { + return Double(duration)!/1000 + } + } + + func setOnTop(_ state: Bool, _ ontopLevel: Any) { + if state { + if ontopLevel is Int { + switch ontopLevel as! Int { + case -1: + level = Int(CGWindowLevelForKey(.floatingWindow)) + case -2: + level = Int(CGWindowLevelForKey(.statusWindow))+1 + default: + level = ontopLevel as! Int + } + } else { + switch ontopLevel as! String { + case "window": + level = Int(CGWindowLevelForKey(.floatingWindow)) + case "system": + level = Int(CGWindowLevelForKey(.statusWindow))+1 + default: + level = Int(ontopLevel as! String)! 
+ } + } + collectionBehavior.remove(.transient) + collectionBehavior.insert(.managed) + } else { + level = Int(CGWindowLevelForKey(.normalWindow)) + } + } + + func updateMovableBackground(_ pos: NSPoint) { + if !isInFullscreen { + isMovableByWindowBackground = mpv.canBeDraggedAt(pos) + } else { + isMovableByWindowBackground = false + } + } + + func updateFrame(_ rect: NSRect) { + if rect != frame { + let cRect = frameRect(forContentRect: rect) + unfsContentFrame = rect + setFrame(cRect, display: true) + } + } + + func updateSize(_ size: NSSize) { + if size != contentView!.frame.size { + let newContentFrame = centeredContentSize(for: frame, size: size) + if !isInFullscreen { + updateFrame(newContentFrame) + } else { + unfsContentFrame = newContentFrame + } + } + } + + override func setFrame(_ frameRect: NSRect, display flag: Bool) { + let newFrame = !isAnimating && isInFullscreen ? targetScreen!.frame : + frameRect + super.setFrame(newFrame, display: flag) + + if keepAspect { + contentAspectRatio = unfsContentFrame!.size + } + } + + func centeredContentSize(for rect: NSRect, size sz: NSSize) -> NSRect { + let cRect = contentRect(forFrameRect: rect) + let dx = (cRect.size.width - sz.width) / 2 + let dy = (cRect.size.height - sz.height) / 2 + return NSInsetRect(cRect, dx, dy) + } + + func aspectFit(rect r: NSRect, in rTarget: NSRect) -> NSRect { + var s = rTarget.width / r.width; + if r.height*s > rTarget.height { + s = rTarget.height / r.height + } + let w = r.width * s + let h = r.height * s + return NSRect(x: rTarget.midX - w/2, y: rTarget.midY - h/2, width: w, height: h) + } + + func calculateWindowPosition(for tScreen: NSScreen, withoutBounds: Bool) -> NSRect { + var newFrame = frameRect(forContentRect: unfsContentFrame!) 
+ let targetFrame = tScreen.frame + let targetVisibleFrame = tScreen.visibleFrame + let unfsScreenFrame = unfScreen!.frame + let visibleWindow = NSIntersectionRect(unfsScreenFrame, newFrame) + + // calculate visible area of every side + let left = newFrame.origin.x - unfsScreenFrame.origin.x + let right = unfsScreenFrame.size.width - + (newFrame.origin.x - unfsScreenFrame.origin.x + newFrame.size.width) + let bottom = newFrame.origin.y - unfsScreenFrame.origin.y + let top = unfsScreenFrame.size.height - + (newFrame.origin.y - unfsScreenFrame.origin.y + newFrame.size.height) + + // normalize visible areas, decide which one to take horizontal/vertical + var xPer = (unfsScreenFrame.size.width - visibleWindow.size.width) + var yPer = (unfsScreenFrame.size.height - visibleWindow.size.height) + if xPer != 0 { xPer = (left >= 0 || right < 0 ? left : right) / xPer } + if yPer != 0 { yPer = (bottom >= 0 || top < 0 ? bottom : top) / yPer } + + // calculate visible area for every side for target screen + let xNewLeft = targetFrame.origin.x + + (targetFrame.size.width - visibleWindow.size.width) * xPer + let xNewRight = targetFrame.origin.x + targetFrame.size.width - + (targetFrame.size.width - visibleWindow.size.width) * xPer - newFrame.size.width + let yNewBottom = targetFrame.origin.y + + (targetFrame.size.height - visibleWindow.size.height) * yPer + let yNewTop = targetFrame.origin.y + targetFrame.size.height - + (targetFrame.size.height - visibleWindow.size.height) * yPer - newFrame.size.height + + // calculate new coordinates, decide which one to take horizontal/vertical + newFrame.origin.x = left >= 0 || right < 0 ? xNewLeft : xNewRight + newFrame.origin.y = bottom >= 0 || top < 0 ? 
yNewBottom : yNewTop + + // don't place new window on top of a visible menubar + let topMar = targetFrame.size.height - + (newFrame.origin.y - targetFrame.origin.y + newFrame.size.height) + let menuBarHeight = targetFrame.size.height - + (targetVisibleFrame.size.height + targetVisibleFrame.origin.y) + if topMar < menuBarHeight { + newFrame.origin.y -= top - menuBarHeight + } + + if withoutBounds { + return newFrame + } + + // screen bounds right and left + if newFrame.origin.x + newFrame.size.width > targetFrame.origin.x + targetFrame.size.width { + newFrame.origin.x = targetFrame.origin.x + targetFrame.size.width - newFrame.size.width + } + if newFrame.origin.x < targetFrame.origin.x { + newFrame.origin.x = targetFrame.origin.x + } + + // screen bounds top and bottom + if newFrame.origin.y + newFrame.size.height > targetFrame.origin.y + targetFrame.size.height { + newFrame.origin.y = targetFrame.origin.y + targetFrame.size.height - newFrame.size.height + } + if newFrame.origin.y < targetFrame.origin.y { + newFrame.origin.y = targetFrame.origin.y + } + return newFrame + } + + override func constrainFrameRect(_ frameRect: NSRect, to tScreen: NSScreen?) -> NSRect { + if (isAnimating && !isInFullscreen) || (!isAnimating && isInFullscreen) { + return frameRect + } + + var nf: NSRect = frameRect + let ts: NSScreen = tScreen ?? screen ?? NSScreen.main()! + let of: NSRect = frame + let vf: NSRect = (isAnimating ? targetScreen! 
: ts).visibleFrame + let ncf: NSRect = contentRect(forFrameRect: nf) + + // screen bounds top and bottom + if NSMaxY(nf) > NSMaxY(vf) { + nf.origin.y = NSMaxY(vf) - NSHeight(nf) + } + if NSMaxY(ncf) < NSMinY(vf) { + nf.origin.y = NSMinY(vf) + NSMinY(ncf) - NSMaxY(ncf) + } + + // screen bounds right and left + if NSMinX(nf) > NSMaxX(vf) { + nf.origin.x = NSMaxX(vf) - NSWidth(nf) + } + if NSMaxX(nf) < NSMinX(vf) { + nf.origin.x = NSMinX(vf) + } + + if NSHeight(nf) < NSHeight(vf) && NSHeight(of) > NSHeight(vf) && !isInFullscreen { + // If the window height is smaller than the visible frame, but it was + // bigger previously recenter the smaller window vertically. This is + // needed to counter the 'snap to top' behaviour. + nf.origin.y = (NSHeight(vf) - NSHeight(nf)) / 2 + } + return nf + } + + func setNormalWindowSize() { setWindowScale(1.0) } + func setHalfWindowSize() { setWindowScale(0.5) } + func setDoubleWindowSize() { setWindowScale(2.0) } + + func setWindowScale(_ scale: Double) { + mpv.commandAsync(["osd-auto", "set", "window-scale", "\(scale)"]) + } + + func windowDidChangeScreen(_ notification: Notification) { + if screen == nil { + return + } + if !isAnimating && (currentScreen != screen) { + previousScreen = screen + } + if currentScreen != screen { + cocoaCB.updateDisplaylink() + } + currentScreen = screen + } + + func windowDidChangeScreenProfile(_ notification: Notification) { + cocoaCB.layer.needsICCUpdate = true + } + + func windowDidChangeBackingProperties(_ notification: Notification) { + cocoaCB.layer.contentsScale = backingScaleFactor + } + + func windowWillStartLiveResize(_ notification: Notification) { + cocoaCB.layer.inLiveResize = true + } + + func windowDidEndLiveResize(_ notification: Notification) { + cocoaCB.layer.inLiveResize = false + } + + func windowShouldClose(_ sender: Any) -> Bool { + cocoa_put_key(SWIFT_KEY_CLOSE_WIN) + return false + } + + func windowDidResignKey(_ notification: Notification) { + 
cocoaCB.setCursorVisiblility(true) + } + + func windowDidBecomeKey(_ notification: Notification) { + cocoaCB.updateCusorVisibility() + } + + func windowWillMove(_ notification: Notification) { + isMoving = true + } +} diff --git a/video/out/cocoa/window.m b/video/out/cocoa/window.m index 2feaab9..3762987 100644 --- a/video/out/cocoa/window.m +++ b/video/out/cocoa/window.m @@ -45,7 +45,8 @@ @synthesize targetScreen = _target_screen; @synthesize previousScreen = _previous_screen; @synthesize currentScreen = _current_screen; -@synthesize unfScreen = _unf_Screen; +@synthesize unfScreen = _unf_screen; + - (id)initWithContentRect:(NSRect)content_rect styleMask:(NSWindowStyleMask)style_mask backing:(NSBackingStoreType)buffering_type diff --git a/video/out/cocoa_cb_common.swift b/video/out/cocoa_cb_common.swift new file mode 100644 index 0000000..a4aae9f --- /dev/null +++ b/video/out/cocoa_cb_common.swift @@ -0,0 +1,514 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +import Cocoa +import IOKit.pwr_mgt + +class CocoaCB: NSObject { + + var mpv: MPVHelper! + var window: Window! + var view: EventsView! + var layer: VideoLayer! + var link: CVDisplayLink? 
+ + var cursorHidden: Bool = false + var cursorVisibilityWanted: Bool = true + var isShuttingDown: Bool = false + + var title: String = "mpv" { + didSet { if window != nil { window.title = title } } + } + + enum State { + case uninitialized + case needsInit + case initialized + } + var backendState: State = .uninitialized + + let eventsLock = NSLock() + var events: Int = 0 + + var lightSensor: io_connect_t = 0 + var lastLmu: UInt64 = 0 + var lightSensorIOPort: IONotificationPortRef? + var displaySleepAssertion: IOPMAssertionID = IOPMAssertionID(0) + + let queue: DispatchQueue = DispatchQueue(label: "io.mpv.queue") + + convenience init(_ mpvHandle: OpaquePointer) { + self.init() + mpv = MPVHelper(mpvHandle) + layer = VideoLayer(cocoaCB: self) + } + + func preinit(_ vo: UnsafeMutablePointer<vo>) { + if backendState == .uninitialized { + backendState = .needsInit + + if let app = NSApp as? Application { + let ptr = mp_get_config_group(mpv.mpctx!, vo.pointee.global, + app.getMacOSConf()) + mpv.macOpts = UnsafeMutablePointer<macos_opts>(OpaquePointer(ptr))!.pointee + } + + view = EventsView(cocoaCB: self) + view.layer = layer + view.wantsLayer = true + view.layerContentsPlacement = .scaleProportionallyToFit + startDisplayLink(vo) + initLightSensor() + addDisplayReconfigureObserver() + } + } + + func uninit() { + layer.setVideo(false) + window.orderOut(nil) + } + + func reconfig(_ vo: UnsafeMutablePointer<vo>) { + if backendState == .needsInit { + DispatchQueue.main.sync { self.initBackend(vo) } + } else { + DispatchQueue.main.async { + self.layer.setVideo(true) + self.updateWindowSize(vo) + self.layer.update() + } + } + } + + func initBackend(_ vo: UnsafeMutablePointer<vo>) { + let opts: mp_vo_opts = vo.pointee.opts.pointee + NSApp.setActivationPolicy(.regular) + setAppIcon() + + let targetScreen = getScreenBy(id: Int(opts.screen_id)) ?? 
NSScreen.main() + let wr = getWindowGeometry(forScreen: targetScreen!, videoOut: vo) + window = Window(contentRect: wr, screen: targetScreen, view: view, cocoaCB: self) + updateICCProfile() + window.setOnTop(Bool(opts.ontop), Int(opts.ontop_level)) + window.keepAspect = Bool(opts.keepaspect_window) + window.title = title + window.border = Bool(opts.border) + + window.isRestorable = false + window.makeMain() + window.makeKeyAndOrderFront(nil) + NSApp.activate(ignoringOtherApps: true) + layer.setVideo(true) + + if Bool(opts.fullscreen) { + DispatchQueue.main.async { + self.window.toggleFullScreen(nil) + } + } else { + window.isMovableByWindowBackground = true + } + + backendState = .initialized + } + + func updateWindowSize(_ vo: UnsafeMutablePointer<vo>) { + let opts: mp_vo_opts = vo.pointee.opts.pointee + let targetScreen = getScreenBy(id: Int(opts.screen_id)) ?? NSScreen.main() + let wr = getWindowGeometry(forScreen: targetScreen!, videoOut: vo) + if !window.isVisible { + window.makeKeyAndOrderFront(nil) + } + layer.atomicDrawingStart() + window.updateSize(wr.size) + } + + func setAppIcon() { + if let app = NSApp as? Application { + NSApp.applicationIconImage = app.getMPVIcon() + } + } + + let linkCallback: CVDisplayLinkOutputCallback = { + (displayLink: CVDisplayLink, + inNow: UnsafePointer<CVTimeStamp>, + inOutputTime: UnsafePointer<CVTimeStamp>, + flagsIn: CVOptionFlags, + flagsOut: UnsafeMutablePointer<CVOptionFlags>, + displayLinkContext: UnsafeMutableRawPointer?) -> CVReturn in + let ccb: CocoaCB = MPVHelper.bridge(ptr: displayLinkContext!) + ccb.mpv.reportRenderFlip() + return kCVReturnSuccess + } + + func startDisplayLink(_ vo: UnsafeMutablePointer<vo>) { + let opts: mp_vo_opts = vo.pointee.opts.pointee + let screen = getScreenBy(id: Int(opts.screen_id)) ?? NSScreen.main() + let displayId = screen!.deviceDescription["NSScreenNumber"] as! 
UInt32 + + CVDisplayLinkCreateWithActiveCGDisplays(&link) + CVDisplayLinkSetCurrentCGDisplay(link!, displayId) + if #available(macOS 10.12, *) { + CVDisplayLinkSetOutputHandler(link!) { link, now, out, inFlags, outFlags -> CVReturn in + self.mpv.reportRenderFlip() + return kCVReturnSuccess + } + } else { + CVDisplayLinkSetOutputCallback(link!, linkCallback, MPVHelper.bridge(obj: self)) + } + CVDisplayLinkStart(link!) + } + + func stopDisplaylink() { + if link != nil && CVDisplayLinkIsRunning(link!) { + CVDisplayLinkStop(link!) + } + } + + func updateDisplaylink() { + let displayId = UInt32(window.screen!.deviceDescription["NSScreenNumber"] as! Int) + CVDisplayLinkSetCurrentCGDisplay(link!, displayId) + + queue.asyncAfter(deadline: DispatchTime.now() + 0.1) { + self.flagEvents(VO_EVENT_WIN_STATE) + } + } + + func currentFps() -> Double { + var actualFps = CVDisplayLinkGetActualOutputVideoRefreshPeriod(link!) + let nominalData = CVDisplayLinkGetNominalOutputVideoRefreshPeriod(link!) + + if (nominalData.flags & Int32(CVTimeFlags.isIndefinite.rawValue)) < 1 { + let nominalFps = Double(nominalData.timeScale) / Double(nominalData.timeValue) + + if actualFps > 0 { + actualFps = 1/actualFps + } + + if fabs(actualFps - nominalFps) > 0.1 { + mpv.sendVerbose("Falling back to nominal display refresh rate: \(nominalFps)") + return nominalFps + } else { + return actualFps + } + } + mpv.sendWarning("Falling back to standard display refresh rate: 60Hz") + return 60.0 + } + + func enableDisplaySleep() { + IOPMAssertionRelease(displaySleepAssertion) + displaySleepAssertion = IOPMAssertionID(0) + } + + func disableDisplaySleep() { + if displaySleepAssertion != IOPMAssertionID(0) { return } + IOPMAssertionCreateWithName( + kIOPMAssertionTypePreventUserIdleDisplaySleep as CFString, + IOPMAssertionLevel(kIOPMAssertionLevelOn), + "io.mpv.video_playing_back" as CFString, + &displaySleepAssertion) + } + + func updateCusorVisibility() { + setCursorVisiblility(cursorVisibilityWanted) + } + + 
func setCursorVisiblility(_ visible: Bool) { + let visibility = visible ? true : !view.canHideCursor() + + if visibility && cursorHidden { + NSCursor.unhide() + cursorHidden = false; + } else if !visibility && !cursorHidden { + NSCursor.hide() + cursorHidden = true + } + } + + func updateICCProfile() { + mpv.setRenderICCProfile(window.screen!.colorSpace!) + layer.colorspace = window.screen!.colorSpace!.cgColorSpace! + } + + func lmuToLux(_ v: UInt64) -> Int { + // the polinomial approximation for apple lmu value -> lux was empirically + // derived by firefox developers (Apple provides no documentation). + // https://bugzilla.mozilla.org/show_bug.cgi?id=793728 + let power_c4 = 1 / pow(10, 27) + let power_c3 = 1 / pow(10, 19) + let power_c2 = 1 / pow(10, 12) + let power_c1 = 1 / pow(10, 5) + + let term4 = -3.0 * power_c4 * pow(Decimal(v), 4) + let term3 = 2.6 * power_c3 * pow(Decimal(v), 3) + let term2 = -3.4 * power_c2 * pow(Decimal(v), 2) + let term1 = 3.9 * power_c1 * Decimal(v) + + let lux = Int(ceil( Double((term4 + term3 + term2 + term1 - 0.19) as NSNumber))) + return Int(lux > 0 ? lux : 0) + } + + var lightSensorCallback: IOServiceInterestCallback = { (ctx, service, messageType, messageArgument) -> Void in + let ccb: CocoaCB = MPVHelper.bridge(ptr: ctx!) 
+ + var outputs: UInt32 = 2 + var values: [UInt64] = [0, 0] + + var kr = IOConnectCallMethod(ccb.lightSensor, 0, nil, 0, nil, 0, &values, &outputs, nil, nil) + if kr == KERN_SUCCESS { + var mean = (values[0] + values[1]) / 2 + if ccb.lastLmu != mean { + ccb.lastLmu = mean + ccb.mpv.setRenderLux(ccb.lmuToLux(ccb.lastLmu)) + } + } + } + + func initLightSensor() { + let srv = IOServiceGetMatchingService(kIOMasterPortDefault, IOServiceMatching("AppleLMUController")) + if srv == IO_OBJECT_NULL { + mpv.sendVerbose("Can't find an ambient light sensor") + return + } + + lightSensorIOPort = IONotificationPortCreate(kIOMasterPortDefault) + IONotificationPortSetDispatchQueue(lightSensorIOPort, queue) + var n = io_object_t() + IOServiceAddInterestNotification(lightSensorIOPort, srv, kIOGeneralInterest, lightSensorCallback, MPVHelper.bridge(obj: self), &n) + let kr = IOServiceOpen(srv, mach_task_self_, 0, &lightSensor) + IOObjectRelease(srv) + + if kr != KERN_SUCCESS { + mpv.sendVerbose("Can't start ambient light sensor connection") + return + } + lightSensorCallback(MPVHelper.bridge(obj: self), 0, 0, nil) + } + + func uninitLightSensor() { + if lightSensorIOPort != nil { + IONotificationPortDestroy(lightSensorIOPort) + IOObjectRelease(lightSensor) + } + } + + var reconfigureCallback: CGDisplayReconfigurationCallBack = { (display, flags, userInfo) in + if flags.contains(.setModeFlag) { + let ccb: CocoaCB = MPVHelper.bridge(ptr: userInfo!) + let displayID = (ccb.window.screen!.deviceDescription["NSScreenNumber"] as! 
NSNumber).intValue + if UInt32(displayID) == display { + ccb.mpv.sendVerbose("Detected display mode change, updating screen refresh rate\n"); + ccb.flagEvents(VO_EVENT_WIN_STATE) + } + } + } + + func addDisplayReconfigureObserver() { + CGDisplayRegisterReconfigurationCallback(reconfigureCallback, MPVHelper.bridge(obj: self)) + } + + func removeDisplayReconfigureObserver() { + CGDisplayRemoveReconfigurationCallback(reconfigureCallback, MPVHelper.bridge(obj: self)) + } + + func getTargetScreen(forFullscreen fs: Bool) -> NSScreen? { + let screenType = fs ? "fs-screen" : "screen" + let screenID = mpv.getStringProperty(screenType) ?? "current" + + switch screenID { + case "current", "default", "all": + return getScreenBy(id: -1) + default: + return getScreenBy(id: Int(screenID)!) + } + } + + func getScreenBy(id screenID: Int) -> NSScreen? { + let screens = NSScreen.screens() + if screenID >= screens!.count { + mpv.sendInfo("Screen ID \(screenID) does not exist, falling back to current device") + return nil + } else if screenID < 0 { + return nil + } + return screens![screenID] + } + + func getWindowGeometry(forScreen targetScreen: NSScreen, + videoOut vo: UnsafeMutablePointer<vo>) -> NSRect { + let r = targetScreen.convertRectToBacking(targetScreen.frame) + var screenRC: mp_rect = mp_rect(x0: Int32(0), + y0: Int32(0), + x1: Int32(r.size.width), + y1: Int32(r.size.height)) + + var geo: vo_win_geometry = vo_win_geometry() + vo_calc_window_geometry2(vo, &screenRC, Double(targetScreen.backingScaleFactor), &geo) + + // flip y coordinates + geo.win.y1 = Int32(r.size.height) - geo.win.y1 + geo.win.y0 = Int32(r.size.height) - geo.win.y0 + + let wr = NSMakeRect(CGFloat(geo.win.x0), CGFloat(geo.win.y1), + CGFloat(geo.win.x1 - geo.win.x0), + CGFloat(geo.win.y0 - geo.win.y1)) + return targetScreen.convertRectFromBacking(wr) + } + + func flagEvents(_ ev: Int) { + eventsLock.lock() + events |= ev + eventsLock.unlock() + } + + func checkEvents() -> Int { + eventsLock.lock() + let ev = 
events + events = 0 + eventsLock.unlock() + return ev + } + + var controlCallback: mp_render_cb_control_fn = { ( vo, ctx, events, request, data ) -> Int32 in + let ccb: CocoaCB = MPVHelper.bridge(ptr: ctx!) + + switch mp_voctrl(request) { + case VOCTRL_CHECK_EVENTS: + events!.pointee = Int32(ccb.checkEvents()) + return VO_TRUE + case VOCTRL_FULLSCREEN: + DispatchQueue.main.async { + ccb.window.toggleFullScreen(nil) + } + return VO_TRUE + case VOCTRL_GET_FULLSCREEN: + let fsData = data!.assumingMemoryBound(to: Int32.self) + fsData.pointee = ccb.window.isInFullscreen ? 1 : 0 + return VO_TRUE + case VOCTRL_GET_DISPLAY_FPS: + let fps = data!.assumingMemoryBound(to: CDouble.self) + fps.pointee = ccb.currentFps() + return VO_TRUE + case VOCTRL_RESTORE_SCREENSAVER: + ccb.enableDisplaySleep() + return VO_TRUE + case VOCTRL_KILL_SCREENSAVER: + ccb.disableDisplaySleep() + return VO_TRUE + case VOCTRL_SET_CURSOR_VISIBILITY: + ccb.cursorVisibilityWanted = data!.assumingMemoryBound(to: CBool.self).pointee + DispatchQueue.main.async { + ccb.setCursorVisiblility(ccb.cursorVisibilityWanted) + } + return VO_TRUE + case VOCTRL_SET_UNFS_WINDOW_SIZE: + let sizeData = data!.assumingMemoryBound(to: Int32.self) + let size = UnsafeBufferPointer(start: sizeData, count: 2) + var rect = NSMakeRect(0, 0, CGFloat(size[0]), CGFloat(size[1])) + DispatchQueue.main.async { + if !ccb.mpv.getBoolProperty("hidpi-window-scale") { + rect = ccb.window.currentScreen!.convertRectFromBacking(rect) + } + ccb.window.updateSize(rect.size) + } + return VO_TRUE + case VOCTRL_GET_WIN_STATE: + let minimized = data!.assumingMemoryBound(to: Int32.self) + minimized.pointee = ccb.window.isMiniaturized ? 
VO_WIN_STATE_MINIMIZED : Int32(0) + return VO_TRUE + case VOCTRL_UPDATE_WINDOW_TITLE: + let titleData = data!.assumingMemoryBound(to: Int8.self) + let title = String(cString: titleData) + DispatchQueue.main.async { + ccb.title = String(cString: titleData) + } + return VO_TRUE + case VOCTRL_PREINIT: + DispatchQueue.main.sync { ccb.preinit(vo!) } + return VO_TRUE + case VOCTRL_UNINIT: + DispatchQueue.main.async { ccb.uninit() } + return VO_TRUE + case VOCTRL_RECONFIG: + ccb.reconfig(vo!) + return VO_TRUE + default: + return VO_NOTIMPL + } + } + + func shutdown(_ destroy: Bool = false) { + setCursorVisiblility(true) + layer.setVideo(false) + stopDisplaylink() + uninitLightSensor() + removeDisplayReconfigureObserver() + mpv.deinitRender() + mpv.deinitMPV(destroy) + } + + func checkShutdown() { + if isShuttingDown { + shutdown(true) + } + } + + func processEvent(_ event: UnsafePointer<mpv_event>) { + switch event.pointee.event_id { + case MPV_EVENT_SHUTDOWN: + if window != nil && window.isAnimating { + isShuttingDown = true + return + } + shutdown() + case MPV_EVENT_PROPERTY_CHANGE: + if backendState == .initialized { + handlePropertyChange(event) + } + default: + break + } + } + + func handlePropertyChange(_ event: UnsafePointer<mpv_event>) { + let pData = OpaquePointer(event.pointee.data) + guard let property = UnsafePointer<mpv_event_property>(pData)?.pointee else { + return + } + + switch String(cString: property.name) { + case "border": + if let data = MPVHelper.mpvFlagToBool(property.data) { + window.border = data + } + case "ontop": + if let data = MPVHelper.mpvFlagToBool(property.data) { + window.setOnTop(data, mpv.getStringProperty("ontop-level") ?? 
"window") + } + case "keepaspect-window": + if let data = MPVHelper.mpvFlagToBool(property.data) { + window.keepAspect = data + } + case "macos-title-bar-style": + if let data = MPVHelper.mpvStringArrayToString(property.data) { + window.setTitleBarStyle(data) + } + default: + break + } + } +} diff --git a/video/out/d3d11/context.c b/video/out/d3d11/context.c index b02d2e8..82c7d16 100644 --- a/video/out/d3d11/context.c +++ b/video/out/d3d11/context.c @@ -70,14 +70,6 @@ struct priv { IDXGISwapChain *swapchain; }; -static struct mp_image *d3d11_screenshot(struct ra_swapchain *sw) -{ - struct priv *p = sw->ctx->priv; - if (!p->swapchain) - return NULL; - return mp_d3d11_screenshot(p->swapchain); -} - static struct ra_tex *get_backbuffer(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -131,6 +123,10 @@ static int d3d11_color_depth(struct ra_swapchain *sw) static bool d3d11_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) { struct priv *p = sw->priv; + + if (!p->backbuffer) + return false; + *out_fbo = (struct ra_fbo) { .tex = p->backbuffer, .flip = false, @@ -177,7 +173,6 @@ static void d3d11_uninit(struct ra_ctx *ctx) static const struct ra_swapchain_fns d3d11_swapchain = { .color_depth = d3d11_color_depth, - .screenshot = d3d11_screenshot, .start_frame = d3d11_start_frame, .submit_frame = d3d11_submit_frame, .swap_buffers = d3d11_swap_buffers, @@ -226,6 +221,8 @@ static bool d3d11_init(struct ra_ctx *ctx) goto error; p->backbuffer = get_backbuffer(ctx); + if (!p->backbuffer) + goto error; return true; diff --git a/video/out/d3d11/hwdec_d3d11va.c b/video/out/d3d11/hwdec_d3d11va.c index d83fdc5..8d22fe3 100644 --- a/video/out/d3d11/hwdec_d3d11va.c +++ b/video/out/d3d11/hwdec_d3d11va.c @@ -104,9 +104,12 @@ static int init(struct ra_hwdec *hw) ID3D10Multithread_SetMultithreadProtected(multithread, TRUE); ID3D10Multithread_Release(multithread); + static const int subfmts[] = {IMGFMT_NV12, IMGFMT_P010, 0}; p->hwctx = (struct mp_hwdec_ctx){ .driver_name = 
hw->driver->name, .av_device_ref = d3d11_wrap_device_ref(p->device), + .supported_formats = subfmts, + .hw_imgfmt = IMGFMT_D3D11, }; hwdec_devices_add(hw->devs, &p->hwctx); return 0; @@ -236,7 +239,7 @@ static void mapper_unmap(struct ra_hwdec_mapper *mapper) const struct ra_hwdec_driver ra_hwdec_d3d11va = { .name = "d3d11va", .priv_size = sizeof(struct priv_owner), - .imgfmts = {IMGFMT_D3D11VA, IMGFMT_D3D11NV12, 0}, + .imgfmts = {IMGFMT_D3D11, 0}, .init = init, .uninit = uninit, .mapper = &(const struct ra_hwdec_mapper_driver){ diff --git a/video/out/d3d11/hwdec_dxva2dxgi.c b/video/out/d3d11/hwdec_dxva2dxgi.c new file mode 100644 index 0000000..97471d0 --- /dev/null +++ b/video/out/d3d11/hwdec_dxva2dxgi.c @@ -0,0 +1,465 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <windows.h> +#include <d3d9.h> +#include <d3d11.h> +#include <dxva2api.h> + +#include "common/common.h" +#include "osdep/windows_utils.h" +#include "video/hwdec.h" +#include "video/d3d.h" +#include "video/out/d3d11/ra_d3d11.h" +#include "video/out/gpu/hwdec.h" + +struct priv_owner { + struct mp_hwdec_ctx hwctx; + ID3D11Device *dev11; + IDirect3DDevice9Ex *dev9; +}; + +struct queue_surf { + ID3D11Texture2D *tex11; + ID3D11Query *idle11; + ID3D11Texture2D *stage11; + IDirect3DTexture9 *tex9; + IDirect3DSurface9 *surf9; + IDirect3DSurface9 *stage9; + struct ra_tex *tex; + + bool busy11; // The surface is currently being used by D3D11 +}; + +struct priv { + ID3D11Device *dev11; + ID3D11DeviceContext *ctx11; + IDirect3DDevice9Ex *dev9; + + // Surface queue stuff. Following Microsoft recommendations, a queue of + // surfaces is used to share images between D3D9 and D3D11. This allows + // multiple D3D11 frames to be in-flight at once. + struct queue_surf **queue; + int queue_len; + int queue_pos; +}; + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + hwdec_devices_remove(hw->devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); + SAFE_RELEASE(p->dev11); + SAFE_RELEASE(p->dev9); +} + +static int init(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + IDirect3D9Ex *d3d9ex = NULL; + int ret = -1; + HRESULT hr; + + if (!ra_is_d3d11(hw->ra)) + goto done; + p->dev11 = ra_d3d11_get_device(hw->ra); + if (!p->dev11) + goto done; + + d3d_load_dlls(); + if (!d3d9_dll) { + MP_FATAL(hw, "Failed to load \"d3d9.dll\": %s\n", mp_LastError_to_str()); + goto done; + } + if (!dxva2_dll) { + MP_FATAL(hw, "Failed to load \"dxva2.dll\": %s\n", mp_LastError_to_str()); + goto done; + } + + HRESULT (WINAPI *Direct3DCreate9Ex)(UINT SDKVersion, IDirect3D9Ex **ppD3D); + Direct3DCreate9Ex = (void *)GetProcAddress(d3d9_dll, "Direct3DCreate9Ex"); + if (!Direct3DCreate9Ex) { + MP_FATAL(hw, "Direct3D 9Ex not supported\n"); + goto done; + 
} + + hr = Direct3DCreate9Ex(D3D_SDK_VERSION, &d3d9ex); + if (FAILED(hr)) { + MP_FATAL(hw, "Couldn't create Direct3D9Ex: %s\n", mp_HRESULT_to_str(hr)); + goto done; + } + + D3DPRESENT_PARAMETERS pparams = { + .BackBufferWidth = 16, + .BackBufferHeight = 16, + .BackBufferCount = 1, + .SwapEffect = D3DSWAPEFFECT_DISCARD, + .hDeviceWindow = GetDesktopWindow(), + .Windowed = TRUE, + .Flags = D3DPRESENTFLAG_VIDEO, + }; + hr = IDirect3D9Ex_CreateDeviceEx(d3d9ex, D3DADAPTER_DEFAULT, + D3DDEVTYPE_HAL, GetDesktopWindow(), D3DCREATE_NOWINDOWCHANGES | + D3DCREATE_FPU_PRESERVE | D3DCREATE_HARDWARE_VERTEXPROCESSING | + D3DCREATE_DISABLE_PSGP_THREADING | D3DCREATE_MULTITHREADED, &pparams, + NULL, &p->dev9); + if (FAILED(hr)) { + MP_FATAL(hw, "Failed to create Direct3D9Ex device: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + // Check if it's possible to StretchRect() from NV12 to XRGB surfaces + hr = IDirect3D9Ex_CheckDeviceFormatConversion(d3d9ex, D3DADAPTER_DEFAULT, + D3DDEVTYPE_HAL, MAKEFOURCC('N', 'V', '1', '2'), D3DFMT_X8R8G8B8); + if (hr != S_OK) { + MP_FATAL(hw, "Can't StretchRect from NV12 to XRGB surfaces\n"); + goto done; + } + + p->hwctx = (struct mp_hwdec_ctx){ + .driver_name = hw->driver->name, + .av_device_ref = d3d9_wrap_device_ref((IDirect3DDevice9 *)p->dev9), + }; + hwdec_devices_add(hw->devs, &p->hwctx); + + ret = 0; +done: + SAFE_RELEASE(d3d9ex); + return ret; +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *o = mapper->owner->priv; + struct priv *p = mapper->priv; + + p->dev11 = o->dev11; + p->dev9 = o->dev9; + ID3D11Device_GetImmediateContext(o->dev11, &p->ctx11); + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = IMGFMT_RGB0; + mapper->dst_params.hw_subfmt = 0; + return 0; +} + +static void surf_destroy(struct ra_hwdec_mapper *mapper, + struct queue_surf *surf) +{ + if (!surf) + return; + SAFE_RELEASE(surf->tex11); + SAFE_RELEASE(surf->idle11); + SAFE_RELEASE(surf->stage11); + 
SAFE_RELEASE(surf->tex9); + SAFE_RELEASE(surf->surf9); + SAFE_RELEASE(surf->stage9); + ra_tex_free(mapper->ra, &surf->tex); + talloc_free(surf); +} + +static struct queue_surf *surf_create(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + IDXGIResource *res11 = NULL; + bool success = false; + HRESULT hr; + + struct queue_surf *surf = talloc_ptrtype(p, surf); + + D3D11_TEXTURE2D_DESC desc11 = { + .Width = mapper->src->w, + .Height = mapper->src->h, + .MipLevels = 1, + .ArraySize = 1, + .Format = DXGI_FORMAT_B8G8R8X8_UNORM, + .SampleDesc.Count = 1, + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, + .MiscFlags = D3D11_RESOURCE_MISC_SHARED, + }; + hr = ID3D11Device_CreateTexture2D(p->dev11, &desc11, NULL, &surf->tex11); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to create D3D11 texture: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + // Try to use a 16x16 staging texture, unless the source surface is + // smaller. Ideally, a 1x1 texture would be sufficient, but Microsoft's + // D3D9ExDXGISharedSurf example uses 16x16 to avoid driver bugs. 
+ D3D11_TEXTURE2D_DESC sdesc11 = { + .Width = MPMIN(16, desc11.Width), + .Height = MPMIN(16, desc11.Height), + .MipLevels = 1, + .ArraySize = 1, + .Format = DXGI_FORMAT_B8G8R8X8_UNORM, + .SampleDesc.Count = 1, + .Usage = D3D11_USAGE_STAGING, + .CPUAccessFlags = D3D11_CPU_ACCESS_READ, + }; + hr = ID3D11Device_CreateTexture2D(p->dev11, &sdesc11, NULL, &surf->stage11); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to create D3D11 staging texture: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + hr = ID3D11Texture2D_QueryInterface(surf->tex11, &IID_IDXGIResource, + (void**)&res11); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to get share handle: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + HANDLE share_handle; + hr = IDXGIResource_GetSharedHandle(res11, &share_handle); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to get share handle: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + hr = ID3D11Device_CreateQuery(p->dev11, + &(D3D11_QUERY_DESC) { D3D11_QUERY_EVENT }, &surf->idle11); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to create D3D11 query: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + // Share the D3D11 texture with D3D9Ex + hr = IDirect3DDevice9Ex_CreateTexture(p->dev9, desc11.Width, desc11.Height, + 1, D3DUSAGE_RENDERTARGET, D3DFMT_X8R8G8B8, D3DPOOL_DEFAULT, + &surf->tex9, &share_handle); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to create D3D9 texture: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + hr = IDirect3DTexture9_GetSurfaceLevel(surf->tex9, 0, &surf->surf9); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to get D3D9 surface: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + // As above, try to use a 16x16 staging texture to avoid driver bugs + hr = IDirect3DDevice9Ex_CreateRenderTarget(p->dev9, + MPMIN(16, desc11.Width), MPMIN(16, desc11.Height), D3DFMT_X8R8G8B8, + D3DMULTISAMPLE_NONE, 0, TRUE, &surf->stage9, NULL); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to create D3D9 staging surface: %s\n", + 
mp_HRESULT_to_str(hr)); + goto done; + } + + surf->tex = ra_d3d11_wrap_tex(mapper->ra, (ID3D11Resource *)surf->tex11); + if (!surf->tex) + goto done; + + success = true; +done: + if (!success) + surf_destroy(mapper, surf); + SAFE_RELEASE(res11); + return success ? surf : NULL; +} + +// true if the surface is currently in-use by the D3D11 graphics pipeline +static bool surf_is_idle11(struct ra_hwdec_mapper *mapper, + struct queue_surf *surf) +{ + struct priv *p = mapper->priv; + HRESULT hr; + BOOL idle; + + if (!surf->busy11) + return true; + + hr = ID3D11DeviceContext_GetData(p->ctx11, + (ID3D11Asynchronous *)surf->idle11, &idle, sizeof(idle), + D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (FAILED(hr) || hr == S_FALSE || !idle) + return false; + + surf->busy11 = false; + return true; +} + +// If the surface is currently in-use by the D3D11 graphics pipeline, wait for +// it to become idle. Should only be called in the queue-underflow case. +static bool surf_wait_idle11(struct ra_hwdec_mapper *mapper, + struct queue_surf *surf) +{ + struct priv *p = mapper->priv; + HRESULT hr; + + ID3D11DeviceContext_CopySubresourceRegion(p->ctx11, + (ID3D11Resource *)surf->stage11, 0, 0, 0, 0, + (ID3D11Resource *)surf->tex11, 0, (&(D3D11_BOX){ + .right = MPMIN(16, mapper->src->w), + .bottom = MPMIN(16, mapper->src->h), + .back = 1, + })); + + // Block until the surface becomes idle (see surf_wait_idle9()) + D3D11_MAPPED_SUBRESOURCE map = {0}; + hr = ID3D11DeviceContext_Map(p->ctx11, (ID3D11Resource *)surf->stage11, 0, + D3D11_MAP_READ, 0, &map); + if (FAILED(hr)) { + MP_ERR(mapper, "Couldn't map D3D11 staging texture: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + + ID3D11DeviceContext_Unmap(p->ctx11, (ID3D11Resource *)surf->stage11, 0); + surf->busy11 = false; + return true; +} + +static bool surf_wait_idle9(struct ra_hwdec_mapper *mapper, + struct queue_surf *surf) +{ + struct priv *p = mapper->priv; + HRESULT hr; + + // Rather than polling for the surface to become idle, copy 
part of the + // surface to a staging texture and map it. This should block until the + // surface becomes idle. Microsoft's ISurfaceQueue does this as well. + RECT rc = {0, 0, MPMIN(16, mapper->src->w), MPMIN(16, mapper->src->h)}; + hr = IDirect3DDevice9Ex_StretchRect(p->dev9, surf->surf9, &rc, surf->stage9, + &rc, D3DTEXF_NONE); + if (FAILED(hr)) { + MP_ERR(mapper, "Couldn't copy to D3D9 staging texture: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + + D3DLOCKED_RECT lock; + hr = IDirect3DSurface9_LockRect(surf->stage9, &lock, NULL, D3DLOCK_READONLY); + if (FAILED(hr)) { + MP_ERR(mapper, "Couldn't map D3D9 staging texture: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + + IDirect3DSurface9_UnlockRect(surf->stage9); + p->queue[p->queue_pos]->busy11 = true; + return true; +} + +static struct queue_surf *surf_acquire(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + + if (!p->queue_len || !surf_is_idle11(mapper, p->queue[p->queue_pos])) { + if (p->queue_len < 16) { + struct queue_surf *surf = surf_create(mapper); + if (!surf) + return NULL; + + // The next surface is busy, so grow the queue + MP_TARRAY_INSERT_AT(p, p->queue, p->queue_len, p->queue_pos, surf); + MP_DBG(mapper, "Queue grew to %d surfaces\n", p->queue_len); + } else { + // For sanity, don't let the queue grow beyond 16 surfaces. It + // should never get this big. If it does, wait for the surface to + // become idle rather than polling it. 
+ if (!surf_wait_idle11(mapper, p->queue[p->queue_pos])) + return NULL; + MP_WARN(mapper, "Queue underflow!\n"); + } + } + return p->queue[p->queue_pos]; +} + +static void surf_release(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + ID3D11DeviceContext_End(p->ctx11, + (ID3D11Asynchronous *)p->queue[p->queue_pos]->idle11); + + // The current surface is now in-flight, move to the next surface + p->queue_pos++; + if (p->queue_pos >= p->queue_len) + p->queue_pos = 0; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + + for (int i = 0; i < p->queue_len; i++) + surf_destroy(mapper, p->queue[i]); +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + HRESULT hr; + + struct queue_surf *surf = surf_acquire(mapper); + if (!surf) + return -1; + + RECT rc = {0, 0, mapper->src->w, mapper->src->h}; + IDirect3DSurface9* hw_surface = (IDirect3DSurface9 *)mapper->src->planes[3]; + + hr = IDirect3DDevice9Ex_StretchRect(p->dev9, hw_surface, &rc, surf->surf9, + &rc, D3DTEXF_NONE); + if (FAILED(hr)) { + MP_ERR(mapper, "StretchRect() failed: %s\n", mp_HRESULT_to_str(hr)); + return -1; + } + + if (!surf_wait_idle9(mapper, surf)) + return -1; + + mapper->tex[0] = surf->tex; + return 0; +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + + if (p->queue_pos < p->queue_len && + p->queue[p->queue_pos]->tex == mapper->tex[0]) + { + surf_release(mapper); + mapper->tex[0] = NULL; + } +} + +const struct ra_hwdec_driver ra_hwdec_dxva2dxgi = { + .name = "dxva2-dxgi", + .priv_size = sizeof(struct priv_owner), + .imgfmts = {IMGFMT_DXVA2, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + .unmap = mapper_unmap, + }, +}; diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c 
index 63dc5b9..1d24558 100644 --- a/video/out/d3d11/ra_d3d11.c +++ b/video/out/d3d11/ra_d3d11.c @@ -78,6 +78,9 @@ struct d3d_tex { ID3D11Texture3D *tex3d; int array_slice; + // Staging texture for tex_download(), 2D only + ID3D11Texture2D *staging; + ID3D11ShaderResourceView *srv; ID3D11RenderTargetView *rtv; ID3D11UnorderedAccessView *uav; @@ -86,9 +89,9 @@ struct d3d_tex { struct d3d_buf { ID3D11Buffer *buf; - ID3D11Buffer *staging; ID3D11UnorderedAccessView *uav; - void *data; // Data for mapped staging texture + void *data; // System-memory mirror of the data in buf + bool dirty; // Is buf out of date? }; struct d3d_rpass { @@ -181,6 +184,7 @@ static struct d3d_fmt formats[] = { { "rgb10_a2", 4, 4, {10, 10, 10, 2}, DXFMT(R10G10B10A2, UNORM) }, { "bgra8", 4, 4, { 8, 8, 8, 8}, DXFMT(B8G8R8A8, UNORM), .unordered = true }, + { "bgrx8", 3, 4, { 8, 8, 8}, DXFMT(B8G8R8X8, UNORM), .unordered = true }, }; static bool dll_version_equal(struct dll_version a, struct dll_version b) @@ -358,12 +362,17 @@ static void tex_destroy(struct ra *ra, struct ra_tex *tex) SAFE_RELEASE(tex_p->uav); SAFE_RELEASE(tex_p->sampler); SAFE_RELEASE(tex_p->res); + SAFE_RELEASE(tex_p->staging); talloc_free(tex); } static struct ra_tex *tex_create(struct ra *ra, const struct ra_tex_params *params) { + // Only 2D textures may be downloaded for now + if (params->downloadable && params->dimensions != 2) + return NULL; + struct ra_d3d11 *p = ra->priv; HRESULT hr; @@ -436,6 +445,21 @@ static struct ra_tex *tex_create(struct ra *ra, goto error; } tex_p->res = (ID3D11Resource *)tex_p->tex2d; + + // Create a staging texture with CPU access for tex_download() + if (params->downloadable) { + desc2d.BindFlags = 0; + desc2d.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + desc2d.Usage = D3D11_USAGE_STAGING; + + hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, NULL, + &tex_p->staging); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to staging texture: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + } break; case 
3:; D3D11_TEXTURE3D_DESC desc3d = { @@ -651,17 +675,45 @@ static bool tex_upload(struct ra *ra, const struct ra_tex_upload_params *params) return true; } +static bool tex_download(struct ra *ra, struct ra_tex_download_params *params) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_tex *tex = params->tex; + struct d3d_tex *tex_p = tex->priv; + HRESULT hr; + + if (!tex_p->staging) + return false; + + ID3D11DeviceContext_CopyResource(p->ctx, (ID3D11Resource*)tex_p->staging, + tex_p->res); + + D3D11_MAPPED_SUBRESOURCE lock; + hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource*)tex_p->staging, 0, + D3D11_MAP_READ, 0, &lock); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to map staging texture: %s\n", mp_HRESULT_to_str(hr)); + return false; + } + + char *cdst = params->dst; + char *csrc = lock.pData; + for (int y = 0; y < tex->params.h; y++) { + memcpy(cdst + y * params->stride, csrc + y * lock.RowPitch, + MPMIN(params->stride, lock.RowPitch)); + } + + ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource*)tex_p->staging, 0); + + return true; +} + static void buf_destroy(struct ra *ra, struct ra_buf *buf) { if (!buf) return; - struct ra_d3d11 *p = ra->priv; struct d3d_buf *buf_p = buf->priv; - - if (buf_p->data) - ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)buf_p->staging, 0); SAFE_RELEASE(buf_p->buf); - SAFE_RELEASE(buf_p->staging); SAFE_RELEASE(buf_p->uav); talloc_free(buf); } @@ -705,24 +757,13 @@ static struct ra_buf *buf_create(struct ra *ra, goto error; } - if (params->host_mutable) { - // D3D11 doesn't allow constant buffer updates that aren't aligned to a - // full constant boundary (vec4,) and some drivers don't allow partial - // constant buffer updates at all, but the RA consumer is allowed to - // partially update an ra_buf. The best way to handle partial updates - // without causing a pipeline stall is probably to keep a copy of the - // data in a staging buffer. 
- - desc.Usage = D3D11_USAGE_STAGING; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - desc.BindFlags = 0; - hr = ID3D11Device_CreateBuffer(p->dev, &desc, NULL, &buf_p->staging); - if (FAILED(hr)) { - MP_ERR(ra, "Failed to create staging buffer: %s\n", - mp_HRESULT_to_str(hr)); - goto error; - } - } + // D3D11 doesn't allow constant buffer updates that aren't aligned to a + // full constant boundary (vec4,) and some drivers don't allow partial + // constant buffer updates at all. To support partial buffer updates, keep + // a mirror of the buffer data in system memory and upload the whole thing + // before the buffer is used. + if (params->host_mutable) + buf_p->data = talloc_zero_size(buf, desc.ByteWidth); if (params->type == RA_BUF_TYPE_SHADER_STORAGE) { D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = { @@ -752,40 +793,23 @@ static void buf_resolve(struct ra *ra, struct ra_buf *buf) struct ra_d3d11 *p = ra->priv; struct d3d_buf *buf_p = buf->priv; - assert(buf->params.host_mutable); - if (!buf_p->data) + if (!buf->params.host_mutable || !buf_p->dirty) return; - ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)buf_p->staging, 0); - buf_p->data = NULL; - - // Synchronize the GPU buffer with the staging buffer - ID3D11DeviceContext_CopyResource(p->ctx, (ID3D11Resource *)buf_p->buf, - (ID3D11Resource *)buf_p->staging); + // Synchronize the GPU buffer with the system-memory copy + ID3D11DeviceContext_UpdateSubresource(p->ctx, (ID3D11Resource *)buf_p->buf, + 0, NULL, buf_p->data, 0, 0); + buf_p->dirty = false; } static void buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, const void *data, size_t size) { - struct ra_d3d11 *p = ra->priv; struct d3d_buf *buf_p = buf->priv; - HRESULT hr; - - if (!buf_p->data) { - // If this is the first update after the buffer was created or after it - // has been used in a renderpass, it will be unmapped, so map it - D3D11_MAPPED_SUBRESOURCE map = {0}; - hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource *)buf_p->staging, - 
0, D3D11_MAP_WRITE, 0, &map); - if (FAILED(hr)) { - MP_ERR(ra, "Failed to map resource\n"); - return; - } - buf_p->data = map.pData; - } char *cdata = buf_p->data; memcpy(cdata + offset, data, size); + buf_p->dirty = true; } static const char *get_shader_target(struct ra *ra, enum glsl_shader type) @@ -2077,6 +2101,7 @@ static struct ra_fns ra_fns_d3d11 = { .tex_create = tex_create, .tex_destroy = tex_destroy, .tex_upload = tex_upload, + .tex_download = tex_download, .buf_create = buf_create, .buf_destroy = buf_destroy, .buf_update = buf_update, diff --git a/video/out/dr_helper.c b/video/out/dr_helper.c new file mode 100644 index 0000000..e826d08 --- /dev/null +++ b/video/out/dr_helper.c @@ -0,0 +1,130 @@ +#include <stdlib.h> +#include <assert.h> +#include <pthread.h> + +#include <libavutil/buffer.h> + +#include "mpv_talloc.h" +#include "misc/dispatch.h" +#include "osdep/atomic.h" +#include "video/mp_image.h" + +#include "dr_helper.h" + +struct dr_helper { + pthread_t thread; + struct mp_dispatch_queue *dispatch; + atomic_ullong dr_in_flight; + + struct mp_image *(*get_image)(void *ctx, int imgfmt, int w, int h, + int stride_align); + void *get_image_ctx; +}; + +static void dr_helper_destroy(void *ptr) +{ + struct dr_helper *dr = ptr; + + // All references must have been freed on destruction, or we'll have + // dangling pointers. 
+ assert(atomic_load(&dr->dr_in_flight) == 0); +} + +struct dr_helper *dr_helper_create(struct mp_dispatch_queue *dispatch, + struct mp_image *(*get_image)(void *ctx, int imgfmt, int w, int h, + int stride_align), + void *get_image_ctx) +{ + struct dr_helper *dr = talloc_ptrtype(NULL, dr); + talloc_set_destructor(dr, dr_helper_destroy); + *dr = (struct dr_helper){ + .thread = pthread_self(), + .dispatch = dispatch, + .dr_in_flight = ATOMIC_VAR_INIT(0), + .get_image = get_image, + .get_image_ctx = get_image_ctx, + }; + return dr; +} + +struct free_dr_context { + struct dr_helper *dr; + AVBufferRef *ref; +}; + +static void dr_thread_free(void *ptr) +{ + struct free_dr_context *ctx = ptr; + + unsigned long long v = atomic_fetch_add(&ctx->dr->dr_in_flight, -1); + assert(v); // value before sub is 0 - unexpected underflow. + + av_buffer_unref(&ctx->ref); + talloc_free(ctx); +} + +static void free_dr_buffer_on_dr_thread(void *opaque, uint8_t *data) +{ + struct free_dr_context *ctx = opaque; + + // The image could be unreffed even on the DR thread. In practice, this + // matters most on DR destruction. + if (pthread_equal(ctx->dr->thread, pthread_self())) { + dr_thread_free(ctx); + } else { + mp_dispatch_run(ctx->dr->dispatch, dr_thread_free, ctx); + } +} + +struct get_image_cmd { + struct dr_helper *dr; + int imgfmt, w, h, stride_align; + struct mp_image *res; +}; + +static void sync_get_image(void *ptr) +{ + struct get_image_cmd *cmd = ptr; + struct dr_helper *dr = cmd->dr; + + cmd->res = dr->get_image(dr->get_image_ctx, cmd->imgfmt, cmd->w, cmd->h, + cmd->stride_align); + if (!cmd->res) + return; + + // We require exactly 1 AVBufferRef. + assert(cmd->res->bufs[0]); + assert(!cmd->res->bufs[1]); + + // Apply some magic to get it free'd on the DR thread as well. For this to + // work, we create a dummy-ref that aliases the original ref, which is why + // the original ref must be writable in the first place. 
(A newly allocated + // image should be always writable of course.) + assert(mp_image_is_writeable(cmd->res)); + + struct free_dr_context *ctx = talloc_zero(NULL, struct free_dr_context); + *ctx = (struct free_dr_context){ + .dr = dr, + .ref = cmd->res->bufs[0], + }; + + AVBufferRef *new_ref = av_buffer_create(ctx->ref->data, ctx->ref->size, + free_dr_buffer_on_dr_thread, ctx, 0); + if (!new_ref) + abort(); // tiny malloc OOM + + cmd->res->bufs[0] = new_ref; + + atomic_fetch_add(&dr->dr_in_flight, 1); +} + +struct mp_image *dr_helper_get_image(struct dr_helper *dr, int imgfmt, + int w, int h, int stride_align) +{ + struct get_image_cmd cmd = { + .dr = dr, + .imgfmt = imgfmt, .w = w, .h = h, .stride_align = stride_align, + }; + mp_dispatch_run(dr->dispatch, sync_get_image, &cmd); + return cmd.res; +} diff --git a/video/out/dr_helper.h b/video/out/dr_helper.h new file mode 100644 index 0000000..cf37c57 --- /dev/null +++ b/video/out/dr_helper.h @@ -0,0 +1,20 @@ +#pragma once + +// This is a helper for implementing thread-safety for DR callbacks. These need +// to allocate GPU buffers on the GPU thread (e.g. OpenGL with its forced TLS), +// and the buffers also need to be freed on the GPU thread. +struct dr_helper; + +struct mp_image; +struct mp_dispatch_queue; + +// This MUST be called on the "target" thread (it will call pthread_self()). +// dr_helper_get_image() calls will use the dispatch queue to run get_image on +// the target thread too. 
+struct dr_helper *dr_helper_create(struct mp_dispatch_queue *dispatch, + struct mp_image *(*get_image)(void *ctx, int imgfmt, int w, int h, + int stride_align), + void *get_image_ctx); + +struct mp_image *dr_helper_get_image(struct dr_helper *dr, int imgfmt, + int w, int h, int stride_align); diff --git a/video/out/drm_atomic.c b/video/out/drm_atomic.c index 7a55483..5c6b3bb 100644 --- a/video/out/drm_atomic.c +++ b/video/out/drm_atomic.c @@ -78,6 +78,17 @@ int drm_object_get_property(struct drm_object *object, char *name, uint64_t *val return -EINVAL; } +drmModePropertyBlobPtr drm_object_get_property_blob(struct drm_object *object, char *name) +{ + uint64_t blob_id; + + if (!drm_object_get_property(object, name, &blob_id)) { + return drmModeGetPropertyBlob(object->fd, blob_id); + } + + return NULL; +} + int drm_object_set_property(drmModeAtomicReq *request, struct drm_object *object, char *name, uint64_t value) { @@ -98,6 +109,7 @@ struct drm_object * drm_object_create(struct mp_log *log, int fd, obj = talloc_zero(NULL, struct drm_object); obj->id = object_id; obj->type = type; + obj->fd = fd; if (drm_object_create_properties(log, fd, obj)) { talloc_free(obj); @@ -125,16 +137,18 @@ void drm_object_print_info(struct mp_log *log, struct drm_object *object) (long long)object->props->prop_values[i]); } -struct drm_atomic_context *drm_atomic_create_context(struct mp_log *log, int fd, - int crtc_id, int overlay_id) +struct drm_atomic_context *drm_atomic_create_context(struct mp_log *log, int fd, int crtc_id, + int connector_id, int osd_plane_id, int video_plane_id) { - drmModePlane *drmplane = NULL; drmModePlaneRes *plane_res = NULL; drmModeRes *res = NULL; struct drm_object *plane = NULL; struct drm_atomic_context *ctx; int crtc_index = -1; - int layercount = 0; + int layercount = -1; + int primary_id = 0; + int overlay_id = 0; + uint64_t value; res = drmModeGetResources(fd); @@ -169,55 +183,95 @@ struct drm_atomic_context *drm_atomic_create_context(struct mp_log 
*log, int fd, } } + for (int i = 0; i < res->count_connectors; i++) { + drmModeConnector *connector = drmModeGetConnector(fd, res->connectors[i]); + if (connector) { + if (connector->connector_id == connector_id) + ctx->connector = drm_object_create(log, ctx->fd, connector->connector_id, + DRM_MODE_OBJECT_CONNECTOR); + + drmModeFreeConnector(connector); + if (ctx->connector) + break; + } + } + for (unsigned int j = 0; j < plane_res->count_planes; j++) { - drmplane = drmModeGetPlane (ctx->fd, plane_res->planes[j]); - if (drmplane->possible_crtcs & (1 << crtc_index)) { - plane = drm_object_create(log, ctx->fd, drmplane->plane_id, + drmModePlane *drmplane = drmModeGetPlane(ctx->fd, plane_res->planes[j]); + const uint32_t possible_crtcs = drmplane->possible_crtcs; + const uint32_t plane_id = drmplane->plane_id; + drmModeFreePlane(drmplane); + drmplane = NULL; + + if (possible_crtcs & (1 << crtc_index)) { + plane = drm_object_create(log, ctx->fd, plane_id, DRM_MODE_OBJECT_PLANE); - if (plane) { - if (drm_object_get_property(plane, "TYPE", &value) == -EINVAL) { - mp_err(log, "Unable to retrieve type property from plane %d\n", j); - goto fail; - } else { - if ((value == DRM_PLANE_TYPE_OVERLAY) && - (layercount == overlay_id)) { - ctx->overlay_plane = plane; - } - else if (value == DRM_PLANE_TYPE_PRIMARY) { - ctx->primary_plane = plane; - } - else { - drm_object_free(plane); - plane = NULL; - } - - if (value == DRM_PLANE_TYPE_OVERLAY) - layercount++; - } - } else { + if (!plane) { mp_err(log, "Failed to create Plane object from plane ID %d\n", - drmplane->plane_id); + plane_id); goto fail; } + + if (drm_object_get_property(plane, "TYPE", &value) == -EINVAL) { + mp_err(log, "Unable to retrieve type property from plane %d\n", j); + goto fail; + } + + if (value != DRM_PLANE_TYPE_CURSOR) { // Skip cursor planes + layercount++; + + if ((!primary_id) && (value == DRM_PLANE_TYPE_PRIMARY)) + primary_id = plane_id; + + if ((!overlay_id) && (value == DRM_PLANE_TYPE_OVERLAY)) + 
overlay_id = plane_id; + + if (layercount == osd_plane_id) { + ctx->osd_plane = plane; + continue; + } + + if (layercount == video_plane_id) { + ctx->video_plane = plane; + continue; + } + } + + drm_object_free(plane); + plane = NULL; } - drmModeFreePlane(drmplane); - drmplane = NULL; } - if (!ctx->primary_plane) { - mp_err(log, "Failed to find primary plane\n"); - goto fail; + // default OSD plane to primary if unspecified + if (!ctx->osd_plane) { + if (primary_id) { + mp_verbose(log, "Using default plane %d for OSD\n", primary_id); + ctx->osd_plane = drm_object_create(log, ctx->fd, primary_id, DRM_MODE_OBJECT_PLANE); + } else { + mp_err(log, "Failed to find OSD plane with id=%d\n", osd_plane_id); + goto fail; + } + } else { + mp_verbose(log, "Found OSD plane with ID %d\n", ctx->osd_plane->id); } - if (!ctx->overlay_plane) { - mp_err(log, "Failed to find overlay plane with id=%d\n", overlay_id); - goto fail; + // default video plane to overlay if unspecified + if (!ctx->video_plane) { + if (overlay_id) { + mp_verbose(log, "Using default plane %d for video\n", overlay_id); + ctx->video_plane = drm_object_create(log, ctx->fd, overlay_id, DRM_MODE_OBJECT_PLANE); + } else { + mp_err(log, "Failed to find video plane with id=%d\n", video_plane_id); + goto fail; + } + } else { + mp_verbose(log, "Found video plane with ID %d\n", ctx->video_plane->id); } - mp_verbose(log, "Found Primary plane with ID %d, overlay with ID %d\n", - ctx->primary_plane->id, ctx->overlay_plane->id); + mp_verbose(log, "Found Video plane with ID %d, OSD with ID %d\n", + ctx->video_plane->id, ctx->osd_plane->id); drmModeFreePlaneResources(plane_res); drmModeFreeResources(res); @@ -229,8 +283,6 @@ fail: drmModeFreeResources(res); if (plane_res) drmModeFreePlaneResources(plane_res); - if (drmplane) - drmModeFreePlane(drmplane); if (plane) drm_object_free(plane); return NULL; @@ -238,8 +290,153 @@ fail: void drm_atomic_destroy_context(struct drm_atomic_context *ctx) { + drm_mode_destroy_blob(ctx->fd, 
&ctx->old_state.crtc.mode); drm_object_free(ctx->crtc); - drm_object_free(ctx->primary_plane); - drm_object_free(ctx->overlay_plane); + drm_object_free(ctx->connector); + drm_object_free(ctx->osd_plane); + drm_object_free(ctx->video_plane); talloc_free(ctx); } + +static bool drm_atomic_save_plane_state(struct drm_object *plane, + struct drm_atomic_plane_state *plane_state) +{ + bool ret = true; + + if (0 > drm_object_get_property(plane, "FB_ID", &plane_state->fb_id)) + ret = false; + if (0 > drm_object_get_property(plane, "CRTC_ID", &plane_state->crtc_id)) + ret = false; + if (0 > drm_object_get_property(plane, "SRC_X", &plane_state->src_x)) + ret = false; + if (0 > drm_object_get_property(plane, "SRC_Y", &plane_state->src_y)) + ret = false; + if (0 > drm_object_get_property(plane, "SRC_W", &plane_state->src_w)) + ret = false; + if (0 > drm_object_get_property(plane, "SRC_H", &plane_state->src_h)) + ret = false; + if (0 > drm_object_get_property(plane, "CRTC_X", &plane_state->crtc_x)) + ret = false; + if (0 > drm_object_get_property(plane, "CRTC_Y", &plane_state->crtc_y)) + ret = false; + if (0 > drm_object_get_property(plane, "CRTC_W", &plane_state->crtc_w)) + ret = false; + if (0 > drm_object_get_property(plane, "CRTC_H", &plane_state->crtc_h)) + ret = false; + // ZPOS might not exist, so ignore whether or not this succeeds + drm_object_get_property(plane, "ZPOS", &plane_state->zpos); + + return ret; +} + +static bool drm_atomic_restore_plane_state(drmModeAtomicReq *request, + struct drm_object *plane, + const struct drm_atomic_plane_state *plane_state) +{ + bool ret = true; + + if (0 > drm_object_set_property(request, plane, "FB_ID", plane_state->fb_id)) + ret = false; + if (0 > drm_object_set_property(request, plane, "CRTC_ID", plane_state->crtc_id)) + ret = false; + if (0 > drm_object_set_property(request, plane, "SRC_X", plane_state->src_x)) + ret = false; + if (0 > drm_object_set_property(request, plane, "SRC_Y", plane_state->src_y)) + ret = false; + if (0 > 
drm_object_set_property(request, plane, "SRC_W", plane_state->src_w)) + ret = false; + if (0 > drm_object_set_property(request, plane, "SRC_H", plane_state->src_h)) + ret = false; + if (0 > drm_object_set_property(request, plane, "CRTC_X", plane_state->crtc_x)) + ret = false; + if (0 > drm_object_set_property(request, plane, "CRTC_Y", plane_state->crtc_y)) + ret = false; + if (0 > drm_object_set_property(request, plane, "CRTC_W", plane_state->crtc_w)) + ret = false; + if (0 > drm_object_set_property(request, plane, "CRTC_H", plane_state->crtc_h)) + ret = false; + // ZPOS might not exist, so ignore whether or not this succeeds + drm_object_set_property(request, plane, "ZPOS", plane_state->zpos); + + return ret; +} + +bool drm_atomic_save_old_state(struct drm_atomic_context *ctx) +{ + if (ctx->old_state.saved) + return false; + + bool ret = true; + + drmModeCrtc *crtc = drmModeGetCrtc(ctx->fd, ctx->crtc->id); + if (crtc == NULL) + return false; + ctx->old_state.crtc.mode.mode = crtc->mode; + drmModeFreeCrtc(crtc); + + if (0 > drm_object_get_property(ctx->crtc, "ACTIVE", &ctx->old_state.crtc.active)) + ret = false; + + if (0 > drm_object_get_property(ctx->connector, "CRTC_ID", &ctx->old_state.connector.crtc_id)) + ret = false; + + if (!drm_atomic_save_plane_state(ctx->osd_plane, &ctx->old_state.osd_plane)) + ret = false; + if (!drm_atomic_save_plane_state(ctx->video_plane, &ctx->old_state.video_plane)) + ret = false; + + ctx->old_state.saved = true; + + return ret; +} + +bool drm_atomic_restore_old_state(drmModeAtomicReqPtr request, struct drm_atomic_context *ctx) +{ + if (!ctx->old_state.saved) + return false; + + bool ret = true; + + if (0 > drm_object_set_property(request, ctx->connector, "CRTC_ID", ctx->old_state.connector.crtc_id)) + ret = false; + + if (!drm_mode_ensure_blob(ctx->fd, &ctx->old_state.crtc.mode)) + ret = false; + if (0 > drm_object_set_property(request, ctx->crtc, "MODE_ID", ctx->old_state.crtc.mode.blob_id)) + ret = false; + if (0 > 
drm_object_set_property(request, ctx->crtc, "ACTIVE", ctx->old_state.crtc.active)) + ret = false; + + if (!drm_atomic_restore_plane_state(request, ctx->osd_plane, &ctx->old_state.osd_plane)) + ret = false; + if (!drm_atomic_restore_plane_state(request, ctx->video_plane, &ctx->old_state.video_plane)) + ret = false; + + ctx->old_state.saved = false; + + return ret; +} + +bool drm_mode_ensure_blob(int fd, struct drm_mode *mode) +{ + int ret = 0; + + if (!mode->blob_id) { + ret = drmModeCreatePropertyBlob(fd, &mode->mode, sizeof(drmModeModeInfo), + &mode->blob_id); + } + + return (ret == 0); +} + +bool drm_mode_destroy_blob(int fd, struct drm_mode *mode) +{ + int ret = 0; + + if (mode->blob_id) { + ret = drmModeDestroyPropertyBlob(fd, mode->blob_id); + mode->blob_id = 0; + } + + return (ret == 0); +} diff --git a/video/out/drm_atomic.h b/video/out/drm_atomic.h index d0ebdb9..cd0252a 100644 --- a/video/out/drm_atomic.h +++ b/video/out/drm_atomic.h @@ -19,12 +19,47 @@ #define MP_DRMATOMIC_H #include <stdlib.h> +#include <stdbool.h> #include <xf86drm.h> #include <xf86drmMode.h> #include "common/msg.h" +struct drm_mode { + drmModeModeInfo mode; + uint32_t blob_id; +}; + +struct drm_atomic_plane_state { + uint64_t fb_id; + uint64_t crtc_id; + uint64_t src_x; + uint64_t src_y; + uint64_t src_w; + uint64_t src_h; + uint64_t crtc_x; + uint64_t crtc_y; + uint64_t crtc_w; + uint64_t crtc_h; + uint64_t zpos; +}; + +// Used to store the restore state for VT switching and uninit +struct drm_atomic_state { + bool saved; + struct { + uint64_t crtc_id; + } connector; + struct { + struct drm_mode mode; + uint64_t active; + } crtc; + struct drm_atomic_plane_state osd_plane; + struct drm_atomic_plane_state video_plane; +}; + struct drm_object { + int fd; uint32_t id; uint32_t type; drmModeObjectProperties *props; @@ -35,10 +70,13 @@ struct drm_atomic_context { int fd; struct drm_object *crtc; - struct drm_object *primary_plane; - struct drm_object *overlay_plane; + struct drm_object 
*connector; + struct drm_object *osd_plane; + struct drm_object *video_plane; drmModeAtomicReq *request; + + struct drm_atomic_state old_state; }; @@ -46,10 +84,18 @@ int drm_object_create_properties(struct mp_log *log, int fd, struct drm_object * void drm_object_free_properties(struct drm_object *object); int drm_object_get_property(struct drm_object *object, char *name, uint64_t *value); int drm_object_set_property(drmModeAtomicReq *request, struct drm_object *object, char *name, uint64_t value); +drmModePropertyBlobPtr drm_object_get_property_blob(struct drm_object *object, char *name); struct drm_object * drm_object_create(struct mp_log *log, int fd, uint32_t object_id, uint32_t type); void drm_object_free(struct drm_object *object); void drm_object_print_info(struct mp_log *log, struct drm_object *object); -struct drm_atomic_context *drm_atomic_create_context(struct mp_log *log, int fd, int crtc_id, int overlay_id); +struct drm_atomic_context *drm_atomic_create_context(struct mp_log *log, int fd, int crtc_id, int connector_id, + int osd_plane_id, int video_plane_id); void drm_atomic_destroy_context(struct drm_atomic_context *ctx); +bool drm_atomic_save_old_state(struct drm_atomic_context *ctx); +bool drm_atomic_restore_old_state(drmModeAtomicReq *request, struct drm_atomic_context *ctx); + +bool drm_mode_ensure_blob(int fd, struct drm_mode *mode); +bool drm_mode_destroy_blob(int fd, struct drm_mode *mode); + #endif // MP_DRMATOMIC_H diff --git a/video/out/drm_common.c b/video/out/drm_common.c index 8402ac7..aa3d099 100644 --- a/video/out/drm_common.c +++ b/video/out/drm_common.c @@ -47,9 +47,18 @@ const struct m_sub_options drm_conf = { OPT_STRING_VALIDATE("drm-connector", drm_connector_spec, 0, drm_validate_connector_opt), OPT_INT("drm-mode", drm_mode_id, 0), - OPT_INT("drm-overlay", drm_overlay_id, 0), + OPT_INT("drm-osd-plane-id", drm_osd_plane_id, 0), + OPT_INT("drm-video-plane-id", drm_video_plane_id, 0), + OPT_CHOICE("drm-format", drm_format, 0, + 
({"xrgb8888", DRM_OPTS_FORMAT_XRGB8888}, + {"xrgb2101010", DRM_OPTS_FORMAT_XRGB2101010})), + OPT_SIZE_BOX("drm-osd-size", drm_osd_size, 0), {0}, }, + .defaults = &(const struct drm_opts) { + .drm_osd_plane_id = -1, + .drm_video_plane_id = -1, + }, .size = sizeof(struct drm_opts), }; @@ -164,6 +173,27 @@ static bool setup_connector(struct kms *kms, const drmModeRes *res, static bool setup_crtc(struct kms *kms, const drmModeRes *res) { + // First try to find currently connected encoder and its current CRTC + for (unsigned int i = 0; i < res->count_encoders; i++) { + drmModeEncoder *encoder = drmModeGetEncoder(kms->fd, res->encoders[i]); + if (!encoder) { + MP_WARN(kms, "Cannot retrieve encoder %u:%u: %s\n", + i, res->encoders[i], mp_strerror(errno)); + continue; + } + + if (encoder->encoder_id == kms->connector->encoder_id && encoder->crtc_id != 0) { + MP_VERBOSE(kms, "Connector %u currently connected to encoder %u\n", + kms->connector->connector_id, kms->connector->encoder_id); + kms->encoder = encoder; + kms->crtc_id = encoder->crtc_id; + goto success; + } + + drmModeFreeEncoder(encoder); + } + + // Otherwise pick first legal encoder and CRTC combo for the connector for (unsigned int i = 0; i < kms->connector->count_encoders; ++i) { drmModeEncoder *encoder = drmModeGetEncoder(kms->fd, kms->connector->encoders[i]); @@ -181,7 +211,7 @@ static bool setup_crtc(struct kms *kms, const drmModeRes *res) kms->encoder = encoder; kms->crtc_id = res->crtcs[j]; - return true; + goto success; } drmModeFreeEncoder(encoder); @@ -190,6 +220,11 @@ static bool setup_crtc(struct kms *kms, const drmModeRes *res) MP_ERR(kms, "Connector %u has no suitable CRTC\n", kms->connector->connector_id); return false; + + success: + MP_VERBOSE(kms, "Selected Encoder %u with CRTC %u\n", + kms->encoder->encoder_id, kms->crtc_id); + return true; } static bool setup_mode(struct kms *kms, int mode_id) @@ -202,7 +237,7 @@ static bool setup_mode(struct kms *kms, int mode_id) return false; } - kms->mode = 
kms->connector->modes[mode_id]; + kms->mode.mode = kms->connector->modes[mode_id]; return true; } @@ -234,7 +269,7 @@ static void parse_connector_spec(struct mp_log *log, struct kms *kms_create(struct mp_log *log, const char *connector_spec, - int mode_id, int overlay_id) + int mode_id, int osd_plane_id, int video_plane_id) { int card_no = -1; char *connector_name = NULL; @@ -246,7 +281,7 @@ struct kms *kms_create(struct mp_log *log, const char *connector_spec, .fd = open_card(card_no), .connector = NULL, .encoder = NULL, - .mode = { 0 }, + .mode = {{0}}, .crtc_id = -1, .card_no = card_no, }; @@ -281,14 +316,14 @@ struct kms *kms_create(struct mp_log *log, const char *connector_spec, mp_verbose(log, "No DRM Atomic support found\n"); } else { mp_verbose(log, "DRM Atomic support found\n"); - kms->atomic_context = drm_atomic_create_context(kms->log, kms->fd, kms->crtc_id, overlay_id); + kms->atomic_context = drm_atomic_create_context(kms->log, kms->fd, kms->crtc_id, + kms->connector->connector_id, osd_plane_id, video_plane_id); if (!kms->atomic_context) { mp_err(log, "Failed to create DRM atomic context\n"); goto err; } } - drmModeFreeResources(res); return kms; @@ -305,6 +340,7 @@ void kms_destroy(struct kms *kms) { if (!kms) return; + drm_mode_destroy_blob(kms->fd, &kms->mode); if (kms->connector) { drmModeFreeConnector(kms->connector); kms->connector = NULL; @@ -389,7 +425,7 @@ void kms_show_available_cards_and_connectors(struct mp_log *log) double kms_get_display_fps(const struct kms *kms) { - return mode_get_Hz(&kms->mode); + return mode_get_Hz(&kms->mode.mode); } int drm_validate_connector_opt(struct mp_log *log, const struct m_option *opt, @@ -428,7 +464,6 @@ static int install_signal(int signo, void (*handler)(int)) return sigaction(signo, &act, NULL); } - bool vt_switcher_init(struct vt_switcher *s, struct mp_log *log) { s->log = log; @@ -479,6 +514,14 @@ bool vt_switcher_init(struct vt_switcher *s, struct mp_log *log) return false; } + // Block the VT 
switching signals from interrupting the VO thread (they will + // still be picked up by other threads, which will fill vt_switcher_pipe for us) + sigset_t set; + sigemptyset(&set); + sigaddset(&set, RELEASE_SIGNAL); + sigaddset(&set, ACQUIRE_SIGNAL); + pthread_sigmask(SIG_BLOCK, &set, NULL); + return true; } @@ -504,6 +547,13 @@ void vt_switcher_interrupt_poll(struct vt_switcher *s) void vt_switcher_destroy(struct vt_switcher *s) { + struct vt_mode vt_mode = {0}; + vt_mode.mode = VT_AUTO; + if (ioctl(s->tty_fd, VT_SETMODE, &vt_mode) < 0) { + MP_ERR(s, "VT_SETMODE failed: %s\n", mp_strerror(errno)); + return; + } + install_signal(RELEASE_SIGNAL, SIG_DFL); install_signal(ACQUIRE_SIGNAL, SIG_DFL); close(s->tty_fd); diff --git a/video/out/drm_common.h b/video/out/drm_common.h index ff913ff..3f14410 100644 --- a/video/out/drm_common.h +++ b/video/out/drm_common.h @@ -24,12 +24,15 @@ #include "options/m_option.h" #include "drm_atomic.h" +#define DRM_OPTS_FORMAT_XRGB8888 0 +#define DRM_OPTS_FORMAT_XRGB2101010 1 + struct kms { struct mp_log *log; int fd; drmModeConnector *connector; drmModeEncoder *encoder; - drmModeModeInfo mode; + struct drm_mode mode; uint32_t crtc_id; int card_no; struct drm_atomic_context *atomic_context; @@ -45,7 +48,10 @@ struct vt_switcher { struct drm_opts { char *drm_connector_spec; int drm_mode_id; - int drm_overlay_id; + int drm_osd_plane_id; + int drm_video_plane_id; + int drm_format; + struct m_geometry drm_osd_size; }; bool vt_switcher_init(struct vt_switcher *s, struct mp_log *log); @@ -59,7 +65,7 @@ void vt_switcher_release(struct vt_switcher *s, void (*handler)(void*), void *user_data); struct kms *kms_create(struct mp_log *log, const char *connector_spec, - int mode_id, int overlay_id); + int mode_id, int osd_plane_id, int video_plane_id); void kms_destroy(struct kms *kms); double kms_get_display_fps(const struct kms *kms); diff --git a/video/out/gpu/context.h b/video/out/gpu/context.h index 78c0441..a2fcb37 100644 --- 
a/video/out/gpu/context.h +++ b/video/out/gpu/context.h @@ -69,9 +69,6 @@ struct ra_swapchain_fns { // Gets the current framebuffer depth in bits (0 if unknown). Optional. int (*color_depth)(struct ra_swapchain *sw); - // Retrieves a screenshot of the framebuffer. Optional. - struct mp_image *(*screenshot)(struct ra_swapchain *sw); - // Called when rendering starts. Returns NULL on failure. This must be // followed by submit_frame, to submit the rendered frame. This function // can also fail sporadically, and such errors should be ignored unless diff --git a/video/out/gpu/d3d11_helpers.c b/video/out/gpu/d3d11_helpers.c index b96b03a..d267ac3 100644 --- a/video/out/gpu/d3d11_helpers.c +++ b/video/out/gpu/d3d11_helpers.c @@ -315,29 +315,18 @@ bool mp_d3d11_create_swapchain(ID3D11Device *dev, struct mp_log *log, if (FAILED(hr)) factory2 = NULL; - // Try B8G8R8A8_UNORM first, since at least in Windows 8, it's always the - // format of the desktop image - static const DXGI_FORMAT formats[] = { - DXGI_FORMAT_B8G8R8A8_UNORM, - DXGI_FORMAT_R8G8B8A8_UNORM, - }; - static const int formats_len = MP_ARRAY_SIZE(formats); bool flip = factory2 && opts->flip; // Return here to retry creating the swapchain do { - for (int i = 0; i < formats_len; i++) { - if (factory2) { - // Create a DXGI 1.2+ (Windows 8+) swap chain if possible - hr = create_swapchain_1_2(dev, factory2, log, opts, flip, - formats[i], &swapchain); - } else { - // Fall back to DXGI 1.1 (Windows 7) - hr = create_swapchain_1_1(dev, factory, log, opts, formats[i], - &swapchain); - } - if (SUCCEEDED(hr)) - break; + if (factory2) { + // Create a DXGI 1.2+ (Windows 8+) swap chain if possible + hr = create_swapchain_1_2(dev, factory2, log, opts, flip, + DXGI_FORMAT_R8G8B8A8_UNORM, &swapchain); + } else { + // Fall back to DXGI 1.1 (Windows 7) + hr = create_swapchain_1_1(dev, factory, log, opts, + DXGI_FORMAT_R8G8B8A8_UNORM, &swapchain); } if (SUCCEEDED(hr)) break; @@ -385,84 +374,3 @@ done: SAFE_RELEASE(dxgi_dev); return 
success; } - -struct mp_image *mp_d3d11_screenshot(IDXGISwapChain *swapchain) -{ - ID3D11Device *dev = NULL; - ID3D11DeviceContext *ctx = NULL; - ID3D11Texture2D *frontbuffer = NULL; - ID3D11Texture2D *staging = NULL; - struct mp_image *img = NULL; - HRESULT hr; - - // Validate the swap chain. This screenshot method will only work on DXGI - // 1.2+ flip/sequential swap chains. It's probably not possible at all with - // discard swap chains, since by definition, the backbuffer contents is - // discarded on Present(). - DXGI_SWAP_CHAIN_DESC scd; - hr = IDXGISwapChain_GetDesc(swapchain, &scd); - if (FAILED(hr)) - goto done; - if (scd.SwapEffect != DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL) - goto done; - - // Get the last buffer that was presented with Present(). This should be - // the n-1th buffer for a swap chain of length n. - hr = IDXGISwapChain_GetBuffer(swapchain, scd.BufferCount - 1, - &IID_ID3D11Texture2D, (void**)&frontbuffer); - if (FAILED(hr)) - goto done; - - ID3D11Texture2D_GetDevice(frontbuffer, &dev); - ID3D11Device_GetImmediateContext(dev, &ctx); - - D3D11_TEXTURE2D_DESC td; - ID3D11Texture2D_GetDesc(frontbuffer, &td); - if (td.SampleDesc.Count > 1) - goto done; - - // Validate the backbuffer format and convert to an mpv IMGFMT - enum mp_imgfmt fmt; - switch (td.Format) { - case DXGI_FORMAT_B8G8R8A8_UNORM: fmt = IMGFMT_BGR0; break; - case DXGI_FORMAT_R8G8B8A8_UNORM: fmt = IMGFMT_RGB0; break; - default: - goto done; - } - - // Create a staging texture based on the frontbuffer with CPU access - td.BindFlags = 0; - td.MiscFlags = 0; - td.CPUAccessFlags = D3D11_CPU_ACCESS_READ; - td.Usage = D3D11_USAGE_STAGING; - hr = ID3D11Device_CreateTexture2D(dev, &td, 0, &staging); - if (FAILED(hr)) - goto done; - - ID3D11DeviceContext_CopyResource(ctx, (ID3D11Resource*)staging, - (ID3D11Resource*)frontbuffer); - - // Attempt to map the staging texture to CPU-accessible memory - D3D11_MAPPED_SUBRESOURCE lock; - hr = ID3D11DeviceContext_Map(ctx, (ID3D11Resource*)staging, 0, - 
D3D11_MAP_READ, 0, &lock); - if (FAILED(hr)) - goto done; - - img = mp_image_alloc(fmt, td.Width, td.Height); - if (!img) - return NULL; - for (int i = 0; i < td.Height; i++) { - memcpy(img->planes[0] + img->stride[0] * i, - (char*)lock.pData + lock.RowPitch * i, td.Width * 4); - } - - ID3D11DeviceContext_Unmap(ctx, (ID3D11Resource*)staging, 0); - -done: - SAFE_RELEASE(frontbuffer); - SAFE_RELEASE(staging); - SAFE_RELEASE(ctx); - SAFE_RELEASE(dev); - return img; -} diff --git a/video/out/gpu/d3d11_helpers.h b/video/out/gpu/d3d11_helpers.h index 481c183..996b934 100644 --- a/video/out/gpu/d3d11_helpers.h +++ b/video/out/gpu/d3d11_helpers.h @@ -78,6 +78,4 @@ bool mp_d3d11_create_swapchain(ID3D11Device *dev, struct mp_log *log, struct d3d11_swapchain_opts *opts, IDXGISwapChain **swapchain_out); -struct mp_image *mp_d3d11_screenshot(IDXGISwapChain *swapchain); - #endif diff --git a/video/out/gpu/hwdec.c b/video/out/gpu/hwdec.c index 5284116..fc37074 100644 --- a/video/out/gpu/hwdec.c +++ b/video/out/gpu/hwdec.c @@ -35,6 +35,7 @@ extern const struct ra_hwdec_driver ra_hwdec_d3d11eglrgb; extern const struct ra_hwdec_driver ra_hwdec_dxva2gldx; extern const struct ra_hwdec_driver ra_hwdec_dxva2; extern const struct ra_hwdec_driver ra_hwdec_d3d11va; +extern const struct ra_hwdec_driver ra_hwdec_dxva2dxgi; extern const struct ra_hwdec_driver ra_hwdec_cuda; extern const struct ra_hwdec_driver ra_hwdec_cuda_nvdec; extern const struct ra_hwdec_driver ra_hwdec_rpi_overlay; @@ -48,13 +49,18 @@ const struct ra_hwdec_driver *const ra_hwdec_drivers[] = { &ra_hwdec_videotoolbox, #endif #if HAVE_D3D_HWACCEL + #if HAVE_EGL_ANGLE &ra_hwdec_d3d11egl, &ra_hwdec_d3d11eglrgb, - #if HAVE_D3D9_HWACCEL + #if HAVE_D3D9_HWACCEL &ra_hwdec_dxva2egl, + #endif #endif #if HAVE_D3D11 &ra_hwdec_d3d11va, + #if HAVE_D3D9_HWACCEL + &ra_hwdec_dxva2dxgi, + #endif #endif #endif #if HAVE_GL_DXINTEROP_D3D9 diff --git a/video/out/gpu/lcms.c b/video/out/gpu/lcms.c index 3552351..bc76db9 100644 --- 
a/video/out/gpu/lcms.c +++ b/video/out/gpu/lcms.c @@ -83,7 +83,7 @@ const struct m_sub_options mp_icc_conf = { OPT_FLAG("icc-profile-auto", profile_auto, 0), OPT_STRING("icc-cache-dir", cache_dir, M_OPT_FILE), OPT_INT("icc-intent", intent, 0), - OPT_INTRANGE("icc-contrast", contrast, 0, 0, 100000), + OPT_INTRANGE("icc-contrast", contrast, 0, 0, 1000000), OPT_STRING_VALIDATE("icc-3dlut-size", size_str, 0, validate_3dlut_size_opt), OPT_REPLACED("3dlut-size", "icc-3dlut-size"), @@ -304,7 +304,8 @@ static cmsHPROFILE get_vid_profile(struct gl_lcms *p, cmsContext cms, // Built-in contrast failsafe double contrast = 3.0 / (src_black[0] + src_black[1] + src_black[2]); - if (contrast > 100000) { + MP_VERBOSE(p, "Detected ICC profile contrast: %f\n", contrast); + if (contrast > 100000 && !p->opts->contrast) { MP_WARN(p, "ICC profile detected contrast very high (>100000)," " falling back to contrast 1000 for sanity. Set the" " icc-contrast option to silence this warning.\n"); diff --git a/video/out/gpu/libmpv_gpu.c b/video/out/gpu/libmpv_gpu.c new file mode 100644 index 0000000..fce2acf --- /dev/null +++ b/video/out/gpu/libmpv_gpu.c @@ -0,0 +1,239 @@ +#include "config.h" +#include "hwdec.h" +#include "libmpv_gpu.h" +#include "libmpv/render_gl.h" +#include "video.h" +#include "video/out/libmpv.h" + +static const struct libmpv_gpu_context_fns *context_backends[] = { +#if HAVE_GL + &libmpv_gpu_context_gl, +#endif + NULL +}; + +struct priv { + struct libmpv_gpu_context *context; + + struct gl_video *renderer; +}; + +struct native_resource_entry { + const char *name; // ra_add_native_resource() internal name argument + size_t size; // size of struct pointed to (0 for no copy) +}; + +static const struct native_resource_entry native_resource_map[] = { + [MPV_RENDER_PARAM_X11_DISPLAY] = { + .name = "x11", + .size = 0, + }, + [MPV_RENDER_PARAM_WL_DISPLAY] = { + .name = "wl", + .size = 0, + }, + [MPV_RENDER_PARAM_DRM_DISPLAY] = { + .name = "drm_params", + .size = sizeof 
(mpv_opengl_drm_params), + }, + [MPV_RENDER_PARAM_DRM_OSD_SIZE] = { + .name = "drm_osd_size", + .size = sizeof (mpv_opengl_drm_osd_size), + }, +}; + +static int init(struct render_backend *ctx, mpv_render_param *params) +{ + ctx->priv = talloc_zero(NULL, struct priv); + struct priv *p = ctx->priv; + + char *api = get_mpv_render_param(params, MPV_RENDER_PARAM_API_TYPE, NULL); + if (!api) + return MPV_ERROR_INVALID_PARAMETER; + + for (int n = 0; context_backends[n]; n++) { + const struct libmpv_gpu_context_fns *backend = context_backends[n]; + if (strcmp(backend->api_name, api) == 0) { + p->context = talloc_zero(NULL, struct libmpv_gpu_context); + *p->context = (struct libmpv_gpu_context){ + .global = ctx->global, + .log = ctx->log, + .fns = backend, + }; + break; + } + } + + if (!p->context) + return MPV_ERROR_INVALID_PARAMETER; + + int err = p->context->fns->init(p->context, params); + if (err < 0) + return err; + + for (int n = 0; params && params[n].type; n++) { + if (params[n].type > 0 && + params[n].type < MP_ARRAY_SIZE(native_resource_map) && + native_resource_map[params[n].type].name) + { + const struct native_resource_entry *entry = + &native_resource_map[params[n].type]; + void *data = params[n].data; + if (entry->size) + data = talloc_memdup(p, data, entry->size); + ra_add_native_resource(p->context->ra, entry->name, data); + } + } + + p->renderer = gl_video_init(p->context->ra, ctx->log, ctx->global); + + ctx->hwdec_devs = hwdec_devices_create(); + gl_video_load_hwdecs(p->renderer, ctx->hwdec_devs, true); + ctx->driver_caps = VO_CAP_ROTATE90; + return 0; +} + +static bool check_format(struct render_backend *ctx, int imgfmt) +{ + struct priv *p = ctx->priv; + + return gl_video_check_format(p->renderer, imgfmt); +} + +static int set_parameter(struct render_backend *ctx, mpv_render_param param) +{ + struct priv *p = ctx->priv; + + switch (param.type) { + case MPV_RENDER_PARAM_ICC_PROFILE: { + mpv_byte_array *data = param.data; + 
gl_video_set_icc_profile(p->renderer, (bstr){data->data, data->size}); + return 0; + } + case MPV_RENDER_PARAM_AMBIENT_LIGHT: { + int lux = *(int *)param.data; + gl_video_set_ambient_lux(p->renderer, lux); + return 0; + } + default: + return MPV_ERROR_NOT_IMPLEMENTED; + } +} + +static void reconfig(struct render_backend *ctx, struct mp_image_params *params) +{ + struct priv *p = ctx->priv; + + gl_video_config(p->renderer, params); +} + +static void reset(struct render_backend *ctx) +{ + struct priv *p = ctx->priv; + + gl_video_reset(p->renderer); +} + +static void update_external(struct render_backend *ctx, struct vo *vo) +{ + struct priv *p = ctx->priv; + + gl_video_set_osd_source(p->renderer, vo ? vo->osd : NULL); + if (vo) + gl_video_configure_queue(p->renderer, vo); +} + +static void resize(struct render_backend *ctx, struct mp_rect *src, + struct mp_rect *dst, struct mp_osd_res *osd) +{ + struct priv *p = ctx->priv; + + gl_video_resize(p->renderer, src, dst, osd); +} + +static int get_target_size(struct render_backend *ctx, mpv_render_param *params, + int *out_w, int *out_h) +{ + struct priv *p = ctx->priv; + + // Mapping the surface is cheap, better than adding new backend entrypoints. + struct ra_tex *tex; + int err = p->context->fns->wrap_fbo(p->context, params, &tex); + if (err < 0) + return err; + *out_w = tex->params.w; + *out_h = tex->params.h; + return 0; +} + +static int render(struct render_backend *ctx, mpv_render_param *params, + struct vo_frame *frame) +{ + struct priv *p = ctx->priv; + + // Mapping the surface is cheap, better than adding new backend entrypoints. 
+ struct ra_tex *tex; + int err = p->context->fns->wrap_fbo(p->context, params, &tex); + if (err < 0) + return err; + + int depth = *(int *)get_mpv_render_param(params, MPV_RENDER_PARAM_DEPTH, + &(int){0}); + gl_video_set_fb_depth(p->renderer, depth); + + bool flip = *(int *)get_mpv_render_param(params, MPV_RENDER_PARAM_FLIP_Y, + &(int){0}); + + struct ra_fbo target = {.tex = tex, .flip = flip}; + gl_video_render_frame(p->renderer, frame, target, RENDER_FRAME_DEF); + p->context->fns->done_frame(p->context, frame->display_synced); + + return 0; +} + +static struct mp_image *get_image(struct render_backend *ctx, int imgfmt, + int w, int h, int stride_align) +{ + struct priv *p = ctx->priv; + + return gl_video_get_image(p->renderer, imgfmt, w, h, stride_align); +} + +static void screenshot(struct render_backend *ctx, struct vo_frame *frame, + struct voctrl_screenshot *args) +{ + struct priv *p = ctx->priv; + + gl_video_screenshot(p->renderer, frame, args); +} + +static void destroy(struct render_backend *ctx) +{ + struct priv *p = ctx->priv; + + if (p->renderer) + gl_video_uninit(p->renderer); + + hwdec_devices_destroy(ctx->hwdec_devs); + + if (p->context) { + p->context->fns->destroy(p->context); + talloc_free(p->context->priv); + talloc_free(p->context); + } +} + +const struct render_backend_fns render_backend_gpu = { + .init = init, + .check_format = check_format, + .set_parameter = set_parameter, + .reconfig = reconfig, + .reset = reset, + .update_external = update_external, + .resize = resize, + .get_target_size = get_target_size, + .render = render, + .get_image = get_image, + .screenshot = screenshot, + .destroy = destroy, +}; diff --git a/video/out/gpu/libmpv_gpu.h b/video/out/gpu/libmpv_gpu.h new file mode 100644 index 0000000..2c9f712 --- /dev/null +++ b/video/out/gpu/libmpv_gpu.h @@ -0,0 +1,40 @@ +#pragma once + +#include "video/out/libmpv.h" + +struct ra_tex; + +struct libmpv_gpu_context { + struct mpv_global *global; + struct mp_log *log; + const struct 
libmpv_gpu_context_fns *fns; + + struct ra *ra; + void *priv; +}; + +// Manage backend specific interaction between libmpv and ra backend, that can't +// be managed by ra itself (initialization and passing FBOs). +struct libmpv_gpu_context_fns { + // The libmpv API type name, see MPV_RENDER_PARAM_API_TYPE. + const char *api_name; + // Pretty much works like render_backend_fns.init, except that the + // API type is already checked by the caller. + // Successful init must set ctx->ra. + int (*init)(struct libmpv_gpu_context *ctx, mpv_render_param *params); + // Wrap the surface passed to mpv_render_context_render() (via the params + // array) into a ra_tex and return it. Returns a libmpv error code, and sets + // *out to a temporary object on success. The returned object is valid until + // another wrap_fbo() or done_frame() is called. + // This does not need to care about generic attributes, like flipping. + int (*wrap_fbo)(struct libmpv_gpu_context *ctx, mpv_render_param *params, + struct ra_tex **out); + // Signal that the ra_tex object obtained with wrap_fbo is no longer used. + // For certain backends, this might also be used to signal the end of + // rendering (like OpenGL doing weird crap). + void (*done_frame)(struct libmpv_gpu_context *ctx, bool ds); + // Free all data in ctx->priv. 
+ void (*destroy)(struct libmpv_gpu_context *ctx); +}; + +extern const struct libmpv_gpu_context_fns libmpv_gpu_context_gl; diff --git a/video/out/gpu/osd.c b/video/out/gpu/osd.c index 317deb6..75f69f0 100644 --- a/video/out/gpu/osd.c +++ b/video/out/gpu/osd.c @@ -314,7 +314,7 @@ void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index, const int *factors = &blend_factors[part->format][0]; gl_sc_blend(sc, factors[0], factors[1], factors[2], factors[3]); - gl_sc_dispatch_draw(sc, fbo.tex, vertex_vao, MP_ARRAY_SIZE(vertex_vao), + gl_sc_dispatch_draw(sc, fbo.tex, false, vertex_vao, MP_ARRAY_SIZE(vertex_vao), sizeof(struct vertex), part->vertices, part->num_vertices); } diff --git a/video/out/gpu/ra.c b/video/out/gpu/ra.c index fdb20fe..0c15651 100644 --- a/video/out/gpu/ra.c +++ b/video/out/gpu/ra.c @@ -4,6 +4,26 @@ #include "ra.h" +void ra_add_native_resource(struct ra *ra, const char *name, void *data) +{ + struct ra_native_resource r = { + .name = name, + .data = data, + }; + MP_TARRAY_APPEND(ra, ra->native_resources, ra->num_native_resources, r); +} + +void *ra_get_native_resource(struct ra *ra, const char *name) +{ + for (int n = 0; n < ra->num_native_resources; n++) { + struct ra_native_resource *r = &ra->native_resources[n]; + if (strcmp(r->name, name) == 0) + return r->data; + } + + return NULL; +} + struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params) { return ra->fns->tex_create(ra, params); diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h index 934e5db..79caacc 100644 --- a/video/out/gpu/ra.h +++ b/video/out/gpu/ra.h @@ -39,8 +39,29 @@ struct ra { // RA_CAP_DIRECT_UPLOAD is supported. This is basically only relevant for // OpenGL. Set by the RA user. bool use_pbo; + + // Array of native resources. For the most part an "escape" mechanism, and + // usually does not contain parameters required for basic functionality. 
+ struct ra_native_resource *native_resources; + int num_native_resources; +}; + +// For passing through windowing system specific parameters and such. The +// names are always internal (except for legacy opengl-cb uses; the libmpv +// render API uses mpv_render_param_type and maps them to names internally). +// For example, a name="x11" entry has a X11 display as (Display*)data. +struct ra_native_resource { + const char *name; + void *data; }; +// Add a ra_native_resource entry. Both name and data pointers must stay valid +// until ra termination. +void ra_add_native_resource(struct ra *ra, const char *name, void *data); + +// Search ra->native_resources, returns NULL on failure. +void *ra_get_native_resource(struct ra *ra, const char *name); + enum { RA_CAP_TEX_1D = 1 << 0, // supports 1D textures (as shader inputs) RA_CAP_TEX_3D = 1 << 1, // supports 3D textures (as shader inputs) @@ -53,6 +74,8 @@ enum { RA_CAP_GLOBAL_UNIFORM = 1 << 8, // supports using "naked" uniforms (not UBO) RA_CAP_GATHER = 1 << 9, // supports textureGather in GLSL RA_CAP_FRAGCOORD = 1 << 10, // supports reading from gl_FragCoord + RA_CAP_PARALLEL_COMPUTE = 1 << 11, // supports parallel compute shaders + RA_CAP_NUM_GROUPS = 1 << 12, // supports gl_NumWorkGroups }; enum ra_ctype { @@ -84,6 +107,8 @@ struct ra_format { // only applies to 2-component textures bool linear_filter; // linear filtering available from shader bool renderable; // can be used for render targets + bool dummy_format; // is not a real ra_format but a fake one (e.g. FBO). + // dummy formats cannot be used to create textures // If not 0, the format represents some sort of packed fringe format, whose // shader representation is given by the special_imgfmt_desc pointer. 
@@ -106,6 +131,7 @@ struct ra_tex_params { bool blit_src; // must be usable as a blit source bool blit_dst; // must be usable as a blit destination bool host_mutable; // texture may be updated with tex_upload + bool downloadable; // texture can be read with tex_download // When used as render source texture. bool src_linear; // if false, use nearest sampling (whether this can // be true depends on ra_format.linear_filter) @@ -147,6 +173,13 @@ struct ra_tex_upload_params { ptrdiff_t stride; // The size of a horizontal line in bytes (*not* texels!) }; +struct ra_tex_download_params { + struct ra_tex *tex; // Texture to download from + // Downloading directly (set by caller, data written to by callee): + void *dst; // Address of data (packed with no alignment) + ptrdiff_t stride; // The size of a horizontal line in bytes (*not* texels!) +}; + // Buffer usage type. This restricts what types of operations may be performed // on a buffer. enum ra_buf_type { @@ -285,6 +318,9 @@ struct ra_renderpass_params { enum ra_blend blend_src_alpha; enum ra_blend blend_dst_alpha; + // If true, the contents of `target` not written to will become undefined + bool invalidate_target; + // --- type==RA_RENDERPASS_TYPE_COMPUTE only // Shader text, like vertex_shader/frag_shader. @@ -372,6 +408,10 @@ struct ra_fns { // Returns whether successful. bool (*tex_upload)(struct ra *ra, const struct ra_tex_upload_params *params); + // Copy data from the texture to memory. ra_tex_params.downloadable must + // have been set to true on texture creation. + bool (*tex_download)(struct ra *ra, struct ra_tex_download_params *params); + // Create a buffer. This can be used as a persistently mapped buffer, // a uniform buffer, a shader storage buffer or possibly others. // Not all usage types must be supported; may return NULL if unavailable. 
diff --git a/video/out/gpu/shader_cache.c b/video/out/gpu/shader_cache.c index 6d0f370..f38f0a4 100644 --- a/video/out/gpu/shader_cache.c +++ b/video/out/gpu/shader_cache.c @@ -777,11 +777,6 @@ static void gl_sc_generate(struct gl_shader_cache *sc, ADD(header, "#define texture texture2D\n"); } - if (sc->ra->glsl_vulkan && type == RA_RENDERPASS_TYPE_COMPUTE) { - ADD(header, "#define gl_GlobalInvocationIndex " - "(gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID)\n"); - } - // Additional helpers. ADD(header, "#define LUT_POS(x, lut_size)" " mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n"); @@ -965,13 +960,14 @@ static void gl_sc_generate(struct gl_shader_cache *sc, } struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, - struct ra_tex *target, + struct ra_tex *target, bool discard, const struct ra_renderpass_input *vao, int vao_len, size_t vertex_stride, void *vertices, size_t num_vertices) { struct timer_pool *timer = NULL; + sc->params.invalidate_target = discard; gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER, target->params.format, vao, vao_len, vertex_stride); if (!sc->current_shader) diff --git a/video/out/gpu/shader_cache.h b/video/out/gpu/shader_cache.h index 2fe7dcf..547c6b6 100644 --- a/video/out/gpu/shader_cache.h +++ b/video/out/gpu/shader_cache.h @@ -50,7 +50,7 @@ void gl_sc_blend(struct gl_shader_cache *sc, enum ra_blend blend_dst_alpha); void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name); struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, - struct ra_tex *target, + struct ra_tex *target, bool discard, const struct ra_renderpass_input *vao, int vao_len, size_t vertex_stride, void *ptr, size_t num); diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c index f80d63a..7594c2b 100644 --- a/video/out/gpu/video.c +++ b/video/out/gpu/video.c @@ -313,9 +313,9 @@ static const struct gl_video_opts gl_video_opts_def = { .alpha_mode = ALPHA_BLEND_TILES, .background = {0, 0, 0, 255}, .gamma = 1.0f, - 
.tone_mapping = TONE_MAPPING_MOBIUS, + .tone_mapping = TONE_MAPPING_HABLE, .tone_mapping_param = NAN, - .tone_mapping_desat = 1.0, + .tone_mapping_desat = 0.5, .early_flush = -1, .hwdec_interop = "auto", }; @@ -351,6 +351,7 @@ const struct m_sub_options gl_video_conf = { OPT_FLAG("gamma-auto", gamma_auto, 0), OPT_CHOICE_C("target-prim", target_prim, 0, mp_csp_prim_names), OPT_CHOICE_C("target-trc", target_trc, 0, mp_csp_trc_names), + OPT_INTRANGE("target-peak", target_peak, 0, 10, 10000), OPT_CHOICE("tone-mapping", tone_mapping, 0, ({"clip", TONE_MAPPING_CLIP}, {"mobius", TONE_MAPPING_MOBIUS}, @@ -358,7 +359,10 @@ const struct m_sub_options gl_video_conf = { {"hable", TONE_MAPPING_HABLE}, {"gamma", TONE_MAPPING_GAMMA}, {"linear", TONE_MAPPING_LINEAR})), - OPT_FLAG("hdr-compute-peak", compute_hdr_peak, 0), + OPT_CHOICE("hdr-compute-peak", compute_hdr_peak, 0, + ({"auto", 0}, + {"yes", 1}, + {"no", -1})), OPT_FLOAT("tone-mapping-param", tone_mapping_param, 0), OPT_FLOAT("tone-mapping-desaturate", tone_mapping_desat, 0), OPT_FLAG("gamut-warning", gamut_warning, 0), @@ -529,9 +533,6 @@ static void uninit_rendering(struct gl_video *p) for (int n = 0; n < p->num_hook_textures; n++) ra_tex_free(p->ra, &p->hook_textures[n]); - for (int n = 0; n < 2; n++) - ra_tex_free(p->ra, &p->vdpau_deinterleave_tex[n]); - gl_video_reset_surfaces(p); gl_video_reset_hooks(p); @@ -548,6 +549,7 @@ struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p) return (struct mp_colorspace) { .primaries = p->opts.target_prim, .gamma = p->opts.target_trc, + .sig_peak = p->opts.target_peak / MP_REF_WHITE, }; } @@ -862,9 +864,6 @@ static void init_video(struct gl_video *p) } p->color_swizzle[4] = '\0'; - // Format-dependent checks. - check_gl_features(p); - mp_image_params_guess_csp(&p->image_params); av_lfg_init(&p->lfg, 1); @@ -909,6 +908,9 @@ static void init_video(struct gl_video *p) debug_check_gl(p, "after video texture creation"); + // Format-dependent checks. 
+ check_gl_features(p); + gl_video_setup_hooks(p); } @@ -1001,6 +1003,9 @@ static void uninit_video(struct gl_video *p) p->hwdec_active = false; p->hwdec_overlay = NULL; ra_hwdec_mapper_free(&p->hwdec_mapper); + + for (int n = 0; n < 2; n++) + ra_tex_free(p->ra, &p->vdpau_deinterleave_tex[n]); } static void pass_record(struct gl_video *p, struct mp_pass_perf perf) @@ -1148,12 +1153,15 @@ static void dispatch_compute(struct gl_video *p, int w, int h, int num_x = info.block_w > 0 ? (w + info.block_w - 1) / info.block_w : 1, num_y = info.block_h > 0 ? (h + info.block_h - 1) / info.block_h : 1; + if (!(p->ra->caps & RA_CAP_NUM_GROUPS)) + PRELUDE("#define gl_NumWorkGroups uvec3(%d, %d, 1)\n", num_x, num_y); + pass_record(p, gl_sc_dispatch_compute(p->sc, num_x, num_y, 1)); cleanup_binds(p); } static struct mp_pass_perf render_pass_quad(struct gl_video *p, - struct ra_fbo fbo, + struct ra_fbo fbo, bool discard, const struct mp_rect *dst) { // The first element is reserved for `vec2 position` @@ -1211,15 +1219,15 @@ static struct mp_pass_perf render_pass_quad(struct gl_video *p, &p->tmp_vertex[num_vertex_attribs * 1], vertex_stride); - return gl_sc_dispatch_draw(p->sc, fbo.tex, p->vao, num_vertex_attribs, + return gl_sc_dispatch_draw(p->sc, fbo.tex, discard, p->vao, num_vertex_attribs, vertex_stride, p->tmp_vertex, num_vertices); } static void finish_pass_fbo(struct gl_video *p, struct ra_fbo fbo, - const struct mp_rect *dst) + bool discard, const struct mp_rect *dst) { pass_prepare_src_tex(p); - pass_record(p, render_pass_quad(p, fbo, dst)); + pass_record(p, render_pass_quad(p, fbo, discard, dst)); debug_check_gl(p, "after rendering"); cleanup_binds(p); } @@ -1237,6 +1245,11 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex, return; } + // If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders + // over fragment shaders wherever possible. 
+ if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE)) + pass_is_compute(p, 16, 16); + if (p->pass_compute.active) { gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex); if (!p->pass_compute.directly_writes) @@ -1248,7 +1261,7 @@ static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex, debug_check_gl(p, "after dispatching compute shader"); } else { struct ra_fbo fbo = { .tex = *dst_tex, }; - finish_pass_fbo(p, fbo, &(struct mp_rect){0, 0, w, h}); + finish_pass_fbo(p, fbo, true, &(struct mp_rect){0, 0, w, h}); } } @@ -2384,6 +2397,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool .gamma = p->opts.target_trc, .primaries = p->opts.target_prim, .light = MP_CSP_LIGHT_DISPLAY, + .sig_peak = p->opts.target_peak / MP_REF_WHITE, }; if (p->use_lut_3d) { @@ -2403,6 +2417,7 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool if (gl_video_get_lut3d(p, prim_orig, trc_orig)) { dst.primaries = prim_orig; dst.gamma = trc_orig; + assert(dst.primaries && dst.gamma); } } @@ -2437,20 +2452,23 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool dst.gamma = MP_CSP_TRC_GAMMA22; } - bool detect_peak = p->opts.compute_hdr_peak && mp_trc_is_hdr(src.gamma); + // If there's no specific signal peak known for the output display, infer + // it from the chosen transfer function + if (!dst.sig_peak) + dst.sig_peak = mp_trc_nom_peak(dst.gamma); + + bool detect_peak = p->opts.compute_hdr_peak >= 0 && mp_trc_is_hdr(src.gamma); if (detect_peak && !p->hdr_peak_ssbo) { struct { - unsigned int sig_peak_raw; - unsigned int index; - unsigned int frame_max[PEAK_DETECT_FRAMES+1]; + uint32_t counter; + uint32_t frame_idx; + uint32_t frame_num; + uint32_t frame_max[PEAK_DETECT_FRAMES+1]; + uint32_t frame_sum[PEAK_DETECT_FRAMES+1]; + uint32_t total_max; + uint32_t total_sum; } peak_ssbo = {0}; - // Prefill with safe values - int safe = MP_REF_WHITE * 
mp_trc_nom_peak(p->image_params.color.gamma); - peak_ssbo.sig_peak_raw = PEAK_DETECT_FRAMES * safe; - for (int i = 0; i < PEAK_DETECT_FRAMES+1; i++) - peak_ssbo.frame_max[i] = safe; - struct ra_buf_params params = { .type = RA_BUF_TYPE_SHADER_STORAGE, .size = sizeof(peak_ssbo), @@ -2460,7 +2478,8 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool p->hdr_peak_ssbo = ra_buf_create(ra, &params); if (!p->hdr_peak_ssbo) { MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n"); - detect_peak = (p->opts.compute_hdr_peak = false); + detect_peak = false; + p->opts.compute_hdr_peak = -1; } } @@ -2468,9 +2487,15 @@ static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, bool pass_describe(p, "detect HDR peak"); pass_is_compute(p, 8, 8); // 8x8 is good for performance gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo, - "uint sig_peak_raw;" - "uint index;" - "uint frame_max[%d];", PEAK_DETECT_FRAMES + 1 + "uint counter;" + "uint frame_idx;" + "uint frame_num;" + "uint frame_max[%d];" + "uint frame_avg[%d];" + "uint total_max;" + "uint total_avg;", + PEAK_DETECT_FRAMES + 1, + PEAK_DETECT_FRAMES + 1 ); } @@ -2603,7 +2628,10 @@ static void pass_dither(struct gl_video *p) static void pass_draw_osd(struct gl_video *p, int draw_flags, double pts, struct mp_osd_res rect, struct ra_fbo fbo, bool cms) { - mpgl_osd_generate(p->osd, rect, pts, p->image_params.stereo_out, draw_flags); + if ((draw_flags & OSD_DRAW_SUB_ONLY) && (draw_flags & OSD_DRAW_OSD_ONLY)) + return; + + mpgl_osd_generate(p->osd, rect, pts, p->image_params.stereo3d, draw_flags); timer_pool_start(p->osd_timer); for (int n = 0; n < MAX_OSD_PARTS; n++) { @@ -2672,7 +2700,9 @@ static void pass_render_frame_dumb(struct gl_video *p) // The main rendering function, takes care of everything up to and including // upscaling. p->image is rendered. 
-static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi, uint64_t id) +// flags: bit set of RENDER_FRAME_* flags +static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi, + uint64_t id, int flags) { // initialize the texture parameters and temporary variables p->texture_w = p->image_params.w; @@ -2703,7 +2733,9 @@ static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi, uint64_t if (vpts == MP_NOPTS_VALUE) vpts = p->osd_pts; - if (p->osd && p->opts.blend_subs == BLEND_SUBS_VIDEO) { + if (p->osd && p->opts.blend_subs == BLEND_SUBS_VIDEO && + (flags & RENDER_FRAME_SUBS)) + { double scale[2]; get_scale_factors(p, false, scale); struct mp_osd_res rect = { @@ -2722,7 +2754,9 @@ static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi, uint64_t int vp_w = p->dst_rect.x1 - p->dst_rect.x0, vp_h = p->dst_rect.y1 - p->dst_rect.y0; - if (p->osd && p->opts.blend_subs == BLEND_SUBS_YES) { + if (p->osd && p->opts.blend_subs == BLEND_SUBS_YES && + (flags & RENDER_FRAME_SUBS)) + { // Recreate the real video size from the src/dst rects struct mp_osd_res rect = { .w = vp_w, .h = vp_h, @@ -2799,17 +2833,18 @@ static void pass_draw_to_screen(struct gl_video *p, struct ra_fbo fbo) pass_dither(p); pass_describe(p, "output to screen"); - finish_pass_fbo(p, fbo, &p->dst_rect); + finish_pass_fbo(p, fbo, false, &p->dst_rect); } +// flags: bit set of RENDER_FRAME_* flags static bool update_surface(struct gl_video *p, struct mp_image *mpi, - uint64_t id, struct surface *surf) + uint64_t id, struct surface *surf, int flags) { int vp_w = p->dst_rect.x1 - p->dst_rect.x0, vp_h = p->dst_rect.y1 - p->dst_rect.y0; pass_info_reset(p, false); - if (!pass_render_frame(p, mpi, id)) + if (!pass_render_frame(p, mpi, id, flags)) return false; // Frame blending should always be done in linear light to preserve the @@ -2827,8 +2862,9 @@ static bool update_surface(struct gl_video *p, struct mp_image *mpi, } // Draws an interpolate frame to fbo, based 
on the frame timing in t +// flags: bit set of RENDER_FRAME_* flags static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, - struct ra_fbo fbo) + struct ra_fbo fbo, int flags) { bool is_new = false; @@ -2842,7 +2878,7 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, // it manually + reset the queue if not if (p->surfaces[p->surface_now].id == 0) { struct surface *now = &p->surfaces[p->surface_now]; - if (!update_surface(p, t->current, t->frame_id, now)) + if (!update_surface(p, t->current, t->frame_id, now, flags)) return; p->surface_idx = p->surface_now; is_new = true; @@ -2900,7 +2936,7 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, if (f_id > p->surfaces[p->surface_idx].id) { struct surface *dst = &p->surfaces[surface_dst]; - if (!update_surface(p, f, f_id, dst)) + if (!update_surface(p, f, f_id, dst, flags)) return; p->surface_idx = surface_dst; surface_dst = surface_wrap(surface_dst + 1); @@ -3000,7 +3036,7 @@ static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, } void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, - struct ra_fbo fbo) + struct ra_fbo fbo, int flags) { gl_video_update_options(p); @@ -3043,7 +3079,7 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, } if (interpolate) { - gl_video_interpolate_frame(p, frame, fbo); + gl_video_interpolate_frame(p, frame, fbo, flags); } else { bool is_new = frame->frame_id != p->image.id; @@ -3055,18 +3091,25 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, p->output_tex_valid = false; pass_info_reset(p, !is_new); - if (!pass_render_frame(p, frame->current, frame->frame_id)) + if (!pass_render_frame(p, frame->current, frame->frame_id, flags)) goto done; // For the non-interpolation case, we draw to a single "cache" // texture to speed up subsequent re-draws (if any exist) struct ra_fbo dest_fbo = fbo; if (frame->num_vsyncs > 1 && 
frame->display_synced && - !p->dumb_mode && (p->ra->caps & RA_CAP_BLIT)) + !p->dumb_mode && (p->ra->caps & RA_CAP_BLIT) && + fbo.tex->params.blit_dst) { + // Attempt to use the same format as the destination FBO + // if possible. Some RAs use a wrapped dummy format here, + // so fall back to the fbo_format in that case. + const struct ra_format *fmt = fbo.tex->params.format; + if (fmt->dummy_format) + fmt = p->fbo_format; bool r = ra_tex_resize(p->ra, p->log, &p->output_tex, fbo.tex->params.w, fbo.tex->params.h, - p->fbo_format); + fmt); if (r) { dest_fbo = (struct ra_fbo) { p->output_tex }; p->output_tex_valid = true; @@ -3076,7 +3119,7 @@ void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, } // "output tex valid" and "output tex needed" are equivalent - if (p->output_tex_valid) { + if (p->output_tex_valid && fbo.tex->params.blit_dst) { pass_info_reset(p, true); pass_describe(p, "redraw cached frame"); struct mp_rect src = p->dst_rect; @@ -3097,19 +3140,25 @@ done: debug_check_gl(p, "after video rendering"); - if (p->osd) { + if (p->osd && (flags & (RENDER_FRAME_SUBS | RENDER_FRAME_OSD))) { // If we haven't actually drawn anything so far, then we technically // need to consider this the start of a new pass. Let's call it a // redraw just because, since it's basically a blank frame anyway if (!has_frame) pass_info_reset(p, true); - pass_draw_osd(p, p->opts.blend_subs ? OSD_DRAW_OSD_ONLY : 0, - p->osd_pts, p->osd_rect, fbo, true); + int osd_flags = p->opts.blend_subs ? 
OSD_DRAW_OSD_ONLY : 0; + if (!(flags & RENDER_FRAME_SUBS)) + osd_flags |= OSD_DRAW_OSD_ONLY; + if (!(flags & RENDER_FRAME_OSD)) + osd_flags |= OSD_DRAW_SUB_ONLY; + + pass_draw_osd(p, osd_flags, p->osd_pts, p->osd_rect, fbo, true); debug_check_gl(p, "after OSD rendering"); } - if (gl_sc_error_state(p->sc) || p->broken_frame) { + p->broken_frame |= gl_sc_error_state(p->sc); + if (p->broken_frame) { // Make the screen solid blue to make it visually clear that an // error has occurred float color[4] = {0.0, 0.05, 0.5, 1.0}; @@ -3120,6 +3169,100 @@ done: pass_report_performance(p); } +void gl_video_screenshot(struct gl_video *p, struct vo_frame *frame, + struct voctrl_screenshot *args) +{ + if (!p->ra->fns->tex_download) + return; + + bool ok = false; + struct mp_image *res = NULL; + struct ra_tex *target = NULL; + struct mp_rect old_src = p->src_rect; + struct mp_rect old_dst = p->dst_rect; + struct mp_osd_res old_osd = p->osd_rect; + struct vo_frame *nframe = vo_frame_ref(frame); + + // Disable interpolation and such. 
+ nframe->redraw = true; + nframe->repeat = false; + nframe->still = true; + nframe->pts = 0; + nframe->duration = -1; + + if (!args->scaled) { + int w, h; + mp_image_params_get_dsize(&p->image_params, &w, &h); + if (w < 1 || h < 1) + return; + + if (p->image_params.rotate % 180 == 90) + MPSWAP(int, w, h); + + struct mp_rect src = {0, 0, p->image_params.w, p->image_params.h}; + struct mp_rect dst = {0, 0, w, h}; + struct mp_osd_res osd = {.w = w, .h = h, .display_par = 1.0}; + gl_video_resize(p, &src, &dst, &osd); + } + + gl_video_reset_surfaces(p); + + struct ra_tex_params params = { + .dimensions = 2, + .downloadable = true, + .w = p->osd_rect.w, + .h = p->osd_rect.h, + .render_dst = true, + }; + + params.format = ra_find_unorm_format(p->ra, 1, 4); + int mpfmt = IMGFMT_RGB0; + if (args->high_bit_depth && p->ra_format.component_bits > 8) { + const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4); + if (fmt && fmt->renderable) { + params.format = fmt; + mpfmt = IMGFMT_RGBA64; + } + } + + if (!params.format || !params.format->renderable) + goto done; + target = ra_tex_create(p->ra, &params); + if (!target) + goto done; + + int flags = 0; + if (args->subs) + flags |= RENDER_FRAME_SUBS; + if (args->osd) + flags |= RENDER_FRAME_OSD; + gl_video_render_frame(p, nframe, (struct ra_fbo){target}, flags); + + res = mp_image_alloc(mpfmt, params.w, params.h); + if (!res) + goto done; + + struct ra_tex_download_params download_params = { + .tex = target, + .dst = res->planes[0], + .stride = res->stride[0], + }; + if (!p->ra->fns->tex_download(p->ra, &download_params)) + goto done; + + if (p->broken_frame) + goto done; + + ok = true; +done: + talloc_free(nframe); + ra_tex_free(p->ra, &target); + gl_video_resize(p, &old_src, &old_dst, &old_osd); + if (!ok) + TA_FREEP(&res); + args->res = res; +} + // Use this color instead of the global option. 
void gl_video_set_clear_color(struct gl_video *p, struct m_color c) { @@ -3154,7 +3297,7 @@ void gl_video_resize(struct gl_video *p, gl_video_reset_surfaces(p); if (p->osd) - mpgl_osd_resize(p->osd, p->osd_rect, p->image_params.stereo_out); + mpgl_osd_resize(p->osd, p->osd_rect, p->image_params.stereo3d); } static void frame_perf_data(struct pass_info pass[], struct mp_frame_perf *out) @@ -3205,7 +3348,7 @@ static void reinterleave_vdpau(struct gl_video *p, const struct ra_format *fmt = ra_find_unorm_format(p->ra, 1, comps); ra_tex_resize(p->ra, p->log, tex, w, h * 2, fmt); struct ra_fbo fbo = { *tex }; - finish_pass_fbo(p, fbo, &(struct mp_rect){0, 0, w, h * 2}); + finish_pass_fbo(p, fbo, true, &(struct mp_rect){0, 0, w, h * 2}); output[n] = *tex; } @@ -3333,7 +3476,7 @@ static bool test_fbo(struct gl_video *p, const struct ra_format *fmt) } // Return whether dumb-mode can be used without disabling any features. -// Essentially, vo_opengl with mostly default settings will return true. +// Essentially, vo_gpu with mostly default settings will return true. static bool check_dumb_mode(struct gl_video *p) { struct gl_video_opts *o = &p->opts; @@ -3374,25 +3517,63 @@ static void check_gl_features(struct gl_video *p) bool have_texrg = rg_tex && !rg_tex->luminance_alpha; bool have_compute = ra->caps & RA_CAP_COMPUTE; bool have_ssbo = ra->caps & RA_CAP_BUF_RW; + bool have_fragcoord = ra->caps & RA_CAP_FRAGCOORD; - const char *auto_fbo_fmts[] = {"rgba16", "rgba16f", "rgba16hf", + const char *auto_fbo_fmts[] = {"rgba16f", "rgba16hf", "rgba16", "rgb10_a2", "rgba8", 0}; const char *user_fbo_fmts[] = {p->opts.fbo_format, 0}; const char **fbo_fmts = user_fbo_fmts[0] && strcmp(user_fbo_fmts[0], "auto") ? 
user_fbo_fmts : auto_fbo_fmts; + bool user_specified_fbo_fmt = fbo_fmts == user_fbo_fmts; + bool fbo_test_result = false; bool have_fbo = false; p->fbo_format = NULL; for (int n = 0; fbo_fmts[n]; n++) { const char *fmt = fbo_fmts[n]; const struct ra_format *f = ra_find_named_format(p->ra, fmt); - if (!f && fbo_fmts == user_fbo_fmts) + if (!f && user_specified_fbo_fmt) MP_WARN(p, "FBO format '%s' not found!\n", fmt); - if (f && f->renderable && f->linear_filter && test_fbo(p, f)) { + if (f && f->renderable && f->linear_filter && + (fbo_test_result = test_fbo(p, f))) { MP_VERBOSE(p, "Using FBO format %s.\n", f->name); have_fbo = true; p->fbo_format = f; break; } + + if (user_specified_fbo_fmt) { + MP_WARN(p, "User-specified FBO format '%s' failed to initialize! " + "(exists=%d, renderable=%d, linear_filter=%d, " + "fbo_test_result=%d)\n", + fmt, !!f, f ? f->renderable : 0, f ? f->linear_filter : 0, + fbo_test_result); + } + } + + if (!have_fragcoord && p->opts.dither_depth >= 0 && + p->opts.dither_algo != DITHER_NONE) + { + p->opts.dither_algo = DITHER_NONE; + MP_WARN(p, "Disabling dithering (no gl_FragCoord).\n"); + } + if (!have_fragcoord && p->opts.alpha_mode == ALPHA_BLEND_TILES) { + p->opts.alpha_mode = ALPHA_BLEND; + // Verbose, since this is the default setting + MP_VERBOSE(p, "Disabling alpha checkerboard (no gl_FragCoord).\n"); + } + if (!have_fbo && have_compute) { + have_compute = false; + MP_WARN(p, "Force-disabling compute shaders as an FBO format was not " + "available! See your FBO format configuration!\n"); + } + + bool have_compute_peak = have_compute && have_ssbo; + if (!have_compute_peak && p->opts.compute_hdr_peak >= 0) { + int msgl = p->opts.compute_hdr_peak == 1 ? 
MSGL_WARN : MSGL_V; + MP_MSG(p, msgl, "Disabling HDR peak computation (one or more of the " + "following is not supported: compute shaders=%d, " + "SSBO=%d).\n", have_compute, have_ssbo); + p->opts.compute_hdr_peak = -1; } p->forced_dumb_mode = p->opts.dumb_mode > 0 || !have_fbo || !have_texrg; @@ -3414,6 +3595,7 @@ static void check_gl_features(struct gl_video *p) .alpha_mode = p->opts.alpha_mode, .use_rectangle = p->opts.use_rectangle, .background = p->opts.background, + .compute_hdr_peak = p->opts.compute_hdr_peak, .dither_algo = p->opts.dither_algo, .dither_depth = p->opts.dither_depth, .dither_size = p->opts.dither_size, @@ -3479,23 +3661,6 @@ static void check_gl_features(struct gl_video *p) p->opts.deband = 0; MP_WARN(p, "Disabling debanding (GLSL version too old).\n"); } - if ((!have_compute || !have_ssbo) && p->opts.compute_hdr_peak) { - p->opts.compute_hdr_peak = 0; - MP_WARN(p, "Disabling HDR peak computation (no compute shaders).\n"); - } - if (!(ra->caps & RA_CAP_FRAGCOORD) && p->opts.dither_depth >= 0 && - p->opts.dither_algo != DITHER_NONE) - { - p->opts.dither_algo = DITHER_NONE; - MP_WARN(p, "Disabling dithering (no gl_FragCoord).\n"); - } - if (!(ra->caps & RA_CAP_FRAGCOORD) && - p->opts.alpha_mode == ALPHA_BLEND_TILES) - { - p->opts.alpha_mode = ALPHA_BLEND; - // Verbose, since this is the default setting - MP_VERBOSE(p, "Disabling alpha checkerboard (no gl_FragCoord).\n"); - } } static void init_gl(struct gl_video *p) @@ -3838,6 +4003,9 @@ static void gl_video_dr_free_buffer(void *opaque, uint8_t *data) struct mp_image *gl_video_get_image(struct gl_video *p, int imgfmt, int w, int h, int stride_align) { + if (!gl_video_check_format(p, imgfmt)) + return NULL; + int size = mp_image_get_alloc_size(imgfmt, w, h, stride_align); if (size < 0) return NULL; diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h index 78f8828..2184599 100644 --- a/video/out/gpu/video.h +++ b/video/out/gpu/video.h @@ -96,7 +96,7 @@ enum tone_mapping { }; // How many 
frames to average over for HDR peak detection -#define PEAK_DETECT_FRAMES 100 +#define PEAK_DETECT_FRAMES 63 struct gl_video_opts { int dumb_mode; @@ -106,7 +106,7 @@ struct gl_video_opts { int gamma_auto; int target_prim; int target_trc; - int target_brightness; + int target_peak; int tone_mapping; int compute_hdr_peak; float tone_mapping_param; @@ -146,6 +146,13 @@ extern const struct m_sub_options gl_video_conf; struct gl_video; struct vo_frame; +struct voctrl_screenshot; + +enum { + RENDER_FRAME_SUBS = 1 << 0, + RENDER_FRAME_OSD = 2 << 0, + RENDER_FRAME_DEF = RENDER_FRAME_SUBS | RENDER_FRAME_OSD, +}; struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log, struct mpv_global *g); @@ -153,9 +160,8 @@ void gl_video_uninit(struct gl_video *p); void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd); bool gl_video_check_format(struct gl_video *p, int mp_format); void gl_video_config(struct gl_video *p, struct mp_image_params *params); -void gl_video_set_output_depth(struct gl_video *p, int r, int g, int b); void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, - struct ra_fbo fbo); + struct ra_fbo fbo, int flags); void gl_video_resize(struct gl_video *p, struct mp_rect *src, struct mp_rect *dst, struct mp_osd_res *osd); @@ -166,6 +172,9 @@ void gl_video_set_osd_pts(struct gl_video *p, double pts); bool gl_video_check_osd_change(struct gl_video *p, struct mp_osd_res *osd, double pts); +void gl_video_screenshot(struct gl_video *p, struct vo_frame *frame, + struct voctrl_screenshot *args); + float gl_video_scale_ambient_lux(float lmin, float lmax, float rmin, float rmax, float lux); void gl_video_set_ambient_lux(struct gl_video *p, int lux); diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c index 3e71c31..2b18d17 100644 --- a/video/out/gpu/video_shaders.c +++ b/video/out/gpu/video_shaders.c @@ -334,6 +334,10 @@ static const float SLOG_A = 0.432699, // Linearize (expand), given a TRC as input. 
In essence, this is the ITU-R // EOTF, calculated on an idealized (reference) monitor with a white point of // MP_REF_WHITE and infinite contrast. +// +// These functions always output to a normalized scale of [0,1], for +// convenience of the video.c code that calls it. To get the values in an +// absolute scale, multiply the result by `mp_trc_nom_peak(trc)` void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) { if (trc == MP_CSP_TRC_LINEAR) @@ -417,6 +421,8 @@ void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) // Delinearize (compress), given a TRC as output. This corresponds to the // inverse EOTF (not the OETF) in ITU-R terminology, again assuming a // reference monitor. +// +// Like pass_linearize, this function ingests values on a normalized scale void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) { if (trc == MP_CSP_TRC_LINEAR) @@ -488,24 +494,25 @@ void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc) } // Apply the OOTF mapping from a given light type to display-referred light. -// The extra peak parameter is used to scale the values before and after -// the OOTF, and can be inferred using mp_trc_nom_peak -void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak) +// Assumes absolute scale values. `peak` is used to tune the OOTF where +// applicable (currently only HLG). 
+static void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, + float peak) { if (light == MP_CSP_LIGHT_DISPLAY) return; GLSLF("// apply ootf\n"); - GLSLF("color.rgb *= vec3(%f);\n", peak); switch (light) { - case MP_CSP_LIGHT_SCENE_HLG: - // HLG OOTF from BT.2100, assuming a reference display with a - // peak of 1000 cd/m² -> gamma = 1.2 - GLSLF("color.rgb *= vec3(%f * pow(dot(src_luma, color.rgb), 0.2));\n", - (1000 / MP_REF_WHITE) / pow(12, 1.2)); + case MP_CSP_LIGHT_SCENE_HLG: { + // HLG OOTF from BT.2100, scaled to the chosen display peak + float gamma = MPMAX(1.0, 1.2 + 0.42 * log10(peak * MP_REF_WHITE / 1000.0)); + GLSLF("color.rgb *= vec3(%f * pow(dot(src_luma, color.rgb), %f));\n", + peak / pow(12, gamma), gamma - 1.0); break; + } case MP_CSP_LIGHT_SCENE_709_1886: // This OOTF is defined by encoding the result as 709 and then decoding // it as 1886; although this is called 709_1886 we actually use the @@ -521,25 +528,26 @@ void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak) default: abort(); } - - GLSLF("color.rgb *= vec3(1.0/%f);\n", peak); } // Inverse of the function pass_ootf, for completeness' sake. 
-void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak) +static void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, + float peak) { if (light == MP_CSP_LIGHT_DISPLAY) return; GLSLF("// apply inverse ootf\n"); - GLSLF("color.rgb *= vec3(%f);\n", peak); switch (light) { - case MP_CSP_LIGHT_SCENE_HLG: - GLSLF("color.rgb *= vec3(1.0/%f);\n", (1000 / MP_REF_WHITE) / pow(12, 1.2)); - GLSL(color.rgb /= vec3(max(1e-6, pow(dot(src_luma, color.rgb), 0.2/1.2)));) + case MP_CSP_LIGHT_SCENE_HLG: { + float gamma = MPMAX(1.0, 1.2 + 0.42 * log10(peak * MP_REF_WHITE / 1000.0)); + GLSLF("color.rgb *= vec3(1.0/%f);\n", peak / pow(12, gamma)); + GLSLF("color.rgb /= vec3(max(1e-6, pow(dot(src_luma, color.rgb), %f)));\n", + (gamma - 1.0) / gamma); break; + } case MP_CSP_LIGHT_SCENE_709_1886: GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) GLSL(color.rgb = mix(color.rgb * vec3(1.0/4.5), @@ -553,13 +561,89 @@ void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, floa default: abort(); } +} - GLSLF("color.rgb *= vec3(1.0/%f);\n", peak); +// Average light level for SDR signals. This is equal to a signal level of 0.5 +// under a typical presentation gamma of about 2.0. +static const float sdr_avg = 0.25; + +// The threshold for which to consider an average luminance difference to be +// a sign of a scene change. +static const int scene_threshold = 0.2 * MP_REF_WHITE; + +static void hdr_update_peak(struct gl_shader_cache *sc) +{ + // For performance, we want to do as few atomic operations on global + // memory as possible, so use an atomic in shmem for the work group. + GLSLH(shared uint wg_sum;); + GLSL(wg_sum = 0;) + + // Have each thread update the work group sum with the local value + GLSL(barrier();) + GLSLF("atomicAdd(wg_sum, uint(sig * %f));\n", MP_REF_WHITE); + + // Have one thread per work group update the global atomics. 
We use the + // work group average even for the global sum, to make the values slightly + // more stable and smooth out tiny super-highlights. + GLSL(memoryBarrierShared();) + GLSL(barrier();) + GLSL(if (gl_LocalInvocationIndex == 0) {) + GLSL( uint wg_avg = wg_sum / (gl_WorkGroupSize.x * gl_WorkGroupSize.y);) + GLSL( atomicMax(frame_max[frame_idx], wg_avg);) + GLSL( atomicAdd(frame_avg[frame_idx], wg_avg);) + GLSL(}) + + const float refi = 1.0 / MP_REF_WHITE; + + // Update the sig_peak/sig_avg from the old SSBO state + GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;) + GLSL(if (frame_num > 0) {) + GLSLF(" float peak = %f * float(total_max) / float(frame_num);\n", refi); + GLSLF(" float avg = %f * float(total_avg) / float(frame_num);\n", refi); + GLSLF(" sig_peak = max(1.0, peak);\n"); + GLSLF(" sig_avg = max(%f, avg);\n", sdr_avg); + GLSL(}); + + // Finally, to update the global state, we increment a counter per dispatch + GLSL(memoryBarrierBuffer();) + GLSL(barrier();) + GLSL(if (gl_LocalInvocationIndex == 0 && atomicAdd(counter, 1) == num_wg - 1) {) + + // Since we sum up all the workgroups, we also still need to divide the + // average by the number of work groups + GLSL( counter = 0;) + GLSL( frame_avg[frame_idx] /= num_wg;) + GLSL( uint cur_max = frame_max[frame_idx];) + GLSL( uint cur_avg = frame_avg[frame_idx];) + + // Scene change detection + GLSL( int diff = int(frame_num * cur_avg) - int(total_avg);) + GLSLF(" if (abs(diff) > frame_num * %d) {\n", scene_threshold); + GLSL( frame_num = 0;) + GLSL( total_max = total_avg = 0;) + GLSLF(" for (uint i = 0; i < %d; i++)\n", PEAK_DETECT_FRAMES+1); + GLSL( frame_max[i] = frame_avg[i] = 0;) + GLSL( frame_max[frame_idx] = cur_max;) + GLSL( frame_avg[frame_idx] = cur_avg;) + GLSL( }) + + // Add the current frame, then subtract and reset the next frame + GLSLF(" uint next = (frame_idx + 1) %% %d;\n", PEAK_DETECT_FRAMES+1); + GLSL( total_max += cur_max - frame_max[next];) + GLSL( total_avg += cur_avg - 
frame_avg[next];) + GLSL( frame_max[next] = frame_avg[next] = 0;) + + // Update the index and count + GLSL( frame_idx = next;) + GLSLF(" frame_num = min(frame_num + 1, %d);\n", PEAK_DETECT_FRAMES); + GLSL( memoryBarrierBuffer();) + GLSL(}) } // Tone map from a known peak brightness to the range [0,1]. If ref_peak // is 0, we will use peak detection instead -static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, +static void pass_tone_map(struct gl_shader_cache *sc, bool detect_peak, + float src_peak, float dst_peak, enum tone_mapping algo, float param, float desat) { GLSLF("// HDR tone mapping\n"); @@ -568,57 +652,44 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, // sure to reduce the value range as far as necessary to keep the entire // signal in range, so tone map based on the brightest component. GLSL(float sig = max(max(color.r, color.g), color.b);) + GLSLF("float sig_peak = %f;\n", src_peak); + GLSLF("float sig_avg = %f;\n", sdr_avg); + + if (detect_peak) + hdr_update_peak(sc); + + // Rescale the variables in order to bring it into a representation where + // 1.0 represents the dst_peak. This is because all of the tone mapping + // algorithms are defined in such a way that they map to the range [0.0, 1.0]. + if (dst_peak > 1.0) { + GLSLF("sig *= %f;\n", 1.0 / dst_peak); + GLSLF("sig_peak *= %f;\n", 1.0 / dst_peak); + } - // Desaturate the color using a coefficient dependent on the signal + GLSL(float sig_orig = sig;) + GLSLF("float slope = min(1.0, %f / sig_avg);\n", sdr_avg); + GLSL(sig *= slope;) + GLSL(sig_peak *= slope;) + + // Desaturate the color using a coefficient dependent on the signal. 
+ // Do this after peak detection in order to prevent over-desaturating + // overly bright sources if (desat > 0) { + float base = 0.18 * dst_peak; GLSL(float luma = dot(dst_luma, color.rgb);) - GLSL(float coeff = max(sig - 0.18, 1e-6) / max(sig, 1e-6);); + GLSLF("float coeff = max(sig - %f, 1e-6) / max(sig, 1e-6);\n", base); GLSLF("coeff = pow(coeff, %f);\n", 10.0 / desat); GLSL(color.rgb = mix(color.rgb, vec3(luma), coeff);) - GLSL(sig = mix(sig, luma, coeff);) // also make sure to update `sig` - } - - if (!ref_peak) { - // For performance, we want to do as few atomic operations on global - // memory as possible, so use an atomic in shmem for the work group. - // We also want slightly more stable values, so use the group average - // instead of the group max - GLSLHF("shared uint group_sum = 0;\n"); - GLSLF("atomicAdd(group_sum, uint(sig * %f));\n", MP_REF_WHITE); - - // Have one thread in each work group update the frame maximum - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) - GLSL(if (gl_LocalInvocationIndex == 0)) - GLSL(atomicMax(frame_max[index], group_sum / - (gl_WorkGroupSize.x * gl_WorkGroupSize.y));) - - // Finally, have one thread per invocation update the total maximum - // and advance the index - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) - GLSL(if (gl_GlobalInvocationID == ivec3(0)) {) // do this once per invocation - GLSLF("uint next = (index + 1) %% %d;\n", PEAK_DETECT_FRAMES+1); - GLSLF("sig_peak_raw = sig_peak_raw + frame_max[index] - frame_max[next];\n"); - GLSLF("frame_max[next] = %d;\n", (int)MP_REF_WHITE); - GLSL(index = next;) - GLSL(}) - - GLSL(memoryBarrierBuffer();) - GLSL(barrier();) - GLSLF("float sig_peak = 1.0/%f * float(sig_peak_raw);\n", - MP_REF_WHITE * PEAK_DETECT_FRAMES); - } else { - GLSLHF("const float sig_peak = %f;\n", ref_peak); + GLSL(sig = mix(sig, luma * slope, coeff);) // also make sure to update `sig` } - GLSL(float sig_orig = sig;) switch (algo) { case TONE_MAPPING_CLIP: GLSLF("sig = %f * sig;\n", isnan(param) ? 
1.0 : param); break; case TONE_MAPPING_MOBIUS: + GLSLF("if (sig_peak > (1.0 + 1e-6)) {\n"); GLSLF("const float j = %f;\n", isnan(param) ? 0.3 : param); // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0 // where M(x) = scale * (x+a)/(x+b) @@ -627,6 +698,7 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, "max(1e-6, sig_peak - 1.0);\n"); GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n"); GLSL(sig = sig > j ? scale * (sig + a) / (sig + b) : sig;) + GLSLF("}\n"); break; case TONE_MAPPING_REINHARD: { @@ -668,7 +740,8 @@ static void pass_tone_map(struct gl_shader_cache *sc, float ref_peak, // Apply the computed scale factor to the color, linearly to prevent // discoloration - GLSL(color.rgb *= sig / sig_orig;) + GLSL(sig = min(sig, 1.0);) + GLSL(color.rgb *= vec3(sig / sig_orig);) } // Map colors from one source space to another. These source spaces must be @@ -686,11 +759,6 @@ void pass_color_map(struct gl_shader_cache *sc, { GLSLF("// color mapping\n"); - // Compute the highest encodable level - float src_range = mp_trc_nom_peak(src.gamma), - dst_range = mp_trc_nom_peak(dst.gamma); - float ref_peak = src.sig_peak / dst_range; - // Some operations need access to the video's luma coefficients, so make // them available float rgb2xyz[3][3]; @@ -699,30 +767,29 @@ void pass_color_map(struct gl_shader_cache *sc, mp_get_rgb2xyz_matrix(mp_get_csp_primaries(dst.primaries), rgb2xyz); gl_sc_uniform_vec3(sc, "dst_luma", rgb2xyz[1]); + bool need_ootf = src.light != dst.light; + if (src.light == MP_CSP_LIGHT_SCENE_HLG && src.sig_peak != dst.sig_peak) + need_ootf = true; + // All operations from here on require linear light as a starting point, // so we linearize even if src.gamma == dst.gamma when one of the other // operations needs it - bool need_gamma = src.gamma != dst.gamma || - src.primaries != dst.primaries || - src_range != dst_range || - src.sig_peak > dst_range || - src.light != dst.light; + bool need_linear = src.gamma != dst.gamma || + 
src.primaries != dst.primaries || + src.sig_peak > dst.sig_peak || + need_ootf; - if (need_gamma && !is_linear) { + if (need_linear && !is_linear) { + // We also pull it up so that 1.0 is the reference white pass_linearize(sc, src.gamma); - is_linear= true; + is_linear = true; } - if (src.light != dst.light) - pass_ootf(sc, src.light, mp_trc_nom_peak(src.gamma)); + // Pre-scale the incoming values into an absolute scale + GLSLF("color.rgb *= vec3(%f);\n", mp_trc_nom_peak(src.gamma)); - // Rescale the signal to compensate for differences in the encoding range - // and reference white level. This is necessary because of how mpv encodes - // brightness in textures. - if (src_range != dst_range) { - GLSLF("// rescale value range;\n"); - GLSLF("color.rgb *= vec3(%f);\n", src_range / dst_range); - } + if (need_ootf) + pass_ootf(sc, src.light, src.sig_peak); // Adapt to the right colorspace if necessary if (src.primaries != dst.primaries) { @@ -732,20 +799,26 @@ void pass_color_map(struct gl_shader_cache *sc, mp_get_cms_matrix(csp_src, csp_dst, MP_INTENT_RELATIVE_COLORIMETRIC, m); gl_sc_uniform_mat3(sc, "cms_matrix", true, &m[0][0]); GLSL(color.rgb = cms_matrix * color.rgb;) - // Since this can reduce the gamut, figure out by how much - for (int c = 0; c < 3; c++) - ref_peak = MPMAX(ref_peak, m[c][c]); } // Tone map to prevent clipping when the source signal peak exceeds the // encodable range or we've reduced the gamut - if (ref_peak > 1) { - pass_tone_map(sc, detect_peak ? 0 : ref_peak, algo, + if (src.sig_peak > dst.sig_peak) { + pass_tone_map(sc, detect_peak, src.sig_peak, dst.sig_peak, algo, tone_mapping_param, tone_mapping_desat); } - if (src.light != dst.light) - pass_inverse_ootf(sc, dst.light, mp_trc_nom_peak(dst.gamma)); + if (need_ootf) + pass_inverse_ootf(sc, dst.light, dst.sig_peak); + + // Post-scale the outgoing values from absolute scale to normalized. + // For SDR, we normalize to the chosen signal peak. 
For HDR, we normalize + // to the encoding range of the transfer function. + float dst_range = dst.sig_peak; + if (mp_trc_is_hdr(dst.gamma)) + dst_range = mp_trc_nom_peak(dst.gamma); + + GLSLF("color.rgb *= vec3(%f);\n", 1.0 / dst_range); // Warn for remaining out-of-gamut colors is enabled if (gamut_warning) { diff --git a/video/out/gpu/video_shaders.h b/video/out/gpu/video_shaders.h index 2ae2ac3..cd395d6 100644 --- a/video/out/gpu/video_shaders.h +++ b/video/out/gpu/video_shaders.h @@ -39,8 +39,6 @@ void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler, void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc); void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc); -void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak); -void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, float peak); void pass_color_map(struct gl_shader_cache *sc, struct mp_colorspace src, struct mp_colorspace dst, diff --git a/video/out/libmpv.h b/video/out/libmpv.h new file mode 100644 index 0000000..2fe3338 --- /dev/null +++ b/video/out/libmpv.h @@ -0,0 +1,80 @@ +#pragma once + +#include <stdint.h> +#include <stdbool.h> +#include "libmpv/render.h" +#include "vo.h" + +// Helper for finding a parameter value. It returns the direct pointer to the +// value, and if not present, just returns the def argument. In particular, if +// def is not NULL, this never returns NULL (unless a param value is defined +// as accepting NULL, or the libmpv API user is triggering UB). 
+void *get_mpv_render_param(mpv_render_param *params, mpv_render_param_type type, + void *def); + +#define GET_MPV_RENDER_PARAM(params, type, ctype, def) \ + (*(ctype *)get_mpv_render_param(params, type, &(ctype){(def)})) + +typedef int (*mp_render_cb_control_fn)(struct vo *vo, void *cb_ctx, int *events, + uint32_t request, void *data); +void mp_render_context_set_control_callback(mpv_render_context *ctx, + mp_render_cb_control_fn callback, + void *callback_ctx); +bool mp_render_context_acquire(mpv_render_context *ctx); + +struct render_backend { + struct mpv_global *global; + struct mp_log *log; + const struct render_backend_fns *fns; + + // Set on init, immutable afterwards. + int driver_caps; + struct mp_hwdec_devices *hwdec_devs; + + void *priv; +}; + +// Generic backend for rendering via libmpv. This corresponds to vo/vo_driver, +// except for rendering via the mpv_render_*() API. (As a consequence it's as +// generic as the VO API.) Like with VOs, one backend can support multiple +// underlying GPU APIs. +struct render_backend_fns { + // Returns libmpv error code. In particular, this function has to check for + // MPV_RENDER_PARAM_API_TYPE, and silently return MPV_ERROR_NOT_IMPLEMENTED + // if the API is not included in this backend. + // If this fails, ->destroy() will be called. + int (*init)(struct render_backend *ctx, mpv_render_param *params); + // Check if the passed IMGFMT_ is supported. + bool (*check_format)(struct render_backend *ctx, int imgfmt); + // Implementation of mpv_render_context_set_parameter(). Optional. + int (*set_parameter)(struct render_backend *ctx, mpv_render_param param); + // Like vo_driver.reconfig(). + void (*reconfig)(struct render_backend *ctx, struct mp_image_params *params); + // Like VOCTRL_RESET. + void (*reset)(struct render_backend *ctx); + void (*screenshot)(struct render_backend *ctx, struct vo_frame *frame, + struct voctrl_screenshot *args); + // Like vo_driver.get_image(). 
+ struct mp_image *(*get_image)(struct render_backend *ctx, int imgfmt, + int w, int h, int stride_align); + // This has two purposes: 1. set queue attributes on VO, 2. update the + // renderer's OSD pointer. Keep in mind that as soon as the caller releases + // the renderer lock, the VO pointer can become invalid. The OSD pointer + // will technically remain valid (even though it's a vo field), until it's + // unset with this function. + // Will be called if vo changes, or if renderer options change. + void (*update_external)(struct render_backend *ctx, struct vo *vo); + // Update screen area. + void (*resize)(struct render_backend *ctx, struct mp_rect *src, + struct mp_rect *dst, struct mp_osd_res *osd); + // Get target surface size from mpv_render_context_render() arguments. + int (*get_target_size)(struct render_backend *ctx, mpv_render_param *params, + int *out_w, int *out_h); + // Implementation of mpv_render_context_render(). + int (*render)(struct render_backend *ctx, mpv_render_param *params, + struct vo_frame *frame); + // Free all data in ctx->priv. + void (*destroy)(struct render_backend *ctx); +}; + +extern const struct render_backend_fns render_backend_gpu; diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c index fda40da..4b0cbcc 100644 --- a/video/out/opengl/common.c +++ b/video/out/opengl/common.c @@ -453,6 +453,7 @@ static const struct gl_functions gl_functions[] = { }, // These don't exist - they are for the sake of mpv internals, and libmpv // interaction (see libmpv/opengl_cb.h). + // This is not used by the render API, only the deprecated opengl-cb API. 
{ .extension = "GL_MP_MPGetNativeDisplay", .functions = (const struct gl_function[]) { @@ -664,13 +665,3 @@ void mpgl_load_functions(GL *gl, void *(*getProcAddress)(const GLubyte *), { mpgl_load_functions2(gl, get_procaddr_wrapper, getProcAddress, ext2, log); } - -void *mpgl_get_native_display(struct GL *gl, const char *name) -{ - void *res = NULL; - if (gl->get_native_display) - res = gl->get_native_display(gl->get_native_display_ctx, name); - if (!res && gl->MPGetNativeDisplay) - res = gl->MPGetNativeDisplay(name); - return res; -} diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h index b9f582b..38414fe 100644 --- a/video/out/opengl/common.h +++ b/video/out/opengl/common.h @@ -78,9 +78,6 @@ void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), typedef void (GLAPIENTRY *MP_GLDEBUGPROC)(GLenum, GLenum, GLuint, GLenum, GLsizei, const GLchar *,const void *); -// Return a named host API reference (e.g. "wl" -> wl_display). -void *mpgl_get_native_display(struct GL *gl, const char *name); - //function pointers loaded from the OpenGL library struct GL { int version; // MPGL_VER() mangled (e.g. 210 for 2.1) @@ -90,11 +87,6 @@ struct GL { int mpgl_caps; // Bitfield of MPGL_CAP_* constants bool debug_context; // use of e.g. GLX_CONTEXT_DEBUG_BIT_ARB - // Use mpgl_get_native_display() instead. Also, this is set to use the - // fields in MPGLContext by default (if set). 
- void *get_native_display_ctx; - void *(*get_native_display)(void *ctx, const char *name); - void (GLAPIENTRY *Viewport)(GLint, GLint, GLsizei, GLsizei); void (GLAPIENTRY *Clear)(GLbitfield); void (GLAPIENTRY *GenTextures)(GLsizei, GLuint *); diff --git a/video/out/opengl/context.c b/video/out/opengl/context.c index cdaf632..43b57aa 100644 --- a/video/out/opengl/context.c +++ b/video/out/opengl/context.c @@ -125,17 +125,6 @@ done: return ret; } -static void *get_native_display(void *priv, const char *name) -{ - struct priv *p = priv; - if (!p->params.native_display_type || !name) - return NULL; - if (strcmp(p->params.native_display_type, name) != 0) - return NULL; - - return p->params.native_display; -} - void ra_gl_ctx_uninit(struct ra_ctx *ctx) { if (ctx->swapchain) { @@ -173,8 +162,6 @@ bool ra_gl_ctx_init(struct ra_ctx *ctx, GL *gl, struct ra_gl_ctx_params params) if (ext) { if (ext->color_depth) p->fns.color_depth = ext->color_depth; - if (ext->screenshot) - p->fns.screenshot = ext->screenshot; if (ext->start_frame) p->fns.start_frame = ext->start_frame; if (ext->submit_frame) @@ -193,8 +180,6 @@ bool ra_gl_ctx_init(struct ra_ctx *ctx, GL *gl, struct ra_gl_ctx_params params) } gl->debug_context = ctx->opts.debug; - gl->get_native_display_ctx = p; - gl->get_native_display = get_native_display; if (gl->SwapInterval) { gl->SwapInterval(p->opts->swapinterval); @@ -247,24 +232,6 @@ int ra_gl_ctx_color_depth(struct ra_swapchain *sw) return depth_g; } -struct mp_image *ra_gl_ctx_screenshot(struct ra_swapchain *sw) -{ - struct priv *p = sw->priv; - - assert(p->wrapped_fb); - struct mp_image *screen = gl_read_fbo_contents(p->gl, p->main_fb, - p->wrapped_fb->params.w, - p->wrapped_fb->params.h); - - // OpenGL FB is also read in flipped order, so we need to flip when the - // rendering is *not* flipped, which in our case is whenever - // p->params.flipped is true. 
I hope that made sense - if (screen && p->params.flipped) - mp_image_vflip(screen); - - return screen; -} - bool ra_gl_ctx_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) { struct priv *p = sw->priv; @@ -348,7 +315,6 @@ void ra_gl_ctx_swap_buffers(struct ra_swapchain *sw) static const struct ra_swapchain_fns ra_gl_swapchain_fns = { .color_depth = ra_gl_ctx_color_depth, - .screenshot = ra_gl_ctx_screenshot, .start_frame = ra_gl_ctx_start_frame, .submit_frame = ra_gl_ctx_submit_frame, .swap_buffers = ra_gl_ctx_swap_buffers, diff --git a/video/out/opengl/context.h b/video/out/opengl/context.h index 95ed374..5fccc70 100644 --- a/video/out/opengl/context.h +++ b/video/out/opengl/context.h @@ -34,10 +34,6 @@ struct ra_gl_ctx_params { // ra_swapchain_fns structs will entirely replace the equivalent ra_gl_ctx // functions in the resulting ra_swapchain. const struct ra_swapchain_fns *external_swapchain; - - // For hwdec_vaegl.c: - const char *native_display_type; - void *native_display; }; void ra_gl_ctx_uninit(struct ra_ctx *ctx); diff --git a/video/out/opengl/context_android.c b/video/out/opengl/context_android.c index a2acce2..d405e79 100644 --- a/video/out/opengl/context_android.c +++ b/video/out/opengl/context_android.c @@ -26,6 +26,19 @@ #include "options/m_config.h" #include "context.h" +struct android_opts { + struct m_geometry surface_size; +}; + +#define OPT_BASE_STRUCT struct android_opts +const struct m_sub_options android_conf = { + .opts = (const struct m_option[]) { + OPT_SIZE_BOX("android-surface-size", surface_size, UPDATE_VO_RESIZE), + {0} + }, + .size = sizeof(struct android_opts), +}; + struct priv { struct GL gl; EGLDisplay egl_display; @@ -123,10 +136,16 @@ fail: static bool android_reconfig(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - int w, h; + void *tmp = talloc_new(NULL); + struct android_opts *opts = mp_get_config_group(tmp, ctx->global, &android_conf); + int w = opts->surface_size.w, h = opts->surface_size.h; + + if (!w) + 
eglQuerySurface(p->egl_display, p->egl_surface, EGL_WIDTH, &w); + if (!h) + eglQuerySurface(p->egl_display, p->egl_surface, EGL_HEIGHT, &h); - if (!eglQuerySurface(p->egl_display, p->egl_surface, EGL_WIDTH, &w) || - !eglQuerySurface(p->egl_display, p->egl_surface, EGL_HEIGHT, &h)) { + if (!w || !h) { MP_FATAL(ctx, "Failed to get height and width!\n"); return false; } @@ -134,6 +153,8 @@ static bool android_reconfig(struct ra_ctx *ctx) ctx->vo->dwidth = w; ctx->vo->dheight = h; ra_gl_ctx_resize(ctx->swapchain, w, h, 0); + + talloc_free(tmp); return true; } diff --git a/video/out/opengl/context_angle.c b/video/out/opengl/context_angle.c index 986a503..6d45e29 100644 --- a/video/out/opengl/context_angle.c +++ b/video/out/opengl/context_angle.c @@ -525,17 +525,6 @@ static int angle_color_depth(struct ra_swapchain *sw) return 8; } -static struct mp_image *angle_screenshot(struct ra_swapchain *sw) -{ - struct priv *p = sw->ctx->priv; - if (p->dxgi_swapchain) { - struct mp_image *img = mp_d3d11_screenshot(p->dxgi_swapchain); - if (img) - return img; - } - return ra_gl_ctx_screenshot(sw); -} - static bool angle_submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame) { @@ -611,7 +600,6 @@ static bool angle_init(struct ra_ctx *ctx) // Custom swapchain impl for the D3D11 swapchain-based surface static const struct ra_swapchain_fns dxgi_swapchain_fns = { .color_depth = angle_color_depth, - .screenshot = angle_screenshot, .submit_frame = angle_submit_frame, }; struct ra_gl_ctx_params params = { diff --git a/video/out/opengl/context_cocoa.c b/video/out/opengl/context_cocoa.c index 2256d31..b73ca9d 100644 --- a/video/out/opengl/context_cocoa.c +++ b/video/out/opengl/context_cocoa.c @@ -19,7 +19,6 @@ #include <dlfcn.h> #include "options/m_config.h" #include "video/out/cocoa_common.h" -#include "osdep/macosx_versions.h" #include "context.h" struct cocoa_opts { @@ -37,6 +36,7 @@ const struct m_sub_options cocoa_conf = { struct priv { GL gl; + void (GLAPIENTRY 
*Flush)(void); CGLPixelFormatObj pix; CGLContextObj ctx; @@ -50,6 +50,8 @@ static int set_swap_interval(int enabled) return (err == kCGLNoError) ? 0 : -1; } +static void glFlushDummy(void) { } + static void *cocoa_glgetaddr(const char *s) { void *ret = NULL; @@ -139,6 +141,8 @@ static bool create_gl_context(struct ra_ctx *ctx) mpgl_load_functions(gl, (void *)cocoa_glgetaddr, NULL, ctx->vo->log); gl->SwapInterval = set_swap_interval; + p->Flush = gl->Flush; + gl->Flush = glFlushDummy; CGLReleasePixelFormat(p->pix); @@ -156,9 +160,8 @@ static void cocoa_uninit(struct ra_ctx *ctx) static void cocoa_swap_buffers(struct ra_ctx *ctx) { struct priv *p = ctx->priv; - GL *gl = &p->gl; vo_cocoa_swap_buffers(ctx->vo); - gl->Flush(); + p->Flush(); } static bool cocoa_init(struct ra_ctx *ctx) @@ -168,6 +171,8 @@ static bool cocoa_init(struct ra_ctx *ctx) p->opts = mp_get_config_group(ctx, ctx->global, &cocoa_conf); vo_cocoa_init(ctx->vo); + MP_WARN(ctx->vo, "opengl cocoa backend is deprecated, use vo=libmpv instead\n"); + if (!create_gl_context(ctx)) goto fail; diff --git a/video/out/opengl/context_drm_egl.c b/video/out/opengl/context_drm_egl.c index 6191309..72eb2e3 100644 --- a/video/out/opengl/context_drm_egl.c +++ b/video/out/opengl/context_drm_egl.c @@ -25,13 +25,13 @@ #include <unistd.h> #include <gbm.h> -#include <drm_fourcc.h> #include <EGL/egl.h> #include <EGL/eglext.h> -#include "libmpv/opengl_cb.h" +#include "libmpv/render_gl.h" #include "video/out/drm_common.h" #include "common/common.h" +#include "osdep/timer.h" #include "egl_helpers.h" #include "common.h" @@ -72,7 +72,7 @@ struct priv { struct gbm gbm; struct framebuffer *fb; - uint32_t primary_plane_format; + uint32_t gbm_format; bool active; bool waiting_for_flip; @@ -80,9 +80,80 @@ struct priv { bool vt_switcher_active; struct vt_switcher vt_switcher; - struct mpv_opengl_cb_drm_params drm_params; + struct mpv_opengl_drm_params drm_params; + struct mpv_opengl_drm_osd_size osd_size; }; +// Not general. 
Limited to only the formats being used in this module +static const char *gbm_format_to_string(uint32_t format) +{ + switch (format) { + case GBM_FORMAT_XRGB8888: + return "GBM_FORMAT_XRGB8888"; + case GBM_FORMAT_ARGB8888: + return "GBM_FORMAT_ARGB8888"; + case GBM_FORMAT_XRGB2101010: + return "GBM_FORMAT_XRGB2101010"; + case GBM_FORMAT_ARGB2101010: + return "GBM_FORMAT_ARGB2101010"; + default: + return "UNKNOWN"; + } +} + +// Allow falling back to an ARGB EGLConfig when we have an XRGB framebuffer. +// Also allow falling back to an XRGB EGLConfig for ARGB framebuffers, since +// this seems neccessary to work with broken Mali drivers that don't report +// their EGLConfigs as supporting alpha properly. +static uint32_t fallback_format_for(uint32_t format) +{ + switch (format) { + case GBM_FORMAT_XRGB8888: + return GBM_FORMAT_ARGB8888; + case GBM_FORMAT_ARGB8888: + return GBM_FORMAT_XRGB8888; + case GBM_FORMAT_XRGB2101010: + return GBM_FORMAT_ARGB2101010; + case GBM_FORMAT_ARGB2101010: + return GBM_FORMAT_XRGB2101010; + default: + return 0; + } +} + +static int match_config_to_visual(void *user_data, EGLConfig *configs, int num_configs) +{ + struct ra_ctx *ctx = (struct ra_ctx*)user_data; + struct priv *p = ctx->priv; + const EGLint visual_id[] = { + (EGLint)p->gbm_format, + (EGLint)fallback_format_for(p->gbm_format), + 0 + }; + + for (unsigned int i = 0; visual_id[i] != 0; ++i) { + MP_VERBOSE(ctx, "Attempting to find EGLConfig matching %s\n", + gbm_format_to_string(visual_id[i])); + for (unsigned int j = 0; j < num_configs; ++j) { + EGLint id; + + if (!eglGetConfigAttrib(p->egl.display, configs[j], EGL_NATIVE_VISUAL_ID, &id)) + continue; + + if (visual_id[i] == id) { + MP_VERBOSE(ctx, "Found matching EGLConfig for %s\n", + gbm_format_to_string(visual_id[i])); + return j; + } + } + MP_VERBOSE(ctx, "No matching EGLConfig for %s\n", gbm_format_to_string(visual_id[i])); + } + + MP_ERR(ctx, "Could not find EGLConfig matching the GBM visual (%s).\n", + 
gbm_format_to_string(p->gbm_format)); + return -1; +} + static bool init_egl(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -97,7 +168,11 @@ static bool init_egl(struct ra_ctx *ctx) return false; } EGLConfig config; - if (!mpegl_create_context(ctx, p->egl.display, &p->egl.context, &config)) + if (!mpegl_create_context_cb(ctx, + p->egl.display, + (struct mpegl_cb){match_config_to_visual, ctx}, + &p->egl.context, + &config)) return false; MP_VERBOSE(ctx, "Initializing EGL surface\n"); p->egl.surface @@ -120,12 +195,12 @@ static bool init_gbm(struct ra_ctx *ctx) } MP_VERBOSE(ctx->vo, "Initializing GBM surface (%d x %d)\n", - p->kms->mode.hdisplay, p->kms->mode.vdisplay); + p->osd_size.width, p->osd_size.height); p->gbm.surface = gbm_surface_create( p->gbm.device, - p->kms->mode.hdisplay, - p->kms->mode.vdisplay, - p->primary_plane_format, // drm_fourcc.h defs should be gbm-compatible + p->osd_size.width, + p->osd_size.height, + p->gbm_format, GBM_BO_USE_SCANOUT | GBM_BO_USE_RENDERING); if (!p->gbm.surface) { MP_ERR(ctx->vo, "Failed to create GBM surface.\n"); @@ -159,7 +234,7 @@ static void update_framebuffer_from_bo(struct ra_ctx *ctx, struct gbm_bo *bo) uint32_t handle = gbm_bo_get_handle(bo).u32; int ret = drmModeAddFB2(fb->fd, fb->width, fb->height, - p->primary_plane_format, + p->gbm_format, (uint32_t[4]){handle, 0, 0, 0}, (uint32_t[4]){stride, 0, 0, 0}, (uint32_t[4]){0, 0, 0, 0}, @@ -172,17 +247,104 @@ static void update_framebuffer_from_bo(struct ra_ctx *ctx, struct gbm_bo *bo) p->fb = fb; } +static bool crtc_setup_atomic(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct drm_atomic_context *atomic_ctx = p->kms->atomic_context; + + if (!drm_atomic_save_old_state(atomic_ctx)) { + MP_WARN(ctx->vo, "Failed to save old DRM atomic state\n"); + } + + drmModeAtomicReqPtr request = drmModeAtomicAlloc(); + if (!request) { + MP_ERR(ctx->vo, "Failed to allocate drm atomic request\n"); + return false; + } + + if (drm_object_set_property(request, 
atomic_ctx->connector, "CRTC_ID", p->kms->crtc_id) < 0) { + MP_ERR(ctx->vo, "Could not set CRTC_ID on connector\n"); + return false; + } + + if (!drm_mode_ensure_blob(p->kms->fd, &p->kms->mode)) { + MP_ERR(ctx->vo, "Failed to create DRM mode blob\n"); + goto err; + } + if (drm_object_set_property(request, atomic_ctx->crtc, "MODE_ID", p->kms->mode.blob_id) < 0) { + MP_ERR(ctx->vo, "Could not set MODE_ID on crtc\n"); + goto err; + } + if (drm_object_set_property(request, atomic_ctx->crtc, "ACTIVE", 1) < 0) { + MP_ERR(ctx->vo, "Could not set ACTIVE on crtc\n"); + goto err; + } + + drm_object_set_property(request, atomic_ctx->osd_plane, "FB_ID", p->fb->id); + drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_ID", p->kms->crtc_id); + drm_object_set_property(request, atomic_ctx->osd_plane, "SRC_X", 0); + drm_object_set_property(request, atomic_ctx->osd_plane, "SRC_Y", 0); + drm_object_set_property(request, atomic_ctx->osd_plane, "SRC_W", p->osd_size.width << 16); + drm_object_set_property(request, atomic_ctx->osd_plane, "SRC_H", p->osd_size.height << 16); + drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_X", 0); + drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_Y", 0); + drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_W", p->kms->mode.mode.hdisplay); + drm_object_set_property(request, atomic_ctx->osd_plane, "CRTC_H", p->kms->mode.mode.vdisplay); + + int ret = drmModeAtomicCommit(p->kms->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL); + if (ret) + MP_ERR(ctx->vo, "Failed to commit ModeSetting atomic request (%d)\n", ret); + + drmModeAtomicFree(request); + return ret == 0; + + err: + drmModeAtomicFree(request); + return false; +} + +static bool crtc_release_atomic(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + struct drm_atomic_context *atomic_ctx = p->kms->atomic_context; + drmModeAtomicReqPtr request = drmModeAtomicAlloc(); + if (!request) { + MP_ERR(ctx->vo, "Failed to allocate drm atomic request\n"); + 
return false; + } + + if (!drm_atomic_restore_old_state(request, atomic_ctx)) { + MP_WARN(ctx->vo, "Got error while restoring old state\n"); + } + + int ret = drmModeAtomicCommit(p->kms->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL); + + if (ret) + MP_WARN(ctx->vo, "Failed to commit ModeSetting atomic request (%d)\n", ret); + + drmModeAtomicFree(request); + return ret == 0; +} + static bool crtc_setup(struct ra_ctx *ctx) { struct priv *p = ctx->priv; if (p->active) return true; - p->old_crtc = drmModeGetCrtc(p->kms->fd, p->kms->crtc_id); - int ret = drmModeSetCrtc(p->kms->fd, p->kms->crtc_id, p->fb->id, - 0, 0, &p->kms->connector->connector_id, 1, - &p->kms->mode); - p->active = true; - return ret == 0; + + if (p->kms->atomic_context) { + int ret = crtc_setup_atomic(ctx); + p->active = true; + return ret; + } else { + p->old_crtc = drmModeGetCrtc(p->kms->fd, p->kms->crtc_id); + int ret = drmModeSetCrtc(p->kms->fd, p->kms->crtc_id, p->fb->id, + 0, 0, &p->kms->connector->connector_id, 1, + &p->kms->mode.mode); + p->active = true; + return ret == 0; + } } static void crtc_release(struct ra_ctx *ctx) @@ -202,21 +364,28 @@ static void crtc_release(struct ra_ctx *ctx) } } - if (p->old_crtc) { - drmModeSetCrtc(p->kms->fd, - p->old_crtc->crtc_id, p->old_crtc->buffer_id, - p->old_crtc->x, p->old_crtc->y, - &p->kms->connector->connector_id, 1, - &p->old_crtc->mode); - drmModeFreeCrtc(p->old_crtc); - p->old_crtc = NULL; + if (p->kms->atomic_context) { + if (p->kms->atomic_context->old_state.saved) { + if (!crtc_release_atomic(ctx)) + MP_ERR(ctx->vo, "Failed to restore previous mode\n"); + } + } else { + if (p->old_crtc) { + drmModeSetCrtc(p->kms->fd, + p->old_crtc->crtc_id, p->old_crtc->buffer_id, + p->old_crtc->x, p->old_crtc->y, + &p->kms->connector->connector_id, 1, + &p->old_crtc->mode); + drmModeFreeCrtc(p->old_crtc); + p->old_crtc = NULL; + } } } static void release_vt(void *data) { struct ra_ctx *ctx = data; - MP_VERBOSE(ctx->vo, "Releasing VT"); + 
MP_VERBOSE(ctx->vo, "Releasing VT\n"); crtc_release(ctx); if (USE_MASTER) { //this function enables support for switching to x, weston etc. @@ -233,7 +402,7 @@ static void release_vt(void *data) static void acquire_vt(void *data) { struct ra_ctx *ctx = data; - MP_VERBOSE(ctx->vo, "Acquiring VT"); + MP_VERBOSE(ctx->vo, "Acquiring VT\n"); if (USE_MASTER) { struct priv *p = ctx->priv; if (drmSetMaster(p->kms->fd)) { @@ -249,8 +418,10 @@ static bool drm_atomic_egl_start_frame(struct ra_swapchain *sw, struct ra_fbo *o { struct priv *p = sw->ctx->priv; if (p->kms->atomic_context) { - p->kms->atomic_context->request = drmModeAtomicAlloc(); - p->drm_params.atomic_request = p->kms->atomic_context->request; + if (!p->kms->atomic_context->request) { + p->kms->atomic_context->request = drmModeAtomicAlloc(); + p->drm_params.atomic_request_ptr = &p->kms->atomic_context->request; + } return ra_gl_ctx_start_frame(sw, out_fbo); } return false; @@ -266,15 +437,18 @@ static void drm_egl_swap_buffers(struct ra_ctx *ctx) struct drm_atomic_context *atomic_ctx = p->kms->atomic_context; int ret; + if (!p->active) + return; + eglSwapBuffers(p->egl.display, p->egl.surface); p->gbm.next_bo = gbm_surface_lock_front_buffer(p->gbm.surface); p->waiting_for_flip = true; update_framebuffer_from_bo(ctx, p->gbm.next_bo); if (atomic_ctx) { - drm_object_set_property(atomic_ctx->request, atomic_ctx->primary_plane, "FB_ID", p->fb->id); - drm_object_set_property(atomic_ctx->request, atomic_ctx->primary_plane, "CRTC_ID", atomic_ctx->crtc->id); - drm_object_set_property(atomic_ctx->request, atomic_ctx->primary_plane, "ZPOS", 1); + drm_object_set_property(atomic_ctx->request, atomic_ctx->osd_plane, "FB_ID", p->fb->id); + drm_object_set_property(atomic_ctx->request, atomic_ctx->osd_plane, "CRTC_ID", atomic_ctx->crtc->id); + drm_object_set_property(atomic_ctx->request, atomic_ctx->osd_plane, "ZPOS", 1); ret = drmModeAtomicCommit(p->kms->fd, atomic_ctx->request, DRM_MODE_ATOMIC_NONBLOCK | 
DRM_MODE_PAGE_FLIP_EVENT, NULL); @@ -304,7 +478,7 @@ static void drm_egl_swap_buffers(struct ra_ctx *ctx) if (atomic_ctx) { drmModeAtomicFree(atomic_ctx->request); - p->drm_params.atomic_request = atomic_ctx->request = NULL; + atomic_ctx->request = drmModeAtomicAlloc(); } gbm_surface_release_buffer(p->gbm.surface, p->gbm.bo); @@ -314,6 +488,15 @@ static void drm_egl_swap_buffers(struct ra_ctx *ctx) static void drm_egl_uninit(struct ra_ctx *ctx) { struct priv *p = ctx->priv; + struct drm_atomic_context *atomic_ctx = p->kms->atomic_context; + + if (atomic_ctx) { + int ret = drmModeAtomicCommit(p->kms->fd, atomic_ctx->request, 0, NULL); + if (ret) + MP_ERR(ctx->vo, "Failed to commit atomic request (%d)\n", ret); + drmModeAtomicFree(atomic_ctx->request); + } + ra_gl_ctx_uninit(ctx); crtc_release(ctx); @@ -330,48 +513,51 @@ static void drm_egl_uninit(struct ra_ctx *ctx) p->egl.context = EGL_NO_CONTEXT; eglDestroyContext(p->egl.display, p->egl.context); + close(p->drm_params.render_fd); + if (p->kms) { kms_destroy(p->kms); p->kms = 0; } } -// If primary plane supports ARGB8888 we want to use that, but if it doesn't we -// fall back on XRGB8888. If the driver does not support atomic there is no -// particular reason to be using ARGB8888, so we fall back to XRGB8888 (another -// reason is that we do not have the convenient atomic_ctx and its convenient -// primary_plane field). -static bool probe_primary_plane_format(struct ra_ctx *ctx) +// If the OSD plane supports ARGB we want to use that, but if it doesn't we fall +// back on XRGB. If the driver does not support atomic there is no particular +// reason to be using ARGB (drmprime hwdec will not work without atomic, +// anyway), so we fall back to XRGB (another reason is that we do not have the +// convenient atomic_ctx and its convenient plane fields). 
+static bool probe_gbm_format(struct ra_ctx *ctx, uint32_t argb_format, uint32_t xrgb_format) { struct priv *p = ctx->priv; + if (!p->kms->atomic_context) { - p->primary_plane_format = DRM_FORMAT_XRGB8888; - MP_VERBOSE(ctx->vo, "Not using DRM Atomic: Use DRM_FORMAT_XRGB8888 for primary plane.\n"); + p->gbm_format = xrgb_format; + MP_VERBOSE(ctx->vo, "Not using DRM Atomic: Use %s for OSD plane.\n", + gbm_format_to_string(xrgb_format)); return true; } drmModePlane *drmplane = - drmModeGetPlane(p->kms->fd, p->kms->atomic_context->primary_plane->id); - bool have_argb8888 = false; - bool have_xrgb8888 = false; + drmModeGetPlane(p->kms->fd, p->kms->atomic_context->osd_plane->id); + bool have_argb = false; + bool have_xrgb = false; bool result = false; for (unsigned int i = 0; i < drmplane->count_formats; ++i) { - if (drmplane->formats[i] == DRM_FORMAT_ARGB8888) { - have_argb8888 = true; - } else if (drmplane->formats[i] == DRM_FORMAT_XRGB8888) { - have_xrgb8888 = true; + if (drmplane->formats[i] == argb_format) { + have_argb = true; + } else if (drmplane->formats[i] == xrgb_format) { + have_xrgb = true; } } - if (have_argb8888) { - p->primary_plane_format = DRM_FORMAT_ARGB8888; - MP_VERBOSE(ctx->vo, "DRM_FORMAT_ARGB8888 supported by primary plane.\n"); + if (have_argb) { + p->gbm_format = argb_format; + MP_VERBOSE(ctx->vo, "%s supported by OSD plane.\n", gbm_format_to_string(argb_format)); result = true; - } else if (have_xrgb8888) { - p->primary_plane_format = DRM_FORMAT_XRGB8888; - MP_VERBOSE(ctx->vo, - "DRM_FORMAT_ARGB8888 not supported by primary plane: " - "Falling back to DRM_FORMAT_XRGB8888.\n"); + } else if (have_xrgb) { + p->gbm_format = xrgb_format; + MP_VERBOSE(ctx->vo, "%s not supported by OSD plane: Falling back to %s.\n", + gbm_format_to_string(argb_format), gbm_format_to_string(xrgb_format)); result = true; } @@ -400,14 +586,40 @@ static bool drm_egl_init(struct ra_ctx *ctx) MP_VERBOSE(ctx, "Initializing KMS\n"); p->kms = kms_create(ctx->log, 
ctx->vo->opts->drm_opts->drm_connector_spec, ctx->vo->opts->drm_opts->drm_mode_id, - ctx->vo->opts->drm_opts->drm_overlay_id); + ctx->vo->opts->drm_opts->drm_osd_plane_id, + ctx->vo->opts->drm_opts->drm_video_plane_id); if (!p->kms) { MP_ERR(ctx, "Failed to create KMS.\n"); return false; } - if (!probe_primary_plane_format(ctx)) { - MP_ERR(ctx->vo, "No suitable format found on DRM primary plane.\n"); + if (ctx->vo->opts->drm_opts->drm_osd_size.wh_valid) { + if (p->kms->atomic_context) { + p->osd_size.width = ctx->vo->opts->drm_opts->drm_osd_size.w; + p->osd_size.height = ctx->vo->opts->drm_opts->drm_osd_size.h; + } else { + p->osd_size.width = p->kms->mode.mode.hdisplay; + p->osd_size.height = p->kms->mode.mode.vdisplay; + MP_WARN(ctx, "Setting OSD size is only available with DRM atomic, defaulting to screen resolution\n"); + } + } else { + p->osd_size.width = p->kms->mode.mode.hdisplay; + p->osd_size.height = p->kms->mode.mode.vdisplay; + } + + uint32_t argb_format; + uint32_t xrgb_format; + if (DRM_OPTS_FORMAT_XRGB2101010 == ctx->vo->opts->drm_opts->drm_format) { + argb_format = GBM_FORMAT_ARGB2101010; + xrgb_format = GBM_FORMAT_XRGB2101010; + } else { + argb_format = GBM_FORMAT_ARGB8888; + xrgb_format = GBM_FORMAT_XRGB8888; + } + + if (!probe_gbm_format(ctx, argb_format, xrgb_format)) { + MP_ERR(ctx->vo, "No suitable format found on DRM primary plane (tried: %s and %s).\n", + gbm_format_to_string(argb_format), gbm_format_to_string(xrgb_format)); return false; } @@ -451,18 +663,34 @@ static bool drm_egl_init(struct ra_ctx *ctx) p->drm_params.fd = p->kms->fd; p->drm_params.crtc_id = p->kms->crtc_id; + p->drm_params.connector_id = p->kms->connector->connector_id; if (p->kms->atomic_context) - p->drm_params.atomic_request = p->kms->atomic_context->request; + p->drm_params.atomic_request_ptr = &p->kms->atomic_context->request; + char *rendernode_path = drmGetRenderDeviceNameFromFd(p->kms->fd); + if (rendernode_path) { + MP_VERBOSE(ctx, "Opening render node \"%s\"\n", 
rendernode_path); + p->drm_params.render_fd = open(rendernode_path, O_RDWR | O_CLOEXEC); + if (p->drm_params.render_fd < 0) { + MP_WARN(ctx, "Cannot open render node \"%s\": %s. VAAPI hwdec will be disabled\n", + rendernode_path, mp_strerror(errno)); + } + free(rendernode_path); + } else { + p->drm_params.render_fd = -1; + MP_VERBOSE(ctx, "Could not find path to render node. VAAPI hwdec will be disabled\n"); + } + struct ra_gl_ctx_params params = { .swap_buffers = drm_egl_swap_buffers, - .native_display_type = "opengl-cb-drm-params", - .native_display = &p->drm_params, .external_swapchain = p->kms->atomic_context ? &drm_atomic_swapchain : NULL, }; if (!ra_gl_ctx_init(ctx, &p->gl, params)) return false; + ra_add_native_resource(ctx->ra, "drm_params", &p->drm_params); + ra_add_native_resource(ctx->ra, "drm_osd_size", &p->osd_size); + return true; } @@ -491,6 +719,25 @@ static int drm_egl_control(struct ra_ctx *ctx, int *events, int request, return VO_NOTIMPL; } +static void wait_events(struct ra_ctx *ctx, int64_t until_time_us) +{ + struct priv *p = ctx->priv; + if (p->vt_switcher_active) { + int64_t wait_us = until_time_us - mp_time_us(); + int timeout_ms = MPCLAMP((wait_us + 500) / 1000, 0, 10000); + vt_switcher_poll(&p->vt_switcher, timeout_ms); + } else { + vo_wait_default(ctx->vo, until_time_us); + } +} + +static void wakeup(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + if (p->vt_switcher_active) + vt_switcher_interrupt_poll(&p->vt_switcher); +} + const struct ra_ctx_fns ra_ctx_drm_egl = { .type = "opengl", .name = "drm", @@ -498,4 +745,6 @@ const struct ra_ctx_fns ra_ctx_drm_egl = { .control = drm_egl_control, .init = drm_egl_init, .uninit = drm_egl_uninit, + .wait_events = wait_events, + .wakeup = wakeup, }; diff --git a/video/out/opengl/context_dxinterop.c b/video/out/opengl/context_dxinterop.c index 85d84bf..2e65a89 100644 --- a/video/out/opengl/context_dxinterop.c +++ b/video/out/opengl/context_dxinterop.c @@ -481,20 +481,6 @@ static int GLAPIENTRY 
dxgl_swap_interval(int interval) return 1; } -static void * GLAPIENTRY dxgl_get_native_display(const char *name) -{ - if (!current_ctx || !name) - return NULL; - struct priv *p = current_ctx->priv; - - if (p->device && strcmp("IDirect3DDevice9Ex", name) == 0) { - return p->device; - } else if (p->device_h && strcmp("dxinterop_device_HANDLE", name) == 0) { - return p->device_h; - } - return NULL; -} - static void dxgl_swap_buffers(struct ra_ctx *ctx) { struct priv *p = ctx->priv; @@ -560,7 +546,6 @@ static bool dxgl_init(struct ra_ctx *ctx) current_ctx = ctx; gl->SwapInterval = dxgl_swap_interval; - gl->MPGetNativeDisplay = dxgl_get_native_display; if (d3d_create(ctx) < 0) goto fail; @@ -577,6 +562,9 @@ static bool dxgl_init(struct ra_ctx *ctx) if (!ra_gl_ctx_init(ctx, gl, params)) goto fail; + ra_add_native_resource(ctx->ra, "IDirect3DDevice9Ex", p->device); + ra_add_native_resource(ctx->ra, "dxinterop_device_HANDLE", p->device_h); + DwmEnableMMCSS(TRUE); return true; fail: diff --git a/video/out/opengl/context_rpi.c b/video/out/opengl/context_rpi.c index 8b447d0..fbd9721 100644 --- a/video/out/opengl/context_rpi.c +++ b/video/out/opengl/context_rpi.c @@ -198,7 +198,8 @@ static bool recreate_dispmanx(struct ra_ctx *ctx) ctx->vo->dwidth = p->w; ctx->vo->dheight = p->h; - ra_gl_ctx_resize(ctx->swapchain, p->w, p->h, 0); + if (ctx->swapchain) + ra_gl_ctx_resize(ctx->swapchain, p->w, p->h, 0); ctx->vo->want_redraw = true; @@ -240,13 +241,14 @@ static bool rpi_init(struct ra_ctx *ctx) struct ra_gl_ctx_params params = { .swap_buffers = rpi_swap_buffers, - .native_display_type = "MPV_RPI_WINDOW", - .native_display = p->win_params, }; if (!ra_gl_ctx_init(ctx, &p->gl, params)) goto fail; + ra_add_native_resource(ctx->ra, "MPV_RPI_WINDOW", p->win_params); + + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0); return true; fail: diff --git a/video/out/opengl/context_wayland.c b/video/out/opengl/context_wayland.c index f686fcc..650072c 100644 --- 
a/video/out/opengl/context_wayland.c +++ b/video/out/opengl/context_wayland.c @@ -78,13 +78,13 @@ static bool egl_create_context(struct ra_ctx *ctx) struct ra_gl_ctx_params params = { .swap_buffers = wayland_egl_swap_buffers, - .native_display_type = "wl", - .native_display = wl->display, }; if (!ra_gl_ctx_init(ctx, &p->gl, params)) return false; + ra_add_native_resource(ctx->ra, "wl", wl->display); + return true; } diff --git a/video/out/opengl/context_x11egl.c b/video/out/opengl/context_x11egl.c index 7ab4fe0..32530cc 100644 --- a/video/out/opengl/context_x11egl.c +++ b/video/out/opengl/context_x11egl.c @@ -142,13 +142,13 @@ static bool mpegl_init(struct ra_ctx *ctx) struct ra_gl_ctx_params params = { .swap_buffers = mpegl_swap_buffers, - .native_display_type = "x11", - .native_display = vo->x11->display, }; if (!ra_gl_ctx_init(ctx, &p->gl, params)) goto uninit; + ra_add_native_resource(ctx->ra, "x11", vo->x11->display); + return true; uninit: diff --git a/video/out/opengl/cuda_dynamic.c b/video/out/opengl/cuda_dynamic.c deleted file mode 100644 index 1135a1f..0000000 --- a/video/out/opengl/cuda_dynamic.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * This file is part of mpv. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
- */ - -#include "cuda_dynamic.h" - -#include <pthread.h> - -#if defined(_WIN32) -# include <windows.h> -# define dlopen(filename, flags) LoadLibrary(TEXT(filename)) -# define dlsym(handle, symbol) (void *)GetProcAddress(handle, symbol) -# define dlclose(handle) FreeLibrary(handle) -#else -# include <dlfcn.h> -#endif - -#if defined(_WIN32) || defined(__CYGWIN__) -# define CUDA_LIBNAME "nvcuda.dll" -#else -# define CUDA_LIBNAME "libcuda.so.1" -#endif - -#define CUDA_DECL(NAME, TYPE) \ - TYPE *mpv_ ## NAME; -CUDA_FNS(CUDA_DECL) - -static bool cuda_loaded = false; -static pthread_once_t cuda_load_once = PTHREAD_ONCE_INIT; - -static void cuda_do_load(void) -{ - void *lib = dlopen(CUDA_LIBNAME, RTLD_LAZY); - if (!lib) { - return; - } - -#define CUDA_LOAD_SYMBOL(NAME, TYPE) \ - mpv_ ## NAME = dlsym(lib, #NAME); if (!mpv_ ## NAME) return; - - CUDA_FNS(CUDA_LOAD_SYMBOL) - - cuda_loaded = true; -} - -bool cuda_load(void) -{ - pthread_once(&cuda_load_once, cuda_do_load); - return cuda_loaded; -} diff --git a/video/out/opengl/cuda_dynamic.h b/video/out/opengl/cuda_dynamic.h deleted file mode 100644 index 9d75b31..0000000 --- a/video/out/opengl/cuda_dynamic.h +++ /dev/null @@ -1,148 +0,0 @@ -/* - * This file is part of mpv. - * - * It is based on an equivalent file in ffmpeg that was - * constructed from documentation, rather than from any - * original cuda headers. - * - * mpv is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * mpv is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with mpv. 
If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef MPV_CUDA_DYNAMIC_H -#define MPV_CUDA_DYNAMIC_H - -#include <stdbool.h> -#include <stddef.h> - -#include "gl_headers.h" - -#define CUDA_VERSION 7050 - -#if defined(_WIN32) || defined(__CYGWIN__) -#define CUDAAPI __stdcall -#else -#define CUDAAPI -#endif - -#define CU_CTX_SCHED_BLOCKING_SYNC 4 - -typedef int CUdevice; - -typedef struct CUarray_st *CUarray; -typedef struct CUgraphicsResource_st *CUgraphicsResource; -typedef struct CUstream_st *CUstream; - -typedef void* CUcontext; -#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) -typedef unsigned long long CUdeviceptr; -#else -typedef unsigned int CUdeviceptr; -#endif - -typedef enum cudaError_enum { - CUDA_SUCCESS = 0 -} CUresult; - -typedef enum CUmemorytype_enum { - CU_MEMORYTYPE_HOST = 1, - CU_MEMORYTYPE_DEVICE = 2, - CU_MEMORYTYPE_ARRAY = 3 -} CUmemorytype; - -typedef struct CUDA_MEMCPY2D_st { - size_t srcXInBytes; - size_t srcY; - CUmemorytype srcMemoryType; - const void *srcHost; - CUdeviceptr srcDevice; - CUarray srcArray; - size_t srcPitch; - - size_t dstXInBytes; - size_t dstY; - CUmemorytype dstMemoryType; - void *dstHost; - CUdeviceptr dstDevice; - CUarray dstArray; - size_t dstPitch; - - size_t WidthInBytes; - size_t Height; -} CUDA_MEMCPY2D; - -typedef enum CUGLDeviceList_enum { - CU_GL_DEVICE_LIST_ALL = 1, - CU_GL_DEVICE_LIST_CURRENT_FRAME = 2, - CU_GL_DEVICE_LIST_NEXT_FRAME = 3, -} CUGLDeviceList; - -#define CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD 2 - -typedef CUresult CUDAAPI tcuInit(unsigned int Flags); -typedef CUresult CUDAAPI tcuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev); -typedef CUresult CUDAAPI tcuCtxPushCurrent_v2(CUcontext *pctx); -typedef CUresult CUDAAPI tcuCtxPopCurrent_v2(CUcontext *pctx); -typedef CUresult CUDAAPI tcuCtxDestroy_v2(CUcontext ctx); -typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *pdevice, int ordinal); -typedef CUresult CUDAAPI tcuMemcpy2D_v2(const CUDA_MEMCPY2D *pcopy); -typedef 
CUresult CUDAAPI tcuGetErrorName(CUresult error, const char** pstr); -typedef CUresult CUDAAPI tcuGetErrorString(CUresult error, const char** pstr); -typedef CUresult CUDAAPI tcuGLGetDevices_v2(unsigned int* pCudaDeviceCount, CUdevice* pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList); -typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource* pCudaResource, GLuint image, GLenum target, unsigned int Flags); -typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource); -typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream); -typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource* resources, CUstream hStream); -typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray* pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel); - -#define CUDA_FNS(FN) \ - FN(cuInit, tcuInit) \ - FN(cuCtxCreate_v2, tcuCtxCreate_v2) \ - FN(cuCtxPushCurrent_v2, tcuCtxPushCurrent_v2) \ - FN(cuCtxPopCurrent_v2, tcuCtxPopCurrent_v2) \ - FN(cuCtxDestroy_v2, tcuCtxDestroy_v2) \ - FN(cuDeviceGet, tcuDeviceGet) \ - FN(cuMemcpy2D_v2, tcuMemcpy2D_v2) \ - FN(cuGetErrorName, tcuGetErrorName) \ - FN(cuGetErrorString, tcuGetErrorString) \ - FN(cuGLGetDevices_v2, tcuGLGetDevices_v2) \ - FN(cuGraphicsGLRegisterImage, tcuGraphicsGLRegisterImage) \ - FN(cuGraphicsUnregisterResource, tcuGraphicsUnregisterResource) \ - FN(cuGraphicsMapResources, tcuGraphicsMapResources) \ - FN(cuGraphicsUnmapResources, tcuGraphicsUnmapResources) \ - FN(cuGraphicsSubResourceGetMappedArray, tcuGraphicsSubResourceGetMappedArray) \ - -#define CUDA_EXT_DECL(NAME, TYPE) \ - extern TYPE *mpv_ ## NAME; - -CUDA_FNS(CUDA_EXT_DECL) - -#define cuInit mpv_cuInit -#define cuCtxCreate mpv_cuCtxCreate_v2 -#define cuCtxPushCurrent mpv_cuCtxPushCurrent_v2 -#define cuCtxPopCurrent mpv_cuCtxPopCurrent_v2 -#define cuCtxDestroy mpv_cuCtxDestroy_v2 
-#define cuDeviceGet mpv_cuDeviceGet -#define cuMemcpy2D mpv_cuMemcpy2D_v2 -#define cuGetErrorName mpv_cuGetErrorName -#define cuGetErrorString mpv_cuGetErrorString -#define cuGLGetDevices mpv_cuGLGetDevices_v2 -#define cuGraphicsGLRegisterImage mpv_cuGraphicsGLRegisterImage -#define cuGraphicsUnregisterResource mpv_cuGraphicsUnregisterResource -#define cuGraphicsMapResources mpv_cuGraphicsMapResources -#define cuGraphicsUnmapResources mpv_cuGraphicsUnmapResources -#define cuGraphicsSubResourceGetMappedArray mpv_cuGraphicsSubResourceGetMappedArray - -bool cuda_load(void); - -#endif // MPV_CUDA_DYNAMIC_H diff --git a/video/out/opengl/egl_helpers.c b/video/out/opengl/egl_helpers.c index 0033bf1..2905761 100644 --- a/video/out/opengl/egl_helpers.c +++ b/video/out/opengl/egl_helpers.c @@ -44,6 +44,38 @@ #define EGL_OPENGL_ES3_BIT 0x00000040 #endif +struct mp_egl_config_attr { + int attrib; + const char *name; +}; + +#define MP_EGL_ATTRIB(id) {id, # id} + +static const struct mp_egl_config_attr mp_egl_attribs[] = { + MP_EGL_ATTRIB(EGL_CONFIG_ID), + MP_EGL_ATTRIB(EGL_RED_SIZE), + MP_EGL_ATTRIB(EGL_GREEN_SIZE), + MP_EGL_ATTRIB(EGL_BLUE_SIZE), + MP_EGL_ATTRIB(EGL_ALPHA_SIZE), + MP_EGL_ATTRIB(EGL_COLOR_BUFFER_TYPE), + MP_EGL_ATTRIB(EGL_CONFIG_CAVEAT), + MP_EGL_ATTRIB(EGL_CONFORMANT), +}; + +static void dump_egl_config(struct mp_log *log, int msgl, EGLDisplay display, + EGLConfig config) +{ + for (int n = 0; n < MP_ARRAY_SIZE(mp_egl_attribs); n++) { + const char *name = mp_egl_attribs[n].name; + EGLint v = -1; + if (eglGetConfigAttrib(display, config, mp_egl_attribs[n].attrib, &v)) { + mp_msg(log, msgl, " %s=%d\n", name, v); + } else { + mp_msg(log, msgl, " %s=<error>\n", name); + } + } +} + // es_version: 0 (core), 2 or 3 static bool create_context(struct ra_ctx *ctx, EGLDisplay display, int es_version, struct mpegl_cb cb, @@ -83,9 +115,9 @@ static bool create_context(struct ra_ctx *ctx, EGLDisplay display, EGLint attributes[] = { EGL_SURFACE_TYPE, EGL_WINDOW_BIT, - 
EGL_RED_SIZE, 1, - EGL_GREEN_SIZE, 1, - EGL_BLUE_SIZE, 1, + EGL_RED_SIZE, 8, + EGL_GREEN_SIZE, 8, + EGL_BLUE_SIZE, 8, EGL_ALPHA_SIZE, ctx->opts.want_alpha ? 1 : 0, EGL_RENDERABLE_TYPE, rend, EGL_NONE @@ -101,17 +133,28 @@ static bool create_context(struct ra_ctx *ctx, EGLDisplay display, if (!num_configs) { talloc_free(configs); - MP_MSG(ctx, msgl, "Could not choose EGLConfig!\n"); + MP_MSG(ctx, msgl, "Could not choose EGLConfig for %s!\n", name); return false; } + for (int n = 0; n < num_configs; n++) + dump_egl_config(ctx->log, MSGL_TRACE, display, configs[n]); + int chosen = 0; if (cb.refine_config) chosen = cb.refine_config(cb.user_data, configs, num_configs); + if (chosen < 0) { + talloc_free(configs); + MP_MSG(ctx, msgl, "Could not refine EGLConfig for %s!\n", name); + return false; + } EGLConfig config = configs[chosen]; talloc_free(configs); + MP_DBG(ctx, "Chosen EGLConfig:\n"); + dump_egl_config(ctx->log, MSGL_DEBUG, display, config); + EGLContext *egl_ctx = NULL; if (es_version) { @@ -152,7 +195,7 @@ static bool create_context(struct ra_ctx *ctx, EGLDisplay display, } if (!egl_ctx) { - MP_MSG(ctx, msgl, "Could not create EGL context!\n"); + MP_MSG(ctx, msgl, "Could not create EGL context for %s!\n", name); return false; } @@ -199,6 +242,14 @@ bool mpegl_create_context_cb(struct ra_ctx *ctx, EGLDisplay display, return false; } +static int GLAPIENTRY swap_interval(int interval) +{ + EGLDisplay display = eglGetCurrentDisplay(); + if (!display) + return 1; + return !eglSwapInterval(display, interval); +} + static void *mpegl_get_proc_address(void *ctx, const char *name) { void *p = eglGetProcAddress(name); @@ -223,4 +274,6 @@ void mpegl_load_functions(struct GL *gl, struct mp_log *log) egl_exts = eglQueryString(display, EGL_EXTENSIONS); mpgl_load_functions2(gl, mpegl_get_proc_address, NULL, egl_exts, log); + if (!gl->SwapInterval) + gl->SwapInterval = swap_interval; } diff --git a/video/out/opengl/egl_helpers.h b/video/out/opengl/egl_helpers.h index 
eaaf9d7..df489da 100644 --- a/video/out/opengl/egl_helpers.h +++ b/video/out/opengl/egl_helpers.h @@ -15,7 +15,9 @@ bool mpegl_create_context(struct ra_ctx *ctx, EGLDisplay display, struct mpegl_cb { // if set, pick the desired config from the given list and return its index - // defaults to 0 (they are sorted by eglChooseConfig) + // defaults to 0 (they are sorted by eglChooseConfig). return a negative + // number to indicate an error condition or that no suitable configs could + // be found. int (*refine_config)(void *user_data, EGLConfig *configs, int num_configs); void *user_data; }; diff --git a/video/out/opengl/hwdec_cuda.c b/video/out/opengl/hwdec_cuda.c index 1a7df20..f80c145 100644 --- a/video/out/opengl/hwdec_cuda.c +++ b/video/out/opengl/hwdec_cuda.c @@ -27,8 +27,7 @@ * when decoding 10bit streams (there is some hardware dithering going on). */ -#include "cuda_dynamic.h" - +#include <ffnvcodec/dynlink_loader.h> #include <libavutil/hwcontext.h> #include <libavutil/hwcontext_cuda.h> @@ -39,6 +38,7 @@ struct priv_owner { struct mp_hwdec_ctx hwctx; + CudaFunctions *cu; CUcontext display_ctx; CUcontext decode_ctx; }; @@ -56,13 +56,15 @@ static int check_cu(struct ra_hwdec *hw, CUresult err, const char *func) const char *err_name; const char *err_string; + struct priv_owner *p = hw->priv; + MP_TRACE(hw, "Calling %s\n", func); if (err == CUDA_SUCCESS) return 0; - cuGetErrorName(err, &err_name); - cuGetErrorString(err, &err_string); + p->cu->cuGetErrorName(err, &err_name); + p->cu->cuGetErrorString(err, &err_string); MP_ERR(hw, "%s failed", func); if (err_name && err_string) @@ -82,6 +84,7 @@ static int cuda_init(struct ra_hwdec *hw) unsigned int device_count; int ret = 0; struct priv_owner *p = hw->priv; + CudaFunctions *cu; if (!ra_is_gl(hw->ra)) return -1; @@ -92,24 +95,25 @@ static int cuda_init(struct ra_hwdec *hw) return -1; } - bool loaded = cuda_load(); - if (!loaded) { + ret = cuda_load_functions(&p->cu, NULL); + if (ret != 0) { MP_VERBOSE(hw, "Failed 
to load CUDA symbols\n"); return -1; } + cu = p->cu; - ret = CHECK_CU(cuInit(0)); + ret = CHECK_CU(cu->cuInit(0)); if (ret < 0) goto error; // Allocate display context - ret = CHECK_CU(cuGLGetDevices(&device_count, &display_dev, 1, - CU_GL_DEVICE_LIST_ALL)); + ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1, + CU_GL_DEVICE_LIST_ALL)); if (ret < 0) goto error; - ret = CHECK_CU(cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC, - display_dev)); + ret = CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC, + display_dev)); if (ret < 0) goto error; @@ -121,7 +125,7 @@ static int cuda_init(struct ra_hwdec *hw) if (decode_dev_idx > -1) { CUdevice decode_dev; - ret = CHECK_CU(cuDeviceGet(&decode_dev, decode_dev_idx)); + ret = CHECK_CU(cu->cuDeviceGet(&decode_dev, decode_dev_idx)); if (ret < 0) goto error; @@ -129,12 +133,12 @@ static int cuda_init(struct ra_hwdec *hw) MP_INFO(hw, "Using separate decoder and display devices\n"); // Pop the display context. 
We won't use it again during init() - ret = CHECK_CU(cuCtxPopCurrent(&dummy)); + ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); if (ret < 0) goto error; - ret = CHECK_CU(cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC, - decode_dev)); + ret = CHECK_CU(cu->cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC, + decode_dev)); if (ret < 0) goto error; } @@ -155,7 +159,7 @@ static int cuda_init(struct ra_hwdec *hw) goto error; } - ret = CHECK_CU(cuCtxPopCurrent(&dummy)); + ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); if (ret < 0) goto error; @@ -168,7 +172,7 @@ static int cuda_init(struct ra_hwdec *hw) error: av_buffer_unref(&hw_device_ctx); - CHECK_CU(cuCtxPopCurrent(&dummy)); + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); return -1; } @@ -176,15 +180,18 @@ static int cuda_init(struct ra_hwdec *hw) static void cuda_uninit(struct ra_hwdec *hw) { struct priv_owner *p = hw->priv; + CudaFunctions *cu = p->cu; hwdec_devices_remove(hw->devs, &p->hwctx); av_buffer_unref(&p->hwctx.av_device_ref); if (p->decode_ctx && p->decode_ctx != p->display_ctx) - CHECK_CU(cuCtxDestroy(p->decode_ctx)); + CHECK_CU(cu->cuCtxDestroy(p->decode_ctx)); if (p->display_ctx) - CHECK_CU(cuCtxDestroy(p->display_ctx)); + CHECK_CU(cu->cuCtxDestroy(p->display_ctx)); + + cuda_free_functions(&p->cu); } #undef CHECK_CU @@ -195,6 +202,7 @@ static int mapper_init(struct ra_hwdec_mapper *mapper) struct priv_owner *p_owner = mapper->owner->priv; struct priv *p = mapper->priv; CUcontext dummy; + CudaFunctions *cu = p_owner->cu; int ret = 0, eret = 0; p->display_ctx = p_owner->display_ctx; @@ -212,7 +220,7 @@ static int mapper_init(struct ra_hwdec_mapper *mapper) return -1; } - ret = CHECK_CU(cuCtxPushCurrent(p->display_ctx)); + ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx)); if (ret < 0) return ret; @@ -239,27 +247,27 @@ static int mapper_init(struct ra_hwdec_mapper *mapper) GLenum target; ra_gl_get_raw_tex(mapper->ra, mapper->tex[n], &texture, &target); - ret = 
CHECK_CU(cuGraphicsGLRegisterImage(&p->cu_res[n], texture, target, - CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD)); + ret = CHECK_CU(cu->cuGraphicsGLRegisterImage(&p->cu_res[n], texture, target, + CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD)); if (ret < 0) goto error; - ret = CHECK_CU(cuGraphicsMapResources(1, &p->cu_res[n], 0)); + ret = CHECK_CU(cu->cuGraphicsMapResources(1, &p->cu_res[n], 0)); if (ret < 0) goto error; - ret = CHECK_CU(cuGraphicsSubResourceGetMappedArray(&p->cu_array[n], p->cu_res[n], - 0, 0)); + ret = CHECK_CU(cu->cuGraphicsSubResourceGetMappedArray(&p->cu_array[n], p->cu_res[n], + 0, 0)); if (ret < 0) goto error; - ret = CHECK_CU(cuGraphicsUnmapResources(1, &p->cu_res[n], 0)); + ret = CHECK_CU(cu->cuGraphicsUnmapResources(1, &p->cu_res[n], 0)); if (ret < 0) goto error; } error: - eret = CHECK_CU(cuCtxPopCurrent(&dummy)); + eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); if (eret < 0) return eret; @@ -269,17 +277,19 @@ static int mapper_init(struct ra_hwdec_mapper *mapper) static void mapper_uninit(struct ra_hwdec_mapper *mapper) { struct priv *p = mapper->priv; + struct priv_owner *p_owner = mapper->owner->priv; + CudaFunctions *cu = p_owner->cu; CUcontext dummy; // Don't bail if any CUDA calls fail. This is all best effort. 
- CHECK_CU(cuCtxPushCurrent(p->display_ctx)); + CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx)); for (int n = 0; n < 4; n++) { if (p->cu_res[n] > 0) - CHECK_CU(cuGraphicsUnregisterResource(p->cu_res[n])); + CHECK_CU(cu->cuGraphicsUnregisterResource(p->cu_res[n])); p->cu_res[n] = 0; ra_tex_free(mapper->ra, &mapper->tex[n]); } - CHECK_CU(cuCtxPopCurrent(&dummy)); + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); } static void mapper_unmap(struct ra_hwdec_mapper *mapper) @@ -289,10 +299,12 @@ static void mapper_unmap(struct ra_hwdec_mapper *mapper) static int mapper_map(struct ra_hwdec_mapper *mapper) { struct priv *p = mapper->priv; + struct priv_owner *p_owner = mapper->owner->priv; + CudaFunctions *cu = p_owner->cu; CUcontext dummy; int ret = 0, eret = 0; - ret = CHECK_CU(cuCtxPushCurrent(p->display_ctx)); + ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx)); if (ret < 0) return ret; @@ -308,14 +320,14 @@ static int mapper_map(struct ra_hwdec_mapper *mapper) mapper->tex[n]->params.format->pixel_size, .Height = mp_image_plane_h(&p->layout, n), }; - ret = CHECK_CU(cuMemcpy2D(&cpy)); + ret = CHECK_CU(cu->cuMemcpy2D(&cpy)); if (ret < 0) goto error; } error: - eret = CHECK_CU(cuCtxPopCurrent(&dummy)); + eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); if (eret < 0) return eret; diff --git a/video/out/opengl/hwdec_d3d11egl.c b/video/out/opengl/hwdec_d3d11egl.c index e741633..f9a6700 100644 --- a/video/out/opengl/hwdec_d3d11egl.c +++ b/video/out/opengl/hwdec_d3d11egl.c @@ -178,9 +178,12 @@ static int init(struct ra_hwdec *hw) ID3D10Multithread_SetMultithreadProtected(multithread, TRUE); ID3D10Multithread_Release(multithread); + static const int subfmts[] = {IMGFMT_NV12, 0}; p->hwctx = (struct mp_hwdec_ctx){ .driver_name = hw->driver->name, .av_device_ref = d3d11_wrap_device_ref(p->d3d11_device), + .supported_formats = subfmts, + .hw_imgfmt = IMGFMT_D3D11, }; hwdec_devices_add(hw->devs, &p->hwctx); @@ -332,7 +335,7 @@ static void mapper_unmap(struct ra_hwdec_mapper *mapper) const 
struct ra_hwdec_driver ra_hwdec_d3d11egl = { .name = "d3d11-egl", .priv_size = sizeof(struct priv_owner), - .imgfmts = {IMGFMT_D3D11NV12, 0}, + .imgfmts = {IMGFMT_D3D11, 0}, .init = init, .uninit = uninit, .mapper = &(const struct ra_hwdec_mapper_driver){ diff --git a/video/out/opengl/hwdec_d3d11eglrgb.c b/video/out/opengl/hwdec_d3d11eglrgb.c index c8f6580..db7b1cf 100644 --- a/video/out/opengl/hwdec_d3d11eglrgb.c +++ b/video/out/opengl/hwdec_d3d11eglrgb.c @@ -135,9 +135,12 @@ static int init(struct ra_hwdec *hw) goto fail; } + static const int subfmts[] = {IMGFMT_RGB0, 0}; p->hwctx = (struct mp_hwdec_ctx){ .driver_name = hw->driver->name, .av_device_ref = d3d11_wrap_device_ref(p->d3d11_device), + .supported_formats = subfmts, + .hw_imgfmt = IMGFMT_D3D11, }; hwdec_devices_add(hw->devs, &p->hwctx); @@ -159,6 +162,11 @@ static int mapper_init(struct ra_hwdec_mapper *mapper) struct priv *p = mapper->priv; GL *gl = ra_gl_get(mapper->ra); + if (mapper->src_params.hw_subfmt != IMGFMT_RGB0) { + MP_FATAL(mapper, "Format not supported.\n"); + return -1; + } + gl->GenTextures(1, &p->gl_texture); gl->BindTexture(GL_TEXTURE_2D, p->gl_texture); gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); @@ -258,7 +266,7 @@ static int mapper_map(struct ra_hwdec_mapper *mapper) const struct ra_hwdec_driver ra_hwdec_d3d11eglrgb = { .name = "d3d11-egl-rgb", .priv_size = sizeof(struct priv_owner), - .imgfmts = {IMGFMT_D3D11RGB, 0}, + .imgfmts = {IMGFMT_D3D11, 0}, .init = init, .uninit = uninit, .mapper = &(const struct ra_hwdec_mapper_driver){ diff --git a/video/out/opengl/hwdec_drmprime_drm.c b/video/out/opengl/hwdec_drmprime_drm.c index faa099a..5e09c5a 100644 --- a/video/out/opengl/hwdec_drmprime_drm.c +++ b/video/out/opengl/hwdec_drmprime_drm.c @@ -29,14 +29,12 @@ #include "video/hwdec.h" #include "common/msg.h" #include "options/m_config.h" -#include "libmpv/opengl_cb.h" +#include "libmpv/render_gl.h" #include "video/out/drm_common.h" #include "video/out/drm_prime.h" 
#include "video/out/gpu/hwdec.h" #include "video/mp_image.h" -#include "ra_gl.h" - extern const struct m_sub_options drm_conf; struct drm_frame { @@ -50,7 +48,7 @@ struct priv { struct mp_image_params params; struct drm_atomic_context *ctx; - struct drm_frame current_frame, old_frame; + struct drm_frame current_frame, last_frame, old_frame; struct mp_rect src, dst; @@ -71,8 +69,11 @@ static void set_current_frame(struct ra_hwdec *hw, struct drm_frame *frame) drm_prime_destroy_framebuffer(p->log, p->ctx->fd, &p->old_frame.fb); } - mp_image_setrefp(&p->old_frame.image, p->current_frame.image); - p->old_frame.fb = p->current_frame.fb; + mp_image_setrefp(&p->old_frame.image, p->last_frame.image); + p->old_frame.fb = p->last_frame.fb; + + mp_image_setrefp(&p->last_frame.image, p->current_frame.image); + p->last_frame.fb = p->current_frame.fb; if (frame) { p->current_frame.fb = frame->fb; @@ -86,15 +87,12 @@ static void set_current_frame(struct ra_hwdec *hw, struct drm_frame *frame) static void scale_dst_rect(struct ra_hwdec *hw, int source_w, int source_h ,struct mp_rect *src, struct mp_rect *dst) { struct priv *p = hw->priv; - double hratio, vratio, ratio; // drm can allow to have a layer that has a different size from framebuffer // we scale here the destination size to video mode - hratio = vratio = ratio = 1.0; - - hratio = (double)p->display_w / (double)source_w; - vratio = (double)p->display_h / (double)source_h; - ratio = hratio <= vratio ? hratio : vratio; + double hratio = p->display_w / (double)source_w; + double vratio = p->display_h / (double)source_h; + double ratio = hratio <= vratio ? 
hratio : vratio; dst->x0 = src->x0 * ratio; dst->x1 = src->x1 * ratio; @@ -110,37 +108,67 @@ static void scale_dst_rect(struct ra_hwdec *hw, int source_w, int source_h ,stru dst->y1 += offset_y; } +static void disable_video_plane(struct ra_hwdec *hw) +{ + struct priv *p = hw->priv; + if (!p->ctx) + return; + + // Disabling video plane is needed on some devices when using the + // primary plane for video. Primary buffer can't be active with no + // framebuffer associated. So we need this function to commit it + // right away as mpv will free all framebuffers on playback end. + drmModeAtomicReqPtr request = drmModeAtomicAlloc(); + if (request) { + drm_object_set_property(request, p->ctx->video_plane, "FB_ID", 0); + drm_object_set_property(request, p->ctx->video_plane, "CRTC_ID", 0); + + int ret = drmModeAtomicCommit(p->ctx->fd, request, + DRM_MODE_ATOMIC_NONBLOCK, NULL); + + if (ret) + MP_ERR(hw, "Failed to commit disable plane request (code %d)", ret); + drmModeAtomicFree(request); + } +} + static int overlay_frame(struct ra_hwdec *hw, struct mp_image *hw_image, struct mp_rect *src, struct mp_rect *dst, bool newframe) { struct priv *p = hw->priv; - GL *gl = ra_gl_get(hw->ra); AVDRMFrameDescriptor *desc = NULL; drmModeAtomicReq *request = NULL; struct drm_frame next_frame = {0}; int ret; + // grab atomic request from native resources + if (p->ctx) { + struct mpv_opengl_drm_params *drm_params; + drm_params = (mpv_opengl_drm_params *)ra_get_native_resource(hw->ra, "drm_params"); + if (!drm_params) { + MP_ERR(hw, "Failed to retrieve drm params from native resources\n"); + return -1; + } + if (drm_params->atomic_request_ptr) { + request = *drm_params->atomic_request_ptr; + } else { + MP_ERR(hw, "drm params pointer to atomic request is invalid"); + return -1; + } + } + if (hw_image) { - // grab opengl-cb windowing info to eventually upscale the overlay - // as egl windows could be upscaled to primary plane. - struct mpv_opengl_cb_window_pos *glparams = - gl ? 
(struct mpv_opengl_cb_window_pos *) - mpgl_get_native_display(gl, "opengl-cb-window-pos") : NULL; - if (glparams) { - scale_dst_rect(hw, glparams->width, glparams->height, dst, &p->dst); + // grab osd windowing info to eventually upscale the overlay + // as egl windows could be upscaled to osd plane. + struct mpv_opengl_drm_osd_size *osd_size = ra_get_native_resource(hw->ra, "drm_osd_size"); + if (osd_size) { + scale_dst_rect(hw, osd_size->width, osd_size->height, dst, &p->dst); } else { p->dst = *dst; } p->src = *src; - // grab drm interop info - struct mpv_opengl_cb_drm_params *drmparams = - gl ? (struct mpv_opengl_cb_drm_params *) - mpgl_get_native_display(gl, "opengl-cb-drm-params") : NULL; - if (drmparams) - request = (drmModeAtomicReq *)drmparams->atomic_request; - next_frame.image = hw_image; desc = (AVDRMFrameDescriptor *)hw_image->planes[0]; @@ -156,28 +184,33 @@ static int overlay_frame(struct ra_hwdec *hw, struct mp_image *hw_image, } if (request) { - drm_object_set_property(request, p->ctx->overlay_plane, "FB_ID", next_frame.fb.fb_id); - drm_object_set_property(request, p->ctx->overlay_plane, "CRTC_ID", p->ctx->crtc->id); - drm_object_set_property(request, p->ctx->overlay_plane, "SRC_X", p->src.x0 << 16); - drm_object_set_property(request, p->ctx->overlay_plane, "SRC_Y", p->src.y0 << 16); - drm_object_set_property(request, p->ctx->overlay_plane, "SRC_W", srcw << 16); - drm_object_set_property(request, p->ctx->overlay_plane, "SRC_H", srch << 16); - drm_object_set_property(request, p->ctx->overlay_plane, "CRTC_X", MP_ALIGN_DOWN(p->dst.x0, 2)); - drm_object_set_property(request, p->ctx->overlay_plane, "CRTC_Y", MP_ALIGN_DOWN(p->dst.y0, 2)); - drm_object_set_property(request, p->ctx->overlay_plane, "CRTC_W", dstw); - drm_object_set_property(request, p->ctx->overlay_plane, "CRTC_H", dsth); - drm_object_set_property(request, p->ctx->overlay_plane, "ZPOS", 0); + drm_object_set_property(request, p->ctx->video_plane, "FB_ID", next_frame.fb.fb_id); + 
drm_object_set_property(request, p->ctx->video_plane, "CRTC_ID", p->ctx->crtc->id); + drm_object_set_property(request, p->ctx->video_plane, "SRC_X", p->src.x0 << 16); + drm_object_set_property(request, p->ctx->video_plane, "SRC_Y", p->src.y0 << 16); + drm_object_set_property(request, p->ctx->video_plane, "SRC_W", srcw << 16); + drm_object_set_property(request, p->ctx->video_plane, "SRC_H", srch << 16); + drm_object_set_property(request, p->ctx->video_plane, "CRTC_X", MP_ALIGN_DOWN(p->dst.x0, 2)); + drm_object_set_property(request, p->ctx->video_plane, "CRTC_Y", MP_ALIGN_DOWN(p->dst.y0, 2)); + drm_object_set_property(request, p->ctx->video_plane, "CRTC_W", dstw); + drm_object_set_property(request, p->ctx->video_plane, "CRTC_H", dsth); + drm_object_set_property(request, p->ctx->video_plane, "ZPOS", 0); } else { - ret = drmModeSetPlane(p->ctx->fd, p->ctx->overlay_plane->id, p->ctx->crtc->id, next_frame.fb.fb_id, 0, + ret = drmModeSetPlane(p->ctx->fd, p->ctx->video_plane->id, p->ctx->crtc->id, next_frame.fb.fb_id, 0, MP_ALIGN_DOWN(p->dst.x0, 2), MP_ALIGN_DOWN(p->dst.y0, 2), dstw, dsth, p->src.x0 << 16, p->src.y0 << 16 , srcw << 16, srch << 16); if (ret < 0) { - MP_ERR(hw, "Failed to set the plane %d (buffer %d).\n", p->ctx->overlay_plane->id, + MP_ERR(hw, "Failed to set the plane %d (buffer %d).\n", p->ctx->video_plane->id, next_frame.fb.fb_id); goto fail; } } } + } else { + disable_video_plane(hw); + + while (p->old_frame.fb.fb_id) + set_current_frame(hw, NULL); } set_current_frame(hw, &next_frame); @@ -192,6 +225,7 @@ static void uninit(struct ra_hwdec *hw) { struct priv *p = hw->priv; + disable_video_plane(hw); set_current_frame(hw, NULL); if (p->ctx) { @@ -203,36 +237,28 @@ static void uninit(struct ra_hwdec *hw) static int init(struct ra_hwdec *hw) { struct priv *p = hw->priv; - int drm_overlay; - - if (!ra_is_gl(hw->ra)) - return -1; + int osd_plane_id, video_plane_id; p->log = hw->log; void *tmp = talloc_new(NULL); struct drm_opts *opts = 
mp_get_config_group(tmp, hw->global, &drm_conf); - drm_overlay = opts->drm_overlay_id; + osd_plane_id = opts->drm_osd_plane_id; + video_plane_id = opts->drm_video_plane_id; talloc_free(tmp); - GL *gl = ra_gl_get(hw->ra); - struct mpv_opengl_cb_drm_params *params = - gl ? (struct mpv_opengl_cb_drm_params *) - mpgl_get_native_display(gl, "opengl-cb-drm-params") : NULL; - if (!params) { - MP_VERBOSE(hw, "Could not get drm interop info.\n"); - goto err; - } + struct mpv_opengl_drm_params *drm_params; - if (params->fd) { - p->ctx = drm_atomic_create_context(p->log, params->fd, params->crtc_id, - drm_overlay); + drm_params = ra_get_native_resource(hw->ra, "drm_params"); + if (drm_params) { + p->ctx = drm_atomic_create_context(p->log, drm_params->fd, drm_params->crtc_id, + drm_params->connector_id, osd_plane_id, video_plane_id); if (!p->ctx) { mp_err(p->log, "Failed to retrieve DRM atomic context.\n"); goto err; } } else { - mp_err(p->log, "Failed to retrieve DRM fd from native display.\n"); + mp_verbose(p->log, "Failed to retrieve DRM fd from native display.\n"); goto err; } @@ -244,13 +270,13 @@ static int init(struct ra_hwdec *hw) drmModeFreeCrtc(crtc); } - uint64_t has_prime; if (drmGetCap(p->ctx->fd, DRM_CAP_PRIME, &has_prime) < 0) { MP_ERR(hw, "Card does not support prime handles.\n"); goto err; } + disable_video_plane(hw); return 0; err: diff --git a/video/out/opengl/hwdec_dxva2gldx.c b/video/out/opengl/hwdec_dxva2gldx.c index 984fd7f..bbf76b0 100644 --- a/video/out/opengl/hwdec_dxva2gldx.c +++ b/video/out/opengl/hwdec_dxva2gldx.c @@ -67,12 +67,12 @@ static int init(struct ra_hwdec *hw) // AMD drivers won't open multiple dxinterop HANDLES on the same D3D device, // so we request the one already in use by context_dxinterop - p->device_h = mpgl_get_native_display(gl, "dxinterop_device_HANDLE"); + p->device_h = ra_get_native_resource(hw->ra, "dxinterop_device_HANDLE"); if (!p->device_h) return -1; // But we also still need the actual D3D device - p->device = 
mpgl_get_native_display(gl, "IDirect3DDevice9Ex"); + p->device = ra_get_native_resource(hw->ra, "IDirect3DDevice9Ex"); if (!p->device) return -1; IDirect3DDevice9Ex_AddRef(p->device); diff --git a/video/out/opengl/hwdec_ios.m b/video/out/opengl/hwdec_ios.m index b8d4876..a16a09f 100644 --- a/video/out/opengl/hwdec_ios.m +++ b/video/out/opengl/hwdec_ios.m @@ -253,8 +253,10 @@ static void mapper_uninit(struct ra_hwdec_mapper *mapper) struct priv *p = mapper->priv; CVPixelBufferRelease(p->pbuf); - CFRelease(p->gl_texture_cache); - p->gl_texture_cache = NULL; + if (p->gl_texture_cache) { + CFRelease(p->gl_texture_cache); + p->gl_texture_cache = NULL; + } } const struct ra_hwdec_driver ra_hwdec_videotoolbox = { diff --git a/video/out/opengl/hwdec_rpi.c b/video/out/opengl/hwdec_rpi.c index 6c080f1..045fa75 100644 --- a/video/out/opengl/hwdec_rpi.c +++ b/video/out/opengl/hwdec_rpi.c @@ -36,7 +36,6 @@ #include "video/out/gpu/hwdec.h" #include "common.h" -#include "ra_gl.h" struct priv { struct mp_log *log; @@ -126,13 +125,12 @@ static void disable_renderer(struct ra_hwdec *hw) static void update_overlay(struct ra_hwdec *hw, bool check_window_only) { struct priv *p = hw->priv; - GL *gl = ra_is_gl(hw->ra) ? ra_gl_get(hw->ra) : NULL; MMAL_PORT_T *input = p->renderer->input[0]; struct mp_rect src = p->src; struct mp_rect dst = p->dst; int defs[4] = {0, 0, 0, 0}; - int *z = gl ? 
mpgl_get_native_display(gl, "MPV_RPI_WINDOW") : NULL; + int *z = ra_get_native_resource(hw->ra, "MPV_RPI_WINDOW"); if (!z) z = defs; diff --git a/video/out/opengl/hwdec_vaegl.c b/video/out/opengl/hwdec_vaegl.c index b4587c5..2ff0d98 100644 --- a/video/out/opengl/hwdec_vaegl.c +++ b/video/out/opengl/hwdec_vaegl.c @@ -36,6 +36,7 @@ #include "video/vaapi.h" #include "common.h" #include "ra_gl.h" +#include "libmpv/render_gl.h" #ifndef GL_OES_EGL_image typedef void* GLeglImageOES; @@ -55,9 +56,9 @@ typedef void *EGLImageKHR; #if HAVE_VAAPI_X11 #include <va/va_x11.h> -static VADisplay *create_x11_va_display(GL *gl) +static VADisplay *create_x11_va_display(struct ra *ra) { - Display *x11 = mpgl_get_native_display(gl, "x11"); + Display *x11 = ra_get_native_resource(ra, "x11"); return x11 ? vaGetDisplay(x11) : NULL; } #endif @@ -65,9 +66,9 @@ static VADisplay *create_x11_va_display(GL *gl) #if HAVE_VAAPI_WAYLAND #include <va/va_wayland.h> -static VADisplay *create_wayland_va_display(GL *gl) +static VADisplay *create_wayland_va_display(struct ra *ra) { - struct wl_display *wl = mpgl_get_native_display(gl, "wl"); + struct wl_display *wl = ra_get_native_resource(ra, "wl"); return wl ? vaGetDisplayWl(wl) : NULL; } #endif @@ -75,19 +76,19 @@ static VADisplay *create_wayland_va_display(GL *gl) #if HAVE_VAAPI_DRM #include <va/va_drm.h> -static VADisplay *create_drm_va_display(GL *gl) +static VADisplay *create_drm_va_display(struct ra *ra) { - int drm_fd = (intptr_t)mpgl_get_native_display(gl, "drm"); - // Note: yes, drm_fd==0 could be valid - but it's rare and doesn't fit with - // our slightly crappy way of passing it through, so consider 0 not - // valid. - return drm_fd ? 
vaGetDisplayDRM(drm_fd) : NULL; + mpv_opengl_drm_params *params = ra_get_native_resource(ra, "drm_params"); + if (!params || params->render_fd < 0) + return NULL; + + return vaGetDisplayDRM(params->render_fd); } #endif struct va_create_native { const char *name; - VADisplay *(*create)(GL *gl); + VADisplay *(*create)(struct ra *ra); }; static const struct va_create_native create_native_cbs[] = { @@ -102,12 +103,12 @@ static const struct va_create_native create_native_cbs[] = { #endif }; -static VADisplay *create_native_va_display(GL *gl, struct mp_log *log) +static VADisplay *create_native_va_display(struct ra *ra, struct mp_log *log) { for (int n = 0; n < MP_ARRAY_SIZE(create_native_cbs); n++) { const struct va_create_native *disp = &create_native_cbs[n]; mp_verbose(log, "Trying to open a %s VA display...\n", disp->name); - VADisplay *display = disp->create(gl); + VADisplay *display = disp->create(ra); if (display) return display; } @@ -169,7 +170,7 @@ static int init(struct ra_hwdec *hw) !(gl->mpgl_caps & MPGL_CAP_TEX_RG)) return -1; - p->display = create_native_va_display(gl, hw->log); + p->display = create_native_va_display(hw->ra, hw->log); if (!p->display) { MP_VERBOSE(hw, "Could not create a VA display.\n"); return -1; diff --git a/video/out/opengl/libmpv_gl.c b/video/out/opengl/libmpv_gl.c new file mode 100644 index 0000000..ae6ec66 --- /dev/null +++ b/video/out/opengl/libmpv_gl.c @@ -0,0 +1,127 @@ +#include "common.h" +#include "context.h" +#include "ra_gl.h" +#include "options/m_config.h" +#include "libmpv/render_gl.h" +#include "video/out/gpu/libmpv_gpu.h" +#include "video/out/gpu/ra.h" + +struct priv { + GL *gl; + struct ra_ctx *ra_ctx; +}; + +static int init(struct libmpv_gpu_context *ctx, mpv_render_param *params) +{ + ctx->priv = talloc_zero(NULL, struct priv); + struct priv *p = ctx->priv; + + mpv_opengl_init_params *init_params = + get_mpv_render_param(params, MPV_RENDER_PARAM_OPENGL_INIT_PARAMS, NULL); + if (!init_params) + return 
MPV_ERROR_INVALID_PARAMETER; + + p->gl = talloc_zero(p, GL); + + mpgl_load_functions2(p->gl, init_params->get_proc_address, + init_params->get_proc_address_ctx, + init_params->extra_exts, ctx->log); + if (!p->gl->version && !p->gl->es) { + MP_FATAL(ctx, "OpenGL not initialized.\n"); + return MPV_ERROR_UNSUPPORTED; + } + + // initialize a blank ra_ctx to reuse ra_gl_ctx + p->ra_ctx = talloc_zero(p, struct ra_ctx); + p->ra_ctx->log = ctx->log; + p->ra_ctx->global = ctx->global; + p->ra_ctx->opts = (struct ra_ctx_opts) { + .probing = false, + .allow_sw = true, + }; + + static const struct ra_swapchain_fns empty_swapchain_fns = {0}; + struct ra_gl_ctx_params gl_params = { + // vo_opengl_cb is essentially like a gigantic external swapchain where + // the user is in charge of presentation / swapping etc. But we don't + // actually need to provide any of these functions, since we can just + // not call them to begin with - so just set it to an empty object to + // signal to ra_gl_p that we don't care about its latency emulation + // functionality + .external_swapchain = &empty_swapchain_fns + }; + + p->gl->SwapInterval = NULL; // we shouldn't randomly change this, so lock it + if (!ra_gl_ctx_init(p->ra_ctx, p->gl, gl_params)) + return MPV_ERROR_UNSUPPORTED; + + int debug; + mp_read_option_raw(ctx->global, "gpu-debug", &m_option_type_flag, &debug); + p->ra_ctx->opts.debug = debug; + p->gl->debug_context = debug; + ra_gl_set_debug(p->ra_ctx->ra, debug); + + ctx->ra = p->ra_ctx->ra; + + // Legacy API user loading for opengl-cb. Explicitly inactive for render API. 
+ if (get_mpv_render_param(params, (mpv_render_param_type)-1, NULL) == + ctx->global && p->gl->MPGetNativeDisplay) + { + void *x11 = p->gl->MPGetNativeDisplay("x11"); + if (x11) + ra_add_native_resource(ctx->ra, "x11", x11); + void *wl = p->gl->MPGetNativeDisplay("wl"); + if (wl) + ra_add_native_resource(ctx->ra, "wl", wl); + } + + return 0; +} + +static int wrap_fbo(struct libmpv_gpu_context *ctx, mpv_render_param *params, + struct ra_tex **out) +{ + struct priv *p = ctx->priv; + + mpv_opengl_fbo *fbo = + get_mpv_render_param(params, MPV_RENDER_PARAM_OPENGL_FBO, NULL); + if (!fbo) + return MPV_ERROR_INVALID_PARAMETER; + + if (fbo->fbo && !(p->gl->mpgl_caps & MPGL_CAP_FB)) { + MP_FATAL(ctx, "Rendering to FBO requested, but no FBO extension found!\n"); + return MPV_ERROR_UNSUPPORTED; + } + + struct ra_swapchain *sw = p->ra_ctx->swapchain; + struct ra_fbo target; + ra_gl_ctx_resize(sw, fbo->w, fbo->h, fbo->fbo); + ra_gl_ctx_start_frame(sw, &target); + *out = target.tex; + return 0; +} + +static void done_frame(struct libmpv_gpu_context *ctx, bool ds) +{ + struct priv *p = ctx->priv; + + struct ra_swapchain *sw = p->ra_ctx->swapchain; + struct vo_frame dummy = {.display_synced = ds}; + ra_gl_ctx_submit_frame(sw, &dummy); +} + +static void destroy(struct libmpv_gpu_context *ctx) +{ + struct priv *p = ctx->priv; + + if (p->ra_ctx) + ra_gl_ctx_uninit(p->ra_ctx); +} + +const struct libmpv_gpu_context_fns libmpv_gpu_context_gl = { + .api_name = MPV_RENDER_API_TYPE_OPENGL, + .init = init, + .wrap_fbo = wrap_fbo, + .done_frame = done_frame, + .destroy = destroy, +}; diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c index 5b03368..7112464 100644 --- a/video/out/opengl/ra_gl.c +++ b/video/out/opengl/ra_gl.c @@ -101,6 +101,7 @@ static int ra_init_gl(struct ra *ra, GL *gl) {RA_CAP_TEX_1D, MPGL_CAP_1D_TEX}, {RA_CAP_TEX_3D, MPGL_CAP_3D_TEX}, {RA_CAP_COMPUTE, MPGL_CAP_COMPUTE_SHADER}, + {RA_CAP_NUM_GROUPS, MPGL_CAP_COMPUTE_SHADER}, {RA_CAP_NESTED_ARRAY, 
MPGL_CAP_NESTED_ARRAY}, }; @@ -276,6 +277,13 @@ static struct ra_tex *gl_tex_create_blank(struct ra *ra, tex_gl->target = GL_TEXTURE_EXTERNAL_OES; } + if (params->downloadable && !(params->dimensions == 2 && + params->format->renderable)) + { + gl_tex_destroy(ra, tex); + return NULL; + } + return tex; } @@ -283,6 +291,8 @@ static struct ra_tex *gl_tex_create(struct ra *ra, const struct ra_tex_params *params) { GL *gl = ra_gl_get(ra); + assert(!params->format->dummy_format); + struct ra_tex *tex = gl_tex_create_blank(ra, params); if (!tex) return NULL; @@ -326,8 +336,11 @@ static struct ra_tex *gl_tex_create(struct ra *ra, gl_check_error(gl, ra->log, "after creating texture"); - // Even blitting needs an FBO in OpenGL for strange reasons - if (tex->params.render_dst || tex->params.blit_src || tex->params.blit_dst) { + // Even blitting needs an FBO in OpenGL for strange reasons. + // Download is handled by reading from an FBO. + if (tex->params.render_dst || tex->params.blit_src || + tex->params.blit_dst || tex->params.downloadable) + { if (!tex->params.format->renderable) { MP_ERR(ra, "Trying to create renderable texture with unsupported " "format.\n"); @@ -382,6 +395,7 @@ static const struct ra_format fbo_dummy_format = { .flags = F_CR, }, .renderable = true, + .dummy_format = true, }; // Create a ra_tex that merely wraps an existing framebuffer. 
gl_fbo can be 0 @@ -508,6 +522,18 @@ static bool gl_tex_upload(struct ra *ra, return true; } +static bool gl_tex_download(struct ra *ra, struct ra_tex_download_params *params) +{ + GL *gl = ra_gl_get(ra); + struct ra_tex *tex = params->tex; + struct ra_tex_gl *tex_gl = tex->priv; + if (!tex_gl->fbo) + return false; + return gl_read_fbo_contents(gl, tex_gl->fbo, 1, tex_gl->format, tex_gl->type, + tex->params.w, tex->params.h, params->dst, + params->stride); +} + static void gl_buf_destroy(struct ra *ra, struct ra_buf *buf) { if (!buf) @@ -996,6 +1022,10 @@ static void gl_renderpass_run(struct ra *ra, assert(params->target->params.render_dst); assert(params->target->params.format == pass->params.target_format); gl->BindFramebuffer(GL_FRAMEBUFFER, target_gl->fbo); + if (pass->params.invalidate_target && gl->InvalidateFramebuffer) { + GLenum fb = target_gl->fbo ? GL_COLOR_ATTACHMENT0 : GL_COLOR; + gl->InvalidateFramebuffer(GL_FRAMEBUFFER, 1, &fb); + } gl->Viewport(params->viewport.x0, params->viewport.y0, mp_rect_w(params->viewport), mp_rect_h(params->viewport)); @@ -1126,6 +1156,7 @@ static struct ra_fns ra_fns_gl = { .tex_create = gl_tex_create, .tex_destroy = gl_tex_destroy, .tex_upload = gl_tex_upload, + .tex_download = gl_tex_download, .buf_create = gl_buf_create, .buf_destroy = gl_buf_destroy, .buf_update = gl_buf_update, diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c index 34f4736..a551ce4 100644 --- a/video/out/opengl/utils.c +++ b/video/out/opengl/utils.c @@ -105,25 +105,23 @@ void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); } -mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h) +bool gl_read_fbo_contents(GL *gl, int fbo, int dir, GLenum format, GLenum type, + int w, int h, uint8_t *dst, int dst_stride) { - if (gl->es) - return NULL; // ES can't read from front buffer - mp_image_t *image = mp_image_alloc(IMGFMT_RGB24, w, h); - if (!image) - return NULL; + assert(dir == 
1 || dir == -1); + if (fbo == 0 && gl->es) + return false; // ES can't read from front buffer gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); GLenum obj = fbo ? GL_COLOR_ATTACHMENT0 : GL_FRONT; gl->PixelStorei(GL_PACK_ALIGNMENT, 1); gl->ReadBuffer(obj); - //flip image while reading (and also avoid stride-related trouble) - for (int y = 0; y < h; y++) { - gl->ReadPixels(0, h - y - 1, w, 1, GL_RGB, GL_UNSIGNED_BYTE, - image->planes[0] + y * image->stride[0]); - } + // reading by line allows flipping, and avoids stride-related trouble + int y1 = dir > 0 ? 0 : h; + for (int y = 0; y < h; y++) + gl->ReadPixels(0, y, w, 1, format, type, dst + (y1 + dir * y) * dst_stride); gl->PixelStorei(GL_PACK_ALIGNMENT, 4); gl->BindFramebuffer(GL_FRAMEBUFFER, 0); - return image; + return true; } static void gl_vao_enable_attribs(struct gl_vao *vao) diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h index 53127e4..9bcadae 100644 --- a/video/out/opengl/utils.h +++ b/video/out/opengl/utils.h @@ -32,7 +32,8 @@ void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, const void *dataptr, int stride, int x, int y, int w, int h); -mp_image_t *gl_read_fbo_contents(GL *gl, int fbo, int w, int h); +bool gl_read_fbo_contents(GL *gl, int fbo, int dir, GLenum format, GLenum type, + int w, int h, uint8_t *dst, int dst_stride); struct gl_vao { GL *gl; diff --git a/video/out/vo.c b/video/out/vo.c index 63f5b34..9ecfd76 100644 --- a/video/out/vo.c +++ b/video/out/vo.c @@ -23,8 +23,6 @@ #include <pthread.h> #include <math.h> -#include <libavutil/buffer.h> - #include "mpv_talloc.h" #include "config.h" @@ -37,6 +35,7 @@ #include "misc/bstr.h" #include "vo.h" #include "aspect.h" +#include "dr_helper.h" #include "input/input.h" #include "options/m_config.h" #include "common/msg.h" @@ -52,7 +51,7 @@ extern const struct vo_driver video_out_x11; extern const struct vo_driver video_out_vdpau; extern const struct vo_driver video_out_xv; extern const struct vo_driver video_out_gpu; -extern 
const struct vo_driver video_out_opengl_cb; +extern const struct vo_driver video_out_libmpv; extern const struct vo_driver video_out_null; extern const struct vo_driver video_out_image; extern const struct vo_driver video_out_lavc; @@ -66,6 +65,7 @@ extern const struct vo_driver video_out_tct; const struct vo_driver *const video_out_drivers[] = { + &video_out_libmpv, #if HAVE_ANDROID &video_out_mediacodec_embed, #endif @@ -101,20 +101,14 @@ const struct vo_driver *const video_out_drivers[] = #if HAVE_DRM &video_out_drm, #endif -#if HAVE_ENCODING &video_out_lavc, -#endif -#if HAVE_GL - &video_out_opengl_cb, -#endif NULL }; struct vo_internal { pthread_t thread; struct mp_dispatch_queue *dispatch; - - atomic_ullong dr_in_flight; + struct dr_helper *dr_helper; // --- The following fields are protected by lock pthread_mutex_t lock; @@ -149,6 +143,7 @@ struct vo_internal { int64_t num_successive_vsyncs; int64_t flip_queue_offset; // queue flip events at most this much in advance + int64_t timing_offset; // same (but from options; not VO configured) int64_t delayed_count; int64_t drop_count; @@ -164,7 +159,7 @@ struct vo_internal { uint64_t current_frame_id; double display_fps; - int opt_framedrop; + double reported_display_fps; }; extern const struct m_sub_options gl_video_conf; @@ -185,7 +180,7 @@ static bool get_desc(struct m_obj_desc *dst, int index) .options = vo->options, .options_prefix = vo->options_prefix, .global_opts = vo->global_opts, - .hidden = vo->encode || !strcmp(vo->name, "opengl-cb"), + .hidden = vo->encode, .p = vo, }; return true; @@ -199,6 +194,7 @@ const struct m_obj_list vo_obj_list = { {"gl", "gpu"}, {"direct3d_shaders", "direct3d"}, {"opengl", "gpu"}, + {"opengl-cb", "libmpv"}, {0} }, .allow_unknown_entries = true, @@ -213,18 +209,29 @@ static void dispatch_wakeup_cb(void *ptr) vo_wakeup(vo); } +// Initialize or update options from vo->opts +static void read_opts(struct vo *vo) +{ + struct vo_internal *in = vo->in; + + 
pthread_mutex_lock(&in->lock); + in->timing_offset = (uint64_t)(vo->opts->timing_offset * 1e6); + pthread_mutex_unlock(&in->lock); +} + static void update_opts(void *p) { struct vo *vo = p; if (m_config_cache_update(vo->opts_cache)) { + read_opts(vo); + // "Legacy" update of video position related options. if (vo->driver->control) vo->driver->control(vo, VOCTRL_SET_PANSCAN, NULL); } - if (vo->gl_opts_cache && m_config_cache_update(vo->gl_opts_cache)) - { + if (vo->gl_opts_cache && m_config_cache_update(vo->gl_opts_cache)) { // "Legacy" update of video GL renderer related options. if (vo->driver->control) vo->driver->control(vo, VOCTRL_UPDATE_RENDER_OPTS, NULL); @@ -519,30 +526,30 @@ static void update_display_fps(struct vo *vo) pthread_mutex_unlock(&in->lock); - mp_read_option_raw(vo->global, "framedrop", &m_option_type_choice, - &in->opt_framedrop); - - double display_fps; - mp_read_option_raw(vo->global, "display-fps", &m_option_type_double, - &display_fps); - - if (display_fps <= 0) - vo->driver->control(vo, VOCTRL_GET_DISPLAY_FPS, &display_fps); + double fps = 0; + vo->driver->control(vo, VOCTRL_GET_DISPLAY_FPS, &fps); pthread_mutex_lock(&in->lock); - if (in->display_fps != display_fps) { - in->display_fps = display_fps; - MP_VERBOSE(vo, "Assuming %f FPS for display sync.\n", display_fps); + in->reported_display_fps = fps; + } - // make sure to update the player - in->queued_events |= VO_EVENT_WIN_STATE; - wakeup_core(vo); - } + double display_fps = vo->opts->override_display_fps; + if (display_fps <= 0) + display_fps = in->reported_display_fps; - in->nominal_vsync_interval = in->display_fps > 0 ? 1e6 / in->display_fps : 0; + if (in->display_fps != display_fps) { + in->nominal_vsync_interval = display_fps > 0 ? 
1e6 / display_fps : 0; in->vsync_interval = MPMAX(in->nominal_vsync_interval, 1); + in->display_fps = display_fps; + + MP_VERBOSE(vo, "Assuming %f FPS for display sync.\n", display_fps); + + // make sure to update the player + in->queued_events |= VO_EVENT_WIN_STATE; + wakeup_core(vo); } + pthread_mutex_unlock(&in->lock); } @@ -562,19 +569,27 @@ static void run_reconfig(void *p) { void **pp = p; struct vo *vo = pp[0]; - struct mp_image_params *params = pp[1]; + struct mp_image *img = pp[1]; int *ret = pp[2]; + struct mp_image_params *params = &img->params; + struct vo_internal *in = vo->in; + MP_VERBOSE(vo, "reconfig to %s\n", mp_image_params_to_str(params)); + m_config_cache_update(vo->opts_cache); mp_image_params_get_dsize(params, &vo->dwidth, &vo->dheight); talloc_free(vo->params); - vo->params = talloc_memdup(vo, params, sizeof(*params)); + vo->params = talloc_dup(vo, params); - *ret = vo->driver->reconfig(vo, vo->params); + if (vo->driver->reconfig2) { + *ret = vo->driver->reconfig2(vo, img); + } else { + *ret = vo->driver->reconfig(vo, vo->params); + } vo->config_ok = *ret >= 0; if (vo->config_ok) { check_vo_caps(vo); @@ -596,7 +611,17 @@ static void run_reconfig(void *p) int vo_reconfig(struct vo *vo, struct mp_image_params *params) { int ret; - void *p[] = {vo, params, &ret}; + struct mp_image dummy = {0}; + mp_image_set_params(&dummy, params); + void *p[] = {vo, &dummy, &ret}; + mp_dispatch_run(vo->in->dispatch, run_reconfig, p); + return ret; +} + +int vo_reconfig2(struct vo *vo, struct mp_image *img) +{ + int ret; + void *p[] = {vo, img, &ret}; mp_dispatch_run(vo->in->dispatch, run_reconfig, p); return ret; } @@ -630,7 +655,7 @@ void vo_control_async(struct vo *vo, int request, void *data) switch (request) { case VOCTRL_UPDATE_PLAYBACK_STATE: - d[2] = ta_xdup_ptrtype(d, (struct voctrl_playback_state *)data); + d[2] = talloc_dup(d, (struct voctrl_playback_state *)data); break; case VOCTRL_KILL_SCREENSAVER: case VOCTRL_RESTORE_SCREENSAVER: @@ -723,13 
+748,16 @@ bool vo_is_ready_for_frame(struct vo *vo, int64_t next_pts) { struct vo_internal *in = vo->in; pthread_mutex_lock(&in->lock); - bool r = vo->config_ok && !in->frame_queued && + bool blocked = vo->driver->initially_blocked && + !(in->internal_events & VO_EVENT_INITIAL_UNBLOCK); + bool r = vo->config_ok && !in->frame_queued && !blocked && (!in->current_frame || in->current_frame->num_vsyncs < 1); if (r && next_pts >= 0) { // Don't show the frame too early - it would basically freeze the // display by disallowing OSD redrawing or VO interaction. - // Actually render the frame at earliest 50ms before target time. - next_pts -= (uint64_t)(0.050 * 1e6); + // Actually render the frame at earliest the given offset before target + // time. + next_pts -= in->timing_offset; next_pts -= in->flip_queue_offset; int64_t now = mp_time_us(); if (next_pts > now) @@ -833,7 +861,7 @@ bool vo_render_frame_external(struct vo *vo) in->dropped_frame &= !frame->display_synced; in->dropped_frame &= !(vo->driver->caps & VO_CAP_FRAMEDROP); - in->dropped_frame &= (in->opt_framedrop & 1); + in->dropped_frame &= frame->can_drop; // Even if we're hopelessly behind, rather degrade to 10 FPS playback, // instead of just freezing the display forever. 
in->dropped_frame &= now - in->prev_vsync < 100 * 1000; @@ -889,7 +917,7 @@ bool vo_render_frame_external(struct vo *vo) update_vsync_timing_after_swap(vo); } - if (vo->driver->caps & VO_CAP_NOREDRAW) { + if (vo->driver->caps & VO_CAP_NORETAIN) { talloc_free(in->current_frame); in->current_frame = NULL; } @@ -917,7 +945,7 @@ static void do_redraw(struct vo *vo) { struct vo_internal *in = vo->in; - if (!vo->config_ok || (vo->driver->caps & VO_CAP_NOREDRAW)) + if (!vo->config_ok || (vo->driver->caps & VO_CAP_NORETAIN)) return; pthread_mutex_lock(&in->lock); @@ -990,6 +1018,13 @@ void vo_disable_external_renderloop(struct vo *vo) in->external_renderloop_drive = false; } +static struct mp_image *get_image_vo(void *ctx, int imgfmt, int w, int h, + int stride_align) +{ + struct vo *vo = ctx; + return vo->driver->get_image(vo, imgfmt, w, h, stride_align); +} + static void *vo_thread(void *ptr) { struct vo *vo = ptr; @@ -998,11 +1033,15 @@ static void *vo_thread(void *ptr) mpthread_set_name("vo"); + if (vo->driver->get_image) + in->dr_helper = dr_helper_create(in->dispatch, get_image_vo, vo); + int r = vo->driver->preinit(vo) ? -1 : 0; mp_rendezvous(vo, r); // init barrier if (r < 0) - return NULL; + goto done; + read_opts(vo); update_display_fps(vo); vo_event(vo, VO_EVENT_WIN_STATE); @@ -1057,7 +1096,8 @@ static void *vo_thread(void *ptr) talloc_free(in->current_frame); in->current_frame = NULL; vo->driver->uninit(vo); - assert(atomic_load(&vo->in->dr_in_flight) == 0); +done: + TA_FREEP(&in->dr_helper); return NULL; } @@ -1188,7 +1228,7 @@ void vo_get_src_dst_rects(struct vo *vo, struct mp_rect *out_src, // flip_page[_timed] will be called offset_us microseconds too early. // (For vo_vdpau, which does its own timing.) // num_req_frames set the requested number of requested vo_frame.frames. -// (For vo_opengl interpolation.) +// (For vo_gpu interpolation.) 
void vo_set_queue_params(struct vo *vo, int64_t offset_us, int num_req_frames) { struct vo_internal *in = vo->in; @@ -1322,6 +1362,25 @@ struct mp_image *vo_get_current_frame(struct vo *vo) return r; } +struct vo_frame *vo_get_current_vo_frame(struct vo *vo) +{ + struct vo_internal *in = vo->in; + pthread_mutex_lock(&in->lock); + struct vo_frame *r = vo_frame_ref(vo->in->current_frame); + pthread_mutex_unlock(&in->lock); + return r; +} + +struct mp_image *vo_get_image(struct vo *vo, int imgfmt, int w, int h, + int stride_align) +{ + if (vo->driver->get_image_ts) + return vo->driver->get_image_ts(vo, imgfmt, w, h, stride_align); + if (vo->in->dr_helper) + return dr_helper_get_image(vo->in->dr_helper, imgfmt, w, h, stride_align); + return NULL; +} + static void destroy_frame(void *p) { struct vo_frame *frame = p; @@ -1361,88 +1420,3 @@ int lookup_keymap_table(const struct mp_keymap *map, int key) map++; return map->to; } - -struct free_dr_context { - struct vo *vo; - AVBufferRef *ref; -}; - -static void vo_thread_free(void *ptr) -{ - struct free_dr_context *ctx = ptr; - - unsigned long long v = atomic_fetch_add(&ctx->vo->in->dr_in_flight, -1); - assert(v); // value before sub is 0 - unexpected underflow. - - av_buffer_unref(&ctx->ref); - talloc_free(ctx); -} - -static void free_dr_buffer_on_vo_thread(void *opaque, uint8_t *data) -{ - struct free_dr_context *ctx = opaque; - - // The image could be unreffed even on the VO thread. In practice, this - // matters most on VO destruction. 
- if (pthread_equal(ctx->vo->in->thread, pthread_self())) { - vo_thread_free(ctx); - } else { - mp_dispatch_run(ctx->vo->in->dispatch, vo_thread_free, ctx); - } -} - -struct get_image_cmd { - struct vo *vo; - int imgfmt, w, h, stride_align; - struct mp_image *res; -}; - -static void sync_get_image(void *ptr) -{ - struct get_image_cmd *cmd = ptr; - struct vo *vo = cmd->vo; - - cmd->res = vo->driver->get_image(vo, cmd->imgfmt, cmd->w, cmd->h, - cmd->stride_align); - if (!cmd->res) - return; - - // We require exactly 1 AVBufferRef. - assert(cmd->res->bufs[0]); - assert(!cmd->res->bufs[1]); - - // Apply some magic to get it free'd on the VO thread as well. For this to - // work, we create a dummy-ref that aliases the original ref, which is why - // the original ref must be writable in the first place. (A newly allocated - // image should be always writable of course.) - assert(mp_image_is_writeable(cmd->res)); - - struct free_dr_context *ctx = talloc_zero(NULL, struct free_dr_context); - *ctx = (struct free_dr_context){ - .vo = vo, - .ref = cmd->res->bufs[0], - }; - - AVBufferRef *new_ref = av_buffer_create(ctx->ref->data, ctx->ref->size, - free_dr_buffer_on_vo_thread, ctx, 0); - if (!new_ref) - abort(); // tiny malloc OOM - - cmd->res->bufs[0] = new_ref; - - atomic_fetch_add(&vo->in->dr_in_flight, 1); -} - -struct mp_image *vo_get_image(struct vo *vo, int imgfmt, int w, int h, - int stride_align) -{ - if (!vo->driver->get_image) - return NULL; - - struct get_image_cmd cmd = { - .vo = vo, - .imgfmt = imgfmt, .w = w, .h = h, .stride_align = stride_align, - }; - mp_dispatch_run(vo->in->dispatch, sync_get_image, &cmd); - return cmd.res; -} diff --git a/video/out/vo.h b/video/out/vo.h index 995d6b9..3c00bb9 100644 --- a/video/out/vo.h +++ b/video/out/vo.h @@ -45,10 +45,13 @@ enum { VO_EVENT_LIVE_RESIZING = 1 << 5, // Window fullscreen state changed via external influence. VO_EVENT_FULLSCREEN_STATE = 1 << 6, + // Special thing for encode mode (vo_driver.initially_blocked). 
+ // Part of VO_EVENTS_USER to make vo_is_ready_for_frame() work properly. + VO_EVENT_INITIAL_UNBLOCK = 1 << 7, // Set of events the player core may be interested in. VO_EVENTS_USER = VO_EVENT_RESIZE | VO_EVENT_WIN_STATE | - VO_EVENT_FULLSCREEN_STATE, + VO_EVENT_FULLSCREEN_STATE | VO_EVENT_INITIAL_UNBLOCK, }; enum mp_voctrl { @@ -65,7 +68,7 @@ enum mp_voctrl { VOCTRL_SET_EQUALIZER, // struct voctrl_set_equalizer_args* VOCTRL_GET_EQUALIZER, // struct voctrl_get_equalizer_args* - /* private to vo_opengl */ + /* private to vo_gpu */ VOCTRL_LOAD_HWDEC_API, // Redraw the image previously passed to draw_image() (basically, repeat @@ -73,6 +76,11 @@ enum mp_voctrl { // be updated and redrawn. Optional; emulated if not available. VOCTRL_REDRAW_FRAME, + // Only used internally in vo_opengl_cb + VOCTRL_PREINIT, + VOCTRL_UNINIT, + VOCTRL_RECONFIG, + VOCTRL_FULLSCREEN, VOCTRL_ONTOP, VOCTRL_BORDER, @@ -102,8 +110,13 @@ enum mp_voctrl { VOCTRL_GET_DISPLAY_NAMES, // Retrieve window contents. (Normal screenshots use vo_get_current_frame().) + // Deprecated for VOCTRL_SCREENSHOT with corresponding flags. VOCTRL_SCREENSHOT_WIN, // struct mp_image** + // A normal screenshot - VOs can react to this if vo_get_current_frame() is + // not sufficient. + VOCTRL_SCREENSHOT, // struct voctrl_screenshot* + VOCTRL_UPDATE_RENDER_OPTS, VOCTRL_GET_ICC_PROFILE, // bstr* @@ -111,6 +124,9 @@ enum mp_voctrl { VOCTRL_GET_DISPLAY_FPS, // double* VOCTRL_GET_PREF_DEINT, // int* + + /* private to vo_gpu */ + VOCTRL_EXTERNAL_RESIZE, }; // VOCTRL_SET_EQUALIZER @@ -167,13 +183,18 @@ struct voctrl_performance_data { struct mp_frame_perf fresh, redraw; }; +struct voctrl_screenshot { + bool scaled, subs, osd, high_bit_depth; + struct mp_image *res; +}; + enum { // VO does handle mp_image_params.rotate in 90 degree steps VO_CAP_ROTATE90 = 1 << 0, // VO does framedrop itself (vo_vdpau). Untimed/encoding VOs never drop. VO_CAP_FRAMEDROP = 1 << 1, - // VO does not support redraws (vo_mediacodec_embed). 
- VO_CAP_NOREDRAW = 1 << 2, + // VO does not allow frames to be retained (vo_mediacodec_embed). + VO_CAP_NORETAIN = 1 << 2, }; #define VO_MAX_REQ_FRAMES 10 @@ -187,7 +208,6 @@ struct vo_extra { struct input_ctx *input_ctx; struct osd_state *osd; struct encode_lavc_context *encode_lavc_ctx; - struct mpv_opengl_cb_context *opengl_cb_context; void (*wakeup_cb)(void *ctx); void *wakeup_ctx; }; @@ -219,6 +239,8 @@ struct vo_frame { bool still; // Frames are output as fast as possible, with implied vsync blocking. bool display_synced; + // Dropping the frame is allowed if the VO is behind. + bool can_drop; // The current frame to be drawn. // Warning: When OSD should be redrawn in --force-window --idle mode, this // can be NULL. The VO should draw a black background, OSD on top. @@ -245,6 +267,12 @@ struct vo_driver { // Encoding functionality, which can be invoked via --o only. bool encode; + // This requires waiting for a VO_EVENT_INITIAL_UNBLOCK event before the + // first frame can be sent. Doing vo_reconfig*() calls is allowed though. + // Encode mode uses this, the core uses vo_is_ready_for_frame() to + // implicitly check for this. + bool initially_blocked; + // VO_CAP_* bits int caps; @@ -274,6 +302,12 @@ struct vo_driver { int (*reconfig)(struct vo *vo, struct mp_image_params *params); /* + * Like reconfig(), but provides the whole mp_image for which the change is + * required. (The image doesn't have to have real data.) + */ + int (*reconfig2)(struct vo *vo, struct mp_image *img); + + /* * Control interface */ int (*control)(struct vo *vo, uint32_t request, void *data); @@ -309,6 +343,14 @@ struct vo_driver { int stride_align); /* + * Thread-safe variant of get_image. Set at most one of these callbacks. + * This excludes _all_ synchronization magic. The only guarantee is that + * vo_driver.uninit is not called before this function returns. 
+ */ + struct mp_image *(*get_image_ts)(struct vo *vo, int imgfmt, int w, int h, + int stride_align); + + /* * Render the given frame to the VO's backbuffer. This operation will be * followed by a draw_osd and a flip_page[_timed] call. * mpi belongs to the VO; the VO must free it eventually. @@ -321,6 +363,9 @@ struct vo_driver { /* Render the given frame. Note that this is also called when repeating * or redrawing frames. + * + * frame is freed by the caller, but the callee can still modify the + * contained data and references. */ void (*draw_frame)(struct vo *vo, struct vo_frame *frame); @@ -413,6 +458,7 @@ struct vo { struct mpv_global; struct vo *init_best_video_out(struct mpv_global *global, struct vo_extra *ex); int vo_reconfig(struct vo *vo, struct mp_image_params *p); +int vo_reconfig2(struct vo *vo, struct mp_image *img); int vo_control(struct vo *vo, int request, void *data); void vo_control_async(struct vo *vo, int request, void *data); @@ -444,6 +490,7 @@ double vo_get_estimated_vsync_jitter(struct vo *vo); double vo_get_display_fps(struct vo *vo); double vo_get_delay(struct vo *vo); void vo_discard_timing_info(struct vo *vo); +struct vo_frame *vo_get_current_vo_frame(struct vo *vo); struct mp_image *vo_get_image(struct vo *vo, int imgfmt, int w, int h, int stride_align); diff --git a/video/out/vo_drm.c b/video/out/vo_drm.c index 24189d5..7f52901 100644 --- a/video/out/vo_drm.c +++ b/video/out/vo_drm.c @@ -41,6 +41,9 @@ #define USE_MASTER 0 #define BUF_COUNT 2 +// Modulo that works correctly for negative numbers +#define MOD(a,b) ((((a)%(b))+(b))%(b)) + struct framebuffer { uint32_t width; uint32_t height; @@ -71,6 +74,7 @@ struct priv { int32_t screen_h; struct mp_image *last_input; struct mp_image *cur_frame; + struct mp_image *cur_frame_cropped; struct mp_rect src; struct mp_rect dst; struct mp_osd_res osd; @@ -149,8 +153,8 @@ static bool fb_setup_double_buffering(struct vo *vo) p->front_buf = 0; for (unsigned int i = 0; i < 2; i++) { - 
p->bufs[i].width = p->kms->mode.hdisplay; - p->bufs[i].height = p->kms->mode.vdisplay; + p->bufs[i].width = p->kms->mode.mode.hdisplay; + p->bufs[i].height = p->kms->mode.mode.vdisplay; } for (unsigned int i = 0; i < BUF_COUNT; i++) { @@ -180,9 +184,9 @@ static bool crtc_setup(struct vo *vo) return true; p->old_crtc = drmModeGetCrtc(p->kms->fd, p->kms->crtc_id); int ret = drmModeSetCrtc(p->kms->fd, p->kms->crtc_id, - p->bufs[p->front_buf + BUF_COUNT - 1].fb, + p->bufs[MOD(p->front_buf - 1, BUF_COUNT)].fb, 0, 0, &p->kms->connector->connector_id, 1, - &p->kms->mode); + &p->kms->mode.mode); p->active = true; return ret == 0; } @@ -273,17 +277,7 @@ static int reconfig(struct vo *vo, struct mp_image_params *params) int w = p->dst.x1 - p->dst.x0; int h = p->dst.y1 - p->dst.y0; - // p->osd contains the parameters assuming OSD rendering in window - // coordinates, but OSD can only be rendered in the intersection - // between window and video rectangle (i.e. not into panscan borders). - p->osd.w = w; - p->osd.h = h; - p->osd.mt = MPMIN(0, p->osd.mt); - p->osd.mb = MPMIN(0, p->osd.mb); - p->osd.mr = MPMIN(0, p->osd.mr); - p->osd.ml = MPMIN(0, p->osd.ml); - - mp_sws_set_from_cmdline(p->sws, vo->opts->sws_opts); + mp_sws_set_from_cmdline(p->sws, vo->global); p->sws->src = *params; p->sws->dst = (struct mp_image_params) { .imgfmt = IMGFMT, @@ -297,6 +291,15 @@ static int reconfig(struct vo *vo, struct mp_image_params *params) p->cur_frame = mp_image_alloc(IMGFMT, p->screen_w, p->screen_h); mp_image_params_guess_csp(&p->sws->dst); mp_image_set_params(p->cur_frame, &p->sws->dst); + p->cur_frame[0].w = p->screen_w; + p->cur_frame[0].h = p->screen_h; + + talloc_free(p->cur_frame_cropped); + p->cur_frame_cropped = mp_image_new_dummy_ref(p->cur_frame); + mp_image_crop_rc(p->cur_frame_cropped, p->dst); + + talloc_free(p->last_input); + p->last_input = NULL; struct framebuffer *buf = p->bufs; for (unsigned int i = 0; i < BUF_COUNT; i++) @@ -320,7 +323,13 @@ static void 
draw_image(struct vo *vo, mp_image_t *mpi) src_rc.x0 = MP_ALIGN_DOWN(src_rc.x0, mpi->fmt.align_x); src_rc.y0 = MP_ALIGN_DOWN(src_rc.y0, mpi->fmt.align_y); mp_image_crop_rc(&src, src_rc); - mp_sws_scale(p->sws, p->cur_frame, &src); + + mp_image_clear(p->cur_frame, 0, 0, p->cur_frame->w, p->dst.y0); + mp_image_clear(p->cur_frame, 0, p->dst.y1, p->cur_frame->w, p->cur_frame->h); + mp_image_clear(p->cur_frame, 0, p->dst.y0, p->dst.x0, p->dst.y1); + mp_image_clear(p->cur_frame, p->dst.x1, p->dst.y0, p->cur_frame->w, p->dst.y1); + + mp_sws_scale(p->sws, p->cur_frame_cropped, &src); osd_draw_on_image(vo->osd, p->osd, src.pts, 0, p->cur_frame); } else { mp_image_clear(p->cur_frame, 0, 0, p->cur_frame->w, p->cur_frame->h); @@ -328,13 +337,9 @@ static void draw_image(struct vo *vo, mp_image_t *mpi) } struct framebuffer *front_buf = &p->bufs[p->front_buf]; - int w = p->dst.x1 - p->dst.x0; - int h = p->dst.y1 - p->dst.y0; - int x = (p->screen_w - w) >> 1; - int y = (p->screen_h - h) >> 1; - int shift = y * front_buf->stride + x * BYTES_PER_PIXEL; - memcpy_pic(front_buf->map + shift, p->cur_frame->planes[0], - w * BYTES_PER_PIXEL, h, front_buf->stride, + memcpy_pic(front_buf->map, p->cur_frame->planes[0], + p->cur_frame->w * BYTES_PER_PIXEL, p->cur_frame->h, + front_buf->stride, p->cur_frame->stride[0]); } @@ -354,7 +359,7 @@ static void flip_page(struct vo *vo) p->bufs[p->front_buf].fb, DRM_MODE_PAGE_FLIP_EVENT, p); if (ret) { - MP_WARN(vo, "Cannot flip page for connector\n"); + MP_WARN(vo, "Failed to queue page flip: %s\n", mp_strerror(errno)); } else { p->front_buf++; p->front_buf %= BUF_COUNT; @@ -394,6 +399,7 @@ static void uninit(struct vo *vo) talloc_free(p->last_input); talloc_free(p->cur_frame); + talloc_free(p->cur_frame_cropped); } static int preinit(struct vo *vo) @@ -414,7 +420,8 @@ static int preinit(struct vo *vo) p->kms = kms_create( vo->log, vo->opts->drm_opts->drm_connector_spec, vo->opts->drm_opts->drm_mode_id, - vo->opts->drm_opts->drm_overlay_id); + 
vo->opts->drm_opts->drm_osd_plane_id, + vo->opts->drm_opts->drm_video_plane_id); if (!p->kms) { MP_ERR(vo, "Failed to create KMS.\n"); goto err; @@ -440,6 +447,14 @@ static int preinit(struct vo *vo) goto err; } + if (vo->opts->force_monitor_aspect != 0.0) { + vo->monitor_par = p->screen_w / (double) p->screen_h / + vo->opts->force_monitor_aspect; + } else { + vo->monitor_par = 1 / vo->opts->monitor_pixel_aspect; + } + mp_verbose(vo->log, "Monitor pixel aspect: %g\n", vo->monitor_par); + return 0; err: diff --git a/video/out/vo_gpu.c b/video/out/vo_gpu.c index 95318d3..a80ba23 100644 --- a/video/out/vo_gpu.c +++ b/video/out/vo_gpu.c @@ -84,7 +84,7 @@ static void draw_frame(struct vo *vo, struct vo_frame *frame) if (!sw->fns->start_frame(sw, &fbo)) return; - gl_video_render_frame(p->renderer, frame, fbo); + gl_video_render_frame(p->renderer, frame, fbo, RENDER_FRAME_DEF); if (!sw->fns->submit_frame(sw, frame)) { MP_ERR(vo, "Failed presenting frame!\n"); return; @@ -168,7 +168,6 @@ static void get_and_update_ambient_lighting(struct gpu_priv *p) static int control(struct vo *vo, uint32_t request, void *data) { struct gpu_priv *p = vo->priv; - struct ra_swapchain *sw = p->ctx->swapchain; switch (request) { case VOCTRL_SET_PANSCAN: @@ -177,15 +176,11 @@ static int control(struct vo *vo, uint32_t request, void *data) case VOCTRL_SET_EQUALIZER: vo->want_redraw = true; return VO_TRUE; - case VOCTRL_SCREENSHOT_WIN: { - struct mp_image *screen = NULL; - if (sw->fns->screenshot) - screen = sw->fns->screenshot(sw); - if (!screen) - break; // redirect to backend - // set image parameters according to the display, if possible - screen->params.color = gl_video_get_output_colorspace(p->renderer); - *(struct mp_image **)data = screen; + case VOCTRL_SCREENSHOT: { + struct vo_frame *frame = vo_get_current_vo_frame(vo); + if (frame) + gl_video_screenshot(p->renderer, frame, data); + talloc_free(frame); return true; } case VOCTRL_LOAD_HWDEC_API: @@ -207,6 +202,10 @@ static int 
control(struct vo *vo, uint32_t request, void *data) case VOCTRL_PERFORMANCE_DATA: gl_video_perfdata(p->renderer, (struct voctrl_performance_data *)data); return true; + case VOCTRL_EXTERNAL_RESIZE: + p->ctx->fns->reconfig(p->ctx); + resize(vo); + return true; } int events = 0; diff --git a/video/out/vo_lavc.c b/video/out/vo_lavc.c index 4b69231..e817b53 100644 --- a/video/out/vo_lavc.c +++ b/video/out/vo_lavc.c @@ -36,89 +36,58 @@ #include "sub/osd.h" struct priv { - AVStream *stream; - AVCodecContext *codec; - int have_first_packet; - - int harddup; - - double lastpts; - int64_t lastipts; - int64_t lastframeipts; - int64_t lastencodedipts; - int64_t mindeltapts; - double expected_next_pts; - mp_image_t *lastimg; - int lastdisplaycount; - - AVRational worst_time_base; - int worst_time_base_is_stream; + struct encoder_context *enc; bool shutdown; }; static int preinit(struct vo *vo) { - struct priv *vc; - if (!encode_lavc_available(vo->encode_lavc_ctx)) { - MP_ERR(vo, "the option --o (output file) must be specified\n"); + struct priv *vc = vo->priv; + vc->enc = encoder_context_alloc(vo->encode_lavc_ctx, STREAM_VIDEO, vo->log); + if (!vc->enc) return -1; - } - vo->priv = talloc_zero(vo, struct priv); - vc = vo->priv; - vc->harddup = vo->encode_lavc_ctx->options->harddup; + talloc_steal(vc, vc->enc); return 0; } -static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi); static void uninit(struct vo *vo) { struct priv *vc = vo->priv; - if (!vc || vc->shutdown) - return; - - pthread_mutex_lock(&vo->encode_lavc_ctx->lock); - - if (vc->lastipts >= 0 && vc->stream) - draw_image_unlocked(vo, NULL); + struct encoder_context *enc = vc->enc; - mp_image_unrefp(&vc->lastimg); + if (!vc->shutdown) + encoder_encode(enc, NULL); // finish encoding +} - pthread_mutex_unlock(&vo->encode_lavc_ctx->lock); +static void on_ready(void *ptr) +{ + struct vo *vo = ptr; - vc->shutdown = true; + vo_event(vo, VO_EVENT_INITIAL_UNBLOCK); } -static int reconfig(struct vo *vo, struct 
mp_image_params *params) +static int reconfig2(struct vo *vo, struct mp_image *img) { struct priv *vc = vo->priv; + AVCodecContext *encoder = vc->enc->encoder; + + struct mp_image_params *params = &img->params; enum AVPixelFormat pix_fmt = imgfmt2pixfmt(params->imgfmt); AVRational aspect = {params->p_w, params->p_h}; - uint32_t width = params->w; - uint32_t height = params->h; + int width = params->w; + int height = params->h; - if (!vc || vc->shutdown) + if (vc->shutdown) return -1; - pthread_mutex_lock(&vo->encode_lavc_ctx->lock); - - if (vc->stream) { - /* NOTE: - * in debug builds we get a "comparison between signed and unsigned" - * warning here. We choose to ignore that; just because ffmpeg currently - * uses a plain 'int' for these struct fields, it doesn't mean it always - * will */ - if (width == vc->codec->width && - height == vc->codec->height) { - if (aspect.num != vc->codec->sample_aspect_ratio.num || - aspect.den != vc->codec->sample_aspect_ratio.den) { - /* aspect-only changes are not critical */ - MP_WARN(vo, "unsupported pixel aspect ratio change from %d:%d to %d:%d\n", - vc->codec->sample_aspect_ratio.num, - vc->codec->sample_aspect_ratio.den, - aspect.num, aspect.den); - } - goto done; + if (avcodec_is_open(encoder)) { + if (width == encoder->width && height == encoder->height && + pix_fmt == encoder->pix_fmt) + { + // consider these changes not critical + MP_ERR(vo, "Ignoring mid-stream parameter changes!\n"); + return 0; } /* FIXME Is it possible with raw video? */ @@ -132,11 +101,7 @@ static int reconfig(struct vo *vo, struct mp_image_params *params) // - Second calls after reconfigure() already failed once fail (due to the // vc->shutdown check above). // - Second calls after reconfigure() already succeeded once return early - // (due to the vc->stream check above). - - vc->lastipts = AV_NOPTS_VALUE; - vc->lastframeipts = AV_NOPTS_VALUE; - vc->lastencodedipts = AV_NOPTS_VALUE; + // (due to the avcodec_is_open() check above). 
if (pix_fmt == AV_PIX_FMT_NONE) { MP_FATAL(vo, "Format %s not supported by lavc.\n", @@ -144,342 +109,122 @@ static int reconfig(struct vo *vo, struct mp_image_params *params) goto error; } - if (encode_lavc_alloc_stream(vo->encode_lavc_ctx, - AVMEDIA_TYPE_VIDEO, - &vc->stream, &vc->codec) < 0) - goto error; - vc->stream->sample_aspect_ratio = vc->codec->sample_aspect_ratio = - aspect; - vc->codec->width = width; - vc->codec->height = height; - vc->codec->pix_fmt = pix_fmt; + encoder->sample_aspect_ratio = aspect; + encoder->width = width; + encoder->height = height; + encoder->pix_fmt = pix_fmt; + encoder->colorspace = mp_csp_to_avcol_spc(params->color.space); + encoder->color_range = mp_csp_levels_to_avcol_range(params->color.levels); + + AVRational tb; + + // we want to handle: + // 1/25 + // 1001/24000 + // 1001/30000 + // for this we would need 120000fps... + // however, mpeg-4 only allows 16bit values + // so let's take 1001/30000 out + tb.num = 24000; + tb.den = 1; - encode_lavc_set_csp(vo->encode_lavc_ctx, vc->codec, params->color.space); - encode_lavc_set_csp_levels(vo->encode_lavc_ctx, vc->codec, params->color.levels); + const AVRational *rates = encoder->codec->supported_framerates; + if (rates && rates[0].den) + tb = rates[av_find_nearest_q_idx(tb, rates)]; - if (encode_lavc_open_codec(vo->encode_lavc_ctx, vc->codec) < 0) + encoder->time_base = av_inv_q(tb); + + if (!encoder_init_codec_and_muxer(vc->enc, on_ready, vo)) goto error; -done: - pthread_mutex_unlock(&vo->encode_lavc_ctx->lock); return 0; error: - pthread_mutex_unlock(&vo->encode_lavc_ctx->lock); vc->shutdown = true; return -1; } static int query_format(struct vo *vo, int format) { - enum AVPixelFormat pix_fmt = imgfmt2pixfmt(format); - - if (!vo->encode_lavc_ctx) - return 0; - - pthread_mutex_lock(&vo->encode_lavc_ctx->lock); - int flags = 0; - if (encode_lavc_supports_pixfmt(vo->encode_lavc_ctx, pix_fmt)) - flags = 1; - pthread_mutex_unlock(&vo->encode_lavc_ctx->lock); - return flags; -} - 
-static void write_packet(struct vo *vo, AVPacket *packet) -{ struct priv *vc = vo->priv; - packet->stream_index = vc->stream->index; - if (packet->pts != AV_NOPTS_VALUE) { - packet->pts = av_rescale_q(packet->pts, - vc->codec->time_base, - vc->stream->time_base); - } else { - MP_VERBOSE(vo, "codec did not provide pts\n"); - packet->pts = av_rescale_q(vc->lastipts, - vc->worst_time_base, - vc->stream->time_base); - } - if (packet->dts != AV_NOPTS_VALUE) { - packet->dts = av_rescale_q(packet->dts, - vc->codec->time_base, - vc->stream->time_base); - } - if (packet->duration > 0) { - packet->duration = av_rescale_q(packet->duration, - vc->codec->time_base, - vc->stream->time_base); - } else { - // HACK: libavformat calculates dts wrong if the initial packet - // duration is not set, but ONLY if the time base is "high" and if we - // have b-frames! - if (!packet->duration) - if (!vc->have_first_packet) - if (vc->codec->has_b_frames - || vc->codec->max_b_frames) - if (vc->stream->time_base.num * 1000LL <= - vc->stream->time_base.den) - packet->duration = FFMAX(1, av_rescale_q(1, - vc->codec->time_base, vc->stream->time_base)); - } - - if (encode_lavc_write_frame(vo->encode_lavc_ctx, - vc->stream, packet) < 0) { - MP_ERR(vo, "error writing at %d %d/%d\n", - (int) packet->pts, - vc->stream->time_base.num, - vc->stream->time_base.den); - return; - } + enum AVPixelFormat pix_fmt = imgfmt2pixfmt(format); + const enum AVPixelFormat *p = vc->enc->encoder->codec->pix_fmts; - vc->have_first_packet = 1; -} + if (!p) + return 1; -static void encode_video_and_write(struct vo *vo, AVFrame *frame) -{ - struct priv *vc = vo->priv; - AVPacket packet = {0}; - - int status = avcodec_send_frame(vc->codec, frame); - if (status < 0) { - MP_ERR(vo, "error encoding at %d %d/%d\n", - frame ? 
(int) frame->pts : -1, - vc->codec->time_base.num, - vc->codec->time_base.den); - return; - } - for (;;) { - av_init_packet(&packet); - status = avcodec_receive_packet(vc->codec, &packet); - if (status == AVERROR(EAGAIN)) { // No more packets for now. - if (frame == NULL) { - MP_ERR(vo, "sent flush frame, got EAGAIN"); - } - break; - } - if (status == AVERROR_EOF) { // No more packets, ever. - if (frame != NULL) { - MP_ERR(vo, "sent image frame, got EOF"); - } - break; - } - if (status < 0) { - MP_ERR(vo, "error encoding at %d %d/%d\n", - frame ? (int) frame->pts : -1, - vc->codec->time_base.num, - vc->codec->time_base.den); - break; - } - encode_lavc_write_stats(vo->encode_lavc_ctx, vc->codec); - write_packet(vo, &packet); - av_packet_unref(&packet); + while (*p != AV_PIX_FMT_NONE) { + if (*p == pix_fmt) + return 1; + p++; } + + return 0; } -static void draw_image_unlocked(struct vo *vo, mp_image_t *mpi) +static void draw_frame(struct vo *vo, struct vo_frame *voframe) { struct priv *vc = vo->priv; - struct encode_lavc_context *ectx = vo->encode_lavc_ctx; - AVCodecContext *avc; - int64_t frameipts; - double nextpts; - - double pts = mpi ? 
mpi->pts : MP_NOPTS_VALUE; + struct encoder_context *enc = vc->enc; + struct encode_lavc_context *ectx = enc->encode_lavc_ctx; + AVCodecContext *avc = enc->encoder; - if (mpi) { - assert(vo->params); - - struct mp_osd_res dim = osd_res_from_image_params(vo->params); + if (voframe->redraw || voframe->repeat || voframe->num_frames < 1) + return; - osd_draw_on_image(vo->osd, dim, mpi->pts, OSD_DRAW_SUB_ONLY, mpi); - } + struct mp_image *mpi = voframe->frames[0]; - if (!vc || vc->shutdown) - goto done; - if (!encode_lavc_start(ectx)) { - MP_WARN(vo, "NOTE: skipped initial video frame (probably because audio is not there yet)\n"); - goto done; - } - if (pts == MP_NOPTS_VALUE) { - if (mpi) - MP_WARN(vo, "frame without pts, please report; synthesizing pts instead\n"); - pts = vc->expected_next_pts; - } + struct mp_osd_res dim = osd_res_from_image_params(vo->params); + osd_draw_on_image(vo->osd, dim, mpi->pts, OSD_DRAW_SUB_ONLY, mpi); - avc = vc->codec; - - if (vc->worst_time_base.den == 0) { - //if (avc->time_base.num / avc->time_base.den >= vc->stream->time_base.num / vc->stream->time_base.den) - if (avc->time_base.num * (double) vc->stream->time_base.den >= - vc->stream->time_base.num * (double) avc->time_base.den) { - MP_VERBOSE(vo, "NOTE: using codec time base " - "(%d/%d) for frame dropping; the stream base (%d/%d) is " - "not worse.\n", (int)avc->time_base.num, - (int)avc->time_base.den, (int)vc->stream->time_base.num, - (int)vc->stream->time_base.den); - vc->worst_time_base = avc->time_base; - vc->worst_time_base_is_stream = 0; - } else { - MP_WARN(vo, "NOTE: not using codec time base (%d/%d) for frame " - "dropping; the stream base (%d/%d) is worse.\n", - (int)avc->time_base.num, (int)avc->time_base.den, - (int)vc->stream->time_base.num, (int)vc->stream->time_base.den); - vc->worst_time_base = vc->stream->time_base; - vc->worst_time_base_is_stream = 1; - } - if (ectx->options->maxfps) - vc->mindeltapts = ceil(vc->worst_time_base.den / - (vc->worst_time_base.num * 
ectx->options->maxfps)); - else - vc->mindeltapts = 0; - - // NOTE: we use the following "axiom" of av_rescale_q: - // if time base A is worse than time base B, then - // av_rescale_q(av_rescale_q(x, A, B), B, A) == x - // this can be proven as long as av_rescale_q rounds to nearest, which - // it currently does - - // av_rescale_q(x, A, B) * B = "round x*A to nearest multiple of B" - // and: - // av_rescale_q(av_rescale_q(x, A, B), B, A) * A - // == "round av_rescale_q(x, A, B)*B to nearest multiple of A" - // == "round 'round x*A to nearest multiple of B' to nearest multiple of A" - // - // assume this fails. Then there is a value of x*A, for which the - // nearest multiple of B is outside the range [(x-0.5)*A, (x+0.5)*A[. - // Absurd, as this range MUST contain at least one multiple of B. - } + if (vc->shutdown) + return; - double timeunit = (double)vc->worst_time_base.num / vc->worst_time_base.den; + // Lock for shared timestamp fields. + pthread_mutex_lock(&ectx->lock); - double outpts; - if (ectx->options->rawts) - outpts = pts; - else if (ectx->options->copyts) { + double pts = mpi->pts; + double outpts = pts; + if (!enc->options->rawts) { // fix the discontinuity pts offset - nextpts = pts; if (ectx->discontinuity_pts_offset == MP_NOPTS_VALUE) { - ectx->discontinuity_pts_offset = ectx->next_in_pts - nextpts; - } - else if (fabs(nextpts + ectx->discontinuity_pts_offset - ectx->next_in_pts) > 30) { + ectx->discontinuity_pts_offset = ectx->next_in_pts - pts; + } else if (fabs(pts + ectx->discontinuity_pts_offset - + ectx->next_in_pts) > 30) + { MP_WARN(vo, "detected an unexpected discontinuity (pts jumped by " "%f seconds)\n", - nextpts + ectx->discontinuity_pts_offset - ectx->next_in_pts); - ectx->discontinuity_pts_offset = ectx->next_in_pts - nextpts; + pts + ectx->discontinuity_pts_offset - ectx->next_in_pts); + ectx->discontinuity_pts_offset = ectx->next_in_pts - pts; } outpts = pts + ectx->discontinuity_pts_offset; } - else { - // adjust pts by knowledge 
of audio pts vs audio playback time - double duration = 0; - if (ectx->last_video_in_pts != MP_NOPTS_VALUE) - duration = pts - ectx->last_video_in_pts; - if (duration < 0) - duration = timeunit; // XXX warn about discontinuity? - outpts = vc->lastpts + duration; - if (ectx->audio_pts_offset != MP_NOPTS_VALUE) { - double adj = outpts - pts - ectx->audio_pts_offset; - adj = FFMIN(adj, duration * 0.1); - adj = FFMAX(adj, -duration * 0.1); - outpts -= adj; - } - } - vc->lastpts = outpts; - ectx->last_video_in_pts = pts; - frameipts = floor((outpts + encode_lavc_getoffset(ectx, vc->codec)) - / timeunit + 0.5); - // calculate expected pts of next video frame - vc->expected_next_pts = pts + timeunit; + outpts += encoder_get_offset(enc); - if (!ectx->options->rawts && ectx->options->copyts) { + if (!enc->options->rawts) { + // calculate expected pts of next video frame + double timeunit = av_q2d(avc->time_base); + double expected_next_pts = pts + timeunit; // set next allowed output pts value - nextpts = vc->expected_next_pts + ectx->discontinuity_pts_offset; + double nextpts = expected_next_pts + ectx->discontinuity_pts_offset; if (nextpts > ectx->next_in_pts) ectx->next_in_pts = nextpts; } - // never-drop mode - if (ectx->options->neverdrop) { - int64_t step = vc->mindeltapts ? vc->mindeltapts : 1; - if (frameipts < vc->lastipts + step) { - MP_INFO(vo, "--oneverdrop increased pts by %d\n", - (int) (vc->lastipts - frameipts + step)); - frameipts = vc->lastipts + step; - vc->lastpts = frameipts * timeunit - encode_lavc_getoffset(ectx, vc->codec); - } - } + pthread_mutex_unlock(&ectx->lock); - if (vc->lastipts != AV_NOPTS_VALUE) { - - // we have a valid image in lastimg - while (vc->lastimg && vc->lastipts < frameipts) { - int64_t thisduration = vc->harddup ? 1 : (frameipts - vc->lastipts); - - // we will ONLY encode this frame if it can be encoded at at least - // vc->mindeltapts after the last encoded frame! 
- int64_t skipframes = - (vc->lastencodedipts == AV_NOPTS_VALUE) - ? 0 - : vc->lastencodedipts + vc->mindeltapts - vc->lastipts; - if (skipframes < 0) - skipframes = 0; - - if (thisduration > skipframes) { - AVFrame *frame = mp_image_to_av_frame(vc->lastimg); - if (!frame) - abort(); - - // this is a nop, unless the worst time base is the STREAM time base - frame->pts = av_rescale_q(vc->lastipts + skipframes, - vc->worst_time_base, avc->time_base); - frame->pict_type = 0; // keep this at unknown/undefined - frame->quality = avc->global_quality; - encode_video_and_write(vo, frame); - av_frame_free(&frame); - - ++vc->lastdisplaycount; - vc->lastencodedipts = vc->lastipts + skipframes; - } - - vc->lastipts += thisduration; - } - } - - if (!mpi) { - // finish encoding - encode_video_and_write(vo, NULL); - } else { - if (frameipts >= vc->lastframeipts) { - if (vc->lastframeipts != AV_NOPTS_VALUE && vc->lastdisplaycount != 1) - MP_INFO(vo, "Frame at pts %d got displayed %d times\n", - (int) vc->lastframeipts, vc->lastdisplaycount); - talloc_free(vc->lastimg); - vc->lastimg = mpi; - mpi = NULL; - - vc->lastframeipts = vc->lastipts = frameipts; - if (ectx->options->rawts && vc->lastipts < 0) { - MP_ERR(vo, "why does this happen? DEBUG THIS! 
vc->lastipts = %lld\n", (long long) vc->lastipts); - vc->lastipts = -1; - } - vc->lastdisplaycount = 0; - } else { - MP_INFO(vo, "Frame at pts %d got dropped " - "entirely because pts went backwards\n", (int) frameipts); - } - } + AVFrame *frame = mp_image_to_av_frame(mpi); + if (!frame) + abort(); -done: - talloc_free(mpi); -} - -static void draw_image(struct vo *vo, mp_image_t *mpi) -{ - pthread_mutex_lock(&vo->encode_lavc_ctx->lock); - draw_image_unlocked(vo, mpi); - pthread_mutex_unlock(&vo->encode_lavc_ctx->lock); + frame->pts = rint(outpts * av_q2d(av_inv_q(avc->time_base))); + frame->pict_type = 0; // keep this at unknown/undefined + frame->quality = avc->global_quality; + encoder_encode(enc, frame); + av_frame_free(&frame); } static void flip_page(struct vo *vo) @@ -495,13 +240,15 @@ const struct vo_driver video_out_lavc = { .encode = true, .description = "video encoding using libavcodec", .name = "lavc", + .initially_blocked = true, .untimed = true, + .priv_size = sizeof(struct priv), .preinit = preinit, .query_format = query_format, - .reconfig = reconfig, + .reconfig2 = reconfig2, .control = control, .uninit = uninit, - .draw_image = draw_image, + .draw_frame = draw_frame, .flip_page = flip_page, }; diff --git a/video/out/vo_libmpv.c b/video/out/vo_libmpv.c new file mode 100644 index 0000000..1df63a5 --- /dev/null +++ b/video/out/vo_libmpv.c @@ -0,0 +1,730 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <stdbool.h> +#include <limits.h> +#include <pthread.h> +#include <assert.h> + +#include "config.h" + +#include "mpv_talloc.h" +#include "common/common.h" +#include "misc/bstr.h" +#include "misc/dispatch.h" +#include "common/msg.h" +#include "options/m_config.h" +#include "options/options.h" +#include "aspect.h" +#include "dr_helper.h" +#include "vo.h" +#include "video/mp_image.h" +#include "sub/osd.h" +#include "osdep/atomic.h" +#include "osdep/timer.h" + +#include "common/global.h" +#include "player/client.h" 
+ +#include "libmpv.h" + +/* + * mpv_render_context is managed by the host application - the host application + * can access it any time, even if the VO is destroyed (or not created yet). + * + * - the libmpv user can mix render API and normal API; thus render API + * functions can wait on the core, but not the reverse + * - the core does blocking calls into the VO thread, thus the VO functions + * can't wait on the user calling the API functions + * - to make video timing work like it should, the VO thread waits on the + * render API user anyway, and the (unlikely) deadlock is avoided with + * a timeout + * + * Locking: mpv core > VO > mpv_render_context.lock > mp_client_api.lock + * > mpv_render_context.update_lock + * And: render thread > VO (wait for present) + * VO > render thread (wait for present done, via timeout) + */ + +struct vo_priv { + struct mpv_render_context *ctx; // immutable after init +}; + +struct mpv_render_context { + struct mp_log *log; + struct mpv_global *global; + struct mp_client_api *client_api; + + atomic_bool in_use; + + // --- Immutable after init + bool advanced_control; + struct mp_dispatch_queue *dispatch; // NULL if advanced_control disabled + struct dr_helper *dr; // NULL if advanced_control disabled + + pthread_mutex_t control_lock; + // --- Protected by control_lock + mp_render_cb_control_fn control_cb; + void *control_cb_ctx; + + pthread_mutex_t update_lock; + pthread_cond_t update_cond; // paired with update_lock + + // --- Protected by update_lock + mpv_render_update_fn update_cb; + void *update_cb_ctx; + bool had_kill_update; // update during termination + + pthread_mutex_t lock; + pthread_cond_t video_wait; // paired with lock + + // --- Protected by lock + struct vo_frame *next_frame; // next frame to draw + int64_t present_count; // incremented when next frame can be shown + int64_t expected_flip_count; // next vsync event for next_frame + bool redrawing; // next_frame was a redraw request + int64_t flip_count; + struct 
vo_frame *cur_frame; + struct mp_image_params img_params; + int vp_w, vp_h; + bool flip; + bool imgfmt_supported[IMGFMT_END - IMGFMT_START]; + bool need_reconfig; + bool need_resize; + bool need_reset; + bool need_update_external; + struct vo *vo; + + // --- Mostly immutable after init. + struct mp_hwdec_devices *hwdec_devs; + + // --- All of these can only be accessed from mpv_render_*() API, for + // which the user makes sure they're called synchronized. + struct render_backend *renderer; + struct m_config_cache *vo_opts_cache; + struct mp_vo_opts *vo_opts; +}; + +const struct render_backend_fns *render_backends[] = { + &render_backend_gpu, + NULL +}; + +static void update(struct mpv_render_context *ctx) +{ + pthread_mutex_lock(&ctx->update_lock); + if (ctx->update_cb) + ctx->update_cb(ctx->update_cb_ctx); + + // For the termination code. + ctx->had_kill_update = true; + pthread_cond_broadcast(&ctx->update_cond); + pthread_mutex_unlock(&ctx->update_lock); +} + +void *get_mpv_render_param(mpv_render_param *params, mpv_render_param_type type, + void *def) +{ + for (int n = 0; params && params[n].type; n++) { + if (params[n].type == type) + return params[n].data; + } + return def; +} + +static void forget_frames(struct mpv_render_context *ctx, bool all) +{ + pthread_cond_broadcast(&ctx->video_wait); + if (all) { + talloc_free(ctx->cur_frame); + ctx->cur_frame = NULL; + } +} + +static void dispatch_wakeup(void *ptr) +{ + struct mpv_render_context *ctx = ptr; + + update(ctx); +} + +static struct mp_image *render_get_image(void *ptr, int imgfmt, int w, int h, + int stride_align) +{ + struct mpv_render_context *ctx = ptr; + + return ctx->renderer->fns->get_image(ctx->renderer, imgfmt, w, h, stride_align); +} + +int mpv_render_context_create(mpv_render_context **res, mpv_handle *mpv, + mpv_render_param *params) +{ + mpv_render_context *ctx = talloc_zero(NULL, mpv_render_context); + pthread_mutex_init(&ctx->control_lock, NULL); + pthread_mutex_init(&ctx->lock, NULL); + 
pthread_mutex_init(&ctx->update_lock, NULL); + pthread_cond_init(&ctx->update_cond, NULL); + pthread_cond_init(&ctx->video_wait, NULL); + + ctx->global = mp_client_get_global(mpv); + ctx->client_api = ctx->global->client_api; + ctx->log = mp_log_new(ctx, ctx->global->log, "libmpv_render"); + + ctx->vo_opts_cache = m_config_cache_alloc(ctx, ctx->global, &vo_sub_opts); + ctx->vo_opts = ctx->vo_opts_cache->opts; + + if (GET_MPV_RENDER_PARAM(params, MPV_RENDER_PARAM_ADVANCED_CONTROL, int, 0)) { + ctx->advanced_control = true; + ctx->dispatch = mp_dispatch_create(ctx); + mp_dispatch_set_wakeup_fn(ctx->dispatch, dispatch_wakeup, ctx); + } + + int err = MPV_ERROR_NOT_IMPLEMENTED; + for (int n = 0; render_backends[n]; n++) { + ctx->renderer = talloc_zero(NULL, struct render_backend); + *ctx->renderer = (struct render_backend){ + .global = ctx->global, + .log = ctx->log, + .fns = render_backends[n], + }; + err = ctx->renderer->fns->init(ctx->renderer, params); + if (err >= 0) + break; + ctx->renderer->fns->destroy(ctx->renderer); + talloc_free(ctx->renderer->priv); + TA_FREEP(&ctx->renderer); + if (err != MPV_ERROR_NOT_IMPLEMENTED) + break; + } + + if (err < 0) { + mpv_render_context_free(ctx); + return err; + } + + ctx->hwdec_devs = ctx->renderer->hwdec_devs; + + for (int n = IMGFMT_START; n < IMGFMT_END; n++) { + ctx->imgfmt_supported[n - IMGFMT_START] = + ctx->renderer->fns->check_format(ctx->renderer, n); + } + + if (ctx->renderer->fns->get_image && ctx->dispatch) + ctx->dr = dr_helper_create(ctx->dispatch, render_get_image, ctx); + + if (!mp_set_main_render_context(ctx->client_api, ctx, true)) { + MP_ERR(ctx, "There is already a mpv_render_context set.\n"); + mpv_render_context_free(ctx); + return MPV_ERROR_GENERIC; + } + + *res = ctx; + return 0; +} + +void mpv_render_context_set_update_callback(mpv_render_context *ctx, + mpv_render_update_fn callback, + void *callback_ctx) +{ + pthread_mutex_lock(&ctx->update_lock); + ctx->update_cb = callback; + ctx->update_cb_ctx = 
callback_ctx; + if (ctx->update_cb) + ctx->update_cb(ctx->update_cb_ctx); + pthread_mutex_unlock(&ctx->update_lock); +} + +void mp_render_context_set_control_callback(mpv_render_context *ctx, + mp_render_cb_control_fn callback, + void *callback_ctx) +{ + pthread_mutex_lock(&ctx->control_lock); + ctx->control_cb = callback; + ctx->control_cb_ctx = callback_ctx; + pthread_mutex_unlock(&ctx->control_lock); +} + +static void kill_cb(void *ptr) +{ + struct mpv_render_context *ctx = ptr; + + pthread_mutex_lock(&ctx->update_lock); + ctx->had_kill_update = true; + pthread_cond_broadcast(&ctx->update_cond); + pthread_mutex_unlock(&ctx->update_lock); +} + +void mpv_render_context_free(mpv_render_context *ctx) +{ + if (!ctx) + return; + + // From here on, ctx becomes invisible and cannot be newly acquired. Only + // a VO could still hold a reference. + mp_set_main_render_context(ctx->client_api, ctx, false); + + // If it's still in use, a VO using it must be active. Destroy the VO, and + // also bring down the decoder etc., which still might be using the hwdec + // context. The above removal guarantees it can't come back (so ctx->vo + // can't change to non-NULL). + if (atomic_load(&ctx->in_use)) { + kill_video_async(ctx->client_api, kill_cb, ctx); + + while (atomic_load(&ctx->in_use)) { + // As long as the video decoders are not destroyed, they can still + // try to allocate new DR images and so on. This is a grotesque + // corner case, but possible. Also, more likely, DR images need to + // be released while the video chain is destroyed. + if (ctx->dispatch) + mp_dispatch_queue_process(ctx->dispatch, 0); + + // Wait for kill_cb() or update() calls. + pthread_mutex_lock(&ctx->update_lock); + if (!ctx->had_kill_update) + pthread_cond_wait(&ctx->update_cond, &ctx->update_lock); + ctx->had_kill_update = false; + pthread_mutex_unlock(&ctx->update_lock); + } + } + + assert(!atomic_load(&ctx->in_use)); + assert(!ctx->vo); + + // Possibly remaining outstanding work. 
+ if (ctx->dispatch) + mp_dispatch_queue_process(ctx->dispatch, 0); + + forget_frames(ctx, true); + + ctx->renderer->fns->destroy(ctx->renderer); + talloc_free(ctx->renderer->priv); + talloc_free(ctx->renderer); + talloc_free(ctx->dr); + talloc_free(ctx->dispatch); + + pthread_cond_destroy(&ctx->update_cond); + pthread_cond_destroy(&ctx->video_wait); + pthread_mutex_destroy(&ctx->update_lock); + pthread_mutex_destroy(&ctx->lock); + pthread_mutex_destroy(&ctx->control_lock); + + talloc_free(ctx); +} + +// Try to mark the context as "in exclusive use" (e.g. by a VO). +// Note: the function must not acquire any locks, because it's called with an +// external leaf lock held. +bool mp_render_context_acquire(mpv_render_context *ctx) +{ + bool prev = false; + return atomic_compare_exchange_strong(&ctx->in_use, &prev, true); +} + +int mpv_render_context_render(mpv_render_context *ctx, mpv_render_param *params) +{ + pthread_mutex_lock(&ctx->lock); + + int do_render = + !GET_MPV_RENDER_PARAM(params, MPV_RENDER_PARAM_SKIP_RENDERING, int, 0); + + if (do_render) { + int vp_w, vp_h; + int err = ctx->renderer->fns->get_target_size(ctx->renderer, params, + &vp_w, &vp_h); + if (err < 0) { + pthread_mutex_unlock(&ctx->lock); + return err; + } + + if (ctx->vo && (ctx->vp_w != vp_w || ctx->vp_h != vp_h || + ctx->need_resize)) + { + ctx->vp_w = vp_w; + ctx->vp_h = vp_h; + + m_config_cache_update(ctx->vo_opts_cache); + + struct mp_rect src, dst; + struct mp_osd_res osd; + mp_get_src_dst_rects(ctx->log, ctx->vo_opts, ctx->vo->driver->caps, + &ctx->img_params, vp_w, abs(vp_h), + 1.0, &src, &dst, &osd); + + ctx->renderer->fns->resize(ctx->renderer, &src, &dst, &osd); + } + ctx->need_resize = false; + } + + if (ctx->need_reconfig) + ctx->renderer->fns->reconfig(ctx->renderer, &ctx->img_params); + ctx->need_reconfig = false; + + if (ctx->need_update_external) + ctx->renderer->fns->update_external(ctx->renderer, ctx->vo); + ctx->need_update_external = false; + + if (ctx->need_reset) { + 
ctx->renderer->fns->reset(ctx->renderer); + if (ctx->cur_frame) + ctx->cur_frame->still = true; + } + ctx->need_reset = false; + + struct vo_frame *frame = ctx->next_frame; + int64_t wait_present_count = ctx->present_count; + if (frame) { + ctx->next_frame = NULL; + if (!(frame->redraw || !frame->current)) + wait_present_count += 1; + pthread_cond_broadcast(&ctx->video_wait); + talloc_free(ctx->cur_frame); + ctx->cur_frame = vo_frame_ref(frame); + } else { + frame = vo_frame_ref(ctx->cur_frame); + if (frame) + frame->redraw = true; + MP_STATS(ctx, "glcb-noframe"); + } + struct vo_frame dummy = {0}; + if (!frame) + frame = &dummy; + + pthread_mutex_unlock(&ctx->lock); + + MP_STATS(ctx, "glcb-render"); + + int err = 0; + + if (do_render) + err = ctx->renderer->fns->render(ctx->renderer, params, frame); + + if (frame != &dummy) + talloc_free(frame); + + if (GET_MPV_RENDER_PARAM(params, MPV_RENDER_PARAM_BLOCK_FOR_TARGET_TIME, + int, 1)) + { + pthread_mutex_lock(&ctx->lock); + while (wait_present_count > ctx->present_count) + pthread_cond_wait(&ctx->video_wait, &ctx->lock); + pthread_mutex_unlock(&ctx->lock); + } + + return err; +} + +void mpv_render_context_report_swap(mpv_render_context *ctx) +{ + MP_STATS(ctx, "glcb-reportflip"); + + pthread_mutex_lock(&ctx->lock); + ctx->flip_count += 1; + pthread_cond_broadcast(&ctx->video_wait); + pthread_mutex_unlock(&ctx->lock); +} + +uint64_t mpv_render_context_update(mpv_render_context *ctx) +{ + uint64_t res = 0; + + if (ctx->dispatch) + mp_dispatch_queue_process(ctx->dispatch, 0); + + pthread_mutex_lock(&ctx->lock); + if (ctx->next_frame) + res |= MPV_RENDER_UPDATE_FRAME; + pthread_mutex_unlock(&ctx->lock); + return res; +} + +int mpv_render_context_set_parameter(mpv_render_context *ctx, + mpv_render_param param) +{ + return ctx->renderer->fns->set_parameter(ctx->renderer, param); +} + +int mpv_render_context_get_info(mpv_render_context *ctx, + mpv_render_param param) +{ + int res = MPV_ERROR_NOT_IMPLEMENTED; + 
pthread_mutex_lock(&ctx->lock); + + switch (param.type) { + case MPV_RENDER_PARAM_NEXT_FRAME_INFO: { + mpv_render_frame_info *info = param.data; + *info = (mpv_render_frame_info){0}; + struct vo_frame *frame = ctx->next_frame; + if (frame) { + info->flags = + MPV_RENDER_FRAME_INFO_PRESENT | + (frame->redraw ? MPV_RENDER_FRAME_INFO_REDRAW : 0) | + (frame->repeat ? MPV_RENDER_FRAME_INFO_REPEAT : 0) | + (frame->display_synced && !frame->redraw ? + MPV_RENDER_FRAME_INFO_BLOCK_VSYNC : 0); + info->target_time = frame->pts; + } + res = 0; + break; + } + default:; + } + + pthread_mutex_unlock(&ctx->lock); + return res; +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + + pthread_mutex_lock(&ctx->lock); + assert(!ctx->next_frame); + ctx->next_frame = vo_frame_ref(frame); + ctx->expected_flip_count = ctx->flip_count + 1; + ctx->redrawing = frame->redraw || !frame->current; + pthread_mutex_unlock(&ctx->lock); + + update(ctx); +} + +static void flip_page(struct vo *vo) +{ + struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + struct timespec ts = mp_rel_time_to_timespec(0.2); + + pthread_mutex_lock(&ctx->lock); + + // Wait until frame was rendered + while (ctx->next_frame) { + if (pthread_cond_timedwait(&ctx->video_wait, &ctx->lock, &ts)) { + if (ctx->next_frame) { + MP_VERBOSE(vo, "mpv_render_context_render() not being called " + "or stuck.\n"); + goto done; + } + } + } + + // Unblock mpv_render_context_render(). + ctx->present_count += 1; + pthread_cond_broadcast(&ctx->video_wait); + + if (ctx->redrawing) + goto done; // do not block for redrawing + + // Wait until frame was presented + while (ctx->expected_flip_count > ctx->flip_count) { + // mpv_render_report_swap() is declared as optional API. + // Assume the user calls it consistently _if_ it's called at all. 
+ if (!ctx->flip_count) + break; + if (pthread_cond_timedwait(&ctx->video_wait, &ctx->lock, &ts)) { + MP_VERBOSE(vo, "mpv_render_report_swap() not being called.\n"); + goto done; + } + } + +done: + + // Cleanup after the API user is not reacting, or is being unusually slow. + if (ctx->next_frame) { + talloc_free(ctx->cur_frame); + ctx->cur_frame = ctx->next_frame; + ctx->next_frame = NULL; + ctx->present_count += 2; + pthread_cond_signal(&ctx->video_wait); + vo_increment_drop_count(vo, 1); + } + + pthread_mutex_unlock(&ctx->lock); +} + +static int query_format(struct vo *vo, int format) +{ + struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + + bool ok = false; + pthread_mutex_lock(&ctx->lock); + if (format >= IMGFMT_START && format < IMGFMT_END) + ok = ctx->imgfmt_supported[format - IMGFMT_START]; + pthread_mutex_unlock(&ctx->lock); + return ok; +} + +static void run_control_on_render_thread(void *p) +{ + void **args = p; + struct mpv_render_context *ctx = args[0]; + int request = (intptr_t)args[1]; + void *data = args[2]; + int ret = VO_NOTIMPL; + + switch (request) { + case VOCTRL_SCREENSHOT: { + pthread_mutex_lock(&ctx->lock); + struct vo_frame *frame = vo_frame_ref(ctx->cur_frame); + pthread_mutex_unlock(&ctx->lock); + if (frame && ctx->renderer->fns->screenshot) + ctx->renderer->fns->screenshot(ctx->renderer, frame, data); + talloc_free(frame); + break; + } + } + + *(int *)args[3] = ret; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + + switch (request) { + case VOCTRL_RESET: + pthread_mutex_lock(&ctx->lock); + forget_frames(ctx, false); + ctx->need_reset = true; + pthread_mutex_unlock(&ctx->lock); + vo->want_redraw = true; + return VO_TRUE; + case VOCTRL_PAUSE: + vo->want_redraw = true; + return VO_TRUE; + case VOCTRL_SET_EQUALIZER: + vo->want_redraw = true; + return VO_TRUE; + case VOCTRL_SET_PANSCAN: + pthread_mutex_lock(&ctx->lock); 
+ ctx->need_resize = true; + pthread_mutex_unlock(&ctx->lock); + vo->want_redraw = true; + return VO_TRUE; + case VOCTRL_UPDATE_RENDER_OPTS: + pthread_mutex_lock(&ctx->lock); + ctx->need_update_external = true; + pthread_mutex_unlock(&ctx->lock); + vo->want_redraw = true; + return VO_TRUE; + } + + // VOCTRLs to be run on the renderer thread (if possible at all). + switch (request) { + case VOCTRL_SCREENSHOT: + if (ctx->dispatch) { + int ret; + void *args[] = {ctx, (void *)(intptr_t)request, data, &ret}; + mp_dispatch_run(ctx->dispatch, run_control_on_render_thread, args); + return ret; + } + } + + int r = VO_NOTIMPL; + pthread_mutex_lock(&ctx->control_lock); + if (ctx->control_cb) { + int events = 0; + r = p->ctx->control_cb(vo, p->ctx->control_cb_ctx, + &events, request, data); + vo_event(vo, events); + } + pthread_mutex_unlock(&ctx->control_lock); + + return r; +} + +static struct mp_image *get_image(struct vo *vo, int imgfmt, int w, int h, + int stride_align) +{ + struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + + if (ctx->dr) + return dr_helper_get_image(ctx->dr, imgfmt, w, h, stride_align); + + return NULL; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + + pthread_mutex_lock(&ctx->lock); + forget_frames(ctx, true); + ctx->img_params = *params; + ctx->need_reconfig = true; + ctx->need_resize = true; + pthread_mutex_unlock(&ctx->lock); + + control(vo, VOCTRL_RECONFIG, NULL); + + return 0; +} + +static void uninit(struct vo *vo) +{ + struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + + control(vo, VOCTRL_UNINIT, NULL); + + pthread_mutex_lock(&ctx->lock); + + forget_frames(ctx, true); + ctx->img_params = (struct mp_image_params){0}; + ctx->need_reconfig = true; + ctx->need_resize = true; + ctx->need_update_external = true; + ctx->need_reset = true; + ctx->vo = NULL; + pthread_mutex_unlock(&ctx->lock); + + bool 
state = atomic_exchange(&ctx->in_use, false); + assert(state); // obviously must have been set + + update(ctx); +} + +static int preinit(struct vo *vo) +{ + struct vo_priv *p = vo->priv; + + struct mpv_render_context *ctx = + mp_client_api_acquire_render_context(vo->global->client_api); + p->ctx = ctx; + + if (!ctx) { + if (!vo->probing) + MP_FATAL(vo, "No render context set.\n"); + return -1; + } + + pthread_mutex_lock(&ctx->lock); + ctx->vo = vo; + ctx->need_resize = true; + ctx->need_update_external = true; + pthread_mutex_unlock(&ctx->lock); + + vo->hwdec_devs = ctx->hwdec_devs; + control(vo, VOCTRL_PREINIT, NULL); + + return 0; +} + +const struct vo_driver video_out_libmpv = { + .description = "render API for libmpv", + .name = "libmpv", + .caps = VO_CAP_ROTATE90, + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .get_image_ts = get_image, + .draw_frame = draw_frame, + .flip_page = flip_page, + .uninit = uninit, + .priv_size = sizeof(struct vo_priv), +}; diff --git a/video/out/vo_mediacodec_embed.c b/video/out/vo_mediacodec_embed.c index 63975e9..dc29749 100644 --- a/video/out/vo_mediacodec_embed.c +++ b/video/out/vo_mediacodec_embed.c @@ -107,7 +107,7 @@ static void uninit(struct vo *vo) const struct vo_driver video_out_mediacodec_embed = { .description = "Android (Embedded MediaCodec Surface)", .name = "mediacodec_embed", - .caps = VO_CAP_NOREDRAW, + .caps = VO_CAP_NORETAIN, .preinit = preinit, .query_format = query_format, .control = control, diff --git a/video/out/vo_opengl_cb.c b/video/out/vo_opengl_cb.c deleted file mode 100644 index c8dab15..0000000 --- a/video/out/vo_opengl_cb.c +++ /dev/null @@ -1,542 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <math.h> -#include <stdbool.h> -#include <limits.h> -#include <pthread.h> -#include <assert.h> - -#include "config.h" - -#include "mpv_talloc.h" -#include "common/common.h" -#include "misc/bstr.h" -#include "common/msg.h" 
-#include "options/m_config.h" -#include "options/options.h" -#include "aspect.h" -#include "vo.h" -#include "video/mp_image.h" -#include "sub/osd.h" -#include "osdep/timer.h" - -#include "common/global.h" -#include "player/client.h" - -#include "gpu/video.h" -#include "gpu/hwdec.h" -#include "opengl/common.h" -#include "opengl/context.h" -#include "opengl/ra_gl.h" - -#include "libmpv/opengl_cb.h" - -/* - * mpv_opengl_cb_context is created by the host application - the host application - * can access it any time, even if the VO is destroyed (or not created yet). - * The OpenGL object allows initializing the renderer etc. The VO object is only - * here to transfer the video frames somehow. - * - * Locking hierarchy: - * - the libmpv user can mix openglcb and normal API; thus openglcb API - * functions can wait on the core, but not the reverse - * - the core does blocking calls into the VO thread, thus the VO functions - * can't wait on the user calling the API functions - * - to make video timing work like it should, the VO thread waits on the - * openglcb API user anyway, and the (unlikely) deadlock is avoided with - * a timeout - */ - -struct vo_priv { - struct mpv_opengl_cb_context *ctx; -}; - -struct mpv_opengl_cb_context { - struct mp_log *log; - struct mpv_global *global; - struct mp_client_api *client_api; - - pthread_mutex_t lock; - pthread_cond_t wakeup; - - // --- Protected by lock - bool initialized; - mpv_opengl_cb_update_fn update_cb; - void *update_cb_ctx; - struct vo_frame *next_frame; // next frame to draw - int64_t present_count; // incremented when next frame can be shown - int64_t expected_flip_count; // next vsync event for next_frame - bool redrawing; // next_frame was a redraw request - int64_t flip_count; - struct vo_frame *cur_frame; - struct mp_image_params img_params; - bool reconfigured, reset; - int vp_w, vp_h; - bool flip; - bool force_update; - bool imgfmt_supported[IMGFMT_END - IMGFMT_START]; - bool update_new_opts; - struct vo 
*active; - - // --- This is only mutable while initialized=false, during which nothing - // except the OpenGL context manager is allowed to access it. - struct mp_hwdec_devices *hwdec_devs; - - // --- All of these can only be accessed from the thread where the host - // application's OpenGL context is current - i.e. only while the - // host application is calling certain mpv_opengl_cb_* APIs. - GL *gl; - struct ra_ctx *ra_ctx; - struct gl_video *renderer; - struct m_config_cache *vo_opts_cache; - struct mp_vo_opts *vo_opts; -}; - -static void update(struct vo_priv *p); - -static void forget_frames(struct mpv_opengl_cb_context *ctx, bool all) -{ - pthread_cond_broadcast(&ctx->wakeup); - if (all) { - talloc_free(ctx->cur_frame); - ctx->cur_frame = NULL; - } -} - -static void free_ctx(void *ptr) -{ - mpv_opengl_cb_context *ctx = ptr; - - // This can trigger if the client API user doesn't call - // mpv_opengl_cb_uninit_gl() properly. - assert(!ctx->initialized); - - pthread_cond_destroy(&ctx->wakeup); - pthread_mutex_destroy(&ctx->lock); -} - -struct mpv_opengl_cb_context *mp_opengl_create(struct mpv_global *g, - struct mp_client_api *client_api) -{ - mpv_opengl_cb_context *ctx = talloc_zero(NULL, mpv_opengl_cb_context); - talloc_set_destructor(ctx, free_ctx); - pthread_mutex_init(&ctx->lock, NULL); - pthread_cond_init(&ctx->wakeup, NULL); - - ctx->global = g; - ctx->log = mp_log_new(ctx, g->log, "opengl-cb"); - ctx->client_api = client_api; - - ctx->vo_opts_cache = m_config_cache_alloc(ctx, ctx->global, &vo_sub_opts); - ctx->vo_opts = ctx->vo_opts_cache->opts; - - return ctx; -} - -void mpv_opengl_cb_set_update_callback(struct mpv_opengl_cb_context *ctx, - mpv_opengl_cb_update_fn callback, - void *callback_ctx) -{ - pthread_mutex_lock(&ctx->lock); - ctx->update_cb = callback; - ctx->update_cb_ctx = callback_ctx; - pthread_mutex_unlock(&ctx->lock); -} - -// Reset some GL attributes the user might clobber. 
For mid-term compatibility -// only - we expect both user code and our code to do this correctly. -static void reset_gl_state(GL *gl) -{ - gl->ActiveTexture(GL_TEXTURE0); - if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) - gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); - gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); -} - -int mpv_opengl_cb_init_gl(struct mpv_opengl_cb_context *ctx, const char *exts, - mpv_opengl_cb_get_proc_address_fn get_proc_address, - void *get_proc_address_ctx) -{ - if (ctx->renderer) - return MPV_ERROR_INVALID_PARAMETER; - - talloc_free(ctx->gl); - ctx->gl = talloc_zero(ctx, GL); - - mpgl_load_functions2(ctx->gl, get_proc_address, get_proc_address_ctx, - exts, ctx->log); - if (!ctx->gl->version && !ctx->gl->es) { - MP_FATAL(ctx, "OpenGL not initialized.\n"); - return MPV_ERROR_UNSUPPORTED; - } - - // initialize a blank ra_ctx to reuse ra_gl_ctx - ctx->ra_ctx = talloc_zero(ctx, struct ra_ctx); - ctx->ra_ctx->log = ctx->log; - ctx->ra_ctx->global = ctx->global; - ctx->ra_ctx->opts = (struct ra_ctx_opts) { - .probing = false, - .allow_sw = true, - }; - - static const struct ra_swapchain_fns empty_swapchain_fns = {0}; - struct ra_gl_ctx_params gl_params = { - // vo_opengl_cb is essentially like a gigantic external swapchain where - // the user is in charge of presentation / swapping etc. 
But we don't - // actually need to provide any of these functions, since we can just - // not call them to begin with - so just set it to an empty object to - // signal to ra_gl_ctx that we don't care about its latency emulation - // functionality - .external_swapchain = &empty_swapchain_fns - }; - - ctx->gl->SwapInterval = NULL; // we shouldn't randomly change this, so lock it - if (!ra_gl_ctx_init(ctx->ra_ctx, ctx->gl, gl_params)) - return MPV_ERROR_UNSUPPORTED; - - ctx->renderer = gl_video_init(ctx->ra_ctx->ra, ctx->log, ctx->global); - - ctx->hwdec_devs = hwdec_devices_create(); - gl_video_load_hwdecs(ctx->renderer, ctx->hwdec_devs, true); - - pthread_mutex_lock(&ctx->lock); - for (int n = IMGFMT_START; n < IMGFMT_END; n++) { - ctx->imgfmt_supported[n - IMGFMT_START] = - gl_video_check_format(ctx->renderer, n); - } - ctx->initialized = true; - pthread_mutex_unlock(&ctx->lock); - - reset_gl_state(ctx->gl); - return 0; -} - -int mpv_opengl_cb_uninit_gl(struct mpv_opengl_cb_context *ctx) -{ - if (!ctx) - return 0; - - // Bring down the decoder etc., which still might be using the hwdec - // context. Setting initialized=false guarantees it can't come back. 
- - pthread_mutex_lock(&ctx->lock); - forget_frames(ctx, true); - ctx->initialized = false; - pthread_mutex_unlock(&ctx->lock); - - kill_video(ctx->client_api); - - pthread_mutex_lock(&ctx->lock); - assert(!ctx->active); - pthread_mutex_unlock(&ctx->lock); - - gl_video_uninit(ctx->renderer); - ctx->renderer = NULL; - hwdec_devices_destroy(ctx->hwdec_devs); - ctx->hwdec_devs = NULL; - ra_gl_ctx_uninit(ctx->ra_ctx); - talloc_free(ctx->ra_ctx); - talloc_free(ctx->gl); - ctx->ra_ctx = NULL; - ctx->gl = NULL; - return 0; -} - -int mpv_opengl_cb_draw(mpv_opengl_cb_context *ctx, int fbo, int vp_w, int vp_h) -{ - assert(ctx->renderer); - - if (fbo && !(ctx->gl->mpgl_caps & MPGL_CAP_FB)) { - MP_FATAL(ctx, "Rendering to FBO requested, but no FBO extension found!\n"); - return MPV_ERROR_UNSUPPORTED; - } - - reset_gl_state(ctx->gl); - - pthread_mutex_lock(&ctx->lock); - - struct vo *vo = ctx->active; - - ctx->force_update |= ctx->reconfigured; - - if (ctx->vp_w != vp_w || ctx->vp_h != vp_h) - ctx->force_update = true; - - if (ctx->force_update && vo) { - ctx->force_update = false; - ctx->vp_w = vp_w; - ctx->vp_h = vp_h; - - m_config_cache_update(ctx->vo_opts_cache); - - struct mp_rect src, dst; - struct mp_osd_res osd; - mp_get_src_dst_rects(ctx->log, ctx->vo_opts, vo->driver->caps, - &ctx->img_params, vp_w, abs(vp_h), - 1.0, &src, &dst, &osd); - - gl_video_resize(ctx->renderer, &src, &dst, &osd); - } - - if (ctx->reconfigured) { - gl_video_set_osd_source(ctx->renderer, vo ? 
vo->osd : NULL); - gl_video_config(ctx->renderer, &ctx->img_params); - } - if (ctx->update_new_opts) { - if (vo) - gl_video_configure_queue(ctx->renderer, vo); - int debug; - mp_read_option_raw(ctx->global, "gpu-debug", &m_option_type_flag, - &debug); - ctx->gl->debug_context = debug; - ra_gl_set_debug(ctx->ra_ctx->ra, debug); - if (gl_video_icc_auto_enabled(ctx->renderer)) - MP_ERR(ctx, "icc-profile-auto is not available with opengl-cb\n"); - } - ctx->reconfigured = false; - ctx->update_new_opts = false; - - if (ctx->reset) { - gl_video_reset(ctx->renderer); - ctx->reset = false; - if (ctx->cur_frame) - ctx->cur_frame->still = true; - } - - struct vo_frame *frame = ctx->next_frame; - int64_t wait_present_count = ctx->present_count; - if (frame) { - ctx->next_frame = NULL; - if (!(frame->redraw || !frame->current)) - wait_present_count += 1; - pthread_cond_signal(&ctx->wakeup); - talloc_free(ctx->cur_frame); - ctx->cur_frame = vo_frame_ref(frame); - } else { - frame = vo_frame_ref(ctx->cur_frame); - if (frame) - frame->redraw = true; - MP_STATS(ctx, "glcb-noframe"); - } - struct vo_frame dummy = {0}; - if (!frame) - frame = &dummy; - - pthread_mutex_unlock(&ctx->lock); - - MP_STATS(ctx, "glcb-render"); - struct ra_swapchain *sw = ctx->ra_ctx->swapchain; - struct ra_fbo target; - ra_gl_ctx_resize(sw, vp_w, abs(vp_h), fbo); - ra_gl_ctx_start_frame(sw, &target); - target.flip = vp_h < 0; - gl_video_render_frame(ctx->renderer, frame, target); - ra_gl_ctx_submit_frame(sw, frame); - - reset_gl_state(ctx->gl); - - if (frame != &dummy) - talloc_free(frame); - - pthread_mutex_lock(&ctx->lock); - while (wait_present_count > ctx->present_count) - pthread_cond_wait(&ctx->wakeup, &ctx->lock); - pthread_mutex_unlock(&ctx->lock); - - return 0; -} - -int mpv_opengl_cb_report_flip(mpv_opengl_cb_context *ctx, int64_t time) -{ - MP_STATS(ctx, "glcb-reportflip"); - - pthread_mutex_lock(&ctx->lock); - ctx->flip_count += 1; - pthread_cond_signal(&ctx->wakeup); - 
pthread_mutex_unlock(&ctx->lock); - - return 0; -} - -// Called locked. -static void update(struct vo_priv *p) -{ - if (p->ctx->update_cb) - p->ctx->update_cb(p->ctx->update_cb_ctx); -} - -static void draw_frame(struct vo *vo, struct vo_frame *frame) -{ - struct vo_priv *p = vo->priv; - - pthread_mutex_lock(&p->ctx->lock); - assert(!p->ctx->next_frame); - p->ctx->next_frame = vo_frame_ref(frame); - p->ctx->expected_flip_count = p->ctx->flip_count + 1; - p->ctx->redrawing = frame->redraw || !frame->current; - update(p); - pthread_mutex_unlock(&p->ctx->lock); -} - -static void flip_page(struct vo *vo) -{ - struct vo_priv *p = vo->priv; - struct timespec ts = mp_rel_time_to_timespec(0.2); - - pthread_mutex_lock(&p->ctx->lock); - - // Wait until frame was rendered - while (p->ctx->next_frame) { - if (pthread_cond_timedwait(&p->ctx->wakeup, &p->ctx->lock, &ts)) { - if (p->ctx->next_frame) { - MP_VERBOSE(vo, "mpv_opengl_cb_draw() not being called or stuck.\n"); - goto done; - } - } - } - - // Unblock mpv_opengl_cb_draw(). - p->ctx->present_count += 1; - pthread_cond_signal(&p->ctx->wakeup); - - if (p->ctx->redrawing) - goto done; // do not block for redrawing - - // Wait until frame was presented - while (p->ctx->expected_flip_count > p->ctx->flip_count) { - // mpv_opengl_cb_report_flip() is declared as optional API. - // Assume the user calls it consistently _if_ it's called at all. - if (!p->ctx->flip_count) - break; - if (pthread_cond_timedwait(&p->ctx->wakeup, &p->ctx->lock, &ts)) { - MP_VERBOSE(vo, "mpv_opengl_cb_report_flip() not being called.\n"); - goto done; - } - } - -done: - - // Cleanup after the API user is not reacting, or is being unusually slow. 
- if (p->ctx->next_frame) { - talloc_free(p->ctx->cur_frame); - p->ctx->cur_frame = p->ctx->next_frame; - p->ctx->next_frame = NULL; - p->ctx->present_count += 2; - pthread_cond_signal(&p->ctx->wakeup); - vo_increment_drop_count(vo, 1); - } - - pthread_mutex_unlock(&p->ctx->lock); -} - -static int query_format(struct vo *vo, int format) -{ - struct vo_priv *p = vo->priv; - - bool ok = false; - pthread_mutex_lock(&p->ctx->lock); - if (format >= IMGFMT_START && format < IMGFMT_END) - ok = p->ctx->imgfmt_supported[format - IMGFMT_START]; - pthread_mutex_unlock(&p->ctx->lock); - return ok; -} - -static int reconfig(struct vo *vo, struct mp_image_params *params) -{ - struct vo_priv *p = vo->priv; - - pthread_mutex_lock(&p->ctx->lock); - forget_frames(p->ctx, true); - p->ctx->img_params = *params; - p->ctx->reconfigured = true; - pthread_mutex_unlock(&p->ctx->lock); - - return 0; -} - -static int control(struct vo *vo, uint32_t request, void *data) -{ - struct vo_priv *p = vo->priv; - - switch (request) { - case VOCTRL_RESET: - pthread_mutex_lock(&p->ctx->lock); - forget_frames(p->ctx, false); - p->ctx->reset = true; - pthread_mutex_unlock(&p->ctx->lock); - return VO_TRUE; - case VOCTRL_PAUSE: - vo->want_redraw = true; - return VO_TRUE; - case VOCTRL_SET_EQUALIZER: - vo->want_redraw = true; - return VO_TRUE; - case VOCTRL_SET_PANSCAN: - pthread_mutex_lock(&p->ctx->lock); - p->ctx->force_update = true; - update(p); - pthread_mutex_unlock(&p->ctx->lock); - return VO_TRUE; - case VOCTRL_UPDATE_RENDER_OPTS: - pthread_mutex_lock(&p->ctx->lock); - p->ctx->update_new_opts = true; - update(p); - pthread_mutex_unlock(&p->ctx->lock); - return VO_TRUE; - } - - return VO_NOTIMPL; -} - -static void uninit(struct vo *vo) -{ - struct vo_priv *p = vo->priv; - - pthread_mutex_lock(&p->ctx->lock); - forget_frames(p->ctx, true); - p->ctx->img_params = (struct mp_image_params){0}; - p->ctx->reconfigured = true; - p->ctx->active = NULL; - update(p); - pthread_mutex_unlock(&p->ctx->lock); -} 
- -static int preinit(struct vo *vo) -{ - struct vo_priv *p = vo->priv; - p->ctx = vo->extra.opengl_cb_context; - if (!p->ctx) { - MP_FATAL(vo, "No context set.\n"); - return -1; - } - - pthread_mutex_lock(&p->ctx->lock); - if (!p->ctx->initialized) { - MP_FATAL(vo, "OpenGL context not initialized.\n"); - pthread_mutex_unlock(&p->ctx->lock); - return -1; - } - p->ctx->active = vo; - p->ctx->reconfigured = true; - p->ctx->update_new_opts = true; - pthread_mutex_unlock(&p->ctx->lock); - - vo->hwdec_devs = p->ctx->hwdec_devs; - - return 0; -} - -const struct vo_driver video_out_opengl_cb = { - .description = "OpenGL Callbacks for libmpv", - .name = "opengl-cb", - .caps = VO_CAP_ROTATE90, - .preinit = preinit, - .query_format = query_format, - .reconfig = reconfig, - .control = control, - .draw_frame = draw_frame, - .flip_page = flip_page, - .uninit = uninit, - .priv_size = sizeof(struct vo_priv), -}; diff --git a/video/out/vo_rpi.c b/video/out/vo_rpi.c index 4322a3f..2065151 100644 --- a/video/out/vo_rpi.c +++ b/video/out/vo_rpi.c @@ -266,7 +266,7 @@ static void update_osd(struct vo *vo) .flip = true, }; gl_video_set_osd_pts(p->gl_video, p->osd_pts); - gl_video_render_frame(p->gl_video, &frame, target); + gl_video_render_frame(p->gl_video, &frame, target, RENDER_FRAME_DEF); ra_tex_free(p->egl.ra, &target.tex); MP_STATS(vo, "stop rpi_osd"); diff --git a/video/out/vo_sdl.c b/video/out/vo_sdl.c index 1667b2c..a7450e5 100644 --- a/video/out/vo_sdl.c +++ b/video/out/vo_sdl.c @@ -180,6 +180,7 @@ struct priv { int brightness, contrast; char *window_title; Uint32 wakeup_event; + bool screensaver_enabled; // options int allow_sw; @@ -402,10 +403,22 @@ static void check_resize(struct vo *vo) resize(vo, w, h); } +static inline void set_screensaver(bool enabled) +{ + if (!!enabled == !!SDL_IsScreenSaverEnabled()) + return; + + if (enabled) + SDL_EnableScreenSaver(); + else + SDL_DisableScreenSaver(); +} + static void set_fullscreen(struct vo *vo) { struct priv *vc = vo->priv; int 
fs = vo->opts->fullscreen; + SDL_bool prev_screensaver_state = SDL_IsScreenSaverEnabled(); Uint32 fs_flag; if (vc->switch_mode) @@ -428,7 +441,7 @@ static void set_fullscreen(struct vo *vo) } // toggling fullscreen might recreate the window, so better guard for this - SDL_DisableScreenSaver(); + set_screensaver(prev_screensaver_state); force_resize(vo); } @@ -507,8 +520,7 @@ static int reconfig(struct vo *vo, struct mp_image_params *params) resize(vo, win_w, win_h); - SDL_DisableScreenSaver(); - + set_screensaver(vc->screensaver_enabled); set_fullscreen(vo); SDL_ShowWindow(vc->window); @@ -917,6 +929,14 @@ static int control(struct vo *vo, uint32_t request, void *data) case VOCTRL_SET_CURSOR_VISIBILITY: SDL_ShowCursor(*(bool *)data); return true; + case VOCTRL_KILL_SCREENSAVER: + vc->screensaver_enabled = false; + set_screensaver(vc->screensaver_enabled); + return VO_TRUE; + case VOCTRL_RESTORE_SCREENSAVER: + vc->screensaver_enabled = true; + set_screensaver(vc->screensaver_enabled); + return VO_TRUE; case VOCTRL_UPDATE_WINDOW_TITLE: talloc_free(vc->window_title); vc->window_title = talloc_strdup(vc, (char *)data); @@ -936,6 +956,7 @@ const struct vo_driver video_out_sdl = { .priv_defaults = &(const struct priv) { .renderer_index = -1, .vsync = 1, + .screensaver_enabled = false, }, .options = (const struct m_option []){ OPT_FLAG("sw", allow_sw, 0), diff --git a/video/out/vo_tct.c b/video/out/vo_tct.c index dbe5d69..6a07786 100644 --- a/video/out/vo_tct.c +++ b/video/out/vo_tct.c @@ -207,7 +207,7 @@ static int reconfig(struct vo *vo, struct mp_image_params *params) if (p->buffer) free(p->buffer); - mp_sws_set_from_cmdline(p->sws, vo->opts->sws_opts); + mp_sws_set_from_cmdline(p->sws, vo->global); p->sws->src = *params; p->sws->dst = (struct mp_image_params) { .imgfmt = IMGFMT, diff --git a/video/out/vo_vaapi.c b/video/out/vo_vaapi.c index a3f7015..c8ffffc 100644 --- a/video/out/vo_vaapi.c +++ b/video/out/vo_vaapi.c @@ -816,7 +816,7 @@ static int preinit(struct vo 
*vo) if (!p->image_formats) goto fail; - p->pool = mp_image_pool_new(MAX_OUTPUT_SURFACES + 3); + p->pool = mp_image_pool_new(p); va_pool_set_allocator(p->pool, p->mpvaapi, VA_RT_FORMAT_YUV420); int max_subpic_formats = vaMaxNumSubpictureFormats(p->display); diff --git a/video/out/vo_vdpau.c b/video/out/vo_vdpau.c index ada3fb8..9871355 100644 --- a/video/out/vo_vdpau.c +++ b/video/out/vo_vdpau.c @@ -86,6 +86,7 @@ struct vdpctx { int current_duration; int output_surface_w, output_surface_h; + int rotation; int force_yuv; struct mp_vdpau_mixer *video_mixer; @@ -244,8 +245,7 @@ static void forget_frames(struct vo *vo, bool seek_reset) static int s_size(int max, int s, int disp) { disp = MPMAX(1, disp); - s += s / 2; - return MPMIN(max, s >= disp ? s : disp); + return MPMIN(max, MPMAX(s, disp)); } static void resize(struct vo *vo) @@ -285,7 +285,9 @@ static void resize(struct vo *vo) 1000LL * vc->flip_offset_window; vo_set_queue_params(vo, vc->flip_offset_us, 1); - if (vc->output_surface_w < vo->dwidth || vc->output_surface_h < vo->dheight) { + if (vc->output_surface_w < vo->dwidth || vc->output_surface_h < vo->dheight || + vc->rotation != vo->params->rotate) + { vc->output_surface_w = s_size(max_w, vc->output_surface_w, vo->dwidth); vc->output_surface_h = s_size(max_h, vc->output_surface_h, vo->dheight); // Creation of output_surfaces @@ -309,6 +311,7 @@ static void resize(struct vo *vo) vdp_st = vdp->output_surface_destroy(vc->rotation_surface); CHECK_VDP_WARNING(vo, "Error when calling " "vdp_output_surface_destroy"); + vc->rotation_surface = VDP_INVALID_HANDLE; } if (vo->params->rotate == 90 || vo->params->rotate == 270) { vdp_st = vdp->output_surface_create(vc->vdp_device, @@ -327,6 +330,7 @@ static void resize(struct vo *vo) MP_DBG(vo, "vdpau rotation surface create: %u\n", vc->rotation_surface); } + vc->rotation = vo->params->rotate; vo->want_redraw = true; } @@ -1027,7 +1031,7 @@ static int preinit(struct vo *vo) if (mp_vdpau_guess_if_emulated(vc->mpvdp)) { 
MP_WARN(vo, "VDPAU is most likely emulated via VA-API.\n" - "This is inefficient. Use --vo=opengl instead.\n"); + "This is inefficient. Use --vo=gpu instead.\n"); } // Mark everything as invalid first so uninit() can tell what has been diff --git a/video/out/vo_x11.c b/video/out/vo_x11.c index f29d06a..13b22d1 100644 --- a/video/out/vo_x11.c +++ b/video/out/vo_x11.c @@ -28,7 +28,6 @@ #include "vo.h" #include "video/csputils.h" #include "video/mp_image.h" -#include "video/filter/vf.h" #include <X11/Xlib.h> #include <X11/Xutil.h> @@ -245,7 +244,7 @@ static bool resize(struct vo *vo) return -1; } - mp_sws_set_from_cmdline(p->sws, vo->opts->sws_opts); + mp_sws_set_from_cmdline(p->sws, vo->global); p->sws->dst = (struct mp_image_params) { .imgfmt = fmte->mpfmt, .w = p->dst_w, diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h index 6e82bfa..1a4c3b8 100644 --- a/video/out/vulkan/common.h +++ b/video/out/vulkan/common.h @@ -48,11 +48,29 @@ struct mpvk_ctx { VkSurfaceKHR surf; VkSurfaceFormatKHR surf_format; // picked at surface initialization time - struct vk_malloc *alloc; // memory allocator for this device - struct vk_cmdpool *pool; // primary command pool for this device - struct vk_cmd *last_cmd; // most recently submitted command + struct vk_malloc *alloc; // memory allocator for this device struct spirv_compiler *spirv; // GLSL -> SPIR-V compiler + struct vk_cmdpool **pools; // command pools (one per queue family) + int num_pools; + struct vk_cmd *last_cmd; // most recently submitted command + + // Queued/pending commands. 
These are shared for the entire mpvk_ctx to + // ensure submission and callbacks are FIFO + struct vk_cmd **cmds_queued; // recorded but not yet submitted + struct vk_cmd **cmds_pending; // submitted but not completed + int num_cmds_queued; + int num_cmds_pending; + + // Pointers into *pools + struct vk_cmdpool *pool_graphics; // required + struct vk_cmdpool *pool_compute; // optional + struct vk_cmdpool *pool_transfer; // optional + + // Common pool of signals, to avoid having to re-create these objects often + struct vk_signal **signals; + int num_signals; // Cached capabilities VkPhysicalDeviceLimits limits; + VkPhysicalDeviceFeatures features; }; diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c index 0bca198..cbe0911 100644 --- a/video/out/vulkan/context.c +++ b/video/out/vulkan/context.c @@ -102,11 +102,18 @@ const struct m_sub_options vulkan_conf = { {"fifo-relaxed", SWAP_FIFO_RELAXED}, {"mailbox", SWAP_MAILBOX}, {"immediate", SWAP_IMMEDIATE})), - OPT_INTRANGE("vulkan-queue-count", dev_opts.queue_count, 0, 1, - MPVK_MAX_QUEUES, OPTDEF_INT(1)), + OPT_INTRANGE("vulkan-queue-count", dev_opts.queue_count, 0, 1, 8, + OPTDEF_INT(1)), + OPT_FLAG("vulkan-async-transfer", dev_opts.async_transfer, 0), + OPT_FLAG("vulkan-async-compute", dev_opts.async_compute, 0), {0} }, - .size = sizeof(struct vulkan_opts) + .size = sizeof(struct vulkan_opts), + .defaults = &(struct vulkan_opts) { + .dev_opts = { + .async_transfer = 1, + }, + }, }; struct priv { @@ -121,9 +128,10 @@ struct priv { // state of the images: struct ra_tex **images; // ra_tex wrappers for the vkimages int num_images; // size of images - VkSemaphore *acquired; // pool of semaphores used to synchronize images - int num_acquired; // size of this pool - int idx_acquired; // index of next free semaphore within this pool + VkSemaphore *sems_in; // pool of semaphores used to synchronize images + VkSemaphore *sems_out; // outgoing semaphores (rendering complete) + int num_sems; + int idx_sems; // 
index of next free semaphore pair int last_imgidx; // the image index last acquired (for submit) }; @@ -244,17 +252,17 @@ void ra_vk_ctx_uninit(struct ra_ctx *ctx) struct priv *p = ctx->swapchain->priv; struct mpvk_ctx *vk = p->vk; - mpvk_pool_wait_idle(vk, vk->pool); + mpvk_flush_commands(vk); + mpvk_poll_commands(vk, UINT64_MAX); for (int i = 0; i < p->num_images; i++) ra_tex_free(ctx->ra, &p->images[i]); - for (int i = 0; i < p->num_acquired; i++) - vkDestroySemaphore(vk->dev, p->acquired[i], MPVK_ALLOCATOR); + for (int i = 0; i < p->num_sems; i++) { + vkDestroySemaphore(vk->dev, p->sems_in[i], MPVK_ALLOCATOR); + vkDestroySemaphore(vk->dev, p->sems_out[i], MPVK_ALLOCATOR); + } vkDestroySwapchainKHR(vk->dev, p->swapchain, MPVK_ALLOCATOR); - - talloc_free(p->images); - talloc_free(p->acquired); ctx->ra->fns->destroy(ctx->ra); ctx->ra = NULL; } @@ -355,7 +363,7 @@ bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h) // more than one swapchain already active, so we need to flush any pending // asynchronous swapchain release operations that may be ongoing. 
while (p->old_swapchain) - mpvk_dev_poll_cmds(vk, 100000); // 100μs + mpvk_poll_commands(vk, 100000); // 100μs VkSwapchainCreateInfoKHR sinfo = p->protoInfo; sinfo.imageExtent = (VkExtent2D){ w, h }; @@ -382,13 +390,19 @@ bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h) VK(vkGetSwapchainImagesKHR(vk->dev, p->swapchain, &num, vkimages)); // If needed, allocate some more semaphores - while (num > p->num_acquired) { - VkSemaphore sem; + while (num > p->num_sems) { + VkSemaphore sem_in, sem_out; static const VkSemaphoreCreateInfo seminfo = { .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, }; - VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem)); - MP_TARRAY_APPEND(NULL, p->acquired, p->num_acquired, sem); + VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem_in)); + VK(vkCreateSemaphore(vk->dev, &seminfo, MPVK_ALLOCATOR, &sem_out)); + + int idx = p->num_sems++; + MP_TARRAY_GROW(p, p->sems_in, idx); + MP_TARRAY_GROW(p, p->sems_out, idx); + p->sems_in[idx] = sem_in; + p->sems_out[idx] = sem_out; } // Recreate the ra_tex wrappers @@ -396,7 +410,7 @@ bool ra_vk_ctx_resize(struct ra_swapchain *sw, int w, int h) ra_tex_free(ra, &p->images[i]); p->num_images = num; - MP_TARRAY_GROW(NULL, p->images, p->num_images); + MP_TARRAY_GROW(p, p->images, p->num_images); for (int i = 0; i < num; i++) { p->images[i] = ra_vk_wrap_swapchain_img(ra, vkimages[i], sinfo); if (!p->images[i]) @@ -439,66 +453,107 @@ static bool start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) struct priv *p = sw->priv; struct mpvk_ctx *vk = p->vk; if (!p->swapchain) - goto error; + return false; + + VkSemaphore sem_in = p->sems_in[p->idx_sems]; + MP_TRACE(vk, "vkAcquireNextImageKHR signals %p\n", (void *)sem_in); + + for (int attempts = 0; attempts < 2; attempts++) { + uint32_t imgidx = 0; + VkResult res = vkAcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX, + sem_in, NULL, &imgidx); + + switch (res) { + case VK_SUCCESS: + p->last_imgidx = imgidx; + *out_fbo = 
(struct ra_fbo) { + .tex = p->images[imgidx], + .flip = false, + }; + ra_tex_vk_external_dep(sw->ctx->ra, out_fbo->tex, sem_in); + return true; + + case VK_ERROR_OUT_OF_DATE_KHR: { + // In these cases try recreating the swapchain + int w = p->w, h = p->h; + p->w = p->h = 0; // invalidate the current state + if (!ra_vk_ctx_resize(sw, w, h)) + return false; + continue; + } - uint32_t imgidx = 0; - MP_TRACE(vk, "vkAcquireNextImageKHR\n"); - VkResult res = vkAcquireNextImageKHR(vk->dev, p->swapchain, UINT64_MAX, - p->acquired[p->idx_acquired], NULL, - &imgidx); - if (res == VK_ERROR_OUT_OF_DATE_KHR) - goto error; // just return in this case - VK_ASSERT(res, "Failed acquiring swapchain image"); - - p->last_imgidx = imgidx; - *out_fbo = (struct ra_fbo) { - .tex = p->images[imgidx], - .flip = false, - }; - return true; + default: + MP_ERR(vk, "Failed acquiring swapchain image: %s\n", vk_err(res)); + return false; + } + } -error: + // If we've exhausted the number of attempts to recreate the swapchain, + // just give up silently. 
return false; } +static void present_cb(struct priv *p, void *arg) +{ + p->frames_in_flight--; +} + static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame) { struct priv *p = sw->priv; struct ra *ra = sw->ctx->ra; struct mpvk_ctx *vk = p->vk; if (!p->swapchain) - goto error; + return false; - VkSemaphore acquired = p->acquired[p->idx_acquired++]; - p->idx_acquired %= p->num_acquired; + struct vk_cmd *cmd = ra_vk_submit(ra, p->images[p->last_imgidx]); + if (!cmd) + return false; - VkSemaphore done; - if (!ra_vk_submit(ra, p->images[p->last_imgidx], acquired, &done, - &p->frames_in_flight)) - goto error; + VkSemaphore sem_out = p->sems_out[p->idx_sems++]; + p->idx_sems %= p->num_sems; + vk_cmd_sig(cmd, sem_out); + + p->frames_in_flight++; + vk_cmd_callback(cmd, (vk_cb) present_cb, p, NULL); + + vk_cmd_queue(vk, cmd); + if (!mpvk_flush_commands(vk)) + return false; // Older nvidia drivers can spontaneously combust when submitting to the // same queue as we're rendering from, in a multi-queue scenario. Safest - // option is to cycle the queues first and then submit to the next queue. + // option is to flush the commands first and then submit to the next queue. // We can drop this hack in the future, I suppose. 
- vk_cmd_cycle_queues(vk); - struct vk_cmdpool *pool = vk->pool; - VkQueue queue = pool->queues[pool->qindex]; + struct vk_cmdpool *pool = vk->pool_graphics; + VkQueue queue = pool->queues[pool->idx_queues]; VkPresentInfoKHR pinfo = { .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .waitSemaphoreCount = 1, - .pWaitSemaphores = &done, + .pWaitSemaphores = &sem_out, .swapchainCount = 1, .pSwapchains = &p->swapchain, .pImageIndices = &p->last_imgidx, }; - VK(vkQueuePresentKHR(queue, &pinfo)); - return true; + MP_TRACE(vk, "vkQueuePresentKHR waits on %p\n", (void *)sem_out); + VkResult res = vkQueuePresentKHR(queue, &pinfo); + switch (res) { + case VK_SUCCESS: + case VK_SUBOPTIMAL_KHR: + return true; -error: - return false; + case VK_ERROR_OUT_OF_DATE_KHR: + // We can silently ignore this error, since the next start_frame will + // recreate the swapchain automatically. + return true; + + default: + MP_ERR(vk, "Failed presenting to queue %p: %s\n", (void *)queue, + vk_err(res)); + return false; + } } static void swap_buffers(struct ra_swapchain *sw) @@ -506,11 +561,10 @@ static void swap_buffers(struct ra_swapchain *sw) struct priv *p = sw->priv; while (p->frames_in_flight >= sw->ctx->opts.swapchain_depth) - mpvk_dev_poll_cmds(p->vk, 100000); // 100μs + mpvk_poll_commands(p->vk, 100000); // 100μs } static const struct ra_swapchain_fns vulkan_swapchain = { - // .screenshot is not currently supported .color_depth = color_depth, .start_frame = start_frame, .submit_frame = submit_frame, diff --git a/video/out/vulkan/formats.c b/video/out/vulkan/formats.c index b44bead..327a7ac 100644 --- a/video/out/vulkan/formats.c +++ b/video/out/vulkan/formats.c @@ -25,7 +25,7 @@ const struct vk_format vk_formats[] = { {"rg4", VK_FORMAT_R4G4_UNORM_PACK8, 2, 1, {4, 4 }, RA_CTYPE_UNORM }, {"rgba4", VK_FORMAT_R4G4B4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM }, {"rgb565", VK_FORMAT_R5G6B5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM }, - {"rgb565a1", VK_FORMAT_R5G5B5A1_UNORM_PACK16, 
4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM }, + {"rgb5a1", VK_FORMAT_R5G5B5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM }, // Float formats (native formats, hf = half float, df = double float) {"r16hf", VK_FORMAT_R16_SFLOAT, 1, 2, {16 }, RA_CTYPE_FLOAT }, @@ -46,7 +46,7 @@ const struct vk_format vk_formats[] = { {"bgra8", VK_FORMAT_B8G8R8A8_UNORM, 4, 4, {8, 8, 8, 8 }, RA_CTYPE_UNORM, true }, {"bgra4", VK_FORMAT_B4G4R4A4_UNORM_PACK16, 4, 2, {4, 4, 4, 4 }, RA_CTYPE_UNORM, true }, {"bgr565", VK_FORMAT_B5G6R5_UNORM_PACK16, 3, 2, {5, 6, 5 }, RA_CTYPE_UNORM, true }, - {"bgr565a1", VK_FORMAT_B5G5R5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM, true }, + {"bgr5a1", VK_FORMAT_B5G5R5A1_UNORM_PACK16, 4, 2, {5, 5, 5, 1 }, RA_CTYPE_UNORM, true }, {"a1rgb5", VK_FORMAT_A1R5G5B5_UNORM_PACK16, 4, 2, {1, 5, 5, 5 }, RA_CTYPE_UNORM, true }, {"a2rgb10", VK_FORMAT_A2R10G10B10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true }, {"a2bgr10", VK_FORMAT_A2B10G10R10_UNORM_PACK32, 4, 4, {2, 10, 10, 10}, RA_CTYPE_UNORM, true }, diff --git a/video/out/vulkan/malloc.c b/video/out/vulkan/malloc.c index f6cb114..32c2c6b 100644 --- a/video/out/vulkan/malloc.c +++ b/video/out/vulkan/malloc.c @@ -133,11 +133,22 @@ static struct vk_slab *slab_alloc(struct mpvk_ctx *vk, struct vk_heap *heap, uint32_t typeBits = heap->typeBits ? heap->typeBits : UINT32_MAX; if (heap->usage) { + // FIXME: Since we can't keep track of queue family ownership properly, + // and we don't know in advance what types of queue families this buffer + // will belong to, we're forced to share all of our buffers between all + // command pools. + uint32_t qfs[3] = {0}; + for (int i = 0; i < vk->num_pools; i++) + qfs[i] = vk->pools[i]->qf; + VkBufferCreateInfo binfo = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .size = slab->size, .usage = heap->usage, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .sharingMode = vk->num_pools > 1 ? 
VK_SHARING_MODE_CONCURRENT + : VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = vk->num_pools, + .pQueueFamilyIndices = qfs, }; VK(vkCreateBuffer(vk->dev, &binfo, MPVK_ALLOCATOR, &slab->buffer)); diff --git a/video/out/vulkan/ra_vk.c b/video/out/vulkan/ra_vk.c index f85e30e..cffb895 100644 --- a/video/out/vulkan/ra_vk.c +++ b/video/out/vulkan/ra_vk.c @@ -6,6 +6,12 @@ static struct ra_fns ra_fns_vk; +enum queue_type { + GRAPHICS, + COMPUTE, + TRANSFER, +}; + // For ra.priv struct ra_vk { struct mpvk_ctx *vk; @@ -22,51 +28,57 @@ struct mpvk_ctx *ra_vk_get(struct ra *ra) return p->vk; } -// Returns a command buffer, or NULL on error -static struct vk_cmd *vk_require_cmd(struct ra *ra) -{ - struct ra_vk *p = ra->priv; - struct mpvk_ctx *vk = ra_vk_get(ra); - - if (!p->cmd) - p->cmd = vk_cmd_begin(vk, vk->pool); - - return p->cmd; -} - -// Note: This technically follows the flush() API, but we don't need -// to expose that (and in fact, it's a bad idea) since we control flushing -// behavior with ra_vk_present_frame already. 
-static bool vk_flush(struct ra *ra, VkSemaphore *done) +static void vk_submit(struct ra *ra) { struct ra_vk *p = ra->priv; struct mpvk_ctx *vk = ra_vk_get(ra); if (p->cmd) { - if (!vk_cmd_submit(vk, p->cmd, done)) - return false; + vk_cmd_queue(vk, p->cmd); p->cmd = NULL; } - - return true; } -// The callback's *priv will always be set to `ra` -static void vk_callback(struct ra *ra, vk_cb callback, void *arg) +// Returns a command buffer, or NULL on error +static struct vk_cmd *vk_require_cmd(struct ra *ra, enum queue_type type) { struct ra_vk *p = ra->priv; struct mpvk_ctx *vk = ra_vk_get(ra); - if (p->cmd) { - vk_cmd_callback(p->cmd, callback, ra, arg); - } else { - vk_dev_callback(vk, callback, ra, arg); + struct vk_cmdpool *pool; + switch (type) { + case GRAPHICS: pool = vk->pool_graphics; break; + case COMPUTE: pool = vk->pool_compute; break; + + // GRAPHICS and COMPUTE also imply TRANSFER capability (vulkan spec) + case TRANSFER: + pool = vk->pool_transfer; + if (!pool) + pool = vk->pool_compute; + if (!pool) + pool = vk->pool_graphics; + break; + default: abort(); } + + assert(pool); + if (p->cmd && p->cmd->pool == pool) + return p->cmd; + + vk_submit(ra); + p->cmd = vk_cmd_begin(vk, pool); + return p->cmd; } #define MAKE_LAZY_DESTRUCTOR(fun, argtype) \ static void fun##_lazy(struct ra *ra, argtype *arg) { \ - vk_callback(ra, (vk_cb) fun, arg); \ + struct ra_vk *p = ra->priv; \ + struct mpvk_ctx *vk = ra_vk_get(ra); \ + if (p->cmd) { \ + vk_cmd_callback(p->cmd, (vk_cb) fun, ra, arg); \ + } else { \ + vk_dev_callback(vk, (vk_cb) fun, ra, arg); \ + } \ } static void vk_destroy_ra(struct ra *ra) @@ -74,8 +86,9 @@ static void vk_destroy_ra(struct ra *ra) struct ra_vk *p = ra->priv; struct mpvk_ctx *vk = ra_vk_get(ra); - vk_flush(ra, NULL); - mpvk_dev_wait_idle(vk); + vk_submit(ra); + mpvk_flush_commands(vk); + mpvk_poll_commands(vk, UINT64_MAX); ra_tex_free(ra, &p->clear_tex); talloc_free(ra); @@ -195,8 +208,13 @@ struct ra *ra_create_vk(struct mpvk_ctx *vk, 
struct mp_log *log) ra->max_shmem = vk->limits.maxComputeSharedMemorySize; ra->max_pushc_size = vk->limits.maxPushConstantsSize; - if (vk->pool->props.queueFlags & VK_QUEUE_COMPUTE_BIT) - ra->caps |= RA_CAP_COMPUTE; + if (vk->pool_compute) { + ra->caps |= RA_CAP_COMPUTE | RA_CAP_NUM_GROUPS; + // If we have more compute queues than graphics queues, we probably + // want to be using them. (This seems mostly relevant for AMD) + if (vk->pool_compute->num_queues > vk->pool_graphics->num_queues) + ra->caps |= RA_CAP_PARALLEL_COMPUTE; + } if (!vk_setup_formats(ra)) goto error; @@ -204,8 +222,8 @@ struct ra *ra_create_vk(struct mpvk_ctx *vk, struct mp_log *log) // UBO support is required ra->caps |= RA_CAP_BUF_RO | RA_CAP_FRAGCOORD; - // textureGather is only supported in GLSL 400+ - if (ra->glsl_version >= 400) + // textureGather requires the ImageGatherExtended capability + if (vk->features.shaderImageGatherExtended) ra->caps |= RA_CAP_GATHER; // Try creating a shader storage buffer @@ -246,9 +264,13 @@ error: } // Boilerplate wrapper around vkCreateRenderPass to ensure passes remain -// compatible +// compatible. The renderpass will automatically transition the image out of +// initialLayout and into finalLayout. static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt, - bool load_fbo, VkRenderPass *out) + VkAttachmentLoadOp loadOp, + VkImageLayout initialLayout, + VkImageLayout finalLayout, + VkRenderPass *out) { struct vk_format *vk_fmt = fmt->priv; assert(fmt->renderable); @@ -259,12 +281,10 @@ static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt, .pAttachments = &(VkAttachmentDescription) { .format = vk_fmt->iformat, .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = load_fbo ? VK_ATTACHMENT_LOAD_OP_LOAD - : VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .loadOp = loadOp, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = load_fbo ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL - : VK_IMAGE_LAYOUT_UNDEFINED, - .finalLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .initialLayout = initialLayout, + .finalLayout = finalLayout, }, .subpassCount = 1, .pSubpasses = &(VkSubpassDescription) { @@ -283,6 +303,7 @@ static VkResult vk_create_render_pass(VkDevice dev, const struct ra_format *fmt, // For ra_tex.priv struct ra_tex_vk { bool external_img; + enum queue_type upload_queue; VkImageType type; VkImage img; struct vk_memslice mem; @@ -296,16 +317,34 @@ struct ra_tex_vk { struct ra_buf_pool pbo; // "current" metadata, can change during the course of execution VkImageLayout current_layout; - VkPipelineStageFlags current_stage; VkAccessFlags current_access; + // the signal guards reuse, and can be NULL + struct vk_signal *sig; + VkPipelineStageFlags sig_stage; + VkSemaphore ext_dep; // external semaphore, not owned by the ra_tex }; +void ra_tex_vk_external_dep(struct ra *ra, struct ra_tex *tex, VkSemaphore dep) +{ + struct ra_tex_vk *tex_vk = tex->priv; + assert(!tex_vk->ext_dep); + tex_vk->ext_dep = dep; +} + // Small helper to ease image barrier creation. 
if `discard` is set, the contents // of the image will be undefined after the barrier -static void tex_barrier(struct vk_cmd *cmd, struct ra_tex_vk *tex_vk, - VkPipelineStageFlags newStage, VkAccessFlags newAccess, +static void tex_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_tex *tex, + VkPipelineStageFlags stage, VkAccessFlags newAccess, VkImageLayout newLayout, bool discard) { + struct mpvk_ctx *vk = ra_vk_get(ra); + struct ra_tex_vk *tex_vk = tex->priv; + + if (tex_vk->ext_dep) { + vk_cmd_dep(cmd, tex_vk->ext_dep, stage); + tex_vk->ext_dep = NULL; + } + VkImageMemoryBarrier imgBarrier = { .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .oldLayout = tex_vk->current_layout, @@ -323,18 +362,43 @@ static void tex_barrier(struct vk_cmd *cmd, struct ra_tex_vk *tex_vk, imgBarrier.srcAccessMask = 0; } - if (imgBarrier.oldLayout != imgBarrier.newLayout || - imgBarrier.srcAccessMask != imgBarrier.dstAccessMask) - { - vkCmdPipelineBarrier(cmd->buf, tex_vk->current_stage, newStage, 0, - 0, NULL, 0, NULL, 1, &imgBarrier); + VkEvent event = NULL; + vk_cmd_wait(vk, cmd, &tex_vk->sig, stage, &event); + + bool need_trans = tex_vk->current_layout != newLayout || + tex_vk->current_access != newAccess; + + // Transitioning to VK_IMAGE_LAYOUT_UNDEFINED is a pseudo-operation + // that for us means we don't need to perform the actual transition + if (need_trans && newLayout != VK_IMAGE_LAYOUT_UNDEFINED) { + if (event) { + vkCmdWaitEvents(cmd->buf, 1, &event, tex_vk->sig_stage, + stage, 0, NULL, 0, NULL, 1, &imgBarrier); + } else { + // If we're not using an event, then the source stage is irrelevant + // because we're coming from a different queue anyway, so we can + // safely set it to TOP_OF_PIPE. 
+ imgBarrier.srcAccessMask = 0; + vkCmdPipelineBarrier(cmd->buf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + stage, 0, 0, NULL, 0, NULL, 1, &imgBarrier); + } } - tex_vk->current_stage = newStage; tex_vk->current_layout = newLayout; tex_vk->current_access = newAccess; } +static void tex_signal(struct ra *ra, struct vk_cmd *cmd, struct ra_tex *tex, + VkPipelineStageFlags stage) +{ + struct ra_tex_vk *tex_vk = tex->priv; + struct mpvk_ctx *vk = ra_vk_get(ra); + assert(!tex_vk->sig); + + tex_vk->sig = vk_cmd_signal(vk, cmd, stage); + tex_vk->sig_stage = stage; +} + static void vk_tex_destroy(struct ra *ra, struct ra_tex *tex) { if (!tex) @@ -344,6 +408,7 @@ static void vk_tex_destroy(struct ra *ra, struct ra_tex *tex) struct ra_tex_vk *tex_vk = tex->priv; ra_buf_pool_uninit(ra, &tex_vk->pbo); + vk_signal_destroy(vk, &tex_vk->sig); vkDestroyFramebuffer(vk->dev, tex_vk->framebuffer, MPVK_ALLOCATOR); vkDestroyRenderPass(vk->dev, tex_vk->dummyPass, MPVK_ALLOCATOR); vkDestroySampler(vk->dev, tex_vk->sampler, MPVK_ALLOCATOR); @@ -368,7 +433,6 @@ static bool vk_init_image(struct ra *ra, struct ra_tex *tex) assert(tex_vk->img); tex_vk->current_layout = VK_IMAGE_LAYOUT_UNDEFINED; - tex_vk->current_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; tex_vk->current_access = 0; if (params->render_src || params->render_dst) { @@ -415,7 +479,11 @@ static bool vk_init_image(struct ra *ra, struct ra_tex *tex) // Framebuffers need to be created against a specific render pass // layout, so we need to temporarily create a skeleton/dummy render // pass for vulkan to figure out the compatibility - VK(vk_create_render_pass(vk->dev, params->format, false, &tex_vk->dummyPass)); + VK(vk_create_render_pass(vk->dev, params->format, + VK_ATTACHMENT_LOAD_OP_DONT_CARE, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + &tex_vk->dummyPass)); VkFramebufferCreateInfo finfo = { .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, @@ -444,12 +512,14 @@ static struct ra_tex *vk_tex_create(struct 
ra *ra, const struct ra_tex_params *params) { struct mpvk_ctx *vk = ra_vk_get(ra); + assert(!params->format->dummy_format); struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); tex->params = *params; tex->params.initial_data = NULL; struct ra_tex_vk *tex_vk = tex->priv = talloc_zero(tex, struct ra_tex_vk); + tex_vk->upload_queue = GRAPHICS; const struct vk_format *fmt = params->format->priv; switch (params->dimensions) { @@ -471,6 +541,10 @@ static struct ra_tex *vk_tex_create(struct ra *ra, if (params->host_mutable || params->blit_dst || params->initial_data) usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; + // Always use the transfer pool if available, for efficiency + if (params->host_mutable && vk->pool_transfer) + tex_vk->upload_queue = TRANSFER; + // Double-check image usage support and fail immediately if invalid VkImageFormatProperties iprop; VkResult res = vkGetPhysicalDeviceImageFormatProperties(vk->physd, @@ -498,6 +572,14 @@ static struct ra_tex *vk_tex_create(struct ra *ra, return NULL; } + // FIXME: Since we can't keep track of queue family ownership properly, + // and we don't know in advance what types of queue families this image + // will belong to, we're forced to share all of our images between all + // command pools. + uint32_t qfs[3] = {0}; + for (int i = 0; i < vk->num_pools; i++) + qfs[i] = vk->pools[i]->qf; + VkImageCreateInfo iinfo = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .imageType = tex_vk->type, @@ -509,9 +591,10 @@ static struct ra_tex *vk_tex_create(struct ra *ra, .tiling = VK_IMAGE_TILING_OPTIMAL, .usage = usage, .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 1, - .pQueueFamilyIndices = &vk->pool->qf, + .sharingMode = vk->num_pools > 1 ? 
VK_SHARING_MODE_CONCURRENT + : VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = vk->num_pools, + .pQueueFamilyIndices = qfs, }; VK(vkCreateImage(vk->dev, &iinfo, MPVK_ALLOCATOR, &tex_vk->img)); @@ -602,6 +685,7 @@ struct ra_buf_vk { struct vk_bufslice slice; int refcount; // 1 = object allocated but not in use, > 1 = in use bool needsflush; + enum queue_type update_queue; // "current" metadata, can change during course of execution VkPipelineStageFlags current_stage; VkAccessFlags current_access; @@ -631,6 +715,8 @@ static void buf_barrier(struct ra *ra, struct vk_cmd *cmd, struct ra_buf *buf, .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = buf_vk->current_access, .dstAccessMask = newAccess, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = buf_vk->slice.buf, .offset = offset, .size = size, @@ -670,7 +756,7 @@ static void vk_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, memcpy((void *)addr, data, size); buf_vk->needsflush = true; } else { - struct vk_cmd *cmd = vk_require_cmd(ra); + struct vk_cmd *cmd = vk_require_cmd(ra, buf_vk->update_queue); if (!cmd) { MP_ERR(ra, "Failed updating buffer!\n"); return; @@ -706,6 +792,9 @@ static struct ra_buf *vk_buf_create(struct ra *ra, case RA_BUF_TYPE_TEX_UPLOAD: bufFlags |= VK_BUFFER_USAGE_TRANSFER_SRC_BIT; memFlags |= VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; + // Use TRANSFER-style updates for large enough buffers for efficiency + if (params->size > 1024*1024) // 1 MB + buf_vk->update_queue = TRANSFER; break; case RA_BUF_TYPE_UNIFORM: bufFlags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; @@ -716,6 +805,7 @@ static struct ra_buf *vk_buf_create(struct ra *ra, bufFlags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; memFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; align = MP_ALIGN_UP(align, vk->limits.minStorageBufferOffsetAlignment); + buf_vk->update_queue = COMPUTE; break; case RA_BUF_TYPE_VERTEX: bufFlags |= VK_BUFFER_USAGE_VERTEX_BUFFER_BIT; 
@@ -802,14 +892,14 @@ static bool vk_tex_upload(struct ra *ra, uint64_t size = region.bufferRowLength * region.bufferImageHeight * region.imageExtent.depth; - struct vk_cmd *cmd = vk_require_cmd(ra); + struct vk_cmd *cmd = vk_require_cmd(ra, tex_vk->upload_queue); if (!cmd) goto error; buf_barrier(ra, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, region.bufferOffset, size); - tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_TRANSFER_BIT, + tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, params->invalidate); @@ -817,6 +907,8 @@ static bool vk_tex_upload(struct ra *ra, vkCmdCopyBufferToImage(cmd->buf, buf_vk->slice.buf, tex_vk->img, tex_vk->current_layout, 1, &region); + tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT); + return true; error: @@ -831,6 +923,8 @@ struct ra_renderpass_vk { VkPipeline pipe; VkPipelineLayout pipeLayout; VkRenderPass renderPass; + VkImageLayout initialLayout; + VkImageLayout finalLayout; // Descriptor set (bindings) VkDescriptorSetLayout dsLayout; VkDescriptorPool dsPool; @@ -1158,8 +1252,27 @@ static struct ra_renderpass *vk_renderpass_create(struct ra *ra, goto error; } } - VK(vk_create_render_pass(vk->dev, params->target_format, - params->enable_blend, &pass_vk->renderPass)); + + // This is the most common case, so optimize towards it. 
In this case, + // the renderpass will take care of almost all layout transitions + pass_vk->initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + pass_vk->finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + VkAttachmentLoadOp loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + + // If we're blending, then we need to explicitly load the previous + // contents of the color attachment + if (pass->params.enable_blend) + loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; + + // If we're invalidating the target, we don't need to load or transition + if (pass->params.invalidate_target) { + pass_vk->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + } + + VK(vk_create_render_pass(vk->dev, params->target_format, loadOp, + pass_vk->initialLayout, pass_vk->finalLayout, + &pass_vk->renderPass)); static const VkBlendFactor blendFactors[] = { [RA_BLEND_ZERO] = VK_BLEND_FACTOR_ZERO, @@ -1312,6 +1425,11 @@ error: return pass; } +static const VkPipelineStageFlags passStages[] = { + [RA_RENDERPASS_TYPE_RASTER] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, +}; + static void vk_update_descriptor(struct ra *ra, struct vk_cmd *cmd, struct ra_renderpass *pass, struct ra_renderpass_input_val val, @@ -1329,18 +1447,13 @@ static void vk_update_descriptor(struct ra *ra, struct vk_cmd *cmd, .descriptorType = dsType[inp->type], }; - static const VkPipelineStageFlags passStages[] = { - [RA_RENDERPASS_TYPE_RASTER] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - [RA_RENDERPASS_TYPE_COMPUTE] = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - }; - switch (inp->type) { case RA_VARTYPE_TEX: { struct ra_tex *tex = *(struct ra_tex **)val.data; struct ra_tex_vk *tex_vk = tex->priv; assert(tex->params.render_src); - tex_barrier(cmd, tex_vk, passStages[pass->params.type], + tex_barrier(ra, cmd, tex, passStages[pass->params.type], VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, false); @@ -1359,7 +1472,7 
@@ static void vk_update_descriptor(struct ra *ra, struct vk_cmd *cmd, struct ra_tex_vk *tex_vk = tex->priv; assert(tex->params.storage_dst); - tex_barrier(cmd, tex_vk, passStages[pass->params.type], + tex_barrier(ra, cmd, tex, passStages[pass->params.type], VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL, false); @@ -1397,6 +1510,22 @@ static void vk_update_descriptor(struct ra *ra, struct vk_cmd *cmd, } } +static void vk_release_descriptor(struct ra *ra, struct vk_cmd *cmd, + struct ra_renderpass *pass, + struct ra_renderpass_input_val val) +{ + struct ra_renderpass_input *inp = &pass->params.inputs[val.index]; + + switch (inp->type) { + case RA_VARTYPE_IMG_W: + case RA_VARTYPE_TEX: { + struct ra_tex *tex = *(struct ra_tex **)val.data; + tex_signal(ra, cmd, tex, passStages[pass->params.type]); + break; + } + } +} + static void vk_renderpass_run(struct ra *ra, const struct ra_renderpass_run_params *params) { @@ -1404,7 +1533,12 @@ static void vk_renderpass_run(struct ra *ra, struct ra_renderpass *pass = params->pass; struct ra_renderpass_vk *pass_vk = pass->priv; - struct vk_cmd *cmd = vk_require_cmd(ra); + static const enum queue_type types[] = { + [RA_RENDERPASS_TYPE_RASTER] = GRAPHICS, + [RA_RENDERPASS_TYPE_COMPUTE] = COMPUTE, + }; + + struct vk_cmd *cmd = vk_require_cmd(ra, types[pass->params.type]); if (!cmd) goto error; @@ -1469,13 +1603,9 @@ static void vk_renderpass_run(struct ra *ra, vkCmdBindVertexBuffers(cmd->buf, 0, 1, &buf_vk->slice.buf, &buf_vk->slice.mem.offset); - if (pass->params.enable_blend) { - // Normally this transition is handled implicitly by the renderpass, - // but if we need to preserve the FBO we have to do it manually. 
- tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, - VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, false); - } + tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, pass_vk->initialLayout, + pass->params.invalidate_target); VkViewport viewport = { .x = params->viewport.x0, @@ -1504,14 +1634,21 @@ static void vk_renderpass_run(struct ra *ra, vkCmdEndRenderPass(cmd->buf); // The renderPass implicitly transitions the texture to this layout - tex_vk->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - tex_vk->current_access = VK_ACCESS_SHADER_READ_BIT; - tex_vk->current_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + tex_vk->current_layout = pass_vk->finalLayout; + tex_vk->current_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); break; } default: abort(); }; + for (int i = 0; i < params->num_values; i++) + vk_release_descriptor(ra, cmd, pass, params->values[i]); + + // flush the work so far into its own command buffer, for better cross-frame + // granularity + vk_submit(ra); + error: return; } @@ -1525,11 +1662,11 @@ static void vk_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, struct ra_tex_vk *src_vk = src->priv; struct ra_tex_vk *dst_vk = dst->priv; - struct vk_cmd *cmd = vk_require_cmd(ra); + struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS); if (!cmd) return; - tex_barrier(cmd, src_vk, VK_PIPELINE_STAGE_TRANSFER_BIT, + tex_barrier(ra, cmd, src, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, false); @@ -1539,20 +1676,46 @@ static void vk_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, dst_rc->x1 == dst->params.w && dst_rc->y1 == dst->params.h; - tex_barrier(cmd, dst_vk, VK_PIPELINE_STAGE_TRANSFER_BIT, + tex_barrier(ra, cmd, dst, VK_PIPELINE_STAGE_TRANSFER_BIT, 
VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, discard); - VkImageBlit region = { - .srcSubresource = vk_layers, - .srcOffsets = {{src_rc->x0, src_rc->y0, 0}, {src_rc->x1, src_rc->y1, 1}}, - .dstSubresource = vk_layers, - .dstOffsets = {{dst_rc->x0, dst_rc->y0, 0}, {dst_rc->x1, dst_rc->y1, 1}}, - }; + // Under certain conditions we can use vkCmdCopyImage instead of + // vkCmdBlitImage, namely when the blit operation does not require + // scaling and the formats are compatible. + if (src->params.format->pixel_size == dst->params.format->pixel_size && + mp_rect_w(*src_rc) == mp_rect_w(*dst_rc) && + mp_rect_h(*src_rc) == mp_rect_h(*dst_rc) && + mp_rect_w(*src_rc) >= 0 && mp_rect_h(*src_rc) >= 0) + { + VkImageCopy region = { + .srcSubresource = vk_layers, + .dstSubresource = vk_layers, + .srcOffset = {src_rc->x0, src_rc->y0, 0}, + .dstOffset = {dst_rc->x0, dst_rc->y0, 0}, + .extent = {mp_rect_w(*src_rc), mp_rect_h(*src_rc), 1}, + }; - vkCmdBlitImage(cmd->buf, src_vk->img, src_vk->current_layout, dst_vk->img, - dst_vk->current_layout, 1, &region, VK_FILTER_NEAREST); + vkCmdCopyImage(cmd->buf, src_vk->img, src_vk->current_layout, + dst_vk->img, dst_vk->current_layout, 1, &region); + } else { + VkImageBlit region = { + .srcSubresource = vk_layers, + .dstSubresource = vk_layers, + .srcOffsets = {{src_rc->x0, src_rc->y0, 0}, + {src_rc->x1, src_rc->y1, 1}}, + .dstOffsets = {{dst_rc->x0, dst_rc->y0, 0}, + {dst_rc->x1, dst_rc->y1, 1}}, + }; + + vkCmdBlitImage(cmd->buf, src_vk->img, src_vk->current_layout, + dst_vk->img, dst_vk->current_layout, 1, &region, + VK_FILTER_NEAREST); + } + + tex_signal(ra, cmd, src, VK_PIPELINE_STAGE_TRANSFER_BIT); + tex_signal(ra, cmd, dst, VK_PIPELINE_STAGE_TRANSFER_BIT); } static void vk_clear(struct ra *ra, struct ra_tex *tex, float color[4], @@ -1562,14 +1725,14 @@ static void vk_clear(struct ra *ra, struct ra_tex *tex, float color[4], struct ra_tex_vk *tex_vk = tex->priv; assert(tex->params.blit_dst); - struct vk_cmd *cmd = 
vk_require_cmd(ra); + struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS); if (!cmd) return; struct mp_rect full = {0, 0, tex->params.w, tex->params.h}; if (!rc || mp_rect_equals(rc, &full)) { // To clear the entire image, we can use the efficient clear command - tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_TRANSFER_BIT, + tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, true); @@ -1579,6 +1742,8 @@ static void vk_clear(struct ra *ra, struct ra_tex *tex, float color[4], vkCmdClearColorImage(cmd->buf, tex_vk->img, tex_vk->current_layout, &clearColor, 1, &vk_range); + + tex_signal(ra, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT); } else { // To simulate per-region clearing, we blit from a 1x1 texture instead struct ra_tex_upload_params ul_params = { @@ -1600,6 +1765,7 @@ static int vk_desc_namespace(enum ra_vartype type) struct vk_timer { VkQueryPool pool; + int index_seen; // keeps track of which indices have been used at least once int index; uint64_t result; }; @@ -1624,6 +1790,7 @@ static ra_timer *vk_timer_create(struct ra *ra) struct mpvk_ctx *vk = ra_vk_get(ra); struct vk_timer *timer = talloc_zero(NULL, struct vk_timer); + timer->index_seen = -1; struct VkQueryPoolCreateInfo qinfo = { .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, @@ -1643,7 +1810,7 @@ error: static void vk_timer_record(struct ra *ra, VkQueryPool pool, int index, VkPipelineStageFlags stage) { - struct vk_cmd *cmd = vk_require_cmd(ra); + struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS); if (!cmd) return; @@ -1655,12 +1822,15 @@ static void vk_timer_start(struct ra *ra, ra_timer *ratimer) struct mpvk_ctx *vk = ra_vk_get(ra); struct vk_timer *timer = ratimer; - timer->index = (timer->index + 2) % VK_QUERY_POOL_SIZE; - + VkResult res = VK_NOT_READY; uint64_t out[2]; - VkResult res = vkGetQueryPoolResults(vk->dev, timer->pool, timer->index, 2, - sizeof(out), &out[0], sizeof(uint64_t), - VK_QUERY_RESULT_64_BIT); + + if 
(timer->index <= timer->index_seen) { + res = vkGetQueryPoolResults(vk->dev, timer->pool, timer->index, 2, + sizeof(out), &out[0], sizeof(uint64_t), + VK_QUERY_RESULT_64_BIT); + } + switch (res) { case VK_SUCCESS: timer->result = (out[1] - out[0]) * vk->limits.timestampPeriod; @@ -1683,6 +1853,9 @@ static uint64_t vk_timer_stop(struct ra *ra, ra_timer *ratimer) vk_timer_record(ra, timer->pool, timer->index + 1, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT); + timer->index_seen = MPMAX(timer->index_seen, timer->index); + timer->index = (timer->index + 2) % VK_QUERY_POOL_SIZE; + return timer->result; } @@ -1709,39 +1882,20 @@ static struct ra_fns ra_fns_vk = { .timer_stop = vk_timer_stop, }; -static void present_cb(void *priv, int *inflight) -{ - *inflight -= 1; -} - -bool ra_vk_submit(struct ra *ra, struct ra_tex *tex, VkSemaphore acquired, - VkSemaphore *done, int *inflight) +struct vk_cmd *ra_vk_submit(struct ra *ra, struct ra_tex *tex) { - struct vk_cmd *cmd = vk_require_cmd(ra); + struct ra_vk *p = ra->priv; + struct vk_cmd *cmd = vk_require_cmd(ra, GRAPHICS); if (!cmd) - goto error; - - if (inflight) { - *inflight += 1; - vk_cmd_callback(cmd, (vk_cb)present_cb, NULL, inflight); - } + return NULL; struct ra_tex_vk *tex_vk = tex->priv; assert(tex_vk->external_img); - tex_barrier(cmd, tex_vk, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, - VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, false); - - // These are the only two stages that we use/support for actually - // outputting to swapchain imagechain images, so just add a dependency - // on both of them. In theory, we could maybe come up with some more - // advanced mechanism of tracking dynamic dependencies, but that seems - // like overkill. 
- vk_cmd_dep(cmd, acquired, - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | - VK_PIPELINE_STAGE_TRANSFER_BIT); - - return vk_flush(ra, done); + tex_barrier(ra, cmd, tex, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + VK_ACCESS_MEMORY_READ_BIT, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + false); -error: - return false; + // Return this directly instead of going through vk_submit + p->cmd = NULL; + return cmd; } diff --git a/video/out/vulkan/ra_vk.h b/video/out/vulkan/ra_vk.h index 893421b..da613c7 100644 --- a/video/out/vulkan/ra_vk.h +++ b/video/out/vulkan/ra_vk.h @@ -16,15 +16,15 @@ VkDevice ra_vk_get_dev(struct ra *ra); struct ra_tex *ra_vk_wrap_swapchain_img(struct ra *ra, VkImage vkimg, VkSwapchainCreateInfoKHR info); -// This function flushes the command buffers, transitions `tex` (which must be -// a wrapped swapchain image) into a format suitable for presentation, and -// submits the current rendering commands. The indicated semaphore must fire -// before the submitted command can run. If `done` is non-NULL, it will be -// set to a semaphore that fires once the command completes. If `inflight` -// is non-NULL, it will be incremented when the command starts and decremented -// when it completes. -bool ra_vk_submit(struct ra *ra, struct ra_tex *tex, VkSemaphore acquired, - VkSemaphore *done, int *inflight); +// Associates an external semaphore (dependency) with a ra_tex, such that this +// ra_tex will not be used by the ra_vk until the external semaphore fires. +void ra_tex_vk_external_dep(struct ra *ra, struct ra_tex *tex, VkSemaphore dep); + +// This function finalizes rendering, transitions `tex` (which must be a +// wrapped swapchain image) into a format suitable for presentation, and returns +// the resulting command buffer (or NULL on error). The caller may add their +// own semaphores to this command buffer, and must submit it afterwards. +struct vk_cmd *ra_vk_submit(struct ra *ra, struct ra_tex *tex); // May be called on a struct ra of any type. 
Returns NULL if the ra is not // a vulkan ra. diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c index baf0ebc..cfe9737 100644 --- a/video/out/vulkan/utils.c +++ b/video/out/vulkan/utils.c @@ -128,20 +128,10 @@ static VkBool32 vk_dbg_callback(VkDebugReportFlagsEXT flags, return (flags & VK_DEBUG_REPORT_ERROR_BIT_EXT); } -static void vk_cmdpool_uninit(struct mpvk_ctx *vk, struct vk_cmdpool *pool) -{ - if (!pool) - return; - - // also frees associated command buffers - vkDestroyCommandPool(vk->dev, pool->pool, MPVK_ALLOCATOR); - for (int n = 0; n < MPVK_MAX_CMDS; n++) { - vkDestroyFence(vk->dev, pool->cmds[n].fence, MPVK_ALLOCATOR); - vkDestroySemaphore(vk->dev, pool->cmds[n].done, MPVK_ALLOCATOR); - talloc_free(pool->cmds[n].callbacks); - } - talloc_free(pool); -} +static void vk_cmdpool_destroy(struct mpvk_ctx *vk, struct vk_cmdpool *pool); +static struct vk_cmdpool *vk_cmdpool_create(struct mpvk_ctx *vk, + VkDeviceQueueCreateInfo qinfo, + VkQueueFamilyProperties props); void mpvk_uninit(struct mpvk_ctx *vk) { @@ -149,7 +139,18 @@ void mpvk_uninit(struct mpvk_ctx *vk) return; if (vk->dev) { - vk_cmdpool_uninit(vk, vk->pool); + mpvk_flush_commands(vk); + mpvk_poll_commands(vk, UINT64_MAX); + assert(vk->num_cmds_queued == 0); + assert(vk->num_cmds_pending == 0); + talloc_free(vk->cmds_queued); + talloc_free(vk->cmds_pending); + for (int i = 0; i < vk->num_pools; i++) + vk_cmdpool_destroy(vk, vk->pools[i]); + talloc_free(vk->pools); + for (int i = 0; i < vk->num_signals; i++) + vk_signal_destroy(vk, &vk->signals[i]); + talloc_free(vk->signals); vk_malloc_uninit(vk); vkDestroyDevice(vk->dev, MPVK_ALLOCATOR); } @@ -315,6 +316,7 @@ bool mpvk_find_phys_device(struct mpvk_ctx *vk, const char *name, bool sw) (int)VK_VERSION_PATCH(prop.apiVersion)); vk->physd = devices[i]; vk->limits = prop.limits; + vkGetPhysicalDeviceFeatures(vk->physd, &vk->features); talloc_free(devices); return true; } @@ -384,62 +386,56 @@ error: return false; } -static bool 
vk_cmdpool_init(struct mpvk_ctx *vk, VkDeviceQueueCreateInfo qinfo, - VkQueueFamilyProperties props, - struct vk_cmdpool **out) +// Find the most specialized queue supporting a combination of flags. In cases +// where there are multiple queue families at the same specialization level, +// this finds the one with the most queues. Returns -1 if no queue was found. +static int find_qf(VkQueueFamilyProperties *qfs, int qfnum, VkQueueFlags flags) { - struct vk_cmdpool *pool = *out = talloc_ptrtype(NULL, pool); - *pool = (struct vk_cmdpool) { - .qf = qinfo.queueFamilyIndex, - .props = props, - .qcount = qinfo.queueCount, - }; - - for (int n = 0; n < pool->qcount; n++) - vkGetDeviceQueue(vk->dev, pool->qf, n, &pool->queues[n]); - - VkCommandPoolCreateInfo cinfo = { - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | - VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - .queueFamilyIndex = pool->qf, - }; - - VK(vkCreateCommandPool(vk->dev, &cinfo, MPVK_ALLOCATOR, &pool->pool)); - - VkCommandBufferAllocateInfo ainfo = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .commandPool = pool->pool, - .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = MPVK_MAX_CMDS, - }; + int idx = -1; + for (int i = 0; i < qfnum; i++) { + if (!(qfs[i].queueFlags & flags)) + continue; - VkCommandBuffer cmdbufs[MPVK_MAX_CMDS]; - VK(vkAllocateCommandBuffers(vk->dev, &ainfo, cmdbufs)); + // QF is more specialized. 
Since we don't care about other bits like + // SPARSE_BIT, mask the ones we're interestew in + const VkQueueFlags mask = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_TRANSFER_BIT | + VK_QUEUE_COMPUTE_BIT; - for (int n = 0; n < MPVK_MAX_CMDS; n++) { - struct vk_cmd *cmd = &pool->cmds[n]; - cmd->pool = pool; - cmd->buf = cmdbufs[n]; + if (idx < 0 || (qfs[i].queueFlags & mask) < (qfs[idx].queueFlags & mask)) + idx = i; - VkFenceCreateInfo finfo = { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - .flags = VK_FENCE_CREATE_SIGNALED_BIT, - }; + // QF has more queues (at the same specialization level) + if (qfs[i].queueFlags == qfs[idx].queueFlags && + qfs[i].queueCount > qfs[idx].queueCount) + idx = i; + } - VK(vkCreateFence(vk->dev, &finfo, MPVK_ALLOCATOR, &cmd->fence)); + return idx; +} - VkSemaphoreCreateInfo sinfo = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - }; +static void add_qinfo(void *tactx, VkDeviceQueueCreateInfo **qinfos, + int *num_qinfos, VkQueueFamilyProperties *qfs, int idx, + int qcount) +{ + if (idx < 0) + return; - VK(vkCreateSemaphore(vk->dev, &sinfo, MPVK_ALLOCATOR, &cmd->done)); + // Check to see if we've already added this queue family + for (int i = 0; i < *num_qinfos; i++) { + if ((*qinfos)[i].queueFamilyIndex == idx) + return; } - return true; + float *priorities = talloc_zero_array(tactx, float, qcount); + VkDeviceQueueCreateInfo qinfo = { + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .queueFamilyIndex = idx, + .queueCount = MPMIN(qcount, qfs[idx].queueCount), + .pQueuePriorities = priorities, + }; -error: - return false; + MP_TARRAY_APPEND(tactx, *qinfos, *num_qinfos, qinfo); } bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts) @@ -460,47 +456,42 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts) (unsigned)qfs[i].queueFlags, (int)qfs[i].queueCount); } - // For most of our rendering operations, we want to use one "primary" pool, - // so just pick the queue family with the most 
features. - int idx = -1; - for (int i = 0; i < qfnum; i++) { - if (!(qfs[i].queueFlags & VK_QUEUE_GRAPHICS_BIT)) - continue; - - // QF supports more features - if (idx < 0 || qfs[i].queueFlags > qfs[idx].queueFlags) - idx = i; - - // QF supports more queues (at the same specialization level) - if (qfs[i].queueFlags == qfs[idx].queueFlags && - qfs[i].queueCount > qfs[idx].queueCount) - { - idx = i; - } - } + int idx_gfx = -1, idx_comp = -1, idx_tf = -1; + idx_gfx = find_qf(qfs, qfnum, VK_QUEUE_GRAPHICS_BIT); + if (opts.async_compute) + idx_comp = find_qf(qfs, qfnum, VK_QUEUE_COMPUTE_BIT); + if (opts.async_transfer) + idx_tf = find_qf(qfs, qfnum, VK_QUEUE_TRANSFER_BIT); // Vulkan requires at least one GRAPHICS queue, so if this fails something // is horribly wrong. - assert(idx >= 0); + assert(idx_gfx >= 0); + MP_VERBOSE(vk, "Using graphics queue (QF %d)\n", idx_gfx); // Ensure we can actually present to the surface using this queue VkBool32 sup; - VK(vkGetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx, vk->surf, &sup)); + VK(vkGetPhysicalDeviceSurfaceSupportKHR(vk->physd, idx_gfx, vk->surf, &sup)); if (!sup) { MP_ERR(vk, "Queue family does not support surface presentation!\n"); goto error; } - // Now that we know which queue families we want, we can create the logical - // device - assert(opts.queue_count <= MPVK_MAX_QUEUES); - static const float priorities[MPVK_MAX_QUEUES] = {0}; - VkDeviceQueueCreateInfo qinfo = { - .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, - .queueFamilyIndex = idx, - .queueCount = MPMIN(qfs[idx].queueCount, opts.queue_count), - .pQueuePriorities = priorities, - }; + if (idx_tf >= 0 && idx_tf != idx_gfx) + MP_VERBOSE(vk, "Using async transfer (QF %d)\n", idx_tf); + if (idx_comp >= 0 && idx_comp != idx_gfx) + MP_VERBOSE(vk, "Using async compute (QF %d)\n", idx_comp); + + // Fall back to supporting compute shaders via the graphics pool for + // devices which support compute shaders but not async compute. 
+ if (idx_comp < 0 && qfs[idx_gfx].queueFlags & VK_QUEUE_COMPUTE_BIT) + idx_comp = idx_gfx; + + // Now that we know which QFs we want, we can create the logical device + VkDeviceQueueCreateInfo *qinfos = NULL; + int num_qinfos = 0; + add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_gfx, opts.queue_count); + add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_comp, opts.queue_count); + add_qinfo(tmp, &qinfos, &num_qinfos, qfs, idx_tf, opts.queue_count); const char **exts = NULL; int num_exts = 0; @@ -508,12 +499,21 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts) if (vk->spirv->required_ext) MP_TARRAY_APPEND(tmp, exts, num_exts, vk->spirv->required_ext); + // Enable all features we optionally use +#define FEATURE(name) .name = vk->features.name + VkPhysicalDeviceFeatures feats = { + FEATURE(shaderImageGatherExtended), + FEATURE(shaderStorageImageExtendedFormats), + }; +#undef FEATURE + VkDeviceCreateInfo dinfo = { .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, - .queueCreateInfoCount = 1, - .pQueueCreateInfos = &qinfo, + .pQueueCreateInfos = qinfos, + .queueCreateInfoCount = num_qinfos, .ppEnabledExtensionNames = exts, .enabledExtensionCount = num_exts, + .pEnabledFeatures = &feats, }; MP_VERBOSE(vk, "Creating vulkan device with extensions:\n"); @@ -522,12 +522,24 @@ bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts) VK(vkCreateDevice(vk->physd, &dinfo, MPVK_ALLOCATOR, &vk->dev)); - vk_malloc_init(vk); - - // Create the vk_cmdpools and all required queues / synchronization objects - if (!vk_cmdpool_init(vk, qinfo, qfs[idx], &vk->pool)) - goto error; + // Create the command pools and memory allocator + for (int i = 0; i < num_qinfos; i++) { + int qf = qinfos[i].queueFamilyIndex; + struct vk_cmdpool *pool = vk_cmdpool_create(vk, qinfos[i], qfs[qf]); + if (!pool) + goto error; + MP_TARRAY_APPEND(NULL, vk->pools, vk->num_pools, pool); + + // Update the pool_* pointers based on the corresponding QF index + if (qf == idx_gfx) + 
vk->pool_graphics = pool; + if (qf == idx_comp) + vk->pool_compute = pool; + if (qf == idx_tf) + vk->pool_transfer = pool; + } + vk_malloc_init(vk); talloc_free(tmp); return true; @@ -537,83 +549,197 @@ error: return false; } -static void run_callbacks(struct mpvk_ctx *vk, struct vk_cmd *cmd) +// returns VK_SUCCESS (completed), VK_TIMEOUT (not yet completed) or an error +static VkResult vk_cmd_poll(struct mpvk_ctx *vk, struct vk_cmd *cmd, + uint64_t timeout) +{ + return vkWaitForFences(vk->dev, 1, &cmd->fence, false, timeout); +} + +static void vk_cmd_reset(struct mpvk_ctx *vk, struct vk_cmd *cmd) { for (int i = 0; i < cmd->num_callbacks; i++) { struct vk_callback *cb = &cmd->callbacks[i]; cb->run(cb->priv, cb->arg); - *cb = (struct vk_callback){0}; } cmd->num_callbacks = 0; + cmd->num_deps = 0; + cmd->num_sigs = 0; - // Also reset vk->last_cmd in case this was the last command to run + // also make sure to reset vk->last_cmd in case this was the last command if (vk->last_cmd == cmd) vk->last_cmd = NULL; } -static void wait_for_cmds(struct mpvk_ctx *vk, struct vk_cmd cmds[], int num) +static void vk_cmd_destroy(struct mpvk_ctx *vk, struct vk_cmd *cmd) { - if (!num) + if (!cmd) return; - VkFence fences[MPVK_MAX_CMDS]; - for (int i = 0; i < num; i++) - fences[i] = cmds[i].fence; + vk_cmd_poll(vk, cmd, UINT64_MAX); + vk_cmd_reset(vk, cmd); + vkDestroyFence(vk->dev, cmd->fence, MPVK_ALLOCATOR); + vkFreeCommandBuffers(vk->dev, cmd->pool->pool, 1, &cmd->buf); + + talloc_free(cmd); +} - vkWaitForFences(vk->dev, num, fences, true, UINT64_MAX); +static struct vk_cmd *vk_cmd_create(struct mpvk_ctx *vk, struct vk_cmdpool *pool) +{ + struct vk_cmd *cmd = talloc_zero(NULL, struct vk_cmd); + cmd->pool = pool; - for (int i = 0; i < num; i++) - run_callbacks(vk, &cmds[i]); + VkCommandBufferAllocateInfo ainfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = pool->pool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = 1, + }; + + 
VK(vkAllocateCommandBuffers(vk->dev, &ainfo, &cmd->buf)); + + VkFenceCreateInfo finfo = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = VK_FENCE_CREATE_SIGNALED_BIT, + }; + + VK(vkCreateFence(vk->dev, &finfo, MPVK_ALLOCATOR, &cmd->fence)); + + return cmd; + +error: + vk_cmd_destroy(vk, cmd); + return NULL; } -void mpvk_pool_wait_idle(struct mpvk_ctx *vk, struct vk_cmdpool *pool) +void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg) { - if (!pool) - return; + MP_TARRAY_APPEND(cmd, cmd->callbacks, cmd->num_callbacks, (struct vk_callback) { + .run = callback, + .priv = p, + .arg = arg, + }); +} - int idx = pool->cindex, pidx = pool->cindex_pending; - if (pidx < idx) { // range doesn't wrap - wait_for_cmds(vk, &pool->cmds[pidx], idx - pidx); - } else if (pidx > idx) { // range wraps - wait_for_cmds(vk, &pool->cmds[pidx], MPVK_MAX_CMDS - pidx); - wait_for_cmds(vk, &pool->cmds[0], idx); - } - pool->cindex_pending = pool->cindex; +void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, VkPipelineStageFlags stage) +{ + int idx = cmd->num_deps++; + MP_TARRAY_GROW(cmd, cmd->deps, idx); + MP_TARRAY_GROW(cmd, cmd->depstages, idx); + cmd->deps[idx] = dep; + cmd->depstages[idx] = stage; } -void mpvk_dev_wait_idle(struct mpvk_ctx *vk) +void vk_cmd_sig(struct vk_cmd *cmd, VkSemaphore sig) { - mpvk_pool_wait_idle(vk, vk->pool); + MP_TARRAY_APPEND(cmd, cmd->sigs, cmd->num_sigs, sig); } -void mpvk_pool_poll_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool, - uint64_t timeout) +static void vk_cmdpool_destroy(struct mpvk_ctx *vk, struct vk_cmdpool *pool) { if (!pool) return; - // If requested, hard block until at least one command completes - if (timeout > 0 && pool->cindex_pending != pool->cindex) { - vkWaitForFences(vk->dev, 1, &pool->cmds[pool->cindex_pending].fence, - true, timeout); - } + for (int i = 0; i < pool->num_cmds; i++) + vk_cmd_destroy(vk, pool->cmds[i]); + + vkDestroyCommandPool(vk->dev, pool->pool, MPVK_ALLOCATOR); + 
talloc_free(pool); +} + +static struct vk_cmdpool *vk_cmdpool_create(struct mpvk_ctx *vk, + VkDeviceQueueCreateInfo qinfo, + VkQueueFamilyProperties props) +{ + struct vk_cmdpool *pool = talloc_ptrtype(NULL, pool); + *pool = (struct vk_cmdpool) { + .props = props, + .qf = qinfo.queueFamilyIndex, + .queues = talloc_array(pool, VkQueue, qinfo.queueCount), + .num_queues = qinfo.queueCount, + }; + + for (int n = 0; n < pool->num_queues; n++) + vkGetDeviceQueue(vk->dev, pool->qf, n, &pool->queues[n]); + + VkCommandPoolCreateInfo cinfo = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = pool->qf, + }; + + VK(vkCreateCommandPool(vk->dev, &cinfo, MPVK_ALLOCATOR, &pool->pool)); + + return pool; - // Lazily garbage collect the commands based on their status - while (pool->cindex_pending != pool->cindex) { - struct vk_cmd *cmd = &pool->cmds[pool->cindex_pending]; - VkResult res = vkGetFenceStatus(vk->dev, cmd->fence); - if (res != VK_SUCCESS) +error: + vk_cmdpool_destroy(vk, pool); + return NULL; +} + +void mpvk_poll_commands(struct mpvk_ctx *vk, uint64_t timeout) +{ + while (vk->num_cmds_pending > 0) { + struct vk_cmd *cmd = vk->cmds_pending[0]; + struct vk_cmdpool *pool = cmd->pool; + VkResult res = vk_cmd_poll(vk, cmd, timeout); + if (res == VK_TIMEOUT) break; - run_callbacks(vk, cmd); - pool->cindex_pending++; - pool->cindex_pending %= MPVK_MAX_CMDS; + vk_cmd_reset(vk, cmd); + MP_TARRAY_REMOVE_AT(vk->cmds_pending, vk->num_cmds_pending, 0); + MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd); } } -void mpvk_dev_poll_cmds(struct mpvk_ctx *vk, uint32_t timeout) +bool mpvk_flush_commands(struct mpvk_ctx *vk) { - mpvk_pool_poll_cmds(vk, vk->pool, timeout); + bool ret = true; + + for (int i = 0; i < vk->num_cmds_queued; i++) { + struct vk_cmd *cmd = vk->cmds_queued[i]; + struct vk_cmdpool *pool = cmd->pool; + + VkSubmitInfo sinfo = { + .sType = 
VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &cmd->buf, + .waitSemaphoreCount = cmd->num_deps, + .pWaitSemaphores = cmd->deps, + .pWaitDstStageMask = cmd->depstages, + .signalSemaphoreCount = cmd->num_sigs, + .pSignalSemaphores = cmd->sigs, + }; + + VK(vkQueueSubmit(cmd->queue, 1, &sinfo, cmd->fence)); + MP_TARRAY_APPEND(NULL, vk->cmds_pending, vk->num_cmds_pending, cmd); + + if (mp_msg_test(vk->log, MSGL_TRACE)) { + MP_TRACE(vk, "Submitted command on queue %p (QF %d):\n", + (void *)cmd->queue, pool->qf); + for (int n = 0; n < cmd->num_deps; n++) + MP_TRACE(vk, " waits on semaphore %p\n", (void *)cmd->deps[n]); + for (int n = 0; n < cmd->num_sigs; n++) + MP_TRACE(vk, " signals semaphore %p\n", (void *)cmd->sigs[n]); + } + continue; + +error: + vk_cmd_reset(vk, cmd); + MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd); + ret = false; + } + + vk->num_cmds_queued = 0; + + // Rotate the queues to ensure good parallelism across frames + for (int i = 0; i < vk->num_pools; i++) { + struct vk_cmdpool *pool = vk->pools[i]; + pool->idx_queues = (pool->idx_queues + 1) % pool->num_queues; + } + + return ret; } void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg) @@ -626,39 +752,22 @@ void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg) } } -void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg) -{ - MP_TARRAY_GROW(NULL, cmd->callbacks, cmd->num_callbacks); - cmd->callbacks[cmd->num_callbacks++] = (struct vk_callback) { - .run = callback, - .priv = p, - .arg = arg, - }; -} - -void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, - VkPipelineStageFlags depstage) -{ - assert(cmd->num_deps < MPVK_MAX_CMD_DEPS); - cmd->deps[cmd->num_deps] = dep; - cmd->depstages[cmd->num_deps++] = depstage; -} - struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool) { - // Garbage collect the cmdpool first - mpvk_pool_poll_cmds(vk, pool, 0); + // garbage collect the 
cmdpool first, to increase the chances of getting + // an already-available command buffer + mpvk_poll_commands(vk, 0); - int next = (pool->cindex + 1) % MPVK_MAX_CMDS; - if (next == pool->cindex_pending) { - MP_ERR(vk, "No free command buffers!\n"); - goto error; - } + struct vk_cmd *cmd = NULL; + if (MP_TARRAY_POP(pool->cmds, pool->num_cmds, &cmd)) + goto done; - struct vk_cmd *cmd = &pool->cmds[pool->cindex]; - pool->cindex = next; + // No free command buffers => allocate another one + cmd = vk_cmd_create(vk, pool); + if (!cmd) + goto error; - VK(vkResetCommandBuffer(cmd->buf, 0)); +done: ; VkCommandBufferBeginInfo binfo = { .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, @@ -667,54 +776,145 @@ struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool) VK(vkBeginCommandBuffer(cmd->buf, &binfo)); + cmd->queue = pool->queues[pool->idx_queues]; return cmd; error: + // Something has to be seriously messed up if we get to this point + vk_cmd_destroy(vk, cmd); return NULL; } -bool vk_cmd_submit(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore *done) +void vk_cmd_queue(struct mpvk_ctx *vk, struct vk_cmd *cmd) { + struct vk_cmdpool *pool = cmd->pool; + VK(vkEndCommandBuffer(cmd->buf)); - struct vk_cmdpool *pool = cmd->pool; - VkQueue queue = pool->queues[pool->qindex]; + VK(vkResetFences(vk->dev, 1, &cmd->fence)); + MP_TARRAY_APPEND(NULL, vk->cmds_queued, vk->num_cmds_queued, cmd); + vk->last_cmd = cmd; + return; - VkSubmitInfo sinfo = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .commandBufferCount = 1, - .pCommandBuffers = &cmd->buf, - .waitSemaphoreCount = cmd->num_deps, - .pWaitSemaphores = cmd->deps, - .pWaitDstStageMask = cmd->depstages, +error: + vk_cmd_reset(vk, cmd); + MP_TARRAY_APPEND(pool, pool->cmds, pool->num_cmds, cmd); +} + +void vk_signal_destroy(struct mpvk_ctx *vk, struct vk_signal **sig) +{ + if (!*sig) + return; + + vkDestroySemaphore(vk->dev, (*sig)->semaphore, MPVK_ALLOCATOR); + vkDestroyEvent(vk->dev, (*sig)->event, 
MPVK_ALLOCATOR); + talloc_free(*sig); + *sig = NULL; +} + +struct vk_signal *vk_cmd_signal(struct mpvk_ctx *vk, struct vk_cmd *cmd, + VkPipelineStageFlags stage) +{ + struct vk_signal *sig = NULL; + if (MP_TARRAY_POP(vk->signals, vk->num_signals, &sig)) + goto done; + + // no available signal => initialize a new one + sig = talloc_zero(NULL, struct vk_signal); + static const VkSemaphoreCreateInfo sinfo = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, }; - if (done) { - sinfo.signalSemaphoreCount = 1; - sinfo.pSignalSemaphores = &cmd->done; - *done = cmd->done; - } + VK(vkCreateSemaphore(vk->dev, &sinfo, MPVK_ALLOCATOR, &sig->semaphore)); - VK(vkResetFences(vk->dev, 1, &cmd->fence)); - VK(vkQueueSubmit(queue, 1, &sinfo, cmd->fence)); - MP_TRACE(vk, "Submitted command on queue %p (QF %d)\n", (void *)queue, - pool->qf); + static const VkEventCreateInfo einfo = { + .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, + }; - for (int i = 0; i < cmd->num_deps; i++) - cmd->deps[i] = NULL; - cmd->num_deps = 0; + VK(vkCreateEvent(vk->dev, &einfo, MPVK_ALLOCATOR, &sig->event)); - vk->last_cmd = cmd; - return true; +done: + // Signal both the semaphore and the event if possible. (We will only + // end up using one or the other) + vk_cmd_sig(cmd, sig->semaphore); + + VkQueueFlags req = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; + if (cmd->pool->props.queueFlags & req) { + vkCmdSetEvent(cmd->buf, sig->event, stage); + sig->event_source = cmd->queue; + } + + return sig; error: + vk_signal_destroy(vk, &sig); + return NULL; +} + +static bool unsignal_cmd(struct vk_cmd *cmd, VkSemaphore sem) +{ + for (int n = 0; n < cmd->num_sigs; n++) { + if (cmd->sigs[n] == sem) { + MP_TARRAY_REMOVE_AT(cmd->sigs, cmd->num_sigs, n); + return true; + } + } + return false; } -void vk_cmd_cycle_queues(struct mpvk_ctx *vk) +// Attempts to remove a queued signal operation. Returns true if successful, +// i.e. the signal could be removed before it ever got fired. 
+static bool unsignal(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore sem) { - struct vk_cmdpool *pool = vk->pool; - pool->qindex = (pool->qindex + 1) % pool->qcount; + if (unsignal_cmd(cmd, sem)) + return true; + + // Attempt to remove it from any queued commands + for (int i = 0; i < vk->num_cmds_queued; i++) { + if (unsignal_cmd(vk->cmds_queued[i], sem)) + return true; + } + + return false; +} + +static void release_signal(struct mpvk_ctx *vk, struct vk_signal *sig) +{ + // The semaphore never needs to be recreated, because it's either + // unsignaled while still queued, or unsignaled as a result of a device + // wait. But the event *may* need to be reset, so just always reset it. + if (sig->event_source) + vkResetEvent(vk->dev, sig->event); + sig->event_source = NULL; + MP_TARRAY_APPEND(NULL, vk->signals, vk->num_signals, sig); +} + +void vk_cmd_wait(struct mpvk_ctx *vk, struct vk_cmd *cmd, + struct vk_signal **sigptr, VkPipelineStageFlags stage, + VkEvent *out_event) +{ + struct vk_signal *sig = *sigptr; + if (!sig) + return; + + if (out_event && sig->event && sig->event_source == cmd->queue && + unsignal(vk, cmd, sig->semaphore)) + { + // If we can remove the semaphore signal operation from the history and + // pretend it never happened, then we get to use the VkEvent. This also + // requires that the VkEvent was signalled from the same VkQueue. + *out_event = sig->event; + } else if (sig->semaphore) { + // Otherwise, we use the semaphore. (This also unsignals it as a result + // of the command execution) + vk_cmd_dep(cmd, sig->semaphore, stage); + } + + // In either case, once the command completes, we can release the signal + // resource back to the pool. 
+ vk_cmd_callback(cmd, (vk_cb) release_signal, vk, sig); + *sigptr = NULL; } const VkImageSubresourceRange vk_range = { diff --git a/video/out/vulkan/utils.h b/video/out/vulkan/utils.h index 0cc8a29..2962313 100644 --- a/video/out/vulkan/utils.h +++ b/video/out/vulkan/utils.h @@ -55,22 +55,26 @@ bool mpvk_pick_surface_format(struct mpvk_ctx *vk); struct mpvk_device_opts { int queue_count; // number of queues to use + int async_transfer; // enable async transfer + int async_compute; // enable async compute }; // Create a logical device and initialize the vk_cmdpools bool mpvk_device_init(struct mpvk_ctx *vk, struct mpvk_device_opts opts); -// Wait until all commands submitted to all queues have completed -void mpvk_pool_wait_idle(struct mpvk_ctx *vk, struct vk_cmdpool *pool); -void mpvk_dev_wait_idle(struct mpvk_ctx *vk); +// Wait for all currently pending commands to have completed. This is the only +// function that actually processes the callbacks. Will wait at most `timeout` +// nanoseconds for the completion of each command. Using it with a value of +// UINT64_MAX effectively means waiting until the pool/device is idle. The +// timeout may also be passed as 0, in which case this function will not block, +// but only poll for completed commands. +void mpvk_poll_commands(struct mpvk_ctx *vk, uint64_t timeout); -// Wait until at least one command submitted to any queue has completed, and -// process the callbacks. Good for event loops that need to delay until a -// command completes. Will block at most `timeout` nanoseconds. If used with -// 0, it only garbage collects completed commands without blocking. -void mpvk_pool_poll_cmds(struct mpvk_ctx *vk, struct vk_cmdpool *pool, - uint64_t timeout); -void mpvk_dev_poll_cmds(struct mpvk_ctx *vk, uint32_t timeout); +// Flush all currently queued commands. Call this once per frame, after +// submitting all of the command buffers for that frame. Calling this more +// often than that is possible but bad for performance. 
+// Returns whether successful. Failed commands will be implicitly dropped. +bool mpvk_flush_commands(struct mpvk_ctx *vk); // Since lots of vulkan operations need to be done lazily once the affected // resources are no longer in use, provide an abstraction for tracking these. @@ -88,20 +92,22 @@ struct vk_callback { // This will essentially run once the device is completely idle. void vk_dev_callback(struct mpvk_ctx *vk, vk_cb callback, void *p, void *arg); -#define MPVK_MAX_CMD_DEPS 8 - // Helper wrapper around command buffers that also track dependencies, // callbacks and synchronization primitives struct vk_cmd { struct vk_cmdpool *pool; // pool it was allocated from - VkCommandBuffer buf; - VkFence fence; // the fence guards cmd buffer reuse - VkSemaphore done; // the semaphore signals when execution is done + VkQueue queue; // the submission queue (for recording/pending) + VkCommandBuffer buf; // the command buffer itself + VkFence fence; // the fence guards cmd buffer reuse // The semaphores represent dependencies that need to complete before // this command can be executed. These are *not* owned by the vk_cmd - VkSemaphore deps[MPVK_MAX_CMD_DEPS]; - VkPipelineStageFlags depstages[MPVK_MAX_CMD_DEPS]; + VkSemaphore *deps; + VkPipelineStageFlags *depstages; int num_deps; + // The signals represent semaphores that fire once the command finishes + // executing. These are also not owned by the vk_cmd + VkSemaphore *sigs; + int num_sigs; // Since VkFences are useless, we have to manually track "callbacks" // to fire once the VkFence completes. These are used for multiple purposes, // ranging from garbage collection (resource deallocation) to fencing. @@ -113,41 +119,64 @@ struct vk_cmd { // bool will be set to `true` once the command completes, or shortly thereafter. void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback, void *p, void *arg); -// Associate a dependency for the current command. 
This semaphore must signal -// by the corresponding stage before the command may execute. -void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, - VkPipelineStageFlags depstage); +// Associate a raw dependency for the current command. This semaphore must +// signal by the corresponding stage before the command may execute. +void vk_cmd_dep(struct vk_cmd *cmd, VkSemaphore dep, VkPipelineStageFlags stage); + +// Associate a raw signal with the current command. This semaphore will signal +// after the command completes. +void vk_cmd_sig(struct vk_cmd *cmd, VkSemaphore sig); + +// Signal abstraction: represents an abstract synchronization mechanism. +// Internally, this may either resolve as a semaphore or an event depending +// on whether the appropriate conditions are met. +struct vk_signal { + VkSemaphore semaphore; + VkEvent event; + VkQueue event_source; +}; -#define MPVK_MAX_QUEUES 8 -#define MPVK_MAX_CMDS 64 +// Generates a signal after the execution of all previous commands matching the +// given pipeline stage. The signal is owned by the caller, and must be +// consumed with vk_cmd_wait or released with vk_signal_destroy in order to +// free the resources. +struct vk_signal *vk_cmd_signal(struct mpvk_ctx *vk, struct vk_cmd *cmd, + VkPipelineStageFlags stage); + +// Consumes a previously generated signal. This signal must fire by the +// indicated stage before the command can run. If out_event is not NULL, then +// *out_event MAY be set to a VkEvent which the caller MUST manually wait on in the most +// appropriate way. This function takes over ownership of the signal (and the +// signal will be released/reused automatically) +void vk_cmd_wait(struct mpvk_ctx *vk, struct vk_cmd *cmd, + struct vk_signal **sigptr, VkPipelineStageFlags stage, + VkEvent *out_event); + +// Destroys a currently pending signal, for example if the resource is no +// longer relevant. 
+void vk_signal_destroy(struct mpvk_ctx *vk, struct vk_signal **sig); // Command pool / queue family hybrid abstraction struct vk_cmdpool { VkQueueFamilyProperties props; - uint32_t qf; // queue family index + int qf; // queue family index VkCommandPool pool; - VkQueue queues[MPVK_MAX_QUEUES]; - int qcount; - int qindex; - // Command buffers associated with this queue - struct vk_cmd cmds[MPVK_MAX_CMDS]; - int cindex; - int cindex_pending; + VkQueue *queues; + int num_queues; + int idx_queues; + // Command buffers associated with this queue. These are available for + // re-recording + struct vk_cmd **cmds; + int num_cmds; }; -// Fetch the next command buffer from a command pool and begin recording to it. +// Fetch a command buffer from a command pool and begin recording to it. // Returns NULL on failure. struct vk_cmd *vk_cmd_begin(struct mpvk_ctx *vk, struct vk_cmdpool *pool); -// Finish the currently recording command buffer and submit it for execution. -// If `done` is not NULL, it will be set to a semaphore that will signal once -// the command completes. (And MUST have a corresponding semaphore wait) -// Returns whether successful. -bool vk_cmd_submit(struct mpvk_ctx *vk, struct vk_cmd *cmd, VkSemaphore *done); - -// Rotate the queues for each vk_cmdpool. Call this once per frame to ensure -// good parallelism between frames when using multiple queues -void vk_cmd_cycle_queues(struct mpvk_ctx *vk); +// Finish recording a command buffer and queue it for execution. This function +// takes over ownership of *cmd, i.e. the caller should not touch it again. 
+void vk_cmd_queue(struct mpvk_ctx *vk, struct vk_cmd *cmd); // Predefined structs for a simple non-layered, non-mipped image extern const VkImageSubresourceRange vk_range; diff --git a/video/out/w32_common.c b/video/out/w32_common.c index feeae81..de81b35 100644 --- a/video/out/w32_common.c +++ b/video/out/w32_common.c @@ -79,6 +79,7 @@ struct vo_w32_state { pthread_t thread; bool terminate; struct mp_dispatch_queue *dispatch; // used to run stuff on the GUI thread + bool in_dispatch; struct w32_api api; // stores functions from dynamically loaded DLLs @@ -920,7 +921,11 @@ static LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, // The dispatch queue should be processed as soon as possible to prevent // playback glitches, since it is likely blocking the VO thread - mp_dispatch_queue_process(w32->dispatch, 0); + if (!w32->in_dispatch) { + w32->in_dispatch = true; + mp_dispatch_queue_process(w32->dispatch, 0); + w32->in_dispatch = false; + } switch (message) { case WM_ERASEBKGND: // no need to erase background separately diff --git a/video/out/wayland_common.c b/video/out/wayland_common.c index 19adf01..0ed1468 100644 --- a/video/out/wayland_common.c +++ b/video/out/wayland_common.c @@ -26,8 +26,8 @@ #include "win_state.h" #include "wayland_common.h" -// Generated from xdg-shell-unstable-v6.xml -#include "video/out/wayland/xdg-shell-v6.h" +// Generated from xdg-shell.xml +#include "video/out/wayland/xdg-shell.h" // Generated from idle-inhibit-unstable-v1.xml #include "video/out/wayland/idle-inhibit-v1.h" @@ -35,12 +35,12 @@ // Generated from server-decoration.xml #include "video/out/wayland/srv-decor.h" -static void xdg_shell_ping(void *data, struct zxdg_shell_v6 *shell, uint32_t serial) +static void xdg_shell_ping(void *data, struct xdg_wm_base *shell, uint32_t serial) { - zxdg_shell_v6_pong(shell, serial); + xdg_wm_base_pong(shell, serial); } -static const struct zxdg_shell_v6_listener xdg_shell_listener = { +static const struct xdg_wm_base_listener 
xdg_shell_listener = { xdg_shell_ping, }; @@ -125,7 +125,7 @@ static void pointer_handle_motion(void *data, struct wl_pointer *pointer, static void window_move(struct vo_wayland_state *wl, uint32_t serial) { if (wl->xdg_toplevel) - zxdg_toplevel_v6_move(wl->xdg_toplevel, wl->seat, serial); + xdg_toplevel_move(wl->xdg_toplevel, wl->seat, serial); } static void pointer_handle_button(void *data, struct wl_pointer *wl_pointer, @@ -177,9 +177,9 @@ static const struct wl_pointer_listener pointer_listener = { }; static int check_for_resize(struct vo_wayland_state *wl, wl_fixed_t x_w, wl_fixed_t y_w, - enum zxdg_toplevel_v6_resize_edge *edge) + enum xdg_toplevel_resize_edge *edge) { - if (wl->touch_entries || wl->fullscreen) + if (wl->touch_entries || wl->fullscreen || wl->maximized) return 0; const int edge_pixels = 64; @@ -190,21 +190,21 @@ static int check_for_resize(struct vo_wayland_state *wl, wl_fixed_t x_w, wl_fixe int bottom_edge = pos[1] > (mp_rect_h(wl->geometry) - edge_pixels); if (left_edge) { - *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_LEFT; + *edge = XDG_TOPLEVEL_RESIZE_EDGE_LEFT; if (top_edge) - *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_TOP_LEFT; + *edge = XDG_TOPLEVEL_RESIZE_EDGE_TOP_LEFT; else if (bottom_edge) - *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_BOTTOM_LEFT; + *edge = XDG_TOPLEVEL_RESIZE_EDGE_BOTTOM_LEFT; } else if (right_edge) { - *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_RIGHT; + *edge = XDG_TOPLEVEL_RESIZE_EDGE_RIGHT; if (top_edge) - *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_TOP_RIGHT; + *edge = XDG_TOPLEVEL_RESIZE_EDGE_TOP_RIGHT; else if (bottom_edge) - *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_BOTTOM_RIGHT; + *edge = XDG_TOPLEVEL_RESIZE_EDGE_BOTTOM_RIGHT; } else if (top_edge) { - *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_TOP; + *edge = XDG_TOPLEVEL_RESIZE_EDGE_TOP; } else if (bottom_edge) { - *edge = ZXDG_TOPLEVEL_V6_RESIZE_EDGE_BOTTOM; + *edge = XDG_TOPLEVEL_RESIZE_EDGE_BOTTOM; } else { *edge = 0; return 0; @@ -219,14 +219,14 @@ static void touch_handle_down(void *data, struct 
wl_touch *wl_touch, { struct vo_wayland_state *wl = data; - enum zxdg_toplevel_v6_resize_edge edge; + enum xdg_toplevel_resize_edge edge; if (check_for_resize(wl, x_w, y_w, &edge)) { wl->touch_entries = 0; - zxdg_toplevel_v6_resize(wl->xdg_toplevel, wl->seat, serial, edge); + xdg_toplevel_resize(wl->xdg_toplevel, wl->seat, serial, edge); return; } else if (wl->touch_entries) { wl->touch_entries = 0; - zxdg_toplevel_v6_move(wl->xdg_toplevel, wl->seat, serial); + xdg_toplevel_move(wl->xdg_toplevel, wl->seat, serial); return; } @@ -782,9 +782,8 @@ static void registry_handle_add(void *data, struct wl_registry *reg, uint32_t id int found = 1; struct vo_wayland_state *wl = data; - if (!strcmp(interface, wl_compositor_interface.name) && found++) { - ver = MPMIN(ver, 4); /* Cap the version */ - wl->compositor = wl_registry_bind(reg, id, &wl_compositor_interface, ver); + if (!strcmp(interface, wl_compositor_interface.name) && (ver >= 3) && found++) { + wl->compositor = wl_registry_bind(reg, id, &wl_compositor_interface, 3); wl->surface = wl_compositor_create_surface(wl->compositor); wl->cursor_surface = wl_compositor_create_surface(wl->compositor); wl_surface_add_listener(wl->surface, &surface_listener, wl); @@ -805,9 +804,10 @@ static void registry_handle_add(void *data, struct wl_registry *reg, uint32_t id wl_list_insert(&wl->output_list, &output->link); } - if (!strcmp(interface, zxdg_shell_v6_interface.name) && found++) { - wl->shell = wl_registry_bind(reg, id, &zxdg_shell_v6_interface, 1); - zxdg_shell_v6_add_listener(wl->shell, &xdg_shell_listener, wl); + if (!strcmp(interface, xdg_wm_base_interface.name) && found++) { + ver = MPMIN(ver, 2); /* We can use either 1 or 2 */ + wl->shell = wl_registry_bind(reg, id, &xdg_wm_base_interface, ver); + xdg_wm_base_add_listener(wl->shell, &xdg_shell_listener, wl); } if (!strcmp(interface, wl_seat_interface.name) && found++) { @@ -866,38 +866,42 @@ static const struct wl_registry_listener registry_listener = { 
registry_handle_remove, }; -static void handle_surface_config(void *data, struct zxdg_surface_v6 *surface, +static void handle_surface_config(void *data, struct xdg_surface *surface, uint32_t serial) { - zxdg_surface_v6_ack_configure(surface, serial); + xdg_surface_ack_configure(surface, serial); } -static const struct zxdg_surface_v6_listener xdg_surface_listener = { +static const struct xdg_surface_listener xdg_surface_listener = { handle_surface_config, }; -static void handle_toplevel_config(void *data, struct zxdg_toplevel_v6 *toplevel, +static void handle_toplevel_config(void *data, struct xdg_toplevel *toplevel, int32_t width, int32_t height, struct wl_array *states) { struct vo_wayland_state *wl = data; struct mp_rect old_geometry = wl->geometry; int prev_fs_state = wl->fullscreen; - bool maximized = false; + wl->maximized = false; wl->fullscreen = false; - enum zxdg_toplevel_v6_state *state; + enum xdg_toplevel_state *state; wl_array_for_each(state, states) { switch (*state) { - case ZXDG_TOPLEVEL_V6_STATE_FULLSCREEN: + case XDG_TOPLEVEL_STATE_FULLSCREEN: wl->fullscreen = true; break; - case ZXDG_TOPLEVEL_V6_STATE_RESIZING: + case XDG_TOPLEVEL_STATE_RESIZING: wl->pending_vo_events |= VO_EVENT_LIVE_RESIZING; break; - case ZXDG_TOPLEVEL_V6_STATE_MAXIMIZED: - maximized = true; + case XDG_TOPLEVEL_STATE_ACTIVATED: break; - case ZXDG_TOPLEVEL_V6_STATE_ACTIVATED: + case XDG_TOPLEVEL_STATE_TILED_TOP: + case XDG_TOPLEVEL_STATE_TILED_LEFT: + case XDG_TOPLEVEL_STATE_TILED_RIGHT: + case XDG_TOPLEVEL_STATE_TILED_BOTTOM: + case XDG_TOPLEVEL_STATE_MAXIMIZED: + wl->maximized = true; break; } } @@ -910,7 +914,7 @@ static void handle_toplevel_config(void *data, struct zxdg_toplevel_v6 *toplevel if (width > 0 && height > 0) { if (!wl->fullscreen) { if (wl->vo->opts->keepaspect && wl->vo->opts->keepaspect_window && - !maximized) { + !wl->maximized) { if (width > height) width = height * wl->aspect_ratio; else @@ -939,27 +943,27 @@ static void handle_toplevel_config(void 
*data, struct zxdg_toplevel_v6 *toplevel wl->pending_vo_events |= VO_EVENT_RESIZE; } -static void handle_toplevel_close(void *data, struct zxdg_toplevel_v6 *xdg_toplevel) +static void handle_toplevel_close(void *data, struct xdg_toplevel *xdg_toplevel) { struct vo_wayland_state *wl = data; mp_input_put_key(wl->vo->input_ctx, MP_KEY_CLOSE_WIN); } -static const struct zxdg_toplevel_v6_listener xdg_toplevel_listener = { +static const struct xdg_toplevel_listener xdg_toplevel_listener = { handle_toplevel_config, handle_toplevel_close, }; static int create_xdg_surface(struct vo_wayland_state *wl) { - wl->xdg_surface = zxdg_shell_v6_get_xdg_surface(wl->shell, wl->surface); - zxdg_surface_v6_add_listener(wl->xdg_surface, &xdg_surface_listener, wl); + wl->xdg_surface = xdg_wm_base_get_xdg_surface(wl->shell, wl->surface); + xdg_surface_add_listener(wl->xdg_surface, &xdg_surface_listener, wl); - wl->xdg_toplevel = zxdg_surface_v6_get_toplevel(wl->xdg_surface); - zxdg_toplevel_v6_add_listener(wl->xdg_toplevel, &xdg_toplevel_listener, wl); + wl->xdg_toplevel = xdg_surface_get_toplevel(wl->xdg_surface); + xdg_toplevel_add_listener(wl->xdg_toplevel, &xdg_toplevel_listener, wl); - zxdg_toplevel_v6_set_title (wl->xdg_toplevel, "mpv"); - zxdg_toplevel_v6_set_app_id(wl->xdg_toplevel, "mpv"); + xdg_toplevel_set_title (wl->xdg_toplevel, "mpv"); + xdg_toplevel_set_app_id(wl->xdg_toplevel, "mpv"); return 0; } @@ -1010,7 +1014,7 @@ int vo_wayland_init(struct vo *vo) if (!wl->shell) { MP_FATAL(wl, "Compositor doesn't support the required %s protocol!\n", - zxdg_shell_v6_interface.name); + xdg_wm_base_interface.name); return false; } @@ -1074,7 +1078,7 @@ void vo_wayland_uninit(struct vo *vo) zwp_idle_inhibit_manager_v1_destroy(wl->idle_inhibit_manager); if (wl->shell) - zxdg_shell_v6_destroy(wl->shell); + xdg_wm_base_destroy(wl->shell); if (wl->shm) wl_shm_destroy(wl->shm); @@ -1153,11 +1157,14 @@ int vo_wayland_reconfig(struct vo *vo) vo_calc_window_geometry(vo, &screenrc, &geo); 
vo_apply_window_geometry(vo, &geo); - wl->geometry.x0 = 0; - wl->geometry.y0 = 0; - wl->geometry.x1 = vo->dwidth / wl->scaling; - wl->geometry.y1 = vo->dheight / wl->scaling; - wl->window_size = wl->geometry; + if (!wl->configured || !wl->maximized) { + wl->geometry.x0 = 0; + wl->geometry.y0 = 0; + wl->geometry.x1 = vo->dwidth / wl->scaling; + wl->geometry.y1 = vo->dheight / wl->scaling; + wl->window_size = wl->geometry; + } + wl->aspect_ratio = vo->dwidth / (float)vo->dheight; if (vo->opts->fullscreen) { @@ -1168,7 +1175,7 @@ int vo_wayland_reconfig(struct vo *vo) wl->geometry.x1 = mp_rect_w(wl->current_output->geometry)/wl->scaling; wl->geometry.y1 = mp_rect_h(wl->current_output->geometry)/wl->scaling; } else { - zxdg_toplevel_v6_set_fullscreen(wl->xdg_toplevel, wl_out); + xdg_toplevel_set_fullscreen(wl->xdg_toplevel, wl_out); } } @@ -1198,6 +1205,7 @@ static int set_screensaver_inhibitor(struct vo_wayland_state *wl, int state) } else { MP_VERBOSE(wl, "Disabling the idle inhibitor\n"); zwp_idle_inhibitor_v1_destroy(wl->idle_inhibitor); + wl->idle_inhibitor = NULL; } return VO_TRUE; } @@ -1207,9 +1215,9 @@ static int toggle_fullscreen(struct vo_wayland_state *wl) if (!wl->xdg_toplevel) return VO_NOTAVAIL; if (wl->fullscreen) - zxdg_toplevel_v6_unset_fullscreen(wl->xdg_toplevel); + xdg_toplevel_unset_fullscreen(wl->xdg_toplevel); else - zxdg_toplevel_v6_set_fullscreen(wl->xdg_toplevel, NULL); + xdg_toplevel_set_fullscreen(wl->xdg_toplevel, NULL); return VO_TRUE; } @@ -1217,7 +1225,7 @@ static int update_window_title(struct vo_wayland_state *wl, char *title) { if (!wl->xdg_toplevel) return VO_NOTAVAIL; - zxdg_toplevel_v6_set_title(wl->xdg_toplevel, title); + xdg_toplevel_set_title(wl->xdg_toplevel, title); return VO_TRUE; } @@ -1318,7 +1326,7 @@ int vo_wayland_control(struct vo *vo, int *events, int request, void *arg) } case VOCTRL_SET_UNFS_WINDOW_SIZE: { int *s = arg; - if (!wl->fullscreen) { + if (!wl->fullscreen && !wl->maximized) { wl->geometry.x0 = 0; 
wl->geometry.y0 = 0; wl->geometry.x1 = s[0]/wl->scaling; diff --git a/video/out/wayland_common.h b/video/out/wayland_common.h index 4911009..9aa057f 100644 --- a/video/out/wayland_common.h +++ b/video/out/wayland_common.h @@ -54,6 +54,7 @@ struct vo_wayland_state { struct mp_rect window_size; float aspect_ratio; bool fullscreen; + bool maximized; bool configured; int wakeup_pipe[2]; int pending_vo_events; @@ -69,9 +70,9 @@ struct vo_wayland_state { /* Shell */ struct wl_surface *surface; - struct zxdg_shell_v6 *shell; - struct zxdg_toplevel_v6 *xdg_toplevel; - struct zxdg_surface_v6 *xdg_surface; + struct xdg_wm_base *shell; + struct xdg_toplevel *xdg_toplevel; + struct xdg_surface *xdg_surface; struct org_kde_kwin_server_decoration_manager *server_decoration_manager; struct org_kde_kwin_server_decoration *server_decoration; struct zwp_idle_inhibit_manager_v1 *idle_inhibit_manager; diff --git a/video/out/win_state.c b/video/out/win_state.c index d6c8788..f9fb0ca 100644 --- a/video/out/win_state.c +++ b/video/out/win_state.c @@ -84,8 +84,8 @@ void vo_calc_window_geometry2(struct vo *vo, const struct mp_rect *screen, *out_geo = (struct vo_win_geometry){0}; // The case of calling this function even though no video was configured - // yet (i.e. vo->params==NULL) happens when vo_opengl creates a hidden - // window in order to create an OpenGL context. + // yet (i.e. vo->params==NULL) happens when vo_gpu creates a hidden window + // in order to create a rendering context. 
struct mp_image_params params = { .w = 320, .h = 200 }; if (vo->params) params = *vo->params; diff --git a/video/out/x11_common.c b/video/out/x11_common.c index 5f2c658..25325e7 100644 --- a/video/out/x11_common.c +++ b/video/out/x11_common.c @@ -630,7 +630,7 @@ static const struct mp_keymap keymap[] = { {XK_Pause, MP_KEY_PAUSE}, {XK_Escape, MP_KEY_ESC}, {XK_BackSpace, MP_KEY_BS}, {XK_Tab, MP_KEY_TAB}, {XK_Return, MP_KEY_ENTER}, {XK_Menu, MP_KEY_MENU}, {XK_Print, MP_KEY_PRINT}, - {XK_Cancel, MP_KEY_CANCEL}, + {XK_Cancel, MP_KEY_CANCEL}, {XK_ISO_Left_Tab, MP_KEY_TAB}, // cursor keys {XK_Left, MP_KEY_LEFT}, {XK_Right, MP_KEY_RIGHT}, {XK_Up, MP_KEY_UP}, @@ -1133,6 +1133,8 @@ void vo_x11_check_events(struct vo *vo) mp_input_put_key(x11->input_ctx, MP_KEY_MOUSE_ENTER); break; case ButtonPress: + if (Event.xbutton.button - 1 >= MP_KEY_MOUSE_BTN_COUNT) + break; if (Event.xbutton.button == 1) x11->win_drag_button1_down = true; mp_input_put_key(x11->input_ctx, @@ -1142,6 +1144,8 @@ void vo_x11_check_events(struct vo *vo) vo_x11_xembed_send_message(x11, msg); break; case ButtonRelease: + if (Event.xbutton.button - 1 >= MP_KEY_MOUSE_BTN_COUNT) + break; if (Event.xbutton.button == 1) x11->win_drag_button1_down = false; mp_input_put_key(x11->input_ctx, |