summaryrefslogtreecommitdiff
path: root/clean-repository.ml
blob: 07ad34f0ca9c92498d336ba500ada917151babe1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
(**************************************************************************)
(*                                                                        *)
(*  Copyright (C) 2012 Johannes 'josch' Schauer <j.schauer@email.de>      *)
(*  Copyright (C) 2012 Pietro Abate <pietro.abate@pps.jussieu.fr>         *)
(*                                                                        *)
(*  This library is free software: you can redistribute it and/or modify  *)
(*  it under the terms of the GNU Lesser General Public License as        *)
(*  published by the Free Software Foundation, either version 3 of the    *)
(*  License, or (at your option) any later version.  A special linking    *)
(*  exception to the GNU Lesser General Public License applies to this    *)
(*  library, see the COPYING file for more information.                   *)
(**************************************************************************)

open! ExtLib
open Dose_common
open Dose_debian
open Dose_algo
open Dose_doseparse

#define __label __FILE__
let label =  __label ;;
include Util.Logging(struct let label = label end) ;;

let binwithoutsrc ?(reqsrcarchall=false) ?(allowmismatch=false) universe bl =
  CudfAdd.Cudf_set.fold (fun binpkg acc ->
    try
      ignore (BootstrapCommon.get_src_package ~allowmismatch universe binpkg);
      acc
    with Sources.NotfoundSrc ->
      (* Source for this package was not found. But if this package is not
       * arch:all or reqsrcarchall is true, then we don't care. *)
      if (BootstrapCommon.pkg_is_not_arch_all binpkg) || reqsrcarchall then
        CudfAdd.Cudf_set.add binpkg acc
      else
        acc
  ) bl CudfAdd.Cudf_set.empty
;;

let srcwithoutbin ?(addsrcarchall=false) ?(allowmismatch=false) universe bl sl =
  let h = CudfAdd.Cudf_hashtbl.create (Cudf.universe_size universe) in
  CudfAdd.Cudf_set.iter (fun binpkg ->
    (* if the binary package is not arch:all or if addsrcarchall is true, then
     * try to retrieve the source package for this binary package *)
    if (BootstrapCommon.pkg_is_not_arch_all binpkg) || addsrcarchall then begin
      try
        let srcpkg = BootstrapCommon.get_src_package ~allowmismatch universe binpkg in
        CudfAdd.Cudf_hashtbl.replace h srcpkg ()
      with Sources.NotfoundSrc -> ()
    end
  ) bl;
  CudfAdd.Cudf_set.fold
    (fun srcpkg acc -> if CudfAdd.Cudf_hashtbl.mem h srcpkg then acc else CudfAdd.Cudf_set.add srcpkg acc)
    sl CudfAdd.Cudf_set.empty
;;

module PackageGraphCondensed = struct
  module IntSet = BootstrapCommon.IntSet
  module PkgV = struct
    type t =
      | Pkg of int
      | SCC of IntSet.t
    let compare x y = match (x,y) with
      | Pkg p1, Pkg p2 -> Pervasives.compare p1 p2
      | SCC s1, SCC s2 -> IntSet.compare s1 s2
      | Pkg _, SCC _ -> -1
      | SCC _, Pkg _ -> 1
    let hash = function
      (* since package ids are unique in the graph, the minimum set element unique
       * identifies sets *)
      | Pkg p -> p
      | SCC s -> IntSet.min_elt s
    let equal x y = (compare x y) = 0
  end

  module G = Graph.Imperative.Digraph.ConcreteBidirectional(PkgV)

  let dependency_graph ?(reverse=false) ?(global_constraints=[]) universe =
    let gr = G.create () in
    let add_edge v1 v2 = if reverse then
        G.add_edge gr v2 v1
      else
        G.add_edge gr v1 v2
    in
    let essential = if List.length global_constraints > 0 then begin
      let essential = Cudf.get_packages
          ~filter:(fun pkg ->
              BootstrapCommon.debtype_of_cudfpkg pkg = `BinPkg
              && BootstrapCommon.debessential_of_cudfpkg pkg)
          universe
      in
      List.map (fun pkg -> PkgV.Pkg (CudfAdd.pkgtoint universe pkg)) essential
    end else [] in
    Cudf.iter_packages (fun pkg ->
        let pkgv = PkgV.Pkg (CudfAdd.pkgtoint universe pkg) in
        G.add_vertex gr pkgv;
        List.iter (fun vpkgs ->
            (* this is a reverse dependency graph so if A depends on B then an
             * edge is added from B to A *)
            List.iter
              (fun p -> add_edge pkgv (PkgV.Pkg p))
              (List.flatten (List.map (CudfAdd.resolve_vpkg_int universe) vpkgs))
          ) pkg.Cudf.depends;
        (* add edges from all essential packages (if any) to all vertices *)
        List.iter (add_edge pkgv) essential
      ) universe;
    gr

  let condense g =
    let module C = Graph.Components.Make(G) in
    (* calculate SCCs and map each found SCC to either its original vertex in
     * case of a degenerate component of size one or to a vertex containing the
     * set of packages in the SCC *)
    let sccs = Array.map (function
        | [] -> fatal "scc cannot be empty"
        | [v] -> v
        | scc ->
          let pset = List.fold_right (function
              | PkgV.SCC _ -> fatal "cannot condense graph with SSC vertices"
              | PkgV.Pkg p -> IntSet.add p) scc IntSet.empty
          in PkgV.SCC pset)
        (C.scc_array g)
    in
    (* associate with each SCC a unique ID and create a mapping from all
     * vertices in the original graph to its SCC ID *)
    let mapping = Hashtbl.create (G.nb_vertex g) in
    Array.iteri (fun i v ->
        match v with
        | PkgV.Pkg _ -> Hashtbl.add mapping v i
        | PkgV.SCC s -> IntSet.iter (fun p -> Hashtbl.add mapping (PkgV.Pkg p) i) s
      ) sccs;
    (* go through the edges in the original graph, find out the SCC IDs that
     * their vertices belong to and if they are different, add that edge to the
     * new graph connecting the SCCs *)
    let cg = G.create () in
    G.iter_edges (fun v1 v2 ->
      let scc1 = Hashtbl.find mapping v1 in
      let scc2 = Hashtbl.find mapping v2 in
      if scc1 <> scc2 then
        G.add_edge cg sccs.(scc1) sccs.(scc2)
    ) g;
    cg

  let get_dependency_closures universe g =
    let module Dfs = Graph.Traverse.Dfs(G) in
    if Dfs.has_cycle g then fatal "cannot get dependency closures of a cyclic graph";
    (* traverse the vertices in the graph in post-order depth first search and
     * for each visited vertex V add to V the union of:
     *       1. all packages in V (one package in Pkg vertices and multiple in
     *          SCC vertices)
     *       2. all packages associated with the successors of V
     *)
    let intclosureht = Hashtbl.create (G.nb_vertex g) in
    (* a package graph of Debian Sid 2014 has 2500 SCCs *)
    let sccht = Hashtbl.create 2500 in
    let intsettovarset s = IntSet.fold
        (fun p -> CudfAdd.Cudf_set.add (CudfAdd.inttopkg universe p))
        s CudfAdd.Cudf_set.empty
    in
    Dfs.postfix (fun v ->
        let succ_set = G.fold_succ (function
            | PkgV.Pkg pp -> CudfAdd.Cudf_set.union (Hashtbl.find intclosureht pp)
            | PkgV.SCC ps -> CudfAdd.Cudf_set.union (Hashtbl.find sccht ps)
          ) g v CudfAdd.Cudf_set.empty
        in
        match v with
        | PkgV.Pkg p ->
          if Hashtbl.mem intclosureht p then fatal "package already in hashtbl";
          let closure = CudfAdd.Cudf_set.add (CudfAdd.inttopkg universe p) succ_set in
          Hashtbl.add intclosureht p closure
        | PkgV.SCC s ->
          if Hashtbl.mem sccht s then fatal "package already in hashtbl";
          let closure = CudfAdd.Cudf_set.union (intsettovarset s) succ_set in
          Hashtbl.add sccht s closure
      ) g;
    let closureht = Hashtbl.create (G.nb_vertex g) in
    (* expand SCCs into individual packages *)
    Hashtbl.iter
      (fun scc s -> IntSet.iter
          (fun p -> Hashtbl.add closureht (CudfAdd.inttopkg universe p) s) scc)
      sccht;
    (* convert integer based hashtable into var based one *)
    Hashtbl.iter
      (fun k v -> Hashtbl.add closureht (CudfAdd.inttopkg universe k) v)
      intclosureht;
    closureht
end

let maxclosure ?(global_constraints=[]) ?(reqsrcarchall=false) ?(addsrcarchall=false) ?(allowmismatch=false) bl sl =
  let module Set = CudfAdd.Cudf_set in
  let univ = Cudf.load_universe (bl@sl) in
  (* create a reverse dependency graph *)
  info "creating dependency graph";
  let g = PackageGraphCondensed.dependency_graph ~reverse:true ~global_constraints univ in
  let module Dfs = Graph.Traverse.Dfs(PackageGraphCondensed.G) in
  info "condensing";
  (* if graph has cycle, condense strongly connected components into single vertices *)
  let g = if Dfs.has_cycle g then PackageGraphCondensed.condense g else g in
  info "retrieving reverse closures";
  let affectedht_2 = PackageGraphCondensed.get_dependency_closures univ g in
  let affected removed =
    let a = Set.fold (fun pkg acc ->
        Set.union (Hashtbl.find affectedht_2 pkg) acc
      ) removed Set.empty in
    Set.partition (fun p -> BootstrapCommon.debtype_of_cudfpkg p = `BinPkg) a
  in
  let rec aux (notinstacc, notcompacc, nobinacc, nosrcacc, bintocheck, srctocheck) bl sl univ =
    let notinst, notcomp = match bintocheck, srctocheck with
    | [],[] -> Set.empty, Set.empty
    | _ -> begin
        info "checking %d binary packages" (List.length bintocheck);
        (* find binary packages that can't be installed *)
        let notinst = CudfAdd.to_set (Depsolver.find_listbroken ~global_constraints univ bintocheck) in
        info "checking %d source packages" (List.length srctocheck);
        (* find source packages that can't be compiled *)
        let notcomp = CudfAdd.to_set (Depsolver.find_listbroken ~global_constraints univ srctocheck) in
        info "removing %d not installable binary and %d not compilable source packages" (Set.cardinal notinst) (Set.cardinal notcomp);
        notinst, notcomp
      end
    in
    info "checking for source without binary";
    (* find source packages without binary packages *)
    let nobin = srcwithoutbin ~addsrcarchall ~allowmismatch univ bl sl in
    info "checking for binary without source";
    (* find binary packages without source packages *)
    let nosrc = binwithoutsrc ~reqsrcarchall ~allowmismatch univ bl in
    info "removing %d binary packages without source packages and %d source packages without binary packages" (Set.cardinal nosrc) (Set.cardinal nosrc);

    let removedbin = Set.union notinst nosrc in
    if List.length global_constraints > 0 && Set.exists BootstrapCommon.debessential_of_cudfpkg removedbin then
      fatal "will not remove essential package";

    if Set.is_empty notinst && Set.is_empty notcomp && Set.is_empty nobin && Set.is_empty nosrc then
      notinstacc, notcompacc, nobinacc, nosrcacc, (Set.elements bl)
    else begin
      let bl = Set.diff bl removedbin in
      let sl = Set.diff sl (Set.union notcomp nobin) in
      let affbin, affsrc = affected removedbin in
      let bintocheck = Set.inter affbin bl in
      let srctocheck = Set.inter affsrc sl in
      let notinstacc = Set.union notinstacc notinst in
      let notcompacc = Set.union notcompacc notcomp in
      let nobinacc = Set.union nobinacc nobin in
      let nosrcacc = Set.union nosrcacc nosrc in
      let univ = Cudf.load_universe (Set.elements (Set.union bl sl)) in
      aux (notinstacc, notcompacc, nobinacc, nosrcacc, (Set.elements bintocheck), (Set.elements srctocheck)) bl sl univ
    end
  in
  let e = Set.empty in
  aux (e,e,e,e,bl,sl) (CudfAdd.to_set bl) (CudfAdd.to_set sl) univ
;;

module Options = struct
  open OptParse
  let description = (
    "given a repository of binary packages and source packages, clean up that "^
    "repository by removing packages that can't be compiled or installed and "^
    "packages which don't have an associated binary or source package and "^
    "packages which do not match the specified architecture."
  )
  let usage = "%prog [options] Packages Sources"

  let options = OptParser.make ~description ~usage
  include BootstrapCommon.MakeOptions(struct let options = options end)

  let reqsrcarchall = StdOpt.store_true ()
  let addsrcarchall = StdOpt.store_true ()
  let allowsrcmismatch = StdOpt.store_true ()
  let progress = StdOpt.store_true ()

  open OptParser ;;

  let prog_group = add_group options "Program specific options" in

  add options ~group:prog_group ~long_name:"reqsrcarchall" ~help:"require source packages for arch:all binary packages" reqsrcarchall;
  add options ~group:prog_group ~long_name:"addsrcarchall" ~help:"add source packages which only build arch:all binary packages" addsrcarchall;
  add options ~group:prog_group ~long_name:"allowsrcmismatch" ~help:("If a binary package is "^
    "without a source package but there is a source package of same name but "^ 
    "different version, match this binary package to that source package.") allowsrcmismatch;
  add options ~group:prog_group ~long_name:"progress" ~help:"print progress bars" progress;

  include StdOptions.InputOptions;;
  let default = List.filter (fun e -> not (List.mem e ["compare";"checkonly";"latest";"fg";"bg";"inputtype"])) StdOptions.InputOptions.default_options in
  StdOptions.InputOptions.add_options ~default options;;

  include StdOptions.OutputOptions;;
  let default = List.filter (fun e -> not (List.mem e ["outdir"; "dot"])) StdOptions.OutputOptions.default_options in
  StdOptions.OutputOptions.add_options ~default options;;

  include StdOptions.DistribOptions;;
  let default = List.filter (fun e -> not (List.mem e ["deb-profiles"; "deb-builds-from"])) StdOptions.DistribOptions.default_options in
  StdOptions.DistribOptions.add_debian_options ~default options;;
end

let main () =
  let posargs = OptParse.OptParser.parse_argv Options.options in
  StdDebug.enable_debug (OptParse.Opt.get Options.verbose);
  StdDebug.all_quiet (OptParse.Opt.get Options.quiet);
  StdDebug.enable_bars (OptParse.Opt.get Options.progress) ["Depsolver_int.univcheck"];
  Util.Warning.disable "Sources";

  let options = Options.set_deb_options () in
  let buildarch = Option.get options.Debcudf.native in
  let hostarch = match options.Debcudf.host with None -> "" | Some s -> s in
  let foreignarchs = options.Debcudf.foreign in
  let reqsrcarchall = OptParse.Opt.get Options.reqsrcarchall in
  let addsrcarchall = OptParse.Opt.get Options.addsrcarchall in
  let noindep = options.Debcudf.drop_bd_indep in
  let allowmismatch = OptParse.Opt.get Options.allowsrcmismatch in

  let binlist, (fgsrclist,bgsrclist), _ = BootstrapCommon.parse_packages ~noindep Options.parse_cmdline buildarch hostarch foreignarchs posargs in
  let srclist = fgsrclist @ bgsrclist in
  let tables = Debcudf.init_tables ~options (srclist@binlist) in
  let global_constraints = Debcudf.get_essential ~options tables in
  let sl = List.map (Debcudf.tocudf ?inst:None ~options tables) srclist in
  let bl = List.map (Debcudf.tocudf ?inst:None ~options tables) binlist in

  (* save mappings from cudf packages to binary packages *)
  let cudftobin_table = Hashtbl.create 30000 in
  List.iter2 (fun cudfpkg -> fun binpkg ->
    let id = (cudfpkg.Cudf.package, cudfpkg.Cudf.version) in
    Hashtbl.add cudftobin_table id binpkg
  ) bl binlist;

  let notinst, notcomp, nobin, nosrc, bl = maxclosure ~global_constraints ~reqsrcarchall ~addsrcarchall ~allowmismatch bl sl in

  debug "not installable: %s" (BootstrapCommon.string_of_list (BootstrapCommon.string_of_package ?noversion:None) " " (CudfAdd.Cudf_set.elements notinst));
  debug "not compilable: %s" (BootstrapCommon.string_of_list (BootstrapCommon.string_of_package ?noversion:None) " " (CudfAdd.Cudf_set.elements notcomp));
  debug "source without binary: %s" (BootstrapCommon.string_of_list (BootstrapCommon.string_of_package ?noversion:None) " " (CudfAdd.Cudf_set.elements nobin));
  debug "binary without source: %s" (BootstrapCommon.string_of_list (BootstrapCommon.string_of_package ?noversion:None) " " (CudfAdd.Cudf_set.elements nosrc));
  info "not installable: %d" (CudfAdd.Cudf_set.cardinal notinst);
  info "not compilable: %d" (CudfAdd.Cudf_set.cardinal notcomp);
  info "source without binary: %d" (CudfAdd.Cudf_set.cardinal nobin);
  info "binary without source: %d" (CudfAdd.Cudf_set.cardinal nosrc);

  let oc =
    if OptParse.Opt.is_set Options.outfile then
      open_out (OptParse.Opt.get Options.outfile)
    else
      stdout
  in

  (* for each binary package, get the associated format822 stanza and print it
   * to stdout *)
  match bl with
    | [] -> failwith "no binary package remains compilable"
    | _ -> begin
        List.iter (fun p ->
          let id = (p.Cudf.package, p.Cudf.version) in
          let b = Hashtbl.find cudftobin_table id in
          b#pp oc;
        ) (BootstrapCommon.debcudf_sort bl);
    end;

  close_out oc;
;;

main ();;