223 lines
6.9 KiB
OCaml
223 lines
6.9 KiB
OCaml
|
|
module NameMap = Map.Make(String)
|
|
|
|
type metadata = Unix.LargeFile.stats
|
|
type header = {
|
|
full_path : string;
|
|
offset : int64;
|
|
hard_link : bool;
|
|
linked_path : string; (* Meaningless unless a hard or soft link *)
|
|
md : metadata;
|
|
}
|
|
|
|
|
|
type entry =
|
|
| File of int64 * metadata
|
|
| Symlink of string * metadata
|
|
| Hardlink of string
|
|
| InferredDirectory of entry NameMap.t * metadata
|
|
| Directory of entry NameMap.t * metadata
|
|
|
|
let empty_dir_metadata : metadata = {
|
|
st_dev = 0;
|
|
st_ino = 1;
|
|
st_kind = Unix.S_DIR;
|
|
st_perm = 7 * 64 + 7 * 8 + 7;
|
|
st_nlink = 0;
|
|
st_uid = 0;
|
|
st_gid = 0;
|
|
st_rdev = 0;
|
|
st_size = Int64.of_int 0;
|
|
st_atime = 0.;
|
|
st_mtime = 0.;
|
|
st_ctime = 0.;
|
|
}
|
|
let default_file_metadata : metadata = {
|
|
st_dev = 0;
|
|
st_ino = 1;
|
|
st_kind = Unix.S_REG;
|
|
st_perm = 7 * 64 + 7 * 8 + 7;
|
|
st_nlink = 1;
|
|
st_uid = 0;
|
|
st_gid = 0;
|
|
st_rdev = 0;
|
|
st_size = Int64.of_int 0;
|
|
st_atime = 0.;
|
|
st_mtime = 0.;
|
|
st_ctime = 0.;
|
|
}
|
|
|
|
let clean_path path =
|
|
let clean_split = List.filter (fun x -> (not (String.equal x "")) && (not (String.equal x "."))) in
|
|
clean_split (String.split_on_char '/' (String.trim path))
|
|
|
|
let find path tree =
|
|
let p = clean_path path in
|
|
let rec aux = function
|
|
| ([], tree) -> Some tree
|
|
| (i :: rest, InferredDirectory (map, _))
|
|
| (i :: rest, Directory (map, _)) ->
|
|
(match NameMap.find_opt i map with
|
|
| None -> None
|
|
| Some x -> aux (rest, x))
|
|
| _ -> failwith "cannot search non-directory"
|
|
in aux (p, tree)
|
|
|
|
let add_child name node map =
|
|
match (NameMap.find_opt name map, node) with
|
|
| Some (InferredDirectory (children, _)), Directory (_, md) ->
|
|
NameMap.add name (Directory (children, md)) map
|
|
| _ -> NameMap.add name node map
|
|
(* TODO: add logic for links, device nodes, and replacing inferred directories *)
|
|
let add_direct name node = function
|
|
| InferredDirectory (map, md) ->
|
|
InferredDirectory ((add_child name node map), md)
|
|
| Directory (map, md) ->
|
|
Directory ((add_child name node map), md)
|
|
| _ ->
|
|
Printf.printf "whatever the fuck %s" name; Directory (NameMap.empty, empty_dir_metadata)
|
|
|
|
|
|
let find_or_infer_dir name = function
|
|
| InferredDirectory (children, _)
|
|
| Directory (children, _) ->
|
|
(match NameMap.find_opt name children with
|
|
| None -> InferredDirectory (NameMap.empty, empty_dir_metadata)
|
|
| Some x -> x)
|
|
| _ -> failwith "findorinfer"
|
|
|
|
let rec add_deep node = function
|
|
| (last :: [], dir) ->
|
|
add_direct last node dir
|
|
| (one :: rest, dir) ->
|
|
add_direct one (add_deep node (rest, (find_or_infer_dir one dir))) dir
|
|
| (_, dir) -> dir
|
|
|
|
let inc_nlink path tree =
|
|
add_deep
|
|
(match find path tree with
|
|
| Some (File (o, md)) -> File (o, {md with st_nlink = md.st_nlink + 1})
|
|
| Some (Directory (o, md)) -> Directory (o, {md with st_nlink = md.st_nlink + 1})
|
|
| _ -> failwith "can't increase hardlink count...")
|
|
(clean_path path, tree)
|
|
|
|
|
|
let read_path buf o c =
|
|
List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string buf o c))
|
|
|
|
(* Assume we have a block, extract data from it into a header object *)
|
|
let read_header offset (block : bytes) : header =
|
|
let name = read_path block 0 100 in
|
|
let mode = Octal.to_int (Bytes.sub_string block 100 8) in
|
|
let uid = Octal.to_int (Bytes.sub_string block 108 8) in
|
|
let gid = Octal.to_int (Bytes.sub_string block 116 8) in
|
|
let size = Octal.to_int64 (Bytes.sub_string block 124 12) in
|
|
let time = Octal.to_int (Bytes.sub_string block 136 12) in
|
|
(* tODO: checksum, link *)
|
|
let type_flag = Bytes.get block 156 in
|
|
let linked_path = read_path block 157 100 in
|
|
let ustar_used = String.equal "ustar" (Bytes.sub_string block 257 5) in
|
|
(*let () = assert ustar_used in*)
|
|
let prefix = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 345 155)) in
|
|
let path = if ustar_used then String.cat prefix name else name in
|
|
let md = {
|
|
default_file_metadata with
|
|
st_ino = (Int64.to_int offset) + 1;
|
|
st_kind = (match type_flag with
|
|
| '2' -> Unix.S_LNK
|
|
| '5' -> Unix.S_DIR
|
|
| _ -> Unix.S_REG);
|
|
st_perm = mode;
|
|
st_uid = uid;
|
|
st_gid = gid;
|
|
st_size = size;
|
|
st_mtime = float_of_int time;
|
|
} in
|
|
{
|
|
full_path = path;
|
|
md = md;
|
|
hard_link = (type_flag == '1');
|
|
offset = offset;
|
|
linked_path = linked_path;
|
|
}
|
|
|
|
let buffer = Bytes.create 512
|
|
let empty_p h =
|
|
(String.equal "" h.full_path) && (h.md.st_size = 0L)
|
|
let next_header_pos h =
|
|
let (+) = Int64.add in
|
|
let (/) = Int64.div in
|
|
let (%) = Int64.rem in
|
|
let ( * ) = Int64.mul in
|
|
let o = (h.offset + 512L) + h.md.st_size in
|
|
let b = 512L * (o / 512L) in
|
|
if (o % 512L) > 0L then b + 512L else b
|
|
|
|
let rec read_headers offset ch =
|
|
let () = In_channel.seek ch offset in
|
|
match In_channel.really_input ch buffer 0 512 with
|
|
| None -> (* Print message *)
|
|
Printf.printf "Tar file ended with no empty entry? Continuing anyway.\n";
|
|
[]
|
|
| _ ->
|
|
let h = read_header offset buffer in
|
|
if empty_p h || ((In_channel.length ch) <= (next_header_pos h)) then
|
|
[]
|
|
else
|
|
h :: (read_headers (next_header_pos h) ch)
|
|
let make_node h =
|
|
if h.hard_link then
|
|
Hardlink (h.linked_path)
|
|
else
|
|
match h.md.st_kind with
|
|
| Unix.S_DIR -> Directory (NameMap.empty, h.md)
|
|
| Unix.S_REG ->
|
|
File (h.offset, h.md)
|
|
| Unix.S_LNK -> Symlink (h.linked_path, h.md)
|
|
| _ -> failwith "other kinds of files not yet implemented"
|
|
|
|
let add_node dir h =
|
|
let node = make_node h in
|
|
let root =
|
|
(match node with
|
|
| Hardlink (path) -> (inc_nlink path dir)
|
|
| _ -> dir) in
|
|
add_deep (make_node h) ((clean_path h.full_path), root)
|
|
|
|
let build_tree headers =
|
|
let root = Directory (NameMap.empty, empty_dir_metadata) in
|
|
List.fold_left add_node root headers
|
|
|
|
let parse_tar fname =
|
|
let chan = In_channel.open_bin fname in
|
|
let headers = read_headers 0L chan in
|
|
(build_tree headers, chan)
|
|
|
|
(* Helpful for debugging *)
|
|
let rec print_tree path = function
|
|
| File (off, _) ->
|
|
Printf.printf "%d file: %s\n" (Int64.to_int off) path
|
|
| InferredDirectory (children, _)
|
|
| Directory (children, _) ->
|
|
Printf.printf "dir: %s/\n" path;
|
|
NameMap.iter (fun s d -> print_tree (Printf.sprintf "%s/%s" path s) d) children
|
|
| Symlink (s, _) ->
|
|
Printf.printf "link '%s' -> '%s'" path s
|
|
| Hardlink (s) ->
|
|
Printf.printf "hardlink '%s' -> '%s'" path s
|
|
|
|
(* Now that we know everything about a file, we need to be able to actually read it *)
|
|
let read_file f ch off count =
|
|
match f with
|
|
| File (file_off, md) ->
|
|
let remaining = Int64.max 0L (Int64.sub md.st_size off) in
|
|
let to_read = Int64.to_int (Int64.min (Int64.of_int count) remaining) in
|
|
let buf = Bytes.create to_read in
|
|
let () = In_channel.seek ch (Int64.add file_off (Int64.add 512L off)) in
|
|
(match In_channel.really_input ch buf 0 to_read with
|
|
| None -> failwith "can't input"
|
|
| _ ->
|
|
(buf, to_read)
|
|
)
|
|
| _ -> failwith "can't read non-file"
|