module NameMap = Map.Make(String) type metadata = Unix.LargeFile.stats type entry = | File of int64 * metadata | Directory of entry NameMap.t * metadata let empty_dir_metadata : metadata = { st_dev = 0; st_ino = 1; st_kind = Unix.S_DIR; st_perm = 7 * 64 + 7 * 8 + 7; st_nlink = 0; st_uid = 0; st_gid = 0; st_rdev = 0; st_size = Int64.of_int 0; st_atime = 0.; st_mtime = 0.; st_ctime = 0.; } let default_file_metadata : metadata = { st_dev = 0; st_ino = 1; st_kind = Unix.S_REG; st_perm = 7 * 64 + 7 * 8 + 7; st_nlink = 0; st_uid = 0; st_gid = 0; st_rdev = 0; st_size = Int64.of_int 0; st_atime = 0.; st_mtime = 0.; st_ctime = 0.; } let clean_path path = let clean_split = List.filter (fun x -> (not (String.equal x "")) && (not (String.equal x "."))) in clean_split (String.split_on_char '/' (String.trim path)) let rec find_from_split path tree = match (path, tree) with | ([], _) -> Some tree | (_, File _) -> failwith "cannot search file!" | ([name], Directory (children, _)) -> NameMap.find_opt name children | (name :: rest, Directory (children, _)) -> (match NameMap.find_opt name children with | None -> None | Some d -> find_from_split rest d) let find_or_empty_dir name dir = match dir with | File _ -> failwith "cannot search file!" | Directory (children, _) -> (match NameMap.find_opt name children with | None -> Directory (NameMap.empty, empty_dir_metadata) | Some (Directory (x,y)) -> (Directory (x,y)) | _ -> failwith "whoops, can't access child of file!") let add_to_dir name node dir = match dir with | Directory (children, md) -> (* There is a specific case we may need to handle here - Normally when a file a/b/c is found, we infer the existence of a/ and a/b/, however if an entry for a/ is later found we need to keep the children of a/, but need to merge the metadata from the new entry. There is also the more general case of the same path being seen multiple times - the standard seems to say the last one wins. *) Directory (NameMap.add name node children, md) | _ -> failwith "not a directory" let add_node path node tree = let split_path = clean_path path in let rec aux = function | ([], dir) -> dir | ([name], dir) -> add_to_dir name node dir | (dir_name :: rest, dir) -> add_to_dir dir_name (aux (rest, find_or_empty_dir dir_name dir)) dir (*| _ -> failwith "whatever"*) in aux (split_path, tree) (* Assume we have a block, extract metadata from it. *) let read_metadata offset (block : bytes) = let name = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 0 100)) in let mode = Octal.to_int (Bytes.sub_string block 100 8) in let uid = Octal.to_int (Bytes.sub_string block 108 8) in let gid = Octal.to_int (Bytes.sub_string block 116 8) in let size = Octal.to_int64 (Bytes.sub_string block 124 12) in let time = Octal.to_int (Bytes.sub_string block 136 12) in (* tODO: checksum, link *) let type_flag = Bytes.get block 156 in let _ = Bytes.sub_string block 157 100 in if (String.equal "" name) && (size == 0L) && time == 0 then ("", 0L, File (0L, default_file_metadata)) else let ustar_used = String.equal "ustar" (Bytes.sub_string block 257 5) in (*let () = assert ustar_used in*) let prefix = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 345 155)) in let path = String.cat prefix name in let md = { default_file_metadata with st_ino = (Int64.to_int offset) + 1; st_kind = (match type_flag with | '5' -> Unix.S_DIR | _ -> Unix.S_REG); st_perm = mode; st_uid = uid; st_gid = gid; st_size = size; st_mtime = float_of_int time; } in if (String.ends_with ~suffix:"/" path) || (ustar_used && type_flag == '5') then (path, 0L, Directory ((NameMap.empty), md)) else (path, size, File (offset, md)) let buffer = Bytes.create 512 let next_offset o = Int64.add (Int64.mul 512L (Int64.div o 512L)) (if (Int64.rem o 512L) > 0L then 512L else 0L) let read_file f ch off amount = match f with | File (file_offset, md) -> (let () = In_channel.seek ch (Int64.add off (Int64.add 512L file_offset)) in let n = Int64.to_int (max (Int64.of_int amount) (md.st_size)) in let buf = Bytes.create n in match In_channel.really_input ch buf 0 n with | None -> failwith "can't read file!!!!" | _ -> (buf, n)) | _ -> failwith "can't read file" let rec read_structure accum offset ch = let () = In_channel.seek ch offset in match In_channel.really_input ch buffer 0 512 with | None -> Printf.printf "End of file\n"; accum | _ -> let (path, size, v) = read_metadata offset buffer in let new_dir = add_node path v accum in match (path, size) with | ("", 0L) -> accum(*read_structure accum (next_offset (Int64.add 512L offset)) ch*) | _ -> read_structure new_dir (next_offset (Int64.add 512L (Int64.add offset size))) ch and print_tree path = function | File (offset, _) -> Printf.printf "%s: %d\n" path (Int64.to_int offset) | Directory (children, _) -> NameMap.iter (fun s c -> print_tree (Printf.sprintf "%s/%s" path s) c) children let parse_tar fname = let chan = In_channel.open_bin fname in let root = Directory (NameMap.empty, empty_dir_metadata) in let structure = read_structure root 0L chan in (structure, chan)