Refactored the entire code.
This commit is contained in:
182
bin/tar.ml
182
bin/tar.ml
@@ -2,11 +2,17 @@
|
||||
module NameMap = Map.Make(String)
|
||||
|
||||
type metadata = Unix.LargeFile.stats
|
||||
|
||||
type header = {
|
||||
full_path : string;
|
||||
offset : int64;
|
||||
linked_path : string; (* Meaningless unless a hard or soft link *)
|
||||
md : metadata;
|
||||
}
|
||||
|
||||
|
||||
type entry =
|
||||
| File of int64 * metadata
|
||||
| InferredDirectory of entry NameMap.t * metadata
|
||||
| Directory of entry NameMap.t * metadata
|
||||
|
||||
let empty_dir_metadata : metadata = {
|
||||
@@ -42,53 +48,45 @@ let clean_path path =
|
||||
let clean_split = List.filter (fun x -> (not (String.equal x "")) && (not (String.equal x "."))) in
|
||||
clean_split (String.split_on_char '/' (String.trim path))
|
||||
|
||||
let find path tree =
|
||||
let p = clean_path path in
|
||||
let rec aux = function
|
||||
| ([], tree) -> Some tree
|
||||
| (i :: rest, InferredDirectory (map, _))
|
||||
| (i :: rest, Directory (map, _)) ->
|
||||
(match NameMap.find_opt i map with
|
||||
| None -> None
|
||||
| Some x -> aux (rest, x))
|
||||
| _ -> failwith "cannot search non-directory"
|
||||
in aux (p, tree)
|
||||
|
||||
let rec find_from_split path tree =
|
||||
match (path, tree) with
|
||||
| ([], _) -> Some tree
|
||||
| (_, File _) -> failwith "cannot search file!"
|
||||
| ([name], Directory (children, _)) ->
|
||||
NameMap.find_opt name children
|
||||
| (name :: rest, Directory (children, _)) ->
|
||||
(match NameMap.find_opt name children with
|
||||
| None -> None
|
||||
| Some d -> find_from_split rest d)
|
||||
let find_or_empty_dir name dir =
|
||||
match dir with
|
||||
| File _ -> failwith "cannot search file!"
|
||||
let add_direct name node = function
|
||||
| InferredDirectory (map, md) ->
|
||||
InferredDirectory ((NameMap.add name node map), md)
|
||||
| Directory (map, md) ->
|
||||
Directory ((NameMap.add name node map), md)
|
||||
| _ ->
|
||||
Printf.printf "whatever the fuck %s" name; Directory (NameMap.empty, empty_dir_metadata)
|
||||
|
||||
let find_or_infer_dir name = function
|
||||
| InferredDirectory (children, _)
|
||||
| Directory (children, _) ->
|
||||
(match NameMap.find_opt name children with
|
||||
| None -> Directory (NameMap.empty, empty_dir_metadata)
|
||||
| Some (Directory (x,y)) -> (Directory (x,y))
|
||||
| _ -> failwith "whoops, can't access child of file!")
|
||||
| None -> InferredDirectory (NameMap.empty, empty_dir_metadata)
|
||||
| Some x -> x)
|
||||
| _ -> failwith "findorinfer"
|
||||
|
||||
let add_to_dir name node dir =
|
||||
match dir with
|
||||
| Directory (children, md) ->
|
||||
(* There is a specific case we may need to handle here -
|
||||
Normally when a file a/b/c is found, we infer the existence
|
||||
of a/ and a/b/, however if an entry for a/ is later found
|
||||
we need to keep the children of a/, but need to merge the metadata
|
||||
from the new entry.
|
||||
There is also the more general case of the same path being
|
||||
seen multiple times - the standard seems to say the last one wins.
|
||||
*)
|
||||
Directory (NameMap.add name node children, md)
|
||||
| _ -> failwith "not a directory"
|
||||
(* entry -> string list -> entry -> entry*)
|
||||
let rec add_deep node = function
|
||||
| (last :: [], dir) ->
|
||||
add_direct last node dir
|
||||
| (one :: rest, dir) ->
|
||||
add_direct one (add_deep node (rest, (find_or_infer_dir one dir))) dir
|
||||
| _ -> failwith "heh"
|
||||
|
||||
let add_node path node tree =
|
||||
let split_path = clean_path path in
|
||||
let rec aux = function
|
||||
| ([], dir) -> dir
|
||||
| ([name], dir) ->
|
||||
add_to_dir name node dir
|
||||
| (dir_name :: rest, dir) ->
|
||||
add_to_dir dir_name (aux (rest, find_or_empty_dir dir_name dir)) dir
|
||||
(*| _ -> failwith "whatever"*)
|
||||
in aux (split_path, tree)
|
||||
|
||||
(* Assume we have a block, extract metadata from it. *)
|
||||
let read_metadata offset (block : bytes) =
|
||||
(* Assume we have a block, extract data from it into a header object *)
|
||||
let read_header offset (block : bytes) : header =
|
||||
let name = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 0 100)) in
|
||||
let mode = Octal.to_int (Bytes.sub_string block 100 8) in
|
||||
let uid = Octal.to_int (Bytes.sub_string block 108 8) in
|
||||
@@ -97,14 +95,11 @@ let read_metadata offset (block : bytes) =
|
||||
let time = Octal.to_int (Bytes.sub_string block 136 12) in
|
||||
(* tODO: checksum, link *)
|
||||
let type_flag = Bytes.get block 156 in
|
||||
let _ = Bytes.sub_string block 157 100 in
|
||||
if (String.equal "" name) && (size == 0L) && time == 0 then
|
||||
("", 0L, File (0L, default_file_metadata))
|
||||
else
|
||||
let linked_path = Bytes.sub_string block 157 100 in
|
||||
let ustar_used = String.equal "ustar" (Bytes.sub_string block 257 5) in
|
||||
(*let () = assert ustar_used in*)
|
||||
let prefix = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 345 155)) in
|
||||
let path = String.cat prefix name in
|
||||
let path = if ustar_used then String.cat prefix name else name in
|
||||
let md = {
|
||||
default_file_metadata with
|
||||
st_ino = (Int64.to_int offset) + 1;
|
||||
@@ -117,44 +112,75 @@ let read_metadata offset (block : bytes) =
|
||||
st_size = size;
|
||||
st_mtime = float_of_int time;
|
||||
} in
|
||||
if (String.ends_with ~suffix:"/" path) || (ustar_used && type_flag == '5') then
|
||||
(path, 0L, Directory ((NameMap.empty), md))
|
||||
else
|
||||
(path, size, File (offset, md))
|
||||
{
|
||||
full_path = path;
|
||||
md = md;
|
||||
offset = offset;
|
||||
linked_path = linked_path;
|
||||
}
|
||||
|
||||
let buffer = Bytes.create 512
|
||||
let next_offset o =
|
||||
Int64.add (Int64.mul 512L (Int64.div o 512L)) (if (Int64.rem o 512L) > 0L then 512L else 0L)
|
||||
let read_file f ch off amount =
|
||||
match f with
|
||||
| File (file_offset, md) ->
|
||||
(let () = In_channel.seek ch (Int64.add off (Int64.add 512L file_offset)) in
|
||||
let n = Int64.to_int (max (Int64.of_int amount) (md.st_size)) in
|
||||
let buf = Bytes.create n in
|
||||
match In_channel.really_input ch buf 0 n with
|
||||
| None -> failwith "can't read file!!!!"
|
||||
| _ -> (buf, n))
|
||||
| _ -> failwith "can't read file"
|
||||
let rec read_structure accum offset ch =
|
||||
let empty_p h =
|
||||
(String.equal "" h.full_path) && (h.md.st_size = 0L)
|
||||
let next_header_pos h =
|
||||
let (+) = Int64.add in
|
||||
let (/) = Int64.div in
|
||||
let (%) = Int64.rem in
|
||||
let ( * ) = Int64.mul in
|
||||
let o = (h.offset + 512L) + h.md.st_size in
|
||||
let b = 512L * (o / 512L) in
|
||||
if (o % 512L) > 0L then b + 512L else b
|
||||
|
||||
let rec read_headers offset ch =
|
||||
let () = In_channel.seek ch offset in
|
||||
match In_channel.really_input ch buffer 0 512 with
|
||||
| None -> Printf.printf "End of file\n"; accum
|
||||
| None -> (* Print message *)
|
||||
Printf.printf "Tar file ended with no empty entry? Continuing anyway.\n";
|
||||
[]
|
||||
| _ ->
|
||||
let (path, size, v) = read_metadata offset buffer in
|
||||
let new_dir = add_node path v accum in
|
||||
match (path, size) with
|
||||
| ("", 0L) -> accum(*read_structure accum (next_offset (Int64.add 512L offset)) ch*)
|
||||
| _ -> read_structure new_dir (next_offset (Int64.add 512L (Int64.add offset size))) ch
|
||||
let h = read_header offset buffer in
|
||||
if empty_p h || ((In_channel.length ch) <= (next_header_pos h)) then
|
||||
[]
|
||||
else
|
||||
h :: (read_headers (next_header_pos h) ch)
|
||||
let make_node h =
|
||||
match h.md.st_kind with
|
||||
| Unix.S_DIR -> Directory (NameMap.empty, h.md)
|
||||
| Unix.S_REG -> File (h.offset, h.md)
|
||||
| _ -> failwith "other kinds of files not yet implemented"
|
||||
|
||||
and print_tree path = function
|
||||
| File (offset, _) ->
|
||||
Printf.printf "%s: %d\n" path (Int64.to_int offset)
|
||||
| Directory (children, _) ->
|
||||
NameMap.iter (fun s c -> print_tree (Printf.sprintf "%s/%s" path s) c) children
|
||||
let add_node dir h =
|
||||
add_deep (make_node h) ((clean_path h.full_path), dir)
|
||||
|
||||
let build_tree headers =
|
||||
let root = Directory (NameMap.empty, empty_dir_metadata) in
|
||||
List.fold_left add_node root headers
|
||||
|
||||
let parse_tar fname =
|
||||
let chan = In_channel.open_bin fname in
|
||||
let root = Directory (NameMap.empty, empty_dir_metadata) in
|
||||
let structure = read_structure root 0L chan in
|
||||
(structure, chan)
|
||||
let headers = read_headers 0L chan in
|
||||
(build_tree headers, chan)
|
||||
|
||||
(* Helpful for debugging *)
|
||||
let rec print_tree path = function
|
||||
| File (off, _) ->
|
||||
Printf.printf "%d file: %s\n" (Int64.to_int off) path
|
||||
| InferredDirectory (children, _)
|
||||
| Directory (children, _) ->
|
||||
Printf.printf "dir: %s/\n" path;
|
||||
NameMap.iter (fun s d -> print_tree (Printf.sprintf "%s/%s" path s) d) children
|
||||
|
||||
(* Now that we know everything about a file, we need to be able to actually read it *)
|
||||
let read_file f ch off count =
|
||||
match f with
|
||||
| File (file_off, md) ->
|
||||
let remaining = Int64.max 0L (Int64.sub md.st_size off) in
|
||||
let to_read = Int64.to_int (Int64.min (Int64.of_int count) remaining) in
|
||||
let buf = Bytes.create to_read in
|
||||
let () = In_channel.seek ch (Int64.add file_off (Int64.add 512L off)) in
|
||||
(match In_channel.really_input ch buf 0 to_read with
|
||||
| None -> failwith "can't input"
|
||||
| _ ->
|
||||
(buf, to_read)
|
||||
)
|
||||
| _ -> failwith "can't read non-file"
|
||||
|
||||
Reference in New Issue
Block a user