From 684d584402597514446a5b3f32ea73839f5b3ccb Mon Sep 17 00:00:00 2001 From: haxala1r Date: Wed, 26 Nov 2025 23:36:52 +0300 Subject: [PATCH] Refactored the entire code. --- bin/main.ml | 27 ++++---- bin/octal.ml | 3 + bin/tar.ml | 182 +++++++++++++++++++++++++++++---------------------- 3 files changed, 122 insertions(+), 90 deletions(-) diff --git a/bin/main.ml b/bin/main.ml index 8c36ef0..b3af7fc 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -3,40 +3,43 @@ module Octal = Octal module Tar = Tar -let clean_path path = - let clean_split = List.filter (fun x -> (not (String.equal x "")) && (not (String.equal x "."))) in - clean_split (String.split_on_char '/' (String.trim path)) let (root, tar_ch) = Tar.parse_tar "asd.tar" let () = Tar.print_tree "" root let getattr path = - match Tar.find_from_split (clean_path path) root with + match Tar.find path root with | None -> raise (Unix.Unix_error (Unix.ENOENT, "hi1", "hi1")) | Some (File (_, md)) -> md | Some (Directory (_, md)) -> md + | Some (InferredDirectory (_, md)) -> md let readdir path _ = - match Tar.find_from_split (clean_path path) root with + match Tar.find path root with | None -> raise (Unix.Unix_error (Unix.ENOENT, "234", "234")) | Some (File (_, _)) -> raise (Unix.Unix_error (Unix.ENOENT, "345", "345")) + | Some (InferredDirectory (children, _)) | Some (Directory (children, _)) -> ["."; ".."] @ (List.map (fun (name, _) -> name) (Tar.NameMap.to_list children)) -let read path buf offset amount = - match Tar.find_from_split (clean_path path) root with +(* There seems to be a bug here - the amount argument given to this function + is NOT the correct requested amount. The buffer size is correct though + so I'm taking that as the source of truth. + *) +let read path buf offset _amount = +(* + let () = Printf.printf "PARAMS %d %d %d\n" (Int64.to_int offset) amount (Bigarray.Array1.dim buf) in + let () = Out_channel.flush_all () in*) + match Tar.find path root with | None -> raise (Unix.Unix_error (Unix.ENOENT, "read", path)) | Some (File (fo, md)) -> - (let (file_buf,read) = (Tar.read_file (File (fo, md)) tar_ch offset amount) in + (let (file_buf,read) = (Tar.read_file (File (fo, md)) tar_ch offset (Bigarray.Array1.dim buf)) in Bytes.iteri (fun i c -> Bigarray.Array1.set buf i c) file_buf; read) | _ -> raise (Unix.Unix_error (Unix.EISDIR, "read", path)) - + let _ = - let () = Printf.printf "hmmm?\n" in - let () = List.iter (fun x -> Printf.printf "%s\n" x) (readdir "bin" 5) in - let () = Out_channel.flush_all () in Fuse.main Sys.argv { Fuse.default_operations with diff --git a/bin/octal.ml b/bin/octal.ml index e367516..466c9fa 100644 --- a/bin/octal.ml +++ b/bin/octal.ml @@ -31,6 +31,9 @@ let octal_reader (type a) (module I : IntType with type t = a) sin : a = let l = List.init (String.length s) (String.get s) in let rec aux a = function | c :: rest -> + if (Char.code c) > (Char.code '7') || (Char.code c) < (Char.code '0') then + I.of_int 0 + else aux ((a * (I.of_int 8)) + (I.of_int (int_of_char c)) - (I.of_int (int_of_char '0'))) rest | [] -> a in aux (I.of_int 0) l diff --git a/bin/tar.ml b/bin/tar.ml index 7b496cf..46f6a6b 100644 --- a/bin/tar.ml +++ b/bin/tar.ml @@ -2,11 +2,17 @@ module NameMap = Map.Make(String) type metadata = Unix.LargeFile.stats - +type header = { + full_path : string; + offset : int64; + linked_path : string; (* Meaningless unless a hard or soft link *) + md : metadata; + } type entry = | File of int64 * metadata + | InferredDirectory of entry NameMap.t * metadata | Directory of entry NameMap.t * metadata let empty_dir_metadata : metadata = { @@ -42,53 +48,45 @@ let clean_path path = let clean_split = List.filter (fun x -> (not (String.equal x "")) && (not (String.equal x "."))) in clean_split (String.split_on_char '/' (String.trim path)) +let find path tree = + let p = clean_path path in + let rec aux = function + | ([], tree) -> Some tree + | (i :: rest, InferredDirectory (map, _)) + | (i :: rest, Directory (map, _)) -> + (match NameMap.find_opt i map with + | None -> None + | Some x -> aux (rest, x)) + | _ -> failwith "cannot search non-directory" + in aux (p, tree) -let rec find_from_split path tree = - match (path, tree) with - | ([], _) -> Some tree - | (_, File _) -> failwith "cannot search file!" - | ([name], Directory (children, _)) -> - NameMap.find_opt name children - | (name :: rest, Directory (children, _)) -> - (match NameMap.find_opt name children with - | None -> None - | Some d -> find_from_split rest d) -let find_or_empty_dir name dir = - match dir with - | File _ -> failwith "cannot search file!" +let add_direct name node = function + | InferredDirectory (map, md) -> + InferredDirectory ((NameMap.add name node map), md) + | Directory (map, md) -> + Directory ((NameMap.add name node map), md) + | _ -> + Printf.printf "whatever the fuck %s" name; Directory (NameMap.empty, empty_dir_metadata) + +let find_or_infer_dir name = function + | InferredDirectory (children, _) | Directory (children, _) -> (match NameMap.find_opt name children with - | None -> Directory (NameMap.empty, empty_dir_metadata) - | Some (Directory (x,y)) -> (Directory (x,y)) - | _ -> failwith "whoops, can't access child of file!") + | None -> InferredDirectory (NameMap.empty, empty_dir_metadata) + | Some x -> x) + | _ -> failwith "findorinfer" -let add_to_dir name node dir = - match dir with - | Directory (children, md) -> - (* There is a specific case we may need to handle here - - Normally when a file a/b/c is found, we infer the existence - of a/ and a/b/, however if an entry for a/ is later found - we need to keep the children of a/, but need to merge the metadata - from the new entry. - There is also the more general case of the same path being - seen multiple times - the standard seems to say the last one wins. - *) - Directory (NameMap.add name node children, md) - | _ -> failwith "not a directory" +(* entry -> string list -> entry -> entry*) +let rec add_deep node = function + | (last :: [], dir) -> + add_direct last node dir + | (one :: rest, dir) -> + add_direct one (add_deep node (rest, (find_or_infer_dir one dir))) dir + | _ -> failwith "heh" -let add_node path node tree = - let split_path = clean_path path in - let rec aux = function - | ([], dir) -> dir - | ([name], dir) -> - add_to_dir name node dir - | (dir_name :: rest, dir) -> - add_to_dir dir_name (aux (rest, find_or_empty_dir dir_name dir)) dir - (*| _ -> failwith "whatever"*) - in aux (split_path, tree) -(* Assume we have a block, extract metadata from it. *) -let read_metadata offset (block : bytes) = +(* Assume we have a block, extract data from it into a header object *) +let read_header offset (block : bytes) : header = let name = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 0 100)) in let mode = Octal.to_int (Bytes.sub_string block 100 8) in let uid = Octal.to_int (Bytes.sub_string block 108 8) in @@ -97,14 +95,11 @@ let read_metadata offset (block : bytes) = let time = Octal.to_int (Bytes.sub_string block 136 12) in (* tODO: checksum, link *) let type_flag = Bytes.get block 156 in - let _ = Bytes.sub_string block 157 100 in - if (String.equal "" name) && (size == 0L) && time == 0 then - ("", 0L, File (0L, default_file_metadata)) - else + let linked_path = Bytes.sub_string block 157 100 in let ustar_used = String.equal "ustar" (Bytes.sub_string block 257 5) in (*let () = assert ustar_used in*) let prefix = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 345 155)) in - let path = String.cat prefix name in + let path = if ustar_used then String.cat prefix name else name in let md = { default_file_metadata with st_ino = (Int64.to_int offset) + 1; @@ -117,44 +112,75 @@ let read_metadata offset (block : bytes) = st_size = size; st_mtime = float_of_int time; } in - if (String.ends_with ~suffix:"/" path) || (ustar_used && type_flag == '5') then - (path, 0L, Directory ((NameMap.empty), md)) - else - (path, size, File (offset, md)) + { + full_path = path; + md = md; + offset = offset; + linked_path = linked_path; + } let buffer = Bytes.create 512 -let next_offset o = - Int64.add (Int64.mul 512L (Int64.div o 512L)) (if (Int64.rem o 512L) > 0L then 512L else 0L) -let read_file f ch off amount = - match f with - | File (file_offset, md) -> - (let () = In_channel.seek ch (Int64.add off (Int64.add 512L file_offset)) in - let n = Int64.to_int (max (Int64.of_int amount) (md.st_size)) in - let buf = Bytes.create n in - match In_channel.really_input ch buf 0 n with - | None -> failwith "can't read file!!!!" - | _ -> (buf, n)) - | _ -> failwith "can't read file" -let rec read_structure accum offset ch = +let empty_p h = + (String.equal "" h.full_path) && (h.md.st_size = 0L) +let next_header_pos h = + let (+) = Int64.add in + let (/) = Int64.div in + let (%) = Int64.rem in + let ( * ) = Int64.mul in + let o = (h.offset + 512L) + h.md.st_size in + let b = 512L * (o / 512L) in + if (o % 512L) > 0L then b + 512L else b + +let rec read_headers offset ch = let () = In_channel.seek ch offset in match In_channel.really_input ch buffer 0 512 with - | None -> Printf.printf "End of file\n"; accum + | None -> (* Print message *) + Printf.printf "Tar file ended with no empty entry? Continuing anyway.\n"; + [] | _ -> - let (path, size, v) = read_metadata offset buffer in - let new_dir = add_node path v accum in - match (path, size) with - | ("", 0L) -> accum(*read_structure accum (next_offset (Int64.add 512L offset)) ch*) - | _ -> read_structure new_dir (next_offset (Int64.add 512L (Int64.add offset size))) ch + let h = read_header offset buffer in + if empty_p h || ((In_channel.length ch) <= (next_header_pos h)) then + [] + else + h :: (read_headers (next_header_pos h) ch) +let make_node h = + match h.md.st_kind with + | Unix.S_DIR -> Directory (NameMap.empty, h.md) + | Unix.S_REG -> File (h.offset, h.md) + | _ -> failwith "other kinds of files not yet implemented" -and print_tree path = function - | File (offset, _) -> - Printf.printf "%s: %d\n" path (Int64.to_int offset) - | Directory (children, _) -> - NameMap.iter (fun s c -> print_tree (Printf.sprintf "%s/%s" path s) c) children +let add_node dir h = + add_deep (make_node h) ((clean_path h.full_path), dir) + +let build_tree headers = + let root = Directory (NameMap.empty, empty_dir_metadata) in + List.fold_left add_node root headers let parse_tar fname = let chan = In_channel.open_bin fname in - let root = Directory (NameMap.empty, empty_dir_metadata) in - let structure = read_structure root 0L chan in - (structure, chan) + let headers = read_headers 0L chan in + (build_tree headers, chan) +(* Helpful for debugging *) +let rec print_tree path = function + | File (off, _) -> + Printf.printf "%d file: %s\n" (Int64.to_int off) path + | InferredDirectory (children, _) + | Directory (children, _) -> + Printf.printf "dir: %s/\n" path; + NameMap.iter (fun s d -> print_tree (Printf.sprintf "%s/%s" path s) d) children + +(* Now that we know everything about a file, we need to be able to actually read it *) +let read_file f ch off count = + match f with + | File (file_off, md) -> + let remaining = Int64.max 0L (Int64.sub md.st_size off) in + let to_read = Int64.to_int (Int64.min (Int64.of_int count) remaining) in + let buf = Bytes.create to_read in + let () = In_channel.seek ch (Int64.add file_off (Int64.add 512L off)) in + (match In_channel.really_input ch buf 0 to_read with + | None -> failwith "can't input" + | _ -> + (buf, to_read) + ) + | _ -> failwith "can't read non-file"