Refactored the entire code.

This commit is contained in:
2025-11-26 23:36:52 +03:00
parent 4709e81b81
commit 684d584402
3 changed files with 122 additions and 90 deletions

View File

@@ -3,40 +3,43 @@
module Octal = Octal module Octal = Octal
module Tar = Tar module Tar = Tar
let clean_path path =
let clean_split = List.filter (fun x -> (not (String.equal x "")) && (not (String.equal x "."))) in
clean_split (String.split_on_char '/' (String.trim path))
let (root, tar_ch) = Tar.parse_tar "asd.tar" let (root, tar_ch) = Tar.parse_tar "asd.tar"
let () = Tar.print_tree "" root let () = Tar.print_tree "" root
let getattr path = let getattr path =
match Tar.find_from_split (clean_path path) root with match Tar.find path root with
| None -> raise (Unix.Unix_error (Unix.ENOENT, "hi1", "hi1")) | None -> raise (Unix.Unix_error (Unix.ENOENT, "hi1", "hi1"))
| Some (File (_, md)) -> md | Some (File (_, md)) -> md
| Some (Directory (_, md)) -> md | Some (Directory (_, md)) -> md
| Some (InferredDirectory (_, md)) -> md
let readdir path _ = let readdir path _ =
match Tar.find_from_split (clean_path path) root with match Tar.find path root with
| None -> raise (Unix.Unix_error (Unix.ENOENT, "234", "234")) | None -> raise (Unix.Unix_error (Unix.ENOENT, "234", "234"))
| Some (File (_, _)) -> raise (Unix.Unix_error (Unix.ENOENT, "345", "345")) | Some (File (_, _)) -> raise (Unix.Unix_error (Unix.ENOENT, "345", "345"))
| Some (InferredDirectory (children, _))
| Some (Directory (children, _)) -> | Some (Directory (children, _)) ->
["."; ".."] @ (List.map (fun (name, _) -> name) (Tar.NameMap.to_list children)) ["."; ".."] @ (List.map (fun (name, _) -> name) (Tar.NameMap.to_list children))
let read path buf offset amount = (* There seems to be a bug here - the amount argument given to this function
match Tar.find_from_split (clean_path path) root with is NOT the correct requested amount. The buffer size is correct though
so I'm taking that as the source of truth.
*)
let read path buf offset _amount =
(*
let () = Printf.printf "PARAMS %d %d %d\n" (Int64.to_int offset) amount (Bigarray.Array1.dim buf) in
let () = Out_channel.flush_all () in*)
match Tar.find path root with
| None -> raise (Unix.Unix_error (Unix.ENOENT, "read", path)) | None -> raise (Unix.Unix_error (Unix.ENOENT, "read", path))
| Some (File (fo, md)) -> | Some (File (fo, md)) ->
(let (file_buf,read) = (Tar.read_file (File (fo, md)) tar_ch offset amount) in (let (file_buf,read) = (Tar.read_file (File (fo, md)) tar_ch offset (Bigarray.Array1.dim buf)) in
Bytes.iteri (fun i c -> Bigarray.Array1.set buf i c) file_buf; Bytes.iteri (fun i c -> Bigarray.Array1.set buf i c) file_buf;
read) read)
| _ -> raise (Unix.Unix_error (Unix.EISDIR, "read", path)) | _ -> raise (Unix.Unix_error (Unix.EISDIR, "read", path))
let _ = let _ =
let () = Printf.printf "hmmm?\n" in
let () = List.iter (fun x -> Printf.printf "%s\n" x) (readdir "bin" 5) in
let () = Out_channel.flush_all () in
Fuse.main Sys.argv Fuse.main Sys.argv
{ {
Fuse.default_operations with Fuse.default_operations with

View File

@@ -31,6 +31,9 @@ let octal_reader (type a) (module I : IntType with type t = a) sin : a =
let l = List.init (String.length s) (String.get s) in let l = List.init (String.length s) (String.get s) in
let rec aux a = function let rec aux a = function
| c :: rest -> | c :: rest ->
if (Char.code c) > (Char.code '7') || (Char.code c) < (Char.code '0') then
I.of_int 0
else
aux ((a * (I.of_int 8)) + (I.of_int (int_of_char c)) - (I.of_int (int_of_char '0'))) rest aux ((a * (I.of_int 8)) + (I.of_int (int_of_char c)) - (I.of_int (int_of_char '0'))) rest
| [] -> a | [] -> a
in aux (I.of_int 0) l in aux (I.of_int 0) l

View File

@@ -2,11 +2,17 @@
module NameMap = Map.Make(String) module NameMap = Map.Make(String)
type metadata = Unix.LargeFile.stats type metadata = Unix.LargeFile.stats
type header = {
full_path : string;
offset : int64;
linked_path : string; (* Meaningless unless a hard or soft link *)
md : metadata;
}
type entry = type entry =
| File of int64 * metadata | File of int64 * metadata
| InferredDirectory of entry NameMap.t * metadata
| Directory of entry NameMap.t * metadata | Directory of entry NameMap.t * metadata
let empty_dir_metadata : metadata = { let empty_dir_metadata : metadata = {
@@ -42,53 +48,45 @@ let clean_path path =
let clean_split = List.filter (fun x -> (not (String.equal x "")) && (not (String.equal x "."))) in let clean_split = List.filter (fun x -> (not (String.equal x "")) && (not (String.equal x "."))) in
clean_split (String.split_on_char '/' (String.trim path)) clean_split (String.split_on_char '/' (String.trim path))
let find path tree =
let p = clean_path path in
let rec aux = function
| ([], tree) -> Some tree
| (i :: rest, InferredDirectory (map, _))
| (i :: rest, Directory (map, _)) ->
(match NameMap.find_opt i map with
| None -> None
| Some x -> aux (rest, x))
| _ -> failwith "cannot search non-directory"
in aux (p, tree)
let rec find_from_split path tree = let add_direct name node = function
match (path, tree) with | InferredDirectory (map, md) ->
| ([], _) -> Some tree InferredDirectory ((NameMap.add name node map), md)
| (_, File _) -> failwith "cannot search file!" | Directory (map, md) ->
| ([name], Directory (children, _)) -> Directory ((NameMap.add name node map), md)
NameMap.find_opt name children | _ ->
| (name :: rest, Directory (children, _)) -> Printf.printf "whatever the fuck %s" name; Directory (NameMap.empty, empty_dir_metadata)
(match NameMap.find_opt name children with
| None -> None let find_or_infer_dir name = function
| Some d -> find_from_split rest d) | InferredDirectory (children, _)
let find_or_empty_dir name dir =
match dir with
| File _ -> failwith "cannot search file!"
| Directory (children, _) -> | Directory (children, _) ->
(match NameMap.find_opt name children with (match NameMap.find_opt name children with
| None -> Directory (NameMap.empty, empty_dir_metadata) | None -> InferredDirectory (NameMap.empty, empty_dir_metadata)
| Some (Directory (x,y)) -> (Directory (x,y)) | Some x -> x)
| _ -> failwith "whoops, can't access child of file!") | _ -> failwith "findorinfer"
let add_to_dir name node dir = (* entry -> string list -> entry -> entry*)
match dir with let rec add_deep node = function
| Directory (children, md) -> | (last :: [], dir) ->
(* There is a specific case we may need to handle here - add_direct last node dir
Normally when a file a/b/c is found, we infer the existence | (one :: rest, dir) ->
of a/ and a/b/, however if an entry for a/ is later found add_direct one (add_deep node (rest, (find_or_infer_dir one dir))) dir
we need to keep the children of a/, but need to merge the metadata | _ -> failwith "heh"
from the new entry.
There is also the more general case of the same path being
seen multiple times - the standard seems to say the last one wins.
*)
Directory (NameMap.add name node children, md)
| _ -> failwith "not a directory"
let add_node path node tree =
let split_path = clean_path path in
let rec aux = function
| ([], dir) -> dir
| ([name], dir) ->
add_to_dir name node dir
| (dir_name :: rest, dir) ->
add_to_dir dir_name (aux (rest, find_or_empty_dir dir_name dir)) dir
(*| _ -> failwith "whatever"*)
in aux (split_path, tree)
(* Assume we have a block, extract metadata from it. *) (* Assume we have a block, extract data from it into a header object *)
let read_metadata offset (block : bytes) = let read_header offset (block : bytes) : header =
let name = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 0 100)) in let name = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 0 100)) in
let mode = Octal.to_int (Bytes.sub_string block 100 8) in let mode = Octal.to_int (Bytes.sub_string block 100 8) in
let uid = Octal.to_int (Bytes.sub_string block 108 8) in let uid = Octal.to_int (Bytes.sub_string block 108 8) in
@@ -97,14 +95,11 @@ let read_metadata offset (block : bytes) =
let time = Octal.to_int (Bytes.sub_string block 136 12) in let time = Octal.to_int (Bytes.sub_string block 136 12) in
(* tODO: checksum, link *) (* tODO: checksum, link *)
let type_flag = Bytes.get block 156 in let type_flag = Bytes.get block 156 in
let _ = Bytes.sub_string block 157 100 in let linked_path = Bytes.sub_string block 157 100 in
if (String.equal "" name) && (size == 0L) && time == 0 then
("", 0L, File (0L, default_file_metadata))
else
let ustar_used = String.equal "ustar" (Bytes.sub_string block 257 5) in let ustar_used = String.equal "ustar" (Bytes.sub_string block 257 5) in
(*let () = assert ustar_used in*) (*let () = assert ustar_used in*)
let prefix = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 345 155)) in let prefix = List.hd (String.split_on_char (Char.chr 0) (Bytes.sub_string block 345 155)) in
let path = String.cat prefix name in let path = if ustar_used then String.cat prefix name else name in
let md = { let md = {
default_file_metadata with default_file_metadata with
st_ino = (Int64.to_int offset) + 1; st_ino = (Int64.to_int offset) + 1;
@@ -117,44 +112,75 @@ let read_metadata offset (block : bytes) =
st_size = size; st_size = size;
st_mtime = float_of_int time; st_mtime = float_of_int time;
} in } in
if (String.ends_with ~suffix:"/" path) || (ustar_used && type_flag == '5') then {
(path, 0L, Directory ((NameMap.empty), md)) full_path = path;
else md = md;
(path, size, File (offset, md)) offset = offset;
linked_path = linked_path;
}
let buffer = Bytes.create 512 let buffer = Bytes.create 512
let next_offset o = let empty_p h =
Int64.add (Int64.mul 512L (Int64.div o 512L)) (if (Int64.rem o 512L) > 0L then 512L else 0L) (String.equal "" h.full_path) && (h.md.st_size = 0L)
let read_file f ch off amount = let next_header_pos h =
match f with let (+) = Int64.add in
| File (file_offset, md) -> let (/) = Int64.div in
(let () = In_channel.seek ch (Int64.add off (Int64.add 512L file_offset)) in let (%) = Int64.rem in
let n = Int64.to_int (max (Int64.of_int amount) (md.st_size)) in let ( * ) = Int64.mul in
let buf = Bytes.create n in let o = (h.offset + 512L) + h.md.st_size in
match In_channel.really_input ch buf 0 n with let b = 512L * (o / 512L) in
| None -> failwith "can't read file!!!!" if (o % 512L) > 0L then b + 512L else b
| _ -> (buf, n))
| _ -> failwith "can't read file" let rec read_headers offset ch =
let rec read_structure accum offset ch =
let () = In_channel.seek ch offset in let () = In_channel.seek ch offset in
match In_channel.really_input ch buffer 0 512 with match In_channel.really_input ch buffer 0 512 with
| None -> Printf.printf "End of file\n"; accum | None -> (* Print message *)
Printf.printf "Tar file ended with no empty entry? Continuing anyway.\n";
[]
| _ -> | _ ->
let (path, size, v) = read_metadata offset buffer in let h = read_header offset buffer in
let new_dir = add_node path v accum in if empty_p h || ((In_channel.length ch) <= (next_header_pos h)) then
match (path, size) with []
| ("", 0L) -> accum(*read_structure accum (next_offset (Int64.add 512L offset)) ch*) else
| _ -> read_structure new_dir (next_offset (Int64.add 512L (Int64.add offset size))) ch h :: (read_headers (next_header_pos h) ch)
let make_node h =
match h.md.st_kind with
| Unix.S_DIR -> Directory (NameMap.empty, h.md)
| Unix.S_REG -> File (h.offset, h.md)
| _ -> failwith "other kinds of files not yet implemented"
and print_tree path = function let add_node dir h =
| File (offset, _) -> add_deep (make_node h) ((clean_path h.full_path), dir)
Printf.printf "%s: %d\n" path (Int64.to_int offset)
| Directory (children, _) -> let build_tree headers =
NameMap.iter (fun s c -> print_tree (Printf.sprintf "%s/%s" path s) c) children let root = Directory (NameMap.empty, empty_dir_metadata) in
List.fold_left add_node root headers
let parse_tar fname = let parse_tar fname =
let chan = In_channel.open_bin fname in let chan = In_channel.open_bin fname in
let root = Directory (NameMap.empty, empty_dir_metadata) in let headers = read_headers 0L chan in
let structure = read_structure root 0L chan in (build_tree headers, chan)
(structure, chan)
(* Helpful for debugging *)
let rec print_tree path = function
| File (off, _) ->
Printf.printf "%d file: %s\n" (Int64.to_int off) path
| InferredDirectory (children, _)
| Directory (children, _) ->
Printf.printf "dir: %s/\n" path;
NameMap.iter (fun s d -> print_tree (Printf.sprintf "%s/%s" path s) d) children
(* Now that we know everything about a file, we need to be able to actually read it *)
let read_file f ch off count =
match f with
| File (file_off, md) ->
let remaining = Int64.max 0L (Int64.sub md.st_size off) in
let to_read = Int64.to_int (Int64.min (Int64.of_int count) remaining) in
let buf = Bytes.create to_read in
let () = In_channel.seek ch (Int64.add file_off (Int64.add 512L off)) in
(match In_channel.really_input ch buf 0 to_read with
| None -> failwith "can't input"
| _ ->
(buf, to_read)
)
| _ -> failwith "can't read non-file"