This document lists all error codes that Merlint can detect, along with their descriptions and fix hints.
High cyclomatic complexity makes code harder to understand and test. Consider breaking complex functions into smaller, more focused functions. Each function should ideally do one thing well.
let check_input x y z =
if x > 0 then
if y > 0 then
if z > 0 then
if x + y > z then
if y + z > x then
if x + z > y then
"valid"
else "invalid"
else "invalid"
else "invalid"
else "invalid"
else "invalid"
else "invalid"
let check_positive x = x > 0
let check_triangle x y z =
x + y > z && y + z > x && x + z > y
let check_input x y z =
if not (check_positive x && check_positive y && check_positive z) then
"invalid"
else if not (check_triangle x y z) then
"invalid"
else
"valid"
This issue means your functions are too long and hard to read. Fix them by extracting logical sections into separate functions with descriptive names. Note: Functions with pattern matching get additional allowance (2 lines per case). Pure data structures (lists, records) are also exempt from length checks. For better readability, consider using helper functions for complex logic. Aim for functions under 50 lines of actual logic.
let process_all_data x y z =
(* This function is intentionally long to demonstrate the rule *)
let a = x + 1 in
let b = y + 1 in
let c = z + 1 in
let d = a * 2 in
let e = b * 2 in
let f = c * 2 in
let g = d + e in
let h = e + f in
let i = f + d in
let j = g * 2 in
let k = h * 2 in
let l = i * 2 in
let m = j + k in
let n = k + l in
let o = l + j in
let p = m * 2 in
let q = n * 2 in
let r = o * 2 in
let s = p + q in
let t = q + r in
let u = r + p in
let v = s * 2 in
let w = t * 2 in
let x1 = u * 2 in
let y1 = v + w in
let z1 = w + x1 in
let a1 = x1 + v in
let b1 = y1 * 2 in
let c1 = z1 * 2 in
let d1 = a1 * 2 in
let e1 = b1 + c1 in
let f1 = c1 + d1 in
let g1 = d1 + b1 in
let h1 = e1 * 2 in
let i1 = f1 * 2 in
let j1 = g1 * 2 in
let k1 = h1 + i1 in
let l1 = i1 + j1 in
let m1 = j1 + h1 in
let n1 = k1 * 2 in
let o1 = l1 * 2 in
let p1 = m1 * 2 in
let q1 = n1 + o1 in
let r1 = o1 + p1 in
let s1 = p1 + n1 in
let t1 = q1 * 2 in
let u1 = r1 * 2 in
let v1 = s1 * 2 in
let w1 = t1 + u1 in
let x2 = u1 + v1 in
let y2 = v1 + t1 in
let result = w1 + x2 + y2 in
result
let step1 x y = (x + 1, y + 1)
let step2 (a, b) = (a * 2, b * 2)
let combine (c, d) = (c + d) * 2
let process_all x y =
let (a, b) = step1 x y in
let (c, d) = step2 (a, b) in
combine (c, d)
This issue means your code has too many nested conditions making it hard to follow. Fix it by extracting nested logic into helper functions, using early returns to reduce nesting, or combining conditions when appropriate. Aim for maximum nesting depth of 4.
let process x y z =
if x > 0 then
if y > 0 then
if z > 0 then
if x < 100 then
x + y + z
else 0
else 0
else 0
else 0
let process x y z =
if x <= 0 || y <= 0 || z <= 0 then 0
else if x >= 100 then 0
else x + y + z
Obj.magic completely bypasses OCaml's type system and is extremely dangerous. It can lead to segmentation faults, data corruption, and unpredictable behavior. Instead, use proper type definitions, GADTs, or polymorphic variants. If you absolutely must use unsafe features, document why and isolate the usage.
let coerce x = Stdlib.Obj.magic x
(* Use proper type conversions *)
let int_of_string_opt s =
try Some (int_of_string s) with _ -> None
(* Or use variant types *)
type value = Int of int | String of string
let to_int = function Int i -> Some i | _ -> None
Catch-all exception handlers (with _ ->) can hide unexpected errors and make debugging difficult. Always handle specific exceptions explicitly. If you must catch all exceptions, log them or re-raise after cleanup.
let parse_int s =
try int_of_string s with _ -> 0
let read_config () =
try
let ic = open_in "config.txt" in
let data = input_line ic in
close_in ic;
data
with _ -> "default"
let parse_int s =
try int_of_string s with
| Failure _ -> 0
let read_config () =
try
let ic = open_in "config.txt" in
let data = input_line ic in
close_in ic;
data
with
| Sys_error msg ->
Fmt.epr "Config error: %s@." msg;
"default"
| End_of_file ->
Fmt.epr "Config file is empty@.";
"default"
module Log = struct
let err f = f Format.err_formatter
end
let dangerous_operation () =
failwith "Something went wrong"
let safe_wrapper () =
try dangerous_operation () with
| exn ->
Log.err (fun m ->
Format.fprintf m "Operation failed: %s@."
(Printexc.to_string exn));
raise exn
Warnings should be addressed rather than silenced. Fix the underlying issue instead of using warning suppression attributes. If you must suppress a warning, document why it's necessary.
[@@@ocaml.warning "-32"]
let unused_function x = x + 1
[@@ocaml.warning "-27"]
let partial_match = function
| Some x -> x
[@ocaml.warning "-9"]
type t = { mutable field : int; another : string }
let used_function x = x + 1
let () = print_int (used_function 5)
let complete_match = function
| Some x -> x
| None -> 0
type t = { field : int; another : string }
The Str module is outdated and has a problematic API. Use the Re module instead for regular expressions. Re provides a better API, is more performant, and doesn't have global state issues.
let contains_at s =
Str.string_match (Str.regexp ".*@.*") s 0
(* Requires: opam install re *)
let at_re = Re.compile (Re.str "@")
let contains_at s = Re.execp at_re s
The Fmt module provides a more modern and composable approach to formatting. It offers better type safety and cleaner APIs compared to Printf/Format modules.
let make_error msg line =
Stdlib.Printf.sprintf "Error: %s at line %d" msg line
let print_count n =
Stdlib.Printf.printf "Processing %d items...\n" n
(* Requires: opam install fmt *)
let make_error msg line =
Fmt.str "Error: %s at line %d" msg line
let print_count n =
Fmt.pr "Processing %d items...@." n
Variant constructors should use Snake_case (e.g., Waiting_for_input, Processing_data), not CamelCase. This matches the project's naming conventions.
type status =
| WaitingForInput
| ProcessingData
| ErrorOccurred
type status =
| Waiting_for_input (* Snake_case *)
| Processing_data
| Error_occurred
Module names should use Snake_case (e.g., My_module, User_profile). File names use lowercase_with_underscores which OCaml automatically converts to module names.
module UserProfile = struct end
module User_profile = struct end
Values and function names should use snake_case (e.g., find_user, create_channel). Short, descriptive, and lowercase with underscores. This is the standard convention in OCaml for values and functions.
type user = { name : string }
let myValue = 42
let getUserName user = user.name
type user = { name : string }
let my_value = 42
let get_user_name user = user.name
Type names should use snake_case. The primary type in a module should be named t, and identifiers should be id. This convention helps maintain consistency across the codebase.
type userProfile = { name : string }
type http_response = HttpOk | HttpError
type user_profile = { name: string }
type http_response = Ok | Error
Avoid using too many underscores in identifier names as they make code harder to read. Consider using more descriptive names or restructuring the code to avoid deeply nested concepts.
let get_user_profile_data_from_database_by_id () = 42
let get_user_by_id () = 42
Functions that return option types should be prefixed with 'find_', while functions that return non-option types should be prefixed with 'get_'. This convention helps communicate the function's behavior to callers.
let get_user () = None (* returns option but named get_* *)
let find_name () = "John" (* returns string but named find_* *)
let find_user () = None (* returns option, correctly named *)
let get_name () = "John" (* returns string, correctly named *)
Avoid prefixing type or function names with the module name. The module already provides the namespace, so Message.message_type should just be Message.t. Exception: Pp.pp is idiomatic for pretty-printing modules.
(* Module process.ml *)
let process_start () = ()
let process_stop () = ()
type process_config = {timeout: int}
(* In process.ml *)
let start () = ()
let stop () = ()
type config = {timeout: int}
(* Usage: Process.start (), Process.config *)
Functions prefixed with 'create_', 'make_', 'get_', or 'find_' can often omit the prefix when the remaining name is descriptive enough. For example, 'create_user' can be just 'user', 'make_widget' can be 'widget', 'get_name' can be 'name', and 'find_user' can be 'user' (returning option). Keep the prefix only when it adds meaningful distinction or when the bare name would be ambiguous. In modules, 'Module.create_module' should be 'Module.v'.
(* Bad examples - functions with redundant prefixes *)
(* Define necessary types *)
type user = { name : string; email : string; id : int }
type project = { name : string; title : string; description : string; status : string }
module Widget = struct
let make ~visible ~bordered = (visible, bordered)
let construct config = config
end
(* create_ prefix examples *)
let create_user name email = { name; email; id = 0 }
let create_project title = { name = ""; title; description = ""; status = "" }
let create_widget ~visible ~bordered = Widget.make ~visible ~bordered
(* get_ prefix examples *)
let get_name user = user.name
let get_email user = user.email
let get_status project = project.status
(* find_ prefix examples *)
let find_user_by_id users id = List.find_opt (fun u -> u.id = id) users
let find_project_by_name projects name = List.find_opt (fun p -> p.name = name) projects
(* Module.create_module pattern *)
module Progress = struct
type t = { current : int; total : int }
let create_progress current total = { current; total }
end
module User = struct
type t = { name : string; email : string }
let create_user name email = { name; email }
end
(* More aggressive cases that should be flagged *)
let create_temp_file prefix = Filename.temp_file prefix ".tmp"
let make_widget config = Widget.construct config
let get_current_time () = Unix.gettimeofday ()
let find_free_port start_port = start_port + 1 (* dummy implementation *)
let find_next_available_port start_port = find_free_port start_port
(* Good examples - functions without redundant prefixes *)
(* Define necessary types *)
type user = { name : string; email : string; id : int }
type project = { name : string; title : string; description : string; status : string }
module Widget = struct
let make ~visible ~bordered = (visible, bordered)
let construct config = config
end
module Filesystem = struct
let find path = path
end
module Http = struct
let get remote = remote
end
module Db = struct
let query db query = (db, query)
end
(* Direct constructor-style functions *)
let user name email = { name; email; id = 0 }
let project title = { name = ""; title; description = ""; status = "" }
let widget ~visible ~bordered = Widget.make ~visible ~bordered
(* Property accessors *)
let name user = user.name
let email user = user.email
let status project = project.status
(* Finders returning options *)
let user_by_id users id = List.find_opt (fun u -> u.id = id) users
let project_by_name projects name = List.find_opt (fun p -> p.name = name) projects
(* Good module patterns *)
module Progress = struct
type t = { current : int; total : int }
let v current total = { current; total }
let create current total = { current; total }
end
module User = struct
type t = { name : string; email : string }
let v name email = { name; email }
let create name email = { name; email }
end
(* Helper function *)
let construct_user data = data
(* These are fine - different semantic meanings *)
let build_user data = construct_user data (* build vs create *)
let build_widget config = Widget.construct config (* build vs make *)
let fetch_name remote = Http.get remote (* fetch vs get *)
let retrieve_email user = user.email (* retrieve vs get *)
let search_user db query = Db.query db query (* search vs find *)
let locate_file path = Filesystem.find path (* locate vs find *)
In OCaml modules, the idiomatic name for the primary constructor is 'v' rather than 'create' or 'make'. This follows the convention used by many standard libraries. For example, 'Module.create' should be 'Module.v'. This makes the API more consistent and idiomatic.
(* Bad examples - using create/make instead of v *)
module User = struct
type t = { name : string; email : string }
(* Should be 'v' *)
let create name email = { name; email }
end
module Widget = struct
type t = { id : int; label : string }
(* Should be 'v' *)
let make id label = { id; label }
end
module Config = struct
type t = { host : string; port : int }
(* Should be 'v' *)
let create ~host ~port = { host; port }
end
(* At top-level, the rule flags create/make *)
module Unix = struct
type file_descr = int
end
type connection = { socket : Unix.file_descr }
let create socket = { socket }
let make socket = { socket }
(* Note: The rule currently only catches top-level create/make functions.
Module-level detection would require more context about module boundaries. *)
(* Good examples - using idiomatic 'v' constructor *)
module User = struct
type t = { name : string; email : string }
(* Idiomatic constructor *)
let v name email = { name; email }
end
module Widget = struct
type t = { id : int; label : string }
(* Idiomatic constructor *)
let v id label = { id; label }
(* Other constructors can have descriptive names *)
let parse_widget json = { id = 0; label = "" } (* dummy *)
let from_json json = parse_widget json
let empty = { id = 0; label = "" }
end
module Config = struct
type t = { host : string; port : int }
(* Idiomatic constructor *)
let v ~host ~port = { host; port }
(* Specialized constructors are fine *)
let default = { host = "localhost"; port = 8080 }
let getenv _ = "dummy"
let from_env () = { host = getenv "HOST"; port = int_of_string (getenv "PORT") }
end
(* At top-level, descriptive names are often better than 'v' *)
module Unix = struct
type file_descr = int
let socket = 0
end
type connection = { socket : Unix.file_descr }
let connection socket = { socket }
let connect ~host ~port = { socket = Unix.socket }
Bindings prefixed with underscore (like '_x') indicate they are meant to be unused. If you need to use the binding, remove the underscore prefix. If the binding is truly unused, consider using a wildcard pattern '_' instead. Note: PPX-generated code with double underscore prefix (like '__ppx_generated') is excluded from this check.
let _debug_mode = true in
if _debug_mode then
print_endline "Debug mode enabled"
let debug_mode = true in
if debug_mode then
print_endline "Debug mode enabled"
Using raw Error constructors with Fmt.str (including polymorphic variants like `Msg) can lead to inconsistent error messages. Consider creating error helper functions (prefixed with 'err_') that encapsulate common error patterns and provide consistent formatting. Place these error helpers at the top of the file to make it easier to see all the different error cases in one place.
let process_data x =
match x with
| 0 -> Error (Fmt.str "Invalid data: %d" x)
| n ->
if n > 100 then
Error (Fmt.str "Data too large: %d" n)
else Ok n
let process_string s =
if s = "" then
Error (`Msg (Fmt.str "Invalid string: '%s' is empty" s))
else if String.length s > 100 then
Error (`Msg (Fmt.str "String too long: %d characters" (String.length s)))
else
Ok s
(* Define error helpers at the top of the file *)
let err_invalid x = Error (Fmt.str "Invalid data: %d" x)
let err_too_large n = Error (Fmt.str "Data too large: %d" n)
let err_invalid_string s = Error (`Msg (Fmt.str "Invalid string: '%s' is empty" s))
let err_string_too_long len = Error (`Msg (Fmt.str "String too long: %d characters" len))
let process_data x =
match x with
| 0 -> err_invalid x
| n ->
if n > 100 then
err_too_large n
else Ok n
let process_string s =
if s = "" then
err_invalid_string s
else if String.length s > 100 then
err_string_too_long (String.length s)
else
Ok s
Functions with multiple boolean parameters are hard to use correctly. It's easy to mix up the order of arguments at call sites. Consider using variant types, labeled arguments, or a configuration record instead.
(* BAD - Boolean blindness *)
let create_widget visible bordered = ...
let w = create_widget true false (* What does this mean? *)
(* GOOD - Explicit variants *)
type visibility = Visible | Hidden
type border = With_border | Without_border
let create_widget ~visibility ~border = ...
let w = create_widget ~visibility:Visible ~border:Without_border
Exposing global mutable state in interfaces (.mli files) breaks encapsulation and makes programs harder to reason about. Instead of exposing refs or mutable arrays directly, provide functions that encapsulate state manipulation. This preserves module abstraction and makes the API clearer. Internal mutable state in .ml files is fine as long as it's not exposed in the interface.
(* Global mutable state - avoid this *)
let counter = ref 0
let incr_counter () = counter := !counter + 1
let global_cache = Array.make 100 None
let cached_results = Hashtbl.create 100
(* Good example - encapsulated state *)
(* Internal state - not exposed in interface *)
let counter = ref 0
let cache = Hashtbl.create 100
(* Functions that encapsulate state access *)
let init () =
counter := 0;
Hashtbl.clear cache
let increment () =
incr counter;
!counter
let get_count () = !counter
let cache_get key =
try Some (Hashtbl.find cache key)
with Not_found -> None
let cache_set key value =
Hashtbl.replace cache key value
let cache_clear () =
Hashtbl.clear cache
MLI files should start with a documentation comment (** ... *) that describes the module's purpose and API. This helps users understand how to use the module.
(* user.mli - no module doc *)
type t
val create : string -> t
(** User management module
Handles user creation and authentication. *)
type t
val create : string -> t
All public values should have documentation explaining their purpose and usage. Add doc comments (** ... *) before or after value declarations in .mli files.
type t
val parse : string -> t
(** Documentation after is also acceptable *)
val format : t -> string
val missing_documentation : int -> int
type t
(** [parse str] converts a string to type [t].
@raise Invalid_argument if [str] is malformed. *)
val parse : string -> t
val format : t -> string
(** [format t] converts a value of type [t] to a string representation. *)
(** [process n] processes an integer value. *)
val process : int -> int
Follow OCaml documentation conventions: Functions should use '[name args] description.' format. Operators should use infix notation like '[x op y] description.' All documentation should end with a period. Avoid redundant phrases like 'This function...'.
val is_bot : t -> bool
(** [is_bot u] is [true] if [u] is a bot user. *)
type id = string
(** A user identifier. *)
The main type 't' should implement a pretty-printer function (pp) for better debugging and logging. Unlike equality and comparison which can use polymorphic functions (= and compare), pretty-printing requires a custom implementation to provide meaningful output.
type t = { id: int; name: string }
type t = { id: int; name: string }
val pp : Format.formatter -> t -> unit
All OCaml projects should have a .ocamlformat file in the root directory to ensure consistent code formatting. Create one with your preferred settings.
Library modules should have corresponding .mli files for proper encapsulation and API documentation. Create interface files to hide implementation details and provide a clean API.
type t = { name: string; id: int }
let create name id = { name; id }
let name t = t.name
(* user.mli *)
type t
val create : string -> int -> t
val name : t -> string
Modules that use logging should declare a log source for better debugging and log filtering. Add 'let src = Logs.Src.create "module.name" ~doc:"..."'
let log_src = Logs.Src.create "project_name.module_name"
module Log = (val Logs.src_log log_src : Logs.LOG)
Log.info (fun m ->
m "Received event: %s" event_type
~tags:(Logs.Tag.add "channel_id" channel_id Logs.Tag.empty))
Enforces proper test organization: (1) Test executables (test.ml) should use test suites from test modules (e.g., Test_user.suite) rather than defining their own test lists directly. (2) Test module interfaces (test_*.mli) should only export a 'suite' value with type 'string * unit Alcotest.test_case list' and no other values.
Each library module should have a corresponding test file to ensure proper testing coverage. Create test files following the naming convention test_<module>.ml
Test files for different libraries should not be mixed in the same test directory. Organize test files so that each test directory contains tests for only one library to maintain clear test organization.
let test_utils () = assert true
let () = test_utils ()
(** Good: Test is in generic 'test' stanza which is allowed *)
let parse s = int_of_string s
Every test module should have a corresponding library module. This ensures that tests are testing actual library functionality rather than testing code that doesn't exist in the library.
All test modules should be included in the main test runner (test.ml). Add the missing test suite to ensure all tests are run.
In test files, use Alcotest.failf or failf instead of Alcotest.fail (Fmt.str ...) or fail (Fmt.str ...). The failf function provides printf-style formatting directly, making the code more concise and readable.
let test_parse () =
match parse input with
| Error e -> Alcotest.fail (Fmt.str "Parse error: %s" e)
| Ok _ -> ()
let test_invalid () =
if not (is_valid data) then
fail (Fmt.str "Invalid data: %a" pp_data data)
let test_parse () =
match parse input with
| Error e -> Alcotest.failf "Parse error: %s" e
| Ok _ -> ()
let test_invalid () =
if not (is_valid data) then
failf "Invalid data: %a" pp_data data
Test suite names should follow these conventions: (1) Use lowercase snake_case for the suite name. (2) The suite name should match the test file name - for example, test_foo.ml should have suite name 'foo'. This makes it easier to identify which test file contains which suite.
(* In test_config.ml *)
let suite = ("Config", tests) (* Wrong: uppercase *)
(* In test_user_auth.ml *)
let suite = ("auth", tests) (* Wrong: doesn't match filename *)
(* In test_parser.ml *)
let suite = ("parser-tests", tests) (* Wrong: not snake_case *)
(* In test_config.ml *)
let suite = ("config", tests)
(* In test_user_auth.ml *)
let suite = ("user_auth", tests)
(* In test_parser.ml *)
let suite = ("parser", tests)
Avoid using double underscore module access like 'Module__Submodule'. Use dot notation 'Module.Submodule' instead. Double underscore notation is internal to the OCaml module system and should not be used in application code.
let result = Merlint__Location.v ~file:"test.ml"
~start_line:1 ~start_col:0 ~end_line:1 ~end_col:10
let result = Merlint.Location.v ~file:"test.ml"
~start_line:1 ~start_col:0 ~end_line:1 ~end_col:10
Use Fmt.failwith instead of failwith (Fmt.str ...). Fmt.failwith provides printf-style formatting directly, making the code more concise and readable.
let validate_input input =
if String.length input = 0 then
failwith (Fmt.str "Empty input provided")
else if String.length input > 100 then
failwith (Fmt.str "Input too long: %d characters" (String.length input))
else
input
let validate_input input =
if String.length input = 0 then
Fmt.failwith "Empty input provided"
else if String.length input > 100 then
Fmt.failwith "Input too long: %d characters" (String.length input)
else
input