Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace all historic boolean arguments with all current args #49

Open
wants to merge 1 commit into
base: gh-pages
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
242 changes: 242 additions & 0 deletions extract_curl_args.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
#!/usr/bin/env python3
#
# This script assumes ../curl/ is a git repo containing curl's source code
# and extracts the list of arguments curl accepts and writes the result as
# two JS objects (one for --long-options and one for -s (short) options)
# to curl-to-go.js.
#
# curl defines its arguments in src/tool_getparam.c:
# https://github.com/curl/curl/blob/master/src/tool_getparam.c#L73
#
# Each argument definition is composed of
# letter - a 1 or 2 character string which identifies this argument and,
# if it's 1 character long, is also its short form.
# lname - the --long-name of this option
# desc - the type of the option, which specifies if the option consumes a
# second argument or not.
# ARG_STRING, ARG_FILENAME - consume a second argument
# ARG_BOOL, ARG_NONE - don't consume a second argument.
# Historically, TRUE and FALSE were used.
#
# Each boolean argument (ARG_BOOL) also gets a --no-OPTION-NAME
# counterpart. ARG_NONE arguments do not.
#
# Multiple options can have the same `letter` if an option was renamed but
# the old name needs to also be kept for backwards compatibility. To these
# options we add a "name" property with the newest name.

from pathlib import Path
import sys
import subprocess
from collections import Counter

# Checkout of curl's source code that the option table is read from. It is
# expected to sit next to this repository.
# TODO: make this a command line arg?
CURL_REPO = Path(__file__).parent.parent.joinpath("curl")
# C file that declares curl's option table.
INPUT_FILE = CURL_REPO.joinpath("src", "tool_getparam.c")
# JS file whose generated section gets rewritten.
OUTPUT_FILE = Path(__file__).parent / "resources" / "js" / "curl-to-go.js"

# Marker text delimiting the generated section inside OUTPUT_FILE.
JS_PARAMS_START = "BEGIN GENERATED CURL OPTIONS"
JS_PARAMS_END = "END GENERATED CURL OPTIONS"

# Text delimiting the option table inside INPUT_FILE.
OPTS_START = "struct LongShort aliases[]= {"
OPTS_END = "};"

# Option types (the ARG_* constants, lowercased and without the prefix).
BOOL_TYPES = ["bool", "none"]
STR_TYPES = ["string", "filename"]
ALIAS_TYPES = [*BOOL_TYPES, *STR_TYPES]

# Options that share a `letter` are renames of one another. This maps every
# such long name (old and new alike) to the current name of the option.
DUPES = {
    # --krb4 -> --krb
    "krb": "krb",
    "krb4": "krb",
    # --ftp-ssl -> --ssl
    "ftp-ssl": "ssl",
    "ssl": "ssl",
    # --ftp-ssl-reqd -> --ssl-reqd
    "ftp-ssl-reqd": "ssl-reqd",
    "ssl-reqd": "ssl-reqd",
    # --socks5-gssapi-service -> --proxy-service-name
    "proxy-service-name": "proxy-service-name",
    "socks5-gssapi-service": "proxy-service-name",
}

# Bail out early, with instructions, when the files this script works on
# aren't where it expects them to be.
if not OUTPUT_FILE.is_file():
    raise SystemExit(
        f"{OUTPUT_FILE} doesn't exist. You should run this script from curl-to-go/"
    )
if not CURL_REPO.is_dir():
    # Cloning is one fix; modifying the CURL_REPO variable above is the other.
    raise SystemExit(
        f"{CURL_REPO} needs to be a git repo with curl's source code. "
        "You can clone it with\n\n"
        "git clone https://github.com/curl/curl ../curl"
    )


def on_git_master(git_dir):
    """Return True if the git repo at `git_dir` has `master` checked out.

    Runs `git rev-parse --abbrev-ref HEAD` inside `git_dir` and compares its
    (whitespace-stripped) output with "master".

    Raises subprocess.CalledProcessError if git exits with a non-zero status.
    """
    result = subprocess.run(
        ["git", "rev-parse", "--abbrev-ref", "HEAD"],
        cwd=git_dir,
        check=True,
        capture_output=True,
        text=True,
    )
    branch = result.stdout.strip()
    return branch == "master"


def parse_aliases(lines):
    """Parse curl's option table out of the lines of tool_getparam.c.

    `lines` is any iterable of source lines (an open file, a list, ...).
    Everything up to and including the line containing OPTS_START is skipped,
    then every line starting with "{" is parsed as one
    `{"letter", "lname", ARG_TYPE}` entry until a line ending with OPTS_END
    is reached.

    Returns a list of `{"letter": ..., "lname": ..., "type": ...}` dicts, one
    per distinct `lname`, in order of first appearance. "type" is the ARG_*
    constant lowercased and without its prefix, with "filename" folded into
    "string". If an `lname` is repeated with different values, a warning is
    printed to stderr and the last definition wins.

    Raises ValueError if an entry doesn't have exactly three fields, if its
    `letter` isn't 1 or 2 characters long, or if its type is unknown.
    """
    # Both loops below must share one iterator so that the second one carries
    # on where the first one stopped, even if `lines` is a list.
    lines = iter(lines)

    aliases = {}
    # Skip ahead to the start of the table.
    for line in lines:
        if OPTS_START in line:
            break
    for line in lines:
        line = line.strip()
        if line.endswith(OPTS_END):
            break
        if not line.startswith("{"):
            continue

        # main.c has comments on the same line
        letter, lname, desc = line.split("/*")[0].strip().strip("{},").split(",")

        letter = letter.strip().strip('"')
        lname = lname.strip().strip('"')
        type_ = desc.strip().removeprefix("ARG_").lower()
        # The only difference is that ARG_FILENAMEs raise a warning if you pass a value
        # that starts with '-'
        if type_ == "filename":
            type_ = "string"
        # TODO: for most options, if you specify them more than once, only the last
        # one is taken. For others, (such as --url) each value is appended to a list
        # and all are processed. This would require parsing the C code in the switch
        # statement that processes the options.

        # NB: `1 > len(letter) > 2` would mean `1 > len and len > 2`, which
        # can never be true, so the range has to be spelled out like this.
        if not 1 <= len(letter) <= 2:
            raise ValueError(f"letter form of --{lname} must be 1 or 2 characters long")
        if type_ not in ALIAS_TYPES:
            raise ValueError(f"unknown desc for --{lname}: {desc!r}")

        alias = {"letter": letter, "lname": lname, "type": type_}
        if lname in aliases and aliases[lname] != alias:
            print(
                f"{lname!r} repeated with different values:\n"
                + f"{aliases[lname]}\n"
                + f"{alias}",
                file=sys.stderr,
            )
        aliases[lname] = alias

    return list(aliases.values())


def fill_out_aliases(aliases):
    """Add derived entries and properties to the parsed option list.

    `aliases` is a list of `{"letter", "lname", "type"}` dicts. The list and
    its dicts are modified in place and the same list is returned:

    * a "bool"/"none" option whose long name starts with "no-" and/or
      "disable-" gets a "name" property holding the name without those
      prefixes (a note is printed to stderr when that happens);
    * an option sharing its `letter` with another option gets a "name"
      property with the option's current name, looked up in DUPES, unless
      its long name already is the current name;
    * every "bool" (ARG_BOOL) option gets a --no-OPTION counterpart inserted
      right after it, marked with `"expand": False`;
    * "none" options are relabelled as "bool".

    Raises KeyError if options share a `letter` but aren't listed in DUPES.
    """
    # How many options use each `letter`; more than one means a rename.
    seen_letters = Counter(entry["letter"] for entry in aliases)

    # (index of --OPTION, its --no-OPTION entry), inserted once the loop is
    # done so that the list isn't modified while it's being iterated over.
    pending = []

    for position, entry in enumerate(aliases):
        long_name = entry["lname"]
        kind = entry["type"]

        if kind in BOOL_TYPES:
            stripped = long_name.removeprefix("no-").removeprefix("disable-")
            if stripped != long_name:
                print(f"Assuming --{long_name} is {stripped!r}", file=sys.stderr)
                entry["name"] = stripped

        if seen_letters[entry["letter"]] > 1:
            # Raise KeyError if special case hasn't been added yet
            canonical = DUPES[long_name]
            if canonical != long_name:
                entry["name"] = canonical

        if kind == "none":
            # The none/bool distinction only matters for deciding whether a
            # --no-OPTION counterpart exists, which is settled right here.
            entry["type"] = "bool"
        elif kind == "bool":
            negated = dict(entry)
            negated["name"] = entry.get("name", long_name)
            negated["lname"] = "no-" + long_name
            # --no-OPTION options cannot be shortened
            negated["expand"] = False
            pending.append((position, negated))

    # Each earlier insertion shifts the later options one slot to the right;
    # `shift` accounts for that and for placing --no-OPTION after --OPTION.
    for shift, (position, negated) in enumerate(pending, start=1):
        aliases.insert(position + shift, negated)

    return aliases


def split(aliases):
    """Split the option list into long-option and short-option mappings.

    Returns a `(long_args, short_args)` tuple:

    * `long_args` maps every `lname` to the option's remaining properties
      (everything except "letter" and "lname");
    * `short_args` maps every 1-character `letter` to the option's "name"
      (falling back to its `lname`).
    """
    long_args, short_args = {}, {}
    for entry in aliases:
        long_name = entry["lname"]
        long_args[long_name] = {
            key: entry[key] for key in entry if key not in ("letter", "lname")
        }

        letter = entry["letter"]
        if len(letter) != 1:
            # 2-character letters are only identifiers, not short options.
            continue
        target = entry.get("name", long_name)
        # -N is short for --no-buffer
        short_args[letter] = "no-" + target if letter == "N" else target
    return long_args, short_args


def format_as_js(d, var_name):
    """Yield the lines of a JS `var` declaration holding `d` as an object.

    `d` maps option names to either a string or a dict of properties whose
    values are strings or booleans. Entries with a value of any other type
    are left out. Lines are tab-indented and have no trailing newline.

    Raises TypeError (while iterating) if a property value is neither a
    string nor a boolean.
    """

    def js_key(name):
        # Purely alphabetic keys don't need quotes in a JS object literal.
        if name.isalpha():
            return name
        return repr(name)

    def js_value(value):
        if isinstance(value, str):
            return repr(value)
        if isinstance(value, bool):
            return str(value).lower()
        raise TypeError(f"can't convert values of type {type(value)} to JS")

    yield "\tvar {} = {{".format(var_name)

    for name, spec in d.items():
        if isinstance(spec, dict):
            props = ", ".join(
                f"{js_key(key)}: {js_value(value)}" for key, value in spec.items()
            )
            yield "\t\t" + repr(name) + ": {" + props + "},"
        elif isinstance(spec, str):
            yield "\t\t" + repr(name) + ": " + js_value(spec) + ","

    yield "\t};"


if __name__ == "__main__":
    # Regenerate the option tables in OUTPUT_FILE from curl's source code.
    if not on_git_master(CURL_REPO):
        sys.exit("not on curl repo's git master")

    # Files are opened as UTF-8 explicitly so that the result doesn't depend
    # on the locale's default encoding.
    with open(INPUT_FILE, encoding="utf-8") as f:
        aliases = fill_out_aliases(parse_aliases(f))
    long_args, short_args = split(aliases)

    js_params_lines = list(format_as_js(long_args, "longOptions"))
    js_params_lines += [""]  # separate by a newline
    js_params_lines += list(format_as_js(short_args, "shortOptions"))

    # Rebuild the file's contents: everything up to and including the start
    # marker, the freshly generated lines, then the end marker and the rest.
    # The three loops share the file iterator, each resuming where the
    # previous one stopped.
    new_lines = []
    with open(OUTPUT_FILE, encoding="utf-8") as f:
        for line in f:
            new_lines.append(line)
            if JS_PARAMS_START in line:
                break
        else:
            raise ValueError(f"{'// ' + JS_PARAMS_START!r} not in {OUTPUT_FILE}")

        new_lines += [js_line + "\n" for js_line in js_params_lines]

        # Drop the previously generated lines, up to the end marker.
        for line in f:
            if JS_PARAMS_END in line:
                new_lines.append(line)
                break
        else:
            raise ValueError(f"{'// ' + JS_PARAMS_END!r} not in {OUTPUT_FILE}")

        new_lines.extend(f)

    # newline="\n" keeps the line endings as LF on every platform.
    with open(OUTPUT_FILE, "w", encoding="utf-8", newline="\n") as f:
        f.write("".join(new_lines))
4 changes: 2 additions & 2 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ <h1>curl-to-Go</h1>
<h2>Instantly convert <a href="http://curl.haxx.se/">curl</a> commands to <a href="https://golang.org/">Go</a> code</h2>

<p>
This tool turns a curl command into Go code. (To do the reverse, check out <a href="https://github.com/moul/http2curl">moul/http2curl</a>.) Currently, it knows the following options: -d/--data, -H/--header, -I/--head, -u/--user, --url, and -X/--request. It also understands JSON content types (see <a href="https://mholt.github.io/json-to-go">JSON-to-Go</a>). If the content type is application/x-www-form-urlencoded then it will convert the data to <a href="https://pkg.go.dev/net/url#Values">Values</a> (same as <a href="https://pkg.go.dev/net/http#Client.PostForm">PostForm</a>). Feel free to <a href="https://github.com/mholt/curl-to-go">contribute on GitHub</a>!
This tool turns a curl command into Go code. (To do the reverse, check out <a href="https://github.com/moul/http2curl">moul/http2curl</a>.) Currently, it knows the following options: <code>-d</code>/<code>--data</code>, <code>-H</code>/<code>--header</code>, <code>-I</code>/<code>--head</code>, <code>-u</code>/<code>--user</code>, <code>--url</code>, and <code>-X</code>/<code>--request</code>. It also understands JSON content types (see <a href="https://mholt.github.io/json-to-go">JSON-to-Go</a>). If the content type is <code>application/x-www-form-urlencoded</code> then it will convert the data to <a href="https://pkg.go.dev/net/url#Values"><code>Values</code></a> (same as <a href="https://pkg.go.dev/net/http#Client.PostForm"><code>PostForm</code></a>). Feel free to <a href="https://github.com/mholt/curl-to-go">contribute on GitHub</a>!
</p>

<p class="examples">
Expand All @@ -50,7 +50,7 @@ <h2>Instantly convert <a href="http://curl.haxx.se/">curl</a> commands to <a hre
</main>

<p>
Note: http.DefaultClient will follow redirects by default, whereas curl does not without the <code>--location</code> flag. Since reusing the HTTP client is good Go practice, this tool does not attempt to configure the HTTP client for you.
Note: <code>http.DefaultClient</code> will follow redirects by default, whereas curl does not without the <code>--location</code> flag. Since reusing the HTTP client is good Go practice, this tool does not attempt to configure the HTTP client for you.
</p>


Expand Down
2 changes: 1 addition & 1 deletion resources/js/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ $(function()

// Fill in examples
$('#example1').click(function() {
$('#input').val('curl canhazip.com').keyup();
$('#input').val('curl icanhazip.com').keyup();
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why change this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's the correct URL

https://major.io/2021/06/06/a-new-future-for-icanhazip/

https://github.com/major/icanhaz

Looking at whois, it looks like they're both on Cloudflare, but I don't know if they're actually run by the same person; the shortened form isn't mentioned in any blog posts.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't remember how I found it or why I used it, but I think someone recommended it to me... if I recall it was an obscure reason...

Sorry, been busy; will try to look at this when I can.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, they both just return your IP as plaintext

$ curl icanhazip.com
2607:<REDACTED>
$ curl canhazip.com
2607:<REDACTED>

most likely the guy registered the shortened form after making icanhazip. I just noticed that it's technically not the official URL and figured we should use the proper one.

});
$('#example2').click(function() {
$('#input').val('curl https://api.example.com/surprise \\\n -u banana:coconuts \\\n -d "sample data"').keyup();
Expand Down
Loading