上文写到HTTP包的服务器端原理。本文写写客户端的剖析。

客户端代码从设计上可以分为两个层次:

  1. HTTP语义的处理
  2. HTTP的底层连接

下面以一个文件上传例子为开头。

下文以上传David Garrett - Viva La Vida为例。客户端代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
package main

import (
	"bytes"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"mime/multipart"
	"net/http"
	"os"
)

// PostFile uploads the file at path filename to the URL target as a
// multipart/form-data POST, using the form field name "uploadfile".
// The whole file is buffered in memory before the request is sent.
// On success it prints the response status and body to standard output.
// It returns the first error met while building the form, opening or
// reading the file, sending the request, or reading the response.
func PostFile(filename, target string) error {
	body := &bytes.Buffer{}
	writer := multipart.NewWriter(body)

	fd, err := writer.CreateFormFile("uploadfile", filename)
	if err != nil {
		log.Println(err)
		return err
	}

	fp, err := os.Open(filename)
	if err != nil {
		log.Println(err)
		return err
	}
	defer fp.Close()

	if _, err = io.Copy(fd, fp); err != nil {
		return err
	}

	kind := writer.FormDataContentType()
	// Close writes the terminating boundary; without it the body is not a
	// complete multipart message, so its error must not be ignored.
	if err = writer.Close(); err != nil {
		return err
	}

	// Post to the caller-supplied target (the original referenced an
	// undefined identifier here).
	response, err := http.Post(target, kind, body)
	if err != nil {
		return err
	}
	defer response.Body.Close()

	html, err := ioutil.ReadAll(response.Body)
	if err != nil {
		return err
	}

	fmt.Println(response.Status)
	// Convert to string so the body is printed as text, not as a byte list.
	fmt.Println(string(html))
	return nil
}

// main uploads a sample video to the local upload server and exits with
// a non-zero status if the upload fails.
func main() {
	url := "http://localhost:8080/upload"
	filename := `F:\video\David Garrett - Viva La Vida.mp4`
	// PostFile reports failures through its return value; do not drop it.
	if err := PostFile(filename, url); err != nil {
		log.Fatal(err)
	}
}

客户端代码就三点:

  1. 构造上传文件的表单
  2. 使用HTTP方法POST上传文件
  3. 查看上传结果

第二点,使用POST方法上传文件正是本文的关键:POST上传文件的过程是怎样的?

出于完整性,我们补充服务器端的代码。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
package main

import (
"crypto/md5"
"fmt"
"html/template"
"io"
"log"
"net/http"
"os"
"strconv"
"time"
)

/*
application/x-www-form-urlencoded 表示在发送前编码所有字符(默认)
multipart/form-data 不对字符编码。在使用包含文件上传控件的表单时,必须使用该值。
text/plain 空格转换为 "+" 加号,但不对特殊字符编码。
*/

// upload handles the /upload endpoint.
//
// For GET it renders the template file "upload.tpl", passing an MD5 hex
// digest of the current Unix time as the template data.
//
// For any other method it parses the request as a multipart form, takes
// the file in the "uploadfile" field, saves it under ./upload/ using the
// client-supplied file name, and replies with the part's MIME header.
// Failures are logged and reported to the client with an HTTP error
// status instead of an empty 200 response.
func upload(w http.ResponseWriter, r *http.Request) {
	fmt.Println(r.Method, r.URL)
	if r.Method == "GET" {
		current := time.Now().Unix()
		h := md5.New()
		io.WriteString(h, strconv.FormatInt(current, 10))
		token := fmt.Sprintf("%x", h.Sum(nil))

		// A missing or broken template yields a nil *Template; executing
		// it would panic, so fail the request explicitly.
		t, err := template.ParseFiles("upload.tpl")
		if err != nil {
			log.Println(err)
			http.Error(w, "internal server error", http.StatusInternalServerError)
			return
		}
		if err := t.Execute(w, token); err != nil {
			log.Println(err)
		}
		return
	}

	if err := r.ParseMultipartForm(32 << 10); err != nil { // set max memory
		log.Println(err)
		http.Error(w, "bad request", http.StatusBadRequest)
		return
	}
	file, handler, err := r.FormFile("uploadfile") // get file handle
	if err != nil {
		log.Println(err)
		http.Error(w, "bad request", http.StatusBadRequest)
		return
	}
	defer file.Close()

	// NOTE(review): handler.Filename comes from the client; confirm the Go
	// version in use reduces it to a base name before it is used in a path.
	// O_TRUNC ensures a shorter re-upload does not keep stale trailing bytes.
	f, err := os.OpenFile("./upload/"+handler.Filename,
		os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
	if err != nil {
		log.Println(err)
		http.Error(w, "internal server error", http.StatusInternalServerError)
		return
	}
	if _, err := io.Copy(f, file); err != nil {
		f.Close()
		log.Println(err)
		http.Error(w, "internal server error", http.StatusInternalServerError)
		return
	}
	// Close explicitly: a failed close can mean the data was not written.
	if err := f.Close(); err != nil {
		log.Println(err)
		http.Error(w, "internal server error", http.StatusInternalServerError)
		return
	}

	// Respond only after the file has been saved successfully.
	fmt.Fprintf(w, "%v", handler.Header) // response
}

// main registers the upload handler on /upload and serves HTTP on port
// 8080, terminating the process with a logged error when the server stops.
func main() {
	http.HandleFunc("/upload", upload)
	if err := http.ListenAndServe(":8080", nil); err != nil {
		log.Fatal(err)
	}
}

服务器端也就三点:

  1. 处理表单,调用ParseMultipartForm方法
  2. 接收文件和保存文件
  3. 返回信息给客户端。

其中第二点是重点,在前文已经讲过大致的原理。

POST原理之客户端high-level API

以POST方法为例而不是GET方法为例的好处是考虑了表单问题。在处理上也涵盖了GET方法的过程。

上传文件的客户端代码中,有这样一句代码:response, err := http.Post(url, kind, body)。它位于client.go源码文件中。

1
2
3
4
5
6
7
8
9
10
11
12
// Post sends a POST request to url with the given Content-Type and body
// by delegating to DefaultClient.Post, and returns its result unchanged.
func Post(url string, contentType string, body io.Reader) (resp *Response, err error) {
	resp, err = DefaultClient.Post(url, contentType, body)
	return resp, err
}

// Post builds a POST request for url carrying body, sets its
// Content-Type header to contentType, and sends it with c.Do.
// If the request cannot be constructed, the error from NewRequest is
// returned with a nil response.
func (c *Client) Post(url string, contentType string, body io.Reader) (resp *Response, err error) {
	request, buildErr := NewRequest("POST", url, body)
	if buildErr != nil {
		return nil, buildErr
	}

	request.Header.Set("Content-Type", contentType)
	return c.Do(request)
}

NewRequest函数用于创建HTTP请求对象Request。Request结构代表客户端发出的请求或服务器端接收到的请求。Do方法用于发送请求实例req,其他的HTTP请求方法最终都调用该方法。这个方法的代码太长,我们省略不相关的代码。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// Do sends req through c.send and returns the resulting Response.
// This is an abridged excerpt: the redirect-following code inside the
// loop has been left out, so several variables declared below are set
// but not otherwise used here.
func (c *Client) Do(req *Request) (*Response, error) {
var (
deadline = c.deadline()
reqs []*Request
resp *Response
copyHeaders = c.makeHeadersCopier(req)
reqBodyClosed = false // have we closed the current req.Body?

// Redirect behavior:
redirectMethod string
includeBody bool
)

for {
// Redirect-handling code is omitted from this excerpt.

// Record every request issued during this call.
reqs = append(reqs, req)
var err error
var didTimeout func() bool
if resp, didTimeout, err = c.send(req, deadline); err != nil {
// c.send() always closes req.Body
reqBodyClosed = true
// When a deadline is set and didTimeout reports true, replace the
// error with an httpError flagged as a timeout.
if !deadline.IsZero() && didTimeout() {
err = &httpError{
err: err.Error() + " (Client.Timeout exceeded while awaiting headers)",
timeout: true,
}
}
// uerr is not shown in this excerpt.
return nil, uerr(err)
}

// Return the response as-is unless redirectBehavior asks for a redirect.
var shouldRedirect bool
redirectMethod, shouldRedirect, includeBody = redirectBehavior(req.Method, resp, reqs[0])
if !shouldRedirect {
return resp, nil
}

req.closeBody()
}
}

reqs变量保存当次HTTP方法调用经历的请求,包括重定向的处理。我们在for循环中看到和重定向相关的代码。但这里不是关键。注意到这段代码:resp, didTimeout, err = c.send(req, deadline),该方法最终返回一次实际请求的Response对象,不管是否是重定向。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// send attaches the cookies that c.Jar holds for req.URL (when a jar is
// configured), forwards the request to the package-level send using
// c.transport(), and stores any cookies found on the response back into
// the jar. On failure it returns a nil response together with the
// didTimeout function and error produced by the underlying send.
func (c *Client) send(req *Request, deadline time.Time) (resp *Response, didTimeout func() bool, err error) {
	if c.Jar != nil {
		stored := c.Jar.Cookies(req.URL)
		for i := range stored {
			req.AddCookie(stored[i])
		}
	}

	resp, didTimeout, err = send(req, c.transport(), deadline)
	if err != nil {
		return nil, didTimeout, err
	}

	// Without a jar there is nothing to update.
	if c.Jar == nil {
		return resp, nil, nil
	}
	if rc := resp.Cookies(); len(rc) > 0 {
		c.Jar.SetCookies(req.URL, rc)
	}
	return resp, nil, nil
}

注意到send方法包括了处理Cookies的信息,包括请求处理完毕更新Cookies的代码段。但发送请求的代码是:

resp, didTimeout, err = send(req, c.transport(), deadline)

send函数的具体实现如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
// send issues ireq through the RoundTripper rt and returns the response.
// The caller's request is never modified: the first time a change is
// needed, a shallow copy is made and used instead. When deadline is
// non-zero, the response body is wrapped so that it carries the timer's
// stop function and the didTimeout callback.
func send(ireq *Request, rt RoundTripper, deadline time.Time) (resp *Response, didTimeout func() bool, err error) {
req := ireq // req is either the original request, or a modified fork

// forkReq replaces req with a shallow copy of ireq, at most once.
forkReq := func() {
if ireq == req {
req = new(Request)
*req = *ireq // shallow clone
}
}

// Guarantee a non-nil Header map on the request that is sent.
if req.Header == nil {
forkReq()
req.Header = make(Header)
}

// If the URL carries user info and no Authorization header is set,
// add a Basic Authorization header on a cloned header map.
if u := req.URL.User; u != nil && req.Header.Get("Authorization") == "" {
username := u.Username()
password, _ := u.Password()
forkReq()
req.Header = cloneHeader(ireq.Header)
req.Header.Set("Authorization", "Basic "+basicAuth(username, password))
}

// Fork before setRequestCancel when a deadline is in effect.
if !deadline.IsZero() {
forkReq()
}
stopTimer, didTimeout := setRequestCancel(req, rt, deadline)

resp, err = rt.RoundTrip(req)
if err != nil {
stopTimer()
// A RoundTripper should not return both a response and an error;
// the response is dropped and the anomaly is logged.
if resp != nil {
log.Printf("RoundTripper returned a response & error; ignoring response")
}
if tlsErr, ok := err.(tls.RecordHeaderError); ok {
// If we get a bad TLS record header, check to see if the
// response looks like HTTP and give a more helpful error.
// See golang.org/issue/11111.
if string(tlsErr.RecordHeader[:]) == "HTTP/" {
err = errors.New("http: server gave HTTP response to HTTPS client")
}
}
return nil, didTimeout, err
}
// With a deadline set, wrap the body in cancelTimerBody, handing it
// stopTimer and didTimeout.
if !deadline.IsZero() {
resp.Body = &cancelTimerBody{
stop: stopTimer,
rc: resp.Body,
reqDidTimeout: didTimeout,
}
}
return resp, nil, nil
}

这段代码除了发送request之外,还处理了很多HTTP控制信息,包括授权Authorization、HTTP语义相关的正确性检测。其中核心的代码是:

resp, err = rt.RoundTrip(req)

rt是实现了RoundTripper接口的对象。HTTP客户端从架构上分为两层,一层负责处理HTTP语义,例如Cookies、Headers这些信息,另外一层负责HTTP的TCP连接。这两层通过RoundTripper接口建立桥梁。实现该接口的结构为Transport,它包括底层连接的控制信息。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// RoundTripper is the interface through which the client hands a Request
// to the connection layer and gets back a Response.
type RoundTripper interface {
	// RoundTrip sends the given Request and returns the Response for it,
	// or an error.
	RoundTrip(*Request) (*Response, error)
}

// Transport is the type whose RoundTrip method implements RoundTripper.
// Only a subset of its fields is listed in this excerpt (RoundTrip also
// uses fields that are not declared here).
type Transport struct {
// NOTE(review): idleMu presumably guards the idle-connection fields
// that follow it; confirm against the full source.
idleMu sync.Mutex
wantIdle bool // user has requested to close all idle conns
idleConn map[connectMethodKey][]*persistConn // most recently used at end
idleConnCh map[connectMethodKey]chan *persistConn
idleLRU connLRU

// NOTE(review): reqMu presumably guards reqCanceler; confirm against
// the full source.
reqMu sync.Mutex
reqCanceler map[*Request]func(error)

altMu sync.Mutex // guards changing altProto only
altProto atomic.Value // of nil or map[string]RoundTripper, key is URI scheme
}

RoundTrip方法的实现如下,省略部分细节代码。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
// RoundTrip sends req and returns the response (abridged excerpt).
// It first offers the request to an alternate RoundTripper registered for
// the URL scheme, if any. Otherwise it loops: obtain a connection, perform
// the round trip on it, and on a retryable error try again.
func (t *Transport) RoundTrip(req *Request) (*Response, error) {
t.nextProtoOnce.Do(t.onceSetNextProtoDefaults)
ctx := req.Context()
trace := httptrace.ContextClientTrace(ctx)
altProto, _ := t.altProto.Load().(map[string]RoundTripper)
scheme := req.URL.Scheme
// Use the alternate RoundTripper's result unless it returns
// ErrSkipAltProtocol, in which case fall through to the loop below.
if altRT := altProto[scheme]; altRT != nil {
if resp, err := altRT.RoundTrip(req); err != ErrSkipAltProtocol {
return resp, err
}
}

for {
// treq gets modified by roundTrip, so we need to recreate for each retry.
treq := &transportRequest{Request: req, trace: trace}
cm, err := t.connectMethodForRequest(treq)
if err != nil {
req.closeBody()
return nil, err
}

// Get the cached or newly-created connection to either the
// host (for http or https), the http proxy, or the http proxy
// pre-CONNECTed to https server. In any case, we'll be ready
// to send it requests.
pconn, err := t.getConn(treq, cm)
if err != nil {
t.setReqCanceler(req, nil)
req.closeBody()
return nil, err
}

var resp *Response
if pconn.alt != nil {
// HTTP/2 path.
t.setReqCanceler(req, nil) // not cancelable with CancelRequest
resp, err = pconn.alt.RoundTrip(req)
} else {
resp, err = pconn.roundTrip(treq)
}
if err == nil {
return resp, nil
}
// Give up unless the connection reports the request as retryable.
if !pconn.shouldRetryRequest(req, err) {
// Issue 16465: return underlying net.Conn.Read error from peek,
// as we've historically done.
if e, ok := err.(transportReadFromServerError); ok {
err = e.err
}
return nil, err
}
testHookRoundTripRetried()

// Rewind the body if we're able to. (HTTP/2 does this itself so we only
// need to do it for HTTP/1.1 connections.)
if req.GetBody != nil && pconn.alt == nil {
// Retry with a copy of the request carrying a fresh body from GetBody.
newReq := *req
var err error
newReq.Body, err = req.GetBody()
if err != nil {
return nil, err
}
req = &newReq
}
}
}

到此,整个high-level API的分析结束。接下来介绍Transport