From aa9be1e244f6f3fceb0e0be6a63fa3aeb1188d42 Mon Sep 17 00:00:00 2001 From: Tyler Date: Thu, 3 Oct 2019 19:44:38 -0400 Subject: [PATCH] initial commit --- default.go | 173 ++++++++++++++++++++++++++++++++++++++++++++++++ default_test.go | 40 +++++++++++ go.mod | 5 ++ go.sum | 7 ++ main.go | 33 +++++++++ util.go | 29 ++++++++ 6 files changed, 287 insertions(+) create mode 100644 default.go create mode 100644 default_test.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go create mode 100644 util.go diff --git a/default.go b/default.go new file mode 100644 index 0000000..87d65db --- /dev/null +++ b/default.go @@ -0,0 +1,173 @@ +package main + +import ( + "errors" + "fmt" + "github.com/PuerkitoBio/goquery" + "io" + "io/ioutil" + "net/http" + "net/url" + "path" + "strconv" + "strings" +) + +const ( + ContentTypeHtml = "text/html" +) + +func defaultLinkHandler(link string) (*LinkInfo, error) { + redirects := make([]string, 0) + + u, err := url.Parse(link) + + if err != nil { + return nil, err + } + + var res *http.Response + + for i := 0; i < 10; i++ { + res, err = client.Head(link) + + if err != nil { + return nil, err + } + + if (res.StatusCode == 301 || res.StatusCode == 302) && res.Header.Get("Location") != "" { + link = res.Header.Get("Location") + redirects = append(redirects, link) + } else { + break + } + } + + if res != nil && res.StatusCode != 200 { + return nil, errors.New("invalid response, expected 200, got " + strconv.Itoa(res.StatusCode)) + } + + contentType := res.Header.Get("Content-Type") + + if contentType == "" { + contentType = detectContentType(link, "application/octet-stream") + } + + if idx := strings.Index(contentType, ";"); idx != -1 { + contentType = contentType[:idx] + } + + var contentLength int64 + + if contentLengthStr := res.Header.Get("Content-Length"); contentLengthStr != "" { + contentLength, err = strconv.ParseInt(contentLengthStr, 10, 64) + } + + ret := &LinkInfo{ + ContentType: contentType, + 
ContentLength: contentLength, + } + + switch contentType { + case ContentTypeHtml: + err = retrieveHtmlLinkTitle(ret, link) + default: + ret.Title = fmt.Sprintf("%s (%s, %s)", path.Base(u.Path), contentType, ByteCountDecimal(contentLength)) + } + + return ret, err +} + +func detectContentType(link, defaultType string) string { + req, err := http.NewRequest("GET", link, nil) + + if err != nil { + return defaultType + } + + req.Header.Set("Range", "bytes=0-512") + + res, err := client.Do(req) + + if err != nil { + return defaultType + } + + defer res.Body.Close() + + b, err := ioutil.ReadAll(io.LimitReader(res.Body, 512)) + + if err != nil { + return defaultType + } + + t := http.DetectContentType(b) + + if t == "" { + t = defaultType + } + + return t +} + +var ( + attrKeys = []string{"property", "name", "itemprop"} +) + +func retrieveHtmlLinkTitle(i *LinkInfo, link string) error { + res, err := client.Get(link) + + if err != nil { + return err + } + + defer res.Body.Close() + + q, err := goquery.NewDocumentFromReader(res.Body) + + if err != nil { + return err + } + + meta := q.Find("meta") + + metaTags := make(map[string]string) + + meta.Each(func(_ int, s *goquery.Selection) { + var key string + var exists bool + + for _, k := range attrKeys { + key, exists = s.Attr(k) + + if exists { + break + } + } + + if key == "" { + return + } + }) + + var attr string + var exists bool + + if attr, exists = metaTags["og:title"]; exists { + i.Title = attr + } else if tag := q.Find("title"); tag.Length() > 0 { + i.Title = tag.Text() + } + + if attr, exists = metaTags["og:description"]; exists { + i.Description = attr + } else if attr, exists = metaTags["description"]; exists { + i.Description = attr + } + + if attr, exists = metaTags["duration"]; exists { + i.Duration = attr + } + + return nil +} \ No newline at end of file diff --git a/default_test.go b/default_test.go new file mode 100644 index 0000000..db2b550 --- /dev/null +++ b/default_test.go @@ -0,0 +1,40 @@ +package main 
+ +import ( + "net/http" + "testing" + "time" +) + +func Test_DefaultLinkHandler(t *testing.T) { + client = &http.Client{ + Timeout: 10 * time.Second, + } + + type expectedData struct { + Link string + Title string + Type string + } + + testLinks := []expectedData{ + {"https://paste.ee", "Paste.ee", "text/html"}, + {"http://techslides.com/demos/sample-videos/small.mp4", "", "video/mp4"}, + } + + for _, link := range testLinks { + l, err := defaultLinkHandler(link.Link) + + if err != nil { + t.Fatal("Unable to retrieve link info:", err) + } + + if link.Title != "" && link.Title != l.Title { + t.Fatal("Unexpected title, expected:", link.Title, "got:", l.Title) + } + + if link.Type != "" && link.Type != l.ContentType { + t.Fatal("Unexpected content type, expected:", link.Type, "got:", l.ContentType) + } + } +} \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..b6dd15f --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module meow.tf/go/linkinfo + +go 1.12 + +require github.com/PuerkitoBio/goquery v1.5.0 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..0327c72 --- /dev/null +++ b/go.sum @@ -0,0 +1,7 @@ +github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= +github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= +github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= +github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a h1:gOpx8G595UYyvj8UK4+OFyY4rx037g3fmfhe5SasG3U= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= diff --git a/main.go b/main.go new file mode 100644 index 0000000..2137919 --- /dev/null +++ b/main.go @@ -0,0 +1,33 @@ +package main + +import ( + 
// ---- main.go ----

var (
	// client is the HTTP client shared by all outbound requests. It is
	// assigned in main before the server starts.
	client *http.Client
)

// LinkInfo is the metadata reported for a link.
type LinkInfo struct {
	Title         string   `json:"title"`
	Description   string   `json:"description"`
	ContentType   string   `json:"type"`
	ContentLength int64    `json:"contentLength"`
	Duration      string   `json:"duration,omitempty"`
	Redirects     []string `json:"redirects,omitempty"`
}

// main sets up the shared HTTP client and serves the /info endpoint on :8080.
func main() {
	client = &http.Client{
		Timeout: 10 * time.Second,
	}

	mux := http.NewServeMux()
	mux.HandleFunc("/info", handleInfoRequest)

	srv := &http.Server{
		Addr:    ":8080",
		Handler: mux,
		// Bound how long a peer may take to send its request headers so idle
		// connections cannot pile up indefinitely.
		ReadHeaderTimeout: 10 * time.Second,
	}

	// ListenAndServe only returns on failure (e.g. the port is already in
	// use); fail loudly instead of exiting silently with status 0.
	if err := srv.ListenAndServe(); err != nil {
		panic(fmt.Sprintf("serving on %s: %v", srv.Addr, err))
	}
}

// handleInfoRequest is the handler registered for /info.
//
// TODO: not implemented yet; it currently writes nothing, so every request
// receives an empty 200 response.
func handleInfoRequest(w http.ResponseWriter, r *http.Request) {

}

// ---- util.go ----

// formatByteCount renders b using the given unit base (1000 or 1024). Values
// below unit, including negative ones, are printed as a plain byte count;
// larger values are scaled to one decimal place and labelled with the
// matching character of prefixes followed by suffix.
func formatByteCount(b, unit int64, prefixes, suffix string) string {
	if b < unit {
		return fmt.Sprintf("%d B", b)
	}

	// Find the largest power of unit that does not exceed b. An int64 never
	// needs more than six steps, so exp stays within prefixes.
	div, exp := unit, 0
	for n := b / unit; n >= unit; n /= unit {
		div *= unit
		exp++
	}

	return fmt.Sprintf("%.1f %c%s", float64(b)/float64(div), prefixes[exp], suffix)
}

// ByteCountDecimal formats b as a human-readable size using SI (base 1000)
// units, e.g. "999 B", "1.5 kB", "1.0 MB".
func ByteCountDecimal(b int64) string {
	return formatByteCount(b, 1000, "kMGTPE", "B")
}

// ByteCountBinary formats b as a human-readable size using IEC (base 1024)
// units, e.g. "1023 B", "1.5 KiB", "1.0 MiB".
func ByteCountBinary(b int64) string {
	return formatByteCount(b, 1024, "KMGTPE", "iB")
}