Go 爬虫
需要安装 goquery(类似 jQuery 的 Go 版本),可以像 jQuery 一样选择对应的 DOM 信息:
go get github.com/PuerkitoBio/goquery
image
获取 所有的编程语言
func GetLanguages(w http.ResponseWriter, r *http.Request) { client := &http.Client{} req, err := http.NewRequest("GET", "https://github.com/trending", nil) if err != nil { panic(err) } req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36") resp, err := client.Do(req) if err != nil { panic(err) } defer resp.Body.Close() doc, err := goquery.NewDocumentFromResponse(resp) if err != nil { panic(err) } results := make([]map[string]string, 0) doc.Find(".select-menu-item.js-navigation-item ").Each(func(i int, contentSelection *goquery.Selection) { if i > 2 { href, _ := contentSelection.Attr("href") span := contentSelection.Find("span").Text() reg := regexp.MustCompile(`/trending/([^?/]+)`) s := reg.FindStringSubmatch(href)[1] params := make(map[string]string) params["alias"] = s params["name"] = span results = append(results, params) } }) helper.ResponseWithJson(w, http.StatusOK, results) }
获取对应语言的 热门库
func Repository(w http.ResponseWriter, r *http.Request) { vars := mux.Vars(r) lang := vars["lang"] since := r.FormValue("since") client := &http.Client{} fmt.Println(lang, since) if since == "" { since = "daily" } url := fmt.Sprintf("https://github.com/trending/%s?since=%s", lang, since) req, err := http.NewRequest("GET", url, nil) if err != nil { panic(err) } req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36") resp, err := client.Do(req) if err != nil { panic(err) } defer resp.Body.Close() doc, err := goquery.NewDocumentFromResponse(resp) if err != nil { panic(err) } results := make([]map[string]string, 0) doc.Find(".col-12.d-block.width-full.py-4.border-bottom").Each(func(i int, contentSelection *goquery.Selection) { s := contentSelection.Find(".d-inline-block.col-9.mb-1") a := s.Find("a") href, _ := a.Attr("href") text := strings.Replace(a.Text(), " ", "", -1) des := contentSelection.Find(".py-1 p") name := strings.FieldsFunc(strings.Split(text, "/")[0], unicode.IsSpace)[0] params := make(map[string]string) params["url"] = "https://github.com" + href params["name"] = name params["des"] = des.Text() div_a := contentSelection.Find(".f6.text-gray.mt-2") div_a.Find(".muted-link.d-inline-block.mr-3").Each(func(i int, cs *goquery.Selection) { if i == 0 { params["stars"] = strings.FieldsFunc(cs.Text(), unicode.IsSpace)[0] } else if i == 1 { params["forks"] = strings.FieldsFunc(cs.Text(), unicode.IsSpace)[0] } }) results = append(results, params) }) helper.ResponseWithJson(w, http.StatusOK, results) }
作者:CoderMiner
链接:https://www.jianshu.com/p/e887bb0eae8a
点击查看更多内容
为 TA 点赞
评论
共同学习,写下你的评论
评论加载中...
作者其他优质文章
正在加载中
感谢您的支持,我会继续努力的~
扫码打赏,你说多少就多少
赞赏金额会直接到老师账户
支付方式
打开微信扫一扫,即可进行扫码打赏哦