1 回答

TA贡献1860条经验 获得超8个赞
我很难理解你想做什么,但这是我想到的:
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"os"
"strings"
"time"
"github.com/gocolly/colly"
)
// Info is the JSON record produced by the scraper: a numeric identifier
// together with the image URLs collected for it.
type Info struct {
	// ID is serialized under the "id" key.
	ID int `json:"id"`
	// Images is serialized under the "images" key; a nil slice encodes as null.
	Images []string `json:"images"`
}
// main scrapes image URLs from one Amazon product page into a single Info
// value, prints that value as indented JSON on stdout, writes it to disk via
// writeJson, and finally prints the elapsed time in seconds.
func main() {
	allInfos := new(Info)

	start := time.Now()

	co := colly.NewCollector(
		colly.AllowedDomains("www.amazon.com", "amazon.com"),
	)

	// Registered before the first Visit so the callback is already attached
	// when requests are issued; previously it was added only after the crawl.
	co.OnRequest(func(r *colly.Request) {
		fmt.Println("Visiting: ", r.URL.String())
	})

	// Collect every <img> whose src points at the product image host and
	// which carries no class attribute.
	co.OnHTML("img[src]", func(e *colly.HTMLElement) {
		imgsrc := e.Attr("src")
		imgclass := e.Attr("class")
		if !strings.HasPrefix(imgsrc, "https://images-na.ssl-images-amazon.com/images/I/") || imgclass != "" {
			return
		}

		fmt.Printf("The Array %d %v", len(allInfos.Images), imgsrc+"\n")
		allInfos.Images = append(allInfos.Images, imgsrc)

		// NOTE(review): the image host is not listed in AllowedDomains, so
		// this visit is presumably rejected by colly; the error is now logged
		// instead of being silently dropped.
		if err := co.Visit(e.Request.AbsoluteURL(imgsrc)); err != nil {
			log.Println("Could not visit image", imgsrc, err)
		}
	})

	// A failed crawl is reported but does not abort: the (possibly empty)
	// result is still printed and written, as before.
	if err := co.Visit("https://www.amazon.com/Bluetooth-FM-Transmitter-Compatible-Smartphones/dp/B088TCSH8T/ref=sr_1_1_sspa?dchild=1&keywords=transmitter&qid=1623860482&sr=8-1-spons&psc=1&smid=A2XMGHKVCX40WB&spLa=ZW5jcnlwdGVkUXVhbGlmaWVyPUFGVEgxTjJLRFdLSkUmZW5jcnlwdGVkSWQ9QTAyNDE0ODkyRlNDWlAzUktPQzJSJmVuY3J5cHRlZEFkSWQ9QTA5MTkwNjgyWVUzQ0RUMTBCTEFFJndpZGdldE5hbWU9c3BfYXRmJmFjdGlvbj1jbGlja1JlZGlyZWN0JmRvTm90TG9nQ2xpY2s9dHJ1ZQ==WDY4VktWQlImd2lkZ2V0TmFtZT1zcF9kZXRhaWwmYWN0aW9uPWNsaWNrUmVkaXJlY3QmZG9Ob3RMb2dDbGljaz10cnVl"); err != nil {
		log.Println("Could not visit product page", err)
	}

	enc := json.NewEncoder(os.Stdout)
	enc.SetIndent("", " ")
	if err := enc.Encode(allInfos); err != nil {
		log.Println("Could not encode JSON", err)
	}

	writeJson(allInfos)

	duration := time.Since(start)
	fmt.Println(duration.Seconds())
}
// writeJson serializes data as indented JSON and writes it to "stocky.json"
// in the current working directory with mode 0666.
//
// Failures are logged rather than returned. If marshalling fails, the file is
// left untouched instead of being overwritten with empty content.
func writeJson(data *Info) {
	dataFile, err := json.MarshalIndent(data, "", " ")
	if err != nil {
		log.Println("Could not create JSON", err)
		return
	}

	if err := ioutil.WriteFile("stocky.json", dataFile, 0666); err != nil {
		log.Println("Could not write JSON file", err)
	}
}
正如 @Flimzy 建议的那样,我只把新的 imgsrc 追加到 Info 结构体的单个实例中。看起来您期望的输出是一个长度为 1 的切片,其中包含所有图像。
我还使用了 strings.HasPrefix 来简化对字符串前缀的切片检查。两者是等效的,但我认为这样更简洁,所以这是一个可选的更改。
如果您还有其他问题,请告诉我。
编辑,如果您需要继续使用切片,如下所示的内容可能会起作用。从您的问题中,很难判断何时需要清除新信息:
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"os"
"strings"
"time"
"github.com/gocolly/colly"
)
// Info is the JSON record produced by the scraper: a numeric identifier
// together with the image URLs collected for it.
type Info struct {
	// ID is serialized under the "id" key.
	ID int `json:"id"`
	// Images is serialized under the "images" key; a nil slice encodes as null.
	Images []string `json:"images"`
}
// main scrapes image URLs from one Amazon product page into the first element
// of a one-element []Info, prints the slice as indented JSON on stdout, writes
// it to disk via writeJson, and finally prints the elapsed time in seconds.
func main() {
	allInfos := make([]Info, 1)

	start := time.Now()

	co := colly.NewCollector(
		colly.AllowedDomains("www.amazon.com", "amazon.com"),
	)

	// Registered before the first Visit so the callback is already attached
	// when requests are issued; previously it was added only after the crawl.
	co.OnRequest(func(r *colly.Request) {
		fmt.Println("Visiting: ", r.URL.String())
	})

	// Collect every <img> whose src points at the product image host and
	// which carries no class attribute.
	co.OnHTML("img[src]", func(e *colly.HTMLElement) {
		imgsrc := e.Attr("src")
		imgclass := e.Attr("class")
		if !strings.HasPrefix(imgsrc, "https://images-na.ssl-images-amazon.com/images/I/") || imgclass != "" {
			return
		}

		fmt.Printf("The Array %d %v", len(allInfos[0].Images), imgsrc+"\n")
		allInfos[0].Images = append(allInfos[0].Images, imgsrc)

		// NOTE(review): the image host is not listed in AllowedDomains, so
		// this visit is presumably rejected by colly; the error is now logged
		// instead of being silently dropped.
		if err := co.Visit(e.Request.AbsoluteURL(imgsrc)); err != nil {
			log.Println("Could not visit image", imgsrc, err)
		}
	})

	// A failed crawl is reported but does not abort: the (possibly empty)
	// result is still printed and written, as before.
	if err := co.Visit("https://www.amazon.com/Bluetooth-FM-Transmitter-Compatible-Smartphones/dp/B088TCSH8T/ref=sr_1_1_sspa?dchild=1&keywords=transmitter&qid=1623860482&sr=8-1-spons&psc=1&smid=A2XMGHKVCX40WB&spLa=ZW5jcnlwdGVkUXVhbGlmaWVyPUFGVEgxTjJLRFdLSkUmZW5jcnlwdGVkSWQ9QTAyNDE0ODkyRlNDWlAzUktPQzJSJmVuY3J5cHRlZEFkSWQ9QTA5MTkwNjgyWVUzQ0RUMTBCTEFFJndpZGdldE5hbWU9c3BfYXRmJmFjdGlvbj1jbGlja1JlZGlyZWN0JmRvTm90TG9nQ2xpY2s9dHJ1ZQ==WDY4VktWQlImd2lkZ2V0TmFtZT1zcF9kZXRhaWwmYWN0aW9uPWNsaWNrUmVkaXJlY3QmZG9Ob3RMb2dDbGljaz10cnVl"); err != nil {
		log.Println("Could not visit product page", err)
	}

	enc := json.NewEncoder(os.Stdout)
	enc.SetIndent("", " ")
	if err := enc.Encode(allInfos); err != nil {
		log.Println("Could not encode JSON", err)
	}

	writeJson(allInfos)

	duration := time.Since(start)
	fmt.Println(duration.Seconds())
}
// writeJson serializes data as an indented JSON array and writes it to
// "stocky.json" in the current working directory with mode 0666.
//
// Failures are logged rather than returned. If marshalling fails, the file is
// left untouched instead of being overwritten with empty content.
func writeJson(data []Info) {
	dataFile, err := json.MarshalIndent(data, "", " ")
	if err != nil {
		log.Println("Could not create JSON", err)
		return
	}

	if err := ioutil.WriteFile("stocky.json", dataFile, 0666); err != nil {
		log.Println("Could not write JSON file", err)
	}
}
- 1 回答
- 0 关注
- 177 浏览
添加回答
举报