exercise: complete the exercise web crawler with sync.Mutex and sync.WaitGroup
This commit is contained in:
		
							parent
							
								
									9a390fa84b
								
							
						
					
					
						commit
						1703797170
					
				
							
								
								
									
										120
									
								
								exercise/web-crawler/main.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										120
									
								
								exercise/web-crawler/main.go
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,120 @@
 | 
				
			|||||||
 | 
					package main
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import (
 | 
				
			||||||
 | 
						"fmt"
 | 
				
			||||||
 | 
						"sync"
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Fetcher is the page source used by Crawl. Implementations may be
					// backed by canned data, as fakeFetcher is in this file.
					type Fetcher interface {
						// Fetch looks up url and returns the page body together with
						// the URLs found on that page. A non-nil err reports that the
						// page could not be retrieved.
						Fetch(url string) (body string, urls []string, err error)
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// SafeCounter holds the state shared by all goroutines of one crawl.
					// It contains a sync.Mutex and a sync.WaitGroup, so it must be used
					// through a pointer and never copied.
					type SafeCounter struct {
						// wg counts the Crawl calls that have been started but have not
						// yet returned.
						wg sync.WaitGroup

						// mu guards visited.
						mu sync.Mutex
						// visited is the set of URLs already recorded by Crawl.
						visited map[string]struct{}
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					var (
 | 
				
			||||||
 | 
						safeCounter *SafeCounter
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Crawl uses fetcher to recursively crawl
 | 
				
			||||||
 | 
					// pages starting with url, to a maximum of depth.
 | 
				
			||||||
 | 
					func Crawl(url string, depth int, fetcher Fetcher) {
 | 
				
			||||||
 | 
						// TODO: Fetch URLs in parallel.
 | 
				
			||||||
 | 
						// TODO: Don't fetch the same URL twice.
 | 
				
			||||||
 | 
						// This implementation doesn't do either:
 | 
				
			||||||
 | 
						defer safeCounter.wg.Done()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						safeCounter.mu.Lock()
 | 
				
			||||||
 | 
						defer safeCounter.mu.Unlock()
 | 
				
			||||||
 | 
						if url != "" {
 | 
				
			||||||
 | 
							_, ok := safeCounter.visited[url]
 | 
				
			||||||
 | 
							if ok {
 | 
				
			||||||
 | 
								return
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							safeCounter.visited[url] = struct{}{}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if depth <= 0 {
 | 
				
			||||||
 | 
							safeCounter.wg.Done()
 | 
				
			||||||
 | 
							return
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						body, urls, err := fetcher.Fetch(url)
 | 
				
			||||||
 | 
						if err != nil {
 | 
				
			||||||
 | 
							fmt.Println(err)
 | 
				
			||||||
 | 
							return
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						fmt.Printf("found: %s %q\n", url, body)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for _, u := range urls {
 | 
				
			||||||
 | 
							safeCounter.wg.Add(1)
 | 
				
			||||||
 | 
							go Crawl(u, depth-1, fetcher)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func main() {
 | 
				
			||||||
 | 
						safeCounter = &SafeCounter{
 | 
				
			||||||
 | 
							visited: make(map[string]struct{}),
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						safeCounter.wg.Add(1)
 | 
				
			||||||
 | 
						go Crawl("https://golang.org/", 4, fetcher)
 | 
				
			||||||
 | 
						safeCounter.wg.Wait()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// fakeFetcher is a Fetcher backed by an in-memory table of canned
					// results, keyed by URL.
					type fakeFetcher map[string]*fakeResult

					// fakeResult is the canned response stored for one URL.
					type fakeResult struct {
						body string   // page body returned by Fetch
						urls []string // links reported for the page
					}

					// Fetch returns the canned body and links stored for url. When url has
					// no entry it returns empty results and a "not found" error naming url.
					func (f fakeFetcher) Fetch(url string) (string, []string, error) {
						page, found := f[url]
						if !found {
							return "", nil, fmt.Errorf("not found: %s", url)
						}
						return page.body, page.urls, nil
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// fetcher is a populated fakeFetcher.
 | 
				
			||||||
 | 
					var fetcher = fakeFetcher{
 | 
				
			||||||
 | 
						"https://golang.org/": &fakeResult{
 | 
				
			||||||
 | 
							"The Go Programming Language",
 | 
				
			||||||
 | 
							[]string{
 | 
				
			||||||
 | 
								"https://golang.org/pkg/",
 | 
				
			||||||
 | 
								"https://golang.org/cmd/",
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
 | 
						"https://golang.org/pkg/": &fakeResult{
 | 
				
			||||||
 | 
							"Packages",
 | 
				
			||||||
 | 
							[]string{
 | 
				
			||||||
 | 
								"https://golang.org/",
 | 
				
			||||||
 | 
								"https://golang.org/cmd/",
 | 
				
			||||||
 | 
								"https://golang.org/pkg/fmt/",
 | 
				
			||||||
 | 
								"https://golang.org/pkg/os/",
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
 | 
						"https://golang.org/pkg/fmt/": &fakeResult{
 | 
				
			||||||
 | 
							"Package fmt",
 | 
				
			||||||
 | 
							[]string{
 | 
				
			||||||
 | 
								"https://golang.org/",
 | 
				
			||||||
 | 
								"https://golang.org/pkg/",
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
 | 
						"https://golang.org/pkg/os/": &fakeResult{
 | 
				
			||||||
 | 
							"Package os",
 | 
				
			||||||
 | 
							[]string{
 | 
				
			||||||
 | 
								"https://golang.org/",
 | 
				
			||||||
 | 
								"https://golang.org/pkg/",
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user