go语言正则表达式regexp

    xiaoxiao2021-04-18  63

    // Example code for the regular-expression functions in the regexp package.
package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"regexp"
	"strings"
)

/*
One-shot matching helpers offered by the regexp package:

	func Match(pattern string, b []byte) (matched bool, error error)
	func MatchReader(pattern string, r io.RuneReader) (matched bool, error error)
	func MatchString(pattern string, s string) (matched bool, error error)

They compile the pattern on every call, so the patterns below are compiled
once at package scope instead.
*/

// ipPattern matches four dot-separated groups of one to three decimal digits.
// Every separator is an escaped, literal dot.
var ipPattern = regexp.MustCompile(`^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$`)

// digitsPattern matches a non-empty string consisting only of ASCII digits.
var digitsPattern = regexp.MustCompile(`^[0-9]+$`)

// Patterns used by htmlToText.
var (
	// tagPattern matches one "<...>" tag (lazy, may span lines).
	tagPattern = regexp.MustCompile(`\<[\S\s]+?\>`)
	// stylePattern matches a lower-cased <style ...>...</style> element.
	stylePattern = regexp.MustCompile(`\<style[\S\s]+?\</style\>`)
	// scriptPattern matches a lower-cased <script ...>...</script> element.
	scriptPattern = regexp.MustCompile(`\<script[\S\s]+?\</script\>`)
	// blankRunPattern matches a run of two or more whitespace characters.
	blankRunPattern = regexp.MustCompile(`\s{2,}`)
)

// IsIp reports whether ip has the dotted-quad shape "d.d.d.d", where each
// group is one to three decimal digits. Only the shape is checked; the
// numeric range of each group (0-255) is not validated.
func IsIp(ip string) bool {
	return ipPattern.MatchString(ip)
}

// IsDigital returns "是数字" when str consists solely of ASCII digits and
// "不是数字" otherwise (including for the empty string).
func IsDigital(str string) string {
	if digitsPattern.MatchString(str) {
		return "是数字"
	}
	return "不是数字"
}

// main1 demonstrates IsIp and IsDigital on a few sample inputs.
func main1() {
	fmt.Println(IsIp("192.168.172.88"))
	fmt.Println(IsIp("192,167.12.11"))
	fmt.Println(IsDigital("192,167.12.11"))
	fmt.Println(IsDigital("192"))
}

// savePage writes content to the named file, creating it if necessary and
// truncating any previous content. It returns the first error encountered
// while opening, writing or closing the file.
func savePage(name, content string) error {
	/*
		Flags accepted by os.OpenFile:

		O_RDONLY  open the file read-only.
		O_WRONLY  open the file write-only.
		O_RDWR    open the file read-write.
		O_APPEND  append data to the file when writing.
		O_CREATE  create a new file if none exists.
		O_EXCL    used with O_CREATE, file must not exist.
		O_SYNC    open for synchronous I/O.
		O_TRUNC   if possible, truncate file when opened.
	*/
	// O_TRUNC keeps stale bytes from an earlier, longer download out of the file.
	file, err := os.OpenFile(name, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
	if err != nil {
		return err
	}
	if _, err := file.WriteString(content); err != nil {
		file.Close()
		return err
	}
	return file.Close()
}

// htmlToText reduces an HTML document to its text content: tags are
// lower-cased, <style> and <script> elements are dropped, every remaining
// tag becomes a newline, runs of whitespace collapse to a single newline,
// and surrounding whitespace is trimmed.
func htmlToText(src string) string {
	// Lower-case every tag first so the style/script patterns below, which
	// are written in lower case, also catch <STYLE> and <SCRIPT>.
	src = tagPattern.ReplaceAllStringFunc(src, strings.ToLower)
	// Remove style elements together with their content.
	src = stylePattern.ReplaceAllString(src, "")
	// Remove script elements together with their content.
	src = scriptPattern.ReplaceAllString(src, "")
	// Replace every remaining tag with a newline.
	src = tagPattern.ReplaceAllString(src, "\n")
	// Collapse consecutive whitespace (including the newlines just inserted).
	src = blankRunPattern.ReplaceAllString(src, "\n")
	return strings.TrimSpace(src)
}

// main2 uses a tiny crawler as an example of filtering fetched data with
// regular expressions: it downloads a page, saves the raw HTML to
// baidu.html and prints the page's text content.
func main2() {
	resp, err := http.Get("http://www.baidu.com")
	if err != nil {
		fmt.Println("http get error:", err)
		return
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("http read err:", err)
		return
	}
	src := string(body)

	// Saving the raw page is best-effort; the text extraction below does
	// not depend on it, so a failure is reported and processing continues.
	if err := savePage("baidu.html", src); err != nil {
		fmt.Println("save page error:", err)
	}

	// For non-trivial work the pattern is compiled into a *regexp.Regexp
	// first (which validates it); the compiled value can then be applied
	// to any number of strings.
	fmt.Println(htmlToText(src))
}

// main walks through the Find* family of methods and Expand on a sample
// sentence.
func main() {
	a := "I am learning Go language"
	text := []byte(a)
	fmt.Println(text)

	re := regexp.MustCompile("[a-z]{2,4}")

	// Find returns the first match.
	one := re.Find(text)
	fmt.Println("Find:", string(one))

	// FindAll returns every match; n < 0 means "all of them", otherwise at
	// most n matches are returned.
	all := re.FindAll(text, -1)
	for _, value := range all {
		fmt.Println("FindAll", string(value))
	}

	// FindIndex returns the start and end offsets of the first match.
	index := re.FindIndex(text)
	fmt.Println("FindIndex", index)

	// FindAllIndex returns the offsets of every match; n as above.
	allindex := re.FindAllIndex(text, -1)
	fmt.Println("FindAllIndex", allindex)

	re2 := regexp.MustCompile("am(.*)lang(.*)")

	// FindSubmatch returns a slice: element 0 is the whole match, element 1
	// the first (...) group, element 2 the second group. Here that is:
	//   0: "am learning Go language"
	//   1: " learning Go " (note the surrounding spaces)
	//   2: "uage"
	submatch := re2.FindSubmatch(text)
	fmt.Println("FindSubmatch", submatch)
	for _, v := range submatch {
		fmt.Println(string(v))
	}

	// FindSubmatchIndex is the offset-returning counterpart of FindSubmatch.
	submatchindex := re2.FindSubmatchIndex(text)
	fmt.Println(submatchindex)

	// FindAllSubmatch returns the submatches of every match.
	submatchall := re2.FindAllSubmatch(text, -1)
	fmt.Println(submatchall)

	// FindAllSubmatchIndex returns the submatch offsets of every match.
	submatchallindex := re2.FindAllSubmatchIndex(text, -1)
	fmt.Println(submatchallindex)

	// Each command sits on its own line: with the (?m) flag, $ matches at
	// the end of every line, so "arg" captures the rest of that line only.
	src1 := []byte(`
call hello alice
hello bob
call hello eve
`)
	pat := regexp.MustCompile(`(?m)(call)\s+(?P<cmd>\w+)\s+(?P<arg>.+)\s*$`)

	// Expand appends the template to res, substituting the named groups of
	// each match.
	var res []byte
	for _, s := range pat.FindAllSubmatchIndex(src1, -1) {
		res = pat.Expand(res, []byte("$cmd('$arg')\n"), src1, s)
	}
	fmt.Println("expand ", string(res))
}
    转载请注明原文地址: https://ju.6miu.com/read-675049.html

    最新回复(0)