From 74dc132b2b3315e219c3785eb092d412d83f6825 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Paulo=20Sima=CC=83o?=
Date: Sun, 31 Oct 2021 06:37:05 -0300
Subject: [PATCH] Add OCR

---
 api/cli.go          |  12 +++
 api/ocr/cli.go      | 182 ++++++++++++++++++++++++++++++++++++++++++++
 api/ocr/cli_test.go |  28 +++++++
 go.mod              |   9 ++-
 go.sum              |   6 ++
 5 files changed, 234 insertions(+), 3 deletions(-)
 create mode 100644 api/ocr/cli.go
 create mode 100644 api/ocr/cli_test.go

diff --git a/api/cli.go b/api/cli.go
index 4ce5ea0..a39c90b 100644
--- a/api/cli.go
+++ b/api/cli.go
@@ -2,6 +2,7 @@ package api
 
 import (
 	"go.digitalcircle.com.br/open/httpcli"
+	"go.digitalcircle.com.br/open/replaycli-go/ipcmux"
 	"go.digitalcircle.com.br/open/replaycli-go/util"
 	"os"
 )
@@ -21,3 +22,14 @@ func NewApiCli() *ApiCli {
 	ret.cli.AddHeader("X-API-KEY", apikey)
 	return ret
 }
+
+// NewApiIPCCli returns an ApiCli whose HTTP client is the one returned by
+// ipcmux.NewClient. It stores the value of the REPLAY_APIKEY environment
+// variable in the package-level apikey and sends it as the X-API-KEY header.
+func NewApiIPCCli() *ApiCli {
+	ret := &ApiCli{cli: httpcli.NewCli()}
+	ret.cli.SetCli(ipcmux.NewClient())
+	apikey = os.Getenv("REPLAY_APIKEY")
+	ret.cli.AddHeader("X-API-KEY", apikey)
+	return ret
+}
diff --git a/api/ocr/cli.go b/api/ocr/cli.go
new file mode 100644
index 0000000..69c9e95
--- /dev/null
+++ b/api/ocr/cli.go
@@ -0,0 +1,182 @@
+// Package ocr provides a client for the "/ocr" endpoint and the types that
+// model the alto document it returns.
+package ocr
+
+import (
+	"encoding/xml"
+	"go.digitalcircle.com.br/open/replaycli-go/api"
+)
+
+// AltoString maps a String element and its attributes.
+type AltoString struct {
+	Text    string  `xml:",chardata"`
+	ID      string  `xml:"ID,attr"`
+	HPOS    int     `xml:"HPOS,attr"`
+	VPOS    int     `xml:"VPOS,attr"`
+	WIDTH   int     `xml:"WIDTH,attr"`
+	HEIGHT  int     `xml:"HEIGHT,attr"`
+	WC      float64 `xml:"WC,attr"`
+	CONTENT string  `xml:"CONTENT,attr"`
+}
+
+// AltoSP maps an SP element and its attributes.
+type AltoSP struct {
+	Text  string `xml:",chardata"`
+	WIDTH int    `xml:"WIDTH,attr"`
+	VPOS  int    `xml:"VPOS,attr"`
+	HPOS  int    `xml:"HPOS,attr"`
+}
+
+// AltoTextLine maps a TextLine element together with its String and SP
+// children. Unlike the other block types, its position and size attributes
+// are kept as strings.
+type AltoTextLine struct {
+	Text   string       `xml:",chardata"`
+	ID     string       `xml:"ID,attr"`
+	HPOS   string       `xml:"HPOS,attr"`
+	VPOS   string       `xml:"VPOS,attr"`
+	WIDTH  string       `xml:"WIDTH,attr"`
+	HEIGHT string       `xml:"HEIGHT,attr"`
+	String []AltoString `xml:"String"`
+	SP     []AltoSP     `xml:"SP"`
+}
+
+// AltoTextBlock maps a TextBlock element and the TextLine elements it holds.
+type AltoTextBlock struct {
+	Text     string         `xml:",chardata"`
+	ID       string         `xml:"ID,attr"`
+	HPOS     int            `xml:"HPOS,attr"`
+	VPOS     int            `xml:"VPOS,attr"`
+	WIDTH    int            `xml:"WIDTH,attr"`
+	HEIGHT   int            `xml:"HEIGHT,attr"`
+	TextLine []AltoTextLine `xml:"TextLine"`
+}
+
+// AltoComposedBlock maps a ComposedBlock element and its TextBlock children.
+type AltoComposedBlock struct {
+	Text      string          `xml:",chardata"`
+	ID        string          `xml:"ID,attr"`
+	HPOS      int             `xml:"HPOS,attr"`
+	VPOS      int             `xml:"VPOS,attr"`
+	WIDTH     int             `xml:"WIDTH,attr"`
+	HEIGHT    int             `xml:"HEIGHT,attr"`
+	TextBlock []AltoTextBlock `xml:"TextBlock"`
+}
+
+// AltoPrintSpace maps a PrintSpace element and its ComposedBlock children.
+type AltoPrintSpace struct {
+	Text          string              `xml:",chardata"`
+	HPOS          int                 `xml:"HPOS,attr"`
+	VPOS          int                 `xml:"VPOS,attr"`
+	WIDTH         int                 `xml:"WIDTH,attr"`
+	HEIGHT        int                 `xml:"HEIGHT,attr"`
+	ComposedBlock []AltoComposedBlock `xml:"ComposedBlock"`
+}
+
+// AltoPage maps a Page element and its PrintSpace child.
+type AltoPage struct {
+	Text          string         `xml:",chardata"`
+	WIDTH         int            `xml:"WIDTH,attr"`
+	HEIGHT        int            `xml:"HEIGHT,attr"`
+	PHYSICALIMGNR string         `xml:"PHYSICAL_IMG_NR,attr"`
+	ID            string         `xml:"ID,attr"`
+	PrintSpace    AltoPrintSpace `xml:"PrintSpace"`
+}
+
+// AltoDescription maps the Description element: the measurement unit, the
+// source image information and the OCR processing details.
+type AltoDescription struct {
+	Text                   string `xml:",chardata"`
+	MeasurementUnit        string `xml:"MeasurementUnit"`
+	SourceImageInformation struct {
+		Text     string `xml:",chardata"`
+		FileName string `xml:"fileName"`
+	} `xml:"sourceImageInformation"`
+	OCRProcessing struct {
+		Text              string `xml:",chardata"`
+		ID                string `xml:"ID,attr"`
+		OcrProcessingStep struct {
+			Text               string `xml:",chardata"`
+			ProcessingSoftware struct {
+				Text         string `xml:",chardata"`
+				SoftwareName string `xml:"softwareName"`
+			} `xml:"processingSoftware"`
+		} `xml:"ocrProcessingStep"`
+	} `xml:"OCRProcessing"`
+}
+
+// AltoLayout maps the Layout element and its Page child.
+type AltoLayout struct {
+	Text string   `xml:",chardata"`
+	Page AltoPage `xml:"Page"`
+}
+
+// Alto is the root of the document, mapped from the alto element.
+type Alto struct {
+	XMLName        xml.Name        `xml:"alto"`
+	Text           string          `xml:",chardata"`
+	Xmlns          string          `xml:"xmlns,attr"`
+	Xlink          string          `xml:"xlink,attr"`
+	Xsi            string          `xml:"xsi,attr"`
+	SchemaLocation string          `xml:"schemaLocation,attr"`
+	Description    AltoDescription `xml:"Description"`
+	Layout         AltoLayout      `xml:"Layout"`
+}
+
+// Strings returns the String entries of every TextLine reachable through the
+// page's ComposedBlock and TextBlock slices, in the order those slices hold
+// them. The returned slice is empty, never nil, when nothing is found.
+func (a Alto) Strings() []AltoString {
+	ret := make([]AltoString, 0)
+	for _, cb := range a.Layout.Page.PrintSpace.ComposedBlock {
+		for _, tb := range cb.TextBlock {
+			for _, tl := range tb.TextLine {
+				ret = append(ret, tl.String...)
+			}
+		}
+	}
+	return ret
+}
+
+// Opts is the request payload sent to the "/ocr" endpoint by Cli.OCR.
+type Opts struct {
+	Ver      string  `json:"ver,omitempty"`
+	X        int     `json:"x"`
+	Y        int     `json:"y"`
+	H        int     `json:"h"`
+	W        int     `json:"w"`
+	Blur     float64 `json:"blur"`
+	Sharpen  float64 `json:"sharpen"`
+	Resizew  int     `json:"resizew"`
+	Resizeh  int     `json:"resizeh"`
+	Lang     string  `json:"lang"`
+	Tempfile string  `json:"tempfile"`
+	Dispid   int     `json:"d"`
+	Gray     bool    `json:"gray"`
+	Src      string  `json:"src"`
+	Bytes    []byte  `json:"bytes"`
+	AddRects bool    `json:"add_rects"`
+}
+
+// Cli is the OCR client; it embeds *api.ApiCli.
+type Cli struct {
+	*api.ApiCli
+}
+
+// OCR sends opts to the "/ocr" endpoint through JsonPost and returns the
+// resulting Alto document. When the call fails it returns nil and the error.
+func (c *Cli) OCR(opts *Opts) (*Alto, error) {
+	res := &Alto{}
+	if err := c.HttpCli().JsonPost("/ocr", opts, res); err != nil {
+		return nil, err
+	}
+	return res, nil
+}
+
+// NewCli returns a Cli backed by api.NewApiIPCCli with its base path set to
+// "http://ocr/".
+func NewCli() *Cli {
+	ret := &Cli{ApiCli: api.NewApiIPCCli()}
+	ret.ApiCli.HttpCli().SetBasePath("http://ocr/")
+	return ret
+}
diff --git a/api/ocr/cli_test.go b/api/ocr/cli_test.go
new file mode 100644
index 0000000..4a26d58
--- /dev/null
+++ b/api/ocr/cli_test.go
@@ -0,0 +1,28 @@
+package ocr_test
+
+import (
+	"go.digitalcircle.com.br/open/replaycli-go/api/ocr"
+	"testing"
+)
+
+// TestCli_OCR calls the OCR endpoint through a Cli built by ocr.NewCli and
+// logs the returned document.
+func TestCli_OCR(t *testing.T) {
+	c := ocr.NewCli()
+	a, err := c.OCR(&ocr.Opts{
+		X:        0,
+		Y:        0,
+		H:        400,
+		W:        400,
+		Resizew:  1200,
+		Sharpen:  1,
+		Tempfile: "some_test.jpg",
+		Src:      "ss",
+		AddRects: true,
+		Gray:     true,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Logf("%#v", a)
+}
diff --git a/go.mod b/go.mod
index 3fd0c94..e2159fe 100644
--- a/go.mod
+++ b/go.mod
@@ -2,10 +2,13 @@ module go.digitalcircle.com.br/open/replaycli-go
 
 go 1.17
 
+//replace (
+//	go.digitalcircle.com.br/open/httpcli => ../httpcli
+//)
 require (
-	github.com/Microsoft/go-winio v0.5.0
+	github.com/Microsoft/go-winio v0.5.1
 	github.com/gorilla/websocket v1.4.2
 	github.com/mitchellh/go-ps v1.0.0
-	go.digitalcircle.com.br/open/httpcli v0.0.0-20211010235458-74d201454158
-	golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac // indirect
+	go.digitalcircle.com.br/open/httpcli v0.0.0-20211031093505-ecf33aed8afb
+	golang.org/x/sys v0.0.0-20211031064116-611d5d643895 // indirect
 )
diff --git a/go.sum b/go.sum
index 2555dd9..e194fcc 100644
--- a/go.sum
+++ b/go.sum
@@ -1,5 +1,7 @@
 github.com/Microsoft/go-winio v0.5.0 h1:Elr9Wn+sGKPlkaBvwu4mTrxtmOp3F3yV9qhaHbXGjwU=
 github.com/Microsoft/go-winio v0.5.0/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
+github.com/Microsoft/go-winio v0.5.1 h1:aPJp2QD7OOrhO5tQXqQoGSJc+DjDtWTGLOmNyAm6FgY=
+github.com/Microsoft/go-winio v0.5.1/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84=
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
 github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
@@ -11,7 +13,11 @@ github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic
 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
 go.digitalcircle.com.br/open/httpcli v0.0.0-20211010235458-74d201454158 h1:vFm0G1b4hBDJ7y/s2NqRuX99lwuI8/sxxH3XLUOdi0o=
 go.digitalcircle.com.br/open/httpcli v0.0.0-20211010235458-74d201454158/go.mod h1:e/6vw7F4vgfs+ThQoLeBbYCYLTH1b5thzFwkFixi+oA=
+go.digitalcircle.com.br/open/httpcli v0.0.0-20211031093505-ecf33aed8afb h1:gv0FO26InL2ctmhCqthsOFEooi0FvA1rFT6VXzgveJE=
+go.digitalcircle.com.br/open/httpcli v0.0.0-20211031093505-ecf33aed8afb/go.mod h1:e/6vw7F4vgfs+ThQoLeBbYCYLTH1b5thzFwkFixi+oA=
 golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac h1:oN6lz7iLW/YC7un8pq+9bOLyXrprv2+DKfkJY+2LJJw=
 golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20211031064116-611d5d643895 h1:iaNpwpnrgL5jzWS0vCNnfa8HqzxveCFpFx3uC/X4Tps=
+golang.org/x/sys v0.0.0-20211031064116-611d5d643895/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=