diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..e92cf9b --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.pt filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index 939889d..03e9842 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,8 @@ # Runtime directories -ckpt/ mAP_txt/ summary/ weight/ +files/ # IntelliJ IDEA .idea/ @@ -12,6 +12,7 @@ weight/ __pycache__/ *.py[cod] *$py.class +main # C extensions *.so diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..cfa5aec --- /dev/null +++ b/LICENSE @@ -0,0 +1,203 @@ +Copyright 2019 Petr Masopust, Aprar s.r.o.. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e0fcf0d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,87 @@
+# Face recognition technology demo
+
+Mass face identification and recognition in images.
+
+## Installation
+
+The simplest complete installation is via Docker Compose: ``docker-compose up -d`` in the root directory. For detailed installation instructions see the [API server](apiserver/README.md) or [vectorizer](vectorizer/README.md) readme files.
+Without NVIDIA Docker support, Docker runs only on the CPU with **very** degraded performance (over a minute on 6 CPU cores).
+
+## Usage
+
+### Learn people faces
+
+```shell script
+curl -X POST -F 'person=PID' -F 'directory=DIR' -F 'file=@portrait.jpg' http://localhost:8080/learn
+```
+
+Replace PID with the person's id (e.g. database id or name) and DIR with your directory name (e.g. company name). People are recognized only within the same directory. PNG or JPEG images are supported. Only images with one face are allowed for learning!
+Usually one good portrait photo is enough, but you can learn more photos for each person.
+
+### Recognize people
+
+```shell script
+curl -X POST -F 'directory=DIR' -F 'file=@photo.jpg' http://localhost:8080/recognize
+```
+
+Replace DIR with your directory name (e.g. company name). People are recognized only within the same directory. For each detected face the most probable person's id is returned. PNG or JPEG images are supported.
+
+Example result:
+
+```json
+{
+"status":"OK",
+"url":"/files/00636b47-e6a5-4fab-8a02-9e44d052c193.jpg",
+"filename":"photo.jpg",
+"directory":"mydir",
+"persons":[
+{"id":"PID1","box":[2797,1164,2918,1285],"score":0.999998927116394,"probability":0.8342},
+{"id":"PID2","box":[2398,1854,2590,2046],"score":0.9999780654907227,"probability":0.32546},
+{"id":"PID3","box":[1753,1148,1905,1300],"score":0.9999217987060547,"probability":0.65785}
+]}
+```
+
+| Field | Description |
+| --- | --- |
+| status | Status message - either OK or error text |
+| url | Relative URL to the original image |
+| filename | Original image filename |
+| directory | Directory name |
+| persons | Recognized people array |
+| id | Person's id |
+| box | Box around the face |
+| score | Face detection score (i.e. probability) |
+| probability | Person recognition probability |
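+
+For scripting, the same call works from any HTTP client. A minimal Python sketch (hypothetical helper; assumes the third-party ``requests`` package and the compose stack listening on localhost:8080):
+
+```python
+import requests
+
+def recognize(image_path, directory):
+    # POST the image as multipart/form-data, exactly like the curl call above
+    with open(image_path, "rb") as f:
+        resp = requests.post(
+            "http://localhost:8080/recognize",
+            data={"directory": directory},
+            files={"file": f},
+        )
+    resp.raise_for_status()
+    result = resp.json()
+    if result["status"] != "OK":
+        raise RuntimeError(result["status"])
+    return result["persons"]  # id, box, score, probability per detected face
+
+for person in recognize("photo.jpg", "mydir"):
+    print(person["id"], person["box"], person["probability"])
+```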
+
+## Architecture
+
+This demo consists of three parts - the API server, the vectorizer, and the database. The API server is a frontend server written in Go.
+The vectorizer is the main part; it finds faces and creates vectors from them. The database is simple storage for the learned vectors.
+Both the API server and the vectorizer are fully scalable, e.g. in Kubernetes. The only non-scalable part is the PostgreSQL database, but it can easily be replaced with different storage, e.g. HBase -
+just reimplement storage.go in the API server.
+Only the API server listens to customer requests; the rest are internal components and should not be directly accessible from the internet.
+
+## Future roadmap
+
+ * Training on identified faces (both nets are trained separately now)
+ * Face alignment between identification and recognition
+ * Web user interface (help needed!)
+
+## Based on
+
+Github repositories:
+
+* [https://github.com/rainofmine/Face_Attention_Network](https://github.com/rainofmine/Face_Attention_Network)
+* [https://github.com/ronghuaiyang/arcface-pytorch](https://github.com/ronghuaiyang/arcface-pytorch)
+
+Papers:
+
+* [Face Attention Network: An Effective Face Detector for the Occluded Faces](https://arxiv.org/abs/1711.07246)
+* [AdaCos: Adaptively Scaling Cosine Logits for Effectively Learning Deep Face Representations](https://arxiv.org/abs/1905.00292)
+* [ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
+* [SphereFace: Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
+* [CosFace: Large Margin Cosine Loss for Deep Face Recognition](https://arxiv.org/abs/1801.09414)
+
+## Licensing
+
+Code in this repository is licensed under the Apache 2.0 license. See [LICENSE](LICENSE).
diff --git a/apiserver/Dockerfile b/apiserver/Dockerfile
new file mode 100644
index 0000000..9a930a1
--- /dev/null
+++ b/apiserver/Dockerfile
@@ -0,0 +1,20 @@
+FROM golang:alpine AS build-env
+
+RUN apk update && apk upgrade && \
+    apk add --no-cache bash git openssh
+
+COPY ./apiserver /apiserver/apiserver
+COPY ./apiserver.yaml /apiserver/apiserver.yaml
+COPY ./go.mod /apiserver/go.mod
+COPY ./main.go /apiserver/main.go
+
+WORKDIR /apiserver
+RUN go build -o goapp
+
+# final stage
+FROM alpine
+WORKDIR /apiserver
+COPY --from=build-env /apiserver/goapp /apiserver/apiserver
+COPY --from=build-env /apiserver/apiserver.yaml /apiserver/apiserver.yaml
+RUN mkdir /apiserver/files
+ENTRYPOINT /apiserver/apiserver
diff --git a/apiserver/README.md b/apiserver/README.md
new file mode 100644
index 0000000..4d2b745
--- /dev/null
+++ b/apiserver/README.md
@@ -0,0 +1,129 @@
+# API server
+
+A frontend server written in Go. **Technology demo - do not use in production!**
+
+**Main purpose:**
+* serve stored images
+* send images to the vectorizer
+* store vectors in the database
+* compare vectors and return ids
+
+No local state, can be scaled.
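+
+The "compare vectors" step is plain vector math: each learned face is a 512-float embedding stored in the persons table, and within the requested directory the closest stored vector wins. A hedged Python sketch of the idea (the server's real implementation lives in apiserver/math.go; cosine similarity and the helper name are assumptions for illustration):
+
+```python
+import numpy as np
+
+def best_match(face_vector, stored_rows):
+    """stored_rows: (person_id, vector) pairs for one directory."""
+    v = np.asarray(face_vector, dtype=float)
+    v /= np.linalg.norm(v)
+    best_id, best_sim = None, -1.0
+    for person_id, w in stored_rows:
+        w = np.asarray(w, dtype=float)
+        sim = float(v @ (w / np.linalg.norm(w)))  # cosine similarity in [-1, 1]
+        if sim > best_sim:
+            best_id, best_sim = person_id, sim
+    return best_id, best_sim  # highest similarity wins
+```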
+
+## Configuration
+Edit the ``apiserver.yaml`` file:
+
+| Key | Value | Description |
+| --- | --- | --- |
+| port | 8080 | Port to listen on |
+| vectorizer | http://vectorizer:8080/vectorize | Vectorizer URL |
+| dbuser | faceserver | DB user |
+| dbpassword | secret | DB password |
+| dbname | faceserver | DB name |
+| dbhost | db | DB host |
+
+Do not change the configuration if you want to run the prepared docker-compose setup.
+
+### DB configuration
+Only PostgreSQL is supported for now. Create a new role and database:
+```shell script
+createuser -D -P -S faceserver
+createdb -E UTF8 -O faceserver faceserver
+```
+
+Create the API server tables:
+
+```shell script
+psql -U faceserver -h localhost faceserver <../init.sql
+```
+
+## Installation
+### Docker image
+Build the docker image - the preferred method:
+
+```shell script
+docker build -t apiserver:latest .
+```
+
+### Local compilation
+Go 1.12 is required. Run:
+
+```shell script
+go build main.go
+```
+
+## HTTP API
+### Learn
+
+```shell script
+curl -X POST -F 'person=PID' -F 'directory=DIR' -F 'file=@portrait.jpg' http://localhost:8080/learn
+```
+
+Replace PID with the person's id (e.g. database id or name) and DIR with your directory name (e.g. company name). People are recognized only within the same directory. PNG or JPEG images are supported. Only images with one face are allowed for learning!
+
+Result:
+
+```json
+{
+"status":"OK",
+"url":"/files/01e66d8f-536e-4e5ab3b1-521672739d15.jpg",
+"filename":"photo.jpg",
+"directory":"mydir",
+"persons":[
+{"id":"PID","box":[0,15,65,88],"score":0.9909800887107849}
+]}
+```
+
+| Field | Description |
+| --- | --- |
+| status | Status message - either OK or error text |
+| url | Relative URL to the original image |
+| filename | Original image filename |
+| directory | Directory name |
+| persons | Recognized people array |
+| id | Person's id |
+| box | Box around the face |
+| score | Face detection score (i.e. probability) |
+
+### Recognize
+
+```shell script
+curl -X POST -F 'directory=DIR' -F 'file=@photo.jpg' http://localhost:8080/recognize
+```
+
+Replace DIR with your directory name (e.g. company name). People are recognized only within the same directory. For each detected face the most probable person's id is returned. PNG or JPEG images are supported.
+
+Result:
+
+```json
+{
+"status":"OK",
+"url":"/files/00636b47-e6a5-4fab-8a02-9e44d052c193.jpg",
+"filename":"photo.jpg",
+"directory":"mydir",
+"persons":[
+{"id":"PID1","box":[2797,1164,2918,1285],"score":0.999998927116394,"probability":0.8342},
+{"id":"PID2","box":[2398,1854,2590,2046],"score":0.9999780654907227,"probability":0.32546},
+{"id":"PID3","box":[1753,1148,1905,1300],"score":0.9999217987060547,"probability":0.65785}
+]}
+```
+
+| Field | Description |
+| --- | --- |
+| status | Status message - either OK or error text |
+| url | Relative URL to the original image |
+| filename | Original image filename |
+| directory | Directory name |
+| persons | Recognized people array |
+| id | Person's id |
+| box | Box around the face |
+| score | Face detection score (i.e. probability) |
+| probability | Person recognition probability |
+
+### Files
+
+The ``/files/...`` path contains all learned or recognized images.
+
+## Licensing
+
+Code in this repository is licensed under the Apache 2.0 license. See [LICENSE](../LICENSE).
diff --git a/apiserver/apiserver.yaml b/apiserver/apiserver.yaml
index 456455d..8bb6587 100644
--- a/apiserver/apiserver.yaml
+++ b/apiserver/apiserver.yaml
@@ -1,8 +1,6 @@
-port: 8081
-vectorizer:
-  url: http://localhost:8080/vectorize
-db:
-  user: faceserver
-  password: aaa
-  name: faceserver
-  host: localhost
+port: 8080
+vectorizer: http://vectorizer:8080/vectorize
+dbuser: faceserver
+dbpassword: secret
+dbname: faceserver
+dbhost: db
diff --git a/apiserver/apiserver/math.go b/apiserver/apiserver/math.go
index b0e0141..b297ae2 100644
--- a/apiserver/apiserver/math.go
+++ b/apiserver/apiserver/math.go
@@ -1,3 +1,17 @@
+// Copyright 2019 Petr Masopust, Aprar s.r.o.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package apiserver import ( diff --git a/apiserver/apiserver/math_test.go b/apiserver/apiserver/math_test.go index bc3d94f..7780520 100644 --- a/apiserver/apiserver/math_test.go +++ b/apiserver/apiserver/math_test.go @@ -1,3 +1,17 @@ +// Copyright 2019 Petr Masopust, Aprar s.r.o. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package apiserver import ( diff --git a/apiserver/apiserver/server.go b/apiserver/apiserver/server.go index 0d89246..b156111 100644 --- a/apiserver/apiserver/server.go +++ b/apiserver/apiserver/server.go @@ -1,3 +1,17 @@ +// Copyright 2019 Petr Masopust, Aprar s.r.o. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package apiserver import ( @@ -166,7 +180,7 @@ func uploadSave(w http.ResponseWriter, r *http.Request) (string, string, []Vecto return "", "", nil, err } defer reader.Close() - results, err := Vectorize(uid, reader, viper.GetString("vectorizer.url")) + results, err := Vectorize(uid, reader, viper.GetString("vectorizer")) if err != nil { return "", "", nil, err } diff --git a/apiserver/apiserver/storage.go b/apiserver/apiserver/storage.go index 242009b..c14fece 100644 --- a/apiserver/apiserver/storage.go +++ b/apiserver/apiserver/storage.go @@ -1,3 +1,17 @@ +// Copyright 2019 Petr Masopust, Aprar s.r.o. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ package apiserver import ( @@ -25,7 +39,7 @@ type PgStorage struct { } func NewStorage(user string, password string, database string, host string) (PgStorage, error) { - connStr := fmt.Sprintf("user=%s dbname=%s password=%s host=%s", user, database, password, host) + connStr := fmt.Sprintf("user=%s dbname=%s password=%s host=%s sslmode=disable", user, database, password, host) db, err := sql.Open("postgres", connStr) if err != nil { return PgStorage{}, err diff --git a/apiserver/apiserver/vectorizer.go b/apiserver/apiserver/vectorizer.go index 1403094..c6cedd1 100644 --- a/apiserver/apiserver/vectorizer.go +++ b/apiserver/apiserver/vectorizer.go @@ -1,3 +1,17 @@ +// Copyright 2019 Petr Masopust, Aprar s.r.o. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package apiserver import ( diff --git a/apiserver/main.go b/apiserver/main.go index 2c63ad3..5cf8668 100644 --- a/apiserver/main.go +++ b/apiserver/main.go @@ -1,3 +1,17 @@ +// Copyright 2019 Petr Masopust, Aprar s.r.o. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
 package main
 
 import (
@@ -15,14 +29,12 @@ func main() {
 	viper.AddConfigPath("/etc/faceserver/")  // path to look for the config file in
 	viper.AddConfigPath("$HOME/.faceserver") // call multiple times to add many search paths
 	viper.AddConfigPath(".")                 // optionally look for config in the working directory
-	viper.SetEnvPrefix("AS_")
-	viper.AutomaticEnv()
 	err := viper.ReadInConfig() // Find and read the config file
 	if err != nil {             // Handle errors reading the config file
 		panic(fmt.Errorf("Fatal error config file: %s \n", err))
 	}
 
-	apiserver.Dbo, err = apiserver.NewStorage(viper.GetString("db.user"), viper.GetString("db.password"), viper.GetString("db.name"), viper.GetString("db.host"))
+	apiserver.Dbo, err = apiserver.NewStorage(viper.GetString("dbuser"), viper.GetString("dbpassword"), viper.GetString("dbname"), viper.GetString("dbhost"))
 	if err != nil {
 		panic(fmt.Errorf("Fatal error database connection: %s \n", err))
 	}
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 0000000..1037589
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,26 @@
+version: "3.7"
+services:
+  apiserver:
+    build:
+      context: ./apiserver
+    ports:
+      - "8080:8080"
+    depends_on:
+      - db
+      - vectorizer
+  vectorizer:
+    build:
+      context: ./vectorizer
+    environment:
+      VS_PORT: 8080
+      VS_FAN_MODEL: "./ckpt/wider6_10.pt"
+      VS_REC_DEPTH: 50
+      VS_REC_MODEL: "./ckpt/recongition3_37.pt"
+  db:
+    image: postgres:11-alpine
+    environment:
+      POSTGRES_PASSWORD: secret
+      POSTGRES_USER: faceserver
+      POSTGRES_DB: faceserver
+    volumes:
+      - ./init.sql:/docker-entrypoint-initdb.d/init.sql
diff --git a/init.sql b/init.sql
new file mode 100644
index 0000000..a1a5d45
--- /dev/null
+++ b/init.sql
@@ -0,0 +1,11 @@
+CREATE TABLE persons (
+    id character varying(255) NOT NULL,
+    directory character varying(255) NOT NULL,
+    vector double precision[] NOT NULL,
+    filename character varying(255) NOT NULL,
+    filenameuid character varying(255) NOT NULL,
+    box integer[] NOT NULL,
+    score double precision NOT NULL
+);
+
+CREATE INDEX persons_directory ON persons USING btree (directory);
diff --git a/vectorizer/README.md b/vectorizer/README.md
new file mode 100644
index 0000000..dd89b9a
--- /dev/null
+++ b/vectorizer/README.md
@@ -0,0 +1,160 @@
+# Vectorizer
+
+The heart of the faceserver app. **Technology demo - do not use in production!**
+
+**Main purpose:**
+* find faces in an image
+* create a vector from each face
+
+No local state, can be scaled. A GPU is **highly** recommended.
+
+## Configuration
+
+Set environment variables:
+
+| Key | Default value | Description |
+| --- | --- | --- |
+| VS_PORT | 8080 | Port to listen on (for Flask) |
+| VS_FAN_MODEL | | Path to identification model |
+| VS_REC_DEPTH | 50 | Recognition net depth |
+| VS_REC_MODEL | | Path to recognition model |
+
+Do not change the configuration if you want to run the prepared docker-compose setup.
+
+## Installation
+
+### Docker image
+
+Build the docker image - the preferred method if you have nvidia-docker:
+```shell script
+docker build -t vectorizer:latest .
+```
+
+### Local installation
+
+Install PIP dependencies (virtualenv recommended):
+
+```shell script
+pip install --upgrade -r requirements.txt
+```
+
+And then run the server:
+
+```shell script
+python3 -m vectorizer.server
+```
+
+## HTTP API
+
+### Vectorization
+
+```shell script
+curl -X POST -F 'file=@image.jpg' http://localhost:8080/vectorize
+```
+
+PNG or JPEG images are supported.
+
+Result:
+
+```json
+[
+{"box":[0,15,65,88],"vector":[-0.14234,...,0.32432],"score":0.9909800887107849}
+]
+```
+
+| Field | Description |
+| --- | --- |
+| box | Box around the face |
+| vector | Array of 512 floats |
+| score | Face detection score (i.e. probability) |
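+
+Internally the service runs the identification net over the image and then embeds each face crop with the recognition net (the real glue lives in vectorizer/server.py). A hedged sketch of the embedding half, assuming the bundled checkpoint; the 224px input size and the normalization constants mirror recognition/train.py and recognition/test.py:
+
+```python
+import torch
+from PIL import Image
+from torchvision import transforms as T
+
+from recognition.nets import get_net_by_depth
+
+# VS_REC_DEPTH / VS_REC_MODEL from the configuration table above
+model = get_net_by_depth(50)
+model.load_state_dict(torch.load('./ckpt/recongition3_37.pt', map_location='cpu'))
+model.eval()
+
+prep = T.Compose([
+    T.Resize((224, 224)),
+    T.ToTensor(),
+    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+])
+
+face = Image.open('face_crop.jpg').convert('RGB')  # one detected face crop
+with torch.no_grad():
+    vector = model(prep(face).unsqueeze(0))[0]     # the 512-float embedding
+```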
+
+## Training
+
+**GPU is mandatory for training!**
+Training takes at least several days to achieve reasonable accuracy on a single RTX 2070.
+Trained models are stored in the ``ckpt`` directory. Pretrained models with example parameters are included.
+
+### Identification
+
+Example:
+
+```shell script
+python3 -m identification.train --wider_train ~/datasets/wider/wider_face_train_bbx_gt.txt \
+--wider_train_prefix ~/datasets/wider/WIDER_train/images \
+--wider_val ~/datasets/wider/wider_face_val_bbx_gt.txt \
+--wider_val_prefix ~/datasets/wider/WIDER_val/images \
+--depth 50 --epochs 30 --batch_size 1 --model_name wider1
+```
+
+| Argument | Description | Required / Default value |
+| --- | --- | --- |
+| --wider_train | Path to file containing WIDER training annotations (wider_face_train_bbx_gt.txt) | Yes |
+| --wider_val | Path to file containing WIDER validation annotations (wider_face_val_bbx_gt.txt) | |
+| --wider_train_prefix | Prefix path to WIDER train images | Yes |
+| --wider_val_prefix | Prefix path to WIDER validation images | |
+| --depth | Resnet depth, must be one of 18, 34, 50, 101, 152 | 50 |
+| --epochs | Number of epochs | 50 |
+| --batch_size | Batch size - increase only if you have enough GPU memory (i.e. >16 GB)! | 2 |
+| --model_name | Model name prefix | Yes |
+| --parallel | Run training with DataParallel | false |
+| --pretrained | Pretrained model (e.g. for crash recovery) | |
+
+There is also an option to train from CSV files - see train.py and dataloader.py for details.
+
+### Recognition
+
+Example:
+
+```shell script
+python3 -m recognition.train \
+--casia_list ~/datasets/CASIA-maxpy-clean/train.txt \
+--casia_root ~/datasets/CASIA-maxpy-clean \
+--lfw_root ~/datasets/lfw \
+--lfw_pair_list lfw_test_pair.txt \
+--model_name recongition1 --batch_size 20 \
+--loss adacos --print_freq 20 --depth 50
+```
+
+| Argument | Description | Required / Default value |
+| --- | --- | --- |
+| --casia_list | Path to CASIA dataset file list (train.txt) | Yes |
+| --casia_root | Path to CASIA images | Yes |
+| --lfw_root | Path to LFW dataset | Yes |
+| --lfw_pair_list | Path to LFW pair list file (lfw_test_pair.txt - in this repository) | Yes |
+| --depth | Resnet depth, must be one of 18, 34, 50, 101, 152 or 20 for sphere net | 50 |
+| --epochs | Number of epochs | 50 |
+| --batch_size | Batch size | 16 |
+| --model_name | Model name prefix | Yes |
+| --parallel | Run training with DataParallel | false |
+| --loss | One of focal_loss, cross_entropy, arcface, cosface, sphereface, adacos | cross_entropy |
+| --optimizer | One of sgd, adam | sgd |
+| --weight_decay | Weight decay | 0.0005 |
+| --lr | Learning rate | 0.1 |
+| --lr_step | Learning rate step | 10 |
+| --easy_margin | Use easy margin | false |
+| --print_freq | Print every N batches | 100 |
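+
+The ``--loss`` choices mostly differ in how they reshape the cosine logits before the final cross-entropy; ArcFace, for example, adds an angular margin on the target class. A minimal sketch of that idea (simplified and illustrative - the repository's implementations live in recognition/angle.py):
+
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class ArcMarginSketch(nn.Module):
+    def __init__(self, in_features=512, num_classes=10000, s=30.0, m=0.5):
+        super().__init__()
+        # one learnable "class center" per identity in the training set
+        self.weight = nn.Parameter(torch.randn(num_classes, in_features))
+        self.s, self.m = s, m
+
+    def forward(self, embedding, label):
+        # cosine between the L2-normalized embedding and each class center
+        cos = F.linear(F.normalize(embedding), F.normalize(self.weight))
+        theta = torch.acos(cos.clamp(-1.0 + 1e-7, 1.0 - 1e-7))
+        one_hot = F.one_hot(label, cos.size(1)).bool()
+        # add the angular margin m on the target class only, then rescale by s
+        logits = torch.where(one_hot, torch.cos(theta + self.m), cos) * self.s
+        return logits  # feed into cross_entropy or focal_loss
+```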
+
+## Datasets for training
+
+* [WIDER](http://shuoyang1213.me/WIDERFACE/)
+* [LFW](http://vis-www.cs.umass.edu/lfw/)
+* CASIA maxpy clean - no official website, but it can be downloaded from unofficial mirrors (use Google)
+
+## Based on
+
+Github repositories:
+
+* [https://github.com/rainofmine/Face_Attention_Network](https://github.com/rainofmine/Face_Attention_Network)
+* [https://github.com/ronghuaiyang/arcface-pytorch](https://github.com/ronghuaiyang/arcface-pytorch)
+
+Papers:
+
+* [Face Attention Network: An Effective Face Detector for the Occluded Faces](https://arxiv.org/abs/1711.07246)
+* [AdaCos: Adaptively Scaling Cosine Logits for Effectively Learning Deep Face Representations](https://arxiv.org/abs/1905.00292)
+* [ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
+* [SphereFace: Deep Hypersphere Embedding for Face Recognition](https://arxiv.org/abs/1704.08063)
+* [CosFace: Large Margin Cosine Loss for Deep Face Recognition](https://arxiv.org/abs/1801.09414)
+
+## Licensing
+
+Code in this repository is licensed under the Apache 2.0 license. See [LICENSE](../LICENSE).
diff --git a/vectorizer/ckpt/recongition3_37.pt b/vectorizer/ckpt/recongition3_37.pt
new file mode 100644
index 0000000..f959a14
--- /dev/null
+++ b/vectorizer/ckpt/recongition3_37.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60c4b6850c06c00b086e0e3918e089f6bb181f0330a9ce1b60ac184e5b09c6e0
+size 98498540
diff --git a/vectorizer/ckpt/wider6_10.pt b/vectorizer/ckpt/wider6_10.pt
new file mode 100644
index 0000000..b649b55
--- /dev/null
+++ b/vectorizer/ckpt/wider6_10.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0c8b9095c6b85905e7236b253db4c445113ee5fccc272e558d65d52ab4c7523
+size 155109396
diff --git a/vectorizer/identification/anchors.py b/vectorizer/identification/anchors.py
index 791cad0..65de358 100644
--- a/vectorizer/identification/anchors.py
+++ b/vectorizer/identification/anchors.py
@@ -1,3 +1,22 @@
+# -*- coding: utf-8 -*-
+"""
+    Copyright 2019 Petr Masopust, Aprar s.r.o.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+    Adopted code from https://github.com/rainofmine/Face_Attention_Network
+"""
+
 import numpy as np
 import torch
 import torch.nn as nn
diff --git a/vectorizer/identification/csv_eval.py b/vectorizer/identification/csv_eval.py
index ac1f68e..e388431 100644
--- a/vectorizer/identification/csv_eval.py
+++ b/vectorizer/identification/csv_eval.py
@@ -1,3 +1,22 @@
+# -*- coding: utf-8 -*-
+"""
+    Copyright 2019 Petr Masopust, Aprar s.r.o.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Adopted code from https://github.com/rainofmine/Face_Attention_Network +""" + import numpy as np import torch diff --git a/vectorizer/identification/dataloader.py b/vectorizer/identification/dataloader.py index 273466d..f981a88 100644 --- a/vectorizer/identification/dataloader.py +++ b/vectorizer/identification/dataloader.py @@ -1,3 +1,22 @@ +# -*- coding: utf-8 -*- +""" + Copyright 2019 Petr Masopust, Aprar s.r.o. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Adopted code from https://github.com/rainofmine/Face_Attention_Network +""" + import torch import numpy as np import random @@ -366,8 +385,9 @@ class Resizer(object): # resize the image with the computed scale - image = np.array(image.resize((int(round((cols * scale))), int(round((rows * scale)))), resample=Image.BILINEAR)) - image = image / 255.0 + image = np.array( + image.resize((int(round((cols * scale))), int(round((rows * scale)))), resample=Image.BILINEAR)) + image = image / 255.0 rows, cols, cns = image.shape diff --git a/vectorizer/identification/detector.py b/vectorizer/identification/detector.py index 76bc7e0..1568905 100644 --- a/vectorizer/identification/detector.py +++ b/vectorizer/identification/detector.py @@ -1,3 +1,22 @@ +# -*- coding: utf-8 -*- +""" + Copyright 2019 Petr Masopust, Aprar s.r.o. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + Adopted code from https://github.com/rainofmine/Face_Attention_Network +""" + import numpy as np import torch import argparse @@ -20,7 +39,7 @@ def fan_detect(model, img_data, threshold=0.9, max_detections=100, is_cuda=True) img_data = img_data.cuda() scores, labels, boxes = model(img_data) if scores is None: - return np.empty((0,0)), np.empty((0,0)) + return np.empty((0, 0)), np.empty((0, 0)) scores = scores.cpu().numpy() scale = transformed['scale'] @@ -49,7 +68,7 @@ def load_model(model_path, is_cuda=True): if is_cuda: model = model.cuda() - model.anchors.is_cuda=is_cuda + model.anchors.is_cuda = is_cuda return model diff --git a/vectorizer/identification/losses.py b/vectorizer/identification/losses.py index 9e63f34..d4c1235 100644 --- a/vectorizer/identification/losses.py +++ b/vectorizer/identification/losses.py @@ -1,3 +1,22 @@ +# -*- coding: utf-8 -*- +""" + Copyright 2019 Petr Masopust, Aprar s.r.o. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Adopted code from https://github.com/rainofmine/Face_Attention_Network +""" + import math import torch import torch.nn as nn @@ -8,6 +27,7 @@ def memprint(a): print(a.shape) print(a.element_size() * a.nelement()) + def calc_iou(a, b): step = 20 IoU = torch.zeros((len(a), len(b))).cuda() @@ -18,11 +38,11 @@ def calc_iou(a, b): area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) for i in range(step_count): - iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[i * step:(i+1) * step, 2]) - iw.sub_(torch.max(torch.unsqueeze(a[:, 0], 1), b[i * step:(i+1) * step, 0])) + iw = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[i * step:(i + 1) * step, 2]) + iw.sub_(torch.max(torch.unsqueeze(a[:, 0], 1), b[i * step:(i + 1) * step, 0])) - ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[i * step:(i+1) * step, 3]) - ih.sub_(torch.max(torch.unsqueeze(a[:, 1], 1), b[i * step:(i+1) * step, 1])) + ih = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[i * step:(i + 1) * step, 3]) + ih.sub_(torch.max(torch.unsqueeze(a[:, 1], 1), b[i * step:(i + 1) * step, 1])) iw.clamp_(min=0) ih.clamp_(min=0) @@ -30,12 +50,12 @@ def calc_iou(a, b): iw.mul_(ih) del ih - ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area[i * step:(i+1) * step] - iw + ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area[i * step:(i + 1) * step] - iw ua = torch.clamp(ua, min=1e-8) iw.div_(ua) del ua - IoU[:, i * step:(i+1) * step] = iw + IoU[:, i * step:(i + 1) * step] = iw return IoU @@ -131,7 +151,7 @@ class FocalLoss(nn.Module): alpha_factor = torch.ones(targets.shape) if self.is_cuda: alpha_factor = alpha_factor.cuda() - alpha_factor *= alpha + alpha_factor *= alpha except: print(targets) print(targets.shape) diff --git a/vectorizer/identification/model_level_attention.py b/vectorizer/identification/model_level_attention.py index 1bcd663..b85f184 100644 --- a/vectorizer/identification/model_level_attention.py +++ b/vectorizer/identification/model_level_attention.py @@ -1,3 +1,22 @@ +# -*- coding: utf-8 -*- +""" + Copyright 2019 Petr Masopust, Aprar s.r.o. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Adopted code from https://github.com/rainofmine/Face_Attention_Network +""" + import torch.nn as nn import torch import math diff --git a/vectorizer/identification/train.py b/vectorizer/identification/train.py index 6234933..b161668 100644 --- a/vectorizer/identification/train.py +++ b/vectorizer/identification/train.py @@ -1,3 +1,22 @@ +# -*- coding: utf-8 -*- +""" + Copyright 2019 Petr Masopust, Aprar s.r.o. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Adopted code from https://github.com/rainofmine/Face_Attention_Network +""" + import argparse import collections import os @@ -12,7 +31,8 @@ import torch.utils.model_zoo as model_zoo from identification.model_level_attention import resnet18, resnet34, resnet50, resnet101, resnet152 from torch.utils.data import DataLoader from identification.csv_eval import evaluate -from identification.dataloader import WIDERDataset, AspectRatioBasedSampler, collater, Resizer, Augmenter, Normalizer, CSVDataset +from identification.dataloader import WIDERDataset, AspectRatioBasedSampler, collater, Resizer, Augmenter, Normalizer, \ + CSVDataset is_cuda = torch.cuda.is_available() print('CUDA available: {}'.format(is_cuda)) @@ -27,6 +47,7 @@ model_urls = { ckpt = False + def main(args=None): parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.') @@ -154,9 +175,11 @@ def main(args=None): img_data = img_data.cuda() annot_data = annot_data.cuda() - print("GPU memory allocated: %d max memory allocated: %d memory cached: %d max memory cached: %d" % (torch.cuda.memory_allocated() / 1024**2, torch.cuda.max_memory_allocated() / 1024**2, torch.cuda.memory_cached() / 1024**2, torch.cuda.max_memory_cached() / 1024**2)) + print("GPU memory allocated: %d max memory allocated: %d memory cached: %d max memory cached: %d" % ( + torch.cuda.memory_allocated() / 1024 ** 2, torch.cuda.max_memory_allocated() / 1024 ** 2, + torch.cuda.memory_cached() / 1024 ** 2, torch.cuda.max_memory_cached() / 1024 ** 2)) classification_loss, regression_loss, mask_loss = retinanet([img_data, annot_data]) - + del img_data del annot_data @@ -195,7 +218,7 @@ def main(args=None): scheduler.step(np.mean(epoch_loss)) - #TODO remove makedir + # TODO remove makedir os.makedirs('./ckpt', exist_ok=True) if parser.parallel: torch.save(retinanet.module, './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num)) diff --git a/vectorizer/identification/utils.py b/vectorizer/identification/utils.py index dd6b076..c8bf2f7 100644 --- 
a/vectorizer/identification/utils.py +++ b/vectorizer/identification/utils.py @@ -1,3 +1,22 @@ +# -*- coding: utf-8 -*- +""" + Copyright 2019 Petr Masopust, Aprar s.r.o. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Adopted code from https://github.com/rainofmine/Face_Attention_Network +""" + import torch import torch.nn as nn import numpy as np diff --git a/vectorizer/recognition/angle.py b/vectorizer/recognition/angle.py index cbb274d..0b3c661 100644 --- a/vectorizer/recognition/angle.py +++ b/vectorizer/recognition/angle.py @@ -1,4 +1,21 @@ # -*- coding: utf-8 -*- +""" + Copyright 2019 Petr Masopust, Aprar s.r.o. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Adopted code from https://github.com/ronghuaiyang/arcface-pytorch +""" import math @@ -32,18 +49,18 @@ class AdaCos(nn.Module): self.criterion = self.criterion.cuda() def forward(self, input, label): -# changed to fixed adacos -# theta = torch.acos(torch.clamp(input, -1.0 + 1e-7, 1.0 - 1e-7)) -# one_hot = torch.zeros_like(input) -# one_hot.scatter_(1, label.view(-1, 1).long(), 1) -# with torch.no_grad(): -# B_avg = torch.where(one_hot < 1, torch.exp(self.s * input), torch.zeros_like(input)) -# B_avg = torch.sum(B_avg) / input.size(0) -# theta_med = torch.median(theta) -# self.s = torch.log(B_avg) / torch.cos(torch.min(math.pi/4 * torch.ones_like(theta_med), theta_med)) -# # TODO why converge to infinity ? -# self.s = torch.clamp(self.s, self.base_s / 2, self.base_s * 2) -# print(self.s) + # changed to fixed adacos - faster and more stable + # theta = torch.acos(torch.clamp(input, -1.0 + 1e-7, 1.0 - 1e-7)) + # one_hot = torch.zeros_like(input) + # one_hot.scatter_(1, label.view(-1, 1).long(), 1) + # with torch.no_grad(): + # B_avg = torch.where(one_hot < 1, torch.exp(self.s * input), torch.zeros_like(input)) + # B_avg = torch.sum(B_avg) / input.size(0) + # theta_med = torch.median(theta) + # self.s = torch.log(B_avg) / torch.cos(torch.min(math.pi/4 * torch.ones_like(theta_med), theta_med)) + # # TODO why converge to infinity ? + # self.s = torch.clamp(self.s, self.base_s / 2, self.base_s * 2) + # print(self.s) output = self.s * input return self.criterion(output, label) diff --git a/vectorizer/recognition/focal_loss.py b/vectorizer/recognition/focal_loss.py index 39e42d7..e8152d3 100644 --- a/vectorizer/recognition/focal_loss.py +++ b/vectorizer/recognition/focal_loss.py @@ -1,8 +1,24 @@ # -*- coding: utf-8 -*- """ -Created on 18-6-7 上午10:11 + Copyright 2019 Petr Masopust, Aprar s.r.o. 
-@author: ronghuaiyang + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Adopted code from https://github.com/ronghuaiyang/arcface-pytorch + + Created on 18-6-7 上午10:11 + + @author: ronghuaiyang """ import torch diff --git a/vectorizer/recognition/nets.py b/vectorizer/recognition/nets.py index 96a7744..d48fdf0 100644 --- a/vectorizer/recognition/nets.py +++ b/vectorizer/recognition/nets.py @@ -1,3 +1,26 @@ +# -*- coding: utf-8 -*- +""" + Copyright 2019 Petr Masopust, Aprar s.r.o. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + Adopted code from https://github.com/ronghuaiyang/arcface-pytorch + + Created on 18-5-21 下午5:26 + + @author: ronghuaiyang +""" + import torchvision.models as models from torch import nn @@ -46,67 +69,85 @@ def resnet152(pretrained=False, **kwargs): model = models.resnet152(num_classes=512, **kwargs) return model + def sphere20(): return sphere20a() +def get_net_by_depth(depth): + if depth == 18: + model = resnet18() + elif depth == 20: + model = sphere20() + elif depth == 34: + model = resnet34() + elif depth == 50: + model = resnet50() + elif depth == 101: + model = resnet101() + elif depth == 152: + model = resnet152() + else: + raise ValueError('Unsupported model depth %d, must be one of 18, 34, 50, 101, 152' % depth) + + return model + + class sphere20a(nn.Module): def __init__(self): super(sphere20a, self).__init__() - #input = B*3*112*96 - self.conv1_1 = nn.Conv2d(3,64,3,2,1) #=>B*64*56*48 + # input = B*3*112*96 + self.conv1_1 = nn.Conv2d(3, 64, 3, 2, 1) # =>B*64*56*48 self.relu1_1 = nn.PReLU(64) - self.conv1_2 = nn.Conv2d(64,64,3,1,1) + self.conv1_2 = nn.Conv2d(64, 64, 3, 1, 1) self.relu1_2 = nn.PReLU(64) - self.conv1_3 = nn.Conv2d(64,64,3,1,1) + self.conv1_3 = nn.Conv2d(64, 64, 3, 1, 1) self.relu1_3 = nn.PReLU(64) - self.conv2_1 = nn.Conv2d(64,128,3,2,1) #=>B*128*28*24 + self.conv2_1 = nn.Conv2d(64, 128, 3, 2, 1) # =>B*128*28*24 self.relu2_1 = nn.PReLU(128) - self.conv2_2 = nn.Conv2d(128,128,3,1,1) + self.conv2_2 = nn.Conv2d(128, 128, 3, 1, 1) self.relu2_2 = nn.PReLU(128) - self.conv2_3 = nn.Conv2d(128,128,3,1,1) + self.conv2_3 = nn.Conv2d(128, 128, 3, 1, 1) self.relu2_3 = nn.PReLU(128) - self.conv2_4 = nn.Conv2d(128,128,3,1,1) #=>B*128*28*24 + self.conv2_4 = nn.Conv2d(128, 128, 3, 1, 1) # =>B*128*28*24 self.relu2_4 = nn.PReLU(128) - self.conv2_5 = nn.Conv2d(128,128,3,1,1) + self.conv2_5 = nn.Conv2d(128, 128, 3, 1, 1) self.relu2_5 = nn.PReLU(128) - - self.conv3_1 = nn.Conv2d(128,256,3,2,1) #=>B*256*14*12 + self.conv3_1 = nn.Conv2d(128, 256, 3, 2, 1) # =>B*256*14*12 self.relu3_1 = 
nn.PReLU(256) - self.conv3_2 = nn.Conv2d(256,256,3,1,1) + self.conv3_2 = nn.Conv2d(256, 256, 3, 1, 1) self.relu3_2 = nn.PReLU(256) - self.conv3_3 = nn.Conv2d(256,256,3,1,1) + self.conv3_3 = nn.Conv2d(256, 256, 3, 1, 1) self.relu3_3 = nn.PReLU(256) - self.conv3_4 = nn.Conv2d(256,256,3,1,1) #=>B*256*14*12 + self.conv3_4 = nn.Conv2d(256, 256, 3, 1, 1) # =>B*256*14*12 self.relu3_4 = nn.PReLU(256) - self.conv3_5 = nn.Conv2d(256,256,3,1,1) + self.conv3_5 = nn.Conv2d(256, 256, 3, 1, 1) self.relu3_5 = nn.PReLU(256) - self.conv3_6 = nn.Conv2d(256,256,3,1,1) #=>B*256*14*12 + self.conv3_6 = nn.Conv2d(256, 256, 3, 1, 1) # =>B*256*14*12 self.relu3_6 = nn.PReLU(256) - self.conv3_7 = nn.Conv2d(256,256,3,1,1) + self.conv3_7 = nn.Conv2d(256, 256, 3, 1, 1) self.relu3_7 = nn.PReLU(256) - self.conv3_8 = nn.Conv2d(256,256,3,1,1) #=>B*256*14*12 + self.conv3_8 = nn.Conv2d(256, 256, 3, 1, 1) # =>B*256*14*12 self.relu3_8 = nn.PReLU(256) - self.conv3_9 = nn.Conv2d(256,256,3,1,1) + self.conv3_9 = nn.Conv2d(256, 256, 3, 1, 1) self.relu3_9 = nn.PReLU(256) - self.conv4_1 = nn.Conv2d(256,512,3,2,1) #=>B*512*7*6 + self.conv4_1 = nn.Conv2d(256, 512, 3, 2, 1) # =>B*512*7*6 self.relu4_1 = nn.PReLU(512) - self.conv4_2 = nn.Conv2d(512,512,3,1,1) + self.conv4_2 = nn.Conv2d(512, 512, 3, 1, 1) self.relu4_2 = nn.PReLU(512) - self.conv4_3 = nn.Conv2d(512,512,3,1,1) + self.conv4_3 = nn.Conv2d(512, 512, 3, 1, 1) self.relu4_3 = nn.PReLU(512) - self.fc5 = nn.Linear(512*14*14,512) + self.fc5 = nn.Linear(512 * 14 * 14, 512) # ORIGINAL for 112x96: self.fc5 = nn.Linear(512*7*6,512) - def forward(self, x): x = self.relu1_1(self.conv1_1(x)) x = x + self.relu1_3(self.conv1_3(self.relu1_2(self.conv1_2(x)))) @@ -124,6 +165,6 @@ class sphere20a(nn.Module): x = self.relu4_1(self.conv4_1(x)) x = x + self.relu4_3(self.conv4_3(self.relu4_2(self.conv4_2(x)))) - x = x.view(x.size(0),-1) + x = x.view(x.size(0), -1) x = self.fc5(x) return x diff --git a/vectorizer/recognition/test.py b/vectorizer/recognition/test.py index b2f279c..b80b36b 100644 --- a/vectorizer/recognition/test.py +++ b/vectorizer/recognition/test.py @@ -1,22 +1,37 @@ # -*- coding: utf-8 -*- """ -Created on 18-5-30 下午4:55 + Copyright 2019 Petr Masopust, Aprar s.r.o. -@author: ronghuaiyang + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + Adopted code from https://github.com/ronghuaiyang/arcface-pytorch + + Created on 18-5-30 下午4:55 + + @author: ronghuaiyang """ import os import argparse from torch.utils.data import TensorDataset, DataLoader -from recognition.nets import resnet18, resnet34, resnet50, resnet101, resnet152, sphere20 +from recognition.nets import get_net_by_depth import torch import numpy as np from torch.nn import DataParallel from PIL import Image from torchvision import transforms as T - imagesize = 224 batch_size = 20 @@ -120,7 +135,8 @@ def cal_accuracy(y_score, y_true): def main(args=None): parser = argparse.ArgumentParser(description='Testing script for face identification.') - parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152 or 20 for sphere', type=int, default=50) + parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152 or 20 for sphere', type=int, + default=50) parser.add_argument('--parallel', help='Run training with DataParallel', dest='parallel', default=False, action='store_true') parser.add_argument('--model', help='Path to model') @@ -133,20 +149,7 @@ def main(args=None): is_cuda = torch.cuda.is_available() print('CUDA available: {}'.format(is_cuda)) - if parser.depth == 18: - model = resnet18() - elif parser.depth == 20: - model = sphere20() - elif parser.depth == 34: - model = resnet34() - elif parser.depth == 50: - model = resnet50() - elif parser.depth == 101: - model = resnet101() - elif parser.depth == 152: - model = resnet152() - else: - raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') + model = get_net_by_depth(parser.depth) if parser.parallel: model = DataParallel(model) diff --git a/vectorizer/recognition/train.py b/vectorizer/recognition/train.py index 141e557..45dbf69 100644 --- a/vectorizer/recognition/train.py +++ b/vectorizer/recognition/train.py @@ -1,3 +1,22 @@ +# -*- coding: utf-8 -*- +""" + Copyright 2019 Petr Masopust, Aprar s.r.o. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/vectorizer/recognition/train.py b/vectorizer/recognition/train.py
index 141e557..45dbf69 100644
--- a/vectorizer/recognition/train.py
+++ b/vectorizer/recognition/train.py
@@ -1,3 +1,22 @@
+# -*- coding: utf-8 -*-
+"""
+    Copyright 2019 Petr Masopust, Aprar s.r.o.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+    Adapted code from https://github.com/ronghuaiyang/arcface-pytorch
+"""
+
 import argparse
 import os
 import time
@@ -11,7 +30,7 @@
 from torchvision import transforms as T
 
 from recognition.angle import AngleLinear, CosFace, SphereFace, ArcFace, AdaCos
 from recognition.focal_loss import FocalLoss
-from recognition.nets import resnet18, resnet34, resnet50, resnet101, resnet152, sphere20
+from recognition.nets import get_net_by_depth
 from recognition.test import lfw_test2, get_pair_list, load_img_data
 
@@ -25,7 +44,7 @@ class Dataset(torch.utils.data.Dataset):
         self.imgs = np.random.permutation(imgs)
 
         normalize = T.Normalize(mean=[0.485, 0.456, 0.406],
-                                 std=[0.229, 0.224, 0.225])
+                                std=[0.229, 0.224, 0.225])
 
         self.transforms = T.Compose([
             T.RandomResizedCrop(imagesize),
@@ -62,14 +81,17 @@ def main(args=None):
     parser.add_argument('--print_freq', help='Print every N batch (default 100)', type=int, default=100)
     parser.add_argument('--epochs', help='Number of epochs', type=int, default=50)
-    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152 or 20 for sphere', type=int, default=50)
+    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152 or 20 for sphere', type=int,
+                        default=50)
     parser.add_argument('--lr_step', help='Learning rate step (default 10)', type=int, default=10)
     parser.add_argument('--lr', help='Learning rate (default 0.1)', type=float, default=0.1)
     parser.add_argument('--weight_decay', help='Weight decay (default 0.0005)', type=float, default=0.0005)
-    parser.add_argument('--easy_margin', help='Use easy margin (default false)', dest='easy_margin', default=False, action='store_true')
+    parser.add_argument('--easy_margin', help='Use easy margin (default false)', dest='easy_margin', default=False,
+                        action='store_true')
     parser.add_argument('--parallel', help='Run training with DataParallel', dest='parallel', default=False,
                         action='store_true')
-    parser.add_argument('--loss', help='One of focal_loss. cross_entropy, arcface, cosface, sphereface, adacos (default cross_entropy)',
+    parser.add_argument('--loss',
+                        help='One of focal_loss, cross_entropy, arcface, cosface, sphereface, adacos (default cross_entropy)',
                         type=str, default='cross_entropy')
     parser.add_argument('--optimizer', help='One of sgd, adam (default sgd)', type=str, default='sgd')
 
@@ -86,20 +108,7 @@ def main(args=None):
     print('CUDA available: {}'.format(is_cuda))
     imagesize = 224
 
-    if parser.depth == 18:
-        model = resnet18()
-    elif parser.depth == 20:
-        model = sphere20()
-    elif parser.depth == 34:
-        model = resnet34()
-    elif parser.depth == 50:
-        model = resnet50()
-    elif parser.depth == 101:
-        model = resnet101()
-    elif parser.depth == 152:
-        model = resnet152()
-    else:
-        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
+    model = get_net_by_depth(parser.depth)
 
     # TODO split training dataset to train/validation and stop using test dataset for acc
     train_dataset = Dataset(parser.casia_root, parser.casia_list, imagesize)
@@ -191,7 +200,7 @@ def main(args=None):
             acc = lfw_test2(model, identity_list, img_data, is_cuda=is_cuda)
             print('Accuracy: %f' % acc)
             if last_acc < acc:
-                #TODO remove makedir
+                # TODO remove makedir
                 os.makedirs('./ckpt', exist_ok=True)
                 torch.save(model.state_dict(), './ckpt/' + parser.model_name + '_{}.pt'.format(i))
                 torch.save(metric_fc.state_dict(), './ckpt/' + parser.model_name + '_metric_{}.pt'.format(i))
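For orientation: the margin-based heads imported above (AngleLinear, CosFace, SphereFace, ArcFace, AdaCos) take the batch labels in the forward pass so the angular margin can be applied to the target class, which is also why train.py checkpoints metric_fc alongside the backbone (the head owns the per-class weight matrix). A hedged sketch of the resulting training step; the two-argument metric_fc call mirrors the upstream arcface-pytorch design, and the exact constructor signatures live in recognition/angle.py:

    def train_step(model, metric_fc, criterion, optimizer, images, labels):
        features = model(images)              # backbone embedding, 512-d here
        logits = metric_fc(features, labels)  # margin head needs labels at train time
        loss = criterion(logits, labels)      # FocalLoss or CrossEntropyLoss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        return loss.item()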
diff --git a/vectorizer/train-ident.sh b/vectorizer/train-ident.sh
new file mode 100755
index 0000000..1749d09
--- /dev/null
+++ b/vectorizer/train-ident.sh
@@ -0,0 +1,3 @@
+python3 -m identification.train --wider_train ~/datasets/wider/wider_face_train_bbx_gt.txt --wider_train_prefix ~/datasets/wider/WIDER_train/images \
+--wider_val ~/datasets/wider/wider_face_val_bbx_gt.txt --wider_val_prefix ~/datasets/wider/WIDER_val/images \
+--depth 50 --epochs 30 --batch_size 1 --model_name wider1
diff --git a/vectorizer/train-rec.sh b/vectorizer/train-rec.sh
index 4c502fd..53abbd9 100755
--- a/vectorizer/train-rec.sh
+++ b/vectorizer/train-rec.sh
@@ -1,2 +1,2 @@
-python3 -m recognition.train --casia_list /home/ehp/tmp/datasets/CASIA-maxpy-clean/train.txt --casia_root /home/ehp/tmp/datasets/CASIA-maxpy-clean --lfw_root /home/ehp/tmp/datasets/lfw \
---lfw_pair_list /home/ehp/git/arcface/lfw_test_pair.txt --model_name recongition3 --batch_size 20 --loss adacos --print_freq 20 --depth 50
+python3 -m recognition.train --casia_list ~/datasets/CASIA-maxpy-clean/train.txt --casia_root ~/datasets/CASIA-maxpy-clean --lfw_root ~/datasets/lfw \
+--lfw_pair_list lfw_test_pair.txt --model_name recognition1 --batch_size 20 --loss adacos --print_freq 20 --depth 50
diff --git a/vectorizer/train.sh b/vectorizer/train.sh
deleted file mode 100755
index 8bebf37..0000000
--- a/vectorizer/train.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#python3 -m identification.train --wider_train /home/ehp/tmp/datasets/wider/sample.txt --wider_train_prefix /home/ehp/tmp/datasets/wider/sample/images \
-#--wider_val /home/ehp/tmp/datasets/wider/sample_val.txt --wider_val_prefix /home/ehp/tmp/datasets/wider/sample_val/images \
-#--depth 50 --epochs 30 --batch_size 1 --model_name wider_sample1
-
-python3 -m identification.train --wider_train /home/ehp/tmp/datasets/wider/wider_face_train_bbx_gt.txt --wider_train_prefix /home/ehp/tmp/datasets/wider/WIDER_train/images \
---wider_val /home/ehp/tmp/datasets/wider/wider_face_val_bbx_gt.txt --wider_val_prefix /home/ehp/tmp/datasets/wider/WIDER_val/images \
---depth 50 --epochs 30 --batch_size 1 --model_name widernew1
diff --git a/vectorizer/vectorizer/server.py b/vectorizer/vectorizer/server.py
index 4b24353..1589596 100644
--- a/vectorizer/vectorizer/server.py
+++ b/vectorizer/vectorizer/server.py
@@ -1,3 +1,20 @@
+# -*- coding: utf-8 -*-
+"""
+    Copyright 2019 Petr Masopust, Aprar s.r.o.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+"""
+
 import logging
 import os
 import sys
@@ -7,18 +24,25 @@
 from flask import Flask, request, abort, jsonify
 from werkzeug.utils import secure_filename
 
 import torch
-from recognition.nets import resnet50
+from recognition.nets import get_net_by_depth
 from torchvision import transforms as T
 from PIL import Image
 
 import identification.detector as fan
 
 is_cuda = torch.cuda.is_available()
 print('CUDA: %s' % is_cuda)
-fan_model = fan.load_model('ckpt/wider6_10.pt', is_cuda=is_cuda)
+
+fan_file = os.environ.get('VS_FAN_MODEL', None)
+if fan_file is None:
+    raise Exception('VS_FAN_MODEL is a mandatory parameter')
+fan_model = fan.load_model(fan_file, is_cuda=is_cuda)
 
 # load recognition model
-rec_model = resnet50()
-rec_model.load_state_dict(torch.load('ckpt/recongition3_37.pt', map_location=lambda storage, location: storage))
+rec_model = get_net_by_depth(int(os.environ.get('VS_REC_DEPTH', 50)))
+rec_file = os.environ.get('VS_REC_MODEL', None)
+if rec_file is None:
+    raise Exception('VS_REC_MODEL is a mandatory parameter')
+rec_model.load_state_dict(torch.load(rec_file, map_location=lambda storage, location: storage))
 rec_model.eval()
 if is_cuda:
     rec_model = rec_model.cuda()
@@ -38,6 +62,7 @@ app = Flask(__name__)
 UPLOAD_FOLDER = tempfile.gettempdir()
 app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
 
+
 def compute_vector(data):
     with torch.no_grad():
         data = transforms(data)
@@ -75,7 +100,8 @@ def upload_file():
             boxes = boxes.astype(int)
             scores = scores.astype(float)
             extracted = [{'box': arr.tolist(),
-                          'vector': compute_vector(img.crop((arr[0], arr[1], arr[2], arr[3]))).squeeze().tolist(),
+                          'vector': compute_vector(
+                              img.crop((arr[0], arr[1], arr[2], arr[3]))).squeeze().tolist(),
                           'score': score
                           } for arr, score in zip(boxes, scores)]
             return jsonify(extracted)
@@ -87,4 +113,5 @@ def upload_file():
 
 if __name__ == '__main__':
     logging.basicConfig()
-    app.run(host='0.0.0.0', debug=False, port=8080)
+    port = int(os.environ.get('VS_PORT', '8080'))
+    app.run(host='0.0.0.0', debug=False, port=port)
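With the hard-coded checkpoint paths gone, the server is configured entirely through the environment: VS_FAN_MODEL and VS_REC_MODEL are required, VS_REC_DEPTH (default 50) and VS_PORT (default 8080) are optional. A minimal client sketch against the JSON shape built in upload_file; the route path and the multipart field name are assumptions, since neither the @app.route decorator nor the request.files key appears in this hunk:

    import requests

    # Hypothetical endpoint and field name; verify both in server.py.
    with open('face.jpg', 'rb') as f:
        resp = requests.post('http://localhost:8080/', files={'file': f})
    resp.raise_for_status()
    for face in resp.json():  # one dict per detected face
        print(face['box'], face['score'], len(face['vector']))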