From 57ab8edfcee160d522abbb08aa1a6ca847cb6e89 Mon Sep 17 00:00:00 2001
From: Rene keijzer
Date: Wed, 19 Sep 2018 13:20:11 +0200
Subject: [PATCH] Add: Definitions for the word2vector module

---
 types/word2vector/index.d.ts           | 34 +++++++++++++++++++++++
 types/word2vector/tsconfig.json        | 23 ++++++++++++++++
 types/word2vector/tslint.json          |  1 +
 types/word2vector/word2vector-tests.ts | 37 ++++++++++++++++++++++++++
 4 files changed, 95 insertions(+)
 create mode 100644 types/word2vector/index.d.ts
 create mode 100644 types/word2vector/tsconfig.json
 create mode 100644 types/word2vector/tslint.json
 create mode 100644 types/word2vector/word2vector-tests.ts

diff --git a/types/word2vector/index.d.ts b/types/word2vector/index.d.ts
new file mode 100644
index 0000000000..80861eb7a8
--- /dev/null
+++ b/types/word2vector/index.d.ts
@@ -0,0 +1,34 @@
+// Type definitions for word2vector 2.1
+// Project: https://github.com/LeeXun/word2vector#readme
+// Definitions by: Rene Keijzer
+// Definitions: https://github.com/DefinitelyTyped/DefinitelyTyped
+// TypeScript Version: 2.2
+
+export function w2v(): any;
+export function load(modelfile: string, filetype?: string): boolean;
+export function getVector(word: string): number[];
+export function getVectors(words: string[]): number[][];
+/** `opt` is an options object; the package tests pass `{ N: 9 }`. */
+export function getNeighbors(vector: number[], opt?: object): object[];
+export function getSimilarWords(word: string, options?: object): object[];
+export function bin2txt(binFile: string): boolean;
+// NOTE(review): add/substract results are assigned to number[] in the tests, but the
+// return type stays `any` until that is confirmed against the library.
+export function add(p1: number[], p2: number[], opt?: object): any;
+export function add(p1: string, p2: string, opt?: object): any;
+// NOTE(review): "substract" (sic) presumably mirrors the name the library exports - confirm before renaming.
+export function substract(p1: string, p2: string, opt?: object): any;
+export function substract(p1: number[], p2: number[], opt?: object): any;
+/** Returns a number, or `false` when a word is not in the model. */
+export function similarity(w1: string, w2: string, options?: object): number | false;
+
+/**
+ * @param options Settings object; the package tests pass the keys `cbow`, `size`, `window`, `binary`,
+ *     `negative`, `hs`, `sample`, `threads`, `iter`, `minCount` and `logOn`.
+ */
+export function train(
+    trainFile: string,
+    modelFile: string,
+    options?: object,
+    callback?: () => void
+): void;
diff --git a/types/word2vector/tsconfig.json b/types/word2vector/tsconfig.json
new file mode 100644
index 0000000000..f18903ed36
--- /dev/null
+++ b/types/word2vector/tsconfig.json
@@ -0,0 +1,23 @@
+{
+    "compilerOptions": {
+        "module": "commonjs",
+        "lib": [
+            "es6"
+        ],
+        "noImplicitAny": true,
+        "noImplicitThis": true,
+        "strictNullChecks": true,
+        "strictFunctionTypes": true,
+        "baseUrl": "../",
+        "typeRoots": [
+            "../"
+        ],
+        "types": [],
+        "noEmit": true,
+        "forceConsistentCasingInFileNames": true
+    },
+    "files": [
+        "index.d.ts",
+        "word2vector-tests.ts"
+    ]
+}
diff --git a/types/word2vector/tslint.json b/types/word2vector/tslint.json
new file mode 100644
index 0000000000..3db14f85ea
--- /dev/null
+++ b/types/word2vector/tslint.json
@@ -0,0 +1 @@
+{ "extends": "dtslint/dt.json" }
diff --git a/types/word2vector/word2vector-tests.ts b/types/word2vector/word2vector-tests.ts
new file mode 100644
index 0000000000..dfbf1b6278
--- /dev/null
+++ b/types/word2vector/word2vector-tests.ts
@@ -0,0 +1,37 @@
+import w2v = require("word2vector");
+
+const trainFile = "./build/data/train.data";
+const modelFile = "./build/data/test.model.bin";
+
+w2v.train(trainFile, modelFile, {
+    cbow: 1, // use the continuous bag of words model // default
+    size: 10, // sets the size (dimension) of word vectors // default 100
+    window: 8, // sets maximal skip length between words // default 5
+    binary: 1, // save the resulting vectors in binary mode // default off
+    negative: 25, // number of negative examples; common values are 3 - 10 (0 = not used) // default 5
+    hs: 0, // 1 = use Hierarchical Softmax // default 0
+    sample: 1e-4,
+    threads: 20,
+    iter: 15,
+    minCount: 1, // This will discard words that appear less than *minCount* times // default 5
+    logOn: false // sets whether any output should be printed to the console // default false
+});
+
+w2v.load(modelFile);
+w2v.bin2txt("test.model.bin");
+
+const vector: number[] = w2v.getVector("孫悟空");
+const vector2: number[] = w2v.getVector("李洵");
+const multipleVectors: number[][] = w2v.getVectors(["孫悟空", "李洵"]);
+
+const similarWords: object[] = w2v.getSimilarWords("唐三藏");
+const neighbors: object[] = w2v.getNeighbors(w2v.getVector("唐三藏"), { N: 9 });
+
+// similarity() yields a number, or false if a word is not in the model
+const similarity: number | false = w2v.similarity("唐三藏", "孫悟空");
+
+let vector3: number[] = w2v.add("孫悟空", "孫悟空");
+vector3 = w2v.add(vector, vector2);
+
+vector3 = w2v.substract("孫悟空", "孫悟空");
+vector3 = w2v.substract(vector, vector2);