8-15ASR

xiaoxiao2025-12-30 8

1 perl printf STDERR

source

unigrams

Units.txt

lexicon_numbers.txt

7196

211

8874

7772 的

1878 了

1554 在

1224 有

1181 也

1103 我

1057 是

1020 和

965 一

917 他

877 为

803 而

772 与

749 又

<NSN> 1

<SPN> 2

a1 3

a2 4

a3 5

a4 6

a5 7

aa 8

ai1 9

ai2 10

ai3 11

ai4 12

ai5 13

an1 14

an2 15

<NOISE> 1

<SPOKEN_NOISE> 2

<UNK> 2

一 91 61

一万 91 62 189 167

一万元 91 62 189 167 208 196

一万多 91 62 189 167 32 184

一下 91 62 209 69

一下子 91 62 209 69 210 121

一两 91 64 125 78

一个 91 62 59 36

一个人 91 62 59 36 146 45

一个劲 91 62 59 36 123 95

generate

Wordlist_mapped

Word_map

Wordlist.txt

Unigram counts

8876

8874

<s> A

</s> B

<UNK> C

的 C

了 D

在 E

有 F

也 G

我 H

是 I

和 J

一 K

他 L

<NOISE>

<SPOKEN_NOISE>

<UNK>

一

一万

一万元

一万多

一下

一下子

一两

一个

一个人

一个劲

一个半月

一个月

7773 的

1879 了

1555 在

1225 有

1182 也

1104 我

1058 是

1021 和

966 一

918 他

878 为

804 而

773 与

3 tail -n +$heldout_sent | tee ~/heldout 从行号为heldout_sent开始打印

tail -n 20 filename显示filename最后20行。

gunzip -c $dir/train.gz | tail -n 10000 | \

sort | tee ~/before_discount | discount_ngrams $subdir/config.get_ngrams| tee ~/after_discount | \

sort | merge_ngrams | gzip -c > $subdir/ngrams.gz

5 uniq -c ：进行计数

语言模型建立的过程

get_raw_ngrams

sort | uniq -c

uniq_to_ngrams

3train

D=0 tau=0 phi=1

D=1 tau=0 phi=1

4train

D=0 tau=0 phi=1

D=1 tau=0 phi=1

log: line= * 13.42

log: ngram_order=1

log: history= * 13.42

log: predicted=* 13.42

log: count=13.420000

log: line= B 0.16

log: ngram_order=1

log: history= B 0.16

log: predicted=B 0.16

log: count=0.160000

* 13.42

log: line= BR 0.79

log: ngram_order=1

log: history= BR 0.79

log: predicted=BR 0.79

log: count=0.790000

B 0.16

log: line= BYH 0.16

log: ngram_order=1

log: history= BYH 0.16

log: predicted=BYH 0.16

log: count=0.160000

BR 0.79

log: line= BaI 0.16

log: ngram_order=1

log: history= BaI 0.16

log: predicted=BaI 0.16

log: count=0.160000

BYH 0.16

log: line= Bbo 0.16

log: ngram_order=1

log: history= Bbo 0.16

log: predicted=Bbo 0.16

log: count=0.160000

BaI 0.16

log: line= Bg 0.16

log: ngram_order=1

log: history= Bg 0.16

log: predicted=Bg 0.16

log: count=0.160000

Bbo 0.16

log: line= BgN 0.16

log: ngram_order=1

log: history= BgN 0.16

log: predicted=BgN 0.16

log: count=0.160000

Bg 0.16

log: line= BjQ 0.16

log: ngram_order=1

log: history= BjQ 0.16

log: predicted=BjQ 0.16

log: count=0.160000

BgN 0.16

log: line= D 0.16

log: ngram_order=1

log: history= D 0.16

log: predicted=D 0.16

log: count=0.160000

BjQ 0.16

log: line= EG 0.16

log: ngram_order=1

log: history= EG 0.16

log: predicted=EG 0.16

log: count=0.160000

D 0.16

log: line= Gh 0.16

log: ngram_order=1

log: history= Gh 0.16

log: predicted=Gh 0.16

log: count=0.160000

EG 0.16

log: line= H 0.16

log: ngram_order=1

log: history= H 0.16

log: predicted=H 0.16

log: count=0.160000

Gh 0.16

log: line= JH 0.16

log: ngram_order=1

log: history= JH 0.16

log: predicted=JH 0.16

log: count=0.160000

H 0.16

log: line= Ph 0.16

log: ngram_order=1

log: history= Ph 0.16

log: predicted=Ph 0.16

log: count=0.160000

JH 0.16

log: line= a 0.16

log: ngram_order=1

log: history= a 0.16

log: predicted=a 0.16

log: count=0.160000

Ph 0.16

log: line= c 0.16

log: ngram_order=1

log: history= c 0.16

log: predicted=c 0.16

log: count=0.160000

a 0.16

log: line= dV 0.16

log: ngram_order=1

log: history= dV 0.16

log: predicted=dV 0.16

log: count=0.160000

c 0.16

log: line= dl 0.16

log: ngram_order=1

log: history= dl 0.16

log: predicted=dl 0.16

log: count=0.160000

dV 0.16

log: line=A * 0.89

log: ngram_order=2

log: history=A * 0.89

log: predicted=* 0.89

log: count=0.890000

dl 0.16

log: line=A Bbo 0.11

log: ngram_order=2

log: history=A Bbo 0.11

log: predicted=Bbo 0.11

log: count=0.110000

A * 0.89

log: line=BR * 1.78

log: ngram_order=2

log: history=BR * 1.78

log: predicted=* 1.78

log: count=1.780000

A Bbo 0.11

log: line=BR EG 0.11

log: ngram_order=2

log: history=BR EG 0.11

log: predicted=EG 0.11

log: count=0.110000

BR * 1.78

log: line=BR dV 0.11

log: ngram_order=2

log: history=BR dV 0.11

log: predicted=dV 0.11

log: count=0.110000

BR EG 0.11

log: line=BYH * 0.89

log: ngram_order=2

log: history=BYH * 0.89

log: predicted=* 0.89

log: count=0.890000

BR dV 0.11

log: line=BYH D 0.11

log: ngram_order=2

log: history=BYH D 0.11

log: predicted=D 0.11

log: count=0.110000

BYH * 0.89

log: line=BaI * 0.89

log: ngram_order=2

log: history=BaI * 0.89

log: predicted=* 0.89

log: count=0.890000

BYH D 0.11

log: line=BaI Ph 0.11

log: ngram_order=2

log: history=BaI Ph 0.11

log: predicted=Ph 0.11

log: count=0.110000

BaI * 0.89

log: line=Bbo * 0.89

log: ngram_order=2

log: history=Bbo * 0.89

log: predicted=* 0.89

log: count=0.890000

BaI Ph 0.11

log: line=Bbo c 0.11

log: ngram_order=2

log: history=Bbo c 0.11

log: predicted=c 0.11

log: count=0.110000

Bbo * 0.89

log: line=Bg * 0.89

log: ngram_order=2

log: history=Bg * 0.89

log: predicted=* 0.89

log: count=0.890000

Bbo c 0.11

log: line=Bg Gh 0.11

log: ngram_order=2

log: history=Bg Gh 0.11

log: predicted=Gh 0.11

log: count=0.110000

Bg * 0.89

log: line=BgN * 0.89

log: ngram_order=2

log: history=BgN * 0.89

log: predicted=* 0.89

log: count=0.890000

Bg Gh 0.11

log: line=BgN B 0.11

log: ngram_order=2

log: history=BgN B 0.11

log: predicted=B 0.11

log: count=0.110000

BgN * 0.89

log: line=BjQ * 0.89

log: ngram_order=2

log: history=BjQ * 0.89

log: predicted=* 0.89

log: count=0.890000

BgN B 0.11

log: line=BjQ BgN 0.11

log: ngram_order=2

log: history=BjQ BgN 0.11

log: predicted=BgN 0.11

log: count=0.110000

BjQ * 0.89

log: line=D * 0.89

log: ngram_order=2

log: history=D * 0.89

log: predicted=* 0.89

log: count=0.890000

BjQ BgN 0.11

log: line=D a 0.11

log: ngram_order=2

log: history=D a 0.11

log: predicted=a 0.11

log: count=0.110000

D * 0.89

log: line=EG * 0.89

log: ngram_order=2

log: history=EG * 0.89

log: predicted=* 0.89

log: count=0.890000

D a 0.11

log: line=EG BYH 0.11

log: ngram_order=2

log: history=EG BYH 0.11

log: predicted=BYH 0.11

log: count=0.110000

EG * 0.89

log: line=Gh * 0.89

log: ngram_order=2

log: history=Gh * 0.89

log: predicted=* 0.89

log: count=0.890000

EG BYH 0.11

log: line=Gh BR 0.11

log: ngram_order=2

log: history=Gh BR 0.11

log: predicted=BR 0.11

log: count=0.110000

Gh * 0.89

log: line=H * 0.89

log: ngram_order=2

log: history=H * 0.89

log: predicted=* 0.89

log: count=0.890000

Gh BR 0.11

log: line=H Bg 0.11

log: ngram_order=2

log: history=H Bg 0.11

log: predicted=Bg 0.11

log: count=0.110000

H * 0.89

log: line=JH * 0.89

log: ngram_order=2

log: history=JH * 0.89

log: predicted=* 0.89

log: count=0.890000

H Bg 0.11

log: line=JH BaI 0.11

log: ngram_order=2

log: history=JH BaI 0.11

log: predicted=BaI 0.11

log: count=0.110000

JH * 0.89

log: line=Ph * 0.89

log: ngram_order=2

log: history=Ph * 0.89

log: predicted=* 0.89

log: count=0.890000

JH BaI 0.11

log: line=Ph BR 0.11

log: ngram_order=2

log: history=Ph BR 0.11

log: predicted=BR 0.11

log: count=0.110000

Ph * 0.89

log: line=a * 0.89

log: ngram_order=2

log: history=a * 0.89

log: predicted=* 0.89

log: count=0.890000

Ph BR 0.11

log: line=a H 0.11

log: ngram_order=2

log: history=a H 0.11

log: predicted=H 0.11

log: count=0.110000

a * 0.89

log: line=c * 0.89

log: ngram_order=2

log: history=c * 0.89

log: predicted=* 0.89

log: count=0.890000

a H 0.11

log: line=c JH 0.11

log: ngram_order=2

log: history=c JH 0.11

log: predicted=JH 0.11

log: count=0.110000

c * 0.89

log: line=dV * 0.89

log: ngram_order=2

log: history=dV * 0.89

log: predicted=* 0.89

log: count=0.890000

c JH 0.11

log: line=dV dl 0.11

log: ngram_order=2

log: history=dV dl 0.11

log: predicted=dl 0.11

log: count=0.110000

dV * 0.89

log: line=dl * 0.89

log: ngram_order=2

log: history=dl * 0.89

log: predicted=* 0.89

log: count=0.890000

dV dl 0.11

log: line=dl BjQ 0.11

log: ngram_order=2

log: history=dl BjQ 0.11

log: predicted=BjQ 0.11

log: count=0.110000

dl * 0.89

dl BjQ 0.11

7 统计程序运行的时间

FILE *fp = fopen("log.txt","w");

clock_t begin = clock();

clock_t end = clock();

fprintf(fp,"log: latgen-faster the running time is : %fms\n", (double)(end -begin)*1000/CLOCKS_PER_SEC);

fclose(fp);

8 修改文件格式

替换

sed 's/}/\n/g' test.txt >enter.txt

sed 's/:\s//g' map.txt > map_1.txt

:%s/\n/ /g

删除

sed -i '/^,\s{"license"/d' enter.txt

打印列

awk -F '"' '{print $5 $7}' enter.txt >map.txt

9 Git上传环境备份

转载请注明原文地址: https://ju.6miu.com/read-1305481.html

最新回复(0)