Hauptthemen unserer Publikationen zu Schweizerdeutsch:
2022
Forschungsberichte
Schraner, Yanick; Scheller, Christian; Plüss, Michel; Vogel, Manfred
Swiss German Speech to Text Evaluation Forschungsbericht
2022.
Abstract | Links | BibTeX | Schlagwörter: speech translation, Speech-to-Text, Swiss German, System Evaluation
@techreport{schraner2022evaluation,
  title       = {Swiss German Speech to Text Evaluation},
  author      = {Schraner, Yanick and Scheller, Christian and Plüss, Michel and Vogel, Manfred},
  institution = {{University of Applied Sciences and Arts Northwestern Switzerland}},
  url         = {https://arxiv.org/pdf/2207.00412.pdf},
  year        = {2022},
  date        = {2022-11-14},
  urldate     = {2022-11-14},
  abstract    = {We present an in-depth evaluation of four commercially available Speech-to-Text (STT) systems for Swiss German. The systems are anonymized and referred to as system a, b, c and d in this report. We compare the four systems to our STT models, referred to as FHNW in the following, and provide details on how we trained our model. To evaluate the models, we use two STT datasets from different domains. The Swiss Parliament Corpus (SPC) test set and the STT4SG-350 corpus, which contains texts from the news sector with an even distribution across seven dialect regions. We provide a detailed error analysis to detect the strengths and weaknesses of the different systems. On both datasets, our model achieves the best results for both, the WER (word error rate) and the BLEU (bilingual evaluation understudy) scores. On the SPC test set, we obtain a BLEU score of 0.607, whereas the best commercial system reaches a BLEU score of 0.509. On the STT4SG-350 test set, we obtain a BLEU score of 0.722, while the best commercial system achieves a BLEU score of 0.568. However, we would like to point out that this analysis is somewhat limited by the domain-specific idiosyncrasies of the selected texts of the two test sets.},
  keywords    = {speech translation, Speech-to-Text, Swiss German, System Evaluation},
  pubstate    = {published},
  tppubtype   = {techreport}
}
We present an in-depth evaluation of four commercially available Speech-to-Text (STT) systems
for Swiss German. The systems are anonymized and referred to as system a, b, c and d in this
report. We compare the four systems to our STT models, referred to as FHNW in the following,
and provide details on how we trained our model. To evaluate the models, we use two STT datasets
from different domains. The Swiss Parliament Corpus (SPC) test set and the STT4SG-350 corpus,
which contains texts from the news sector with an even distribution across seven dialect regions. We
provide a detailed error analysis to detect the strengths and weaknesses of the different systems. On
both datasets, our model achieves the best results for both, the WER (word error rate) and the BLEU
(bilingual evaluation understudy) scores. On the SPC test set, we obtain a BLEU score of 0.607,
whereas the best commercial system reaches a BLEU score of 0.509. On the STT4SG-350 test set,
we obtain a BLEU score of 0.722, while the best commercial system achieves a BLEU score of 0.568.
However, we would like to point out that this analysis is somewhat limited by the domain-specific
idiosyncrasies of the selected texts of the two test sets.
Schraner, Yanick; Scheller, Christian; Plüss, Michel; Neukom, Lukas; Vogel, Manfred
Comparison of Unsupervised Learning and Supervised Learning with Noisy Labels for Low-Resource Speech Recognition Forschungsbericht
2022.
Links | BibTeX | Schlagwörter: forced-alignment, low-resource, self-supervised, semi-supervised, Speech Recognition/Understanding, speech translation
@techreport{schraner2022comparison,
  title       = {Comparison of Unsupervised Learning and Supervised Learning with Noisy Labels for Low-Resource Speech Recognition},
  author      = {Schraner, Yanick and Scheller, Christian and Plüss, Michel and Neukom, Lukas and Vogel, Manfred},
  institution = {{University of Applied Sciences and Arts Northwestern Switzerland}},
  url         = {https://www.isca-speech.org/archive/pdfs/interspeech_2022/schraner22_interspeech.pdf},
  year        = {2022},
  date        = {2022-09-22},
  keywords    = {forced-alignment, low-resource, self-supervised, semi-supervised, Speech Recognition/Understanding, speech translation},
  pubstate    = {published},
  tppubtype   = {techreport}
}
2021
Dokumentationen
Ulasik, Malgorzata Anna; Hürlimann, Manuela; Dubel, Bogumila; Kaufmann, Yves; Rudolf, Silas; Deriu, Jan; Mlynchyk, Katsiaryna; Hutter, Hans-Peter; Cieliebak, Mark
ZHAW-CAI : Ensemble Method for Swiss German Speech to Standard German Text Dokumentation
Shared Task on Swiss German Speech to Standard German Text at 6th Swiss Text Analytics Conference (SwissText) (Hrsg.): 2021, ISSN: 1613-0073.
Abstract | Links | BibTeX | Schlagwörter: speech translation
@manual{ulasik2021zhawcai,
  title        = {ZHAW-CAI: Ensemble Method for Swiss German Speech to Standard German Text},
  author       = {Ulasik, Malgorzata Anna and Hürlimann, Manuela and Dubel, Bogumila and Kaufmann, Yves and Rudolf, Silas and Deriu, Jan and Mlynchyk, Katsiaryna and Hutter, Hans-Peter and Cieliebak, Mark},
  organization = {{Shared Task on Swiss German Speech to Standard German Text at 6th Swiss Text Analytics Conference (SwissText)}},
  url          = {http://ceur-ws.org/Vol-2957/sg_paper3.pdf},
  doi          = {10.21256/zhaw-23889},
  issn         = {1613-0073},
  year         = {2021},
  date         = {2021-01-01},
  urldate      = {2021-01-01},
  abstract     = {This paper presents the contribution of ZHAW-CAI to the Shared Task ”Swiss German Speech to Standard German Text” at the SwissText 2021 conference. Our approach combines three models based on the Fairseq, Jasper and Wav2vec architectures trained on multilingual, German and Swiss German data. We applied an ensembling algorithm on the predictions of the three models in order to retrieve the most reliable candidate out of the provided translations for each spoken utterance. With the ensembling output, we achieved a BLEU score of 39.39 on the private test set, which gave us the third place out of four contributors in the competition.},
  keywords     = {speech translation},
  pubstate     = {published},
  tppubtype    = {manual}
}
Forschungsberichte
Plüss, Michel; Neukom, Lukas; Vogel, Manfred
SwissText 2021 Task 3: Swiss German Speech to Standard German Text Forschungsbericht
2021.
Abstract | Links | BibTeX | Schlagwörter: speech translation, Speech-to-Text
@techreport{pluess2021swisstext,
  title       = {SwissText 2021 Task 3: Swiss German Speech to Standard German Text},
  author      = {Plüss, Michel and Neukom, Lukas and Vogel, Manfred},
  institution = {{Institute for Data Science, University of Applied Sciences and Arts Northwestern Switzerland}},
  address     = {Windisch, Switzerland},
  url         = {https://ceur-ws.org/Vol-2957/sg_paper1.pdf},
  year        = {2021},
  date        = {2021-01-01},
  urldate     = {2021-01-01},
  abstract    = {We present the results and findings of SwissText 2021 Task 3 on Swiss German Speech to Standard German Text. Participants were asked to build a system translating Swiss German speech to Standard German text. The objective was to maximize the BLEU score on a new test set covering a large part of the Swiss German dialect landscape. Four teams participated, with the winning contribution achieving a BLEU score of 46.0.},
  keywords    = {speech translation, Speech-to-Text},
  pubstate    = {published},
  tppubtype   = {techreport}
}
2020
Dokumentationen
Büchi, Matthias; Ulasik, Malgorzata Anna; Hürlimann, Manuela; Benites, Fernando; von Däniken, Pius; Cieliebak, Mark
ZHAW-InIT at GermEval 2020 Task 4: Low-Resource Speech-to-Text Dokumentation
Low-Resource Speech-to-Text Shared Task at GermEval (Hrsg.): 2020, ISSN: 1613-0073.
Abstract | Links | BibTeX | Schlagwörter: CNN, low-resource, speech translation, Speech-to-Text
@manual{buechi2020zhawinit,
  title        = {ZHAW-InIT at GermEval 2020 Task 4: Low-Resource Speech-to-Text},
  author       = {Büchi, Matthias and Ulasik, Malgorzata Anna and Hürlimann, Manuela and Benites, Fernando and von Däniken, Pius and Cieliebak, Mark},
  organization = {{Low-Resource Speech-to-Text Shared Task at GermEval}},
  url          = {https://digitalcollection.zhaw.ch/handle/11475/21550},
  doi          = {10.21256/zhaw-21550},
  issn         = {1613-0073},
  year         = {2020},
  date         = {2020-06-01},
  urldate      = {2020-06-01},
  abstract     = {This paper presents the contribution of ZHAW-InIT to Task 4 ”Low-Resource STT” at GermEval 2020. The goal of the task is to develop a system for translating Swiss German dialect speech into Standard German text in the domain of parliamentary debates. Our approach is based on Jasper, a CNN Acoustic Model, which we fine-tune on the task data. We enhance the base system with an extended Language Model containing in-domain data and speed perturbation and run further experiments with post-processing. Our submission achieved first place with a final Word Error Rate of 40.29%.},
  keywords     = {CNN, low-resource, speech translation, Speech-to-Text},
  pubstate     = {published},
  tppubtype    = {manual}
}
Forschungsberichte
Plüss, Michel; Neukom, Lukas; Vogel, Manfred
GermEval 2020 Task 4: Low-Resource Speech-to-Text Forschungsbericht
2020.
Abstract | Links | BibTeX | Schlagwörter: low-resource, speech translation, Speech-to-Text
@techreport{pluess2020germeval,
  title       = {GermEval 2020 Task 4: Low-Resource Speech-to-Text},
  author      = {Plüss, Michel and Neukom, Lukas and Vogel, Manfred},
  institution = {{Institute for Data Science, University of Applied Sciences and Arts Northwestern Switzerland}},
  address     = {Windisch, Switzerland},
  url         = {https://ceur-ws.org/Vol-2624/germeval-task4-paper1.pdf},
  year        = {2020},
  date        = {2020-01-01},
  urldate     = {2020-01-01},
  abstract    = {We present the results and findings of GermEval 2020 Task 4 on Low-Resource Speech-to-Text. Participants were asked to build a system translating Swiss German speech to Standard German text and minimize its word error rate. The task was based on a new dataset for Swiss German to Standard German speech translation, which contains 74 hours of sentence-level speech-text-pairs. 3 teams participated, with the winning contribution reaching a word error rate of 40.29 %.},
  keywords    = {low-resource, speech translation, Speech-to-Text},
  pubstate    = {published},
  tppubtype   = {techreport}
}