{"id":301,"date":"2023-11-16T17:49:09","date_gmt":"2023-11-16T16:49:09","guid":{"rendered":"https:\/\/spot.ff.uni-lj.si\/wp\/corpora\/"},"modified":"2025-06-05T16:18:42","modified_gmt":"2025-06-05T14:18:42","slug":"corpora","status":"publish","type":"page","link":"https:\/\/spot.ff.uni-lj.si\/en\/corpora\/","title":{"rendered":"Corpora"},"content":{"rendered":"<div  class='flex_column av-3plpw7-988dd21589d5b06b8805836a1a3fb88c av_one_full  avia-builder-el-0  el_before_av_one_full  avia-builder-el-first  first flex_column_div  '     ><p>\n<style type=\"text\/css\" data-created_by=\"avia_inline_auto\" id=\"style-css-av-3kju1z-7946a6009ac1256630afea6956f7712f\">\n#top .av-special-heading.av-3kju1z-7946a6009ac1256630afea6956f7712f{\npadding-bottom:10px;\n}\nbody .av-special-heading.av-3kju1z-7946a6009ac1256630afea6956f7712f .av-special-heading-tag .heading-char{\nfont-size:25px;\npadding-bottom:10px;\n}\n.av-special-heading.av-3kju1z-7946a6009ac1256630afea6956f7712f .av-subheading{\nfont-size:15px;\n}\n<\/style>\n<div  class='av-special-heading av-3kju1z-7946a6009ac1256630afea6956f7712f av-special-heading-h3 blockquote elegant-quote elegant-centered  avia-builder-el-1  el_before_av_textblock  avia-builder-el-first  av-icon'><h3 class='av-special-heading-tag'  itemprop=\"headline\"  ><span class='heading-char avia-font-entypo-fontello' aria-hidden='true' data-av_icon='\ue83c' data-av_iconfont='entypo-fontello'><\/span><span class=\"heading-wrap\">SST Treebank of Spoken Slovenian<\/span><\/h3><div class=\"special-heading-border\"><div class=\"special-heading-inner-border\"><\/div><\/div><\/div><br \/>\n<section  class='av_textblock_section av-3d3c5r-7e5fa457d8261190a80a9d3cff687cd6 '   itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock'  itemprop=\"text\" ><p data-start=\"136\" data-end=\"858\">The Spoken Slovenian Treebank (SST) is the only syntactically annotated corpus of spoken Slovenian currently available. It was developed as a representative subset of the \u00a0<a href=\"https:\/\/viri.cjvt.si\/gos\/\">Gos<\/a> reference corpus and is intended to support both linguistic and computational research on Slovenian speech. The SST contains manually annotated transcripts of spontaneous speech in a variety of everyday contexts\u2014from academic lectures to informal conversations among friends\u2014and was significantly expanded and refined as part of the SPOT project. Annotated according to the cross-linguistically harmonized <a href=\"https:\/\/universaldependencies.org\/\">Universal Dependencies<\/a> scheme, the SST enables direct comparison with written and spoken corpora in over 160 languages worldwide. SST is also the backbone of the recently emerged ROG training corpus of spoken Slovene, which includes additional annotation layers such as prosody, disfluency, and dialogue acts.<\/p>\n<\/div><\/section><br \/>\n<div  class='avia-buttonrow-wrap av-3906bj-28ae6dbdf67da219f2b2f60d8445fcd7 avia-buttonrow-left  avia-builder-el-3  el_after_av_textblock  avia-builder-el-last '>\n\n<style type=\"text\/css\" data-created_by=\"avia_inline_auto\" id=\"style-css-av-32d32f-e1c604dccb4d95b6d778822ae515ce06\">\n#top #wrap_all .avia-button.av-32d32f-e1c604dccb4d95b6d778822ae515ce06{\nmargin-bottom:5px;\nmargin-right:5px;\n}\n<\/style>\n<a href=\"https:\/\/orodja.cjvt.si\/drevesnik\/\" class=\"avia-button av-32d32f-e1c604dccb4d95b6d778822ae515ce06 avia-icon_select-yes-left-icon avia-size-small avia-color-theme-color-subtle\" target=\"_blank\" rel=\"noopener noreferrer\"><span class='avia_button_icon avia_button_icon_left ' aria-hidden='true' data-av_icon='\ue803' data-av_iconfont='entypo-fontello'><\/span><span class='avia_iconbox_title' >Browse SST in Drevesnik<\/span><\/a>\n\n<style type=\"text\/css\" data-created_by=\"avia_inline_auto\" id=\"style-css-av-2ttme7-ce2cddc31f03a778b263711f2b670c69\">\n#top #wrap_all .avia-button.av-2ttme7-ce2cddc31f03a778b263711f2b670c69{\nmargin-bottom:5px;\nmargin-right:5px;\n}\n<\/style>\n<a href=\"https:\/\/universal.grew.fr\/?corpus=UD_Slovenian-SST@2.16\" class=\"avia-button av-2ttme7-ce2cddc31f03a778b263711f2b670c69 avia-icon_select-yes-left-icon avia-size-small avia-color-theme-color-subtle\" target=\"_blank\" rel=\"noopener noreferrer\"><span class='avia_button_icon avia_button_icon_left ' aria-hidden='true' data-av_icon='\ue803' data-av_iconfont='entypo-fontello'><\/span><span class='avia_iconbox_title' >Browse SST in Grew-match<\/span><\/a>\n\n<style type=\"text\/css\" data-created_by=\"avia_inline_auto\" id=\"style-css-av-2qfjbz-66869b897db06579eafdb34b79df7931\">\n#top #wrap_all .avia-button.av-2qfjbz-66869b897db06579eafdb34b79df7931{\nmargin-bottom:5px;\nmargin-right:5px;\n}\n<\/style>\n<a href=\"https:\/\/github.com\/UniversalDependencies\/UD_Slovenian-SST\" class=\"avia-button av-2qfjbz-66869b897db06579eafdb34b79df7931 avia-icon_select-yes-left-icon avia-size-small avia-color-theme-color-subtle\" target=\"_blank\" rel=\"noopener noreferrer\"><span class='avia_button_icon avia_button_icon_left ' aria-hidden='true' data-av_icon='\ue851' data-av_iconfont='entypo-fontello'><\/span><span class='avia_iconbox_title' >SST at UD GitHub repository<\/span><\/a>\n\n<style type=\"text\/css\" data-created_by=\"avia_inline_auto\" id=\"style-css-av-mbjfp3x0-41b722e53be74ff8d69dc0a54c44434e\">\n#top #wrap_all .avia-button.av-mbjfp3x0-41b722e53be74ff8d69dc0a54c44434e{\nmargin-bottom:5px;\nmargin-right:5px;\n}\n<\/style>\n<a href=\"http:\/\/hdl.handle.net\/11356\/1992\" class=\"avia-button av-mbjfp3x0-41b722e53be74ff8d69dc0a54c44434e avia-icon_select-yes-left-icon avia-size-small avia-color-theme-color-subtle\" target=\"_blank\" rel=\"noopener noreferrer\"><span class='avia_button_icon avia_button_icon_left ' aria-hidden='true' data-av_icon='\ue800' data-av_iconfont='entypo-fontello'><\/span><span class='avia_iconbox_title' >SST\/ROG at CLARIN.SI repository<\/span><\/a>\n<\/div><\/p><\/div><div  class='flex_column av-2js8d3-a1eb4e5acb15dea8bbb797066ee09080 av_one_full  avia-builder-el-4  el_after_av_one_full  avia-builder-el-last  first flex_column_div  column-top-margin'     ><p>\n<style type=\"text\/css\" data-created_by=\"avia_inline_auto\" id=\"style-css-av-lpbtrer5-3b6041e337e81d24783f5123c3ea7262\">\n#top .av-special-heading.av-lpbtrer5-3b6041e337e81d24783f5123c3ea7262{\npadding-bottom:10px;\n}\nbody .av-special-heading.av-lpbtrer5-3b6041e337e81d24783f5123c3ea7262 .av-special-heading-tag .heading-char{\nfont-size:25px;\npadding-bottom:10px;\n}\n.av-special-heading.av-lpbtrer5-3b6041e337e81d24783f5123c3ea7262 .av-subheading{\nfont-size:15px;\n}\n<\/style>\n<div  class='av-special-heading av-lpbtrer5-3b6041e337e81d24783f5123c3ea7262 av-special-heading-h3 blockquote elegant-quote elegant-centered  avia-builder-el-5  el_before_av_textblock  avia-builder-el-first  av-icon'><h3 class='av-special-heading-tag'  itemprop=\"headline\"  ><span class='heading-char avia-font-entypo-fontello' aria-hidden='true' data-av_icon='\ue84c' data-av_iconfont='entypo-fontello'><\/span><span class=\"heading-wrap\">SSJ Treebank of Written Slovenian<\/span><\/h3><div class=\"special-heading-border\"><div class=\"special-heading-inner-border\"><\/div><\/div><\/div><br \/>\n<section  class='av_textblock_section av-2e9h9r-6c1fc330e855183b19508f49eb454f6f '   itemscope=\"itemscope\" itemtype=\"https:\/\/schema.org\/CreativeWork\" ><div class='avia_textblock'  itemprop=\"text\" ><p>The SSJ treebank, named after the original project of the same name, is the largest manually parsed corpus of Slovenian language to date. It contains morphosyntactically annotated sentences sourced from fiction, non-fiction, journalistic and encyclopedic texts. In addition to being used in the development of language technology, such as software for automated grammatical annotation, the SSJ treebank is increasingly being used for monolingual and contrastive linguistic research as well. It adheres to the cross-linguistically harmonized <a href=\"https:\/\/universaldependencies.org\/\">Universal Dependencies<\/a>\u00a0and, as part of the SUK reference training corpus for Slovene, also contains linguistic annotation on several other levels. Within SPOT, the SSJ treebank serves as a reference corpus for automatic detection of speech-specific syntactic patterns in the identically annotated Spoken Slovenian Treebank (SST).<\/p>\n<\/div><\/section><br \/>\n<div  class='avia-buttonrow-wrap av-264b9b-156282452fd6be6ca105a3496b9e71fb avia-buttonrow-left  avia-builder-el-7  el_after_av_textblock  avia-builder-el-last '>\n\n<style type=\"text\/css\" data-created_by=\"avia_inline_auto\" id=\"style-css-av-7q4jr-4ef4317dfdaebc285937bf03575d1d91\">\n#top #wrap_all .avia-button.av-7q4jr-4ef4317dfdaebc285937bf03575d1d91{\nmargin-bottom:5px;\nmargin-right:5px;\n}\n<\/style>\n<a href=\"https:\/\/orodja.cjvt.si\/drevesnik\/\" class=\"avia-button av-7q4jr-4ef4317dfdaebc285937bf03575d1d91 avia-icon_select-yes-left-icon avia-size-small avia-color-theme-color-subtle\" target=\"_blank\" rel=\"noopener noreferrer\"><span class='avia_button_icon avia_button_icon_left ' aria-hidden='true' data-av_icon='\ue803' data-av_iconfont='entypo-fontello'><\/span><span class='avia_iconbox_title' >Browse SSJ in Drevesnik<\/span><\/a>\n\n<style type=\"text\/css\" data-created_by=\"avia_inline_auto\" id=\"style-css-av-1yh1jb-187cf26a5f77ae58f2414acb66fcb23c\">\n#top #wrap_all .avia-button.av-1yh1jb-187cf26a5f77ae58f2414acb66fcb23c{\nmargin-bottom:5px;\nmargin-right:5px;\n}\n<\/style>\n<a href=\"https:\/\/universal.grew.fr\/?corpus=UD_Slovenian-SSJ@2.16\" class=\"avia-button av-1yh1jb-187cf26a5f77ae58f2414acb66fcb23c avia-icon_select-yes-left-icon avia-size-small avia-color-theme-color-subtle\" target=\"_blank\" rel=\"noopener noreferrer\"><span class='avia_button_icon avia_button_icon_left ' aria-hidden='true' data-av_icon='\ue803' data-av_iconfont='entypo-fontello'><\/span><span class='avia_iconbox_title' >Browse SSJ in Grew-match<\/span><\/a>\n\n<style type=\"text\/css\" data-created_by=\"avia_inline_auto\" id=\"style-css-av-66t1b-87cb64f728dc21f8b5c69f58ec5a0fdc\">\n#top #wrap_all .avia-button.av-66t1b-87cb64f728dc21f8b5c69f58ec5a0fdc{\nmargin-bottom:5px;\nmargin-right:5px;\n}\n<\/style>\n<a href=\"https:\/\/github.com\/UniversalDependencies\/UD_Slovenian-SSJ\" class=\"avia-button av-66t1b-87cb64f728dc21f8b5c69f58ec5a0fdc avia-icon_select-yes-left-icon avia-size-small avia-color-theme-color-subtle\" target=\"_blank\" rel=\"noopener noreferrer\"><span class='avia_button_icon avia_button_icon_left ' aria-hidden='true' data-av_icon='\ue851' data-av_iconfont='entypo-fontello'><\/span><span class='avia_iconbox_title' >SSJ at UD GitHub repository<\/span><\/a>\n\n<style type=\"text\/css\" data-created_by=\"avia_inline_auto\" id=\"style-css-av-lpbu2fty-042b6cc4daa104e975bfed682f0ed089\">\n#top #wrap_all .avia-button.av-lpbu2fty-042b6cc4daa104e975bfed682f0ed089{\nmargin-bottom:5px;\nmargin-right:5px;\n}\n<\/style>\n<a href=\"http:\/\/hdl.handle.net\/11356\/1747\" class=\"avia-button av-lpbu2fty-042b6cc4daa104e975bfed682f0ed089 avia-icon_select-yes-left-icon avia-size-small avia-color-theme-color-subtle\" target=\"_blank\" rel=\"noopener noreferrer\"><span class='avia_button_icon avia_button_icon_left ' aria-hidden='true' data-av_icon='\ue851' data-av_iconfont='entypo-fontello'><\/span><span class='avia_iconbox_title' >SSJ\/SUK at CLARIN.SI repository<\/span><\/a>\n<\/div><\/p><\/div><\/p>\n","protected":false},"excerpt":{"rendered":"","protected":false},"author":1,"featured_media":0,"parent":0,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"footnotes":""},"class_list":["post-301","page","type-page","status-publish","hentry"],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v25.3 - https:\/\/yoast.com\/wordpress\/plugins\/seo\/ -->\n<title>Corpora - SPOT<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/spot.ff.uni-lj.si\/korpusi\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Corpora - SPOT\" \/>\n<meta property=\"og:url\" content=\"https:\/\/spot.ff.uni-lj.si\/korpusi\/\" \/>\n<meta property=\"og:site_name\" content=\"SPOT\" \/>\n<meta property=\"article:modified_time\" content=\"2025-06-05T14:18:42+00:00\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data1\" content=\"18 minutes\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/spot.ff.uni-lj.si\/korpusi\/\",\"url\":\"https:\/\/spot.ff.uni-lj.si\/korpusi\/\",\"name\":\"Corpora - SPOT\",\"isPartOf\":{\"@id\":\"https:\/\/spot.ff.uni-lj.si\/#website\"},\"datePublished\":\"2023-11-16T16:49:09+00:00\",\"dateModified\":\"2025-06-05T14:18:42+00:00\",\"breadcrumb\":{\"@id\":\"https:\/\/spot.ff.uni-lj.si\/korpusi\/#breadcrumb\"},\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/spot.ff.uni-lj.si\/korpusi\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/spot.ff.uni-lj.si\/korpusi\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\/\/spot.ff.uni-lj.si\/en\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"Corpora\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/spot.ff.uni-lj.si\/#website\",\"url\":\"https:\/\/spot.ff.uni-lj.si\/\",\"name\":\"SPOT\",\"description\":\"A Treebank-Driven Approach to the Study of Spoken Slovenian\",\"publisher\":{\"@id\":\"https:\/\/spot.ff.uni-lj.si\/#organization\"},\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/spot.ff.uni-lj.si\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"en-US\"},{\"@type\":\"Organization\",\"@id\":\"https:\/\/spot.ff.uni-lj.si\/#organization\",\"name\":\"SPOT\",\"url\":\"https:\/\/spot.ff.uni-lj.si\/\",\"logo\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/spot.ff.uni-lj.si\/#\/schema\/logo\/image\/\",\"url\":\"https:\/\/spot.ff.uni-lj.si\/wp-content\/uploads\/2025\/03\/spot_logo_final.svg\",\"contentUrl\":\"https:\/\/spot.ff.uni-lj.si\/wp-content\/uploads\/2025\/03\/spot_logo_final.svg\",\"width\":1,\"height\":1,\"caption\":\"SPOT\"},\"image\":{\"@id\":\"https:\/\/spot.ff.uni-lj.si\/#\/schema\/logo\/image\/\"}}]}<\/script>\n<!-- \/ Yoast SEO plugin. -->","yoast_head_json":{"title":"Corpora - SPOT","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/spot.ff.uni-lj.si\/korpusi\/","og_locale":"en_US","og_type":"article","og_title":"Corpora - SPOT","og_url":"https:\/\/spot.ff.uni-lj.si\/korpusi\/","og_site_name":"SPOT","article_modified_time":"2025-06-05T14:18:42+00:00","twitter_card":"summary_large_image","twitter_misc":{"Est. reading time":"18 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/spot.ff.uni-lj.si\/korpusi\/","url":"https:\/\/spot.ff.uni-lj.si\/korpusi\/","name":"Corpora - SPOT","isPartOf":{"@id":"https:\/\/spot.ff.uni-lj.si\/#website"},"datePublished":"2023-11-16T16:49:09+00:00","dateModified":"2025-06-05T14:18:42+00:00","breadcrumb":{"@id":"https:\/\/spot.ff.uni-lj.si\/korpusi\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/spot.ff.uni-lj.si\/korpusi\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/spot.ff.uni-lj.si\/korpusi\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/spot.ff.uni-lj.si\/en\/"},{"@type":"ListItem","position":2,"name":"Corpora"}]},{"@type":"WebSite","@id":"https:\/\/spot.ff.uni-lj.si\/#website","url":"https:\/\/spot.ff.uni-lj.si\/","name":"SPOT","description":"A Treebank-Driven Approach to the Study of Spoken Slovenian","publisher":{"@id":"https:\/\/spot.ff.uni-lj.si\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/spot.ff.uni-lj.si\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/spot.ff.uni-lj.si\/#organization","name":"SPOT","url":"https:\/\/spot.ff.uni-lj.si\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/spot.ff.uni-lj.si\/#\/schema\/logo\/image\/","url":"https:\/\/spot.ff.uni-lj.si\/wp-content\/uploads\/2025\/03\/spot_logo_final.svg","contentUrl":"https:\/\/spot.ff.uni-lj.si\/wp-content\/uploads\/2025\/03\/spot_logo_final.svg","width":1,"height":1,"caption":"SPOT"},"image":{"@id":"https:\/\/spot.ff.uni-lj.si\/#\/schema\/logo\/image\/"}}]}},"_links":{"self":[{"href":"https:\/\/spot.ff.uni-lj.si\/en\/wp-json\/wp\/v2\/pages\/301","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/spot.ff.uni-lj.si\/en\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/spot.ff.uni-lj.si\/en\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/spot.ff.uni-lj.si\/en\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/spot.ff.uni-lj.si\/en\/wp-json\/wp\/v2\/comments?post=301"}],"version-history":[{"count":58,"href":"https:\/\/spot.ff.uni-lj.si\/en\/wp-json\/wp\/v2\/pages\/301\/revisions"}],"predecessor-version":[{"id":1496,"href":"https:\/\/spot.ff.uni-lj.si\/en\/wp-json\/wp\/v2\/pages\/301\/revisions\/1496"}],"wp:attachment":[{"href":"https:\/\/spot.ff.uni-lj.si\/en\/wp-json\/wp\/v2\/media?parent=301"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}