@article{pham2025discourse,title={Discourse Graph Guided Document Translation with Large Language Models},author={Pham, Viet-Thanh and Wang, Minghan and Liao, Hao-Han and Vu, Thuy-Trang},journal={arXiv preprint arXiv:2511.07230},year={2025}}
preprint
Towards Inference-time Scaling for Continuous Space Reasoning
Minghan Wang , Thuy-Trang Vu, Ehsan Shareghi , and Gholamreza Haffari
@article{wang2025towards,title={Towards Inference-time Scaling for Continuous Space Reasoning},author={Wang, Minghan and Vu, Thuy-Trang and Shareghi, Ehsan and Haffari, Gholamreza},journal={arXiv preprint arXiv:2510.12167},year={2025}}
preprint
The Social Cost of Intelligence: Emergence, Propagation, and Amplification of Stereotypical Bias in Multi-Agent Systems
Thi-Nhung Nguyen , Linhao Luo , Thuy-Trang Vu, and Dinh Phung
@article{nguyen2025social,title={The Social Cost of Intelligence: Emergence, Propagation, and Amplification of Stereotypical Bias in Multi-Agent Systems},author={Nguyen, Thi-Nhung and Luo, Linhao and Vu, Thuy-Trang and Phung, Dinh},journal={arXiv preprint arXiv:2510.10943},year={2025}}
preprint
Beyond Imitation: Recovering Dense Rewards from Demonstrations
Jiangnan Li , Thuy-Trang Vu, Ehsan Abbasnejad , and Gholamreza Haffari
@article{li2025beyond,title={Beyond Imitation: Recovering Dense Rewards from Demonstrations},author={Li, Jiangnan and Vu, Thuy-Trang and Abbasnejad, Ehsan and Haffari, Gholamreza},journal={arXiv preprint arXiv:2510.02493},year={2025}}
preprint
G-reasoner: Foundation Models for Unified Reasoning over Graph-structured Knowledge
Linhao Luo , Zicheng Zhao , Junnan Liu , Zhangchi Qiu , Junnan Dong , and 6 more authors
@article{luo2025g,title={G-reasoner: Foundation Models for Unified Reasoning over Graph-structured Knowledge},author={Luo, Linhao and Zhao, Zicheng and Liu, Junnan and Qiu, Zhangchi and Dong, Junnan and Panev, Serge and Gong, Chen and Vu, Thuy-Trang and Haffari, Gholamreza and Phung, Dinh and others},journal={arXiv preprint arXiv:2509.24276},year={2025}}
preprint
Mitigating Semantic Collapse in Generative Personalization with a Surprisingly Simple Test-Time Embedding Adjustment
Anh Bui , Thuy-Trang Vu, Trung Le , Junae Kim , Tamas Abraham , and 3 more authors
@article{bui2025mitigating,title={Mitigating Semantic Collapse in Generative Personalization with a Surprisingly Simple Test-Time Embedding Adjustment},author={Bui, Anh and Vu, Thuy-Trang and Le, Trung and Kim, Junae and Abraham, Tamas and Omari, Rollin and Kaur, Amar and Phung, Dinh},journal={arXiv preprint arXiv:2506.22685},year={2025}}
preprint
SituatedThinker: Grounding LLM Reasoning with Real-World through Situated Thinking
Junnan Liu , Linhao Luo , Thuy-Trang Vu, and Gholamreza Haffari
@article{liu2025situatedthinker,title={SituatedThinker: Grounding LLM Reasoning with Real-World through Situated Thinking},author={Liu, Junnan and Luo, Linhao and Vu, Thuy-Trang and Haffari, Gholamreza},journal={arXiv preprint arXiv:2505.19300},year={2025}}
preprint
CONGRAD: Conflicting Gradient Filtering for Multilingual Preference Alignment
Jiangnan Li , Thuy-Trang Vu, Christian Herold , Amirhossein Tebbifakhr , Shahram Khadivi , and 1 more author
@article{li2025congrad,title={CONGRAD: Conflicting Gradient Filtering for Multilingual Preference Alignment},author={Li, Jiangnan and Vu, Thuy-Trang and Herold, Christian and Tebbifakhr, Amirhossein and Khadivi, Shahram and Haffari, Gholamreza},journal={arXiv preprint arXiv:2503.23777},year={2025}}
2024
Continual learning for large language models: A survey
Tongtong Wu , Linhao Luo , Yuan-Fang Li , Shirui Pan , Thuy-Trang Vu, and 1 more author
@article{wu2024continual,title={Continual learning for large language models: A survey},author={Wu, Tongtong and Luo, Linhao and Li, Yuan-Fang and Pan, Shirui and Vu, Thuy-Trang and Haffari, Gholamreza},journal={arXiv preprint arXiv:2402.01364},year={2024}}
Adapting large language models for document-level machine translation
Minghao Wu , Thuy-Trang Vu, Lizhen Qu , George Foster , and Gholamreza Haffari
@article{wu2024adapting,title={Adapting large language models for document-level machine translation},author={Wu, Minghao and Vu, Thuy-Trang and Qu, Lizhen and Foster, George and Haffari, Gholamreza},journal={arXiv preprint arXiv:2401.06468},year={2024}}
Peer-reviewed
2025
ALTA
MAPLE: Multi-Agent Adaptive Planning with Long-Term Memory for Table Reasoning
Ye Bai , Minghan Wang , and Thuy-Trang Vu
In Proceedings of the 23rd Annual Workshop of the Australasian Language Technology Association , 2025
@inproceedings{bai2025maple,title={MAPLE: Multi-Agent Adaptive Planning with Long-Term Memory for Table Reasoning},author={Bai, Ye and Wang, Minghan and Vu, Thuy-Trang},booktitle={Proceedings of the 23rd Annual Workshop of the Australasian Language Technology Association},year={2025},}
EMNLP
Discrete Minds in a Continuous World: Do Language Models Know Time Passes?
Minghan Wang , Ye Bai , Thuy-Trang Vu, Ehsan Shareghi , and Gholamreza Haffari
In Findings of the Association for Computational Linguistics: EMNLP 2025 , 2025
While Large Language Models (LLMs) excel at temporal reasoning tasks like event ordering and duration estimation, their ability to perceive the actual passage of time remains unexplored. We investigate whether LLMs perceive the passage of time and adapt their decision-making accordingly through three complementary experiments. First, we introduce the Token-Time Hypothesis, positing that LLMs can map discrete token counts to continuous wall-clock time, and validate this through a dialogue duration judgment task. Second, we demonstrate that LLMs could use this awareness to adapt their response length while maintaining accuracy when users express urgency in question answering tasks. Finally, we develop BombRush, an interactive navigation challenge that examines how LLMs modify behavior under progressive time pressure in dynamic environments. Our findings indicate that LLMs possess certain awareness of time passage, enabling them to bridge discrete linguistic tokens and continuous physical time, though this capability varies with model size and reasoning abilities. This work establishes a theoretical foundation for enhancing temporal awareness in LLMs for time-sensitive applications.
@inproceedings{wang-etal-2025-discrete,title={Discrete Minds in a Continuous World: Do Language Models Know Time Passes?},author={Wang, Minghan and Bai, Ye and Vu, Thuy-Trang and Shareghi, Ehsan and Haffari, Gholamreza},editor={Christodoulopoulos, Christos and Chakraborty, Tanmoy and Rose, Carolyn and Peng, Violet},booktitle={Findings of the Association for Computational Linguistics: EMNLP 2025},year={2025},address={Suzhou, China},publisher={Association for Computational Linguistics},doi={10.18653/v1/2025.findings-emnlp.1016},pages={18703--18729},isbn={979-8-89176-335-7}}
CoNLL
Planning for Success: Exploring LLM Long-term Planning Capabilities in Table Understanding
Thi-Nhung Nguyen , Hoang Ngo , Dinh Phung , Thuy-Trang Vu, and Dat Quoc Nguyen
In Proceedings of the 29th Conference on Computational Natural Language Learning , 2025
Table understanding is key to addressing challenging downstream tasks such as table-based question answering and fact verification. Recent works have focused on leveraging Chain-of-Thought and question decomposition to solve complex questions requiring multiple operations on tables. However, these methods often suffer from a lack of explicit long-term planning and weak inter-step connections, leading to missed constraints within questions. In this paper, we propose leveraging the long-term planning capabilities of large language models (LLMs) to enhance table understanding. Our approach enables the execution of a long-term plan, where the steps are tightly interconnected and serve the ultimate goal, an aspect that methods based on Chain-of-Thought and question decomposition lack. In addition, our method effectively minimizes the inclusion of unnecessary details in the process of solving the next short-term goals, a limitation of methods based on Chain-of-Thought. Extensive experiments demonstrate that our method outperforms strong baselines and achieves state-of-the-art performance on WikiTableQuestions and TabFact datasets.
@inproceedings{nguyen-etal-2025-planning,title={Planning for Success: Exploring {LLM} Long-term Planning Capabilities in Table Understanding},author={Nguyen, Thi-Nhung and Ngo, Hoang and Phung, Dinh and Vu, Thuy-Trang and Nguyen, Dat Quoc},editor={Boleda, Gemma and Roth, Michael},booktitle={Proceedings of the 29th Conference on Computational Natural Language Learning},year={2025},address={Vienna, Austria},publisher={Association for Computational Linguistics},doi={10.18653/v1/2025.conll-1.6},pages={81--92},isbn={979-8-89176-271-8}}
ACL
Proverbs Run in Pairs: Evaluating Proverb Translation Capability of Large Language Model
Minghan Wang , Viet Thanh Pham , Farhad Moghimifar , and Thuy-Trang Vu
In Findings of the Association for Computational Linguistics: ACL 2025 , 2025
Despite achieving remarkable performance, machine translation (MT) research remains underexplored in terms of translating cultural elements in languages, such as idioms, proverbs, and colloquial expressions. This paper investigates the capability of state-of-the-art neural machine translation (NMT) and large language models (LLMs) in translating proverbs, which are deeply rooted in cultural contexts. We construct a translation dataset of standalone proverbs and proverbs in conversation for four language pairs. Our experiments show that the studied models can achieve good translation between languages with similar cultural backgrounds, and LLMs generally outperform NMT models in proverb translation. Furthermore, we find that current automatic evaluation metrics such as BLEU, CHRF++ and COMET are inadequate for reliably assessing the quality of proverb translation, highlighting the need for more culturally aware evaluation metrics.
@inproceedings{wang-etal-2025-proverbs,title={Proverbs Run in Pairs: Evaluating Proverb Translation Capability of Large Language Model},author={Wang, Minghan and Pham, Viet Thanh and Moghimifar, Farhad and Vu, Thuy-Trang},editor={Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},booktitle={Findings of the Association for Computational Linguistics: ACL 2025},year={2025},address={Vienna, Austria},publisher={Association for Computational Linguistics},url={https://aclanthology.org/2025.findings-acl.83/},doi={10.18653/v1/2025.findings-acl.83},pages={1646--1662},isbn={979-8-89176-256-5}}
EMNLP
MixLoRA-DSI: Dynamically Expandable Mixture-of-LoRA Experts for Rehearsal-Free Generative Retrieval over Dynamic Corpora
Tuan-Luc Huynh , Thuy-Trang Vu, Weiqing Wang , Trung Le , Dragan Gasevic , and 2 more authors
In Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing , 2025
Continually updating model-based indexes in generative retrieval with new documents remains challenging, as full retraining is computationally expensive and impractical under resource constraints. We propose MixLoRA-DSI, a novel framework that combines an expandable mixture of Low-Rank Adaptation experts with a layer-wise out-of-distribution (OOD)-driven expansion strategy. Instead of allocating new experts for each new corpus, our proposed expansion strategy enables sublinear parameter growth by selectively introducing new experts only when a significant number of OOD documents are detected. Experiments on NQ320k and MS MARCO Passage demonstrate that MixLoRA-DSI outperforms full-model update baselines, with minimal parameter overhead and substantially lower training costs.
@inproceedings{huynh-etal-2025-mixlora,title={{M}ix{L}o{RA}-{DSI}: Dynamically Expandable Mixture-of-{L}o{RA} Experts for Rehearsal-Free Generative Retrieval over Dynamic Corpora},author={Huynh, Tuan-Luc and Vu, Thuy-Trang and Wang, Weiqing and Le, Trung and Gasevic, Dragan and Li, Yuan-Fang and Do, Thanh-Toan},editor={Christodoulopoulos, Christos and Chakraborty, Tanmoy and Rose, Carolyn and Peng, Violet},booktitle={Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing},year={2025},address={Suzhou, China},publisher={Association for Computational Linguistics},doi={10.18653/v1/2025.emnlp-main.20},pages={380--396},isbn={979-8-89176-332-6}}
InterSpeech
SpeechDialogueFactory: Generating High-Quality Speech Dialogue Data to Accelerate Your Speech-LLM Development
Minghan Wang , Ye Bai , Yuxia Wang , Thuy-Trang Vu, Ehsan Shareghi , and 1 more author
@article{wang2025speechdialoguefactory,title={SpeechDialogueFactory: Generating High-Quality Speech Dialogue Data to Accelerate Your Speech-LLM Development},author={Wang, Minghan and Bai, Ye and Wang, Yuxia and Vu, Thuy-Trang and Shareghi, Ehsan and Haffari, Gholamreza},journal={Proceedings of InterSpeech},year={2025}}
ICLR
Fantastic Targets for Concept Erasure in Diffusion Models and Where To Find Them
Anh Tuan Bui , Thuy-Trang Vu, Long Tung Vuong , Trung Le , Paul Montague , and 3 more authors
In The Thirteenth International Conference on Learning Representations , 2025
@inproceedings{bui2025fantastic,title={Fantastic Targets for Concept Erasure in Diffusion Models and Where To Find Them},author={Bui, Anh Tuan and Vu, Thuy-Trang and Vuong, Long Tung and Le, Trung and Montague, Paul and Abraham, Tamas and Kim, Junae and Phung, Dinh},booktitle={The Thirteenth International Conference on Learning Representations},year={2025},}
ICML
The Best of Both Worlds: Bridging Quality and Diversity in Data Selection with Bipartite Graph
Minghao Wu , Thuy-Trang Vu, Lizhen Qu , and Gholamreza Haffari
@inproceedings{wu2024graphfilter,title={The Best of Both Worlds: Bridging Quality and Diversity in Data Selection with Bipartite Graph},author={Wu, Minghao and Vu, Thuy-Trang and Qu, Lizhen and Haffari, Gholamreza},booktitle={ICML},year={2025},}
COLING
Extending LLMs to New Languages: A Case Study of Llama and Persian Adaptation
Samin Mahdizadeh Sani , Pouya Sadeghi , Thuy-Trang Vu, Yadollah Yaghoobzadeh , and Gholamreza Haffari
In Proceedings of the 31st International Conference on Computational Linguistics , 2025
Large language models (LLMs) have made great progress in classification and text generation tasks. However, they are mainly trained on English data and often struggle with low-resource languages. In this study, we explore adding a new language, i.e., Persian, to Llama (a model with a limited understanding of Persian) using parameter-efficient fine-tuning. We employ a multi-stage approach involving pretraining on monolingual Persian data, aligning representations through bilingual pretraining and instruction datasets, and instruction-tuning with task-specific datasets. We evaluate the model’s performance at each stage on generation and classification tasks. Our findings suggest that incorporating the Persian language, through bilingual data alignment, can enhance classification accuracy for Persian tasks, with no adverse impact and sometimes even improvements on English tasks. Additionally, the results highlight the model’s initial strength as a critical factor when working with limited training data, with cross-lingual alignment offering minimal benefits for the low-resource language. Knowledge transfer from English to Persian has a marginal effect, primarily benefiting simple classification tasks.
@inproceedings{mahdizadeh-sani-etal-2025-extending,title={Extending {LLM}s to New Languages: A Case Study of Llama and {P}ersian Adaptation},author={Mahdizadeh Sani, Samin and Sadeghi, Pouya and Vu, Thuy-Trang and Yaghoobzadeh, Yadollah and Haffari, Gholamreza},editor={Rambow, Owen and Wanner, Leo and Apidianaki, Marianna and Al-Khalifa, Hend and Eugenio, Barbara Di and Schockaert, Steven},booktitle={Proceedings of the 31st International Conference on Computational Linguistics},year={2025},address={Abu Dhabi, UAE},publisher={Association for Computational Linguistics},pages={8868--8884}}
ECML PKDD
PromptDSI: Prompt-Based Rehearsal-Free Continual Learning for Document Retrieval
Tuan-Luc Huynh , Thuy-Trang Vu, Weiqing Wang , Yinwei Wei , Trung Le , and 3 more authors
In Joint European Conference on Machine Learning and Knowledge Discovery in Databases , 2025
@inproceedings{huynh2025promptdsi,title={PromptDSI: Prompt-Based Rehearsal-Free Continual Learning for Document Retrieval},author={Huynh, Tuan-Luc and Vu, Thuy-Trang and Wang, Weiqing and Wei, Yinwei and Le, Trung and Gasevic, Dragan and Li, Yuan-Fang and Do, Thanh-Toan},booktitle={Joint European Conference on Machine Learning and Knowledge Discovery in Databases},pages={383--401},year={2025},organization={Springer},}
CoLM
Improving Table Understanding with LLMs and Entity-Oriented Search
Thi-Nhung Nguyen , Hoang Ngo , Dinh Phung , Thuy-Trang Vu, and Dat Quoc Nguyen
@inproceedings{nguyen2025improving,title={Improving Table Understanding with {LLM}s and Entity-Oriented Search},author={Nguyen, Thi-Nhung and Ngo, Hoang and Phung, Dinh and Vu, Thuy-Trang and Nguyen, Dat Quoc},booktitle={Second Conference on Language Modeling},year={2025},}
ACL
SCAR: Data Selection via Style Consistency-Aware Response Ranking for Efficient Instruction-Tuning of Large Language Models
Zhuang Li , Yuncheng Hua , Thuy-Trang Vu, Haolan Zhan , Lizhen Qu , and 1 more author
In Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers) , 2025
Recent studies emphasize that manually ensuring a consistent response style and maintaining high data quality in training sets can significantly improve the performance of fine-tuned Large Language Models (LLMs) while reducing the number of training examples needed. However, the precise definition of style and the relationship between style, data quality, and LLM performance remain unclear. This research identifies two key stylistic elements in responses: linguistic form and instructional surprisal. We find that, among training data of comparable quality, higher consistency in these response elements leads to better LLM performance. Inspired by this, we introduce Style Consistency-Aware Response Ranking (SCAR), which automatically prioritizes instruction-response pairs in the training set based on their response stylistic consistency. By selecting the most style-consistent examples, using 0.7% of the full dataset in certain cases, the fine-tuned LLMs can match or even surpass the performance of models trained on the entire dataset in coding and open-ended question-answering benchmarks. Code and data are available at https://github.com/zhuang-li/SCAR.
@inproceedings{li-etal-2025-scar,title={{SCAR}: Data Selection via Style Consistency-Aware Response Ranking for Efficient Instruction-Tuning of Large Language Models},author={Li, Zhuang and Hua, Yuncheng and Vu, Thuy-Trang and Zhan, Haolan and Qu, Lizhen and Haffari, Gholamreza},editor={Che, Wanxiang and Nabende, Joyce and Shutova, Ekaterina and Pilehvar, Mohammad Taher},booktitle={Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},year={2025},address={Vienna, Austria},publisher={Association for Computational Linguistics},doi={10.18653/v1/2025.acl-long.625},pages={12756--12790},isbn={979-8-89176-251-0}}
IWSLT
Conversational SimulMT: Efficient Simultaneous Translation with Large Language Models
Minghan Wang , Thuy-Trang Vu, Yuxia Wang , Ehsan Shareghi , and Gholamreza Haffari
In Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025) , 2025
Simultaneous machine translation (SimulMT) presents a challenging trade-off between translation quality and latency. Recent studies have shown that LLMs can achieve good performance in SimulMT tasks. However, this often comes at the expense of high inference costs and latency. In this paper, we propose a conversational SimulMT framework to enhance the inference efficiency of LLM-based SimulMT through multi-turn-dialogue-based decoding, where source and target chunks interleave in the translation history, enabling the reuse of the Key-Value cache. To adapt LLMs to the proposed conversational decoding, we create supervised fine-tuning training data by segmenting parallel sentences using an alignment tool and a novel augmentation technique to enhance generalization. Our experiments with Llama2-7b-chat on three SimulMT benchmarks demonstrate that the proposed method preserves the LLM's superior translation quality while achieving computational latency comparable to specialized SimulMT models.
@inproceedings{wang-etal-2025-conversational,title={Conversational {S}imul{MT}: Efficient Simultaneous Translation with Large Language Models},author={Wang, Minghan and Vu, Thuy-Trang and Wang, Yuxia and Shareghi, Ehsan and Haffari, Gholamreza},editor={Salesky, Elizabeth and Federico, Marcello and Anastasopoulos, Antonis},booktitle={Proceedings of the 22nd International Conference on Spoken Language Translation (IWSLT 2025)},year={2025},address={Vienna, Austria (in-person and online)},publisher={Association for Computational Linguistics},doi={10.18653/v1/2025.iwslt-1.8},pages={93--105},isbn={979-8-89176-272-5}}
2024
AJCAI
Active Continual Learning: On Balancing Knowledge Retention and Learnability
EMNLP
Exploring the Potential of Multimodal LLM with Knowledge-Intensive Multimodal ASR
Minghan Wang , Yuxia Wang , Thuy-Trang Vu, Ehsan Shareghi , and Reza Haf
In Findings of the Association for Computational Linguistics: EMNLP 2024 , 2024
Recent advancements in multimodal large language models (MLLMs) have made significant progress in integrating information across various modalities, yet real-world applications in educational and scientific domains remain challenging. This paper introduces the Multimodal Scientific ASR (MS-ASR) task, which focuses on transcribing scientific conference videos by leveraging visual information from slides to enhance the accuracy of technical terminology. Recognizing that traditional metrics like WER fall short of assessing performance accurately, we propose severity-aware WER (SWER), which considers the content type and severity of ASR errors. We propose the Scientific Vision Augmented ASR (SciVASR) framework as a baseline method, enabling MLLMs to improve transcript quality through post-editing. Evaluations of state-of-the-art MLLMs, including GPT-4o, show a 45% improvement over speech-only baselines, highlighting the importance of multimodal information integration.
@inproceedings{wang-etal-2024-exploring,title={Exploring the Potential of Multimodal {LLM} with Knowledge-Intensive Multimodal {ASR}},author={Wang, Minghan and Wang, Yuxia and Vu, Thuy-Trang and Shareghi, Ehsan and Haf, Reza},editor={Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},booktitle={Findings of the Association for Computational Linguistics: EMNLP 2024},year={2024},address={Miami, Florida, USA},publisher={Association for Computational Linguistics},doi={10.18653/v1/2024.findings-emnlp.776},pages={13274--13288}}
EMNLP
Mixture-of-Skills: Learning to Optimize Data Usage for Fine-Tuning Large Language Models
Minghao Wu , Thuy-Trang Vu, Lizhen Qu , and Reza Haf
In Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing , 2024
Large language models (LLMs) are typically fine-tuned on diverse and extensive datasets sourced from various origins to develop a comprehensive range of skills, such as writing, reasoning, chatting, coding, and more. Each skill has unique characteristics, and these datasets are often heterogeneous and imbalanced, making the fine-tuning process highly challenging. Balancing the development of each skill while ensuring the model maintains its overall performance requires sophisticated techniques and careful dataset curation. In this work, we propose a general, model-agnostic, reinforcement learning framework, Mixture-of-Skills (MoS), that learns to optimize data usage automatically during the fine-tuning process. This framework ensures the optimal comprehensive skill development of LLMs by dynamically adjusting the focus on different datasets based on their current learning state. To validate the effectiveness of MoS, we conduct extensive experiments using three diverse LLM backbones on two widely used benchmarks and demonstrate that MoS substantially enhances model performance. Building on the success of MoS, we propose MoSpec, an adaptation for task-specific fine-tuning, which harnesses the utilities of various datasets for a specific purpose. Our work underlines the significance of dataset rebalancing and presents MoS as a powerful, general solution for optimizing data usage in the fine-tuning of LLMs for various purposes.
@inproceedings{wu-etal-2024-mixture-skills,title={Mixture-of-Skills: Learning to Optimize Data Usage for Fine-Tuning Large Language Models},author={Wu, Minghao and Vu, Thuy-Trang and Qu, Lizhen and Haf, Reza},editor={Al-Onaizan, Yaser and Bansal, Mohit and Chen, Yun-Nung},booktitle={Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing},year={2024},address={Miami, Florida, USA},publisher={Association for Computational Linguistics},doi={10.18653/v1/2024.emnlp-main.787},pages={14226--14240}}
Direct Evaluation of Chain-of-Thought in Multi-hop Reasoning with Knowledge Graphs
Minh-Vuong Nguyen , Linhao Luo , Fatemeh Shiri , Dinh Phung , Yuan-Fang Li , and 2 more authors
In Findings of the Association for Computational Linguistics: ACL 2024 , 2024
Large language models (LLMs) have demonstrated strong reasoning abilities when prompted to generate chain-of-thought (CoT) explanations alongside answers. However, previous research on evaluating LLMs has solely focused on answer accuracy, neglecting the correctness of the generated CoT. In this paper, we delve deeper into the CoT reasoning capabilities of LLMs in multi-hop question answering by utilizing knowledge graphs (KGs). We propose a novel discriminative and generative CoT evaluation paradigm to assess LLMs’ knowledge of reasoning and the accuracy of the generated CoT. Through experiments conducted on 5 different families of LLMs across 2 multi-hop question-answering datasets, we find that LLMs possess sufficient knowledge to perform reasoning. However, there exists a significant disparity between answer accuracy and faithfulness of the CoT generated by LLMs, indicating that they often arrive at correct answers through incorrect reasoning.
@inproceedings{nguyen-etal-2024-direct,title={Direct Evaluation of Chain-of-Thought in Multi-hop Reasoning with Knowledge Graphs},author={Nguyen, Minh-Vuong and Luo, Linhao and Shiri, Fatemeh and Phung, Dinh and Li, Yuan-Fang and Vu, Thuy-Trang and Haffari, Gholamreza},editor={Ku, Lun-Wei and Martins, Andre and Srikumar, Vivek},booktitle={Findings of the Association for Computational Linguistics: ACL 2024},year={2024},address={Bangkok, Thailand},publisher={Association for Computational Linguistics},doi={10.18653/v1/2024.findings-acl.168},pages={2862--2883}}
Simultaneous Machine Translation with Large Language Models
Minghan Wang , Thuy-Trang Vu, Jinming Zhao , Fatemeh Shiri , Ehsan Shareghi , and 1 more author
In Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association , 2024
Real-world simultaneous machine translation (SimulMT) systems face more challenges than just the quality-latency trade-off. They also need to address issues related to robustness with noisy input, processing long contexts, and flexibility for knowledge injection. These challenges demand models with strong language understanding and generation capabilities, which dedicated MT models are often not equipped with. In this paper, we investigate the possibility of applying Large Language Models (LLMs) to SimulMT tasks by using existing incremental-decoding methods with a newly proposed RALCP algorithm for latency reduction. We conducted experiments using the Llama2-7b-chat model on nine different languages from the MUST-C dataset. The results show that the LLM outperforms dedicated MT models in terms of BLEU and LAAL metrics. Further analysis indicates that the LLM has advantages in tuning efficiency and robustness. However, the computational cost of the LLM remains a significant obstacle to its application in SimulMT.
@inproceedings{wang-etal-2024-simultaneous,title={Simultaneous Machine Translation with Large Language Models},author={Wang, Minghan and Vu, Thuy-Trang and Zhao, Jinming and Shiri, Fatemeh and Shareghi, Ehsan and Haffari, Gholamreza},editor={Baldwin, Tim and Rodr{\'i}guez M{\'e}ndez, Sergio Jos{\'e} and Kuo, Nicholas},booktitle={Proceedings of the 22nd Annual Workshop of the Australasian Language Technology Association},year={2024},address={Canberra, Australia},publisher={Association for Computational Linguistics},pages={89--103}}
2023
Koala: An Index for Quantifying Overlaps with Pre-training Corpora
Thuy-Trang Vu, Xuanli He , Gholamreza Haffari , and Ehsan Shareghi
In Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations , 2023
In very recent years more attention has been placed on probing the role of pre-training data in the downstream behaviour of Large Language Models (LLMs). Despite the importance, there is no public tool that supports such analysis of pre-training corpora at large scale. To help research in this space, we launch Koala, a searchable index over large pre-training corpora using lossless compressed suffix arrays with highly efficient compression rate and search support. In its first release we index the public proportion of OPT 175B, GPT-3, GPT-Neo, LLaMA, BERT, ELECTRA, RoBERTa, XLNet pre-training corpora. Koala provides a framework to do forensic analysis on the current and future benchmarks as well as to assess the degree of memorization in the output from the LLMs. Koala is available for public use at https://koala-index.erc.monash.edu/.
@inproceedings{vu-etal-2023-koala,title={Koala: An Index for Quantifying Overlaps with Pre-training Corpora},author={Vu, Thuy-Trang and He, Xuanli and Haffari, Gholamreza and Shareghi, Ehsan},editor={Feng, Yansong and Lefever, Els},booktitle={Proceedings of the 2023 Conference on Empirical Methods in Natural Language Processing: System Demonstrations},year={2023},address={Singapore},publisher={Association for Computational Linguistics},url={https://aclanthology.org/2023.emnlp-demo.7},doi={10.18653/v1/2023.emnlp-demo.7},pages={90--98}}
Systematic Assessment of Factual Knowledge in Large Language Models
Linhao Luo , Trang Vu, Dinh Phung , and Reza Haf
In Findings of the Association for Computational Linguistics: EMNLP 2023 , 2023
Previous studies have relied on existing question-answering benchmarks to evaluate the knowledge stored in large language models (LLMs). However, this approach has limitations regarding factual knowledge coverage, as it mostly focuses on generic domains which may overlap with the pretraining data. This paper proposes a framework to systematically assess the factual knowledge of LLMs by leveraging knowledge graphs (KGs). Our framework automatically generates a set of questions and expected answers from the facts stored in a given KG, and then evaluates the accuracy of LLMs in answering these questions. We systematically evaluate the state-of-the-art LLMs with KGs in generic and specific domains. The experiment shows that ChatGPT is consistently the top performer across all domains. We also find that LLM performance depends on instruction finetuning, domain, and question complexity, and is prone to adversarial context.
@inproceedings{luo-etal-2023-systematic,title={Systematic Assessment of Factual Knowledge in Large Language Models},author={Luo, Linhao and Vu, Trang and Phung, Dinh and Haf, Reza},editor={Bouamor, Houda and Pino, Juan and Bali, Kalika},booktitle={Findings of the Association for Computational Linguistics: EMNLP 2023},year={2023},address={Singapore},publisher={Association for Computational Linguistics},url={https://aclanthology.org/2023.findings-emnlp.885},doi={10.18653/v1/2023.findings-emnlp.885},pages={13272--13286}}
2022
Can Domains Be Transferred across Languages in Multi-Domain Multilingual Neural Machine Translation?
Thuy-Trang Vu, Shahram Khadivi , Xuanli He , Dinh Phung , and Gholamreza Haffari
In Proceedings of the Seventh Conference on Machine Translation (WMT) , 2022
Previous works mostly focus on either multilingual or multi-domain aspects of neural machine translation (NMT). This paper investigates whether the domain information can be transferred across languages on the composition of multi-domain and multilingual NMT, particularly for the incomplete data condition where in-domain bitext is missing for some language pairs. Our results in the curated leave-one-domain-out experiments show that multi-domain multilingual (MDML) NMT can boost zero-shot translation performance up to +10 gains on BLEU, as well as aid the generalisation of multi-domain NMT to the missing domain. We also explore strategies for effective integration of multilingual and multi-domain NMT, including language and domain tag combination and auxiliary task training. We find that learning domain-aware representations and adding target-language tags to the encoder leads to effective MDML-NMT.
@inproceedings{vu-etal-2022-domains,title={Can Domains Be Transferred across Languages in Multi-Domain Multilingual Neural Machine Translation?},author={Vu, Thuy-Trang and Khadivi, Shahram and He, Xuanli and Phung, Dinh and Haffari, Gholamreza},booktitle={Proceedings of the Seventh Conference on Machine Translation (WMT)},year={2022},address={Abu Dhabi, United Arab Emirates (Hybrid)},publisher={Association for Computational Linguistics},url={https://aclanthology.org/2022.wmt-1.34},pages={381--396}}
Domain Generalisation of NMT: Fusing Adapters with Leave-One-Domain-Out Training
Thuy-Trang Vu, Shahram Khadivi , Dinh Phung , and Gholamreza Haffari
In Findings of the Association for Computational Linguistics: ACL 2022 , 2022
Generalising to unseen domains is under-explored and remains a challenge in neural machine translation. Inspired by recent research in parameter-efficient transfer learning from pretrained models, this paper proposes a fusion-based generalisation method that learns to combine domain-specific parameters. We propose a leave-one-domain-out training strategy to avoid information leakage, addressing the challenge of not knowing the test domain at training time. Empirical results on three language pairs show that our proposed fusion method outperforms other baselines by up to +0.8 BLEU score on average.
@inproceedings{vu-etal-2022-domain,title={Domain Generalisation of {NMT}: Fusing Adapters with Leave-One-Domain-Out Training},author={Vu, Thuy-Trang and Khadivi, Shahram and Phung, Dinh and Haffari, Gholamreza},booktitle={Findings of the Association for Computational Linguistics: ACL 2022},year={2022},address={Dublin, Ireland},publisher={Association for Computational Linguistics},url={https://aclanthology.org/2022.findings-acl.49},doi={10.18653/v1/2022.findings-acl.49},pages={582--588}}
2021
Generalised Unsupervised Domain Adaptation of Neural Machine Translation with Cross-Lingual Data Selection
Thuy-Trang Vu, Xuanli He , Dinh Phung , and Gholamreza Haffari
In Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing , 2021
This paper considers the unsupervised domain adaptation problem for neural machine translation (NMT), where we assume access to only monolingual text in either the source or target language in the new domain. We propose a cross-lingual data selection method to extract in-domain sentences in the missing language side from a large generic monolingual corpus. Our proposed method trains an adaptive layer on top of multilingual BERT by contrastive learning to align the representation between the source and target language. This then enables the transferability of the domain classifier between the languages in a zero-shot manner. Once the in-domain data is detected by the classifier, the NMT model is then adapted to the new domain by jointly learning translation and domain discrimination tasks. We evaluate our cross-lingual data selection method on NMT across five diverse domains in three language pairs, as well as a real-world scenario of translation for COVID-19. The results show that our proposed method outperforms other selection baselines by up to +1.5 BLEU score.
@inproceedings{vu-etal-2021-generalised,title={Generalised Unsupervised Domain Adaptation of Neural Machine Translation with Cross-Lingual Data Selection},author={Vu, Thuy-Trang and He, Xuanli and Phung, Dinh and Haffari, Gholamreza},booktitle={Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},year={2021},address={Online and Punta Cana, Dominican Republic},publisher={Association for Computational Linguistics},url={https://aclanthology.org/2021.emnlp-main.268},doi={10.18653/v1/2021.emnlp-main.268},pages={3335--3346}}
2020
Effective Unsupervised Domain Adaptation with Adversarially Trained Language Models
Thuy-Trang Vu, Dinh Phung , and Gholamreza Haffari
In Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP) , 2020
Recent work has shown the importance of adaptation of broad-coverage contextualised embedding models on the domain of the target task of interest. Current self-supervised adaptation methods are simplistic, as the training signal comes from a small percentage of randomly masked-out tokens. In this paper, we show that careful masking strategies can bridge the knowledge gap of masked language models (MLMs) about the domains more effectively by allocating self-supervision where it is needed. Furthermore, we propose an effective training strategy by adversarially masking out those tokens which are harder to reconstruct by the underlying MLM. The adversarial objective leads to a challenging combinatorial optimisation problem over subsets of tokens, which we tackle efficiently through relaxation to a variational lower bound and dynamic programming. On six unsupervised domain adaptation tasks involving named entity recognition, our method strongly outperforms the random masking strategy and achieves up to +1.64 F1 score improvements.
@inproceedings{vu-etal-2020-effective,title={Effective Unsupervised Domain Adaptation with Adversarially Trained Language Models},author={Vu, Thuy-Trang and Phung, Dinh and Haffari, Gholamreza},booktitle={Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},year={2020},address={Online},publisher={Association for Computational Linguistics},url={https://aclanthology.org/2020.emnlp-main.497},doi={10.18653/v1/2020.emnlp-main.497},pages={6163--6173}}
2019
Learning How to Active Learn by Dreaming
Thuy-Trang Vu, Ming Liu , Dinh Phung , and Gholamreza Haffari
In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics , 2019
Heuristic-based active learning (AL) methods are limited when the data distributions of the underlying learning problems vary. Recent data-driven AL policy learning methods are also restricted to learning from closely related domains. We introduce a new sample-efficient method that learns the AL policy directly on the target domain of interest by using wake and dream cycles. Our approach interleaves between querying the annotation of the selected datapoints to update the underlying student learner and improving the AL policy using simulation where the current student learner acts as an imperfect annotator. We evaluate our method on cross-domain and cross-lingual text classification and named entity recognition tasks. Experimental results show that our dream-based AL policy training strategy is more effective than applying the pretrained policy without further fine-tuning and better than the existing strong baseline methods that use heuristics or reinforcement learning.
@inproceedings{vu-etal-2019-learning,title={Learning How to Active Learn by Dreaming},author={Vu, Thuy-Trang and Liu, Ming and Phung, Dinh and Haffari, Gholamreza},booktitle={Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},year={2019},address={Florence, Italy},publisher={Association for Computational Linguistics},url={https://aclanthology.org/P19-1401},doi={10.18653/v1/P19-1401},pages={4091--4101}}
2018
Automatic Post-Editing of Machine Translation: A Neural Programmer-Interpreter Approach
Thuy-Trang Vu, and Gholamreza Haffari
In Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing , 2018
Automated Post-Editing (PE) is the task of automatically correcting common and repetitive errors found in machine translation (MT) output. In this paper, we present a neural programmer-interpreter approach to this task, resembling the way that humans perform post-editing using discrete edit operations, which we refer to as programs. Our model outperforms previous neural models for inducing PE programs on the WMT17 APE task for German-English by up to +1 BLEU score and -0.7 TER score.
@inproceedings{vu-haffari-2018-automatic,title={Automatic Post-Editing of Machine Translation: A Neural Programmer-Interpreter Approach},author={Vu, Thuy-Trang and Haffari, Gholamreza},booktitle={Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},year={2018},address={Brussels, Belgium},publisher={Association for Computational Linguistics},url={https://aclanthology.org/D18-1341},doi={10.18653/v1/D18-1341},pages={3048--3053}}