
# Workflows

## combine_answers(answers, research_question, use_gpt4=False, temperature=0.1)

Combines a list of answers into a concise literature review using the OpenAI API.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `answers` | `list` | A list of answers to combine. | *required* |
| `research_question` | `str` | The research question to use in the literature review. | *required* |
| `use_gpt4` | `bool` | Whether to use GPT-4 for the literature review. | `False` |
| `temperature` | `float` | The temperature to use for the OpenAI API. | `0.1` |

Returns:

| Type | Description |
| --- | --- |
| `str` | The literature review. |

Examples:

```python
>>> answers = ["Answer 1", "Answer 2"]
>>> research_question = "What is the impact of AI on society?"
>>> combine_answers(answers, research_question)
"The impact of AI on society is significant. Answer 1...Answer 2..."
```
Source code in `autoresearcher/workflows/literature_review/combine_answers.py`:

```python
def combine_answers(answers, research_question, use_gpt4=False, temperature=0.1):
    """
    Combines a list of answers into a concise literature review using the OpenAI API.
    Args:
      answers (list): A list of answers to combine.
      research_question (str): The research question to use in the literature review.
      use_gpt4 (bool, optional): Whether to use GPT-4 for the literature review. Defaults to False.
      temperature (float, optional): The temperature to use for the OpenAI API. Defaults to 0.1.
    Returns:
      str: The literature review.
    Examples:
      >>> answers = ["Answer 1", "Answer 2"]
      >>> research_question = "What is the impact of AI on society?"
      >>> combine_answers(answers, research_question)
      "The impact of AI on society is significant. Answer 1...Answer 2..."
    """
    answer_list = "\n\n".join(answers)
    prompt = literature_review_prompt.format(
        research_question=research_question, answer_list=answer_list
    )

    # Calculate the tokens in the input
    input_tokens = count_tokens(prompt)

    # Calculate the remaining tokens for the response
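    # (4080 presumably leaves a small margin under gpt-3.5-turbo's 4096-token
    # window; note it is applied even when use_gpt4=True)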
    remaining_tokens = 4080 - input_tokens
    max_tokens = max(remaining_tokens, 0)
    literature_review = openai_call(
        prompt, use_gpt4=use_gpt4, temperature=temperature, max_tokens=max_tokens
    )

    return literature_review
```
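
The subtraction above budgets the completion so that prompt plus response stay inside the model's context window. A minimal sketch of the same pattern, assuming `tiktoken` is installed and that the 4080 figure targets gpt-3.5-turbo's 4,096-token window:

```python
import tiktoken

def count_tokens(text: str, model: str = "gpt-3.5-turbo") -> int:
    # Tokenize with the model's own encoding and count the tokens
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(text))

CONTEXT_WINDOW = 4096  # assumed model limit; 4080 above leaves a small margin

prompt = "Write a literature review answering: ...\n\nAnswer 1\n\nAnswer 2"
input_tokens = count_tokens(prompt)

# Never request a negative completion budget
max_tokens = max(CONTEXT_WINDOW - input_tokens, 0)
```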

## literature_review(research_question, output_file=None)

Generates an academic literature review for a given research question.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `research_question` | `str` | The research question to generate a literature review for. | *required* |
| `output_file` | `str` | The file path to save the literature review to. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `str` | The generated literature review. |

Examples:

```python
>>> literature_review('What is the impact of AI on healthcare?')
Research question: What is the impact of AI on healthcare?
Auto Researcher initiated!
Generating keyword combinations...
Keyword combinations generated!
Fetching top 20 papers...
Top 20 papers fetched!
Extracting research findings from papers...
Research findings extracted!
Synthesizing answers...
Literature review generated!
Academic Literature Review: ...
References:
1. ...
Keyword combinations used to search for papers: 1. AI healthcare, 2. impact AI healthcare
```
Source code in `autoresearcher/workflows/literature_review/literature_review.py`:

```python
def literature_review(research_question, output_file=None):
    """
    Generates an academic literature review for a given research question.
    Args:
      research_question (str): The research question to generate a literature review for.
      output_file (str, optional): The file path to save the literature review to.
    Returns:
      str: The generated literature review.
    Examples:
      >>> literature_review('What is the impact of AI on healthcare?')
      Research question: What is the impact of AI on healthcare?
      Auto Researcher initiated!
      Generating keyword combinations...
      Keyword combinations generated!
      Fetching top 20 papers...
      Top 20 papers fetched!
      Extracting research findings from papers...
      Research findings extracted!
      Synthesizing answers...
      Literature review generated!
      Academic Literature Review: ...
      References:
      1. ...
      Keyword combinations used to search for papers: 1. AI healthcare, 2. impact AI healthcare
    """
    semantic_scholar = SemanticScholarLoader()

    print(
        colored(
            f"Research question: {research_question}", "yellow", attrs=["bold", "blink"]
        )
    )
    print(colored("Auto Researcher initiated!", "yellow"))

    # Generate keyword combinations
    print(colored("Generating keyword combinations...", "yellow"))
    keyword_combinations = generate_keyword_combinations(research_question)
    print(colored("Keyword combinations generated!", "green"))

    # Fetch the top 20 papers for the research question
    search_query = research_question
    print(colored("Fetching top 20 papers...", "yellow"))
    top_papers = semantic_scholar.fetch_and_sort_papers(
        search_query, keyword_combinations=keyword_combinations, year_range="2000-2023"
    )
    print(colored("Top 20 papers fetched!", "green"))

    # Extract answers from the top 20 papers
    print(colored("Extracting research findings from papers...", "yellow"))
    answers = extract_answers_from_papers(top_papers, research_question)
    print(colored("Research findings extracted!", "green"))

    # Combine answers into a concise academic literature review
    print(colored("Synthesizing answers...", "yellow"))
    literature_review = combine_answers(answers, research_question)
    print(colored("Literature review generated!", "green"))

    # Extract citations from answers and append a references list to the literature review
    citations = extract_citations(answers)
    references_list = "\n".join(
        [f"{idx + 1}. {citation}" for idx, citation in enumerate(citations)]
    )
    literature_review += "\n\nReferences:\n" + references_list

    # Append the keyword combinations to the literature review
    literature_review += "\n\nKeyword combinations used to search for papers: "
    literature_review += ", ".join(
        [f"{i+1}. {combination}" for i, combination in enumerate(keyword_combinations)]
    )

    # Print the academic literature review
    print(colored("Academic Literature Review:", "cyan"), literature_review, "\n")

    # Save the literature review to a file if the output_file argument is provided
    if output_file:
        with open(output_file, "w") as f:
            f.write(literature_review)
        print(colored(f"Literature review saved to {output_file}", "green"))

    return literature_review
```
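
A quick usage sketch, assuming the package is importable under the path shown above and that the required API keys (e.g. `OPENAI_API_KEY`) are set in the environment:

```python
from autoresearcher.workflows.literature_review.literature_review import (
    literature_review,
)

# Generate the review and also persist it; output_file is optional
review = literature_review(
    "What is the impact of AI on healthcare?",
    output_file="ai_healthcare_review.txt",
)
print(review)
```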

## extract_answers_from_papers(papers, research_question, use_gpt4=False, temperature=0, max_tokens=150)

Extracts answers from paper abstracts.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `papers` | `list` | A list of papers. | *required* |
| `research_question` | `str` | The research question to answer. | *required* |
| `use_gpt4` | `bool` | Whether to use GPT-4 for answer extraction. | `False` |
| `temperature` | `float` | The temperature for answer extraction. | `0` |
| `max_tokens` | `int` | The maximum number of tokens for answer extraction. | `150` |

Returns:

| Type | Description |
| --- | --- |
| `list` | A list of answers extracted from the paper abstracts. |

Examples:

```python
>>> extract_answers_from_papers(papers, research_question)
['Answer 1 SOURCE: Citation 1', 'Answer 2 SOURCE: Citation 2']
```
Source code in `autoresearcher/workflows/literature_review/extract_answers_from_papers.py`:

```python
def extract_answers_from_papers(
    papers, research_question, use_gpt4=False, temperature=0, max_tokens=150
):
    """
    Extracts answers from paper abstracts.
    Args:
      papers (list): A list of papers.
      research_question (str): The research question to answer.
      use_gpt4 (bool, optional): Whether to use GPT-4 for answer extraction. Defaults to False.
      temperature (float, optional): The temperature for GPT-4 answer extraction. Defaults to 0.
      max_tokens (int, optional): The maximum number of tokens for GPT-4 answer extraction. Defaults to 150.
    Returns:
      list: A list of answers extracted from the paper abstracts.
    Examples:
      >>> extract_answers_from_papers(papers, research_question)
      ['Answer 1 SOURCE: Citation 1', 'Answer 2 SOURCE: Citation 2']
    """
    answers = []
    default_answer = "No answer found."

    for paper in papers:
        abstract = paper.get("abstract", "")
        title = colored(paper.get("title", ""), "magenta", attrs=["bold"])
        if "externalIds" in paper and "DOI" in paper["externalIds"]:
            citation = get_citation_by_doi(paper["externalIds"]["DOI"])
        else:
            citation = paper["url"]
        prompt = extract_answer_prompt.format(
            research_question=research_question, abstract=abstract
        )
        answer = openai_call(
            prompt, use_gpt4=use_gpt4, temperature=temperature, max_tokens=max_tokens
        )

        print(f"Processing paper: {title}")

        if answer != default_answer:
            # Tag the answer with its citation so sources survive later synthesis
            answer_with_citation = f"{answer} SOURCE: {citation}"
            answers.append(answer_with_citation)
            print(colored("Answer found!", "green"))
            print(colored(answer_with_citation, "cyan"))

    return answers
```
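
The function expects Semantic Scholar-style paper dicts. A minimal sketch of the input shape, with made-up values (the field names follow the code above):

```python
papers = [
    {
        "title": "AI in Diagnostics",
        "abstract": "We examine how machine learning assists clinical diagnosis...",
        "url": "https://www.semanticscholar.org/paper/xxxx",
        # When a DOI is present, it is used to fetch a formatted citation
        "externalIds": {"DOI": "10.1000/example.doi"},
    },
]

answers = extract_answers_from_papers(
    papers, "What is the impact of AI on healthcare?"
)
```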

## extract_citations(answers)

Extracts bibliographical citations from a list of answers.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `answers` | `list` | A list of strings containing answers. | *required* |

Returns:

| Type | Description |
| --- | --- |
| `list` | A list of strings containing bibliographical citations. |

Examples:

```python
>>> answers = ["This is an answer. SOURCE: Smith, J. (2020).",
...            "This is another answer. SOURCE: Jones, A. (2021)."]
>>> extract_citations(answers)
['Smith, J. (2020).', 'Jones, A. (2021).']
```
Source code in `autoresearcher/workflows/literature_review/extract_citations.py`:

```python
def extract_citations(answers):
    """
    Extracts bibliographical citations from a list of answers.
    Args:
      answers (list): A list of strings containing answers.
    Returns:
      list: A list of strings containing bibliographical citations.
    Examples:
      >>> answers = ["This is an answer. SOURCE: Smith, J. (2020).",
      ...            "This is another answer. SOURCE: Jones, A. (2021)."]
      >>> extract_citations(answers)
      ["Smith, J. (2020)", "Jones, A. (2021)"]
    """
    citations = []
    for answer in answers:
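        # Use rfind so only the text after the last "SOURCE: " marker is taken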
        citation_start = answer.rfind("SOURCE: ")
        if citation_start != -1:
            citation = answer[citation_start + len("SOURCE: ") :]
            citations.append(citation)
    return citations
```
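
Answers without a `SOURCE:` marker are skipped rather than producing an empty entry; a quick sketch:

```python
answers = [
    "Finding one. SOURCE: Smith, J. (2020).",
    "Finding two with no citation attached.",
]
print(extract_citations(answers))
# ['Smith, J. (2020).']
```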