Python - Extract Literals
I am working on a project where we execute Python code generated by an LLM on the backend in a container. LLMs often generate code containing literals that could be exposed as parameters in a web UI.
My first attempt was to do this in JavaScript, but it proved unreliable, so I created a simple backend service to do it instead:
import ast
from typing import List, Dict, Union, Tuple
def extract_literals(code: str) -> List[Dict[str, Union[str, int, List, Tuple[int, int]]]]:
    """Collect the scalar and list literals that appear in Python source code.

    Each literal is reported as a dict with the keys ``value``, ``type``,
    ``line_start``, ``line_end``, ``col_start`` and ``col_end``.  ``type`` is
    the Python type name of a scalar (``'str'``, ``'int'``, ``'float'`` or
    ``'bool'``) or ``'array'`` for a list literal.

    A list is reported as a single ``'array'`` entry only when it is non-empty
    and *every* element is a str/int/float constant.  Any other list (empty,
    nested, or containing names, calls, ``None`` ...) is not reported itself;
    instead its elements are visited so the literals inside it are still found.

    Args:
        code: Python source text to analyse.

    Returns:
        The literals in the order the AST traversal encounters them.

    Raises:
        SyntaxError: If ``code`` cannot be parsed by ``ast.parse``.
    """
    supported = (str, int, float)  # bool is a subclass of int, so it passes too
    literals = []

    def record(node, value, type_name):
        # Source positions let the caller splice an edited value back in.
        literals.append({
            'value': value,
            'type': type_name,
            'line_start': node.lineno,
            'line_end': node.end_lineno,
            'col_start': node.col_offset,
            'col_end': node.end_col_offset
        })

    class LiteralVisitor(ast.NodeVisitor):
        def visit_Constant(self, node):
            if isinstance(node.value, supported):
                record(node, node.value, type(node.value).__name__)

        def visit_List(self, node):
            elements = node.elts
            is_plain = bool(elements) and all(
                isinstance(elt, ast.Constant) and isinstance(elt.value, supported)
                for elt in elements
            )
            if is_plain:
                # Reported as one unit; the elements are deliberately not
                # reported a second time as individual scalars.
                record(node, [elt.value for elt in elements], 'array')
            else:
                # Reporting only the constant subset would drop the other
                # elements if the list were rewritten, so descend instead.
                self.generic_visit(node)

    LiteralVisitor().visit(ast.parse(code))
    return literals
The code uses Python's ast module to walk the syntax tree and retrieve literals and lists of literals.
On the Frontend I have a dynamic form generator
// Renders the form control that matches the type name reported for a literal.
// Returns null when the type has no matching control or an "array" literal
// does not actually carry an array value.
const renderInput = (literal: Literal, index: number) => {
  // Widened to string so that type names outside the declared Literal type
  // (the backend emits Python type names such as "float") still compile here
  // and unknown ones fall through to `default`.
  switch (literal.type as string) {
    case "bool":
      return (
        <Select
          defaultValue={literal.value ? "True" : "False"} // Convert boolean to Python string format
          onValueChange={(value) =>
            handleValueChange(index, value === "True")
          }
        >
          <SelectTrigger className="h-8">
            <SelectValue />
          </SelectTrigger>
          <SelectContent>
            <SelectItem value="True">True</SelectItem>
            <SelectItem value="False">False</SelectItem>
          </SelectContent>
        </Select>
      );
    case "int":
      return (
        <Input
          type="number"
          defaultValue={String(literal.value)}
          onChange={(e) => handleValueChange(index, Number(e.target.value))}
          className="h-8"
        />
      );
    case "float":
      // The backend reports Python floats as "float"; step="any" lets the
      // number input accept fractional values.
      return (
        <Input
          type="number"
          step="any"
          defaultValue={String(literal.value)}
          onChange={(e) => handleValueChange(index, Number(e.target.value))}
          className="h-8"
        />
      );
    case "str":
      return (
        <Input
          type="text"
          defaultValue={literal.value as string}
          onChange={(e) => handleValueChange(index, e.target.value)}
          className="h-8"
        />
      );
    case "array":
      if (!isArrayValue(literal.value)) {
        console.error("Expected array value for array type literal");
        return null;
      }
      return (
        <ArrayInput
          value={literal.value}
          onChange={(newValue) => handleValueChange(index, newValue)}
        />
      );
    default:
      // Unknown type name: render nothing explicitly instead of falling off
      // the end of the switch.
      return null;
  }
};
The result is shown below. Literals in LLM-generated code can be extracted and modified by the user without asking the LLM to generate the code again.
I got additional feedback asking where the literals came from: whether they are assigned to variables or passed inside function calls. Here is my modified code, which also captures these associations:
import ast
from typing import List, Dict, Union, Tuple
def extract_code_elements(code: str) -> Dict[str, List[Dict[str, Union[str, int, List, Tuple[int, int]]]]]:
    """Extract literals from Python source together with where they are used.

    Every str/int/float constant, and every non-empty list made up solely of
    such constants, is reported exactly once.  A list that does not qualify
    (empty, nested, or containing names, calls, ``None`` ...) is not reported
    itself; its elements are visited so literals inside it are still found.

    Args:
        code: Python source text to analyse.

    Returns:
        A dict with three lists:

        * ``'literals'``: one dict per literal with the keys ``id``
          (``'lit_<n>'``, numbered in discovery order), ``value``, ``type``
          (Python type name, or ``'array'`` for lists), ``line_start``,
          ``line_end``, ``col_start``, ``col_end``, ``associated_variable``
          (first plain-name target when the literal is the direct value of an
          assignment, else ``None``), ``is_kwarg``, ``kwarg_name`` and
          ``context`` (``'Function call: <name>'`` of the nearest enclosing
          call, else ``None``).
        * ``'variables'``: ``{'name', 'associated_literal_id'}`` for every
          plain-name target that is directly assigned a reported literal.
        * ``'associations'``: ``{'variable_name', 'literal_id', 'value',
          'type'}`` for every literal that has an associated variable.

    Raises:
        SyntaxError: If ``code`` cannot be parsed by ``ast.parse``.
    """
    tree = ast.parse(code)
    literals = []
    variables = []
    scalar_types = (int, float, str)  # bool is a subclass of int, so it passes too

    class CodeElementVisitor(ast.NodeVisitor):
        def add_literal(self, node, value, type_name, associated_variable=None,
                        is_kwarg=False, kwarg_name=None):
            """Append one literal record and return it."""
            literal_info = {
                # Ids are dense and follow discovery order.
                'id': f'lit_{len(literals)}',
                'value': value,
                'type': type_name,
                'line_start': node.lineno,
                'line_end': node.end_lineno,
                'col_start': node.col_offset,
                'col_end': node.end_col_offset,
                'associated_variable': associated_variable,
                'is_kwarg': is_kwarg,
                'kwarg_name': kwarg_name,
                'context': self.get_context(node)
            }
            literals.append(literal_info)
            return literal_info

        def get_context(self, node):
            """Describe the nearest enclosing call with a nameable callee."""
            current = node
            while current is not None:
                if isinstance(current, ast.Call):
                    callee = current.func
                    if isinstance(callee, ast.Attribute):
                        return f"Function call: {callee.attr}"
                    if isinstance(callee, ast.Name):
                        return f"Function call: {callee.id}"
                # `parent` is attached by generic_visit during traversal.
                current = getattr(current, 'parent', None)
            return None

        def generic_visit(self, node):
            """Add parent references during traversal"""
            for child in ast.iter_child_nodes(node):
                child.parent = node
                self.visit(child)

        def record(self, node, value, type_name):
            """Record a literal, deriving its associations from its parent."""
            parent = getattr(node, 'parent', None)
            target_names = []
            # Only the assigned value itself is associated; `.id` is read only
            # from plain names, so attribute/subscript/tuple targets are safe.
            if isinstance(parent, ast.Assign) and parent.value is node:
                target_names = [
                    target.id for target in parent.targets
                    if isinstance(target, ast.Name)
                ]
            is_kwarg = isinstance(parent, ast.keyword)
            literal_info = self.add_literal(
                node,
                value,
                type_name,
                associated_variable=target_names[0] if target_names else None,
                is_kwarg=is_kwarg,
                kwarg_name=parent.arg if is_kwarg else None
            )
            # Registered only after the literal exists, so the id can never
            # point at a literal that was not recorded.
            for name in target_names:
                variables.append({
                    'name': name,
                    'associated_literal_id': literal_info['id']
                })

        def visit_Constant(self, node):
            if isinstance(node.value, scalar_types):
                self.record(node, node.value, type(node.value).__name__)

        def visit_List(self, node):
            elements = node.elts
            is_plain = bool(elements) and all(
                isinstance(elt, ast.Constant) and isinstance(elt.value, scalar_types)
                for elt in elements
            )
            if is_plain:
                # Reported as one unit; elements are not reported again.
                self.record(node, [elt.value for elt in elements], 'array')
            else:
                # A partial value list would misrepresent the source list, so
                # look for literals among the elements instead.
                self.generic_visit(node)

    visitor = CodeElementVisitor()
    visitor.visit(tree)

    # Flattened view of the literals that are tied to a variable.
    associations = [
        {
            'variable_name': lit['associated_variable'],
            'literal_id': lit['id'],
            'value': lit['value'],
            'type': lit['type']
        }
        for lit in literals if lit['associated_variable']
    ]
    return {
        'literals': literals,
        'variables': variables,
        'associations': associations
    }
# Demo: run the extractor over a small matplotlib script and print a report
# for every literal it finds.
code = """
import matplotlib.pyplot as plt
import numpy as np
list = [1,2,3]
x = np.arange(5)
y1 = np.random.randn(5)
y2 = np.random.randn(5)
plt.bar(x, y1, label='Data 1')
plt.bar(x, y2, label='Data 2')
plt.legend()
plt.show()
"""
results = extract_code_elements(code)
print("\nAll Literals found:")
for lit in results['literals']:
    # Identity and value always come first.
    print(
        f"ID: {lit['id']}",
        f"Value: {lit['value']} (Type: {lit['type']})",
        sep="\n",
    )
    # Call context and keyword name are shown only when present.
    if lit['context']:
        print(f"Context: {lit['context']}")
    if lit['is_kwarg']:
        print(f"Keyword Argument: {lit['kwarg_name']}")
    # The trailing empty item yields the blank separator line between entries.
    print(
        f"Associated Variable: {lit['associated_variable']}",
        f"Location: Line {lit['line_start']}, Col {lit['col_start']}",
        "",
        sep="\n",
    )
The UI was updated to include the additional information